# Testing Notebook
The purpose of this notebook is to verify that all the components of the larger program are functioning properly.

# Setup
This section checks that all the libraries import correctly and reports which device PyTorch will use (a GPU if one is available, otherwise the CPU).

In [1]:
# Imports
import torch
import numpy as np

from agents.dqn_agent import QNetwork
from common.replay_buffer import ReplayBuffer, Experience

print("Imports successful!")

# ---------------------------------------------------------------------------
# Test configuration
# In the full program these values would be loaded from a .yaml file.
# ---------------------------------------------------------------------------

# Observation / action space
NUM_ACTIONS = 9                # Ms. Pac-Man has 9 actions
INPUT_SHAPE = (4, 84, 84)      # (num_stack, height, width)

# Replay buffer
BATCH_SIZE = 4                 # kept small so shapes are easy to check by eye
BUFFER_CAPACITY = 1000

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Imports successful!
Using device: cpu


# Testing QNetwork
The following cell checks that we can create an instance of a QNetwork and that a forward pass on a dummy batch produces an output of the expected shape.

In [None]:
print("--- Testing QNetwork ---")

# Build the network under test and move it to the device chosen in the setup cell.
q_network = QNetwork(input_shape=INPUT_SHAPE, num_actions=NUM_ACTIONS).to(device)
print("Successfully instantiated QNetwork.")
print(q_network) # Print the network architecture

# Create a dummy input batch: one leading batch dimension in front of INPUT_SHAPE.
dummy_input_shape = (BATCH_SIZE,) + INPUT_SHAPE
# Draw integer pixel values over the full uint8 range [0, 255].
# (Casting np.random.rand() output to uint8 would truncate every value to 0,
# because rand() samples floats from [0, 1).) A fixed seed keeps the dummy
# batch identical across re-runs of the notebook.
dummy_np_array = np.random.default_rng(0).integers(
    0, 256, size=dummy_input_shape, dtype=np.uint8
)

print(f"\nCreated a dummy numpy input with shape: {dummy_np_array.shape} and dtype: {dummy_np_array.dtype}")

# Prepare the data for the network: convert to float32 and scale pixels to [0, 1].
dummy_tensor = torch.tensor(dummy_np_array, dtype=torch.float32).to(device) / 255.0

print(f"Converted to a dummy torch tensor with shape: {dummy_tensor.shape} and dtype: {dummy_tensor.dtype}")

# Perform a forward pass
with torch.no_grad(): # Disable gradient calculations because we are not training
    output = q_network(dummy_tensor)

print("\nPerformed forward pass successfully.")
print(f"Output tensor shape: {output.shape}")
print(f"Expected output shape: {(BATCH_SIZE, NUM_ACTIONS)}")

# Final check: the network should emit one value per action for each batch element.
if output.shape == (BATCH_SIZE, NUM_ACTIONS):
    print("\nTest PASSED: Output shape is correct.")
else:
    print(f"\nTest FAILED: Output shape is {output.shape}, but expected {(BATCH_SIZE, NUM_ACTIONS)}.")

--- Testing QNetwork ---
Successfully instantiated QNetwork.
QNetwork(
  (conv): Sequential(
    (0): Conv2d(4, 32, kernel_size=(8, 8), stride=(4, 4))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))
    (3): ReLU()
    (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (5): ReLU()
  )
  (fc): Sequential(
    (0): Linear(in_features=3136, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=9, bias=True)
  )
)

Created a dummy numpy input with shape: (4, 4, 84, 84) and dtype: uint8
Converted to a dummy torch tensor with shape: torch.Size([4, 4, 84, 84]) and dtype: torch.float32

Performed forward pass successfully.
Output tensor shape: torch.Size([4, 9])
Expected output shape: (4, 9)

Test PASSED: Output shape is correct.


# Test replay_buffer.py

In [4]:
print('\n--- Testing Replay Buffer ---')

# Instantiate the replay buffer
replay_buffer = ReplayBuffer(capacity=BUFFER_CAPACITY, batch_size=BATCH_SIZE)
print(f"Successfully instantiated ReplayBuffer with capacity: {BUFFER_CAPACITY} and batch size: {BATCH_SIZE}")

# Create dummy experiences and add them to the buffer.
# The count is kept well below BUFFER_CAPACITY, so the length check below
# expects every added experience to still be present.
num_experiences_to_add = 10
print(f"\nAdding {num_experiences_to_add} dummy experiences to the replay buffer...")

for i in range(num_experiences_to_add):
    # Fill each state with its own index so individual experiences are distinguishable.
    dummy_state = np.ones(INPUT_SHAPE) * i
    dummy_action = i % NUM_ACTIONS
    dummy_reward = float(i)
    dummy_next_state = np.ones(INPUT_SHAPE) * (i + 1)
    dummy_done = (i == num_experiences_to_add - 1) # Last experience is terminal

    replay_buffer.add(dummy_state, dummy_action, dummy_reward, dummy_next_state, dummy_done)

# Check length of buffer
print(f"Current buffer size: {len(replay_buffer)} (should be {num_experiences_to_add})")
if len(replay_buffer) == num_experiences_to_add:
    print("TEST PASSED: Replay buffer size is correct.")
else:
    print("TEST FAILED: Replay buffer size is incorrect.")

# Sample a batch
print(f"\nSampling a batch of size {BATCH_SIZE} from the replay buffer...")
states, actions, rewards, next_states, dones = replay_buffer.sample()

# Expected shapes, defined once so the printed expectation and the check below
# cannot drift apart. State batches are flat tuples (BATCH_SIZE, *INPUT_SHAPE),
# not a nested (BATCH_SIZE, INPUT_SHAPE).
expected_state_shape = (BATCH_SIZE, *INPUT_SHAPE)
expected_vector_shape = (BATCH_SIZE,)

print(f"Sampled states shape: {states.shape}, Expected: {expected_state_shape}")
print(f"Sampled actions shape: {actions.shape}, Expected: {expected_vector_shape}")
print(f"Sampled rewards shape: {rewards.shape}, Expected: {expected_vector_shape}")
print(f"Sampled next_states shape: {next_states.shape}, Expected: {expected_state_shape}")
print(f"Sampled dones shape: {dones.shape}, Expected: {expected_vector_shape}")

# Final check for sampled batch shapes
correct_shapes = all([
    states.shape == expected_state_shape,
    actions.shape == expected_vector_shape,
    rewards.shape == expected_vector_shape,
    next_states.shape == expected_state_shape,
    dones.shape == expected_vector_shape,
])

if correct_shapes:
    print("TEST PASSED: Sampled batch shapes are correct.")
else:
    print("TEST FAILED: Sampled batch shapes are incorrect.")



--- Testing Replay Buffer ---
Successfully instantiated ReplayBuffer with capacity: 1000 and batch size: 4

Adding 10 dummy experiences to the replay buffer...
Current buffer size: 10 (should be 10)
TEST PASSED: Replay buffer size is correct.

Sampling a batch of size 4 from the replay buffer...
Sampled states shape: (4, 4, 84, 84), Expected: (4, (4, 84, 84))
Sampled actions shape: (4,), Expected: (4,)
Sampled rewards shape: (4,), Expected: (4,)
Sampled next_states shape: (4, 4, 84, 84), Expected: (4, (4, 84, 84))
Sampled dones shape: (4,), Expected: (4,)
TEST PASSED: Sampled batch shapes are correct.
