# Visualise the pre-trained agent in action

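Set `WEIGHTS` in the first cell to the path of the saved weights (without the `.pth` extension), then run all cells. The notebook plays one episode with the loaded agent and saves it as a GIF alongside the weights file.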

In [1]:
# Checkpoint path prefix, given without the '.pth' extension; the cells below
# append the file suffix themselves.
WEIGHTS = "./weights/weights-20000"

In [2]:
import torch
import imageio
import gymnasium as gym

from upload.agent import VisionDeepQ

In [3]:
# Network description passed to VisionDeepQ: one input channel, five outputs,
# and two convolutional stages described by parallel lists.
network = dict(
    input_channels=1,
    outputs=5,
    channels=[32, 32],
    kernels=[3, 5],
    padding=["same", "same"],
)

# Optimiser description passed to VisionDeepQ. The agent is only used for
# inference in this notebook, but the constructor still receives it.
optimizer = dict(
    optimizer=torch.optim.Adam,
    lr=0.001,
    hyperparameters={},
)

# Target shape handed to the agent as `shape`.
# NOTE(review): the differences look like crop bounds on the 210x160 frame
# (rows 27..203, columns 22..64) -- confirm against VisionDeepQ.preprocess.
reshape = (1, 1) + (203 - 27, 64 - 22)

In [4]:
# Build the agent from the configuration defined above. The architecture has
# to be compatible with the checkpoint for the state dict to load.
value_agent = VisionDeepQ(
    network=network, optimizer=optimizer, shape=reshape,
)

# torch.load unpickles the file, and a full unpickle can execute arbitrary code
# from an untrusted checkpoint. A state dict only needs tensors and plain
# containers, so restrict the unpickler with weights_only=True.
# map_location keeps every tensor on the CPU regardless of where it was saved.
weights = torch.load(
    f'{WEIGHTS}.pth',
    map_location=torch.device('cpu'),
    weights_only=True,
)
# Kept as the last expression so the cell still displays the key-matching report.
value_agent.load_state_dict(weights)

<All keys matched successfully>

In [5]:
# Create the ALE Tetris environment. Frames are rendered as RGB arrays so they
# can be collected for the GIF, while observations are delivered in grayscale.
environment = gym.make(
    'ALE/Tetris-v5',
    render_mode="rgb_array",
    obs_type="grayscale",
    frameskip=4,
    repeat_action_probability=0.25,
)
# Override the frame rate advertised in the environment's metadata.
environment.metadata["render_fps"] = 30

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


In [6]:
# Play a single episode with the loaded agent, recording one rendered frame per
# environment step, then write the frames out as a GIF.

# reset() returns (observation, info); only the observation is needed here.
# NOTE(review): (1, 1, 210, 160) is presumably the raw grayscale frame shape
# expected by preprocess -- confirm against VisionDeepQ.preprocess.
state = value_agent.preprocess(environment.reset()[0], (1, 1, 210, 160))

images = []
TERMINATED = TRUNCATED = False

# Inference only: without no_grad() every forward pass would record an autograd
# graph that is never used, costing time and memory on each step.
with torch.no_grad():
    while not (TERMINATED or TRUNCATED):
        # Greedy choice: index of the largest network output along dimension 1.
        action = value_agent(state).argmax(1).item()

        state, reward, TERMINATED, TRUNCATED, _ = environment.step(action)
        state = value_agent.preprocess(state, (1, 1, 210, 160))

        images.append(environment.render())

# WEIGHTS is already a path, so it is used directly, exactly as for the '.pth'
# load. Prepending './' was redundant for relative paths and would redirect an
# absolute path into the working directory.
# NOTE(review): the unit of `duration` depends on the installed imageio
# version -- confirm the resulting playback speed.
_ = imageio.mimsave(f'{WEIGHTS}.gif', images, duration=25)