# Deep Convolutional Q-Learning for Pac-Man

## Part 0 - Installing the required packages and importing the libraries

### Installing Gymnasium

In [1]:
!pip install gymnasium
!pip install "gymnasium[atari, accept-rom-license]"
!pip install ale-py
!apt-get install -y swig
!pip install gymnasium[box2d]

/bin/bash: apt-get: command not found


### Importing the libraries

In [1]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque
from torch.utils.data import DataLoader, TensorDataset


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Users/gthrone/anaconda3/envs/udemyai/lib/python3.12/runpy.py", line 198, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/gthrone/anaconda3/envs/udemyai/lib/python3.12/runpy.py", line 88, in _run_code
    exec(code, run_globals)
  File "/Users/gthrone/anaconda3/envs/udemyai/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/gthrone/anaconda3/envs/udemyai/lib/python3.12/site-packages/traitlets/config/application.

## Part 1 - Building the AI

### Creating the architecture of the Neural Network

In [2]:
class Network(nn.Module):
    """Convolutional Q-network mapping a batch of RGB frames to one value per action.

    Four convolution + batch-norm stages extract image features; three fully
    connected layers then turn them into ``action_size`` outputs.

    Args:
        action_size: number of discrete actions (width of the output layer).
        seed: value passed to ``torch.manual_seed`` before the layers are created.
    """

    def __init__(self, action_size, seed = 42):
        super().__init__()
        self.seed = torch.manual_seed(seed)
        # Convolutional feature extractor; 3 input channels for RGB.
        # Kernel sizes and strides come from previous testing.
        self.conv1 = nn.Conv2d(3, 32, kernel_size = 8, stride = 4)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size = 4, stride = 2)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 64, kernel_size = 3, stride = 1)
        self.bn3 = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 128, kernel_size = 3, stride = 1)
        self.bn4 = nn.BatchNorm2d(128)
        # Fully connected head. For a 128x128 input the spatial size shrinks
        # 128 -> 31 -> 14 -> 12 -> 10 through the four convolutions, which
        # gives 10*10*128 flattened features. 512 and 256 come from experimentation.
        self.fc1 = nn.Linear(10*10*128, 512)
        self.fc2 = nn.Linear(512, 256)
        # Input width must equal fc2's output width (256).
        self.fc3 = nn.Linear(256, action_size)

    def forward(self, state):
        """Return the network output for ``state``.

        Args:
            state: image batch with 3 channels; the flattened size expected by
                ``fc1`` corresponds to 128x128 frames.

        Returns:
            Tensor with one row per batch element and ``action_size`` columns.
        """
        x = F.relu(self.bn1(self.conv1(state)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

## Part 2 - Training the AI

### Setting up the environment

In [3]:
import ale_py
import gymnasium as gym
# The course originally used the id 'MsPacmanDeterministic-v0' (with
# full_action_space=False to simplify things); that id seems out of date,
# so the ALE v5 id is used instead.
# old docs: https://gymnasium.farama.org/v0.28.0/environments/atari/pacman/
# new docs: https://ale.farama.org/environments/pacman/
env = gym.make('ALE/MsPacman-v5', full_action_space=False)

# Observation shape, its first dimension, and the size of the action set.
state_shape = env.observation_space.shape
state_size = state_shape[0]
number_actions = env.action_space.n

print('State shape: ', state_shape)
print('State size: ', state_size)
print('Number of actions: ', number_actions)

State shape:  (210, 160, 3)
State size:  210
Number of actions:  9


A.L.E: Arcade Learning Environment (version 0.11.2+ecc1138)
[Powered by Stella]


### Initializing the hyperparameters

In [4]:
# Training hyperparameters.
learning_rate = 0.0005
minibatch_size = 64
discount_factor = 0.99
# No soft-update parameter is defined: it is not needed here.

### Preprocessing the frames

In [5]:
from PIL import Image
print(torch.__version__)
from torchvision import transforms


def preprocess_frame(frame):
    """Turn a raw frame array into a 128x128 tensor with a leading batch axis.

    Args:
        frame: image array accepted by ``PIL.Image.fromarray``.

    Returns:
        The resized frame converted by ``transforms.ToTensor`` with an extra
        dimension of size 1 inserted at position 0.
    """
    pipeline = transforms.Compose([
        transforms.Resize((128,128)),
        transforms.ToTensor(),
    ])
    image = Image.fromarray(frame)
    tensor = pipeline(image)
    return tensor.unsqueeze(0)

2.2.2


### Implementing the DCQN class

### Initializing the DCQN agent

### Training the DCQN agent

## Part 3 - Visualizing the results

In [None]:
import glob
import io
import base64
import imageio
from IPython.display import HTML, display

def show_video_of_model(agent, env_name):
    """Play one episode with ``agent`` and save the rendered frames to ``video.mp4``.

    Args:
        agent: object whose ``act(state)`` returns an action accepted by ``env.step``.
        env_name: environment id passed to ``gym.make``.
    """
    env = gym.make(env_name, render_mode='rgb_array')
    frames = []
    try:
        state, _ = env.reset()
        done = False
        while not done:
            frames.append(env.render())
            action = agent.act(state)
            # step() returns (obs, reward, terminated, truncated, info); the
            # episode is over when either flag is set, otherwise a truncated
            # episode would keep this loop running forever.
            state, _, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
    finally:
        # Release the environment even if acting or stepping raised.
        env.close()
    imageio.mimsave('video.mp4', frames, fps=30)

# Record the agent in the same environment id the notebook builds its
# environment from ('ALE/MsPacman-v5') rather than the legacy id.
show_video_of_model(agent, 'ALE/MsPacman-v5')

def show_video():
    """Embed the first ``*.mp4`` in the working directory as an inline HTML video.

    Prints "Could not find video" when no ``.mp4`` file is present.
    """
    mp4list = glob.glob('*.mp4')
    if not mp4list:
        print("Could not find video")
        return
    # Read-only mode is enough, and the context manager closes the handle.
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    display(HTML(data='''<video alt="test" autoplay
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))

# Embed the first *.mp4 found in the working directory in the cell output.
show_video()