# Gym's Pacman 

## Imports 

In [1]:
import random
from collections import namedtuple

import gym
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

## Device 

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

##  Replay Memory

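We will store our agent's experience in a replay memory so that it can be reused later for training. Sampling from this memory at random breaks the correlation between consecutive transitions, which gives better results with our neural network.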

- So let's create a ```namedtuple``` called ```Transitions``` that represents a single transition in our environment. It maps a ```("state", "action")``` pair to its ```("next_state", "reward")``` result.
- And a ```class``` ```ReplayMemory``` with limited size that will hold transitions during our training.


In [4]:
# One step of agent/environment interaction, stored as an immutable record:
# the state, the action taken in it, the resulting next state and the reward.
Transitions = namedtuple(
    "Transitions",
    ["state", "action", "next_state", "reward"],
)

In [5]:
class ReplayMemory(object):
    """
    Fixed-capacity buffer of transitions used for experience replay.

    Transitions are appended until ``max_size`` entries are stored; after
    that, each new transition overwrites the oldest one (ring buffer).
    """

    def __init__(self, max_size):
        """
        Creates the ReplayMemory object with the maximum size of transitions

        max_size: maximum number of transitions kept at any time
        """
        self.max_size = max_size
        self.memory = []
        # Index of the slot the next pushed transition will be written to
        self.position = 0

    def push(self, *args):
        """
        Saves a transition during training

        args: the fields of a ``Transitions`` record, in order
              (state, action, next_state, reward)
        """
        # Grow the list until it reaches capacity; afterwards only overwrite
        if len(self.memory) < self.max_size:
            self.memory.append(None)
        self.memory[self.position] = Transitions(*args)
        # Wrap around so the oldest entry is the next one to be replaced
        self.position = (self.position + 1) % self.max_size

    def sample(self, batch_size):
        """
        Takes a random sample of our training memory

        Returns a list of ``batch_size`` distinct stored entries. Raises
        ``ValueError`` (from ``random.sample``) when ``batch_size`` is larger
        than the number of stored entries.
        """
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """
        Defines the len() of the ReplayMemory
        """
        return len(self.memory)

## DQN 

Here we will create a **Deep Q Network** with a convolutional network that will get as ```input``` the state of the environment and will return as ```output``` the expected value for every possible action in that same state.

In [7]:
class DQN(nn.Module):
    """
    Convolutional Q-network: three conv + batch-norm stages followed by a
    linear head that emits one value per action.
    """

    def __init__(self, h, w, outputs):
        """
        Builds the layers of the network

        h, w: height and width of the input images
        outputs: number of values produced by the linear head
        """
        super().__init__()

        # Layers are registered in forward order: conv/bn pairs, then the head
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)

        def size_after_convs(size, layers=3, kernel_size=5, stride=2):
            # Each unpadded conv maps size -> (size - kernel_size) // stride + 1
            for _ in range(layers):
                size = (size - kernel_size) // stride + 1
            return size

        # The head consumes the flattened 32-channel feature map
        flat_features = size_after_convs(w) * size_after_convs(h) * 32
        self.head = nn.Linear(flat_features, outputs)

    def forward(self, x):
        """
        Runs a batch through the conv stages and the linear head
        """
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = F.relu(norm(conv(x)))
        # Flatten everything except the batch dimension
        batch = x.size(0)
        return self.head(x.view(batch, -1))

## Image Processing 

In this application, our input (our state) will be the images of Pacman, so we need to process them before using them in our network.

In [8]:
# Frame preprocessing pipeline: convert to a PIL image, resize with bicubic
# interpolation (an int size of 40 is passed to T.Resize), then convert back
# to a tensor.
# Image.BICUBIC is used instead of the Image.CUBIC alias: both name the same
# filter, but the CUBIC alias was deprecated and later removed from Pillow.
resize = T.Compose([T.ToPILImage(),
                    T.Resize(40, interpolation=Image.BICUBIC),
                    T.ToTensor()])

def get_screen():
    """
    Renders the current environment frame and returns it as a network input.

    The frame is rendered as an RGB array, cropped vertically to the band
    between 40% and 80% of its height, cropped horizontally to a window 60%
    of its width positioned using ``get_cart_location``, scaled to [0, 1],
    passed through ``resize`` and returned with a leading batch dimension
    on ``device``.

    NOTE(review): the cropping and the ``get_cart_location`` helper look
    carried over from a cart-pole example - confirm they are appropriate
    for the Pacman environment.
    """
    # env.render returns height x width x channels; torch expects CHW
    frame = env.render(mode='rgb_array').transpose((2, 0, 1))
    _, frame_h, frame_w = frame.shape

    # Keep only the horizontal band between 40% and 80% of the height
    top, bottom = int(frame_h * 0.4), int(frame_h * 0.8)
    frame = frame[:, top:bottom]

    # Choose a window 60% of the frame width around the reported location,
    # clamped to the left or right edge when the location is too close to it
    view_width = int(frame_w * 0.6)
    half_view = view_width // 2
    center = get_cart_location(frame_w)
    if center < half_view:
        columns = slice(view_width)
    elif center > (frame_w - half_view):
        columns = slice(-view_width, None)
    else:
        columns = slice(center - half_view, center + half_view)
    frame = frame[:, :, columns]

    # Scale pixel values to floats in [0, 1] and wrap the array as a tensor
    scaled = np.ascontiguousarray(frame, dtype=np.float32) / 255
    tensor = torch.from_numpy(scaled)

    # Resize, then add the batch dimension (BCHW) and move to the device
    return resize(tensor).unsqueeze(0).to(device)