# Gym's Pacman 

## Imports 

In [1]:
import random
from collections import namedtuple

import gym
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

## Device 

In [6]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

##  Replay Memory

We will store our agent's memory so that this data can be reused later for training; sampling from it randomly gives better results with our neural network.

- So let's create a ```namedtuple``` named ```Transitions``` that represents a single transition in our environment. It stores the ```("state", "action")``` pair together with the resulting ```("next_state", "reward")```.
- And a ```class``` named ```ReplayMemory```, with a limited size, that holds the transitions collected during training.


In [10]:
# One step of experience: the state the agent was in, the action it took,
# the state that followed and the reward it received.
Transitions = namedtuple(
    "Transitions",
    ["state", "action", "next_state", "reward"],
)

In [11]:
class ReplayMemory(object):
    """
    Fixed-capacity buffer of ``Transitions`` that can be sampled at random.

    The buffer grows until it holds ``max_size`` transitions; after that,
    every new transition overwrites the oldest one still stored.
    """

    def __init__(self, max_size):
        """
        Creates the ReplayMemory object with the maximum size of transitions

        Args:
            max_size: maximum number of transitions kept at any one time.
        """
        self.max_size = max_size
        self.memory = []
        # Index of the slot the next push() writes to; wraps around at max_size.
        self.position = 0

    def push(self, *args):
        """
        Saves a transition during training

        Args:
            *args: the fields of a ``Transitions`` tuple, in order:
                state, action, next_state, reward.
        """
        # Grow the list until it reaches capacity; after that, slots are reused.
        if len(self.memory) < self.max_size:
            self.memory.append(None)
        # The namedtuple defined in this file is ``Transitions`` (plural).
        self.memory[self.position] = Transitions(*args)
        self.position = (self.position + 1) % self.max_size

    def sample(self, batch_size):
        """
        Takes a random sample of our training memory

        Args:
            batch_size: number of transitions to draw, without replacement.

        Returns:
            A list of ``batch_size`` stored transitions.

        Raises:
            ValueError: if ``batch_size`` is larger than the number of stored
                transitions (raised by ``random.sample``).
        """
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """
        Defines the len() of the ReplayMemory

        Returns:
            The number of transitions currently stored.
        """
        return len(self.memory)

## DQN 

Here we will create a **Deep Q Network**: a convolutional network that takes the state of the environment as ```input``` and returns as ```output``` the expected value of every possible action in that same state.

In [None]:
class DQN(nn.Module):
    
    def __init__(self, h, w, outputs):
        """
        Creates the ConvNet 
        """