# Deep Reinforcement Learning with PyTorch to Solve the OpenAI Gym Mountain Car

In [2]:
#import required packages
import gym
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

In [None]:
# 'MountainCar-v0' is the id gym registers for the discrete mountain-car task
# (there is no 'MountainCar-v1'); `.unwrapped` returns the underlying
# environment without gym's wrappers.
env = gym.make('MountainCar-v0').unwrapped

# setting matplotlib: only pull in IPython's display helpers when plots are
# rendered inline in a notebook.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# check if gpu is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Replay Memory:

Replay memory stores the transitions the agent experiences so that this data can be reused later when training our DQN.

In [None]:
Transition = namedtuple('transition', ('state', 'action', 'next_state', 'reward'))


class ReplayMem(object):
    """Fixed-capacity cyclic buffer of ``Transition`` records.

    Once ``capacity`` transitions have been stored, every further push
    overwrites the oldest entry.
    """

    def __init__(self, capacity):
        """Create an empty buffer.

        Args:
            capacity: Maximum number of transitions kept; must be positive.

        Raises:
            ValueError: If ``capacity`` is not positive.
        """
        if capacity <= 0:
            raise ValueError("capacity must be a positive integer")
        self.capacity = capacity
        self.memory = []
        self.position = 0  # index the next push writes to

    def push(self, *args):
        """Save a transition.

        Args:
            *args: The ``state, action, next_state, reward`` fields of a
                ``Transition``, in that order.

        Raises:
            TypeError: If the arguments do not match ``Transition``'s fields;
                the buffer is left unchanged in that case.
        """
        # Build the record before touching the buffer so that a bad argument
        # list cannot leave a stray ``None`` placeholder behind.
        record = Transition(*args)
        if len(self.memory) < self.capacity:
            # While filling up, ``position`` always equals ``len(memory)``.
            self.memory.append(record)
        else:
            self.memory[self.position] = record
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                transitions (raised by ``random.sample``).
        """
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.memory)
        

### DQN Neural Network Architecture:

In [2]:


class Network(nn.Module):
    """Convolutional Q-network: three conv/batch-norm stages and a two-layer head.

    Args:
        h: Height of the input images in pixels.
        w: Width of the input images in pixels.
        out: Number of values produced per input image.
        hidden: Width of the hidden fully connected layer.

    Raises:
        ValueError: If ``h`` or ``w`` is too small to survive the three
            stride-2, kernel-5 convolutions.
    """

    def __init__(self, h, w, out, hidden=512):
        super(Network, self).__init__()

        # Training hyper-parameters stored on the module.
        self.total_actions = 3
        self.gamma = 0.99
        self.final_epsilon = 0.0001
        self.initial_epsilon = 0.1
        self.num_iterations = 2000000
        self.replay_mem_size = 10000
        self.batch_size = 32

        # NOTE(review): conv1 expects 3 input channels -- confirm this matches
        # the preprocessing applied to frames before they reach the network.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        # Was a second assignment to ``bn2``, which left conv3 without its own
        # batch-norm layer.
        self.bn3 = nn.BatchNorm2d(32)
        self.relu3 = nn.ReLU()

        # The size of the flattened conv output depends on the input size, so
        # derive it from h and w instead of hard-coding it.
        conv_h = self._conv_out(self._conv_out(self._conv_out(h)))
        conv_w = self._conv_out(self._conv_out(self._conv_out(w)))
        if conv_h < 1 or conv_w < 1:
            raise ValueError(
                "input of size %dx%d is too small for three 5x5 stride-2 "
                "convolutions" % (h, w)
            )

        self.fc4 = nn.Linear(conv_h * conv_w * 32, hidden)
        self.relu4 = nn.ReLU()
        self.fc5 = nn.Linear(hidden, out)

    @staticmethod
    def _conv_out(size, kernel_size=5, stride=2):
        """Return the output length of an unpadded convolution along one axis."""
        return (size - (kernel_size - 1) - 1) // stride + 1

    def forward(self, x):
        """Map a batch of images to one row of ``out`` values per image.

        Args:
            x: Tensor of shape ``(batch, 3, h, w)``.

        Returns:
            Tensor of shape ``(batch, out)``.
        """
        out = self.relu1(self.bn1(self.conv1(x)))
        out = self.relu2(self.bn2(self.conv2(out)))
        out = self.relu3(self.bn3(self.conv3(out)))
        # Flatten the feature maps before the fully connected head.
        out = out.reshape(out.size(0), -1)
        out = self.relu4(self.fc4(out))
        out = self.fc5(out)

        return out

### Initialize weights:

In [3]:
def ini_weights(m):
    """Initialise a ``Conv2d`` or ``Linear`` layer in place.

    Weights are drawn uniformly from [-0.01, 0.01] and the bias, when the
    layer has one, is set to 0.01. Any other module is left untouched, so the
    function can be passed to ``nn.Module.apply``.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        # ``uniform_`` is the in-place initialiser; the un-suffixed
        # ``uniform`` is deprecated.
        torch.nn.init.uniform_(m.weight, -0.01, 0.01)
        # Layers built with bias=False have ``bias is None``.
        if m.bias is not None:
            m.bias.data.fill_(0.01)

### Convert image to tensor:

In [4]:
def img_to_tensor(img):
    """Turn a 3-axis image array into a float32 tensor with the last axis first.

    The axes are reordered from (0, 1, 2) to (2, 0, 1) -- presumably
    height x width x channels to channels x height x width -- and the result
    is placed on the GPU when CUDA is available.

    Args:
        img: NumPy array with three axes.

    Returns:
        A ``torch.float32`` tensor holding the reordered data.
    """
    channels_first = img.transpose(2, 0, 1).astype(np.float32)
    tensor = torch.from_numpy(channels_first)
    # Move to the GPU only when one is present.
    return tensor.cuda() if torch.cuda.is_available() else tensor

### Resizing and colour to grey scale conversion:

In [None]:
def resize_and_rgb2grey(img):
    """Crop a frame, shrink it to 84x84, convert it to grey scale and binarise it.

    Args:
        img: Colour image array as accepted by OpenCV, indexed ``img[row, col]``.

    Returns:
        Array of shape ``(84, 84, 1)`` in which every non-zero pixel has been
        set to 255.
    """
    # Imported locally: OpenCV is used here but the file's import cell does
    # not bring it into scope.
    import cv2

    # Keep at most the top-left 288 rows x 404 columns of the frame.
    img = img[0:288, 0:404]
    # Resize the cropped frame (the original referenced an undefined name
    # ``image`` here), then collapse the colour channels to one.
    img_data = cv2.cvtColor(cv2.resize(img, (84, 84)), cv2.COLOR_BGR2GRAY)
    # Binarise: anything that is not pure black becomes white.
    img_data[img_data > 0] = 255
    # Add a trailing channel axis.
    img_data = np.reshape(img_data, (84, 84, 1))
    return img_data

### Training Model: