# Stuff to Render The Environment

In [1]:
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1

In [2]:
!apt-get update > /dev/null 2>&1
!apt-get install cmake > /dev/null 2>&1
!pip install --upgrade setuptools 2>&1
!pip install ez_setup > /dev/null 2>&1
!pip install gym[atari] > /dev/null 2>&1

Collecting setuptools
[?25l  Downloading https://files.pythonhosted.org/packages/f7/30/f963996d7efea5a336455a3c727711469280c318e2711e295007dea04d7e/setuptools-52.0.0-py3-none-any.whl (784kB)
[K     |▍                               | 10kB 16.7MB/s eta 0:00:01[K     |▉                               | 20kB 14.0MB/s eta 0:00:01[K     |█▎                              | 30kB 16.1MB/s eta 0:00:01[K     |█▊                              | 40kB 12.4MB/s eta 0:00:01[K     |██                              | 51kB 14.0MB/s eta 0:00:01[K     |██▌                             | 61kB 15.3MB/s eta 0:00:01[K     |███                             | 71kB 13.0MB/s eta 0:00:01[K     |███▍                            | 81kB 14.0MB/s eta 0:00:01[K     |███▊                            | 92kB 11.5MB/s eta 0:00:01[K     |████▏                           | 102kB 11.8MB/s eta 0:00:01[K     |████▋                           | 112kB 11.8MB/s eta 0:00:01[K     |█████                           | 122

In [3]:
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only
import tensorflow as tf
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay

In [4]:
# Create and start a pyvirtualdisplay Display (1400x900, visible=0) -- presumably
# so the environment can render without a physical screen (xvfb is installed above).
# NOTE(review): the name `display` is re-bound by `from IPython import display`
# in the Matplotlib setup cell when the inline backend is active, after which
# this Display handle is no longer reachable by name.
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

<pyvirtualdisplay.display.Display at 0x7fa271859240>

In [5]:
def show_video():
  """Embed the first MP4 found under ``video/`` in the notebook output.

  The file is base64-encoded into an inline HTML ``<video>`` element. If no
  ``video/*.mp4`` file exists, "Could not find video" is printed instead.

  Returns:
    None.
  """
  mp4list = glob.glob('video/*.mp4')
  if not mp4list:
    print("Could not find video")
    return

  # Read-only binary mode inside a context manager: the handle is always
  # closed, and a recording without write permission can still be shown.
  with open(mp4list[0], 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())

  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
  """Return `env` wrapped in a `Monitor` that targets ./video with force=True."""
  return Monitor(env, './video', force=True)

In [10]:
# Create LunarLander-v2 and wrap it so episodes are recorded under ./video.
# NOTE(review): this cell's execution count (10) is higher than those of the
# cells that follow it, so the saved notebook was not executed top to bottom.
env = wrap_env(gym.make("LunarLander-v2"))

In [11]:
# Show the environment's action space (the saved output is Discrete(4)).
print(env.action_space)

Discrete(4)


# Import/Setup

This section imports the required packages and sets up the environment, plotting, and compute device.

Packages

In [6]:
import math
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T

from tqdm import tqdm

Setting up the Lunar Lander Gym environment

In [7]:
!pip install box2d-py
!pip install gym[Box_2D]
import gym
env = wrap_env(gym.make("LunarLander-v2"))

Collecting box2d-py
[?25l  Downloading https://files.pythonhosted.org/packages/06/bd/6cdc3fd994b0649dcf5d9bad85bd9e26172308bbe9a421bfc6fdbf5081a6/box2d_py-2.3.8-cp36-cp36m-manylinux1_x86_64.whl (448kB)
[K     |▊                               | 10kB 17.1MB/s eta 0:00:01[K     |█▌                              | 20kB 13.6MB/s eta 0:00:01[K     |██▏                             | 30kB 16.2MB/s eta 0:00:01[K     |███                             | 40kB 12.9MB/s eta 0:00:01[K     |███▋                            | 51kB 14.5MB/s eta 0:00:01[K     |████▍                           | 61kB 13.4MB/s eta 0:00:01[K     |█████▏                          | 71kB 13.0MB/s eta 0:00:01[K     |█████▉                          | 81kB 14.3MB/s eta 0:00:01[K     |██████▋                         | 92kB 11.8MB/s eta 0:00:01[K     |███████▎                        | 102kB 11.9MB/s eta 0:00:01[K     |████████                        | 112kB 11.9MB/s eta 0:00:01[K     |████████▊                 

Setting up Matplotlib

In [8]:
# True when Matplotlib's active backend name contains 'inline'.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    # NOTE(review): this re-binds the name `display`, which an earlier cell
    # assigned to the pyvirtualdisplay Display object.
    from IPython import display

# Turn on Matplotlib's interactive mode.
plt.ion()

Select the GPU if one is available; otherwise fall back to the CPU.

In [9]:
# Use CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Replay Memory

The replay memory stores past transitions so they can be sampled and reused later, giving the model a memory of what happened in earlier steps.

In [12]:
 Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))


class ReplayMemory:
    """Fixed-capacity ring buffer of `Transition` records.

    New transitions are appended until `capacity` entries exist; after that the
    write cursor wraps around and the oldest entries are overwritten.
    """

    def __init__(self, capacity):
        # `position` is the index the next pushed transition is written to.
        self.capacity, self.memory, self.position = capacity, [], 0

    def push(self, *args):
        """Store one transition built from `args`, overwriting the oldest when full."""
        buffer = self.memory
        if self.capacity > len(buffer):
            # Grow by one placeholder slot; it is filled immediately below.
            buffer.append(None)
        buffer[self.position] = Transition(*args)
        next_slot = self.position + 1
        self.position = next_slot % self.capacity

    def sample(self, batch_size):
        """Return `batch_size` stored transitions chosen by `random.sample`."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

# Deep Q-Network

In [13]:
class DQN(nn.Module):
    """Small fully connected Q-network: inputs -> 16 -> 32 -> outputs.

    Args:
        inputs: Size of the observation vector fed to the first layer.
        outputs: Number of actions; one Q-value is produced per action.
    """

    def __init__(self, inputs, outputs):
        super(DQN, self).__init__()
        self.dense1 = nn.Linear(inputs, 16)
        self.dense2 = nn.Linear(16, 32)
        self.dense3 = nn.Linear(32, outputs)

    def forward(self, x):
        """Return the Q-value estimates for observation tensor `x`."""
        x = F.relu(self.dense1(x))
        x = F.relu(self.dense2(x))
        # The output layer stays linear: Q-values are expected returns and can
        # be negative, so a ReLU here would clamp them to zero and make every
        # bad action look equally (and falsely) neutral.
        return self.dense3(x)

# Getting The State

In [14]:
# Reset the environment and keep the observation it returns.
state = env.reset()

In [15]:
# Display the observation (the saved output is an 8-element float32 array).
state

array([-1.3734817e-03,  1.3984876e+00, -1.3914530e-01, -5.5255502e-01,
        1.5984217e-03,  3.1518478e-02,  0.0000000e+00,  0.0000000e+00],
      dtype=float32)

# Training

In [16]:
# Hyperparameters and problem dimensions for training.
BATCH_SIZE = 128  # not referenced by the training loop in the cells shown here
gamma = 0.9  # discount factor applied to the bootstrapped Q-value
epsilon = 0.9  # initial probability of taking a random action
epsilon_decay = 0.9999  # epsilon is multiplied by this after every episode
TARGET_UPDATE = 10  # not referenced by the training loop in the cells shown here
action_space = 4  # matches the Discrete(4) action space printed earlier
observation_space = 8  # matches the 8-element observation displayed above

In [17]:
# The Q-network, placed on the selected device. Despite the name, this is the
# only network in the notebook: it is used for both acting and learning.
target_net = DQN(observation_space, action_space).to(device)

# Mean-squared error between predicted and target Q-values.
criterion = nn.MSELoss()

# RMSprop over the network's parameters, with no hyperparameters overridden.
optimizer = optim.RMSprop(target_net.parameters())

In [18]:
# Online one-step Q-learning: after every environment step, the network is
# regressed toward a target vector that equals its own prediction everywhere
# except at the action actually taken, which gets the TD target.
epochs = 25000
episode_durations = []  # number of steps taken in each episode

for i in tqdm(range(epochs)):
    state = env.reset()
    done = False
    steps = 0
    while not done:
        # Keep the graph for this forward pass; it is the prediction we train.
        state_t = torch.as_tensor(state, dtype=torch.float32, device=device)
        q_pred = target_net(state_t)

        # Epsilon-greedy action selection.
        if np.random.random() < epsilon:
            action = env.action_space.sample()
        else:
            action = int(torch.argmax(q_pred).item())

        observation, reward, done, _ = env.step(action)

        # TD target: r + gamma * max_a' Q(s', a'), with no bootstrap once the
        # episode has ended.
        # NOTE(review): `done` may also be set by a time limit; treating that
        # as terminal is an approximation -- confirm if it matters.
        with torch.no_grad():
            next_t = torch.as_tensor(observation, dtype=torch.float32, device=device)
            next_q_max = target_net(next_t).max().item()
        td_target = reward if done else reward + gamma * next_q_max

        # Only the entry for the chosen action carries an error signal; the
        # other entries equal the prediction and contribute zero loss.
        y = q_pred.detach().clone()
        y[action] = td_target

        optimizer.zero_grad()
        loss = criterion(q_pred, y)
        loss.backward()
        optimizer.step()

        state = observation
        steps += 1

    episode_durations.append(steps)
    epsilon *= epsilon_decay


100%|██████████| 25000/25000 [53:12<00:00,  7.83it/s]


# Viewing the Final Result

In [19]:
def plot_durations(durations=None):
    """Plot per-episode durations and, once available, their 100-episode mean.

    Args:
        durations: Optional sequence of episode lengths to plot. When omitted,
            the module-level ``episode_durations`` is used, which must then
            exist (a NameError is raised otherwise).
    """
    if durations is None:
        durations = episode_durations

    plt.figure(2)
    plt.clf()
    durations_t = torch.as_tensor(durations, dtype=torch.float)
    plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations_t.numpy())

    # With at least 100 episodes, overlay the sliding 100-episode mean; the
    # first 99 positions are padded with zeros so both curves share an x-axis.
    if len(durations_t) >= 100:
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy())

    plt.pause(0.001)  # brief pause so the figure gets drawn
    if is_ipython:
        display.clear_output(wait=True)
        display.display(plt.gcf())

In [20]:
# Roll out one episode with the trained network acting greedily (no random
# exploration), then close the environment and show the recorded video.
observation = env.reset()
done = False

while not done:
    env.render()

    # Pick the action with the highest predicted Q-value.
    with torch.no_grad():
        obs_t = torch.as_tensor(observation, dtype=torch.float32, device=device)
        action = int(target_net(obs_t).argmax().item())

    observation, reward, done, info = env.step(action)

env.close()
show_video()