## Libraries

In [1]:
import skimage.measure
import numpy as np
import gym
from gym import wrappers
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
import torch.optim as optim

## Functions

In [2]:
def preprocess(rgb_tensor):
    '''
    Reduce a raw RGB frame to a small, standardized 2D grayscale image.

    Steps, in order:
      1. keep rows 30..193 of the frame (all columns, all channels);
      2. collapse the first three channels into one with a weighted sum;
      3. take the maximum over non-overlapping 2x2 blocks;
      4. subtract the mean and divide by sqrt(variance + 1e-5).

    Returns the resulting 2D numpy array.
    '''
    luma_weights = [0.2989, 0.5870, 0.1140]  ## using Matlab's formula
    playfield = rgb_tensor[30:194, :, :]
    gray = np.dot(playfield[..., :3], luma_weights)
    pooled = skimage.measure.block_reduce(gray, (2, 2), np.max)
    ## the small constant keeps the division finite for a constant image
    centered = pooled - pooled.mean()
    return centered / np.sqrt(pooled.var() + 1e-5)

## Classes

In [3]:
class CNN(nn.Module):
    '''
    Small convolutional network: two conv layers, then two linear layers.

    Takes single-channel images and produces 4 values per image. The flatten
    step hard-codes 32 * 4 * 4 features, so the input size must be one for
    which the second convolution yields a 4x4 map.
    '''
    def __init__(self):
        super(CNN, self).__init__()
        ## attribute names double as state_dict keys, so they must match
        ## whatever checkpoint is loaded into this model
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=4, stride=4)
        self.fc1 = nn.Linear(in_features=32 * 4 * 4, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=4)

    def forward(self, x):
        '''Run a batch through both conv layers and both linear layers.'''
        feature_maps = F.relu(self.conv2(F.relu(self.conv1(x))))
        flat = feature_maps.view(-1, 32 * 4 * 4)  ## one row per image
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)  ## no activation on the final layer

## Load Model

In [4]:
# Project directory: holds the saved weights and is reused later as the base
# directory for recordings.
# NOTE(review): absolute, machine-specific path -- edit before running elsewhere.
path = '/Users/davidziganto/Repositories/Deep_Reinforcement_Learning/'
cnn = CNN()
# map_location keeps every loaded tensor on the CPU, so a checkpoint saved from
# a GPU session still loads on a machine without CUDA (the game loop below runs
# the network on CPU tensors).
cnn.load_state_dict(torch.load(path + 'DL_RL_Atari_breakout_3e_1000t',
                               map_location=lambda storage, location: storage))

## Setup

In [5]:
# SWITCHES (1 = on, 0 = off)
gpu = 0     ## use GPUs?
record = 1  ## record training or game play
render = 1  ## show game in real-time

# GAME LENGTH
time_steps = 1000  ## max number of time steps per game

# OUTPUT LOCATION
## recordings go in a sub-folder of the project directory
path_record = ''.join((path, 'recorded_games/'))

In [6]:
# Create the Breakout emulator
env = gym.make('Breakout-v0')

# When recording is switched on, wrap the emulator in a Monitor that writes
# its files to `path_record`.
if record:
    env = wrappers.Monitor(
        env,
        directory=path_record,
        video_callable=None,  ## takes video when episode number is perfect cube
        force=True,           ## clears monitor files left by a previous run
    )

[2017-09-08 11:20:33,018] Making new env: Breakout-v0
[2017-09-08 11:20:33,194] Clearing 4 monitor files from previous run (because force=True was provided)


## Play Game

In [7]:
# Play one game, letting the network choose every action after the first.
# The try/finally guarantees env.close() runs even when the cell is interrupted
# or an error is raised, so the emulator (and the Monitor wrapper, when
# recording) can release its resources and finish writing its files.
try:
    # start game
    seq = preprocess(env.reset())

    for t in range(time_steps):

        ## show game in real-time
        if render:
            env.render()

        # take action (0=do nothing; 1=fire ball; 2=move right; 3=move left)
        if t == 0:
            action = 1  ## start game by firing ball
        else:
            # take agent-based action
            ## two unsqueeze(0) calls turn the 2D frame into a 4D batch of one image
            frame = Variable(torch.Tensor(seq).unsqueeze(0).unsqueeze(0))
            q_values = cnn(frame).data
            ## index of the largest network output; int() hands the emulator a
            ## plain Python integer whether indexing yields a number or a 0-dim tensor
            action = int(q_values.max(1)[1][0])

        # get feedback from emulator
        observation, reward, done, info = env.step(action)

        # preprocess new observation post action
        seq = preprocess(observation)

        # stop if no more moves
        if done:
            break
finally:
    # always release the emulator / recorder
    env.close()

[2017-09-08 11:20:33,239] Starting new video recorder writing to /Users/davidziganto/Repositories/Deep_Reinforcement_Learning/recorded_games/openaigym.video.0.6553.video000000.mp4


KeyboardInterrupt: 