In [20]:
import gym
import numpy as np
from math import floor, ceil
import time
import logging
import warnings
from PIL import Image
import imageio
import matplotlib.pyplot as plt
%matplotlib inline

from keras.models import Sequential, load_model
from keras.layers import Dense, Flatten  
from keras.layers.convolutional import Conv2D, MaxPooling2D

In [4]:
env = gym.make('Pong-v0')

In [5]:
UP = 2
NOOP = 3
DOWN = 5

In [6]:
def random_action():
    return np.random.choice([UP, NOOP, DOWN])

def tobits(action):
    if action == 2: return 1
    elif action == 5: return -1
    else: return 0

In [60]:
def process_data(data):
    if len(data.shape) == 3:
        cropped = data[34:-16,:]
        return np.mean(cropped[::2,::2,:], axis=2).astype(np.uint8)
    elif len(data.shape) == 4:
        cropped = data[:,34:-16,:]
        downsample = np.mean(cropped[:, ::2, ::2], axis=3).astype(np.uint8)
        return downsample.reshape(*downsample.shape, 1)
    else:
        raise ValueError()

In [61]:
def play_random_game(process=False):
    states, actions, rewards = [], [], []
    
    window, game_memory = [], []
    observation = env.reset()
    done = False

    while not done:
        action = random_action()
        observation, reward, done, info = env.step(action)

        states.append(process_data(observation) if process else observation)
        actions.append(tobits(actions))
        rewards.append(reward)
            
    env.close()

    return states, actions, rewards

In [62]:
def assign_credit(rewards, discount_rate=0.9, reward_cutoff=0.01):
    discounted_rewards, R = np.zeros(len(rewards)), 0
    it = reversed(list(enumerate(rewards)))
    discounted_rewards[-1] = next(it)[1]
    
    for i, reward in it:
        discounted_rewards[i] = discounted_rewards[i+1]*discount_rate + reward
        
    return discounted_rewards

In [77]:
def get_random_data(n_games=20, discount_rate=0.9, reward_cutoff=0.1, process=True, verbose=False):
    states_memory = n_games*1000*80*80*(10**(-9)) if process else \
                    n_games*1000*210*160*3*(10**(-9))
    if (states_memory >= 2):
        warnings.warn(
            "Memory necessary to capture games will be around {} GB. Ensure proper use and garbage collection of the states array"\
            .format(states_memory)
        )
    
    states, actions, rewards = [], [], []
    avg_time, bar_length = 0, 50
    
    for n in range(1, n_games+1):
        st = time.time()
        game_states, game_actions, game_rewards = play_random_game(process=process)
        states.extend(game_states)
        actions.extend(game_actions)
        rewards.extend(assign_credit(game_rewards))
        
        avg_time += (time.time()-st-avg_time)/n
        
        bar_stretch = n_games/bar_length
        bar = "[" + int(n/bar_stretch)*'#' + ceil((n_games-n)/bar_stretch)*' ' + ']'

        if verbose:
            print("Game {:0d}, progress: {}, time left: {:05f}".format(
                n, bar, avg_time*(n_games+1-n)
            ), end='\r')
    
    if verbose:
        print()
        print("Time elapsed: {:05f}".format(avg_time*(n_games+1)))
        
    return states, actions, rewards

In [86]:
states, actions, rewards = get_random_data(n_games=10, process=False, verbose=True)

Game 10, progress: [##################################################], time left: 0.787221
Time elapsed: 8.659435


Use the following functions to display games, check behavior of agent and see if downsampling removes any pertinent data. 

In [79]:
def disp_frame(img):
    if not hasattr(img, 'shape'): 
        raise ValueError
    
    if len(img.shape) == 3:
        if img.shape[2] == 1:
            return Image.fromarray(img[:,:,0])
    
    return Image.fromarray(img)

In [80]:
def save_game(filename, data, fps=24):
    try:
        if data.shape[3] == 1:
            return imageio.mimwrite(filename, data, fps=fps)
    finally:
        return imageio.mimwrite(filename, data, fps=fps)

In [81]:
def save_og_proc_game(filename, train_data, processed_data, fps=24):
    p_data = processed_data.astype(np.uint8)
    p_dim = p_data.shape
    d_dim = train_data.shape
    
    p_data_disp = np.zeros((d_dim[0], d_dim[1], int(d_dim[2]/2), d_dim[3]), dtype=np.uint8)
    p_data_disp[:, 74:-56, :] = np.broadcast_to(p_data, (p_dim[0], p_dim[1], p_dim[2], 3))
        
    im = np.concatenate((train_data.astype(np.uint8), p_data_disp), axis=2)

    return imageio.mimwrite(filename, im, fps=fps, subrectangles=True)

In [87]:
states = np.array(states)
processed_states = process_data(states)

In [89]:
save_game('vis/not_processed.gif', states)
save_game('vis/processed.gif', processed_states)

In [88]:
save_original_processed('vis/sbs.gif', states, processed_states)