# Atari gym environment
Random policy for MsPacman, Pong and Boxing.


# To load the saved results, run the cells at the bottom of the notebook

In [1]:
import sys
import os
import math
import pdb
import time
import random
import numpy as np
import tensorflow as tf
import gym
import matplotlib.pyplot as plt
import matplotlib.cm as cm

from datetime import datetime
from skimage.color import rgb2gray
from skimage.transform import resize

Initialize the state for an environment: we need to get the first four frames and stack them together.

In [2]:
def get_init_state(env):
    """Reset the environment and build the initial four-frame state.

    Starting from the frame returned by ``env.reset()``, four frames are
    preprocessed in turn; after each one a randomly sampled action is applied
    to obtain the next frame. Rewards and done flags from these warm-up steps
    are discarded.

    Args:
        env: environment exposing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.

    Returns:
        A tuple ``(env, state, observation)`` where ``state`` is the four
        preprocessed frames stacked along axis 2 and ``observation`` is the
        raw frame returned by the last warm-up step (it is not part of
        ``state``).
    """
    frame = env.reset()
    frames = []
    for _ in range(4):
        frames.append(preprocess(frame))
        # Advance the emulator with a random action to obtain the next frame.
        frame, _reward, _done, _info = env.step(env.action_space.sample())
    return env, np.stack(frames, axis=2), frame

We need to keep four frames at a time: each time we see a new observation, we drop the oldest frame and add the newest.

In [3]:
def update_state(state, observation):
    """Slide the frame stack forward by one observation.

    The preprocessed observation is attached as a new last channel and the
    first channel is then dropped, so the number of channels is unchanged.

    Args:
        state: current frame stack; its first two axes must be 28x28 to match
            the reshaped frame.
        observation: raw frame handed to ``preprocess``.

    Returns:
        The stack without its first channel and with the new frame as its
        last channel.
    """
    newest = preprocess(observation).reshape(28, 28, 1)
    widened = np.concatenate((state, newest), axis=2)
    # Discard channel 0, i.e. the frame that has been in the stack longest.
    return widened[:, :, 1:]

In [4]:
def clip_rewards(reward):
    """Reduce a raw reward to its sign.

    Args:
        reward: numeric reward.

    Returns:
        ``1`` for a positive reward, ``-1`` for a negative reward, and ``0``
        otherwise.
    """
    if reward > 0:
        return 1
    return -1 if reward < 0 else 0

Convert each 210x160x3 uint8 frame into a 28x28 grayscale frame; four of these are stacked into a single agent state of size 28x28x4.

In [5]:
def preprocess(observation):
    """Shrink a raw frame to 28x28 and convert it to grayscale uint8.

    Args:
        observation: colour frame from the environment (the notebook text
            describes these as 210x160x3 uint8 arrays).

    Returns:
        The resized, grayscale frame cast to ``uint8``.
    """
    # preserve_range=True keeps the input's value range instead of rescaling it.
    small_rgb = resize(observation, (28, 28), preserve_range=True)
    gray = rgb2gray(small_rgb)
    return gray.astype("uint8")
    

In [6]:
def epsilon_greedy(action_value, epsilon, env):
    """Pick an action with an epsilon-greedy rule.

    Args:
        action_value: sequence or array of action values; the index of the
            largest entry is the greedy action.
        epsilon: exploration probability in ``[0, 1]``.
        env: environment exposing ``action_space.sample()`` for random actions.

    Returns:
        A randomly sampled action with probability ``epsilon``, otherwise
        ``np.argmax(action_value)``.
    """
    # random.random() lies in [0.0, 1.0), so a strict comparison never
    # explores when epsilon == 0 and always explores when epsilon == 1.
    # The previous `uniform(0, 1) <= epsilon` could still explore at
    # epsilon == 0 if the draw was exactly 0.0.
    if random.random() < epsilon:
        return env.action_space.sample()  # random action
    return np.argmax(action_value)  # best action

In [7]:
# Gym environment id to evaluate; it is also embedded in the results filename.
game = 'Pong-v3'
# Number of episodes played by the evaluation loop.
test_episodes = 100
# NOTE(review): gamma is not referenced by any visible cell; presumably a
# discount factor kept for a later part of the work — confirm or remove.
gamma = 0.99
# Environment instance shared by the cells below.
this_env = gym.make(game)  
# Number of actions reported by the environment's action space.
num_actions = this_env.action_space.n
# Upper bound on the number of steps taken in a single episode.
max_eps_length = 1000000

[2017-04-11 22:39:24,216] Making new env: Pong-v3


# Don't run this cell

In [None]:
# Evaluate a uniformly random policy: play `test_episodes` episodes, recording
# the per-episode score and step count, then print the mean/stdev of both.
score_list = []
frame_count_list = []

start = time.time()
# NOTE: the banner text hard-codes "100" while the loop length comes from test_episodes.
print("\n#------ Running 100 episodes..." )
print("#------- Total time elapsed = %s\n" % str(time.time()-start))
for episode in range(test_episodes):
    print('Running episode:{}'.format(episode+1))
    print("#------- Total time elapsed = %s\n" % str(time.time()-start))
    # frame_count is initialised but never read below; the step count that is
    # recorded comes from test_t + 1.
    score  = frame_count = 0
    # Reset the environment and build the initial four-frame state.
    this_env, s, test_obs = get_init_state(this_env)

    for test_t in range(max_eps_length):
        
        # Random policy: the state `s` is not consulted when choosing the action.
        a  = this_env.action_space.sample()
        next_test_obs, test_r, done, info = this_env.step(a)
        

        # Only positive rewards are accumulated; negative rewards are ignored.
        if test_r > 0:
            score += test_r
        

        # NOTE(review): the stack is advanced with test_obs (the observation
        # from before this step), not next_test_obs, so the state trails the
        # emulator by one frame — confirm this is intended.
        s_prime_test = update_state(s, test_obs)

        if done:
            # Episodes that reach max_eps_length without `done` are not recorded.
            score_list.append(score)
            frame_count_list.append(test_t+1)
            break
        s = s_prime_test
        test_obs = next_test_obs


# Print results: (mean score, stdev score, mean step count, stdev step count)
result_list = (np.mean(np.array(score_list)), np.std(np.array(score_list)), 
                 
                 np.mean(np.array(frame_count_list)), np.std(np.array(frame_count_list)))
print("\nMean score = %f, stdev score = %f, mean frame count = %f, stdev frame count = %f" % result_list)

# Prepare the summary statistics and target path for saving.
saved_results = list(result_list)
filename = './results/B1results_'+game+'.npy'
# Saving is disabled so that re-running this cell cannot overwrite stored results.
#np.save(filename, saved_results)

# Load Data

In [2]:
def totuple(a):
    """Recursively convert an iterable (e.g. a NumPy array) into nested tuples.

    Args:
        a: any object. Iterables are converted element by element; anything
            that cannot be iterated is returned unchanged.

    Returns:
        A tuple mirroring the nesting of ``a``, or ``a`` itself when it is a
        string or is not iterable.
    """
    # A one-character string iterates to itself, so without this guard a
    # string recurses until RecursionError instead of hitting TypeError.
    if isinstance(a, str):
        return a
    try:
        return tuple(totuple(i) for i in a)
    except TypeError:
        # Not iterable: treat as a leaf value.
        return a

In [3]:
# Names used as the suffix of each saved results file.
# NOTE(review): these use '-V3' while the gym id set earlier in the notebook is
# '-v3'; on a case-sensitive filesystem the file names must match exactly — confirm.
games = ['MsPacman-V3','Pong-V3','Boxing-V3']
# Iterate over the list itself rather than a hard-coded range(3), so the loop
# stays correct when games are added or removed.
# The loop rebinds the notebook-level names `game`, `filename` and `restored_data`.
for game in games:
    filename = './results/B1results_'+game+'.npy'
    # Presumably holds the four summary statistics in the order the format
    # string below expects (mean/stdev score, mean/stdev frame count).
    restored_data = np.load(filename)
    print('The results for {} were:'.format(game))
    print("Mean score = %f, stdev score = %f, mean frame count = %f, stdev frame count = %f\n" % totuple(restored_data))

The results for MsPacman-V3 were:
Mean score = 225.300000, stdev score = 80.317557, mean frame count = 664.630000, stdev frame count = 115.091846

The results for Pong-V3 were:
Mean score = 0.660000, stdev score = 0.815107, mean frame count = 1234.830000, stdev frame count = 151.189487

The results for Boxing-V3 were:
Mean score = 30.590000, stdev score = 4.925637, mean frame count = 2376.070000, stdev frame count = 14.027298



In [78]:
game

'Boxing-v3'