In [3]:
import gym
import ale_py as a
import time
import numpy as np
from AtariNet import *
from helper_functions import *

from PIL import Image, ImageSequence
import cv2
import os

Uncomment and run the cell below to list the registered environments, then pick a game from the output.

In [2]:
# [print(i,j) for i,j in enumerate(gym.envs.registry)]

# Genetic Algorithm

If training with a genetic algorithm, set the parameters below; if not, continue to the Policy Gradients section.

In [3]:
#Things to change for each run (genetic algorithm)

#name of the game to train on, chosen from the registry listing above;
#it is interpolated into the environment id as f'ALE/{game}'
game = "DemonAttack-v5"

#number of generations to train in this run
num_gens = 10

#number of generations already trained; 0 starts from a fresh random
#population, any other value loads the saved generation with load_gen
cur_gen = 0

In [6]:
#environment setup: create the ALE game and wrap it so frames come back in grayscale
ale = a.ALEInterface()
env = gym.wrappers.GrayScaleObservation(gym.make(f'ALE/{game}'))

#hyperparameters
#--- derived from the environment
numactions = env.action_space.n               #count of actions available to the player
inshape = [1, *env.observation_space.shape]   #screen/observation shape with a leading 1 prepended
#--- genetic algorithm
mut_power = 0.002     #mutation power
pop_size = 500        #population size
arch_size = 10        #archive size (members that stay without mutation)
test_size = 10        #size of the test set used when evaluating fitness
numsurvivors = 50     #survivors kept from each population
#--- network / rollout
poolsizes = [2, 2]    #pooling in each dimension
numconvlayers = 3     #number of convolutional layers in the network
maxt = 1000           #maximum number of frames
almightyint = 1234    #used for seeding (exclusive upper bound of the random draw below)

#positional bundle handed to the helper functions; each entry is tagged with its index
params = [
    game,           #0
    cur_gen,        #1
    numactions,     #2
    inshape,        #3
    mut_power,      #4
    pop_size,       #5
    arch_size,      #6
    test_size,      #7
    numsurvivors,   #8
    poolsizes,      #9
    numconvlayers,  #10
    maxt,           #11
    almightyint,    #12
]


#Create population, and mutations
if cur_gen == 0:
    #fresh start: one random integer per agent (population plus archive),
    #an empty mutation history for each of them, and zeroed average rewards
    population = np.random.randint(0, almightyint, size=pop_size + arch_size)
    mutations = [[] for _ in population]
    avg_rewards = np.zeros(population.shape)
else:
    #resume: pick up the generation that was saved for this game
    population, mutations, avg_rewards = load_gen(game, cur_gen)
    #NOTE(review): `trunc` is not defined anywhere in the visible notebook; unless
    #one of the star imports supplies it this line raises NameError -- confirm
    #(numsurvivors may be the intended value). The return value is discarded, so
    #this only has an effect if select_and_mutate updates its arguments in place.
    select_and_mutate(population, mutations, avg_rewards, pop_size, trunc)

In [None]:
#Train the agents
#Run the genetic algorithm for num_gens generations, using the settings packed
#in params and the population/mutations prepared in the setup cell
#NOTE(review): avg_rewards is built/loaded during setup but not passed here --
#confirm train_GA does not need it
train_GA(num_gens,params,population,mutations,env)

# Policy Gradients

We'll use an actor-critic method to train an agent to play the game.

In [30]:
#Things to change for each run (policy gradients)

#name of the game to train on, chosen from the registry listing above;
#used in the environment id f'ALE/{game}' and in the checkpoint file names
#under PGsaves/
game = "DemonAttack-v5"

#number of batches already trained; when non-zero, the actor and critic
#checkpoints saved under this count are loaded before training continues
cur_batches = 34700

In [20]:
#environment setup: create the ALE game and wrap it so frames come back in grayscale
ale = a.ALEInterface()
env = gym.wrappers.GrayScaleObservation(gym.make(f'ALE/{game}'))

#hyperparameters
#--- learning
gamma = 0.99          #decay of memory of previous runs
atarilr = 0.001       #actor learning rate
criticlr = 0.001      #critic learning rate
#--- run length
num_batches = 5001    #how many batches to train
batch_size = 1        #runs per batch
maxt = 3000           #maximum number of frames
#--- network dimensions
numactions = env.action_space.n                #actions the agent can take
inshape = [*env.observation_space.shape]       #observation shape; originally noted as [210,160]
midsize = 100                                  #width of the middle fully connected layer

#positional bundle handed to the helper functions; each entry is tagged with its index
params = [
    game,         #0
    cur_batches,  #1
    num_batches,  #2
    batch_size,   #3
    maxt,         #4
    gamma,        #5
    midsize,      #6
    numactions,   #7
]

#initialize models: the actor produces one output per action, the critic a single output
#(actor is built before critic, as in the original)
actor = AtariNetFC(inshape=inshape, midsize=midsize, outsize=numactions)
critic = AtariNetFC(inshape=inshape, midsize=midsize, outsize=1)

#one Adam optimizer per network, without weight decay
#NOTE(review): torch is never imported by name in this notebook; it is presumably
#brought into scope by one of the star imports -- confirm
actorOptimizer = torch.optim.Adam(actor.parameters(), lr=atarilr, weight_decay=0)
criticOptimizer = torch.optim.Adam(critic.parameters(), lr=criticlr, weight_decay=0)

#running history: rewards, reward estimates, and batch lengths (three separate lists)
sum_rewards, sum_rewards_est, batch_lens = [], [], []

In [31]:
#load previous runs
#When resuming (cur_batches != 0), restore the actor and critic checkpoints
#saved under PGsaves/ for this game, middle-layer size and batch count, along
#with the measures stored with them.
if cur_batches != 0:
    measures, actorOptimizer = actor.load("PGsaves/{}_m{}_{}.tar".format(game,midsize,cur_batches),actorOptimizer)
    #measures is reassigned here, so the values used below come from the critic checkpoint
    #NOTE(review): presumably both checkpoints store the same measures -- confirm
    measures, criticOptimizer = critic.load("PGsaves/Critic_{}_m{}_{}.tar".format(game,midsize,cur_batches),criticOptimizer)

    sum_rewards = measures[0]
    sum_rewards_est = measures[1]
    #Restore the lengths into batch_lens, the list that is created during setup
    #and passed to train_PG. Storing them only under episode_lens left the
    #training call with the empty list from setup.
    batch_lens = measures[2]
    episode_lens = batch_lens   #old name kept so anything still reading it keeps working
    cur_batches = len(sum_rewards)
    #params captured cur_batches when the setup cell built it; refresh slot 1
    #so the helpers see the count that matches the restored history
    params[1] = cur_batches

In [22]:
#Train the agent
#params carries the run settings; sum_rewards, sum_rewards_est and batch_lens
#are the tracking lists for rewards, reward estimates and batch lengths; the
#actor and critic are passed together with their optimizers
#NOTE(review): presumably train_PG appends to the three lists in place -- confirm
train_PG(params,
         sum_rewards,
         sum_rewards_est,
         batch_lens,
         env,actor,
         actorOptimizer,
         critic,
         criticOptimizer)

100%|████████████████████████████████████████████████| 5001/5001 [12:48<00:00,  6.51it/s]


In [32]:
#increase maxt for the recording only
#Work on a copy of params: assigning into params itself would leave the frame
#limit at 10000 for any training run started later in the same kernel, instead
#of the value chosen in the setup cell.
gif_params = list(params)
gif_params[4] = 10000   #slot 4 of params is maxt, the max number of frames

#make gif
make_gameplay_gif(gif_params,actor)

Episode finished after 1001 timesteps


Run this cell to play the gif of the gameplay.

Note: Jupyter saves checkpoints, and this often causes the gif not to update when the notebook is rerun.
To work around this, change the number after the question mark in the image tag below, and the
newest version will be loaded.

<img src="./gameplay.gif?5" width="250" height="250" />