@copy right: Ryan Amer

## General Setup

In [1]:
from __future__ import division
from IPython.display import clear_output
# imports to run OpenAI Gym in Jupyter
import gym
import matplotlib.pyplot as plt
from IPython import display
# import to do training
from tpg.tpg_trainer import TpgTrainer
# import to run an agent (always needed)
from tpg.tpg_agent import TpgAgent

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import numpy as np
import math

init_notebook_mode(connected=True)
import multiprocessing as mp
import time


# how to render in Jupyter: 
# https://stackoverflow.com/questions/40195740/how-to-run-openai-gym-render-over-a-server
# https://www.youtube.com/watch?v=O84KgRt6AJI
def show_state(env, step=0, name='', info=''):
    plt.figure()
    plt.clf()
    plt.imshow(env.render(mode='rgb_array'))
    plt.title("%s | Step: %d %s" % (name, step, info))
    plt.axis('off')

    display.clear_output(wait=True)
    display.display(plt.gcf())
    
# transforms the state into what the tpg agent can use.
# From 3D to 1D, taking only red data (from rgb array)
def getState(state):
    state2 = []
    for x in state:
        for y in x:
            state2.append(y[0])
            
    return state2

In [2]:
# run agent in function to work with multiprocessing
def runAgent(agenteqsq):
    agent = agenteqsq[0] # get agent
    eq = agenteqsq[1] # get environment queue
    sq = agenteqsq[2] # get score queue
    
    # check if agent already has score
    if agent.taskDone():
        sq.put((agent.getUid(), agent.getOutcomes()))
        return
        
    env = eq.get() # get an environment
    state = env.reset() # get initial state and prep environment
    score = 0
    for i in range(agenteqsq[3]): # run episodes that last 200 frames
        act = agent.act(getState(state)) # get action from agent

        # feedback from env
        state, reward, isDone, debug = env.step(act)
        score += reward # accumulate reward in score
        if isDone:
            break # end early if losing state
            
    lock.acquire() # may not actually need, mp is weird in python
    agent.reward(score) # must reward agent
    lock.release()
    
    sq.put((agent.getUid(), agent.getOutcomes())) # get outcomes with id
    eq.put(env) # put environment back
    
def gamebegin(generation,gametitle,processes,frames,action, rs, tpopsize, rtpopSize,
            curr_gap, pLearnerD, pLearnerA, pMutation,
            pAIT, mts, mps,
            ppd, ppa, pps,
            ppm, ppit, tgap,
            ar):

    tStart = time.time()

    trainer = TpgTrainer(actions=action, randSeed=rs, teamPopSize=tpopsize, rTeamPopSize=rtpopSize,
                    gap=curr_gap, pLearnerDelete=pLearnerD, pLearnerAdd=pLearnerA, pMutateAction=pMutation,
                    pActionIsTeam=pAIT, maxTeamSize=mts, maxProgramSize=mps,
                    pProgramDelete=ppd, pProgramAdd=ppa, pProgramSwap=pps,
                    pProgramMutate=ppm, popInit=ppit, tourneyGap=tgap,
                    actionRange=ar)


    m = mp.Manager()
    envQueue = m.Queue()
    # each process needs its own environment
    for i in range(processes):
        envQueue.put(gym.make(gametitle))

    pool = mp.Pool(processes=processes)

    summaryScores = [] # record score summaries for each gen (min, max, avg)


    for gen in range(generation): # generation loop
        scoreQueue = m.Queue() # hold agents when finish, to actually apply score

        # run generation
        # skipTasks=[] so we get all agents, even if already scored,
        # just to report the obtained score for all agents.
        pool.map(runAgent, 
                     [(agent, envQueue, scoreQueue,frames) 
                      for agent in trainer.getAllAgents(skipTasks=[])])

        scores = [] # convert scores into list
        while not scoreQueue.empty():
            scores.append(scoreQueue.get())

        # apply scores
        trainer.applyScores(scores)
        trainer.evolve(tasks=[]) # go into next gen

        # at end of generation, make summary of scores
        summaryScores.append((trainer.scoreStats['min'], 
                        trainer.scoreStats['max'],
                        trainer.scoreStats['average'])) # min, max, avg
        print(summaryScores[len(summaryScores)-1])
    return (time.time()-tStart,summaryScores)

In [3]:

"""
test 1
- popsize = 50
- frames = 50
"""



lock = mp.Lock() #create a globle locker

generation = 20 # number of generations
gametitle = 'Assault-v0'
env = gym.make(gametitle)

processes = 6 # how many to run concurrently (4 is best for my local desktop)
frames = 50 #total frames each play
tpopsize = 50 #teamPopSize
rtpopSize = 0 #rTeamPopSize
action=range(env.action_space.n) #action space
rs = 0 #randseed
curr_gap = 0.5 #gap
tgap = 0.5 #tourneyGap
pLearnerD = 0.7 #learner delete
pLearnerA = 0.7 #learner add
pMutation = 0.2 #player mutation rate
pAIT = 0.5 #pActionIsTeam
mts = 5 #maxTeamSize
mps = 96 #maxProgramSize
ppd = 0.5 #pProgramDelete
ppa = 0.5 #pProgramAdd
pps = 1.0 #pProgramSwap
ppm = 1.0 #pProgramMutate
ppit = None #popInit
ar = (0.0, 1.0, 0.05) #actionRange

print("test conditions: ",
     "\ngeneration:",generation,
     "\ngametitle:",gametitle,
     "\nprocesses:",processes,
     "\nframes:",frames,
     "\ntpopsize:",tpopsize,
     "\nrtpopsize:",rtpopSize,
     "\naction:",action,
     "\nrandomseed:",rs,
     "\ncurr_gap:",curr_gap,
     "\ntgap:",tgap,
     "\npLearnerD",pLearnerD,
)

data = []
    
summary = gamebegin(generation,gametitle,processes,frames,action, rs, tpopsize, rtpopSize,
            curr_gap, pLearnerD, pLearnerA, pMutation,
            pAIT, mts, mps,
            ppd, ppa, pps,
            ppm, ppit, tgap,
            ar)
#legend
data.append(go.Scatter(x=[i+1 for i in range(len(summary[1]))],y=[i[0] for i in summary[1]],name='min'))
data.append(go.Scatter(x=[i+1 for i in range(len(summary[1]))],y=[i[1] for i in summary[1]],name='max'))
data.append(go.Scatter(x=[i+1 for i in range(len(summary[1]))],y=[i[2] for i in summary[1]],name='average'))

fig = go.Figure(data=data,layout=go.Layout(showlegend=True,legend={'x':0.2,'y':0.6},title='test1-popsize:50,frames:50'))
iplot(fig)

#time compare
print('Time cost',summary[0])

test conditions:  
generation: 20 
gametitle: Assault-v0 
processes: 6 
frames: 50 
tpopsize: 50 
rtpopsize: 0 
action: range(0, 7) 
randomseed: 0 
curr_gap: 0.5 
tgap: 0.5 
pLearnerD 0.7
(0.0, 21.0, 4.62)
(0.0, 21.0, 9.068181818181818)
(0.0, 21.0, 12.25)
(0.0, 42.0, 14.0)
(0.0, 42.0, 16.8)
(0.0, 42.0, 18.4390243902439)
(0.0, 42.0, 17.85)
(0.0, 42.0, 20.44736842105263)
(0.0, 42.0, 17.76923076923077)
(0.0, 42.0, 18.16216216216216)
(0.0, 42.0, 21.5)
(0.0, 42.0, 22.5)
(0.0, 42.0, 21.0)
(0.0, 42.0, 20.045454545454547)
(0.0, 42.0, 20.5)
(0.0, 42.0, 24.906976744186046)
(0.0, 42.0, 24.0)
(0.0, 42.0, 26.25)
(0.0, 42.0, 23.692307692307693)
(0.0, 42.0, 25.725)


Time cost 156.0337097644806


In [4]:

"""
test 2
- frames, from 100 to 1000
"""



lock = mp.Lock() #create a globle locker

generation = 20 # number of generations
gametitle = 'Assault-v0'
env = gym.make(gametitle)

processes = 6 # how many to run concurrently (4 is best for my local desktop)
frames = 50 #total frames each play
tpopsize = 50 #teamPopSize
rtpopSize = 0 #rTeamPopSize
action=range(env.action_space.n) #action space
rs = 0 #randseed
curr_gap = 0.5 #gap
tgap = 0.5 #tourneyGap
pLearnerD = 0.7 #learner delete
pLearnerA = 0.7 #learner add
pMutation = 0.2 #player mutation rate
pAIT = 0.5 #pActionIsTeam
mts = 5 #maxTeamSize
mps = 96 #maxProgramSize
ppd = 0.5 #pProgramDelete
ppa = 0.5 #pProgramAdd
pps = 1.0 #pProgramSwap
ppm = 1.0 #pProgramMutate
ppit = None #popInit
ar = (0.0, 1.0, 0.05) #actionRange


print("test conditions: ",
     "\ngeneration:",generation,
     "\ngametitle:",gametitle,
     "\nprocesses:",processes,
     "\nframes:",frames,
     "\ntpopsize:",tpopsize,
     "\nrtpopsize:",rtpopSize,
     "\naction:",action,
     "\nrandomseed:",rs,
     "\ncurr_gap:",curr_gap,
     "\ntgap:",tgap,
     "\npLearnerD",pLearnerD,
)

min_plot = []
max_plot = []
average_plot = []
timeintervals = []
# for loops in order to simulate data
for i in range(0,2):
    frames = (1+i)*50
    print("Frames:",frames)
    
    summary = gamebegin(generation,gametitle,processes,frames,action, rs, tpopsize, rtpopSize,
                curr_gap, pLearnerD, pLearnerA, pMutation,
                pAIT, mts, mps,
                ppd, ppa, pps,
                ppm, ppit, tgap,
                ar)
    #legend
    min_plot.append(go.Scatter(x=[i+1 for i in range(len(summary[1]))],
                               y=[i[0] for i in summary[1]],
                               name='frames: '+str(frames)))
    max_plot.append(go.Scatter(x=[i+1 for i in range(len(summary[1]))],
                               y=[i[1] for i in summary[1]],
                               name='frames: '+str(frames)))
    average_plot.append(go.Scatter(x=[i+1 for i in range(len(summary[1]))],
                                   y=[i[2] for i in summary[1]],
                                   name='frames: '+str(frames)))
    timeintervals.append(summary[0])
#figures
min_fig = go.Figure(data=min_plot,
                    layout=go.Layout(showlegend=True,
                                     legend={'x':0.2,'y':0.6},
                                     title='Minimum'))
max_fig = go.Figure(data=max_plot,
                    layout=go.Layout(showlegend=True,
                                     legend={'x':0.2,'y':0.6},
                                     title='Maximum'))
average_fig = go.Figure(data=average_plot,
                        layout=go.Layout(showlegend=True,
                                         legend={'x':0.2,'y':0.6},
                                         title='Average'))
iplot(min_fig)
iplot(max_fig)
iplot(average_fig)
#time figure
time_fig = go.Figure(data=go.scatter(x=[i+1 for i in range(len(timeintervals))],
                                y=timeintervals,name="time"),
                     layout=go.Layout(showlegend=True,
                                      legend={'x':0.2,'y':0.6},title='Time'))
iplot(time_fig)

test conditions:  
generation: 20 
gametitle: Assault-v0 
processes: 6 
frames: 50 
tpopsize: 50 
rtpopsize: 0 
action: range(0, 7) 
randomseed: 0 
curr_gap: 0.5 
tgap: 0.5 
pLearnerD 0.7
Frames: 50
(0.0, 21.0, 3.36)
(0.0, 21.0, 4.883720930232558)
(0.0, 21.0, 7.682926829268292)
(0.0, 42.0, 12.6)
(0.0, 21.0, 16.916666666666668)
(0.0, 42.0, 17.76923076923077)
(0.0, 42.0, 19.53488372093023)
(0.0, 42.0, 16.153846153846153)
(0.0, 42.0, 20.475)
(0.0, 42.0, 18.72972972972973)
(0.0, 42.0, 21.525)
(0.0, 42.0, 20.48780487804878)
(0.0, 42.0, 17.23076923076923)
(0.0, 42.0, 19.864864864864863)
(0.0, 42.0, 23.0)
(0.0, 42.0, 22.536585365853657)
(0.0, 42.0, 22.575)
(0.0, 42.0, 23.5)
(0.0, 42.0, 25.083333333333332)
(0.0, 42.0, 24.5)
Frames: 100
(0.0, 105.0, 12.6)
(0.0, 105.0, 25.5)
(0.0, 105.0, 43.10526315789474)
(0.0, 105.0, 58.15384615384615)
(0.0, 105.0, 55.829268292682926)
(0.0, 105.0, 61.38461538461539)
(0.0, 105.0, 56.75675675675676)
(0.0, 105.0, 65.15384615384616)
(0.0, 105.0, 60.375)
(0.0, 105.

TypeError: 'module' object is not callable