@copyright: Ryan Amer

## General Setup

In [181]:
from __future__ import division
from IPython.display import clear_output
# imports to run OpenAI Gym in Jupyter
import gym
import matplotlib.pyplot as plt
from IPython import display
# import to do training
from tpg.tpg_trainer import TpgTrainer
# import to run an agent (always needed)
from tpg.tpg_agent import TpgAgent
#graph
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
init_notebook_mode(connected=True)
from random import randint
import math
import numpy as np

# multiprocessing (agents are evaluated in parallel worker processes)
import multiprocessing as mp
import time
import tpg.extensions as exts

def _layout_nodes(nodes, edges):
    """Compute an (x, y) position for every node; return them as a dict.

    Nodes whose name starts with 'R' are seeded first: the first one at the
    origin, the following ones on a circle of radius 20. Positions are then
    propagated along the edges: a child whose name starts with 'A' is put
    2 units away from its parent, any other child 5 units away, each sibling
    rotated by one more radian.
    """
    positions = {}
    root_x, root_y, root_angle = 0, 0, 0.4
    for name in nodes:
        if name[0] == 'R':
            positions[name] = (root_x, root_y)
            root_x = math.cos(root_angle) * 20
            root_y = math.sin(root_angle) * 20
            root_angle += 1
        else:
            positions[name] = None  # not placed yet

    unplaced = sum(1 for pos in positions.values() if pos is None)
    while True:
        # One propagation pass: every placed node (re)places its children.
        for parent in nodes:
            if positions[parent] is None:
                continue
            px, py = positions[parent]
            action_angle = 0.3
            other_angle = 0
            for src, dst in edges:
                if src != parent:
                    continue
                if dst[0] == 'A':
                    positions[dst] = (px + 2 * math.cos(action_angle),
                                      py + 2 * math.sin(action_angle))
                    action_angle += 1
                else:
                    positions[dst] = (px + 5 * math.cos(other_angle),
                                      py + 5 * math.sin(other_angle))
                    other_angle += 1

        remaining = sum(1 for pos in positions.values() if pos is None)
        if remaining == 0:
            break
        if remaining == unplaced:
            # A full pass placed nothing new, so the leftover nodes cannot be
            # reached from any placed node. Looping again would never end;
            # park them on an outer circle (radius 30, arbitrary) instead.
            orphan_angle = 0.0
            for name in nodes:
                if positions[name] is None:
                    positions[name] = (math.cos(orphan_angle) * 30,
                                       math.sin(orphan_angle) * 30)
                    orphan_angle += 1
            break
        unplaced = remaining

    return positions


def plot_graph(nodes,edges):
    """Draw the graph described by `nodes` and `edges` inline with plotly.

    Parameters:
        nodes: iterable of node names (strings). The first character selects
            the marker: 'R' -> black/size 20, 'T' -> blue/size 15,
            anything else -> red/size 10.
        edges: iterable of (source_name, target_name) pairs.

    The figure is displayed through `iplot`; nothing is returned.
    Nodes that are not reachable from any 'R' node are still drawn (on an
    outer circle) instead of blocking the layout loop forever.
    """
    node_dict = _layout_nodes(nodes, edges)

    # All edges share one trace; a None entry breaks the line between segments.
    edge_trace = go.Scatter(
        x=[],
        y=[],
        line=dict(width=0.5,color='#888'),
        hoverinfo='none',
        mode='lines')

    for edge in edges:
        x0, y0 = node_dict[edge[0]]
        x1, y1 = node_dict[edge[1]]
        edge_trace['x'] += tuple([x0, x1, None])
        edge_trace['y'] += tuple([y0, y1, None])

    node_trace = go.Scatter(
        x=[],
        y=[],
        text=[],
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            # colorscale options
            #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
            #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
            #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
            colorscale='YlGnBu',
            reversescale=True,
            color=[],
            size=[],
            colorbar=dict(
                thickness=15,
                title='Node Connections',
                xanchor='left',
                titleside='right'
            ),
            line=dict(width=2)))

    for node in nodes:
        x, y = node_dict[node]
        node_trace['x'] += tuple([x])
        node_trace['y'] += tuple([y])
        # Marker colour and size are chosen from the first letter of the name.
        if node[0] == 'R':
            node_trace['marker']['color']+=tuple(['black'])
            node_trace['marker']['size']+=tuple([20])
        elif node[0] == 'T':
            node_trace['marker']['color']+=tuple(['blue'])
            node_trace['marker']['size']+=tuple([15])
        else:
            node_trace['marker']['color']+=tuple(['red'])
            node_trace['marker']['size']+=tuple([10])
        node_trace['text']+=tuple([node])  # hover text is the node name

    fig = go.Figure(data=[edge_trace, node_trace],
                 layout=go.Layout(
                    title='<br>Network graph made with Python',
                    titlefont=dict(size=16),
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20,l=5,r=5,t=40),
                    annotations=[ dict(
                        text="TPG nodes",
                        showarrow=False,
                        xref="paper", yref="paper",
                        x=0.005, y=0.005 ) ],
                    xaxis=dict(showgrid=True, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=True, zeroline=False, showticklabels=False)))

    iplot(fig, filename='networkx')


# how to render in Jupyter: 
# https://stackoverflow.com/questions/40195740/how-to-run-openai-gym-render-over-a-server
# https://www.youtube.com/watch?v=O84KgRt6AJI
def show_state(env, step=0, name='', info=''):
    """Show the current frame of `env` as an inline matplotlib image.

    The frame is obtained with ``env.render(mode='rgb_array')`` and titled
    with `name`, `step` and `info`. The previous cell output is cleared so
    that successive calls replace each other.
    """
    plt.figure()
    plt.clf()
    frame = env.render(mode='rgb_array')
    plt.imshow(frame)
    caption = "%s | Step: %d %s" % (name, step, info)
    plt.title(caption)
    plt.axis('off')

    # wait=True delays the clearing until the new output is ready to be shown
    display.clear_output(wait=True)
    current_figure = plt.gcf()
    display.display(current_figure)
    
# transforms the state into what the tpg agent can use.
# From 3D to 1D, taking only red data (from rgb array)
def getState(state):
    """Flatten a 3D frame into a 1D list holding the first channel only.

    `state` is iterated as rows -> pixels -> channels; element 0 of every
    pixel (the red value, for an RGB frame) is kept, in row-major order.
    """
    return [pixel[0] for row in state for pixel in row]

In [174]:

# run agent in function to work with multiprocessing
def runAgent(agenteqsq):
    """Play one episode with one agent; meant to be called via Pool.map.

    Parameters:
        agenteqsq: tuple ``(agent, envQueue, scoreQueue, frames)`` where
            `envQueue` hands out environments, `scoreQueue` collects
            ``(agent uid, agent outcomes)`` pairs and `frames` is the
            maximum number of steps to play.

    Relies on the module-level `lock` and on `getState`.
    The borrowed environment is always put back on the queue, even when the
    episode raises, so that other workers are not left waiting for it.
    """
    agent = agenteqsq[0] # get agent
    eq = agenteqsq[1] # get environment queue
    sq = agenteqsq[2] # get score queue

    # check if agent already has score
    if agent.taskDone():
        sq.put((agent.getUid(), agent.getOutcomes()))
        return

    env = eq.get() # borrow an environment
    try:
        state = env.reset() # get initial state and prep environment
        score = 0
        for _ in range(agenteqsq[3]): # play at most `frames` steps
            act = agent.act(getState(state)) # get action from agent

            # feedback from env
            state, reward, isDone, debug = env.step(act)
            score += reward # accumulate reward in score
            if isDone:
                break # episode ended early

        # `with` guarantees the lock is released even if reward() raises
        with lock:
            agent.reward(score) # must reward agent

        sq.put((agent.getUid(), agent.getOutcomes())) # get outcomes with id
    finally:
        eq.put(env) # put environment back, whatever happened above
    
def gamebegin(generation,gametitle,processes,frames,action, rs, tpopsize, rtpopSize,
            curr_gap, pLearnerD, pLearnerA, pMutation,
            pAIT, mts, mps,
            ppd, ppa, pps,
            ppm, ppit, tgap,
            ar):
    """Train a TPG population on a gym game and report per-generation scores.

    Parameters:
        generation: number of generations to run.
        gametitle: gym environment id passed to ``gym.make``.
        processes: number of worker processes (one environment each).
        frames: maximum number of steps per agent episode.
        action, rs, tpopsize, rtpopSize, curr_gap, pLearnerD, pLearnerA,
        pMutation, pAIT, mts, mps, ppd, ppa, pps, ppm, ppit, tgap, ar:
            forwarded to ``TpgTrainer`` as actions, randSeed, teamPopSize,
            rTeamPopSize, gap, pLearnerDelete, pLearnerAdd, pMutateAction,
            pActionIsTeam, maxTeamSize, maxProgramSize, pProgramDelete,
            pProgramAdd, pProgramSwap, pProgramMutate, popInit, tourneyGap
            and actionRange respectively.

    Returns:
        ``(elapsed_seconds, summaryScores, agents, trainer)`` where
        `summaryScores` holds one ``(min, max, average)`` tuple per
        generation and `agents` is ``trainer.getAllAgents(skipTasks=[])``.

    The worker pool and the manager process are shut down before returning
    (also on error), so repeated calls do not pile up processes.
    """
    tStart = time.time()

    trainer = TpgTrainer(actions=action, randSeed=rs, teamPopSize=tpopsize, rTeamPopSize=rtpopSize,
                    gap=curr_gap, pLearnerDelete=pLearnerD, pLearnerAdd=pLearnerA, pMutateAction=pMutation,
                    pActionIsTeam=pAIT, maxTeamSize=mts, maxProgramSize=mps,
                    pProgramDelete=ppd, pProgramAdd=ppa, pProgramSwap=pps,
                    pProgramMutate=ppm, popInit=ppit, tourneyGap=tgap,
                    actionRange=ar)

    m = mp.Manager()
    pool = None
    try:
        envQueue = m.Queue()
        # each process needs its own environment
        for _ in range(processes):
            envQueue.put(gym.make(gametitle))

        pool = mp.Pool(processes=processes)

        summaryScores = [] # record score summaries for each gen (min, max, avg)

        for gen in range(generation): # generation loop
            scoreQueue = m.Queue() # hold agents when finish, to actually apply score

            # run generation
            # skipTasks=[] so we get all agents, even if already scored,
            # just to report the obtained score for all agents.
            pool.map(runAgent,
                     [(agent, envQueue, scoreQueue, frames)
                      for agent in trainer.getAllAgents(skipTasks=[])])

            scores = [] # convert scores into list
            while not scoreQueue.empty():
                scores.append(scoreQueue.get())

            # apply scores
            trainer.applyScores(scores)
            trainer.evolve(tasks=[]) # go into next gen

            # at end of generation, make summary of scores
            summaryScores.append((trainer.scoreStats['min'],
                                  trainer.scoreStats['max'],
                                  trainer.scoreStats['average'])) # min, max, avg
            print(summaryScores[-1])

        # measured before cleanup so the timing stays comparable across runs
        elapsed = time.time() - tStart
    finally:
        # terminate (not close) so a worker stuck waiting on the environment
        # queue after a failure cannot block the join
        if pool is not None:
            pool.terminate()
            pool.join()
        m.shutdown()

    return (elapsed,summaryScores,trainer.getAllAgents(skipTasks=[]),trainer)

In [195]:

"""
test 1
- popsize = 50
- frames = 50
"""



# global lock, referenced by runAgent
lock = mp.Lock()

# ---- experiment configuration ----
generation = 20 # number of generations
gametitle = 'Assault-v0'
env = gym.make(gametitle) # only used here to read the size of the action space

processes = 4 # how many to run concurrently (4 is best for my local desktop)
frames = 50 # total frames each play
tpopsize = 50 # teamPopSize
rtpopSize = 0 # rTeamPopSize
action = range(env.action_space.n) # action space
rs = 0 # randSeed
curr_gap = 0.5 # gap
tgap = 0.5 # tourneyGap
pLearnerD = 0.7 # pLearnerDelete
pLearnerA = 0.7 # pLearnerAdd
pMutation = 0.2 # pMutateAction
pAIT = 0.5 # pActionIsTeam
mts = 5 # maxTeamSize
mps = 96 # maxProgramSize
ppd = 0.5 # pProgramDelete
ppa = 0.5 # pProgramAdd
pps = 1.0 # pProgramSwap
ppm = 1.0 # pProgramMutate
ppit = None # popInit
ar = (0.0, 1.0, 0.05) # actionRange

print("test conditions: ",
     "\ngeneration:",generation,
     "\ngametitle:",gametitle,
     "\nprocesses:",processes,
     "\nframes:",frames,
     "\ntpopsize:",tpopsize,
     "\nrtpopsize:",rtpopSize,
     "\naction:",action,
     "\nrandomseed:",rs,
     "\ncurr_gap:",curr_gap,
     "\ntgap:",tgap,
     "\npLearnerD",pLearnerD,
)

data = []

# ---- run the training ----
summary = gamebegin(
    generation=generation, gametitle=gametitle, processes=processes,
    frames=frames, action=action, rs=rs, tpopsize=tpopsize,
    rtpopSize=rtpopSize, curr_gap=curr_gap, pLearnerD=pLearnerD,
    pLearnerA=pLearnerA, pMutation=pMutation, pAIT=pAIT, mts=mts, mps=mps,
    ppd=ppd, ppa=ppa, pps=pps, ppm=ppm, ppit=ppit, tgap=tgap, ar=ar)

# ---- one trace per statistic; summary[1] holds (min, max, average) per generation ----
for column, stat_name in enumerate(('min', 'max', 'average')):
    data.append(go.Scatter(
        x=list(range(1, len(summary[1]) + 1)),
        y=[gen_stats[column] for gen_stats in summary[1]],
        name=stat_name))

fig = go.Figure(
    data=data,
    layout=go.Layout(
        showlegend=True,
        legend={'x':0.2,'y':0.6},
        title='test1-popsize:50,frames:50'))
iplot(fig)

# elapsed wall-clock time reported by gamebegin
print('Time cost',summary[0])

test conditions:  
generation: 20 
gametitle: Assault-v0 
processes: 4 
frames: 50 
tpopsize: 50 
rtpopsize: 0 
action: range(0, 7) 
randomseed: 0 
curr_gap: 0.5 
tgap: 0.5 
pLearnerD 0.7
(0.0, 42.0, 3.36)
(0.0, 42.0, 4.914893617021277)
(0.0, 42.0, 6.72)
(0.0, 42.0, 10.266666666666667)
(0.0, 42.0, 14.933333333333334)
(0.0, 42.0, 19.133333333333333)
(0.0, 42.0, 17.425531914893618)
(0.0, 42.0, 20.125)
(0.0, 42.0, 17.23076923076923)
(0.0, 42.0, 21.0)
(0.0, 42.0, 21.466666666666665)
(0.0, 42.0, 20.543478260869566)
(0.0, 42.0, 23.441860465116278)
(0.0, 42.0, 20.44736842105263)
(0.0, 42.0, 22.0)
(0.0, 42.0, 25.0)
(0.0, 42.0, 23.5)
(0.0, 42.0, 27.825)
(0.0, 42.0, 25.295454545454547)
(0.0, 42.0, 25.88372093023256)


Time cost 197.27710056304932


In [196]:
# Element 3 of the tuple returned by gamebegin is the trainer itself.
trainer = summary[3]
# Get the trainer's graph as (nodes, edges) and draw it.
nodes, edges = exts.getFullGraph(trainer)
plot_graph(nodes,edges)

In [3]:
# run agent in function to work with multiprocessing
def runAgent(agenteqsq):
    """Play one episode with one agent; meant to be called via Pool.map.

    Parameters:
        agenteqsq: tuple ``(agent, envQueue, scoreQueue, frames)`` where
            `envQueue` hands out environments, `scoreQueue` collects
            ``(agent uid, agent outcomes)`` pairs and `frames` is the
            maximum number of steps to play.

    Relies on the module-level `lock` and on `getState`.
    The borrowed environment is always put back on the queue, even when the
    episode raises, so that other workers are not left waiting for it.
    """
    agent = agenteqsq[0] # get agent
    eq = agenteqsq[1] # get environment queue
    sq = agenteqsq[2] # get score queue

    # check if agent already has score
    if agent.taskDone():
        sq.put((agent.getUid(), agent.getOutcomes()))
        return

    env = eq.get() # borrow an environment
    try:
        state = env.reset() # get initial state and prep environment
        score = 0
        for _ in range(agenteqsq[3]): # play at most `frames` steps
            act = agent.act(getState(state)) # get action from agent

            # feedback from env
            state, reward, isDone, debug = env.step(act)
            score += reward # accumulate reward in score
            if isDone:
                break # episode ended early

        # `with` guarantees the lock is released even if reward() raises
        with lock:
            agent.reward(score) # must reward agent

        sq.put((agent.getUid(), agent.getOutcomes())) # get outcomes with id
    finally:
        eq.put(env) # put environment back, whatever happened above
    
def gamebegin(generation,gametitle,processes,frames,action, rs, tpopsize, rtpopSize,
            curr_gap, pLearnerD, pLearnerA, pMutation,
            pAIT, mts, mps,
            ppd, ppa, pps,
            ppm, ppit, tgap,
            ar):
    """Train a TPG population on a gym game and report per-generation scores.

    Parameters:
        generation: number of generations to run.
        gametitle: gym environment id passed to ``gym.make``.
        processes: number of worker processes (one environment each).
        frames: maximum number of steps per agent episode.
        action, rs, tpopsize, rtpopSize, curr_gap, pLearnerD, pLearnerA,
        pMutation, pAIT, mts, mps, ppd, ppa, pps, ppm, ppit, tgap, ar:
            forwarded to ``TpgTrainer`` as actions, randSeed, teamPopSize,
            rTeamPopSize, gap, pLearnerDelete, pLearnerAdd, pMutateAction,
            pActionIsTeam, maxTeamSize, maxProgramSize, pProgramDelete,
            pProgramAdd, pProgramSwap, pProgramMutate, popInit, tourneyGap
            and actionRange respectively.

    Returns:
        ``(elapsed_seconds, summaryScores, agents)`` where `summaryScores`
        holds one ``(min, max, average)`` tuple per generation and `agents`
        is ``trainer.getAllAgents(skipTasks=[])``.

    The worker pool and the manager process are shut down before returning
    (also on error), so repeated calls do not pile up processes.
    """
    tStart = time.time()

    trainer = TpgTrainer(actions=action, randSeed=rs, teamPopSize=tpopsize, rTeamPopSize=rtpopSize,
                    gap=curr_gap, pLearnerDelete=pLearnerD, pLearnerAdd=pLearnerA, pMutateAction=pMutation,
                    pActionIsTeam=pAIT, maxTeamSize=mts, maxProgramSize=mps,
                    pProgramDelete=ppd, pProgramAdd=ppa, pProgramSwap=pps,
                    pProgramMutate=ppm, popInit=ppit, tourneyGap=tgap,
                    actionRange=ar)

    m = mp.Manager()
    pool = None
    try:
        envQueue = m.Queue()
        # each process needs its own environment
        for _ in range(processes):
            envQueue.put(gym.make(gametitle))

        pool = mp.Pool(processes=processes)

        summaryScores = [] # record score summaries for each gen (min, max, avg)

        for gen in range(generation): # generation loop
            scoreQueue = m.Queue() # hold agents when finish, to actually apply score

            # run generation
            # skipTasks=[] so we get all agents, even if already scored,
            # just to report the obtained score for all agents.
            pool.map(runAgent,
                     [(agent, envQueue, scoreQueue, frames)
                      for agent in trainer.getAllAgents(skipTasks=[])])

            scores = [] # convert scores into list
            while not scoreQueue.empty():
                scores.append(scoreQueue.get())

            # apply scores
            trainer.applyScores(scores)
            trainer.evolve(tasks=[]) # go into next gen

            # at end of generation, make summary of scores
            summaryScores.append((trainer.scoreStats['min'],
                                  trainer.scoreStats['max'],
                                  trainer.scoreStats['average'])) # min, max, avg
            print(summaryScores[-1])

        # measured before cleanup so the timing stays comparable across runs
        elapsed = time.time() - tStart
    finally:
        # terminate (not close) so a worker stuck waiting on the environment
        # queue after a failure cannot block the join
        if pool is not None:
            pool.terminate()
            pool.join()
        m.shutdown()

    return (elapsed,summaryScores,trainer.getAllAgents(skipTasks=[]))

"""
test 2
- frames, from 50 to 300
"""



# global lock, referenced by runAgent
lock = mp.Lock()

# ---- experiment configuration ----
generation = 20 # number of generations
gametitle = 'Assault-v0'
env = gym.make(gametitle) # only used here to read the size of the action space

processes = 6 # how many to run concurrently (4 is best for my local desktop)
frames = 50 # total frames each play (overwritten inside the sweep below)
tpopsize = 50 # teamPopSize
rtpopSize = 0 # rTeamPopSize
action = range(env.action_space.n) # action space
rs = 0 # randSeed
curr_gap = 0.5 # gap
tgap = 0.5 # tourneyGap
pLearnerD = 0.7 # pLearnerDelete
pLearnerA = 0.7 # pLearnerAdd
pMutation = 0.2 # pMutateAction
pAIT = 0.5 # pActionIsTeam
mts = 5 # maxTeamSize
mps = 96 # maxProgramSize
ppd = 0.5 # pProgramDelete
ppa = 0.5 # pProgramAdd
pps = 1.0 # pProgramSwap
ppm = 1.0 # pProgramMutate
ppit = None # popInit
ar = (0.0, 1.0, 0.05) # actionRange


print("test conditions: ",
     "\ngeneration:",generation,
     "\ngametitle:",gametitle,
     "\nprocesses:",processes,
     "\nframes:",frames,
     "\ntpopsize:",tpopsize,
     "\nrtpopsize:",rtpopSize,
     "\naction:",action,
     "\nrandomseed:",rs,
     "\ncurr_gap:",curr_gap,
     "\ntgap:",tgap,
     "\npLearnerD",pLearnerD,
)

min_plot = []
max_plot = []
average_plot = []
timeintervals = []

# ---- sweep the episode length: 50, 100, 150, 200, 250 frames ----
for i in range(5):
    frames = (1+i)*50
    print("Frames:",frames)

    summary = gamebegin(
        generation=generation, gametitle=gametitle, processes=processes,
        frames=frames, action=action, rs=rs, tpopsize=tpopsize,
        rtpopSize=rtpopSize, curr_gap=curr_gap, pLearnerD=pLearnerD,
        pLearnerA=pLearnerA, pMutation=pMutation, pAIT=pAIT, mts=mts, mps=mps,
        ppd=ppd, ppa=ppa, pps=pps, ppm=ppm, ppit=ppit, tgap=tgap, ar=ar)

    # summary[1] holds one (min, max, average) tuple per generation;
    # each statistic gets its own trace, labelled with the frame count
    for traces, column in ((min_plot, 0), (max_plot, 1), (average_plot, 2)):
        traces.append(go.Scatter(
            x=list(range(1, len(summary[1]) + 1)),
            y=[gen_stats[column] for gen_stats in summary[1]],
            name='frames: '+str(frames)))
    timeintervals.append(summary[0])

# ---- figures: one per statistic, all with the same legend placement ----
legend_pos = {'x':0.2,'y':0.6}
min_fig = go.Figure(
    data=min_plot,
    layout=go.Layout(showlegend=True, legend=dict(legend_pos), title='Minimum'))
max_fig = go.Figure(
    data=max_plot,
    layout=go.Layout(showlegend=True, legend=dict(legend_pos), title='Maximum'))
average_fig = go.Figure(
    data=average_plot,
    layout=go.Layout(showlegend=True, legend=dict(legend_pos), title='Average'))
iplot(min_fig)
iplot(max_fig)
iplot(average_fig)

# ---- elapsed time of each run, plotted against the run number ----
time_trace = go.Scatter(
    x=list(range(1, len(timeintervals) + 1)),
    y=timeintervals,
    name="time")
time_fig = go.Figure(
    data=[time_trace],
    layout=go.Layout(showlegend=True, legend=dict(legend_pos), title='Time'))
iplot(time_fig)

test conditions:  
generation: 20 
gametitle: Assault-v0 
processes: 6 
frames: 50 
tpopsize: 50 
rtpopsize: 0 
action: range(0, 7) 
randomseed: 0 
curr_gap: 0.5 
tgap: 0.5 
pLearnerD 0.7
Frames: 50



overflow encountered in double_scalars



(0.0, 42.0, 4.2)
(0.0, 42.0, 4.565217391304348)
(0.0, 42.0, 5.7272727272727275)
(0.0, 42.0, 11.2)
(0.0, 42.0, 17.181818181818183)
(0.0, 42.0, 19.09090909090909)
(0.0, 42.0, 19.25)
(0.0, 42.0, 18.2)
(0.0, 42.0, 21.976744186046513)
(0.0, 42.0, 20.522727272727273)
(0.0, 42.0, 20.023255813953487)
(0.0, 42.0, 24.34090909090909)
(0.0, 42.0, 24.906976744186046)
(0.0, 42.0, 25.88372093023256)
(0.0, 42.0, 26.25)
(0.0, 42.0, 28.0)
(0.0, 42.0, 29.925)
(0.0, 42.0, 30.08108108108108)
(0.0, 42.0, 27.81081081081081)
(0.0, 42.0, 26.10810810810811)
Frames: 100
(0.0, 63.0, 6.72)
(0.0, 63.0, 10.733333333333333)



overflow encountered in double_scalars



(0.0, 63.0, 19.133333333333333)
(0.0, 63.0, 30.068181818181817)
(0.0, 105.0, 40.09090909090909)
(0.0, 105.0, 50.925)
(0.0, 105.0, 55.829268292682926)
(0.0, 105.0, 55.674418604651166)
(0.0, 105.0, 54.80487804878049)
(0.0, 105.0, 58.275)
(0.0, 105.0, 55.46153846153846)
(0.0, 105.0, 62.475)
(0.0, 105.0, 63.0)



overflow encountered in double_scalars


overflow encountered in double_scalars


overflow encountered in double_scalars



(0.0, 105.0, 64.61538461538461)



overflow encountered in double_scalars


overflow encountered in double_scalars



(0.0, 105.0, 59.911764705882355)
(0.0, 105.0, 67.08333333333333)
(0.0, 105.0, 69.7741935483871)
(0.0, 105.0, 70.63636363636364)
(0.0, 105.0, 74.66666666666667)
(0.0, 105.0, 73.2)
Frames: 150
(0.0, 126.0, 9.24)
(0.0, 126.0, 25.5)
(0.0, 147.0, 45.266666666666666)
(0.0, 147.0, 69.83720930232558)
(0.0, 147.0, 78.5)
(0.0, 147.0, 91.73684210526316)
(0.0, 147.0, 87.23076923076923)
(0.0, 147.0, 95.78048780487805)
(0.0, 147.0, 95.60526315789474)
(0.0, 147.0, 107.15384615384616)
(0.0, 147.0, 107.76315789473684)
(0.0, 147.0, 98.75675675675676)
(0.0, 147.0, 106.61538461538461)
(0.0, 147.0, 103.92307692307692)



overflow encountered in double_scalars



(0.0, 147.0, 110.92307692307692)
(0.0, 147.0, 98.58333333333333)
(0.0, 147.0, 102.66666666666667)
(0.0, 147.0, 94.8)
(0.0, 147.0, 86.21052631578948)
(0.0, 147.0, 96.71052631578948)
Frames: 200
(0.0, 168.0, 21.84)
(0.0, 189.0, 45.857142857142854)
(0.0, 189.0, 77.0)
(0.0, 189.0, 106.43181818181819)
(0.0, 189.0, 112.1842105263158)
(0.0, 189.0, 135.39473684210526)
(0.0, 189.0, 136.0)
(0.0, 189.0, 148.13513513513513)
(0.0, 189.0, 139.46153846153845)
(0.0, 189.0, 137.35135135135135)
(0.0, 189.0, 140.53846153846155)
(0.0, 189.0, 148.6153846153846)
(0.0, 189.0, 144.30769230769232)
(0.0, 189.0, 141.3658536585366)
(0.0, 189.0, 148.6153846153846)
(0.0, 189.0, 151.97368421052633)
(0.0, 189.0, 151.30769230769232)
(84.0, 189.0, 158.55)
(0.0, 189.0, 142.69230769230768)
(0.0, 189.0, 158.0)
Frames: 250
(0.0, 189.0, 14.28)
(0.0, 189.0, 24.5)
(0.0, 210.0, 35.297872340425535)
(0.0, 210.0, 58.0)
(0.0, 210.0, 109.60975609756098)
(0.0, 210.0, 139.3170731707317)
(0.0, 210.0, 145.2972972972973)
(0.0, 210.0, 15

TypeError: 'module' object is not callable