In [1]:
import gym
import numpy as np
import torch
import time

from gym.wrappers import Monitor

import torch
from torch.distributions import Categorical
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import math
import copy
import os

from itertools import count

import matplotlib.pyplot as plt
%matplotlib notebook

# NEURO-EVO

In [2]:
class CartPoleAgent(nn.Module):
    """Small feed-forward policy network used by the neuro-evolution code.

    A 4-feature input goes through one hidden layer of 124 ReLU units and
    comes out as a softmax distribution over 2 actions.
    """

    def __init__(self):
        super().__init__()
        # The attribute names determine the state_dict keys, so they must
        # stay as they are for saved checkpoints to load.
        self.affine1 = nn.Linear(4, 124)
        self.affine2 = nn.Linear(124, 2)

        # Start out empty; the forward pass never touches them.
        self.saved_log_probs = []
        self.rewards = []

    def forward(self, x):
        """Return action probabilities, normalised along dim 1."""
        hidden = F.relu(self.affine1(x))
        return F.softmax(self.affine2(hidden), dim=1)

In [3]:
def get_initialized_agents(folderName):
    """Load every ``.pth`` checkpoint found in ``folderName``.

    Each checkpoint is loaded into a fresh ``CartPoleAgent``. Files that
    cannot be loaded (for example because of a shape mismatch) are reported
    on stdout and skipped, so one bad file does not abort the whole load.

    Args:
        folderName: Directory that contains the ``*.pth`` state dicts.

    Returns:
        list: The successfully loaded agents, ordered by file name.
    """
    agents = []
    # os.listdir gives no ordering guarantee; sorting keeps agent indices
    # stable from one run (and one platform) to the next.
    for entry in sorted(os.listdir(folderName)):
        if not entry.endswith('.pth'):
            continue
        try:
            model = CartPoleAgent()
            model.load_state_dict(torch.load(os.path.join(folderName, entry)))
        except Exception as e:
            # Best effort: report the problem and carry on with the rest.
            print(e)
            continue
        agents.append(model)
    return agents

In [4]:
def run_agents(agents, max_steps=250):
    """Play one CartPole-v1 episode per agent and collect the total rewards.

    At every step the agent's output is treated as a probability vector and
    an action is sampled from it with ``np.random.choice``.

    Args:
        agents: Iterable of callables with an ``eval()`` method. Each maps a
            ``(1, n_features)`` float tensor to a ``(1, n_actions)`` tensor
            of action probabilities.
        max_steps: Upper bound on the number of steps played per episode.

    Returns:
        list: One accumulated episode reward per agent, in input order.
    """
    reward_agents = []
    env = gym.make("CartPole-v1")
    try:
        env.spec.reward_threshold = 500
        game_actions = env.action_space.n

        for agent in agents:
            agent.eval()
            observation = env.reset()

            episode_reward = 0
            for _ in range(max_steps):
                inp = torch.as_tensor(observation, dtype=torch.float32).view(1, -1)
                # Pure inference: no autograd graph is needed here.
                with torch.no_grad():
                    output_probabilities = agent(inp).detach().numpy()[0]
                action = np.random.choice(range(game_actions), 1, p=output_probabilities).item()
                observation, reward, done, info = env.step(action)
                episode_reward = episode_reward + reward

                if done:
                    break

            reward_agents.append(episode_reward)
    finally:
        # This function is called many times during evolution; always release
        # the environment, even when an agent raises.
        env.close()

    return reward_agents

In [5]:
def return_average_score(agent, runs):
    """Average the reward of ``agent`` over ``runs`` separate evaluations.

    Each evaluation calls ``run_agents`` with a one-element list and takes
    the single reward it returns.
    """
    total = sum((run_agents([agent])[0] for _ in range(runs)), 0.)
    return total / runs

In [6]:
def run_agents_n_times(agents, runs):
    """Return the ``runs``-episode average score of every agent, in order."""
    averages = []
    for candidate in agents:
        averages.append(return_average_score(candidate, runs))
    return averages

In [7]:
def mutate(agent, mutation_power=0.02):
    """Return a mutated deep copy of ``agent``; the parent is left untouched.

    Independent Gaussian noise, scaled by ``mutation_power``, is added to
    every parameter of the copy. The noise is drawn from NumPy's global RNG,
    so ``np.random.seed`` controls it. Parameters of any rank are perturbed
    in one vectorised step each.

    Args:
        agent: The ``nn.Module`` to copy and perturb.
        mutation_power: Standard deviation of the added noise. The default
            is taken from https://arxiv.org/pdf/1712.06567.pdf.

    Returns:
        The mutated copy.
    """
    child_agent = copy.deepcopy(agent)
    # In-place edits of leaf parameters are rejected while autograd is
    # recording, so switch it off locally rather than rely on global state.
    with torch.no_grad():
        for param in child_agent.parameters():
            noise = mutation_power * np.random.standard_normal(tuple(param.shape))
            param.add_(torch.as_tensor(noise, dtype=param.dtype, device=param.device))

    return child_agent

In [8]:
def return_children(agents, sorted_parent_indexes, elite_index):
    """Build the next generation, the same size as ``agents``.

    All but one slot are filled with mutated copies of parents drawn
    uniformly at random from ``sorted_parent_indexes``. The last slot holds
    the elite chosen by ``add_elite``.

    Returns:
        tuple: ``(children_agents, elite_index)``, where ``elite_index`` is
        the position of the elite in the new list (always the last one).
    """
    offspring_count = len(agents) - 1

    children_agents = []
    while len(children_agents) < offspring_count:
        pick = np.random.randint(len(sorted_parent_indexes))
        parent = agents[sorted_parent_indexes[pick]]
        children_agents.append(mutate(parent))

    # The elite goes in last, so its index is simply the final position.
    children_agents.append(add_elite(agents, sorted_parent_indexes, elite_index))
    return children_agents, len(children_agents) - 1

In [9]:
def add_elite(agents, sorted_parent_indexes, elite_index=None, only_consider_top_n=10):
    """Re-evaluate the leading candidates and return a copy of the best one.

    The candidates are the first ``only_consider_top_n`` entries of
    ``sorted_parent_indexes``, plus ``elite_index`` when one is given. Each
    is scored with ``return_average_score`` over 5 runs.

    Returns:
        A deep copy of the highest-scoring candidate agent.
    """
    candidate_elite_index = sorted_parent_indexes[:only_consider_top_n]
    if elite_index is not None:
        candidate_elite_index = np.append(candidate_elite_index, [elite_index])

    top_score, top_elite_index = None, None
    for i in candidate_elite_index:
        score = return_average_score(agents[i], runs=5)
        print("Score for elite i ", i, " is ", score)

        # Strict '>' means the earliest candidate wins a tie.
        if top_score is None or score > top_score:
            top_score, top_elite_index = score, i

    print("Elite selected with index ",top_elite_index, " and score", top_score)

    return copy.deepcopy(agents[top_elite_index])
    

In [10]:
def play_agent(agent):
    """Play one rendered CartPole-v1 episode with ``agent``.

    The environment is wrapped in gym's ``Monitor`` with ``'./video'`` as its
    directory (``force=True``). The episode runs for at most 250 steps, with
    actions sampled from the agent's output probabilities. The total reward
    is printed at the end. Any exception is reported on stdout instead of
    being propagated, and the environment is closed in all cases.

    Args:
        agent: Callable that maps a ``(1, n_features)`` float tensor to a
            ``(1, n_actions)`` tensor of action probabilities.
    """
    env = None
    try:
        env = gym.make("CartPole-v1")
        # Number of discrete actions to sample from.
        game_actions = env.action_space.n
        # Rebind to the wrapper so that the single close() below covers both.
        env = Monitor(env, './video', force=True)
        observation = env.reset()

        r = 0
        for _ in range(250):
            env.render()
            inp = torch.tensor(observation).type('torch.FloatTensor').view(1,-1)
            with torch.no_grad():
                output_probabilities = agent(inp).detach().numpy()[0]
            action = np.random.choice(range(game_actions), 1, p=output_probabilities).item()
            observation, reward, done, info = env.step(action)
            r = r + reward

            if done:
                break

        print("Rewards: ", r)

    except Exception as e:
        # Python 3 exceptions have no `.message`; print the exception itself.
        print(e.__doc__)
        print(e)
    finally:
        if env is not None:
            env.close()

In [11]:
def trainEVOModel(folderName, fileName, generations=10, top_limit=5, runs_per_agent=10):
    """Evolve the population of checkpoints stored in ``./<folderName>``.

    Each generation scores every agent (averaged over ``runs_per_agent``
    episodes), keeps the ``top_limit`` best as parents, and builds the next
    generation from mutated parents plus one elite.

    Args:
        folderName: Folder (relative to the working directory) with ``.pth``
            checkpoints that seed the population.
        fileName: Unused; kept so that existing callers keep working.
        generations: Number of generations to run.
        top_limit: Number of top agents to consider as parents.
        runs_per_agent: Episodes averaged per agent when ranking them.

    Returns:
        list: The final population of agents.

    Raises:
        ValueError: If no agent could be loaded from the folder.
    """
    agents = get_initialized_agents('./' + folderName)
    if not agents:
        raise ValueError("No agents could be loaded from './" + folderName + "'")

    elite_index = None
    # Evolution needs no gradients. Disable autograd for this scope only, so
    # that gradient-based training run afterwards still works.
    with torch.no_grad():
        for generation in range(generations):
            rewards = run_agents_n_times(agents, runs_per_agent) # Average of k runs

            # Highest average reward first, truncated to the parent pool size.
            sorted_parent_indexes = np.argsort(rewards)[::-1][:top_limit]
            print('\n')

            top_rewards = [rewards[best_parent] for best_parent in sorted_parent_indexes]

            print("Generation ", generation, " | Mean rewards: ", np.mean(rewards), " | Mean of top 5: ",np.mean(top_rewards[:5]))
            print("Top ",top_limit," scores", sorted_parent_indexes)
            print("Rewards for top: ",top_rewards)

            agents, elite_index = return_children(agents, sorted_parent_indexes, elite_index)

    return agents

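# DQN (policy-gradient agent)

Note: despite the section name, the agent defined below is trained with the REINFORCE policy-gradient update (see `finish_episode`), not with Q-learning.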

In [12]:
class Policy(nn.Module):
    """Policy network: one hidden layer of 124 ReLU units, softmax output.

    Args:
        state_space: Number of input features. When omitted it is read from
            the notebook-level ``env`` (``env.observation_space.shape[0]``).
        action_space: Number of discrete actions. When omitted it is read
            from the notebook-level ``env`` (``env.action_space.n``).
    """

    def __init__(self, state_space=None, action_space=None):
        super(Policy, self).__init__()
        # Fall back to the global environment only when no size was given, so
        # the class can also be built before (or without) an `env`.
        self.state_space = env.observation_space.shape[0] if state_space is None else state_space
        self.action_space = env.action_space.n if action_space is None else action_space
        self.affine1 = nn.Linear(self.state_space, 124)
        self.affine2 = nn.Linear(124, self.action_space)

        # Filled while an episode is played and emptied again by
        # finish_episode().
        self.saved_log_probs = []
        self.rewards = []

    def forward(self, x):
        """Return action probabilities for a batch of states (softmax over dim 1)."""
        x = self.affine1(x)
        x = F.relu(x)
        action_scores = self.affine2(x)
        return F.softmax(action_scores, dim=1)

In [13]:
def select_action(state):
    """Sample an action for ``state`` from the global ``policy``.

    The log-probability of the sampled action is appended to
    ``policy.saved_log_probs`` for the later policy-gradient update.

    Args:
        state: NumPy array holding one observation.

    Returns:
        int: The sampled action index.
    """
    batch = torch.from_numpy(state).float().unsqueeze(0)
    distribution = Categorical(policy(batch))
    action = distribution.sample()
    policy.saved_log_probs.append(distribution.log_prob(action))
    return action.item()

def sim_action(state):
    """Return the action the global ``policy`` scores highest for ``state``.

    Unlike ``select_action`` nothing is sampled and nothing is recorded.
    """
    batch = torch.from_numpy(state).float().unsqueeze(0)
    # .max(1) yields (values, indices); element 1 holds the indices.
    best = policy(batch).max(1)[1]
    return best.item()

def finish_episode():
    """Run one policy-gradient update from the episode stored on ``policy``.

    Reads ``policy.rewards`` and ``policy.saved_log_probs``, computes
    discounted returns with the global ``gamma``, standardises them, takes
    one step of the global ``optimizer`` on ``sum(-log_prob * return)`` and
    finally clears both buffers.

    Edge cases:
        * Nothing recorded: the buffers are cleared and no step is taken.
        * A single recorded step: the return is used without standardising,
          because the standard deviation of one sample is NaN and would
          poison every weight.
    """
    if not policy.rewards or not policy.saved_log_probs:
        del policy.rewards[:]
        del policy.saved_log_probs[:]
        return

    # Accumulate discounted returns back to front, then flip once. This is
    # linear, where repeated insert(0, ...) would be quadratic.
    R = 0
    returns = []
    for r in reversed(policy.rewards):
        R = r + gamma * R
        returns.append(R)
    returns.reverse()

    returns = torch.tensor(returns, dtype=torch.float32)
    if returns.numel() > 1:
        returns = (returns - returns.mean()) / (returns.std() + eps)

    policy_loss = [-log_prob * R for log_prob, R in zip(policy.saved_log_probs, returns)]

    optimizer.zero_grad()
    policy_loss = torch.cat(policy_loss).sum()
    policy_loss.backward()
    optimizer.step()

    del policy.rewards[:]
    del policy.saved_log_probs[:]

def sim():
    """Play one rendered episode on the global ``env`` using ``sim_action``.

    Runs for at most 9999 steps. When the environment reports ``done`` the
    accumulated reward is printed and the loop stops.
    """
    tot_reward = 0
    state = env.reset()
    for _step in range(9999):
        state, reward, done, _ = env.step(sim_action(state))
        tot_reward += reward
        env.render()
        if not done:
            continue
        print(tot_reward)
        break

In [14]:
def trainDQNmodel(returnReward = False):
    """Train the global ``policy`` on the global ``env`` for ``numEpisodes`` episodes.

    Each episode is played with ``select_action`` (at most 19999 steps) and
    followed by one ``finish_episode()`` update. An exponentially smoothed
    running reward (weight 0.04 on the newest episode, starting from 10) is
    tracked, and a progress line is printed every ``log_interval`` episodes.

    Args:
        returnReward: When equal to ``True`` the environment is closed and
            the final running reward is returned. Otherwise nothing is
            returned and the environment is left open.
    """
    smoothing = 0.04
    running_reward = 10
    for i_episode in range(numEpisodes):
        state = env.reset()
        ep_reward = 0
        for _step in range(19999):  # hard cap so an episode cannot loop forever
            state, reward, done, _ = env.step(select_action(state))
            if render:
                env.render()
            policy.rewards.append(reward)
            ep_reward += reward
            if done:
                break

        running_reward = smoothing * ep_reward + (1 - smoothing) * running_reward
        finish_episode()
        if i_episode % log_interval == 0:
            print('Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}'.format(
                  i_episode, ep_reward, running_reward))

    # Early stopping on env.spec.reward_threshold is deliberately not done.
    if not (returnReward == True):
        return
    env.close()
    return running_reward
        

In [15]:
# Notebook-wide configuration, read as globals by the training functions.
gamma = 0.99 # discount factor
seed  = 543
render = False
log_interval = 10

# Defaults for the training loops below. They were commented out, which left
# every reader of these names failing with a NameError on a fresh kernel.
# Later experiment cells may overwrite them.
numEpisodes = 15
numSuccessions = 10

env = gym.make('CartPole-v1')

# Apply the seed to each source of randomness the notebook uses; it was
# previously defined but never passed to anything.
torch.manual_seed(seed)
np.random.seed(seed)
env.seed(seed)  # old-style gym seeding, matching the gym API used throughout

In [16]:
# Fresh policy network together with the Adam optimiser that trains it.
policy = Policy()
optimizer = optim.Adam(params=policy.parameters(), lr=0.011)
# float32 machine epsilon; finish_episode() adds it to the standard deviation
# of the returns so the division there cannot hit zero.
eps = float(np.finfo(np.float32).eps)

# STOP RUNNING HERE

In [None]:
# For every succession: train `numEpisodes` independent policies with the
# gradient-based trainer, save each one as a checkpoint, then hand the whole
# folder to the evolutionary trainer as its starting population.
for successions in range(numSuccessions):

    folderName = "Succession" + str(successions)
    os.makedirs(folderName, exist_ok=True)

    fileName = None
    for episodes in range(numEpisodes):
        # Gradient training needs autograd; re-enable it in case an earlier
        # evolutionary phase switched it off globally.
        torch.set_grad_enabled(True)
        policy = Policy()
        # The optimizer must be rebuilt for the new network. Otherwise it
        # keeps updating the parameters of the previous `policy` and the new
        # one is saved untrained.
        optimizer = optim.Adam(policy.parameters(), lr=1.10e-2)
        trainDQNmodel()
        fileName   = "Episode" + str(episodes) + ".pth"
        torch.save(policy.state_dict(), os.path.join(folderName, fileName))

    # Skip evolution when no checkpoint was written (numEpisodes == 0).
    if fileName is not None:
        trainEVOModel(folderName, fileName)

In [None]:
# Display the reward threshold currently stored on the environment's spec.
env.spec.reward_threshold

In [None]:
from timeit import default_timer as timer

# Sweep over the number of training episodes. For each setting, train
# `numSuccessions` fresh policies and record the mean final running reward
# and the mean wall-clock training time.
numEpisodesList = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
numSuccessions = 100

avgReward = np.zeros(len(numEpisodesList))
avgTimeToRun = np.zeros(len(numEpisodesList))

# `numEpisodes` is deliberately the (global) loop variable: trainDQNmodel()
# reads it to decide how many episodes to train for.
for ii, numEpisodes in enumerate(numEpisodesList):

    reward    = 0
    timeToRun = 0

    for successions in range(numSuccessions):

        print(numEpisodes)
        print(successions)
        start   = timer()

        policy = Policy()
        optimizer = optim.Adam(policy.parameters(), lr=1.10e-2)
        eps = np.finfo(np.float32).eps.item()
        reward += trainDQNmodel(returnReward = True)
        end     = timer()

        # Accumulate. Plain assignment kept only the last run's time, which
        # made the "average" below last_run / numSuccessions.
        timeToRun += (end - start)

    avgReward[ii]    = reward/numSuccessions
    avgTimeToRun[ii] = timeToRun/numSuccessions

50
0
Episode 0	Last reward: 23.00	Average reward: 10.52
Episode 10	Last reward: 17.00	Average reward: 23.60
Episode 20	Last reward: 57.00	Average reward: 34.65
Episode 30	Last reward: 21.00	Average reward: 34.18
Episode 40	Last reward: 53.00	Average reward: 34.26
50
1
Episode 0	Last reward: 18.00	Average reward: 10.32
Episode 10	Last reward: 9.00	Average reward: 11.64
Episode 20	Last reward: 25.00	Average reward: 13.46
Episode 30	Last reward: 50.00	Average reward: 21.46
Episode 40	Last reward: 57.00	Average reward: 39.04
50
2
Episode 0	Last reward: 16.00	Average reward: 10.24
Episode 10	Last reward: 92.00	Average reward: 34.92
Episode 20	Last reward: 402.00	Average reward: 70.85
Episode 30	Last reward: 260.00	Average reward: 111.40
Episode 40	Last reward: 138.00	Average reward: 129.26
50
3
Episode 0	Last reward: 25.00	Average reward: 10.60
Episode 10	Last reward: 16.00	Average reward: 12.81
Episode 20	Last reward: 41.00	Average reward: 17.56
Episode 30	Last reward: 38.00	Average reward

50
31
Episode 0	Last reward: 59.00	Average reward: 11.96
Episode 10	Last reward: 10.00	Average reward: 11.95
Episode 20	Last reward: 9.00	Average reward: 11.66
Episode 30	Last reward: 9.00	Average reward: 10.95
Episode 40	Last reward: 9.00	Average reward: 10.88
50
32
Episode 0	Last reward: 15.00	Average reward: 10.20
Episode 10	Last reward: 37.00	Average reward: 14.83
Episode 20	Last reward: 14.00	Average reward: 16.02
Episode 30	Last reward: 20.00	Average reward: 17.10
Episode 40	Last reward: 130.00	Average reward: 38.54
50
33
Episode 0	Last reward: 14.00	Average reward: 10.16
Episode 10	Last reward: 75.00	Average reward: 19.22
Episode 20	Last reward: 93.00	Average reward: 49.81
Episode 30	Last reward: 22.00	Average reward: 65.35
Episode 40	Last reward: 25.00	Average reward: 54.77
50
34
Episode 0	Last reward: 28.00	Average reward: 10.72
Episode 10	Last reward: 40.00	Average reward: 15.75
Episode 20	Last reward: 26.00	Average reward: 22.68
Episode 30	Last reward: 53.00	Average reward: 

50
62
Episode 0	Last reward: 22.00	Average reward: 10.48
Episode 10	Last reward: 23.00	Average reward: 13.59
Episode 20	Last reward: 22.00	Average reward: 21.96
Episode 30	Last reward: 31.00	Average reward: 30.54
Episode 40	Last reward: 22.00	Average reward: 34.48
50
63
Episode 0	Last reward: 33.00	Average reward: 10.92
Episode 10	Last reward: 16.00	Average reward: 16.07
Episode 20	Last reward: 27.00	Average reward: 23.86
Episode 30	Last reward: 40.00	Average reward: 34.92
Episode 40	Last reward: 57.00	Average reward: 55.57
50
64
Episode 0	Last reward: 53.00	Average reward: 11.72
Episode 10	Last reward: 10.00	Average reward: 11.17
Episode 20	Last reward: 10.00	Average reward: 10.59
Episode 30	Last reward: 11.00	Average reward: 10.21
Episode 40	Last reward: 10.00	Average reward: 9.99
50
65
Episode 0	Last reward: 34.00	Average reward: 10.96
Episode 10	Last reward: 35.00	Average reward: 18.07
Episode 20	Last reward: 76.00	Average reward: 37.88
Episode 30	Last reward: 239.00	Average reward

Episode 10	Last reward: 12.00	Average reward: 13.62
Episode 20	Last reward: 56.00	Average reward: 23.62
Episode 30	Last reward: 27.00	Average reward: 31.30
Episode 40	Last reward: 44.00	Average reward: 32.24
50
94
Episode 0	Last reward: 12.00	Average reward: 10.08
Episode 10	Last reward: 10.00	Average reward: 11.37
Episode 20	Last reward: 14.00	Average reward: 14.01
Episode 30	Last reward: 35.00	Average reward: 20.26
Episode 40	Last reward: 71.00	Average reward: 30.97
50
95
Episode 0	Last reward: 12.00	Average reward: 10.08
Episode 10	Last reward: 11.00	Average reward: 10.69
Episode 20	Last reward: 11.00	Average reward: 10.50
Episode 30	Last reward: 10.00	Average reward: 10.22
Episode 40	Last reward: 8.00	Average reward: 9.88
50
96
Episode 0	Last reward: 52.00	Average reward: 11.68
Episode 10	Last reward: 15.00	Average reward: 16.25
Episode 20	Last reward: 86.00	Average reward: 25.16
Episode 30	Last reward: 76.00	Average reward: 36.32
Episode 40	Last reward: 16.00	Average reward: 46.89

Episode 20	Last reward: 15.00	Average reward: 12.53
Episode 30	Last reward: 16.00	Average reward: 13.49
Episode 40	Last reward: 33.00	Average reward: 19.03
Episode 50	Last reward: 74.00	Average reward: 32.36
Episode 60	Last reward: 30.00	Average reward: 36.59
Episode 70	Last reward: 34.00	Average reward: 34.41
Episode 80	Last reward: 59.00	Average reward: 36.22
Episode 90	Last reward: 37.00	Average reward: 41.98
100
13
Episode 0	Last reward: 32.00	Average reward: 10.88
Episode 10	Last reward: 10.00	Average reward: 10.78
Episode 20	Last reward: 10.00	Average reward: 10.40
Episode 30	Last reward: 10.00	Average reward: 9.97
Episode 40	Last reward: 9.00	Average reward: 9.81
Episode 50	Last reward: 10.00	Average reward: 9.75
Episode 60	Last reward: 9.00	Average reward: 9.50
Episode 70	Last reward: 10.00	Average reward: 9.41
Episode 80	Last reward: 11.00	Average reward: 9.51
Episode 90	Last reward: 9.00	Average reward: 9.44
100
14
Episode 0	Last reward: 14.00	Average reward: 10.16
Episode 10

Episode 70	Last reward: 30.00	Average reward: 93.42
Episode 80	Last reward: 14.00	Average reward: 67.82
Episode 90	Last reward: 47.00	Average reward: 55.32
100
28
Episode 0	Last reward: 16.00	Average reward: 10.24
Episode 10	Last reward: 82.00	Average reward: 19.03
Episode 20	Last reward: 21.00	Average reward: 25.96
Episode 30	Last reward: 197.00	Average reward: 60.98
Episode 40	Last reward: 99.00	Average reward: 104.17
Episode 50	Last reward: 49.00	Average reward: 91.18
Episode 60	Last reward: 105.00	Average reward: 97.68
Episode 70	Last reward: 499.00	Average reward: 172.13
Episode 80	Last reward: 163.00	Average reward: 188.68
Episode 90	Last reward: 198.00	Average reward: 191.61
100
29
Episode 0	Last reward: 18.00	Average reward: 10.32
Episode 10	Last reward: 35.00	Average reward: 16.83
Episode 20	Last reward: 38.00	Average reward: 23.45
Episode 30	Last reward: 26.00	Average reward: 29.35
Episode 40	Last reward: 25.00	Average reward: 36.99
Episode 50	Last reward: 29.00	Average rewar

Episode 20	Last reward: 116.00	Average reward: 40.24
Episode 30	Last reward: 124.00	Average reward: 47.27
Episode 40	Last reward: 107.00	Average reward: 61.26
Episode 50	Last reward: 207.00	Average reward: 73.25
Episode 60	Last reward: 347.00	Average reward: 115.76
Episode 70	Last reward: 125.00	Average reward: 131.31
Episode 80	Last reward: 90.00	Average reward: 146.85
Episode 90	Last reward: 31.00	Average reward: 131.41
100
44
Episode 0	Last reward: 31.00	Average reward: 10.84
Episode 10	Last reward: 42.00	Average reward: 20.32
Episode 20	Last reward: 136.00	Average reward: 36.96
Episode 30	Last reward: 162.00	Average reward: 45.98
Episode 40	Last reward: 39.00	Average reward: 51.67
Episode 50	Last reward: 168.00	Average reward: 65.10
Episode 60	Last reward: 25.00	Average reward: 75.83
Episode 70	Last reward: 60.00	Average reward: 64.09
Episode 80	Last reward: 58.00	Average reward: 60.57
Episode 90	Last reward: 248.00	Average reward: 96.57
100
45
Episode 0	Last reward: 14.00	Average 

Episode 70	Last reward: 122.00	Average reward: 124.55
Episode 80	Last reward: 53.00	Average reward: 104.58
Episode 90	Last reward: 61.00	Average reward: 85.33
100
59
Episode 0	Last reward: 11.00	Average reward: 10.04
Episode 10	Last reward: 45.00	Average reward: 19.13
Episode 20	Last reward: 71.00	Average reward: 32.16
Episode 30	Last reward: 40.00	Average reward: 39.22
Episode 40	Last reward: 28.00	Average reward: 37.33
Episode 50	Last reward: 51.00	Average reward: 35.68
Episode 60	Last reward: 31.00	Average reward: 38.72
Episode 70	Last reward: 127.00	Average reward: 42.65
Episode 80	Last reward: 44.00	Average reward: 50.91
Episode 90	Last reward: 31.00	Average reward: 54.92
100
60
Episode 0	Last reward: 11.00	Average reward: 10.04
Episode 10	Last reward: 9.00	Average reward: 13.35
Episode 20	Last reward: 79.00	Average reward: 27.25
Episode 30	Last reward: 43.00	Average reward: 46.78
Episode 40	Last reward: 117.00	Average reward: 51.39
Episode 50	Last reward: 101.00	Average reward: 8

Episode 30	Last reward: 38.00	Average reward: 23.89
Episode 40	Last reward: 29.00	Average reward: 29.96
Episode 50	Last reward: 48.00	Average reward: 42.82
Episode 60	Last reward: 74.00	Average reward: 54.57
Episode 70	Last reward: 135.00	Average reward: 75.72
Episode 80	Last reward: 108.00	Average reward: 86.73
Episode 90	Last reward: 283.00	Average reward: 184.33
100
75
Episode 0	Last reward: 41.00	Average reward: 11.24
Episode 10	Last reward: 16.00	Average reward: 14.31
Episode 20	Last reward: 52.00	Average reward: 21.11
Episode 30	Last reward: 27.00	Average reward: 23.42
Episode 40	Last reward: 54.00	Average reward: 39.05
Episode 50	Last reward: 134.00	Average reward: 50.28
Episode 60	Last reward: 49.00	Average reward: 55.59
Episode 70	Last reward: 142.00	Average reward: 66.39
Episode 80	Last reward: 151.00	Average reward: 118.95
Episode 90	Last reward: 17.00	Average reward: 95.96
100
76
Episode 0	Last reward: 24.00	Average reward: 10.56
Episode 10	Last reward: 78.00	Average reward

Episode 80	Last reward: 49.00	Average reward: 52.11
Episode 90	Last reward: 89.00	Average reward: 58.92
100
90
Episode 0	Last reward: 39.00	Average reward: 11.16
Episode 10	Last reward: 14.00	Average reward: 20.09
Episode 20	Last reward: 23.00	Average reward: 39.49
Episode 30	Last reward: 58.00	Average reward: 61.76
Episode 40	Last reward: 105.00	Average reward: 87.77
Episode 50	Last reward: 258.00	Average reward: 117.06
Episode 60	Last reward: 243.00	Average reward: 174.22
Episode 70	Last reward: 500.00	Average reward: 264.89
Episode 80	Last reward: 221.00	Average reward: 292.90
Episode 90	Last reward: 500.00	Average reward: 332.39
100
91
Episode 0	Last reward: 14.00	Average reward: 10.16
Episode 10	Last reward: 15.00	Average reward: 13.24
Episode 20	Last reward: 9.00	Average reward: 12.81
Episode 30	Last reward: 11.00	Average reward: 12.72
Episode 40	Last reward: 10.00	Average reward: 12.66
Episode 50	Last reward: 11.00	Average reward: 12.94
Episode 60	Last reward: 19.00	Average rewa

Episode 80	Last reward: 25.00	Average reward: 36.34
Episode 90	Last reward: 61.00	Average reward: 40.25
Episode 100	Last reward: 369.00	Average reward: 94.16
Episode 110	Last reward: 358.00	Average reward: 159.80
Episode 120	Last reward: 137.00	Average reward: 156.22
Episode 130	Last reward: 90.00	Average reward: 129.38
Episode 140	Last reward: 79.00	Average reward: 114.49
150
4
Episode 0	Last reward: 14.00	Average reward: 10.16
Episode 10	Last reward: 30.00	Average reward: 20.54
Episode 20	Last reward: 432.00	Average reward: 65.96
Episode 30	Last reward: 32.00	Average reward: 98.29
Episode 40	Last reward: 52.00	Average reward: 83.53
Episode 50	Last reward: 130.00	Average reward: 89.38
Episode 60	Last reward: 110.00	Average reward: 113.37
Episode 70	Last reward: 140.00	Average reward: 115.18
Episode 80	Last reward: 113.00	Average reward: 120.88
Episode 90	Last reward: 113.00	Average reward: 111.77
Episode 100	Last reward: 123.00	Average reward: 107.41
Episode 110	Last reward: 199.00	Av

Episode 130	Last reward: 189.00	Average reward: 130.11
Episode 140	Last reward: 500.00	Average reward: 218.73
150
14
Episode 0	Last reward: 13.00	Average reward: 10.12
Episode 10	Last reward: 18.00	Average reward: 14.45
Episode 20	Last reward: 94.00	Average reward: 27.75
Episode 30	Last reward: 73.00	Average reward: 80.96
Episode 40	Last reward: 129.00	Average reward: 100.78
Episode 50	Last reward: 332.00	Average reward: 194.94
Episode 60	Last reward: 150.00	Average reward: 177.26
Episode 70	Last reward: 177.00	Average reward: 173.32
Episode 80	Last reward: 168.00	Average reward: 176.84
Episode 90	Last reward: 112.00	Average reward: 158.77
Episode 100	Last reward: 111.00	Average reward: 141.50
Episode 110	Last reward: 133.00	Average reward: 135.49
Episode 120	Last reward: 83.00	Average reward: 135.09
Episode 130	Last reward: 85.00	Average reward: 116.89
Episode 140	Last reward: 100.00	Average reward: 106.46
150
15
Episode 0	Last reward: 12.00	Average reward: 10.08
Episode 10	Last rewar

Episode 20	Last reward: 63.00	Average reward: 27.58
Episode 30	Last reward: 70.00	Average reward: 43.54
Episode 40	Last reward: 142.00	Average reward: 61.68
Episode 50	Last reward: 159.00	Average reward: 101.00
Episode 60	Last reward: 99.00	Average reward: 112.48
Episode 70	Last reward: 52.00	Average reward: 94.16
Episode 80	Last reward: 90.00	Average reward: 91.69
Episode 90	Last reward: 113.00	Average reward: 119.32
Episode 100	Last reward: 79.00	Average reward: 117.45
Episode 110	Last reward: 47.00	Average reward: 95.68
Episode 120	Last reward: 112.00	Average reward: 99.78
Episode 130	Last reward: 106.00	Average reward: 105.09
Episode 140	Last reward: 453.00	Average reward: 155.48
150
25
Episode 0	Last reward: 17.00	Average reward: 10.28
Episode 10	Last reward: 13.00	Average reward: 11.40
Episode 20	Last reward: 9.00	Average reward: 11.40
Episode 30	Last reward: 18.00	Average reward: 11.84
Episode 40	Last reward: 8.00	Average reward: 13.12
Episode 50	Last reward: 21.00	Average rewar

Episode 60	Last reward: 190.00	Average reward: 134.13
Episode 70	Last reward: 257.00	Average reward: 174.43
Episode 80	Last reward: 119.00	Average reward: 213.16
Episode 90	Last reward: 119.00	Average reward: 164.83
Episode 100	Last reward: 109.00	Average reward: 151.95
Episode 110	Last reward: 125.00	Average reward: 136.49
Episode 120	Last reward: 96.00	Average reward: 128.25
Episode 130	Last reward: 106.00	Average reward: 118.74
Episode 140	Last reward: 127.00	Average reward: 121.11
150
35
Episode 0	Last reward: 12.00	Average reward: 10.08
Episode 10	Last reward: 36.00	Average reward: 16.75
Episode 20	Last reward: 20.00	Average reward: 19.39
Episode 30	Last reward: 46.00	Average reward: 26.92
Episode 40	Last reward: 40.00	Average reward: 52.75
Episode 50	Last reward: 197.00	Average reward: 70.33
Episode 60	Last reward: 434.00	Average reward: 148.03
Episode 70	Last reward: 339.00	Average reward: 185.47
Episode 80	Last reward: 114.00	Average reward: 189.46
Episode 90	Last reward: 117.0

Episode 100	Last reward: 199.00	Average reward: 161.36
Episode 110	Last reward: 88.00	Average reward: 138.59
Episode 120	Last reward: 123.00	Average reward: 125.23
Episode 130	Last reward: 135.00	Average reward: 125.72
Episode 140	Last reward: 100.00	Average reward: 126.85
150
45
Episode 0	Last reward: 54.00	Average reward: 11.76
Episode 10	Last reward: 21.00	Average reward: 12.00
Episode 20	Last reward: 76.00	Average reward: 19.02
Episode 30	Last reward: 20.00	Average reward: 27.17
Episode 40	Last reward: 46.00	Average reward: 28.51
Episode 50	Last reward: 123.00	Average reward: 45.83
Episode 60	Last reward: 87.00	Average reward: 75.09
Episode 70	Last reward: 404.00	Average reward: 121.10
Episode 80	Last reward: 225.00	Average reward: 224.58
Episode 90	Last reward: 224.00	Average reward: 232.89
Episode 100	Last reward: 155.00	Average reward: 249.59
Episode 110	Last reward: 175.00	Average reward: 230.32
Episode 120	Last reward: 163.00	Average reward: 222.09
Episode 130	Last reward: 104

Episode 130	Last reward: 95.00	Average reward: 76.99
Episode 140	Last reward: 100.00	Average reward: 84.46
150
55
Episode 0	Last reward: 21.00	Average reward: 10.44
Episode 10	Last reward: 29.00	Average reward: 15.28
Episode 20	Last reward: 28.00	Average reward: 19.15
Episode 30	Last reward: 30.00	Average reward: 25.54
Episode 40	Last reward: 158.00	Average reward: 47.45
Episode 50	Last reward: 79.00	Average reward: 52.84
Episode 60	Last reward: 45.00	Average reward: 53.73
Episode 70	Last reward: 51.00	Average reward: 57.98
Episode 80	Last reward: 73.00	Average reward: 64.80
Episode 90	Last reward: 500.00	Average reward: 123.17
Episode 100	Last reward: 231.00	Average reward: 183.97
Episode 110	Last reward: 303.00	Average reward: 197.30
Episode 120	Last reward: 268.00	Average reward: 205.56
Episode 130	Last reward: 500.00	Average reward: 245.50
Episode 140	Last reward: 500.00	Average reward: 325.63
150
56
Episode 0	Last reward: 12.00	Average reward: 10.08
Episode 10	Last reward: 8.00	Av

Episode 20	Last reward: 9.00	Average reward: 21.72
Episode 30	Last reward: 121.00	Average reward: 35.14
Episode 40	Last reward: 49.00	Average reward: 44.55
Episode 50	Last reward: 95.00	Average reward: 50.67
Episode 60	Last reward: 34.00	Average reward: 48.88
Episode 70	Last reward: 29.00	Average reward: 43.63
Episode 80	Last reward: 25.00	Average reward: 39.75
Episode 90	Last reward: 34.00	Average reward: 35.96
Episode 100	Last reward: 32.00	Average reward: 35.40
Episode 110	Last reward: 21.00	Average reward: 33.64
Episode 120	Last reward: 28.00	Average reward: 31.90
Episode 130	Last reward: 36.00	Average reward: 31.83
Episode 140	Last reward: 42.00	Average reward: 33.66
150
66
Episode 0	Last reward: 20.00	Average reward: 10.40
Episode 10	Last reward: 25.00	Average reward: 12.12
Episode 20	Last reward: 72.00	Average reward: 19.41
Episode 30	Last reward: 41.00	Average reward: 37.86
Episode 40	Last reward: 48.00	Average reward: 42.76
Episode 50	Last reward: 60.00	Average reward: 55.85
E

Episode 70	Last reward: 203.00	Average reward: 246.10
Episode 80	Last reward: 232.00	Average reward: 283.92
Episode 90	Last reward: 309.00	Average reward: 299.89
Episode 100	Last reward: 500.00	Average reward: 365.56
Episode 110	Last reward: 460.00	Average reward: 386.72
Episode 120	Last reward: 112.00	Average reward: 294.21
Episode 130	Last reward: 118.00	Average reward: 232.08
Episode 140	Last reward: 127.00	Average reward: 193.48
150
76
Episode 0	Last reward: 19.00	Average reward: 10.36
Episode 10	Last reward: 15.00	Average reward: 12.68
Episode 20	Last reward: 11.00	Average reward: 15.06
Episode 30	Last reward: 44.00	Average reward: 20.84
Episode 40	Last reward: 39.00	Average reward: 32.11
Episode 50	Last reward: 80.00	Average reward: 55.89
Episode 60	Last reward: 31.00	Average reward: 51.05
Episode 70	Last reward: 28.00	Average reward: 44.42
Episode 80	Last reward: 95.00	Average reward: 45.23
Episode 90	Last reward: 85.00	Average reward: 62.99
Episode 100	Last reward: 61.00	Averag

Episode 110	Last reward: 500.00	Average reward: 170.67
Episode 120	Last reward: 229.00	Average reward: 218.39
Episode 130	Last reward: 38.00	Average reward: 173.94
Episode 140	Last reward: 99.00	Average reward: 130.68
150
86
Episode 0	Last reward: 20.00	Average reward: 10.40
Episode 10	Last reward: 12.00	Average reward: 17.29
Episode 20	Last reward: 20.00	Average reward: 16.94
Episode 30	Last reward: 21.00	Average reward: 16.94
Episode 40	Last reward: 20.00	Average reward: 19.00
Episode 50	Last reward: 58.00	Average reward: 29.79
Episode 60	Last reward: 39.00	Average reward: 36.38
Episode 70	Last reward: 23.00	Average reward: 38.29
Episode 80	Last reward: 22.00	Average reward: 34.67
Episode 90	Last reward: 21.00	Average reward: 31.51
Episode 100	Last reward: 49.00	Average reward: 36.61
Episode 110	Last reward: 52.00	Average reward: 48.38
Episode 120	Last reward: 68.00	Average reward: 53.39
Episode 130	Last reward: 56.00	Average reward: 55.45
Episode 140	Last reward: 98.00	Average rewar

150
96
Episode 0	Last reward: 8.00	Average reward: 9.92
Episode 10	Last reward: 23.00	Average reward: 17.83
Episode 20	Last reward: 53.00	Average reward: 20.76
Episode 30	Last reward: 26.00	Average reward: 27.29
Episode 40	Last reward: 49.00	Average reward: 47.21
Episode 50	Last reward: 191.00	Average reward: 64.75
Episode 60	Last reward: 97.00	Average reward: 90.57
Episode 70	Last reward: 72.00	Average reward: 95.90
Episode 80	Last reward: 81.00	Average reward: 100.64
Episode 90	Last reward: 67.00	Average reward: 91.14
Episode 100	Last reward: 73.00	Average reward: 79.40
Episode 110	Last reward: 85.00	Average reward: 87.66
Episode 120	Last reward: 294.00	Average reward: 126.38
Episode 130	Last reward: 341.00	Average reward: 182.30
Episode 140	Last reward: 500.00	Average reward: 282.64
150
97
Episode 0	Last reward: 40.00	Average reward: 11.20
Episode 10	Last reward: 14.00	Average reward: 13.29
Episode 20	Last reward: 13.00	Average reward: 14.96
Episode 30	Last reward: 56.00	Average rew

Episode 140	Last reward: 9.00	Average reward: 9.50
Episode 150	Last reward: 9.00	Average reward: 9.63
Episode 160	Last reward: 8.00	Average reward: 9.39
Episode 170	Last reward: 10.00	Average reward: 9.42
Episode 180	Last reward: 12.00	Average reward: 9.52
Episode 190	Last reward: 10.00	Average reward: 9.46
200
5
Episode 0	Last reward: 20.00	Average reward: 10.40
Episode 10	Last reward: 17.00	Average reward: 14.82
Episode 20	Last reward: 13.00	Average reward: 16.57
Episode 30	Last reward: 13.00	Average reward: 18.31
Episode 40	Last reward: 46.00	Average reward: 27.96
Episode 50	Last reward: 107.00	Average reward: 42.72
Episode 60	Last reward: 73.00	Average reward: 59.67
Episode 70	Last reward: 55.00	Average reward: 58.84
Episode 80	Last reward: 33.00	Average reward: 56.30
Episode 90	Last reward: 118.00	Average reward: 59.43
Episode 100	Last reward: 53.00	Average reward: 59.47
Episode 110	Last reward: 107.00	Average reward: 59.60
Episode 120	Last reward: 108.00	Average reward: 60.66
Epi

Episode 80	Last reward: 265.00	Average reward: 329.02
Episode 90	Last reward: 15.00	Average reward: 269.67
Episode 100	Last reward: 119.00	Average reward: 209.39
Episode 110	Last reward: 108.00	Average reward: 168.71
Episode 120	Last reward: 115.00	Average reward: 151.01
Episode 130	Last reward: 34.00	Average reward: 122.57
Episode 140	Last reward: 62.00	Average reward: 97.41
Episode 150	Last reward: 79.00	Average reward: 93.38
Episode 160	Last reward: 93.00	Average reward: 94.53
Episode 170	Last reward: 94.00	Average reward: 100.04
Episode 180	Last reward: 69.00	Average reward: 90.38
Episode 190	Last reward: 58.00	Average reward: 76.87
200
13
Episode 0	Last reward: 15.00	Average reward: 10.20
Episode 10	Last reward: 39.00	Average reward: 19.62
Episode 20	Last reward: 239.00	Average reward: 41.88
Episode 30	Last reward: 487.00	Average reward: 85.39
Episode 40	Last reward: 219.00	Average reward: 110.65
Episode 50	Last reward: 61.00	Average reward: 103.24
Episode 60	Last reward: 52.00	Av

Episode 10	Last reward: 32.00	Average reward: 17.14
Episode 20	Last reward: 22.00	Average reward: 21.97
Episode 30	Last reward: 38.00	Average reward: 34.98
Episode 40	Last reward: 105.00	Average reward: 88.54
Episode 50	Last reward: 16.00	Average reward: 67.51
Episode 60	Last reward: 32.00	Average reward: 56.03
Episode 70	Last reward: 100.00	Average reward: 62.34
Episode 80	Last reward: 78.00	Average reward: 69.22
Episode 90	Last reward: 197.00	Average reward: 96.88
Episode 100	Last reward: 163.00	Average reward: 165.80
Episode 110	Last reward: 30.00	Average reward: 135.34
Episode 120	Last reward: 93.00	Average reward: 117.90
Episode 130	Last reward: 140.00	Average reward: 118.84
Episode 140	Last reward: 199.00	Average reward: 141.31
Episode 150	Last reward: 208.00	Average reward: 178.88
Episode 160	Last reward: 218.00	Average reward: 188.99
Episode 170	Last reward: 264.00	Average reward: 205.50
Episode 180	Last reward: 500.00	Average reward: 274.04
Episode 190	Last reward: 273.00	Aver

Episode 140	Last reward: 9.00	Average reward: 9.37
Episode 150	Last reward: 10.00	Average reward: 9.59
Episode 160	Last reward: 37.00	Average reward: 11.59
Episode 170	Last reward: 56.00	Average reward: 24.45
Episode 180	Last reward: 54.00	Average reward: 40.40
Episode 190	Last reward: 45.00	Average reward: 46.03
200
28
Episode 0	Last reward: 14.00	Average reward: 10.16
Episode 10	Last reward: 13.00	Average reward: 11.05
Episode 20	Last reward: 14.00	Average reward: 14.29
Episode 30	Last reward: 35.00	Average reward: 22.42
Episode 40	Last reward: 38.00	Average reward: 25.59
Episode 50	Last reward: 153.00	Average reward: 47.71
Episode 60	Last reward: 57.00	Average reward: 51.29
Episode 70	Last reward: 86.00	Average reward: 65.63
Episode 80	Last reward: 124.00	Average reward: 71.11
Episode 90	Last reward: 63.00	Average reward: 76.58
Episode 100	Last reward: 118.00	Average reward: 75.44
Episode 110	Last reward: 183.00	Average reward: 81.37
Episode 120	Last reward: 56.00	Average reward: 81

Episode 90	Last reward: 65.00	Average reward: 60.61
Episode 100	Last reward: 34.00	Average reward: 60.17
Episode 110	Last reward: 59.00	Average reward: 60.98
Episode 120	Last reward: 320.00	Average reward: 146.07
Episode 130	Last reward: 165.00	Average reward: 153.81
Episode 140	Last reward: 77.00	Average reward: 142.03
Episode 150	Last reward: 26.00	Average reward: 111.58
Episode 160	Last reward: 500.00	Average reward: 103.43
Episode 170	Last reward: 500.00	Average reward: 166.22
Episode 180	Last reward: 344.00	Average reward: 250.37
Episode 190	Last reward: 162.00	Average reward: 237.49
200
36
Episode 0	Last reward: 15.00	Average reward: 10.20
Episode 10	Last reward: 16.00	Average reward: 11.19
Episode 20	Last reward: 9.00	Average reward: 11.47
Episode 30	Last reward: 12.00	Average reward: 11.27
Episode 40	Last reward: 9.00	Average reward: 11.03
Episode 50	Last reward: 9.00	Average reward: 10.48
Episode 60	Last reward: 8.00	Average reward: 10.08
Episode 70	Last reward: 9.00	Average r

Episode 50	Last reward: 10.00	Average reward: 10.87
Episode 60	Last reward: 8.00	Average reward: 11.00
Episode 70	Last reward: 9.00	Average reward: 11.08
Episode 80	Last reward: 9.00	Average reward: 10.94
Episode 90	Last reward: 9.00	Average reward: 10.77
Episode 100	Last reward: 14.00	Average reward: 11.95
Episode 110	Last reward: 14.00	Average reward: 13.02
Episode 120	Last reward: 15.00	Average reward: 13.77
Episode 130	Last reward: 15.00	Average reward: 14.78
Episode 140	Last reward: 10.00	Average reward: 14.01
Episode 150	Last reward: 18.00	Average reward: 14.85
Episode 160	Last reward: 53.00	Average reward: 19.28
Episode 170	Last reward: 72.00	Average reward: 24.09
Episode 180	Last reward: 58.00	Average reward: 40.94
Episode 190	Last reward: 79.00	Average reward: 58.52
200
44
Episode 0	Last reward: 14.00	Average reward: 10.16
Episode 10	Last reward: 25.00	Average reward: 11.24
Episode 20	Last reward: 26.00	Average reward: 13.77
Episode 30	Last reward: 11.00	Average reward: 12.68


200
51
Episode 0	Last reward: 11.00	Average reward: 10.04
Episode 10	Last reward: 10.00	Average reward: 10.57
Episode 20	Last reward: 10.00	Average reward: 10.86
Episode 30	Last reward: 10.00	Average reward: 10.57
Episode 40	Last reward: 9.00	Average reward: 10.33
Episode 50	Last reward: 8.00	Average reward: 10.20
Episode 60	Last reward: 10.00	Average reward: 10.02
Episode 70	Last reward: 9.00	Average reward: 9.80
Episode 80	Last reward: 9.00	Average reward: 9.88
Episode 90	Last reward: 12.00	Average reward: 9.83
Episode 100	Last reward: 10.00	Average reward: 9.62
Episode 110	Last reward: 10.00	Average reward: 9.65
Episode 120	Last reward: 10.00	Average reward: 9.82
Episode 130	Last reward: 13.00	Average reward: 10.05
Episode 140	Last reward: 13.00	Average reward: 10.39
Episode 150	Last reward: 15.00	Average reward: 11.45
Episode 160	Last reward: 10.00	Average reward: 11.25
Episode 170	Last reward: 15.00	Average reward: 12.05
Episode 180	Last reward: 43.00	Average reward: 18.70
Episode

Episode 140	Last reward: 27.00	Average reward: 76.18
Episode 150	Last reward: 187.00	Average reward: 95.16
Episode 160	Last reward: 99.00	Average reward: 106.27
Episode 170	Last reward: 187.00	Average reward: 113.07
Episode 180	Last reward: 500.00	Average reward: 197.87
Episode 190	Last reward: 500.00	Average reward: 299.14
200
59
Episode 0	Last reward: 16.00	Average reward: 10.24
Episode 10	Last reward: 24.00	Average reward: 12.34
Episode 20	Last reward: 89.00	Average reward: 20.81
Episode 30	Last reward: 77.00	Average reward: 31.12
Episode 40	Last reward: 21.00	Average reward: 46.86
Episode 50	Last reward: 422.00	Average reward: 92.36
Episode 60	Last reward: 122.00	Average reward: 129.49
Episode 70	Last reward: 290.00	Average reward: 127.78
Episode 80	Last reward: 78.00	Average reward: 119.82
Episode 90	Last reward: 56.00	Average reward: 108.27
Episode 100	Last reward: 90.00	Average reward: 95.41
Episode 110	Last reward: 85.00	Average reward: 88.29
Episode 120	Last reward: 55.00	Aver

Episode 80	Last reward: 11.00	Average reward: 10.18
Episode 90	Last reward: 9.00	Average reward: 10.23
Episode 100	Last reward: 8.00	Average reward: 10.42
Episode 110	Last reward: 13.00	Average reward: 10.94
Episode 120	Last reward: 34.00	Average reward: 13.49
Episode 130	Last reward: 31.00	Average reward: 23.39
Episode 140	Last reward: 45.00	Average reward: 33.72
Episode 150	Last reward: 163.00	Average reward: 57.83
Episode 160	Last reward: 87.00	Average reward: 65.35
Episode 170	Last reward: 68.00	Average reward: 67.12
Episode 180	Last reward: 422.00	Average reward: 116.33
Episode 190	Last reward: 232.00	Average reward: 207.73
200
67
Episode 0	Last reward: 14.00	Average reward: 10.16
Episode 10	Last reward: 9.00	Average reward: 10.52
Episode 20	Last reward: 10.00	Average reward: 10.77
Episode 30	Last reward: 11.00	Average reward: 10.77
Episode 40	Last reward: 11.00	Average reward: 10.49
Episode 50	Last reward: 10.00	Average reward: 10.47
Episode 60	Last reward: 10.00	Average reward: 

Episode 20	Last reward: 47.00	Average reward: 32.48
Episode 30	Last reward: 130.00	Average reward: 86.52
Episode 40	Last reward: 500.00	Average reward: 170.24
Episode 50	Last reward: 217.00	Average reward: 237.44
Episode 60	Last reward: 212.00	Average reward: 205.21
Episode 70	Last reward: 195.00	Average reward: 220.70
Episode 80	Last reward: 238.00	Average reward: 235.14
Episode 90	Last reward: 500.00	Average reward: 257.87
Episode 100	Last reward: 197.00	Average reward: 264.83
Episode 110	Last reward: 435.00	Average reward: 262.62
Episode 120	Last reward: 429.00	Average reward: 307.18
Episode 130	Last reward: 447.00	Average reward: 355.62
Episode 140	Last reward: 500.00	Average reward: 404.01
Episode 150	Last reward: 500.00	Average reward: 436.18
Episode 160	Last reward: 500.00	Average reward: 457.57
Episode 170	Last reward: 500.00	Average reward: 457.00
Episode 180	Last reward: 500.00	Average reward: 465.74
Episode 190	Last reward: 500.00	Average reward: 477.22
200
75
Episode 0	Last

Episode 170	Last reward: 10.00	Average reward: 9.54
Episode 180	Last reward: 10.00	Average reward: 9.66
Episode 190	Last reward: 10.00	Average reward: 9.58
200
82
Episode 0	Last reward: 46.00	Average reward: 11.44
Episode 10	Last reward: 33.00	Average reward: 21.81
Episode 20	Last reward: 15.00	Average reward: 29.25
Episode 30	Last reward: 181.00	Average reward: 44.97
Episode 40	Last reward: 71.00	Average reward: 50.51
Episode 50	Last reward: 52.00	Average reward: 54.06
Episode 60	Last reward: 54.00	Average reward: 57.90
Episode 70	Last reward: 101.00	Average reward: 86.27
Episode 80	Last reward: 75.00	Average reward: 116.13
Episode 90	Last reward: 20.00	Average reward: 102.00
Episode 100	Last reward: 200.00	Average reward: 127.43
Episode 110	Last reward: 500.00	Average reward: 227.76
Episode 120	Last reward: 223.00	Average reward: 274.42
Episode 130	Last reward: 207.00	Average reward: 246.05
Episode 140	Last reward: 14.00	Average reward: 196.92
Episode 150	Last reward: 148.00	Average 

Episode 100	Last reward: 107.00	Average reward: 77.65
Episode 110	Last reward: 119.00	Average reward: 89.61
Episode 120	Last reward: 306.00	Average reward: 136.77
Episode 130	Last reward: 461.00	Average reward: 206.62
Episode 140	Last reward: 500.00	Average reward: 304.95
Episode 150	Last reward: 500.00	Average reward: 370.32
Episode 160	Last reward: 500.00	Average reward: 401.52
Episode 170	Last reward: 189.00	Average reward: 381.57
Episode 180	Last reward: 337.00	Average reward: 350.81
Episode 190	Last reward: 293.00	Average reward: 308.24
200
90
Episode 0	Last reward: 45.00	Average reward: 11.40
Episode 10	Last reward: 14.00	Average reward: 14.05
Episode 20	Last reward: 18.00	Average reward: 14.14
Episode 30	Last reward: 11.00	Average reward: 14.34
Episode 40	Last reward: 11.00	Average reward: 13.70
Episode 50	Last reward: 10.00	Average reward: 12.35
Episode 60	Last reward: 9.00	Average reward: 11.70
Episode 70	Last reward: 15.00	Average reward: 12.21
Episode 80	Last reward: 55.00	A

Episode 30	Last reward: 128.00	Average reward: 59.61
Episode 40	Last reward: 29.00	Average reward: 56.65
Episode 50	Last reward: 23.00	Average reward: 49.62
Episode 60	Last reward: 44.00	Average reward: 46.39
Episode 70	Last reward: 55.00	Average reward: 47.20
Episode 80	Last reward: 69.00	Average reward: 53.82
Episode 90	Last reward: 148.00	Average reward: 77.70
Episode 100	Last reward: 163.00	Average reward: 104.31
Episode 110	Last reward: 191.00	Average reward: 161.38
Episode 120	Last reward: 432.00	Average reward: 218.45
Episode 130	Last reward: 500.00	Average reward: 298.98
Episode 140	Last reward: 500.00	Average reward: 361.93
Episode 150	Last reward: 484.00	Average reward: 407.57
Episode 160	Last reward: 500.00	Average reward: 436.84
Episode 170	Last reward: 500.00	Average reward: 439.90
Episode 180	Last reward: 500.00	Average reward: 460.04
Episode 190	Last reward: 500.00	Average reward: 461.75
200
98
Episode 0	Last reward: 15.00	Average reward: 10.20
Episode 10	Last reward: 9.

Episode 230	Last reward: 258.00	Average reward: 401.54
Episode 240	Last reward: 500.00	Average reward: 405.39
250
4
Episode 0	Last reward: 30.00	Average reward: 10.80
Episode 10	Last reward: 73.00	Average reward: 15.84
Episode 20	Last reward: 27.00	Average reward: 23.90
Episode 30	Last reward: 21.00	Average reward: 22.31
Episode 40	Last reward: 17.00	Average reward: 21.19
Episode 50	Last reward: 25.00	Average reward: 22.40
Episode 60	Last reward: 177.00	Average reward: 34.40
Episode 70	Last reward: 40.00	Average reward: 46.23
Episode 80	Last reward: 56.00	Average reward: 45.83
Episode 90	Last reward: 54.00	Average reward: 50.41
Episode 100	Last reward: 32.00	Average reward: 47.87
Episode 110	Last reward: 65.00	Average reward: 45.58
Episode 120	Last reward: 500.00	Average reward: 130.79
Episode 130	Last reward: 500.00	Average reward: 254.53
Episode 140	Last reward: 500.00	Average reward: 336.81
Episode 150	Last reward: 500.00	Average reward: 387.06
Episode 160	Last reward: 500.00	Averag

Episode 20	Last reward: 116.00	Average reward: 17.30
Episode 30	Last reward: 63.00	Average reward: 38.62
Episode 40	Last reward: 96.00	Average reward: 66.97
Episode 50	Last reward: 62.00	Average reward: 65.05
Episode 60	Last reward: 21.00	Average reward: 50.46
Episode 70	Last reward: 32.00	Average reward: 46.47
Episode 80	Last reward: 101.00	Average reward: 55.65
Episode 90	Last reward: 96.00	Average reward: 64.80
Episode 100	Last reward: 111.00	Average reward: 71.39
Episode 110	Last reward: 73.00	Average reward: 67.33
Episode 120	Last reward: 67.00	Average reward: 64.97
Episode 130	Last reward: 107.00	Average reward: 74.59
Episode 140	Last reward: 500.00	Average reward: 162.57
Episode 150	Last reward: 196.00	Average reward: 262.80
Episode 160	Last reward: 175.00	Average reward: 232.59
Episode 170	Last reward: 164.00	Average reward: 213.93
Episode 180	Last reward: 240.00	Average reward: 205.81
Episode 190	Last reward: 500.00	Average reward: 294.76
Episode 200	Last reward: 500.00	Averag

Episode 50	Last reward: 40.00	Average reward: 37.90
Episode 60	Last reward: 121.00	Average reward: 55.78
Episode 70	Last reward: 391.00	Average reward: 114.60
Episode 80	Last reward: 413.00	Average reward: 188.39
Episode 90	Last reward: 204.00	Average reward: 244.24
Episode 100	Last reward: 155.00	Average reward: 228.21
Episode 110	Last reward: 222.00	Average reward: 206.99
Episode 120	Last reward: 500.00	Average reward: 285.02
Episode 130	Last reward: 222.00	Average reward: 256.01
Episode 140	Last reward: 172.00	Average reward: 246.06
Episode 150	Last reward: 237.00	Average reward: 266.27
Episode 160	Last reward: 500.00	Average reward: 331.18
Episode 170	Last reward: 500.00	Average reward: 387.77
Episode 180	Last reward: 10.00	Average reward: 264.82
Episode 190	Last reward: 19.00	Average reward: 182.17
Episode 200	Last reward: 18.00	Average reward: 125.68
Episode 210	Last reward: 28.00	Average reward: 88.04
Episode 220	Last reward: 11.00	Average reward: 63.56
Episode 230	Last reward: 

Episode 80	Last reward: 25.00	Average reward: 30.40
Episode 90	Last reward: 112.00	Average reward: 37.48
Episode 100	Last reward: 138.00	Average reward: 44.60
Episode 110	Last reward: 93.00	Average reward: 54.13
Episode 120	Last reward: 40.00	Average reward: 56.60
Episode 130	Last reward: 75.00	Average reward: 64.21
Episode 140	Last reward: 92.00	Average reward: 69.94
Episode 150	Last reward: 43.00	Average reward: 70.20
Episode 160	Last reward: 49.00	Average reward: 64.81
Episode 170	Last reward: 36.00	Average reward: 56.38
Episode 180	Last reward: 51.00	Average reward: 51.58
Episode 190	Last reward: 42.00	Average reward: 49.43
Episode 200	Last reward: 79.00	Average reward: 55.41
Episode 210	Last reward: 57.00	Average reward: 71.25
Episode 220	Last reward: 88.00	Average reward: 80.25
Episode 230	Last reward: 83.00	Average reward: 87.66
Episode 240	Last reward: 101.00	Average reward: 101.50
250
23
Episode 0	Last reward: 28.00	Average reward: 10.72
Episode 10	Last reward: 13.00	Average r

Episode 130	Last reward: 88.00	Average reward: 85.65
Episode 140	Last reward: 86.00	Average reward: 83.49
Episode 150	Last reward: 108.00	Average reward: 90.41
Episode 160	Last reward: 111.00	Average reward: 98.76
Episode 170	Last reward: 86.00	Average reward: 94.89
Episode 180	Last reward: 62.00	Average reward: 82.16
Episode 190	Last reward: 72.00	Average reward: 73.90
Episode 200	Last reward: 87.00	Average reward: 77.54
Episode 210	Last reward: 253.00	Average reward: 97.01
Episode 220	Last reward: 205.00	Average reward: 123.38
Episode 230	Last reward: 192.00	Average reward: 141.31
Episode 240	Last reward: 153.00	Average reward: 146.73
250
29
Episode 0	Last reward: 24.00	Average reward: 10.56
Episode 10	Last reward: 21.00	Average reward: 20.68
Episode 20	Last reward: 43.00	Average reward: 24.59
Episode 30	Last reward: 31.00	Average reward: 35.12
Episode 40	Last reward: 41.00	Average reward: 37.45
Episode 50	Last reward: 68.00	Average reward: 56.77
Episode 60	Last reward: 119.00	Averag

Episode 170	Last reward: 500.00	Average reward: 325.68
Episode 180	Last reward: 12.00	Average reward: 269.99
Episode 190	Last reward: 14.00	Average reward: 200.48
Episode 200	Last reward: 185.00	Average reward: 190.64
Episode 210	Last reward: 389.00	Average reward: 217.10
Episode 220	Last reward: 500.00	Average reward: 304.10
Episode 230	Last reward: 356.00	Average reward: 342.11
Episode 240	Last reward: 195.00	Average reward: 295.01
250
35
Episode 0	Last reward: 25.00	Average reward: 10.60
Episode 10	Last reward: 84.00	Average reward: 23.13
Episode 20	Last reward: 179.00	Average reward: 66.96
Episode 30	Last reward: 85.00	Average reward: 103.16
Episode 40	Last reward: 90.00	Average reward: 95.24
Episode 50	Last reward: 323.00	Average reward: 127.49
Episode 60	Last reward: 171.00	Average reward: 162.59
Episode 70	Last reward: 500.00	Average reward: 241.47
Episode 80	Last reward: 500.00	Average reward: 316.13
Episode 90	Last reward: 500.00	Average reward: 353.07
Episode 100	Last reward:

Episode 190	Last reward: 84.00	Average reward: 139.62
Episode 200	Last reward: 422.00	Average reward: 159.83
Episode 210	Last reward: 226.00	Average reward: 162.72
Episode 220	Last reward: 500.00	Average reward: 249.38
Episode 230	Last reward: 500.00	Average reward: 318.95
Episode 240	Last reward: 500.00	Average reward: 379.63
250
41
Episode 0	Last reward: 25.00	Average reward: 10.60
Episode 10	Last reward: 13.00	Average reward: 17.54
Episode 20	Last reward: 33.00	Average reward: 35.43
Episode 30	Last reward: 23.00	Average reward: 32.06
Episode 40	Last reward: 97.00	Average reward: 44.33
Episode 50	Last reward: 111.00	Average reward: 63.44
Episode 60	Last reward: 136.00	Average reward: 84.06
Episode 70	Last reward: 212.00	Average reward: 105.59
Episode 80	Last reward: 56.00	Average reward: 108.69
Episode 90	Last reward: 178.00	Average reward: 109.07
Episode 100	Last reward: 69.00	Average reward: 115.91
Episode 110	Last reward: 31.00	Average reward: 94.97
Episode 120	Last reward: 53.00	

Episode 240	Last reward: 9.00	Average reward: 9.42
250
47
Episode 0	Last reward: 12.00	Average reward: 10.08
Episode 10	Last reward: 66.00	Average reward: 18.90
Episode 20	Last reward: 190.00	Average reward: 37.71
Episode 30	Last reward: 114.00	Average reward: 70.24
Episode 40	Last reward: 172.00	Average reward: 118.58
Episode 50	Last reward: 268.00	Average reward: 203.35
Episode 60	Last reward: 257.00	Average reward: 205.26
Episode 70	Last reward: 500.00	Average reward: 242.20
Episode 80	Last reward: 145.00	Average reward: 237.68
Episode 90	Last reward: 200.00	Average reward: 215.34
Episode 100	Last reward: 500.00	Average reward: 253.02
Episode 110	Last reward: 500.00	Average reward: 335.80
Episode 120	Last reward: 500.00	Average reward: 390.83
Episode 130	Last reward: 500.00	Average reward: 427.42
Episode 140	Last reward: 399.00	Average reward: 447.33
Episode 150	Last reward: 310.00	Average reward: 407.59
Episode 160	Last reward: 418.00	Average reward: 364.82
Episode 170	Last reward:

Episode 40	Last reward: 71.00	Average reward: 27.73
Episode 50	Last reward: 48.00	Average reward: 41.32
Episode 60	Last reward: 107.00	Average reward: 52.08
Episode 70	Last reward: 60.00	Average reward: 58.54
Episode 80	Last reward: 150.00	Average reward: 76.50
Episode 90	Last reward: 389.00	Average reward: 101.09
Episode 100	Last reward: 474.00	Average reward: 142.65
Episode 110	Last reward: 500.00	Average reward: 241.12
Episode 120	Last reward: 17.00	Average reward: 199.81
Episode 130	Last reward: 153.00	Average reward: 158.21
Episode 140	Last reward: 489.00	Average reward: 182.71
Episode 150	Last reward: 500.00	Average reward: 289.05
Episode 160	Last reward: 500.00	Average reward: 353.27
Episode 170	Last reward: 500.00	Average reward: 383.97
Episode 180	Last reward: 13.00	Average reward: 335.71
Episode 190	Last reward: 500.00	Average reward: 344.00
Episode 200	Last reward: 500.00	Average reward: 396.28
Episode 210	Last reward: 500.00	Average reward: 431.05
Episode 220	Last reward: 5

In [None]:
# Average reward versus number of training episodes for the DQN benchmark.
# The notebook runs under `%matplotlib notebook`, where pyplot's implicit
# "current figure" can still be one left open by an earlier cell; creating the
# figure explicitly guarantees this curve gets its own axes.
fig, ax = plt.subplots()
ax.plot(numEpisodesList, avgReward)
ax.set_xlabel('Num. Ep. Run')
ax.set_ylabel('Avg. Reward')
ax.set_title('Average Reward By Episodes Trained (DQN)')
fig.savefig('DQN_Quality.png')

In [None]:
# Average wall-clock time per run versus number of training episodes (DQN).
# Drawn on its own explicit figure so it never lands on the axes of a figure
# that is still active under `%matplotlib notebook`.
fig, ax = plt.subplots()
ax.plot(numEpisodesList, avgTimeToRun)
ax.set_xlabel('Num. Ep. Run')
ax.set_ylabel('Avg. Time To Run')
ax.set_title('Average Time to Run By Episodes Trained (DQN)')
# 'DQN_Quality.png' is already used for the reward curve; saving the timing
# curve under the same name would silently overwrite it.
fig.savefig('DQN_Quality_time.png')

In [None]:
# Dump the raw benchmark numbers (episode budgets, then mean rewards), one per line.
print(numEpisodesList, avgReward, sep='\n')

# RUN BELOW FOR EVO

In [None]:
# Old code assuming random initialization

def init_weights(m):
    """Xavier-initialise the weight and zero the bias of a Linear/Conv2d layer.

    Modules of any other type are left untouched, so the function is safe to
    pass to ``nn.Module.apply`` to visit every sub-module of a network.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    if type(m) in (nn.Linear, nn.Conv2d):
        # `xavier_uniform` (no trailing underscore) is deprecated; the in-place
        # `xavier_uniform_` is the supported spelling.
        torch.nn.init.xavier_uniform_(m.weight)
        # Layers constructed with bias=False expose `bias` as None.
        if m.bias is not None:
            m.bias.data.fill_(0.00)

def return_random_agents(num_agents):
    """Build ``num_agents`` new agents with gradients disabled and re-initialised layers.

    Args:
        num_agents: Number of agents to create.

    Returns:
        list: The newly created agents. Every parameter has
        ``requires_grad = False`` and every sub-module has been passed through
        ``init_weights``.
    """
    agents = []
    for _ in range(num_agents):

        # NOTE(review): `CartPoleAI` is not defined in the part of the notebook
        # visible here (the model class near the top is `CartPoleAgent`) —
        # confirm which class this old code path is meant to instantiate.
        agent = CartPoleAI()

        for param in agent.parameters():
            param.requires_grad = False

        # `init_weights` only acts on Linear/Conv2d modules. Calling it on the
        # agent itself is a no-op because the agent is neither; `apply` walks
        # every sub-module so the layers actually get initialised.
        agent.apply(init_weights)
        agents.append(agent)

    return agents

In [None]:
def trainEVOModel_uninit(generations, returnScore = False):
    """Run the evolutionary loop starting from randomly initialised agents.

    Each generation the current agents are scored with ``run_agents_n_times``,
    the ``top_limit`` highest-scoring ones are selected as parents, and
    ``return_children`` produces the next population from them.

    Note: this calls ``torch.set_grad_enabled(False)`` and never re-enables
    gradients, so autograd stays disabled for any code that runs afterwards.

    Args:
        generations: Number of generations to evolve.
        returnScore: When true, return the best score of the last evaluated
            generation.

    Returns:
        The highest entry of ``rewards`` from the last evaluated generation
        (``0.0`` if ``generations`` is 0) when ``returnScore`` is true;
        otherwise ``None``.
    """
    torch.set_grad_enabled(False)

    agents = return_random_agents(5)

    top_limit = 5 # Number of top agents to consider as parents

    # Best reward seen in the most recently evaluated generation. Initialised
    # so that a zero-generation call still returns a number.
    best_score = 0.0
    elite_index = None
    for generation in range(generations):
        rewards = run_agents_n_times(agents, 10) # Average of k runs

        # Indexes of the agents ordered from highest to lowest reward.
        sorted_parent_indexes = np.argsort(rewards)[::-1][:top_limit]
        print('\n')

        top_rewards = [rewards[parent_index] for parent_index in sorted_parent_indexes]
        # The list is sorted descending, so the first entry is this generation's best.
        best_score = top_rewards[0]

        print("Generation ", generation, " | Mean rewards: ", np.mean(rewards), " | Mean of top 5: ",np.mean(top_rewards[:5]))
        # print(rewards)
        print("Top ",top_limit," scores", sorted_parent_indexes)
        print("Rewards for top: ",top_rewards)

        children_agents, elite_index = return_children(agents, sorted_parent_indexes, elite_index)
        agents = children_agents

    if returnScore == True:
        # Previously this returned the leaked loop variable `best_parent`, i.e.
        # the *index* of the last-ranked top agent, which callers then summed
        # as if it were a reward.
        return best_score

In [None]:
from timeit import default_timer as timer

# Benchmark the randomly initialised evolutionary trainer: for every budget in
# `numEpisodesList`, repeat training `numSuccessions` times and record the
# mean returned score and the mean wall-clock time per run.
numEpisodesList = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
numSuccessions = 100

avgReward = np.zeros(len(numEpisodesList))
avgTimeToRun = np.zeros(len(numEpisodesList))

folderName = "EVO_Quality"
fileName   = "Episode"

for ii, numEpisodes in enumerate(numEpisodesList):

    # Running totals over all repetitions for this budget.
    reward    = 0
    timeToRun = 0

    for successions in range(numSuccessions):

        print(numEpisodes)
        print(successions)
        start   = timer()
        reward += trainEVOModel_uninit(numEpisodes, returnScore = True)
        end     = timer()

        # Accumulate; plain assignment would keep only the last run's time
        # while the average below still divides by numSuccessions.
        timeToRun += (end - start)

    avgReward[ii]    = reward/numSuccessions
    avgTimeToRun[ii] = timeToRun/numSuccessions

In [None]:
# Average reward versus training budget for the evolutionary (EVO) benchmark.
# An explicit figure keeps the curve off any figure that is still active under
# the `%matplotlib notebook` backend.
fig, ax = plt.subplots()
ax.plot(numEpisodesList, avgReward)
ax.set_xlabel('Num. Ep. Run')
ax.set_ylabel('Avg. Reward')
# This cell belongs to the EVO section; the title was a copy-paste of the DQN one.
ax.set_title('Average Reward By Episodes Trained (EVO)')
fig.savefig('EVO_Quality_reward.png')

In [None]:
# Average wall-clock time per run versus training budget for the EVO benchmark.
# An explicit figure keeps the curve off any figure that is still active under
# the `%matplotlib notebook` backend.
fig, ax = plt.subplots()
ax.plot(numEpisodesList, avgTimeToRun)
ax.set_xlabel('Num. Ep. Run')
ax.set_ylabel('Avg. Time To Run')
# This cell belongs to the EVO section; the title was a copy-paste of the DQN one.
ax.set_title('Average Time to Run By Episodes Trained (EVO)')
# Saving to 'DQN_Quality.png' here would overwrite the figure written by the
# DQN section, so the EVO timing curve gets its own file.
fig.savefig('EVO_Quality_time.png')

In [None]:
# Dump the raw benchmark numbers (budgets, then mean rewards), one per line.
print(numEpisodesList, avgReward, sep='\n')

# RUN BELOW FOR HYBRID

In [None]:
from timeit import default_timer as timer

# Benchmark the hybrid pipeline: each repetition first runs `trainDQNmodel`,
# then `trainEVOModel`, and the mean returned score and mean wall-clock time
# are recorded per budget.
# The budgets are half of those used in the other benchmarks. Dividing a Python
# list by 2 raises TypeError, so each entry is halved individually; integer
# division keeps the values usable as loop counts.
numEpisodesList = [n // 2 for n in [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]]
numSuccessions = 100

avgReward = np.zeros(len(numEpisodesList))
avgTimeToRun = np.zeros(len(numEpisodesList))

for ii, numEpisodes in enumerate(numEpisodesList):

    # Running totals over all repetitions for this budget.
    reward    = 0
    timeToRun = 0

    for successions in range(numSuccessions):

        print(numEpisodes)
        print(successions)
        start   = timer()
        trainDQNmodel(returnReward = False)
        reward += trainEVOModel(numEpisodes, returnScore = True)

        end     = timer()

        # Accumulate; plain assignment would keep only the last run's time
        # while the average below still divides by numSuccessions.
        timeToRun += (end - start)

    avgReward[ii]    = reward/numSuccessions
    avgTimeToRun[ii] = timeToRun/numSuccessions

In [None]:
# Average reward versus training budget for the hybrid (DQN + EVO) benchmark.
# An explicit figure keeps the curve off any figure that is still active under
# the `%matplotlib notebook` backend.
fig, ax = plt.subplots()
ax.plot(numEpisodesList, avgReward)
ax.set_xlabel('Num. Ep. Run')
ax.set_ylabel('Avg. Reward')
# This cell belongs to the hybrid section; the title was a copy-paste of the DQN one.
ax.set_title('Average Reward By Episodes Trained (HYBRID)')
fig.savefig('HYBRID_Quality_reward.png')

In [None]:
# Average wall-clock time per run versus training budget for the hybrid benchmark.
# An explicit figure keeps the curve off any figure that is still active under
# the `%matplotlib notebook` backend.
fig, ax = plt.subplots()
ax.plot(numEpisodesList, avgTimeToRun)
ax.set_xlabel('Num. Ep. Run')
ax.set_ylabel('Avg. Time To Run')
# This cell belongs to the hybrid section; the title was a copy-paste of the DQN one.
ax.set_title('Average Time to Run By Episodes Trained (HYBRID)')
fig.savefig('HYBRID_Quality_time.png')

In [None]:
# Dump the raw benchmark numbers (budgets, then mean rewards), one per line.
print(numEpisodesList, avgReward, sep='\n')