In [1]:
import os
import glob
import time
from datetime import datetime

import torch
import torch.nn as nn
from torch.distributions import MultivariateNormal
from torch.distributions import Categorical

import numpy as np
import pandas as pd

import gym
from machine import Machine
from GymMachEnv import MachineEnv
import matplotlib.pyplot as plt

In [2]:
def gen_param_array(l16_array, param_grid=None):
    """Translate a design matrix of level indices into concrete parameter sets.

    Each row of ``l16_array`` describes one trial. The entry in column ``c``
    is the index of the level to pick from the ``c``-th entry of the grid
    (entries are taken in the grid's insertion order).

    Args:
        l16_array: 2-D sequence or array of integer level indices.
        param_grid: Mapping ``name -> list of levels``. When omitted, the
            module-level ``params`` dict is used, so existing one-argument
            calls behave exactly as before.

    Returns:
        A list with one list of selected values per row of ``l16_array``.

    Raises:
        IndexError: if a row has more columns than the grid has entries, or
            a level index is out of range for its column.
    """
    if param_grid is None:
        # Resolved at call time so the notebook-level grid only has to exist
        # by the time this function is actually invoked.
        param_grid = params

    # Loop-invariant: the per-column level lists never change between rows.
    levels_by_column = list(param_grid.values())

    full = []
    for row in l16_array:
        arr_params = [levels_by_column[col][level] for col, level in enumerate(row)]
        print(arr_params)
        full.append(arr_params)
    return full

def train(params_array,folder_name):
    """Run one training trial per hyper-parameter set.

    Args:
        params_array: Iterable of hyper-parameter sets; each one is passed
            unchanged to ``evaluate_and_test``.
        folder_name: Output folder forwarded to ``evaluate_and_test``.

    The zero-based position of a set in ``params_array`` is used as its trial
    number. To resume an interrupted sweep, skip the already finished trial
    numbers at the top of the loop.
    """
    for trial, sets in enumerate(params_array):
        print("trial ",trial)
        evaluate_and_test(folder_name,trial,sets)
        
def compute_avg_return(environment, policy, num_episodes):
    """Average undiscounted episode return of ``policy`` on ``environment``.

    Args:
        environment: Object exposing ``reset()``, ``step(action)`` returning a
            4-tuple ``(next_state, reward, done, info)``, and a ``done``
            attribute that is read to decide when an episode has ended.
        policy: Object exposing ``select_action(state, True)``; the second
            argument selects its evaluation mode.
        num_episodes: Number of episodes to roll out; must be positive.

    Returns:
        The sum of all episode returns divided by ``num_episodes``.

    Raises:
        ValueError: if ``num_episodes`` is not positive (previously this
            surfaced as a bare ZeroDivisionError for 0).
    """
    if num_episodes <= 0:
        raise ValueError("num_episodes must be a positive integer")

    total_return = 0.0
    # Evaluation never back-propagates, so skip autograd bookkeeping entirely.
    with torch.no_grad():
        for _ in range(num_episodes):
            # The policy is fed a batch of one state: shape (1, state_dim).
            state = torch.Tensor(environment.reset()).unsqueeze(0)
            episode_return = 0.0
            # NOTE(review): termination is read from ``environment.done``
            # rather than the flag returned by ``step``; this assumes the
            # environment keeps that attribute in sync and clears it in
            # ``reset()`` -- confirm against MachineEnv. There is also no
            # step cap here, unlike the training loop.
            while not environment.done:
                action = policy.select_action(state, True)
                next_state, reward, _done, _info = environment.step(action)
                state = torch.Tensor(next_state).unsqueeze(0)
                episode_return += reward
            total_return += episode_return

    return total_return / num_episodes

In [3]:
# Candidate levels for each tuned hyper-parameter. gen_param_array() indexes
# these lists by column position, so the insertion order of the keys matters.
params = dict(
    eps_clipping=[2e-05, 2e-04, 2e-03, 2e-02],
    gamma=[0.8, 0.9, 0.95, 0.99],
    actor_lr=[1e-04, 5e-03, 1e-03, 1e-02],
    critic_lr=[1e-04, 5e-03, 1e-03, 1e-02],
)


In [4]:
class RolloutBuffer:
    """Plain container for the transitions collected between policy updates.

    Holds five parallel lists: actions, states, log-probabilities, rewards
    and terminal flags.
    """

    def __init__(self):
        self.actions, self.states, self.logprobs = [], [], []
        self.rewards, self.is_terminals = [], []

    def clear(self):
        """Empty every list in place (the list objects themselves are kept)."""
        for store in (self.actions, self.states, self.logprobs,
                      self.rewards, self.is_terminals):
            store.clear()


class ActorCritic(nn.Module):
    """Actor and critic networks used by PPO.

    The actor is a 3-layer MLP (state_dim -> 128 -> 64 -> action_dim) ending
    in a softmax; the critic is a 3-layer MLP (state_dim -> 128 -> 64 -> 1).

    Args:
        state_dim: Size of the state vector fed to both networks.
        action_dim: Number of actor outputs.
        has_continuous_action_space: If true, actions are sampled from a
            multivariate normal centred on the actor output; otherwise from
            a categorical distribution over the actor output.
        action_std_init: Initial action standard deviation. Only read when
            ``has_continuous_action_space`` is true (may be ``None`` otherwise).
    """

    def __init__(self, state_dim, action_dim, has_continuous_action_space, action_std_init):
        super(ActorCritic, self).__init__()

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            # act(), evaluate() and set_action_std() all read these two
            # attributes; without them every continuous-action call raised
            # AttributeError.
            self.action_dim = action_dim
            self.action_var = torch.full((action_dim,), action_std_init * action_std_init)

        # actor
        # NOTE(review): the softmax head is also used as the action mean in
        # the continuous branch; confirm that is intended before relying on
        # continuous actions.
        self.actor = nn.Sequential(
                            nn.Linear(state_dim,128),
                            nn.ReLU(),
                            nn.Linear(128,64),
                            nn.ReLU(),
                            nn.Linear(64,action_dim),
                            nn.Softmax(dim=-1)
                        )

        # critic
        self.critic = nn.Sequential(
                            nn.Linear(state_dim,128),
                            nn.ReLU(),
                            nn.Linear(128,64),
                            nn.ReLU(),
                            nn.Linear(64,1)
                    )

    def set_action_std(self, new_action_std):
        """Reset the per-dimension action variance to ``new_action_std ** 2``.

        Only meaningful for continuous action spaces; prints a warning and
        changes nothing for discrete ones.
        """
        if self.has_continuous_action_space:
            self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std)
        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling ActorCritic::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def forward(self):
        """Not used; call ``act`` or ``evaluate`` instead."""
        raise NotImplementedError

    def act(self, state):
        """Sample an action for ``state``.

        Returns:
            Tuple ``(action, log_prob)``, both detached from the graph.
        """
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            # Diagonal covariance shared by every state in the batch.
            cov_mat = torch.diag(self.action_var).unsqueeze(dim=0)
            dist = MultivariateNormal(action_mean, cov_mat)
        else:
            action_probs = self.actor(state)
            dist = Categorical(action_probs)

        action = dist.sample()
        action_logprob = dist.log_prob(action)

        return action.detach(), action_logprob.detach()

    def evaluate(self, state, action):
        """Score previously taken ``action`` values under the current networks.

        Returns:
            Tuple ``(action_logprobs, state_values, dist_entropy)`` where
            ``state_values`` is the raw critic output for ``state``.
        """
        if self.has_continuous_action_space:
            action_mean = self.actor(state)
            action_var = self.action_var.expand_as(action_mean)
            cov_mat = torch.diag_embed(action_var)
            dist = MultivariateNormal(action_mean, cov_mat)

            # for single action continuous environments
            if self.action_dim == 1:
                action = action.reshape(-1, self.action_dim)

        else:
            action_probs = self.actor(state)
            dist = Categorical(action_probs)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_values = self.critic(state)

        return action_logprobs, state_values, dist_entropy


class PPO:
    """Clipped-objective PPO agent built on ``ActorCritic`` and ``RolloutBuffer``.

    Two copies of the network are kept: ``policy`` is optimised in
    ``update()``, while ``policy_old`` is used to act and is synchronised
    with ``policy`` at the end of every update.

    Args:
        state_dim, action_dim: Forwarded to ``ActorCritic``.
        lr_actor, lr_critic: Adam learning rates for the actor and critic
            parameter groups.
        gamma: Discount factor for the Monte Carlo returns.
        K_epochs: Number of optimisation passes per ``update()`` call.
        eps_clip: Clipping range of the probability ratio.
        has_continuous_action_space: Forwarded to ``ActorCritic``.
        action_std_init: Initial action standard deviation; only stored when
            the action space is continuous.
    """

    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std_init=0.6):

        self.has_continuous_action_space = has_continuous_action_space

        if has_continuous_action_space:
            self.action_std = action_std_init

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init)
        self.optimizer = torch.optim.Adam([
                        {'params': self.policy.actor.parameters(), 'lr': lr_actor},
                        {'params': self.policy.critic.parameters(), 'lr': lr_critic}
                    ])

        self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def set_action_std(self, new_action_std):
        """Set the action standard deviation on both networks (continuous only)."""
        if self.has_continuous_action_space:
            self.action_std = new_action_std
            self.policy.set_action_std(new_action_std)
            self.policy_old.set_action_std(new_action_std)

        else:
            print("--------------------------------------------------------------------------------------------")
            print("WARNING : Calling PPO::set_action_std() on discrete action space policy")
            print("--------------------------------------------------------------------------------------------")

    def decay_action_std(self, action_std_decay_rate, min_action_std):
        """Lower the action std by ``action_std_decay_rate``, floored at ``min_action_std``."""
        print("--------------------------------------------------------------------------------------------")

        if self.has_continuous_action_space:
            self.action_std = self.action_std - action_std_decay_rate
            self.action_std = round(self.action_std, 4)
            if (self.action_std <= min_action_std):
                self.action_std = min_action_std
                print("setting actor output action_std to min_action_std : ", self.action_std)
            else:
                print("setting actor output action_std to : ", self.action_std)
            self.set_action_std(self.action_std)

        else:
            print("WARNING : Calling PPO::decay_action_std() on discrete action space policy")

        print("--------------------------------------------------------------------------------------------")

    def select_action(self, state, val=False):
        """Sample an action from ``policy_old`` and return it as a Python scalar.

        Args:
            state: Current observation (anything ``torch.FloatTensor`` accepts).
            val: When true (evaluation) nothing is written to the rollout
                buffer; when false the state, action and log-probability
                are appended to it for the next ``update()``.
        """
        # Acting never needs gradients, in training or in evaluation.
        with torch.no_grad():
            state = torch.FloatTensor(state)
            action, action_logprob = self.policy_old.act(state)

        if not val:
            self.buffer.states.append(state)
            self.buffer.actions.append(action)
            self.buffer.logprobs.append(action_logprob)

        return action.item()

    def update(self):
        """Run ``K_epochs`` PPO optimisation passes over the buffered rollout.

        Afterwards ``policy_old`` is synchronised with ``policy`` and the
        buffer is cleared. Does nothing when the buffer holds no transitions.
        """
        if not self.buffer.states or not self.buffer.rewards:
            # Nothing collected since the last update; stacking an empty
            # list below would raise.
            return

        # Monte Carlo estimate of returns, accumulated back to front and
        # reversed once at the end (avoids quadratic insert-at-front).
        returns = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            returns.append(discounted_reward)
        returns.reverse()

        # Normalizing the rewards
        rewards = torch.tensor(returns, dtype=torch.float32)
        if rewards.numel() > 1:
            rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)
        else:
            # The std of a single sample is NaN and would poison the weights.
            rewards = rewards - rewards.mean()

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach()
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach()
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach()

        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)

            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            advantages = rewards - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages

            # final loss of clipped objective PPO
            loss = -torch.min(surr1, surr2) + 0.5*self.MseLoss(state_values, rewards) - 0.01*dist_entropy

            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()
    
    

In [5]:
# Read the tab-separated, header-less design matrix of level indices and
# turn each of its rows into one concrete hyper-parameter set.
design_frame = pd.read_csv('L16.txt', sep='\t', header=None)
l16_array = np.array(design_frame)
params_array = gen_param_array(l16_array)
params_array

[2e-05, 0.8, 0.0001, 0.0001]
[2e-05, 0.9, 0.005, 0.005]
[2e-05, 0.95, 0.001, 0.001]
[2e-05, 0.99, 0.01, 0.01]
[0.0002, 0.8, 0.005, 0.001]
[0.0002, 0.9, 0.0001, 0.01]
[0.0002, 0.95, 0.01, 0.0001]
[0.0002, 0.99, 0.001, 0.005]
[0.002, 0.8, 0.001, 0.01]
[0.002, 0.9, 0.01, 0.001]
[0.002, 0.95, 0.0001, 0.005]
[0.002, 0.99, 0.005, 0.0001]
[0.02, 0.8, 0.01, 0.005]
[0.02, 0.9, 0.001, 0.0001]
[0.02, 0.95, 0.005, 0.01]
[0.02, 0.99, 0.0001, 0.001]


[[2e-05, 0.8, 0.0001, 0.0001],
 [2e-05, 0.9, 0.005, 0.005],
 [2e-05, 0.95, 0.001, 0.001],
 [2e-05, 0.99, 0.01, 0.01],
 [0.0002, 0.8, 0.005, 0.001],
 [0.0002, 0.9, 0.0001, 0.01],
 [0.0002, 0.95, 0.01, 0.0001],
 [0.0002, 0.99, 0.001, 0.005],
 [0.002, 0.8, 0.001, 0.01],
 [0.002, 0.9, 0.01, 0.001],
 [0.002, 0.95, 0.0001, 0.005],
 [0.002, 0.99, 0.005, 0.0001],
 [0.02, 0.8, 0.01, 0.005],
 [0.02, 0.9, 0.001, 0.0001],
 [0.02, 0.95, 0.005, 0.01],
 [0.02, 0.99, 0.0001, 0.001]]

In [5]:
def running_score_cal(arr,window=50):
    """Return the most recent rolling-mean score of a tracker.

    Args:
        arr: 2-D sequence of rows whose second column (index 1) holds the
            score.
        window: Number of trailing rows to average. Fewer rows are averaged
            when the tracker is shorter than the window.

    Returns:
        The mean of the last ``window`` scores as a float.
    """
    array = np.array(arr)
    scores = pd.Series(array[:, 1])
    # ``window`` used to be ignored in favour of a hard-coded 50.
    rolling_mean = scores.rolling(window, min_periods=1).mean()
    return float(rolling_mean.iloc[-1])

In [6]:
def evaluate_and_test(folder_name,trial_number,params,max_episodes=40000,max_ep_len=100):
    """Train a discrete-action PPO agent on ``MachineEnv`` and save the results.

    Every 10 episodes the mean training return of those episodes is logged,
    the agent is scored over 20 episodes on a separate evaluation
    environment, and -- once more than 100 episodes have elapsed -- the policy
    is checkpointed whenever the mean of the last 50 logged training returns
    reaches a new maximum.

    Args:
        folder_name: Directory receiving the trackers and model files; it is
            created if it does not exist.
        trial_number: Used to build the output file names.
        params: Sequence ``[eps_clip, gamma, lr_actor, lr_critic]``.
        max_episodes: Number of training episodes to run.
        max_ep_len: Maximum number of steps per training episode; the policy
            is updated every ``4 * max_ep_len`` environment steps.

    Returns:
        Always 0.

    Files written (inside ``folder_name``):
        ``trial_{n}.txt``, ``eval_trial_{n}.txt``,
        ``trial_{n}_ppo_agent.pt`` (best checkpoint, if any) and
        ``trial_{n}_final_ppo_agent.pt``.
    """
    update_timestep = max_ep_len * 4      # update policy every n timesteps
    K_epochs = 40
    print_freq_ep = 10

    eps_clip = params[0]
    gamma = params[1]
    lr_actor = params[2]
    lr_critic = params[3]

    # Create the output folder up front so a missing directory cannot waste a
    # complete training run at the first checkpoint.
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)

    # Training environment
    machine = Machine()
    machine.curr_state = 0
    env = MachineEnv(machine)

    # Evaluation environment (kept separate so evaluation rollouts do not
    # disturb the training episode in progress)
    machine2 = Machine()
    machine2.curr_state = 0
    env2 = MachineEnv(machine2)

    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    ppo_agent = PPO(state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, False, None)

    # -inf rather than 0 so a checkpoint is still written when every return
    # is negative.
    max_score = float('-inf')

    # printing and logging variables
    print_running_reward = 0
    print_running_episodes = 0

    time_step = 0
    i_episode = 0

    tracker = []
    eval_tracker = []

    # training loop (strict '<' so exactly ``max_episodes`` episodes are run)
    while i_episode < max_episodes:

        state = env.reset()
        current_ep_reward = 0

        for t in range(1, max_ep_len+1):

            # select action with policy
            action = ppo_agent.select_action(state)
            state, reward, done, _ = env.step(action)

            # saving reward and is_terminals
            ppo_agent.buffer.rewards.append(reward)
            # An episode cut off at max_ep_len is also a boundary: without
            # this flag the Monte Carlo return of its last steps would absorb
            # the rewards of the next episode.
            ppo_agent.buffer.is_terminals.append(bool(done) or t == max_ep_len)

            time_step +=1
            current_ep_reward += reward

            # update PPO agent
            if time_step % update_timestep == 0:
                ppo_agent.update()

            if done:
                break

        print_running_reward += current_ep_reward
        print_running_episodes += 1

        i_episode += 1

        # printing average reward
        if i_episode % print_freq_ep == 0:

            # print average reward till last episode
            print_avg_reward = print_running_reward / print_running_episodes
            print_avg_reward = round(print_avg_reward, 2)
            tracker.append([i_episode,print_avg_reward])

            eval_score = compute_avg_return(env2,ppo_agent,20)
            eval_tracker.append([i_episode,eval_score])

            print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {} \t\t Eval Score: {}".format(i_episode, time_step, print_avg_reward,eval_score))
            # Mean of the last 50 logged averages (i.e. the last 500 episodes).
            run_ave = np.mean(np.array(tracker)[-50:,1])

            if max_score < run_ave and i_episode > 100:
                torch.save(ppo_agent.policy,f'{folder_name}/trial_{trial_number}_ppo_agent.pt')
                max_score = run_ave

            print_running_reward = 0
            print_running_episodes = 0

    env.close()
    env2.close()

    np.savetxt(f'{folder_name}/trial_{trial_number}.txt', tracker, delimiter=',',fmt='%s')
    np.savetxt(f'{folder_name}/eval_trial_{trial_number}.txt', eval_tracker, delimiter=',',fmt='%s')
    torch.save(ppo_agent.policy,f'{folder_name}/trial_{trial_number}_final_ppo_agent.pt')

    return 0



In [8]:
# folder_name = 'run_1'
# train(params_array,folder_name)

In [7]:
# Hyper-parameter sets in the order evaluate_and_test() reads them:
# [eps_clip, gamma, lr_actor, lr_critic].
default_params = [0.00002, 0.95, 1e-3, 1e-3]
best_params = [0.02, 0.95, 0.0001, 0.001]

In [8]:
# Full training run with the selected hyper-parameters; outputs go to 'best/'.
evaluate_and_test(folder_name='best', trial_number=1, params=best_params)



Episode : 10 		 Timestep : 490 		 Average Reward : 21160.0 		 Eval Score: 18860.0
Episode : 20 		 Timestep : 881 		 Average Reward : 15430.0 		 Eval Score: 20775.0
Episode : 30 		 Timestep : 1307 		 Average Reward : 15730.0 		 Eval Score: 18500.0
Episode : 40 		 Timestep : 1638 		 Average Reward : 10620.0 		 Eval Score: 19540.0
Episode : 50 		 Timestep : 2030 		 Average Reward : 14990.0 		 Eval Score: 24355.0
Episode : 60 		 Timestep : 2686 		 Average Reward : 25380.0 		 Eval Score: 18925.0
Episode : 70 		 Timestep : 3057 		 Average Reward : 14910.0 		 Eval Score: 23800.0
Episode : 80 		 Timestep : 3363 		 Average Reward : 11970.0 		 Eval Score: 21390.0
Episode : 90 		 Timestep : 3976 		 Average Reward : 29400.0 		 Eval Score: 25970.0
Episode : 100 		 Timestep : 4369 		 Average Reward : 16200.0 		 Eval Score: 21190.0
Episode : 110 		 Timestep : 4915 		 Average Reward : 23660.0 		 Eval Score: 20925.0
Episode : 120 		 Timestep : 5476 		 Average Reward : 27260.0 		 Eval Score: 21865.0
Epi

Episode : 980 		 Timestep : 42460 		 Average Reward : 24940.0 		 Eval Score: 26000.0
Episode : 990 		 Timestep : 42956 		 Average Reward : 26120.0 		 Eval Score: 23450.0
Episode : 1000 		 Timestep : 43453 		 Average Reward : 25190.0 		 Eval Score: 16705.0
Episode : 1010 		 Timestep : 43729 		 Average Reward : 13140.0 		 Eval Score: 18465.0
Episode : 1020 		 Timestep : 44203 		 Average Reward : 24540.0 		 Eval Score: 20310.0
Episode : 1030 		 Timestep : 44498 		 Average Reward : 12190.0 		 Eval Score: 27680.0
Episode : 1040 		 Timestep : 44925 		 Average Reward : 20790.0 		 Eval Score: 19760.0
Episode : 1050 		 Timestep : 45410 		 Average Reward : 25490.0 		 Eval Score: 22150.0
Episode : 1060 		 Timestep : 45892 		 Average Reward : 23480.0 		 Eval Score: 20170.0
Episode : 1070 		 Timestep : 46376 		 Average Reward : 26070.0 		 Eval Score: 25350.0
Episode : 1080 		 Timestep : 46634 		 Average Reward : 12360.0 		 Eval Score: 21395.0
Episode : 1090 		 Timestep : 47117 		 Average Reward : 2

Episode : 1940 		 Timestep : 82041 		 Average Reward : 19040.0 		 Eval Score: 20510.0
Episode : 1950 		 Timestep : 82614 		 Average Reward : 30880.0 		 Eval Score: 22525.0
Episode : 1960 		 Timestep : 83028 		 Average Reward : 20240.0 		 Eval Score: 23055.0
Episode : 1970 		 Timestep : 83390 		 Average Reward : 17720.0 		 Eval Score: 20575.0
Episode : 1980 		 Timestep : 83820 		 Average Reward : 23970.0 		 Eval Score: 19705.0
Episode : 1990 		 Timestep : 84165 		 Average Reward : 17720.0 		 Eval Score: 25030.0
Episode : 2000 		 Timestep : 84619 		 Average Reward : 22870.0 		 Eval Score: 22905.0
Episode : 2010 		 Timestep : 84980 		 Average Reward : 17070.0 		 Eval Score: 27220.0
Episode : 2020 		 Timestep : 85412 		 Average Reward : 22050.0 		 Eval Score: 23940.0
Episode : 2030 		 Timestep : 85797 		 Average Reward : 19850.0 		 Eval Score: 27820.0
Episode : 2040 		 Timestep : 86178 		 Average Reward : 20230.0 		 Eval Score: 22000.0
Episode : 2050 		 Timestep : 86517 		 Average Reward :

Episode : 2890 		 Timestep : 123093 		 Average Reward : 14260.0 		 Eval Score: 17175.0
Episode : 2900 		 Timestep : 123601 		 Average Reward : 26220.0 		 Eval Score: 25930.0
Episode : 2910 		 Timestep : 123953 		 Average Reward : 17600.0 		 Eval Score: 25525.0
Episode : 2920 		 Timestep : 124386 		 Average Reward : 22450.0 		 Eval Score: 25455.0
Episode : 2930 		 Timestep : 124874 		 Average Reward : 24720.0 		 Eval Score: 30020.0
Episode : 2940 		 Timestep : 125546 		 Average Reward : 36490.0 		 Eval Score: 14925.0
Episode : 2950 		 Timestep : 125998 		 Average Reward : 22610.0 		 Eval Score: 23585.0
Episode : 2960 		 Timestep : 126389 		 Average Reward : 20090.0 		 Eval Score: 21050.0
Episode : 2970 		 Timestep : 126754 		 Average Reward : 18040.0 		 Eval Score: 23770.0
Episode : 2980 		 Timestep : 127280 		 Average Reward : 29380.0 		 Eval Score: 22665.0
Episode : 2990 		 Timestep : 127602 		 Average Reward : 17710.0 		 Eval Score: 23675.0
Episode : 3000 		 Timestep : 128215 		 Aver

Episode : 3840 		 Timestep : 165910 		 Average Reward : 23390.0 		 Eval Score: 26170.0
Episode : 3850 		 Timestep : 166352 		 Average Reward : 21900.0 		 Eval Score: 19560.0
Episode : 3860 		 Timestep : 166716 		 Average Reward : 20270.0 		 Eval Score: 19105.0
Episode : 3870 		 Timestep : 166999 		 Average Reward : 12200.0 		 Eval Score: 23085.0
Episode : 3880 		 Timestep : 167563 		 Average Reward : 30200.0 		 Eval Score: 23320.0
Episode : 3890 		 Timestep : 167822 		 Average Reward : 14690.0 		 Eval Score: 20495.0
Episode : 3900 		 Timestep : 168345 		 Average Reward : 29370.0 		 Eval Score: 35460.0
Episode : 3910 		 Timestep : 168736 		 Average Reward : 20180.0 		 Eval Score: 31050.0
Episode : 3920 		 Timestep : 169156 		 Average Reward : 22730.0 		 Eval Score: 26935.0
Episode : 3930 		 Timestep : 169463 		 Average Reward : 14040.0 		 Eval Score: 27670.0
Episode : 3940 		 Timestep : 170059 		 Average Reward : 33170.0 		 Eval Score: 11035.0
Episode : 3950 		 Timestep : 170634 		 Aver

Episode : 4790 		 Timestep : 208087 		 Average Reward : 22560.0 		 Eval Score: 28655.0
Episode : 4800 		 Timestep : 208355 		 Average Reward : 13840.0 		 Eval Score: 20100.0
Episode : 4810 		 Timestep : 208778 		 Average Reward : 20660.0 		 Eval Score: 22680.0
Episode : 4820 		 Timestep : 209304 		 Average Reward : 30760.0 		 Eval Score: 24535.0
Episode : 4830 		 Timestep : 209745 		 Average Reward : 23480.0 		 Eval Score: 22530.0
Episode : 4840 		 Timestep : 210215 		 Average Reward : 23870.0 		 Eval Score: 26615.0
Episode : 4850 		 Timestep : 210751 		 Average Reward : 27660.0 		 Eval Score: 23120.0
Episode : 4860 		 Timestep : 211208 		 Average Reward : 22940.0 		 Eval Score: 22340.0
Episode : 4870 		 Timestep : 211456 		 Average Reward : 12010.0 		 Eval Score: 19640.0
Episode : 4880 		 Timestep : 211950 		 Average Reward : 27960.0 		 Eval Score: 23010.0
Episode : 4890 		 Timestep : 212301 		 Average Reward : 18750.0 		 Eval Score: 25275.0
Episode : 4900 		 Timestep : 212878 		 Aver

Episode : 5740 		 Timestep : 250672 		 Average Reward : 30850.0 		 Eval Score: 24660.0
Episode : 5750 		 Timestep : 251257 		 Average Reward : 32950.0 		 Eval Score: 20500.0
Episode : 5760 		 Timestep : 251846 		 Average Reward : 34260.0 		 Eval Score: 21295.0
Episode : 5770 		 Timestep : 252082 		 Average Reward : 12120.0 		 Eval Score: 20870.0
Episode : 5780 		 Timestep : 252467 		 Average Reward : 20620.0 		 Eval Score: 23800.0
Episode : 5790 		 Timestep : 252732 		 Average Reward : 13160.0 		 Eval Score: 24895.0
Episode : 5800 		 Timestep : 253225 		 Average Reward : 26980.0 		 Eval Score: 21685.0
Episode : 5810 		 Timestep : 253706 		 Average Reward : 24780.0 		 Eval Score: 19540.0
Episode : 5820 		 Timestep : 254220 		 Average Reward : 28040.0 		 Eval Score: 25915.0
Episode : 5830 		 Timestep : 254767 		 Average Reward : 28810.0 		 Eval Score: 23860.0
Episode : 5840 		 Timestep : 255254 		 Average Reward : 25030.0 		 Eval Score: 21635.0
Episode : 5850 		 Timestep : 255799 		 Aver

Episode : 6690 		 Timestep : 294389 		 Average Reward : 25770.0 		 Eval Score: 29435.0
Episode : 6700 		 Timestep : 294797 		 Average Reward : 22040.0 		 Eval Score: 32450.0
Episode : 6710 		 Timestep : 295223 		 Average Reward : 21870.0 		 Eval Score: 22845.0
Episode : 6720 		 Timestep : 295649 		 Average Reward : 23270.0 		 Eval Score: 22950.0
Episode : 6730 		 Timestep : 296093 		 Average Reward : 25840.0 		 Eval Score: 22155.0
Episode : 6740 		 Timestep : 296523 		 Average Reward : 23560.0 		 Eval Score: 17445.0
Episode : 6750 		 Timestep : 297058 		 Average Reward : 30650.0 		 Eval Score: 29415.0
Episode : 6760 		 Timestep : 297401 		 Average Reward : 20020.0 		 Eval Score: 26640.0
Episode : 6770 		 Timestep : 297711 		 Average Reward : 17000.0 		 Eval Score: 20625.0
Episode : 6780 		 Timestep : 298169 		 Average Reward : 24230.0 		 Eval Score: 37600.0
Episode : 6790 		 Timestep : 298777 		 Average Reward : 34540.0 		 Eval Score: 20710.0
Episode : 6800 		 Timestep : 299363 		 Aver

Episode : 7640 		 Timestep : 334725 		 Average Reward : 27450.0 		 Eval Score: 20860.0
Episode : 7650 		 Timestep : 335392 		 Average Reward : 34910.0 		 Eval Score: 21975.0
Episode : 7660 		 Timestep : 335594 		 Average Reward : 10570.0 		 Eval Score: 26085.0
Episode : 7670 		 Timestep : 336034 		 Average Reward : 22890.0 		 Eval Score: 15340.0
Episode : 7680 		 Timestep : 336411 		 Average Reward : 19630.0 		 Eval Score: 22640.0
Episode : 7690 		 Timestep : 336939 		 Average Reward : 28600.0 		 Eval Score: 30730.0
Episode : 7700 		 Timestep : 337366 		 Average Reward : 21750.0 		 Eval Score: 23940.0
Episode : 7710 		 Timestep : 337874 		 Average Reward : 27660.0 		 Eval Score: 26490.0
Episode : 7720 		 Timestep : 338329 		 Average Reward : 23200.0 		 Eval Score: 24020.0
Episode : 7730 		 Timestep : 338708 		 Average Reward : 17010.0 		 Eval Score: 19210.0
Episode : 7740 		 Timestep : 339164 		 Average Reward : 24060.0 		 Eval Score: 30545.0
Episode : 7750 		 Timestep : 339759 		 Aver

Episode : 8590 		 Timestep : 378123 		 Average Reward : 17250.0 		 Eval Score: 22425.0
Episode : 8600 		 Timestep : 378603 		 Average Reward : 26690.0 		 Eval Score: 34745.0
Episode : 8610 		 Timestep : 379376 		 Average Reward : 42170.0 		 Eval Score: 23015.0
Episode : 8620 		 Timestep : 379919 		 Average Reward : 29700.0 		 Eval Score: 29035.0
Episode : 8630 		 Timestep : 380383 		 Average Reward : 25650.0 		 Eval Score: 29940.0
Episode : 8640 		 Timestep : 380944 		 Average Reward : 30550.0 		 Eval Score: 24265.0
Episode : 8650 		 Timestep : 381319 		 Average Reward : 19700.0 		 Eval Score: 22335.0
Episode : 8660 		 Timestep : 381892 		 Average Reward : 29150.0 		 Eval Score: 18485.0
Episode : 8670 		 Timestep : 382257 		 Average Reward : 18440.0 		 Eval Score: 32840.0
Episode : 8680 		 Timestep : 382930 		 Average Reward : 36620.0 		 Eval Score: 27190.0
Episode : 8690 		 Timestep : 383308 		 Average Reward : 21950.0 		 Eval Score: 19425.0
Episode : 8700 		 Timestep : 383674 		 Aver

Episode : 9540 		 Timestep : 422623 		 Average Reward : 9710.0 		 Eval Score: 21885.0
Episode : 9550 		 Timestep : 423007 		 Average Reward : 20850.0 		 Eval Score: 22605.0
Episode : 9560 		 Timestep : 423569 		 Average Reward : 30820.0 		 Eval Score: 23805.0
Episode : 9570 		 Timestep : 423966 		 Average Reward : 20720.0 		 Eval Score: 28395.0
Episode : 9580 		 Timestep : 424525 		 Average Reward : 32020.0 		 Eval Score: 17380.0
Episode : 9590 		 Timestep : 424859 		 Average Reward : 17320.0 		 Eval Score: 22410.0
Episode : 9600 		 Timestep : 425341 		 Average Reward : 26730.0 		 Eval Score: 18945.0
Episode : 9610 		 Timestep : 425808 		 Average Reward : 25940.0 		 Eval Score: 25155.0
Episode : 9620 		 Timestep : 426189 		 Average Reward : 21960.0 		 Eval Score: 21895.0
Episode : 9630 		 Timestep : 426453 		 Average Reward : 13260.0 		 Eval Score: 21555.0
Episode : 9640 		 Timestep : 426800 		 Average Reward : 17130.0 		 Eval Score: 32350.0
Episode : 9650 		 Timestep : 427236 		 Avera

Episode : 10480 		 Timestep : 467106 		 Average Reward : 28270.0 		 Eval Score: 20155.0
Episode : 10490 		 Timestep : 467755 		 Average Reward : 37890.0 		 Eval Score: 26965.0
Episode : 10500 		 Timestep : 468161 		 Average Reward : 21380.0 		 Eval Score: 23150.0
Episode : 10510 		 Timestep : 468703 		 Average Reward : 28330.0 		 Eval Score: 22120.0
Episode : 10520 		 Timestep : 469231 		 Average Reward : 28030.0 		 Eval Score: 17835.0
Episode : 10530 		 Timestep : 469740 		 Average Reward : 28700.0 		 Eval Score: 30180.0
Episode : 10540 		 Timestep : 470220 		 Average Reward : 28160.0 		 Eval Score: 24260.0
Episode : 10550 		 Timestep : 470773 		 Average Reward : 29780.0 		 Eval Score: 34945.0
Episode : 10560 		 Timestep : 471088 		 Average Reward : 16080.0 		 Eval Score: 24690.0
Episode : 10570 		 Timestep : 471533 		 Average Reward : 25040.0 		 Eval Score: 23910.0
Episode : 10580 		 Timestep : 471909 		 Average Reward : 17660.0 		 Eval Score: 18080.0
Episode : 10590 		 Timestep : 47

Episode : 11420 		 Timestep : 508888 		 Average Reward : 27860.0 		 Eval Score: 24020.0
Episode : 11430 		 Timestep : 509480 		 Average Reward : 33740.0 		 Eval Score: 35465.0
Episode : 11440 		 Timestep : 510141 		 Average Reward : 36440.0 		 Eval Score: 27865.0
Episode : 11450 		 Timestep : 510590 		 Average Reward : 23940.0 		 Eval Score: 22150.0
Episode : 11460 		 Timestep : 510999 		 Average Reward : 20880.0 		 Eval Score: 31085.0
Episode : 11470 		 Timestep : 511503 		 Average Reward : 26290.0 		 Eval Score: 23515.0
Episode : 11480 		 Timestep : 512055 		 Average Reward : 29180.0 		 Eval Score: 22360.0
Episode : 11490 		 Timestep : 512552 		 Average Reward : 28190.0 		 Eval Score: 35255.0
Episode : 11500 		 Timestep : 512826 		 Average Reward : 14900.0 		 Eval Score: 20660.0
Episode : 11510 		 Timestep : 513187 		 Average Reward : 17520.0 		 Eval Score: 24405.0
Episode : 11520 		 Timestep : 513551 		 Average Reward : 18640.0 		 Eval Score: 21860.0
Episode : 11530 		 Timestep : 51

Episode : 12360 		 Timestep : 552936 		 Average Reward : 26850.0 		 Eval Score: 21680.0
Episode : 12370 		 Timestep : 553377 		 Average Reward : 22160.0 		 Eval Score: 24035.0
Episode : 12380 		 Timestep : 554012 		 Average Reward : 35930.0 		 Eval Score: 29375.0
Episode : 12390 		 Timestep : 554480 		 Average Reward : 25880.0 		 Eval Score: 19595.0
Episode : 12400 		 Timestep : 554977 		 Average Reward : 26730.0 		 Eval Score: 28000.0
Episode : 12410 		 Timestep : 555513 		 Average Reward : 29390.0 		 Eval Score: 22385.0
Episode : 12420 		 Timestep : 555828 		 Average Reward : 17790.0 		 Eval Score: 21460.0
Episode : 12430 		 Timestep : 556110 		 Average Reward : 15280.0 		 Eval Score: 27075.0
Episode : 12440 		 Timestep : 556557 		 Average Reward : 22240.0 		 Eval Score: 28345.0
Episode : 12450 		 Timestep : 556903 		 Average Reward : 19730.0 		 Eval Score: 20025.0
Episode : 12460 		 Timestep : 557179 		 Average Reward : 13850.0 		 Eval Score: 38835.0
Episode : 12470 		 Timestep : 55

Episode : 13300 		 Timestep : 595774 		 Average Reward : 30500.0 		 Eval Score: 24195.0
Episode : 13310 		 Timestep : 596347 		 Average Reward : 31210.0 		 Eval Score: 26110.0
Episode : 13320 		 Timestep : 596810 		 Average Reward : 26390.0 		 Eval Score: 21850.0
Episode : 13330 		 Timestep : 597284 		 Average Reward : 27800.0 		 Eval Score: 23330.0
Episode : 13340 		 Timestep : 597651 		 Average Reward : 19270.0 		 Eval Score: 24180.0
Episode : 13350 		 Timestep : 598088 		 Average Reward : 23180.0 		 Eval Score: 29655.0
Episode : 13360 		 Timestep : 598339 		 Average Reward : 11570.0 		 Eval Score: 27410.0
Episode : 13370 		 Timestep : 598847 		 Average Reward : 26820.0 		 Eval Score: 21835.0
Episode : 13380 		 Timestep : 599297 		 Average Reward : 23430.0 		 Eval Score: 27865.0
Episode : 13390 		 Timestep : 599679 		 Average Reward : 21910.0 		 Eval Score: 27375.0
Episode : 13400 		 Timestep : 600038 		 Average Reward : 18620.0 		 Eval Score: 31870.0
Episode : 13410 		 Timestep : 60

Episode : 14240 		 Timestep : 638543 		 Average Reward : 21330.0 		 Eval Score: 26630.0
Episode : 14250 		 Timestep : 639176 		 Average Reward : 33800.0 		 Eval Score: 29010.0
Episode : 14260 		 Timestep : 639722 		 Average Reward : 28350.0 		 Eval Score: 23355.0
Episode : 14270 		 Timestep : 640293 		 Average Reward : 30510.0 		 Eval Score: 33105.0
Episode : 14280 		 Timestep : 640700 		 Average Reward : 22490.0 		 Eval Score: 19050.0
Episode : 14290 		 Timestep : 641004 		 Average Reward : 14990.0 		 Eval Score: 19070.0
Episode : 14300 		 Timestep : 641285 		 Average Reward : 13650.0 		 Eval Score: 21480.0
Episode : 14310 		 Timestep : 641830 		 Average Reward : 30050.0 		 Eval Score: 28230.0
Episode : 14320 		 Timestep : 642234 		 Average Reward : 22200.0 		 Eval Score: 18965.0
Episode : 14330 		 Timestep : 642796 		 Average Reward : 30160.0 		 Eval Score: 18630.0
Episode : 14340 		 Timestep : 643081 		 Average Reward : 14060.0 		 Eval Score: 19835.0
Episode : 14350 		 Timestep : 64

Episode : 15180 		 Timestep : 680585 		 Average Reward : 30210.0 		 Eval Score: 21715.0
Episode : 15190 		 Timestep : 681190 		 Average Reward : 35310.0 		 Eval Score: 22370.0
Episode : 15200 		 Timestep : 681785 		 Average Reward : 31020.0 		 Eval Score: 24190.0
Episode : 15210 		 Timestep : 682084 		 Average Reward : 14980.0 		 Eval Score: 18625.0
Episode : 15220 		 Timestep : 682659 		 Average Reward : 32010.0 		 Eval Score: 21970.0
Episode : 15230 		 Timestep : 683105 		 Average Reward : 22650.0 		 Eval Score: 24950.0
Episode : 15240 		 Timestep : 683464 		 Average Reward : 17350.0 		 Eval Score: 26150.0
Episode : 15250 		 Timestep : 683821 		 Average Reward : 17450.0 		 Eval Score: 22975.0
Episode : 15260 		 Timestep : 684371 		 Average Reward : 30100.0 		 Eval Score: 22915.0
Episode : 15270 		 Timestep : 684753 		 Average Reward : 18780.0 		 Eval Score: 25975.0
Episode : 15280 		 Timestep : 685019 		 Average Reward : 13570.0 		 Eval Score: 27970.0
Episode : 15290 		 Timestep : 68

Episode : 16120 		 Timestep : 723181 		 Average Reward : 23510.0 		 Eval Score: 22640.0
Episode : 16130 		 Timestep : 723773 		 Average Reward : 35220.0 		 Eval Score: 28575.0
Episode : 16140 		 Timestep : 724291 		 Average Reward : 30430.0 		 Eval Score: 18765.0
Episode : 16150 		 Timestep : 724637 		 Average Reward : 18980.0 		 Eval Score: 27455.0
Episode : 16160 		 Timestep : 725047 		 Average Reward : 22860.0 		 Eval Score: 28550.0
Episode : 16170 		 Timestep : 725396 		 Average Reward : 19150.0 		 Eval Score: 22955.0
Episode : 16180 		 Timestep : 725850 		 Average Reward : 26510.0 		 Eval Score: 25240.0
Episode : 16190 		 Timestep : 726211 		 Average Reward : 17030.0 		 Eval Score: 25240.0
Episode : 16200 		 Timestep : 726624 		 Average Reward : 23270.0 		 Eval Score: 32630.0
Episode : 16210 		 Timestep : 727040 		 Average Reward : 23030.0 		 Eval Score: 27300.0
Episode : 16220 		 Timestep : 727605 		 Average Reward : 30610.0 		 Eval Score: 18545.0
Episode : 16230 		 Timestep : 72

Episode : 17060 		 Timestep : 767520 		 Average Reward : 8090.0 		 Eval Score: 20200.0
Episode : 17070 		 Timestep : 768102 		 Average Reward : 31330.0 		 Eval Score: 28860.0
Episode : 17080 		 Timestep : 768437 		 Average Reward : 18660.0 		 Eval Score: 26825.0
Episode : 17090 		 Timestep : 768820 		 Average Reward : 19840.0 		 Eval Score: 32965.0
Episode : 17100 		 Timestep : 769105 		 Average Reward : 14010.0 		 Eval Score: 25615.0
Episode : 17110 		 Timestep : 769781 		 Average Reward : 38160.0 		 Eval Score: 28075.0
Episode : 17120 		 Timestep : 770203 		 Average Reward : 20350.0 		 Eval Score: 21005.0
Episode : 17130 		 Timestep : 770625 		 Average Reward : 24340.0 		 Eval Score: 28755.0
Episode : 17140 		 Timestep : 771184 		 Average Reward : 30490.0 		 Eval Score: 27055.0
Episode : 17150 		 Timestep : 771711 		 Average Reward : 31220.0 		 Eval Score: 32475.0
Episode : 17160 		 Timestep : 772308 		 Average Reward : 33940.0 		 Eval Score: 26035.0
Episode : 17170 		 Timestep : 772

Episode : 18000 		 Timestep : 811392 		 Average Reward : 24120.0 		 Eval Score: 27325.0
Episode : 18010 		 Timestep : 811804 		 Average Reward : 22660.0 		 Eval Score: 29990.0
Episode : 18020 		 Timestep : 812091 		 Average Reward : 15310.0 		 Eval Score: 30270.0
Episode : 18030 		 Timestep : 812676 		 Average Reward : 32660.0 		 Eval Score: 15815.0
Episode : 18040 		 Timestep : 813224 		 Average Reward : 30510.0 		 Eval Score: 28085.0
Episode : 18050 		 Timestep : 813732 		 Average Reward : 27060.0 		 Eval Score: 25070.0
Episode : 18060 		 Timestep : 814194 		 Average Reward : 23990.0 		 Eval Score: 16405.0
Episode : 18070 		 Timestep : 814678 		 Average Reward : 26320.0 		 Eval Score: 21515.0
Episode : 18080 		 Timestep : 815043 		 Average Reward : 18990.0 		 Eval Score: 23355.0
Episode : 18090 		 Timestep : 815470 		 Average Reward : 21270.0 		 Eval Score: 24075.0
Episode : 18100 		 Timestep : 815865 		 Average Reward : 20210.0 		 Eval Score: 27260.0
Episode : 18110 		 Timestep : 81

Episode : 18940 		 Timestep : 854168 		 Average Reward : 28850.0 		 Eval Score: 31740.0
Episode : 18950 		 Timestep : 854378 		 Average Reward : 7930.0 		 Eval Score: 20225.0
Episode : 18960 		 Timestep : 854749 		 Average Reward : 19700.0 		 Eval Score: 26175.0
Episode : 18970 		 Timestep : 855111 		 Average Reward : 19440.0 		 Eval Score: 24535.0
Episode : 18980 		 Timestep : 855795 		 Average Reward : 39660.0 		 Eval Score: 19630.0
Episode : 18990 		 Timestep : 856301 		 Average Reward : 29120.0 		 Eval Score: 22820.0
Episode : 19000 		 Timestep : 856697 		 Average Reward : 20910.0 		 Eval Score: 24890.0
Episode : 19010 		 Timestep : 857319 		 Average Reward : 33550.0 		 Eval Score: 29915.0
Episode : 19020 		 Timestep : 857920 		 Average Reward : 33290.0 		 Eval Score: 20890.0
Episode : 19030 		 Timestep : 858278 		 Average Reward : 19000.0 		 Eval Score: 20100.0
Episode : 19040 		 Timestep : 858773 		 Average Reward : 24760.0 		 Eval Score: 17845.0
Episode : 19050 		 Timestep : 859

Episode : 19880 		 Timestep : 897268 		 Average Reward : 32960.0 		 Eval Score: 19455.0
Episode : 19890 		 Timestep : 897801 		 Average Reward : 29110.0 		 Eval Score: 23310.0
Episode : 19900 		 Timestep : 898012 		 Average Reward : 11170.0 		 Eval Score: 20045.0
Episode : 19910 		 Timestep : 898583 		 Average Reward : 29980.0 		 Eval Score: 29820.0
Episode : 19920 		 Timestep : 899205 		 Average Reward : 35830.0 		 Eval Score: 23495.0
Episode : 19930 		 Timestep : 899433 		 Average Reward : 13630.0 		 Eval Score: 26510.0
Episode : 19940 		 Timestep : 899974 		 Average Reward : 29370.0 		 Eval Score: 29860.0
Episode : 19950 		 Timestep : 900575 		 Average Reward : 32590.0 		 Eval Score: 19975.0
Episode : 19960 		 Timestep : 901172 		 Average Reward : 34610.0 		 Eval Score: 21975.0
Episode : 19970 		 Timestep : 901712 		 Average Reward : 30440.0 		 Eval Score: 20785.0
Episode : 19980 		 Timestep : 902117 		 Average Reward : 22240.0 		 Eval Score: 30080.0
Episode : 19990 		 Timestep : 90

Episode : 20820 		 Timestep : 939471 		 Average Reward : 21750.0 		 Eval Score: 20030.0
Episode : 20830 		 Timestep : 939834 		 Average Reward : 19330.0 		 Eval Score: 23880.0
Episode : 20840 		 Timestep : 940158 		 Average Reward : 16350.0 		 Eval Score: 37595.0
Episode : 20850 		 Timestep : 940428 		 Average Reward : 11960.0 		 Eval Score: 22520.0
Episode : 20860 		 Timestep : 940849 		 Average Reward : 22880.0 		 Eval Score: 19180.0
Episode : 20870 		 Timestep : 941216 		 Average Reward : 19990.0 		 Eval Score: 23700.0
Episode : 20880 		 Timestep : 941992 		 Average Reward : 42480.0 		 Eval Score: 27430.0
Episode : 20890 		 Timestep : 942450 		 Average Reward : 24060.0 		 Eval Score: 24825.0
Episode : 20900 		 Timestep : 942803 		 Average Reward : 17080.0 		 Eval Score: 19035.0
Episode : 20910 		 Timestep : 943235 		 Average Reward : 21980.0 		 Eval Score: 21875.0
Episode : 20920 		 Timestep : 943549 		 Average Reward : 15970.0 		 Eval Score: 24625.0
Episode : 20930 		 Timestep : 94

Episode : 21760 		 Timestep : 981957 		 Average Reward : 26820.0 		 Eval Score: 28250.0
Episode : 21770 		 Timestep : 982428 		 Average Reward : 26760.0 		 Eval Score: 25315.0
Episode : 21780 		 Timestep : 982814 		 Average Reward : 20640.0 		 Eval Score: 23890.0
Episode : 21790 		 Timestep : 983267 		 Average Reward : 26690.0 		 Eval Score: 19225.0
Episode : 21800 		 Timestep : 983764 		 Average Reward : 27510.0 		 Eval Score: 30565.0
Episode : 21810 		 Timestep : 984283 		 Average Reward : 29710.0 		 Eval Score: 29255.0
Episode : 21820 		 Timestep : 984768 		 Average Reward : 28090.0 		 Eval Score: 34340.0
Episode : 21830 		 Timestep : 985253 		 Average Reward : 26830.0 		 Eval Score: 23135.0
Episode : 21840 		 Timestep : 985876 		 Average Reward : 35540.0 		 Eval Score: 23395.0
Episode : 21850 		 Timestep : 986266 		 Average Reward : 22110.0 		 Eval Score: 24850.0
Episode : 21860 		 Timestep : 986747 		 Average Reward : 27710.0 		 Eval Score: 21380.0
Episode : 21870 		 Timestep : 98

Episode : 22690 		 Timestep : 1024769 		 Average Reward : 14790.0 		 Eval Score: 26590.0
Episode : 22700 		 Timestep : 1025237 		 Average Reward : 25590.0 		 Eval Score: 22510.0
Episode : 22710 		 Timestep : 1025685 		 Average Reward : 23460.0 		 Eval Score: 32480.0
Episode : 22720 		 Timestep : 1026227 		 Average Reward : 28100.0 		 Eval Score: 25305.0
Episode : 22730 		 Timestep : 1026670 		 Average Reward : 24490.0 		 Eval Score: 25395.0
Episode : 22740 		 Timestep : 1027124 		 Average Reward : 24720.0 		 Eval Score: 29915.0
Episode : 22750 		 Timestep : 1027782 		 Average Reward : 37210.0 		 Eval Score: 27685.0
Episode : 22760 		 Timestep : 1028182 		 Average Reward : 19790.0 		 Eval Score: 21620.0
Episode : 22770 		 Timestep : 1028589 		 Average Reward : 23700.0 		 Eval Score: 24295.0
Episode : 22780 		 Timestep : 1029082 		 Average Reward : 27650.0 		 Eval Score: 17755.0
Episode : 22790 		 Timestep : 1029405 		 Average Reward : 16180.0 		 Eval Score: 23915.0
Episode : 22800 		 Ti

Episode : 23620 		 Timestep : 1069532 		 Average Reward : 40090.0 		 Eval Score: 17635.0
Episode : 23630 		 Timestep : 1069795 		 Average Reward : 13460.0 		 Eval Score: 21245.0
Episode : 23640 		 Timestep : 1070172 		 Average Reward : 19500.0 		 Eval Score: 26190.0
Episode : 23650 		 Timestep : 1070748 		 Average Reward : 32340.0 		 Eval Score: 38110.0
Episode : 23660 		 Timestep : 1071212 		 Average Reward : 24060.0 		 Eval Score: 31950.0
Episode : 23670 		 Timestep : 1071663 		 Average Reward : 22890.0 		 Eval Score: 23020.0
Episode : 23680 		 Timestep : 1071927 		 Average Reward : 13480.0 		 Eval Score: 27560.0
Episode : 23690 		 Timestep : 1072372 		 Average Reward : 23370.0 		 Eval Score: 32335.0
Episode : 23700 		 Timestep : 1072671 		 Average Reward : 14540.0 		 Eval Score: 24005.0
Episode : 23710 		 Timestep : 1073128 		 Average Reward : 22460.0 		 Eval Score: 30455.0
Episode : 23720 		 Timestep : 1073659 		 Average Reward : 32320.0 		 Eval Score: 20900.0
Episode : 23730 		 Ti

Episode : 24550 		 Timestep : 1112953 		 Average Reward : 29810.0 		 Eval Score: 26600.0
Episode : 24560 		 Timestep : 1113502 		 Average Reward : 30260.0 		 Eval Score: 29260.0
Episode : 24570 		 Timestep : 1114108 		 Average Reward : 33070.0 		 Eval Score: 24940.0
Episode : 24580 		 Timestep : 1114672 		 Average Reward : 31440.0 		 Eval Score: 26255.0
Episode : 24590 		 Timestep : 1114880 		 Average Reward : 11490.0 		 Eval Score: 21465.0
Episode : 24600 		 Timestep : 1115418 		 Average Reward : 28060.0 		 Eval Score: 29655.0
Episode : 24610 		 Timestep : 1115856 		 Average Reward : 23470.0 		 Eval Score: 27435.0
Episode : 24620 		 Timestep : 1116174 		 Average Reward : 17690.0 		 Eval Score: 23350.0
Episode : 24630 		 Timestep : 1116888 		 Average Reward : 40400.0 		 Eval Score: 31440.0
Episode : 24640 		 Timestep : 1117308 		 Average Reward : 21190.0 		 Eval Score: 27925.0
Episode : 24650 		 Timestep : 1117676 		 Average Reward : 16630.0 		 Eval Score: 22355.0
Episode : 24660 		 Ti

Episode : 25480 		 Timestep : 1158823 		 Average Reward : 24130.0 		 Eval Score: 16480.0
Episode : 25490 		 Timestep : 1159193 		 Average Reward : 19230.0 		 Eval Score: 33385.0
Episode : 25500 		 Timestep : 1159588 		 Average Reward : 21150.0 		 Eval Score: 22630.0
Episode : 25510 		 Timestep : 1160081 		 Average Reward : 28200.0 		 Eval Score: 30195.0
Episode : 25520 		 Timestep : 1160639 		 Average Reward : 30710.0 		 Eval Score: 28745.0
Episode : 25530 		 Timestep : 1161187 		 Average Reward : 29760.0 		 Eval Score: 19090.0
Episode : 25540 		 Timestep : 1161672 		 Average Reward : 25650.0 		 Eval Score: 25395.0
Episode : 25550 		 Timestep : 1162342 		 Average Reward : 34770.0 		 Eval Score: 30795.0
Episode : 25560 		 Timestep : 1162741 		 Average Reward : 21300.0 		 Eval Score: 21975.0
Episode : 25570 		 Timestep : 1163146 		 Average Reward : 23120.0 		 Eval Score: 24885.0
Episode : 25580 		 Timestep : 1163521 		 Average Reward : 19800.0 		 Eval Score: 19855.0
Episode : 25590 		 Ti

Episode : 26410 		 Timestep : 1200985 		 Average Reward : 27670.0 		 Eval Score: 20600.0
Episode : 26420 		 Timestep : 1201317 		 Average Reward : 17040.0 		 Eval Score: 17980.0
Episode : 26430 		 Timestep : 1201828 		 Average Reward : 29210.0 		 Eval Score: 27230.0
Episode : 26440 		 Timestep : 1202310 		 Average Reward : 26870.0 		 Eval Score: 27905.0
Episode : 26450 		 Timestep : 1202818 		 Average Reward : 28030.0 		 Eval Score: 29930.0
Episode : 26460 		 Timestep : 1203430 		 Average Reward : 33620.0 		 Eval Score: 27030.0
Episode : 26470 		 Timestep : 1203980 		 Average Reward : 31000.0 		 Eval Score: 23785.0
Episode : 26480 		 Timestep : 1204432 		 Average Reward : 24610.0 		 Eval Score: 24145.0
Episode : 26490 		 Timestep : 1204923 		 Average Reward : 25450.0 		 Eval Score: 25855.0
Episode : 26500 		 Timestep : 1205398 		 Average Reward : 25800.0 		 Eval Score: 27205.0
Episode : 26510 		 Timestep : 1205891 		 Average Reward : 24640.0 		 Eval Score: 33760.0
Episode : 26520 		 Ti

Episode : 27340 		 Timestep : 1244081 		 Average Reward : 32860.0 		 Eval Score: 25630.0
Episode : 27350 		 Timestep : 1244668 		 Average Reward : 33620.0 		 Eval Score: 28120.0
Episode : 27360 		 Timestep : 1245096 		 Average Reward : 23830.0 		 Eval Score: 28420.0
Episode : 27370 		 Timestep : 1245532 		 Average Reward : 23650.0 		 Eval Score: 27395.0
Episode : 27380 		 Timestep : 1245958 		 Average Reward : 24550.0 		 Eval Score: 25200.0
Episode : 27390 		 Timestep : 1246673 		 Average Reward : 38680.0 		 Eval Score: 22105.0
Episode : 27400 		 Timestep : 1247186 		 Average Reward : 29830.0 		 Eval Score: 21410.0
Episode : 27410 		 Timestep : 1247674 		 Average Reward : 26990.0 		 Eval Score: 20985.0
Episode : 27420 		 Timestep : 1248187 		 Average Reward : 29130.0 		 Eval Score: 22105.0
Episode : 27430 		 Timestep : 1248656 		 Average Reward : 25720.0 		 Eval Score: 20490.0
Episode : 27440 		 Timestep : 1249121 		 Average Reward : 24130.0 		 Eval Score: 33830.0
Episode : 27450 		 Ti

Episode : 28270 		 Timestep : 1286388 		 Average Reward : 26780.0 		 Eval Score: 24040.0
Episode : 28280 		 Timestep : 1286860 		 Average Reward : 23270.0 		 Eval Score: 23390.0
Episode : 28290 		 Timestep : 1287478 		 Average Reward : 37760.0 		 Eval Score: 22730.0
Episode : 28300 		 Timestep : 1287963 		 Average Reward : 30560.0 		 Eval Score: 20565.0
Episode : 28310 		 Timestep : 1288481 		 Average Reward : 26940.0 		 Eval Score: 25985.0
Episode : 28320 		 Timestep : 1288961 		 Average Reward : 25670.0 		 Eval Score: 19705.0
Episode : 28330 		 Timestep : 1289359 		 Average Reward : 23310.0 		 Eval Score: 21665.0
Episode : 28340 		 Timestep : 1289764 		 Average Reward : 22870.0 		 Eval Score: 27350.0
Episode : 28350 		 Timestep : 1290199 		 Average Reward : 22830.0 		 Eval Score: 22335.0
Episode : 28360 		 Timestep : 1290655 		 Average Reward : 25770.0 		 Eval Score: 25055.0
Episode : 28370 		 Timestep : 1291074 		 Average Reward : 23880.0 		 Eval Score: 26510.0
Episode : 28380 		 Ti

Episode : 29200 		 Timestep : 1327805 		 Average Reward : 24420.0 		 Eval Score: 29670.0
Episode : 29210 		 Timestep : 1328183 		 Average Reward : 18800.0 		 Eval Score: 19070.0
Episode : 29220 		 Timestep : 1328799 		 Average Reward : 33500.0 		 Eval Score: 30680.0
Episode : 29230 		 Timestep : 1329313 		 Average Reward : 26960.0 		 Eval Score: 24385.0
Episode : 29240 		 Timestep : 1329696 		 Average Reward : 20840.0 		 Eval Score: 27795.0
Episode : 29250 		 Timestep : 1330154 		 Average Reward : 26510.0 		 Eval Score: 24665.0
Episode : 29260 		 Timestep : 1330617 		 Average Reward : 27820.0 		 Eval Score: 24985.0
Episode : 29270 		 Timestep : 1330932 		 Average Reward : 16710.0 		 Eval Score: 24575.0
Episode : 29280 		 Timestep : 1331221 		 Average Reward : 17080.0 		 Eval Score: 31790.0
Episode : 29290 		 Timestep : 1331754 		 Average Reward : 28330.0 		 Eval Score: 34520.0
Episode : 29300 		 Timestep : 1332287 		 Average Reward : 29160.0 		 Eval Score: 19925.0
Episode : 29310 		 Ti

Episode : 30130 		 Timestep : 1369040 		 Average Reward : 12020.0 		 Eval Score: 19575.0
Episode : 30140 		 Timestep : 1369776 		 Average Reward : 42100.0 		 Eval Score: 25785.0
Episode : 30150 		 Timestep : 1370248 		 Average Reward : 26700.0 		 Eval Score: 17265.0
Episode : 30160 		 Timestep : 1370708 		 Average Reward : 25610.0 		 Eval Score: 20555.0
Episode : 30170 		 Timestep : 1371165 		 Average Reward : 27040.0 		 Eval Score: 15795.0
Episode : 30180 		 Timestep : 1371662 		 Average Reward : 27830.0 		 Eval Score: 27080.0
Episode : 30190 		 Timestep : 1372314 		 Average Reward : 34790.0 		 Eval Score: 23120.0
Episode : 30200 		 Timestep : 1372633 		 Average Reward : 17250.0 		 Eval Score: 24610.0
Episode : 30210 		 Timestep : 1372948 		 Average Reward : 17290.0 		 Eval Score: 22220.0
Episode : 30220 		 Timestep : 1373467 		 Average Reward : 29640.0 		 Eval Score: 27550.0
Episode : 30230 		 Timestep : 1374040 		 Average Reward : 31350.0 		 Eval Score: 25350.0
Episode : 30240 		 Ti

Episode : 31060 		 Timestep : 1413156 		 Average Reward : 25100.0 		 Eval Score: 25445.0
Episode : 31070 		 Timestep : 1413470 		 Average Reward : 16380.0 		 Eval Score: 24145.0
Episode : 31080 		 Timestep : 1413936 		 Average Reward : 27780.0 		 Eval Score: 22225.0
Episode : 31090 		 Timestep : 1414389 		 Average Reward : 26100.0 		 Eval Score: 30210.0
Episode : 31100 		 Timestep : 1414764 		 Average Reward : 20660.0 		 Eval Score: 37840.0
Episode : 31110 		 Timestep : 1415281 		 Average Reward : 29010.0 		 Eval Score: 28040.0
Episode : 31120 		 Timestep : 1415687 		 Average Reward : 19950.0 		 Eval Score: 24980.0
Episode : 31130 		 Timestep : 1416103 		 Average Reward : 21740.0 		 Eval Score: 16685.0
Episode : 31140 		 Timestep : 1416575 		 Average Reward : 24790.0 		 Eval Score: 23780.0
Episode : 31150 		 Timestep : 1416929 		 Average Reward : 18230.0 		 Eval Score: 21195.0
Episode : 31160 		 Timestep : 1417369 		 Average Reward : 24640.0 		 Eval Score: 26250.0
Episode : 31170 		 Ti

Episode : 31990 		 Timestep : 1455800 		 Average Reward : 34640.0 		 Eval Score: 28790.0
Episode : 32000 		 Timestep : 1456148 		 Average Reward : 19550.0 		 Eval Score: 28035.0
Episode : 32010 		 Timestep : 1456632 		 Average Reward : 26160.0 		 Eval Score: 21895.0
Episode : 32020 		 Timestep : 1457260 		 Average Reward : 35910.0 		 Eval Score: 25015.0
Episode : 32030 		 Timestep : 1457761 		 Average Reward : 29630.0 		 Eval Score: 22045.0
Episode : 32040 		 Timestep : 1458235 		 Average Reward : 23740.0 		 Eval Score: 21925.0
Episode : 32050 		 Timestep : 1458709 		 Average Reward : 26230.0 		 Eval Score: 20565.0
Episode : 32060 		 Timestep : 1459133 		 Average Reward : 21070.0 		 Eval Score: 22170.0
Episode : 32070 		 Timestep : 1459417 		 Average Reward : 14820.0 		 Eval Score: 19800.0
Episode : 32080 		 Timestep : 1460007 		 Average Reward : 35640.0 		 Eval Score: 25170.0
Episode : 32090 		 Timestep : 1460449 		 Average Reward : 24150.0 		 Eval Score: 23845.0
Episode : 32100 		 Ti

Episode : 32920 		 Timestep : 1499403 		 Average Reward : 25300.0 		 Eval Score: 26010.0
Episode : 32930 		 Timestep : 1499946 		 Average Reward : 29730.0 		 Eval Score: 24835.0
Episode : 32940 		 Timestep : 1500569 		 Average Reward : 34090.0 		 Eval Score: 29245.0
Episode : 32950 		 Timestep : 1500921 		 Average Reward : 18640.0 		 Eval Score: 24345.0
Episode : 32960 		 Timestep : 1501336 		 Average Reward : 22130.0 		 Eval Score: 28805.0
Episode : 32970 		 Timestep : 1501916 		 Average Reward : 33510.0 		 Eval Score: 30595.0
Episode : 32980 		 Timestep : 1502532 		 Average Reward : 35870.0 		 Eval Score: 17810.0
Episode : 32990 		 Timestep : 1502950 		 Average Reward : 21530.0 		 Eval Score: 26935.0
Episode : 33000 		 Timestep : 1503584 		 Average Reward : 34480.0 		 Eval Score: 25520.0
Episode : 33010 		 Timestep : 1503836 		 Average Reward : 13430.0 		 Eval Score: 31200.0
Episode : 33020 		 Timestep : 1504220 		 Average Reward : 19890.0 		 Eval Score: 21935.0
Episode : 33030 		 Ti

Episode : 33850 		 Timestep : 1541300 		 Average Reward : 27200.0 		 Eval Score: 29580.0
Episode : 33860 		 Timestep : 1542021 		 Average Reward : 39340.0 		 Eval Score: 26450.0
Episode : 33870 		 Timestep : 1542524 		 Average Reward : 26200.0 		 Eval Score: 27960.0
Episode : 33880 		 Timestep : 1542809 		 Average Reward : 15940.0 		 Eval Score: 24855.0
Episode : 33890 		 Timestep : 1543307 		 Average Reward : 27070.0 		 Eval Score: 25240.0
Episode : 33900 		 Timestep : 1543900 		 Average Reward : 30940.0 		 Eval Score: 23655.0
Episode : 33910 		 Timestep : 1544289 		 Average Reward : 20780.0 		 Eval Score: 27445.0
Episode : 33920 		 Timestep : 1544797 		 Average Reward : 27130.0 		 Eval Score: 30160.0
Episode : 33930 		 Timestep : 1545118 		 Average Reward : 17730.0 		 Eval Score: 31075.0
Episode : 33940 		 Timestep : 1545364 		 Average Reward : 11810.0 		 Eval Score: 27370.0
Episode : 33950 		 Timestep : 1545838 		 Average Reward : 25780.0 		 Eval Score: 27980.0
Episode : 33960 		 Ti

Episode : 34780 		 Timestep : 1584781 		 Average Reward : 20900.0 		 Eval Score: 20060.0
Episode : 34790 		 Timestep : 1585175 		 Average Reward : 23530.0 		 Eval Score: 31270.0
Episode : 34800 		 Timestep : 1585513 		 Average Reward : 17420.0 		 Eval Score: 23750.0
Episode : 34810 		 Timestep : 1586044 		 Average Reward : 28610.0 		 Eval Score: 22345.0
Episode : 34820 		 Timestep : 1586365 		 Average Reward : 17970.0 		 Eval Score: 20600.0
Episode : 34830 		 Timestep : 1586753 		 Average Reward : 19910.0 		 Eval Score: 26110.0
Episode : 34840 		 Timestep : 1587226 		 Average Reward : 26600.0 		 Eval Score: 26900.0
Episode : 34850 		 Timestep : 1587598 		 Average Reward : 19230.0 		 Eval Score: 20905.0
Episode : 34860 		 Timestep : 1588142 		 Average Reward : 32110.0 		 Eval Score: 24610.0
Episode : 34870 		 Timestep : 1588558 		 Average Reward : 22380.0 		 Eval Score: 24785.0
Episode : 34880 		 Timestep : 1588938 		 Average Reward : 20540.0 		 Eval Score: 20125.0
Episode : 34890 		 Ti

Episode : 35710 		 Timestep : 1626663 		 Average Reward : 17720.0 		 Eval Score: 28465.0
Episode : 35720 		 Timestep : 1627078 		 Average Reward : 19730.0 		 Eval Score: 18790.0
Episode : 35730 		 Timestep : 1627484 		 Average Reward : 22270.0 		 Eval Score: 22920.0
Episode : 35740 		 Timestep : 1627957 		 Average Reward : 26580.0 		 Eval Score: 26120.0
Episode : 35750 		 Timestep : 1628339 		 Average Reward : 20500.0 		 Eval Score: 19160.0
Episode : 35760 		 Timestep : 1628803 		 Average Reward : 24160.0 		 Eval Score: 31875.0
Episode : 35770 		 Timestep : 1629370 		 Average Reward : 28360.0 		 Eval Score: 17900.0
Episode : 35780 		 Timestep : 1629847 		 Average Reward : 25550.0 		 Eval Score: 22170.0
Episode : 35790 		 Timestep : 1630395 		 Average Reward : 29970.0 		 Eval Score: 30425.0
Episode : 35800 		 Timestep : 1630940 		 Average Reward : 30360.0 		 Eval Score: 27870.0
Episode : 35810 		 Timestep : 1631271 		 Average Reward : 17760.0 		 Eval Score: 26505.0
Episode : 35820 		 Ti

Episode : 36640 		 Timestep : 1669614 		 Average Reward : 17990.0 		 Eval Score: 32020.0
Episode : 36650 		 Timestep : 1669899 		 Average Reward : 15300.0 		 Eval Score: 26885.0
Episode : 36660 		 Timestep : 1670476 		 Average Reward : 31980.0 		 Eval Score: 19645.0
Episode : 36670 		 Timestep : 1670943 		 Average Reward : 26650.0 		 Eval Score: 23000.0
Episode : 36680 		 Timestep : 1671361 		 Average Reward : 22740.0 		 Eval Score: 27890.0
Episode : 36690 		 Timestep : 1671813 		 Average Reward : 24350.0 		 Eval Score: 23935.0
Episode : 36700 		 Timestep : 1672242 		 Average Reward : 23920.0 		 Eval Score: 26480.0
Episode : 36710 		 Timestep : 1672675 		 Average Reward : 25870.0 		 Eval Score: 29345.0
Episode : 36720 		 Timestep : 1673093 		 Average Reward : 23300.0 		 Eval Score: 21180.0
Episode : 36730 		 Timestep : 1673634 		 Average Reward : 30770.0 		 Eval Score: 22835.0
Episode : 36740 		 Timestep : 1674116 		 Average Reward : 28360.0 		 Eval Score: 25885.0
Episode : 36750 		 Ti

Episode : 37570 		 Timestep : 1713847 		 Average Reward : 34880.0 		 Eval Score: 26795.0
Episode : 37580 		 Timestep : 1714140 		 Average Reward : 13140.0 		 Eval Score: 33125.0
Episode : 37590 		 Timestep : 1714518 		 Average Reward : 18980.0 		 Eval Score: 23290.0
Episode : 37600 		 Timestep : 1714945 		 Average Reward : 22120.0 		 Eval Score: 26840.0
Episode : 37610 		 Timestep : 1715482 		 Average Reward : 29020.0 		 Eval Score: 18240.0
Episode : 37620 		 Timestep : 1716114 		 Average Reward : 35290.0 		 Eval Score: 19550.0
Episode : 37630 		 Timestep : 1716551 		 Average Reward : 23270.0 		 Eval Score: 21995.0
Episode : 37640 		 Timestep : 1717049 		 Average Reward : 27330.0 		 Eval Score: 28740.0
Episode : 37650 		 Timestep : 1717567 		 Average Reward : 27250.0 		 Eval Score: 29165.0
Episode : 37660 		 Timestep : 1718062 		 Average Reward : 26970.0 		 Eval Score: 26195.0
Episode : 37670 		 Timestep : 1718547 		 Average Reward : 26770.0 		 Eval Score: 26630.0
Episode : 37680 		 Ti

Episode : 38500 		 Timestep : 1757241 		 Average Reward : 23360.0 		 Eval Score: 27760.0
Episode : 38510 		 Timestep : 1757754 		 Average Reward : 28530.0 		 Eval Score: 36230.0
Episode : 38520 		 Timestep : 1758298 		 Average Reward : 32210.0 		 Eval Score: 24870.0
Episode : 38530 		 Timestep : 1758840 		 Average Reward : 29740.0 		 Eval Score: 30340.0
Episode : 38540 		 Timestep : 1759302 		 Average Reward : 24210.0 		 Eval Score: 21880.0
Episode : 38550 		 Timestep : 1759816 		 Average Reward : 27860.0 		 Eval Score: 28390.0
Episode : 38560 		 Timestep : 1760162 		 Average Reward : 17520.0 		 Eval Score: 19730.0
Episode : 38570 		 Timestep : 1760506 		 Average Reward : 19240.0 		 Eval Score: 18655.0
Episode : 38580 		 Timestep : 1760831 		 Average Reward : 15640.0 		 Eval Score: 23395.0
Episode : 38590 		 Timestep : 1761086 		 Average Reward : 13260.0 		 Eval Score: 24670.0
Episode : 38600 		 Timestep : 1761518 		 Average Reward : 22360.0 		 Eval Score: 24650.0
Episode : 38610 		 Ti

Episode : 39430 		 Timestep : 1800250 		 Average Reward : 36450.0 		 Eval Score: 16915.0
Episode : 39440 		 Timestep : 1800631 		 Average Reward : 20180.0 		 Eval Score: 22775.0
Episode : 39450 		 Timestep : 1801134 		 Average Reward : 27400.0 		 Eval Score: 23810.0
Episode : 39460 		 Timestep : 1801571 		 Average Reward : 23020.0 		 Eval Score: 26590.0
Episode : 39470 		 Timestep : 1802109 		 Average Reward : 29230.0 		 Eval Score: 32975.0
Episode : 39480 		 Timestep : 1802737 		 Average Reward : 35610.0 		 Eval Score: 26950.0
Episode : 39490 		 Timestep : 1803253 		 Average Reward : 31180.0 		 Eval Score: 26235.0
Episode : 39500 		 Timestep : 1803814 		 Average Reward : 32550.0 		 Eval Score: 19405.0
Episode : 39510 		 Timestep : 1804090 		 Average Reward : 14600.0 		 Eval Score: 18515.0
Episode : 39520 		 Timestep : 1804621 		 Average Reward : 29660.0 		 Eval Score: 29540.0
Episode : 39530 		 Timestep : 1805075 		 Average Reward : 24000.0 		 Eval Score: 22685.0
Episode : 39540 		 Ti

0