################################################################################
> # **Part - I**

*   define actor critic networks
*   define PPO algorithm

################################################################################

In [1]:


############################### Import libraries ###############################


import os
import glob
import time
from datetime import datetime

import torch
import torch.nn as nn
from torch.distributions import MultivariateNormal
from torch.distributions import Categorical

import numpy as np

import gym
import pongGym

################################## set device ##################################

print("============================================================================================")


# set device to cpu or cuda
device = torch.device('cpu')

if(torch.cuda.is_available()): 
    device = torch.device('cuda:0') 
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")
    
print("============================================================================================")




################################## PPO Policy ##################################


class RolloutBuffer:
    def __init__(self):
        self.actions = []
        self.states = []
        self.logprobs = []
        self.rewards = []
        self.is_terminals = []
    

    def clear(self):
        del self.actions[:]
        del self.states[:]
        del self.logprobs[:]
        del self.rewards[:]
        del self.is_terminals[:]


class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim, action_std_init):
        super(ActorCritic, self).__init__()

        
        self.actor = nn.Sequential(
                        nn.Linear(state_dim, 128),
                        nn.ReLU(),
                        nn.Linear(128, 128),
                        nn.ReLU(),
                        nn.Linear(128, 128),
                        nn.ReLU(),
                        nn.Linear(128, action_dim),
                        nn.Softmax(dim=-1)
                    )

        
        # critic
        self.critic = nn.Sequential(
                        nn.Linear(state_dim, 128),
                        nn.ReLU(),
                        nn.Linear(128, 128),
                        nn.ReLU(),
                        nn.Linear(128, 128),
                        nn.ReLU(),
                        nn.Linear(128, 1)
                    )

    def forward(self):
        raise NotImplementedError
    

    def act(self, state):

        
        action_probs = self.actor(state)
        dist = Categorical(action_probs)

        action = dist.sample()
        action_logprob = dist.log_prob(action)
        
        return action.detach(), action_logprob.detach()
    

    def evaluate(self, state, action):


        action_probs = self.actor(state)
        dist = Categorical(action_probs)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_values = self.critic(state)
        
        return action_logprobs, state_values, dist_entropy


class PPO:
    def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, action_std_init=0.6):

        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs
        
        self.buffer = RolloutBuffer()

        self.policy = ActorCritic(state_dim, action_dim, action_std_init).to(device)
        self.optimizer = torch.optim.Adam([
                        {'params': self.policy.actor.parameters(), 'lr': lr_actor},
                        {'params': self.policy.critic.parameters(), 'lr': lr_critic}
                    ])

        self.policy_old = ActorCritic(state_dim, action_dim, action_std_init).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())
        
        self.MseLoss = nn.MSELoss()


    def select_action(self, state):

        with torch.no_grad():
            state = torch.FloatTensor(state).to(device)
            action, action_logprob = self.policy_old.act(state)

        self.buffer.states.append(state)
        self.buffer.actions.append(action)
        self.buffer.logprobs.append(action_logprob)

        return action.item()


    def update(self):

        # Monte Carlo estimate of returns
        rewards = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(self.buffer.rewards), reversed(self.buffer.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            rewards.insert(0, discounted_reward)
            
        # Normalizing the rewards
        rewards = torch.tensor(rewards, dtype=torch.float32).to(device)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-7)

        # convert list to tensor
        old_states = torch.squeeze(torch.stack(self.buffer.states, dim=0)).detach().to(device)
        old_actions = torch.squeeze(torch.stack(self.buffer.actions, dim=0)).detach().to(device)
        old_logprobs = torch.squeeze(torch.stack(self.buffer.logprobs, dim=0)).detach().to(device)

        
        # Optimize policy for K epochs
        for _ in range(self.K_epochs):

            # Evaluating old actions and values
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            # match state_values tensor dimensions with rewards tensor
            state_values = torch.squeeze(state_values)
            
            # Finding the ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Finding Surrogate Loss
            advantages = rewards - state_values.detach()   
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages

            # final loss of clipped objective PPO
            loss = -torch.min(surr1, surr2) + 0.5*self.MseLoss(state_values, rewards) - 0.01*dist_entropy
            
            # take gradient step
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()
            
        # Copy new weights into old policy
        self.policy_old.load_state_dict(self.policy.state_dict())

        # clear buffer
        self.buffer.clear()
    
    
    def save(self, checkpoint_path):
        torch.save(self.policy_old.state_dict(), checkpoint_path)
   

    def load(self, checkpoint_path):
        self.policy_old.load_state_dict(torch.load(checkpoint_path, map_location=lambda storage, loc: storage))
        self.policy.load_state_dict(torch.load(checkpoint_path, map_location=lambda storage, loc: storage))
        
        
       



pygame 2.1.0 (SDL 2.0.16, Python 3.9.7)
Hello from the pygame community. https://www.pygame.org/contribute.html
Device set to : NVIDIA GeForce RTX 3070


################################################################################
> # **Part - II**

*   train PPO algorithm on environments
*   save preTrained networks weights and log files

################################################################################

In [None]:

print("============================================================================================")


################################### Training ###################################


####### initialize environment hyperparameters ######

env_name = "pong_game"

max_ep_len = 60*60                    # max timesteps in one episode
max_training_timesteps = int(1e5)   # break training loop if timeteps > max_training_timesteps

print_freq = max_ep_len * 4     # print avg reward in the interval (in num timesteps)
log_freq = max_ep_len * 2       # log avg reward in the interval (in num timesteps)
save_model_freq = int(2e4)      # save model frequency (in num timesteps)

action_std = None


#####################################################


## Note : print/log frequencies should be > than max_ep_len


################ PPO hyperparameters ################


update_timestep = max_ep_len * 4      # update policy every n timesteps
K_epochs = 40               # update policy for K epochs
eps_clip = 0.2              # clip parameter for PPO
gamma = 0.99                # discount factor

lr_actor = 0.0003       # learning rate for actor network
lr_critic = 0.001       # learning rate for critic network

random_seed = 0         # set random seed if required (0 = no random seed)

#####################################################



print("training environment name : " + env_name)

env = pongGym.DoublePong()

# state space dimension
state_dim = env.observation_space.shape[0]

# action space dimension

action_dim = env.action_space.n



###################### logging ######################

#### log files for multiple runs are NOT overwritten

log_dir = "PPO_logs"
if not os.path.exists(log_dir):
      os.makedirs(log_dir)

log_dir = log_dir + '/' + env_name + '/'
if not os.path.exists(log_dir):
      os.makedirs(log_dir)


#### get number of log files in log directory
run_num = 0
current_num_files = next(os.walk(log_dir))[2]
run_num = len(current_num_files)


#### create new log file for each run 
log_f_name = log_dir + '/PPO_' + env_name + "_log_" + str(run_num) + ".csv"

print("current logging run number for " + env_name + " : ", run_num)
print("logging at : " + log_f_name)

#####################################################


################### checkpointing ###################

run_num_pretrained = 0      #### change this to prevent overwriting weights in same env_name folder

directory = "PPO_preTrained"
if not os.path.exists(directory):
      os.makedirs(directory)

directory = directory + '/' + env_name + '/'
if not os.path.exists(directory):
      os.makedirs(directory)

#####################################################


############# print all hyperparameters #############

print("--------------------------------------------------------------------------------------------")

print("max training timesteps : ", max_training_timesteps)
print("max timesteps per episode : ", max_ep_len)

print("model saving frequency : " + str(save_model_freq) + " timesteps")
print("log frequency : " + str(log_freq) + " timesteps")
print("printing average reward over episodes in last : " + str(print_freq) + " timesteps")

print("--------------------------------------------------------------------------------------------")

print("state space dimension : ", state_dim)
print("action space dimension : ", action_dim)

print("--------------------------------------------------------------------------------------------")

print("Initializing a discrete action space policy")

print("--------------------------------------------------------------------------------------------")

print("PPO update frequency : " + str(update_timestep) + " timesteps") 
print("PPO K epochs : ", K_epochs)
print("PPO epsilon clip : ", eps_clip)
print("discount factor (gamma) : ", gamma)

print("--------------------------------------------------------------------------------------------")

print("optimizer learning rate actor : ", lr_actor)
print("optimizer learning rate critic : ", lr_critic)

if random_seed:
    print("--------------------------------------------------------------------------------------------")
    print("setting random seed to ", random_seed)
    torch.manual_seed(random_seed)
    env.seed(random_seed)
    np.random.seed(random_seed)

#####################################################

print("============================================================================================")

################# training procedure ################

# initialize a PPO agent
ppo_agent = PPO(state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, action_std)


# track total training time
start_time = datetime.now().replace(microsecond=0)
print("Started training at (GMT) : ", start_time)

print("============================================================================================")


# logging file
log_f = open(log_f_name,"w+")
log_f.write('episode,timestep,reward\n')


# printing and logging variables
print_running_reward = 0
print_running_episodes = 0

log_running_reward = 0
log_running_episodes = 0

time_step = 0
i_episode = 0

Max_reward = 0
# training loop
# env = pongGym.DoublePong()
while i_episode < 100000:
    
    state = env.reset()
    current_ep_reward = 0
    for t in range(1, max_ep_len+1):
        
        # select action with policy
        action = ppo_agent.select_action(state)
        state, reward, done, _ = env.step(action)
#         if i_episode > 100:
#             env.render()
        
        # saving reward and is_terminals
        ppo_agent.buffer.rewards.append(reward)
        ppo_agent.buffer.is_terminals.append(done)
        
        time_step +=1
        current_ep_reward += reward

        # update PPO agent
        if time_step % update_timestep == 0:
            ppo_agent.update()

        # log in logging file
        if time_step % log_freq == 0:

            # log average reward till last episode
            log_avg_reward = log_running_reward / log_running_episodes
            log_avg_reward = round(log_avg_reward, 4)

            log_f.write('{},{},{}\n'.format(i_episode, time_step, log_avg_reward))
            log_f.flush()

            log_running_reward = 0
            log_running_episodes = 0
            
        # printing average reward
        if time_step % print_freq == 0:

            # print average reward till last episode
            print_avg_reward = print_running_reward / print_running_episodes
            print_avg_reward = round(print_avg_reward, 2)

            print("Episode : {} \t\t Timestep : {} \t\t Average Reward : {}".format(i_episode, time_step, print_avg_reward))
            print_running_reward = 0
            print_running_episodes = 0
        # break; if the episode is over
        if done:
            break

    # save model weights
#     print("Episode : {} \t\t Totol reward : {}".format(i_episode, current_ep_reward))
    if current_ep_reward > Max_reward:
        Max_reward = current_ep_reward
        checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, int(Max_reward), run_num_pretrained)
        print("--------------------------------------------------------------------------------------------")
        print("saving model at : " + checkpoint_path)
        ppo_agent.save(checkpoint_path)
        print("model saved")
        print("Elapsed Time  : ", datetime.now().replace(microsecond=0) - start_time)
        print("--------------------------------------------------------------------------------------------")
    print_running_reward += current_ep_reward
    print_running_episodes += 1

    log_running_reward += current_ep_reward
    log_running_episodes += 1

    i_episode += 1


log_f.close()
env.close()




# print total training time
print("============================================================================================")
end_time = datetime.now().replace(microsecond=0)
print("Started training at (GMT) : ", start_time)
print("Finished training at (GMT) : ", end_time)
print("Total training time  : ", end_time - start_time)
print("============================================================================================")







training environment name : pong_game
current logging run number for pong_game :  0
logging at : PPO_logs/pong_game//PPO_pong_game_log_0.csv
--------------------------------------------------------------------------------------------
max training timesteps :  100000
max timesteps per episode :  3600
model saving frequency : 20000 timesteps
log frequency : 7200 timesteps
printing average reward over episodes in last : 14400 timesteps
--------------------------------------------------------------------------------------------
state space dimension :  42
action space dimension :  4
--------------------------------------------------------------------------------------------
Initializing a discrete action space policy
--------------------------------------------------------------------------------------------
PPO update frequency : 14400 timesteps
PPO K epochs :  40
PPO epsilon clip :  0.2
discount factor (gamma) :  0.99
----------------------------------------------------------------------

Episode : 399 		 Timestep : 1440000 		 Average Reward : -294.2
Episode : 403 		 Timestep : 1454400 		 Average Reward : -235.44
Episode : 407 		 Timestep : 1468800 		 Average Reward : -285.52
Episode : 411 		 Timestep : 1483200 		 Average Reward : -274.72
Episode : 415 		 Timestep : 1497600 		 Average Reward : -250.94
Episode : 419 		 Timestep : 1512000 		 Average Reward : -274.55
Episode : 423 		 Timestep : 1526400 		 Average Reward : -322.71
Episode : 427 		 Timestep : 1540800 		 Average Reward : -193.3
Episode : 431 		 Timestep : 1555200 		 Average Reward : -164.8
Episode : 435 		 Timestep : 1569600 		 Average Reward : -277.66
Episode : 439 		 Timestep : 1584000 		 Average Reward : -186.61
Episode : 443 		 Timestep : 1598400 		 Average Reward : -177.96
Episode : 447 		 Timestep : 1612800 		 Average Reward : -290.81
Episode : 451 		 Timestep : 1627200 		 Average Reward : -198.67
Episode : 455 		 Timestep : 1641600 		 Average Reward : -273.33
Episode : 459 		 Timestep : 1656000 		 Aver

Episode : 895 		 Timestep : 3225600 		 Average Reward : -181.85
Episode : 899 		 Timestep : 3240000 		 Average Reward : -238.44
Episode : 903 		 Timestep : 3254400 		 Average Reward : -157.52
Episode : 907 		 Timestep : 3268800 		 Average Reward : -214.16
Episode : 911 		 Timestep : 3283200 		 Average Reward : -165.05
Episode : 915 		 Timestep : 3297600 		 Average Reward : -204.2
Episode : 919 		 Timestep : 3312000 		 Average Reward : -227.65
Episode : 923 		 Timestep : 3326400 		 Average Reward : -311.22
Episode : 927 		 Timestep : 3340800 		 Average Reward : -181.98
Episode : 931 		 Timestep : 3355200 		 Average Reward : -233.77
Episode : 935 		 Timestep : 3369600 		 Average Reward : -274.56
Episode : 939 		 Timestep : 3384000 		 Average Reward : -253.36
Episode : 943 		 Timestep : 3398400 		 Average Reward : -187.28
Episode : 947 		 Timestep : 3412800 		 Average Reward : -193.83
Episode : 951 		 Timestep : 3427200 		 Average Reward : -354.09
Episode : 955 		 Timestep : 3441600 		 Av

Episode : 1403 		 Timestep : 5054400 		 Average Reward : -227.51
Episode : 1407 		 Timestep : 5068800 		 Average Reward : -260.09
Episode : 1411 		 Timestep : 5083200 		 Average Reward : -259.03
Episode : 1415 		 Timestep : 5097600 		 Average Reward : -147.32
Episode : 1419 		 Timestep : 5112000 		 Average Reward : -230.07
Episode : 1423 		 Timestep : 5126400 		 Average Reward : -155.13
Episode : 1427 		 Timestep : 5140800 		 Average Reward : -234.36
Episode : 1431 		 Timestep : 5155200 		 Average Reward : -213.77
Episode : 1435 		 Timestep : 5169600 		 Average Reward : -283.47
Episode : 1439 		 Timestep : 5184000 		 Average Reward : -255.49
Episode : 1443 		 Timestep : 5198400 		 Average Reward : -237.36
Episode : 1447 		 Timestep : 5212800 		 Average Reward : -151.89
Episode : 1451 		 Timestep : 5227200 		 Average Reward : -131.32
Episode : 1455 		 Timestep : 5241600 		 Average Reward : -72.71
Episode : 1459 		 Timestep : 5256000 		 Average Reward : -95.21
Episode : 1463 		 Timestep 

Episode : 1911 		 Timestep : 6883200 		 Average Reward : -331.25
Episode : 1915 		 Timestep : 6897600 		 Average Reward : -294.08
Episode : 1919 		 Timestep : 6912000 		 Average Reward : -196.49
Episode : 1923 		 Timestep : 6926400 		 Average Reward : -215.42
Episode : 1927 		 Timestep : 6940800 		 Average Reward : -255.84
Episode : 1931 		 Timestep : 6955200 		 Average Reward : -261.16
Episode : 1935 		 Timestep : 6969600 		 Average Reward : -163.56
Episode : 1939 		 Timestep : 6984000 		 Average Reward : -160.43
Episode : 1943 		 Timestep : 6998400 		 Average Reward : -333.64
Episode : 1947 		 Timestep : 7012800 		 Average Reward : -275.83
Episode : 1951 		 Timestep : 7027200 		 Average Reward : -177.03
Episode : 1955 		 Timestep : 7041600 		 Average Reward : -242.55
Episode : 1959 		 Timestep : 7056000 		 Average Reward : -187.35
Episode : 1963 		 Timestep : 7070400 		 Average Reward : -282.76
Episode : 1967 		 Timestep : 7084800 		 Average Reward : -219.87
Episode : 1971 		 Timeste

Episode : 2419 		 Timestep : 8712000 		 Average Reward : -213.62
Episode : 2423 		 Timestep : 8726400 		 Average Reward : -239.89
Episode : 2427 		 Timestep : 8740800 		 Average Reward : -273.08
Episode : 2431 		 Timestep : 8755200 		 Average Reward : -200.76
Episode : 2435 		 Timestep : 8769600 		 Average Reward : -149.74
Episode : 2439 		 Timestep : 8784000 		 Average Reward : -196.33
Episode : 2443 		 Timestep : 8798400 		 Average Reward : -265.62
Episode : 2447 		 Timestep : 8812800 		 Average Reward : -137.25
Episode : 2451 		 Timestep : 8827200 		 Average Reward : -331.39
Episode : 2455 		 Timestep : 8841600 		 Average Reward : -288.06
Episode : 2459 		 Timestep : 8856000 		 Average Reward : -267.67
Episode : 2463 		 Timestep : 8870400 		 Average Reward : -262.61
Episode : 2467 		 Timestep : 8884800 		 Average Reward : -253.4
Episode : 2471 		 Timestep : 8899200 		 Average Reward : -168.49
Episode : 2475 		 Timestep : 8913600 		 Average Reward : -371.81
Episode : 2479 		 Timestep

Episode : 2923 		 Timestep : 10526400 		 Average Reward : -167.91
Episode : 2927 		 Timestep : 10540800 		 Average Reward : -169.64
Episode : 2931 		 Timestep : 10555200 		 Average Reward : -278.23
Episode : 2935 		 Timestep : 10569600 		 Average Reward : -346.55
Episode : 2939 		 Timestep : 10584000 		 Average Reward : -261.9
Episode : 2943 		 Timestep : 10598400 		 Average Reward : -261.92
Episode : 2947 		 Timestep : 10612800 		 Average Reward : -236.33
Episode : 2951 		 Timestep : 10627200 		 Average Reward : -143.08
Episode : 2955 		 Timestep : 10641600 		 Average Reward : -138.16
Episode : 2959 		 Timestep : 10656000 		 Average Reward : -225.99
Episode : 2963 		 Timestep : 10670400 		 Average Reward : -227.2
Episode : 2967 		 Timestep : 10684800 		 Average Reward : -154.07
Episode : 2971 		 Timestep : 10699200 		 Average Reward : -320.01
Episode : 2975 		 Timestep : 10713600 		 Average Reward : -200.35
Episode : 2979 		 Timestep : 10728000 		 Average Reward : -353.76
Episode : 29

Episode : 3423 		 Timestep : 12326400 		 Average Reward : -188.6
Episode : 3427 		 Timestep : 12340800 		 Average Reward : -286.7
Episode : 3431 		 Timestep : 12355200 		 Average Reward : -215.71
Episode : 3435 		 Timestep : 12369600 		 Average Reward : -237.72
Episode : 3439 		 Timestep : 12384000 		 Average Reward : -325.91
Episode : 3443 		 Timestep : 12398400 		 Average Reward : -336.86
Episode : 3447 		 Timestep : 12412800 		 Average Reward : -164.45
Episode : 3451 		 Timestep : 12427200 		 Average Reward : -182.11
Episode : 3455 		 Timestep : 12441600 		 Average Reward : -244.48
Episode : 3459 		 Timestep : 12456000 		 Average Reward : -185.62
Episode : 3463 		 Timestep : 12470400 		 Average Reward : -260.6
Episode : 3467 		 Timestep : 12484800 		 Average Reward : -206.17
Episode : 3471 		 Timestep : 12499200 		 Average Reward : -221.89
Episode : 3475 		 Timestep : 12513600 		 Average Reward : -244.38
Episode : 3479 		 Timestep : 12528000 		 Average Reward : -280.11
Episode : 348

Episode : 3903 		 Timestep : 14054400 		 Average Reward : -182.82
Episode : 3907 		 Timestep : 14068800 		 Average Reward : -200.24
Episode : 3911 		 Timestep : 14083200 		 Average Reward : -251.03
Episode : 3915 		 Timestep : 14097600 		 Average Reward : -161.36
Episode : 3919 		 Timestep : 14112000 		 Average Reward : -244.92
Episode : 3923 		 Timestep : 14126400 		 Average Reward : -184.12
Episode : 3927 		 Timestep : 14140800 		 Average Reward : -194.69
Episode : 3931 		 Timestep : 14155200 		 Average Reward : -211.42
Episode : 3935 		 Timestep : 14169600 		 Average Reward : -172.83
Episode : 3939 		 Timestep : 14184000 		 Average Reward : -209.5
Episode : 3943 		 Timestep : 14198400 		 Average Reward : -220.18
Episode : 3947 		 Timestep : 14212800 		 Average Reward : -221.55
Episode : 3951 		 Timestep : 14227200 		 Average Reward : -186.0
Episode : 3955 		 Timestep : 14241600 		 Average Reward : -184.63
Episode : 3959 		 Timestep : 14256000 		 Average Reward : -249.88
Episode : 39

Episode : 4403 		 Timestep : 15854400 		 Average Reward : -232.52
Episode : 4407 		 Timestep : 15868800 		 Average Reward : -119.46
Episode : 4411 		 Timestep : 15883200 		 Average Reward : -295.68
Episode : 4415 		 Timestep : 15897600 		 Average Reward : -305.22
Episode : 4419 		 Timestep : 15912000 		 Average Reward : -183.25
Episode : 4423 		 Timestep : 15926400 		 Average Reward : -225.16
Episode : 4427 		 Timestep : 15940800 		 Average Reward : -187.17
Episode : 4431 		 Timestep : 15955200 		 Average Reward : -302.54
Episode : 4435 		 Timestep : 15969600 		 Average Reward : -244.91
Episode : 4439 		 Timestep : 15984000 		 Average Reward : -262.29
Episode : 4443 		 Timestep : 15998400 		 Average Reward : -209.4
Episode : 4447 		 Timestep : 16012800 		 Average Reward : -191.07
Episode : 4451 		 Timestep : 16027200 		 Average Reward : -171.48
Episode : 4455 		 Timestep : 16041600 		 Average Reward : -281.92
Episode : 4459 		 Timestep : 16056000 		 Average Reward : -245.89
Episode : 4

Episode : 4903 		 Timestep : 17654400 		 Average Reward : -146.3
Episode : 4907 		 Timestep : 17668800 		 Average Reward : -237.21
Episode : 4911 		 Timestep : 17683200 		 Average Reward : -171.83
Episode : 4915 		 Timestep : 17697600 		 Average Reward : -176.44
Episode : 4919 		 Timestep : 17712000 		 Average Reward : -121.56
Episode : 4923 		 Timestep : 17726400 		 Average Reward : -102.88
Episode : 4927 		 Timestep : 17740800 		 Average Reward : -124.37
Episode : 4931 		 Timestep : 17755200 		 Average Reward : -170.14
Episode : 4935 		 Timestep : 17769600 		 Average Reward : -99.07
Episode : 4939 		 Timestep : 17784000 		 Average Reward : -234.5
Episode : 4943 		 Timestep : 17798400 		 Average Reward : -214.75
Episode : 4947 		 Timestep : 17812800 		 Average Reward : -226.01
Episode : 4951 		 Timestep : 17827200 		 Average Reward : -169.38
Episode : 4955 		 Timestep : 17841600 		 Average Reward : -212.91
Episode : 4959 		 Timestep : 17856000 		 Average Reward : -259.95
Episode : 496

Episode : 5403 		 Timestep : 19454400 		 Average Reward : -276.78
Episode : 5407 		 Timestep : 19468800 		 Average Reward : -224.93
Episode : 5411 		 Timestep : 19483200 		 Average Reward : -185.65
Episode : 5415 		 Timestep : 19497600 		 Average Reward : -283.48
Episode : 5419 		 Timestep : 19512000 		 Average Reward : -192.36
Episode : 5423 		 Timestep : 19526400 		 Average Reward : -222.27
Episode : 5427 		 Timestep : 19540800 		 Average Reward : -203.1
Episode : 5431 		 Timestep : 19555200 		 Average Reward : -227.61
Episode : 5435 		 Timestep : 19569600 		 Average Reward : -328.71
Episode : 5439 		 Timestep : 19584000 		 Average Reward : -232.04
Episode : 5443 		 Timestep : 19598400 		 Average Reward : -276.25
Episode : 5447 		 Timestep : 19612800 		 Average Reward : -253.37
Episode : 5451 		 Timestep : 19627200 		 Average Reward : -164.57
Episode : 5455 		 Timestep : 19641600 		 Average Reward : -293.73
Episode : 5459 		 Timestep : 19656000 		 Average Reward : -305.96
Episode : 5

Episode : 5903 		 Timestep : 21254400 		 Average Reward : -227.82
Episode : 5907 		 Timestep : 21268800 		 Average Reward : -297.76
Episode : 5911 		 Timestep : 21283200 		 Average Reward : -319.25
Episode : 5915 		 Timestep : 21297600 		 Average Reward : -254.93
Episode : 5919 		 Timestep : 21312000 		 Average Reward : -207.36
Episode : 5923 		 Timestep : 21326400 		 Average Reward : -307.37
Episode : 5927 		 Timestep : 21340800 		 Average Reward : -297.67
Episode : 5931 		 Timestep : 21355200 		 Average Reward : -233.58
Episode : 5935 		 Timestep : 21369600 		 Average Reward : -240.46
Episode : 5939 		 Timestep : 21384000 		 Average Reward : -229.2
Episode : 5943 		 Timestep : 21398400 		 Average Reward : -204.81
Episode : 5947 		 Timestep : 21412800 		 Average Reward : -293.0
Episode : 5951 		 Timestep : 21427200 		 Average Reward : -155.34
Episode : 5955 		 Timestep : 21441600 		 Average Reward : -182.47
Episode : 5959 		 Timestep : 21456000 		 Average Reward : -208.03
Episode : 59

Episode : 6403 		 Timestep : 23054400 		 Average Reward : -164.71
Episode : 6407 		 Timestep : 23068800 		 Average Reward : -246.75
Episode : 6411 		 Timestep : 23083200 		 Average Reward : -235.29
Episode : 6415 		 Timestep : 23097600 		 Average Reward : -191.22
Episode : 6419 		 Timestep : 23112000 		 Average Reward : -139.33
Episode : 6423 		 Timestep : 23126400 		 Average Reward : -89.35
Episode : 6427 		 Timestep : 23140800 		 Average Reward : -126.15
Episode : 6431 		 Timestep : 23155200 		 Average Reward : -152.61
Episode : 6435 		 Timestep : 23169600 		 Average Reward : -39.41
Episode : 6439 		 Timestep : 23184000 		 Average Reward : -120.38
Episode : 6443 		 Timestep : 23198400 		 Average Reward : -87.29
Episode : 6447 		 Timestep : 23212800 		 Average Reward : -147.64
Episode : 6451 		 Timestep : 23227200 		 Average Reward : -118.23
Episode : 6455 		 Timestep : 23241600 		 Average Reward : -184.87
Episode : 6459 		 Timestep : 23256000 		 Average Reward : -113.01
Episode : 646

Episode : 6903 		 Timestep : 24854400 		 Average Reward : -132.35
Episode : 6907 		 Timestep : 24868800 		 Average Reward : -101.68
Episode : 6911 		 Timestep : 24883200 		 Average Reward : -163.31
Episode : 6915 		 Timestep : 24897600 		 Average Reward : -178.02
Episode : 6919 		 Timestep : 24912000 		 Average Reward : -92.16
Episode : 6923 		 Timestep : 24926400 		 Average Reward : -218.88
Episode : 6927 		 Timestep : 24940800 		 Average Reward : -160.59
Episode : 6931 		 Timestep : 24955200 		 Average Reward : -108.05
Episode : 6935 		 Timestep : 24969600 		 Average Reward : -159.54
Episode : 6939 		 Timestep : 24984000 		 Average Reward : -260.18
Episode : 6943 		 Timestep : 24998400 		 Average Reward : -99.3
Episode : 6947 		 Timestep : 25012800 		 Average Reward : -164.16
Episode : 6951 		 Timestep : 25027200 		 Average Reward : -192.01
Episode : 6955 		 Timestep : 25041600 		 Average Reward : -116.03
Episode : 6959 		 Timestep : 25056000 		 Average Reward : -191.15
Episode : 696

Episode : 7403 		 Timestep : 26654400 		 Average Reward : -207.65
Episode : 7407 		 Timestep : 26668800 		 Average Reward : -171.97
Episode : 7411 		 Timestep : 26683200 		 Average Reward : -206.85
Episode : 7415 		 Timestep : 26697600 		 Average Reward : -211.91
Episode : 7419 		 Timestep : 26712000 		 Average Reward : -138.25
Episode : 7423 		 Timestep : 26726400 		 Average Reward : -156.41
Episode : 7427 		 Timestep : 26740800 		 Average Reward : -157.33
Episode : 7431 		 Timestep : 26755200 		 Average Reward : -101.36
Episode : 7435 		 Timestep : 26769600 		 Average Reward : -195.34
Episode : 7439 		 Timestep : 26784000 		 Average Reward : -153.69
Episode : 7443 		 Timestep : 26798400 		 Average Reward : -193.33
Episode : 7447 		 Timestep : 26812800 		 Average Reward : -221.62
Episode : 7451 		 Timestep : 26827200 		 Average Reward : -166.5
Episode : 7455 		 Timestep : 26841600 		 Average Reward : -129.02
Episode : 7459 		 Timestep : 26856000 		 Average Reward : -102.69
Episode : 7

Episode : 7887 		 Timestep : 28396800 		 Average Reward : -82.71
Episode : 7891 		 Timestep : 28411200 		 Average Reward : -173.09
Episode : 7895 		 Timestep : 28425600 		 Average Reward : -204.83
Episode : 7899 		 Timestep : 28440000 		 Average Reward : -208.42
Episode : 7903 		 Timestep : 28454400 		 Average Reward : -173.77
Episode : 7907 		 Timestep : 28468800 		 Average Reward : -144.28
Episode : 7911 		 Timestep : 28483200 		 Average Reward : -243.86
Episode : 7915 		 Timestep : 28497600 		 Average Reward : -173.07
Episode : 7919 		 Timestep : 28512000 		 Average Reward : -217.17
Episode : 7923 		 Timestep : 28526400 		 Average Reward : -184.95
Episode : 7927 		 Timestep : 28540800 		 Average Reward : -266.32
Episode : 7931 		 Timestep : 28555200 		 Average Reward : -202.45
Episode : 7935 		 Timestep : 28569600 		 Average Reward : -261.94
Episode : 7939 		 Timestep : 28584000 		 Average Reward : -116.5
Episode : 7943 		 Timestep : 28598400 		 Average Reward : -153.07
Episode : 79

Episode : 8387 		 Timestep : 30196800 		 Average Reward : -176.39
Episode : 8391 		 Timestep : 30211200 		 Average Reward : -221.37
Episode : 8395 		 Timestep : 30225600 		 Average Reward : -189.01
Episode : 8399 		 Timestep : 30240000 		 Average Reward : -250.51
Episode : 8403 		 Timestep : 30254400 		 Average Reward : -171.06
Episode : 8407 		 Timestep : 30268800 		 Average Reward : -291.28
Episode : 8411 		 Timestep : 30283200 		 Average Reward : -161.27
Episode : 8415 		 Timestep : 30297600 		 Average Reward : -166.25
Episode : 8419 		 Timestep : 30312000 		 Average Reward : -321.89
Episode : 8423 		 Timestep : 30326400 		 Average Reward : -165.86
Episode : 8427 		 Timestep : 30340800 		 Average Reward : -199.46
Episode : 8431 		 Timestep : 30355200 		 Average Reward : -208.72
Episode : 8435 		 Timestep : 30369600 		 Average Reward : -268.97
Episode : 8439 		 Timestep : 30384000 		 Average Reward : -209.4
Episode : 8443 		 Timestep : 30398400 		 Average Reward : -210.11
Episode : 8

Episode : 8887 		 Timestep : 31996800 		 Average Reward : -175.04
Episode : 8891 		 Timestep : 32011200 		 Average Reward : -46.19
Episode : 8895 		 Timestep : 32025600 		 Average Reward : -181.88
Episode : 8899 		 Timestep : 32040000 		 Average Reward : -88.15
Episode : 8903 		 Timestep : 32054400 		 Average Reward : -157.01
Episode : 8907 		 Timestep : 32068800 		 Average Reward : -153.83
Episode : 8911 		 Timestep : 32083200 		 Average Reward : -129.86
Episode : 8915 		 Timestep : 32097600 		 Average Reward : -42.4
Episode : 8919 		 Timestep : 32112000 		 Average Reward : -146.4
Episode : 8923 		 Timestep : 32126400 		 Average Reward : -179.92
Episode : 8927 		 Timestep : 32140800 		 Average Reward : -114.95
Episode : 8931 		 Timestep : 32155200 		 Average Reward : -187.81
Episode : 8935 		 Timestep : 32169600 		 Average Reward : -233.68
Episode : 8939 		 Timestep : 32184000 		 Average Reward : -163.26
Episode : 8943 		 Timestep : 32198400 		 Average Reward : -251.77
Episode : 8947 

Episode : 9387 		 Timestep : 33796800 		 Average Reward : -118.87
Episode : 9391 		 Timestep : 33811200 		 Average Reward : -165.5
Episode : 9395 		 Timestep : 33825600 		 Average Reward : -148.33
Episode : 9399 		 Timestep : 33840000 		 Average Reward : -173.85
Episode : 9403 		 Timestep : 33854400 		 Average Reward : -140.62
Episode : 9407 		 Timestep : 33868800 		 Average Reward : -226.49
Episode : 9411 		 Timestep : 33883200 		 Average Reward : -163.74
Episode : 9415 		 Timestep : 33897600 		 Average Reward : -211.52
Episode : 9419 		 Timestep : 33912000 		 Average Reward : -123.88
Episode : 9423 		 Timestep : 33926400 		 Average Reward : -190.19
Episode : 9427 		 Timestep : 33940800 		 Average Reward : -125.75
Episode : 9431 		 Timestep : 33955200 		 Average Reward : -165.73
Episode : 9435 		 Timestep : 33969600 		 Average Reward : -222.16
Episode : 9439 		 Timestep : 33984000 		 Average Reward : -153.5
Episode : 9443 		 Timestep : 33998400 		 Average Reward : -204.22
Episode : 94

Episode : 9887 		 Timestep : 35596800 		 Average Reward : -257.38
Episode : 9891 		 Timestep : 35611200 		 Average Reward : -157.95
Episode : 9895 		 Timestep : 35625600 		 Average Reward : -129.2
Episode : 9899 		 Timestep : 35640000 		 Average Reward : -170.77
Episode : 9903 		 Timestep : 35654400 		 Average Reward : -84.76
Episode : 9907 		 Timestep : 35668800 		 Average Reward : -174.6
Episode : 9911 		 Timestep : 35683200 		 Average Reward : -206.31
Episode : 9915 		 Timestep : 35697600 		 Average Reward : -231.2
Episode : 9919 		 Timestep : 35712000 		 Average Reward : -261.43
Episode : 9923 		 Timestep : 35726400 		 Average Reward : -193.71
Episode : 9927 		 Timestep : 35740800 		 Average Reward : -145.51
Episode : 9931 		 Timestep : 35755200 		 Average Reward : -188.08
Episode : 9935 		 Timestep : 35769600 		 Average Reward : -136.2
Episode : 9939 		 Timestep : 35784000 		 Average Reward : -127.88
Episode : 9943 		 Timestep : 35798400 		 Average Reward : -230.14
Episode : 9947 

Episode : 10383 		 Timestep : 37382400 		 Average Reward : -262.0
Episode : 10387 		 Timestep : 37396800 		 Average Reward : -225.63
Episode : 10391 		 Timestep : 37411200 		 Average Reward : -149.94
Episode : 10395 		 Timestep : 37425600 		 Average Reward : -104.31
Episode : 10399 		 Timestep : 37440000 		 Average Reward : -158.78
Episode : 10403 		 Timestep : 37454400 		 Average Reward : -211.8
Episode : 10407 		 Timestep : 37468800 		 Average Reward : -226.68
Episode : 10411 		 Timestep : 37483200 		 Average Reward : -151.44
Episode : 10415 		 Timestep : 37497600 		 Average Reward : -207.81
Episode : 10419 		 Timestep : 37512000 		 Average Reward : -192.07
Episode : 10423 		 Timestep : 37526400 		 Average Reward : -133.09
Episode : 10427 		 Timestep : 37540800 		 Average Reward : -106.88
Episode : 10431 		 Timestep : 37555200 		 Average Reward : -251.16
Episode : 10435 		 Timestep : 37569600 		 Average Reward : -156.96
Episode : 10439 		 Timestep : 37584000 		 Average Reward : -267.

Episode : 10875 		 Timestep : 39153600 		 Average Reward : -126.38
Episode : 10879 		 Timestep : 39168000 		 Average Reward : -109.04
Episode : 10883 		 Timestep : 39182400 		 Average Reward : -118.73
Episode : 10887 		 Timestep : 39196800 		 Average Reward : -250.93
Episode : 10891 		 Timestep : 39211200 		 Average Reward : -204.67
Episode : 10895 		 Timestep : 39225600 		 Average Reward : -221.61
Episode : 10899 		 Timestep : 39240000 		 Average Reward : -132.36
Episode : 10903 		 Timestep : 39254400 		 Average Reward : -195.66
Episode : 10907 		 Timestep : 39268800 		 Average Reward : -164.26
Episode : 10911 		 Timestep : 39283200 		 Average Reward : -225.25
Episode : 10915 		 Timestep : 39297600 		 Average Reward : -171.84
Episode : 10919 		 Timestep : 39312000 		 Average Reward : -196.27
Episode : 10923 		 Timestep : 39326400 		 Average Reward : -167.61
Episode : 10927 		 Timestep : 39340800 		 Average Reward : -215.35
Episode : 10931 		 Timestep : 39355200 		 Average Reward : -12

Episode : 11367 		 Timestep : 40924800 		 Average Reward : -130.82
Episode : 11371 		 Timestep : 40939200 		 Average Reward : -118.87
Episode : 11375 		 Timestep : 40953600 		 Average Reward : -63.18
Episode : 11379 		 Timestep : 40968000 		 Average Reward : -153.74
Episode : 11383 		 Timestep : 40982400 		 Average Reward : -207.55
Episode : 11387 		 Timestep : 40996800 		 Average Reward : -117.99
Episode : 11391 		 Timestep : 41011200 		 Average Reward : -144.54
Episode : 11395 		 Timestep : 41025600 		 Average Reward : -120.42
Episode : 11399 		 Timestep : 41040000 		 Average Reward : -96.04
Episode : 11403 		 Timestep : 41054400 		 Average Reward : -128.96
Episode : 11407 		 Timestep : 41068800 		 Average Reward : -83.15
Episode : 11411 		 Timestep : 41083200 		 Average Reward : -86.45
Episode : 11415 		 Timestep : 41097600 		 Average Reward : -97.6
Episode : 11419 		 Timestep : 41112000 		 Average Reward : -135.19
Episode : 11423 		 Timestep : 41126400 		 Average Reward : -52.23
Ep

Episode : 11843 		 Timestep : 42638400 		 Average Reward : -126.42
Episode : 11847 		 Timestep : 42652800 		 Average Reward : -30.44
Episode : 11851 		 Timestep : 42667200 		 Average Reward : -164.37
Episode : 11855 		 Timestep : 42681600 		 Average Reward : -117.28
Episode : 11859 		 Timestep : 42696000 		 Average Reward : -186.18
Episode : 11863 		 Timestep : 42710400 		 Average Reward : -146.54
Episode : 11867 		 Timestep : 42724800 		 Average Reward : -194.55
Episode : 11871 		 Timestep : 42739200 		 Average Reward : -193.79
Episode : 11875 		 Timestep : 42753600 		 Average Reward : -79.95
Episode : 11879 		 Timestep : 42768000 		 Average Reward : -86.37
Episode : 11883 		 Timestep : 42782400 		 Average Reward : -133.74
Episode : 11887 		 Timestep : 42796800 		 Average Reward : -63.54
Episode : 11891 		 Timestep : 42811200 		 Average Reward : -116.89
Episode : 11895 		 Timestep : 42825600 		 Average Reward : -111.82
Episode : 11899 		 Timestep : 42840000 		 Average Reward : -99.9
E

Episode : 12335 		 Timestep : 44409600 		 Average Reward : -85.45
Episode : 12339 		 Timestep : 44424000 		 Average Reward : -110.74
Episode : 12343 		 Timestep : 44438400 		 Average Reward : -165.5
Episode : 12347 		 Timestep : 44452800 		 Average Reward : -153.81
Episode : 12351 		 Timestep : 44467200 		 Average Reward : -136.06
Episode : 12355 		 Timestep : 44481600 		 Average Reward : -132.49
Episode : 12359 		 Timestep : 44496000 		 Average Reward : -75.64
Episode : 12363 		 Timestep : 44510400 		 Average Reward : -144.06
Episode : 12367 		 Timestep : 44524800 		 Average Reward : -151.4
Episode : 12371 		 Timestep : 44539200 		 Average Reward : -119.37
Episode : 12375 		 Timestep : 44553600 		 Average Reward : -94.92
Episode : 12379 		 Timestep : 44568000 		 Average Reward : -136.66
Episode : 12383 		 Timestep : 44582400 		 Average Reward : -120.91
Episode : 12387 		 Timestep : 44596800 		 Average Reward : -128.64
Episode : 12391 		 Timestep : 44611200 		 Average Reward : -167.55


Episode : 12811 		 Timestep : 46123200 		 Average Reward : -138.87
Episode : 12815 		 Timestep : 46137600 		 Average Reward : -168.46
Episode : 12819 		 Timestep : 46152000 		 Average Reward : -158.13
Episode : 12823 		 Timestep : 46166400 		 Average Reward : -107.51
Episode : 12827 		 Timestep : 46180800 		 Average Reward : -125.14
Episode : 12831 		 Timestep : 46195200 		 Average Reward : -185.09
Episode : 12835 		 Timestep : 46209600 		 Average Reward : -102.43
Episode : 12839 		 Timestep : 46224000 		 Average Reward : -118.08
Episode : 12843 		 Timestep : 46238400 		 Average Reward : -216.33
Episode : 12847 		 Timestep : 46252800 		 Average Reward : -213.1
Episode : 12851 		 Timestep : 46267200 		 Average Reward : -174.86
Episode : 12855 		 Timestep : 46281600 		 Average Reward : -137.48
Episode : 12859 		 Timestep : 46296000 		 Average Reward : -119.04
Episode : 12863 		 Timestep : 46310400 		 Average Reward : -126.81
Episode : 12867 		 Timestep : 46324800 		 Average Reward : -124

Episode : 13303 		 Timestep : 47894400 		 Average Reward : -78.16
Episode : 13307 		 Timestep : 47908800 		 Average Reward : -97.81
Episode : 13311 		 Timestep : 47923200 		 Average Reward : -166.96
Episode : 13315 		 Timestep : 47937600 		 Average Reward : -79.01
Episode : 13319 		 Timestep : 47952000 		 Average Reward : -67.8
Episode : 13323 		 Timestep : 47966400 		 Average Reward : -66.48
Episode : 13327 		 Timestep : 47980800 		 Average Reward : -107.89
Episode : 13331 		 Timestep : 47995200 		 Average Reward : -100.97
Episode : 13335 		 Timestep : 48009600 		 Average Reward : -172.4
Episode : 13339 		 Timestep : 48024000 		 Average Reward : -134.08
Episode : 13343 		 Timestep : 48038400 		 Average Reward : -163.95
Episode : 13347 		 Timestep : 48052800 		 Average Reward : -111.55
Episode : 13351 		 Timestep : 48067200 		 Average Reward : -28.27
Episode : 13355 		 Timestep : 48081600 		 Average Reward : -64.45
Episode : 13359 		 Timestep : 48096000 		 Average Reward : -78.59
Episo

Episode : 13799 		 Timestep : 49680000 		 Average Reward : -123.69
Episode : 13803 		 Timestep : 49694400 		 Average Reward : -121.5
Episode : 13807 		 Timestep : 49708800 		 Average Reward : -70.49
Episode : 13811 		 Timestep : 49723200 		 Average Reward : -54.94
Episode : 13815 		 Timestep : 49737600 		 Average Reward : -174.0
Episode : 13819 		 Timestep : 49752000 		 Average Reward : -120.88
Episode : 13823 		 Timestep : 49766400 		 Average Reward : -109.57
Episode : 13827 		 Timestep : 49780800 		 Average Reward : -148.16
Episode : 13831 		 Timestep : 49795200 		 Average Reward : -127.03
Episode : 13835 		 Timestep : 49809600 		 Average Reward : 5.58
Episode : 13839 		 Timestep : 49824000 		 Average Reward : -65.53
Episode : 13843 		 Timestep : 49838400 		 Average Reward : -134.66
Episode : 13847 		 Timestep : 49852800 		 Average Reward : -137.28
Episode : 13851 		 Timestep : 49867200 		 Average Reward : -98.79
Episode : 13855 		 Timestep : 49881600 		 Average Reward : -157.43
Epis

Episode : 14295 		 Timestep : 51465600 		 Average Reward : -58.22
Episode : 14299 		 Timestep : 51480000 		 Average Reward : -115.99
Episode : 14303 		 Timestep : 51494400 		 Average Reward : -91.67
Episode : 14307 		 Timestep : 51508800 		 Average Reward : -134.73
Episode : 14311 		 Timestep : 51523200 		 Average Reward : -117.17
Episode : 14315 		 Timestep : 51537600 		 Average Reward : -111.57
Episode : 14319 		 Timestep : 51552000 		 Average Reward : -75.03
Episode : 14323 		 Timestep : 51566400 		 Average Reward : -116.24
Episode : 14327 		 Timestep : 51580800 		 Average Reward : -137.68
Episode : 14331 		 Timestep : 51595200 		 Average Reward : -67.71
Episode : 14335 		 Timestep : 51609600 		 Average Reward : -151.01
Episode : 14339 		 Timestep : 51624000 		 Average Reward : -135.42
Episode : 14343 		 Timestep : 51638400 		 Average Reward : -73.91
Episode : 14347 		 Timestep : 51652800 		 Average Reward : -73.06
Episode : 14351 		 Timestep : 51667200 		 Average Reward : -15.56
Ep

Episode : 14791 		 Timestep : 53251200 		 Average Reward : -54.04
Episode : 14795 		 Timestep : 53265600 		 Average Reward : -70.85
Episode : 14799 		 Timestep : 53280000 		 Average Reward : -52.49
Episode : 14803 		 Timestep : 53294400 		 Average Reward : -81.85
Episode : 14807 		 Timestep : 53308800 		 Average Reward : -19.54
Episode : 14811 		 Timestep : 53323200 		 Average Reward : -92.55
Episode : 14815 		 Timestep : 53337600 		 Average Reward : -123.87
Episode : 14819 		 Timestep : 53352000 		 Average Reward : -48.22
Episode : 14823 		 Timestep : 53366400 		 Average Reward : -56.14
Episode : 14827 		 Timestep : 53380800 		 Average Reward : -162.21
Episode : 14831 		 Timestep : 53395200 		 Average Reward : -134.91
Episode : 14835 		 Timestep : 53409600 		 Average Reward : -68.88
Episode : 14839 		 Timestep : 53424000 		 Average Reward : -70.2
Episode : 14843 		 Timestep : 53438400 		 Average Reward : -71.75
Episode : 14847 		 Timestep : 53452800 		 Average Reward : -160.56
Episode

Episode : 15287 		 Timestep : 55036800 		 Average Reward : 0.73
Episode : 15291 		 Timestep : 55051200 		 Average Reward : -58.91
Episode : 15295 		 Timestep : 55065600 		 Average Reward : -84.82
Episode : 15299 		 Timestep : 55080000 		 Average Reward : -22.99
Episode : 15303 		 Timestep : 55094400 		 Average Reward : -55.97
Episode : 15307 		 Timestep : 55108800 		 Average Reward : -48.96
Episode : 15311 		 Timestep : 55123200 		 Average Reward : -69.15
Episode : 15315 		 Timestep : 55137600 		 Average Reward : -88.92
Episode : 15319 		 Timestep : 55152000 		 Average Reward : -99.27
Episode : 15323 		 Timestep : 55166400 		 Average Reward : -29.83
Episode : 15327 		 Timestep : 55180800 		 Average Reward : -85.77
Episode : 15331 		 Timestep : 55195200 		 Average Reward : -136.88
Episode : 15335 		 Timestep : 55209600 		 Average Reward : -96.36
Episode : 15339 		 Timestep : 55224000 		 Average Reward : -223.34
Episode : 15343 		 Timestep : 55238400 		 Average Reward : -39.8
Episode : 1

Episode : 15783 		 Timestep : 56822400 		 Average Reward : -104.88
Episode : 15787 		 Timestep : 56836800 		 Average Reward : -140.23
Episode : 15791 		 Timestep : 56851200 		 Average Reward : -144.19
Episode : 15795 		 Timestep : 56865600 		 Average Reward : -3.93
Episode : 15799 		 Timestep : 56880000 		 Average Reward : -143.07
Episode : 15803 		 Timestep : 56894400 		 Average Reward : -174.02
Episode : 15807 		 Timestep : 56908800 		 Average Reward : -52.92
Episode : 15811 		 Timestep : 56923200 		 Average Reward : -23.02
Episode : 15815 		 Timestep : 56937600 		 Average Reward : -94.19
Episode : 15819 		 Timestep : 56952000 		 Average Reward : -72.53
Episode : 15823 		 Timestep : 56966400 		 Average Reward : -112.01
Episode : 15827 		 Timestep : 56980800 		 Average Reward : -101.79
Episode : 15831 		 Timestep : 56995200 		 Average Reward : -120.36
Episode : 15835 		 Timestep : 57009600 		 Average Reward : -130.95
Episode : 15839 		 Timestep : 57024000 		 Average Reward : -46.95
Ep

Episode : 16279 		 Timestep : 58608000 		 Average Reward : -160.59
Episode : 16283 		 Timestep : 58622400 		 Average Reward : -169.95
Episode : 16287 		 Timestep : 58636800 		 Average Reward : -85.16
Episode : 16291 		 Timestep : 58651200 		 Average Reward : -76.03
Episode : 16295 		 Timestep : 58665600 		 Average Reward : -50.13
Episode : 16299 		 Timestep : 58680000 		 Average Reward : -114.85
Episode : 16303 		 Timestep : 58694400 		 Average Reward : -76.08
Episode : 16307 		 Timestep : 58708800 		 Average Reward : -81.08
Episode : 16311 		 Timestep : 58723200 		 Average Reward : -106.51
Episode : 16315 		 Timestep : 58737600 		 Average Reward : -34.12
Episode : 16319 		 Timestep : 58752000 		 Average Reward : -49.47
Episode : 16323 		 Timestep : 58766400 		 Average Reward : -97.0
Episode : 16327 		 Timestep : 58780800 		 Average Reward : -125.78
Episode : 16331 		 Timestep : 58795200 		 Average Reward : -73.73
Episode : 16335 		 Timestep : 58809600 		 Average Reward : -95.73
Episod

Episode : 16775 		 Timestep : 60393600 		 Average Reward : -40.7
Episode : 16779 		 Timestep : 60408000 		 Average Reward : -114.72
Episode : 16783 		 Timestep : 60422400 		 Average Reward : -108.19
Episode : 16787 		 Timestep : 60436800 		 Average Reward : -42.56
Episode : 16791 		 Timestep : 60451200 		 Average Reward : -148.89
Episode : 16795 		 Timestep : 60465600 		 Average Reward : -84.84
Episode : 16799 		 Timestep : 60480000 		 Average Reward : -71.91
Episode : 16803 		 Timestep : 60494400 		 Average Reward : -108.14
Episode : 16807 		 Timestep : 60508800 		 Average Reward : -22.79
Episode : 16811 		 Timestep : 60523200 		 Average Reward : -153.1
Episode : 16815 		 Timestep : 60537600 		 Average Reward : -152.45
Episode : 16819 		 Timestep : 60552000 		 Average Reward : -59.73
Episode : 16823 		 Timestep : 60566400 		 Average Reward : -129.02
Episode : 16827 		 Timestep : 60580800 		 Average Reward : -105.5
Episode : 16831 		 Timestep : 60595200 		 Average Reward : -18.46
Episo

Episode : 17271 		 Timestep : 62179200 		 Average Reward : -82.49
Episode : 17275 		 Timestep : 62193600 		 Average Reward : -111.09
Episode : 17279 		 Timestep : 62208000 		 Average Reward : -158.39
Episode : 17283 		 Timestep : 62222400 		 Average Reward : -156.79
Episode : 17287 		 Timestep : 62236800 		 Average Reward : -126.77
Episode : 17291 		 Timestep : 62251200 		 Average Reward : -67.36
Episode : 17295 		 Timestep : 62265600 		 Average Reward : -4.64
Episode : 17299 		 Timestep : 62280000 		 Average Reward : -104.15
Episode : 17303 		 Timestep : 62294400 		 Average Reward : -103.25
Episode : 17307 		 Timestep : 62308800 		 Average Reward : -125.68
Episode : 17311 		 Timestep : 62323200 		 Average Reward : -81.73
Episode : 17315 		 Timestep : 62337600 		 Average Reward : -101.26
Episode : 17319 		 Timestep : 62352000 		 Average Reward : -146.36
Episode : 17323 		 Timestep : 62366400 		 Average Reward : -65.97
Episode : 17327 		 Timestep : 62380800 		 Average Reward : -126.66
E

Episode : 17767 		 Timestep : 63964800 		 Average Reward : -25.42
Episode : 17771 		 Timestep : 63979200 		 Average Reward : -114.07
Episode : 17775 		 Timestep : 63993600 		 Average Reward : -70.61
Episode : 17779 		 Timestep : 64008000 		 Average Reward : -251.38
Episode : 17783 		 Timestep : 64022400 		 Average Reward : -103.63
Episode : 17787 		 Timestep : 64036800 		 Average Reward : -107.73
Episode : 17791 		 Timestep : 64051200 		 Average Reward : -89.34
Episode : 17795 		 Timestep : 64065600 		 Average Reward : -119.63
Episode : 17799 		 Timestep : 64080000 		 Average Reward : -153.48
Episode : 17803 		 Timestep : 64094400 		 Average Reward : -75.2
Episode : 17807 		 Timestep : 64108800 		 Average Reward : -136.75
Episode : 17811 		 Timestep : 64123200 		 Average Reward : -162.54
Episode : 17815 		 Timestep : 64137600 		 Average Reward : -84.44
Episode : 17819 		 Timestep : 64152000 		 Average Reward : -61.11
Episode : 17823 		 Timestep : 64166400 		 Average Reward : -128.15
Ep

Episode : 18263 		 Timestep : 65750400 		 Average Reward : -106.51
Episode : 18267 		 Timestep : 65764800 		 Average Reward : -93.78
Episode : 18271 		 Timestep : 65779200 		 Average Reward : -55.22
Episode : 18275 		 Timestep : 65793600 		 Average Reward : -56.5
Episode : 18279 		 Timestep : 65808000 		 Average Reward : -114.86
Episode : 18283 		 Timestep : 65822400 		 Average Reward : -67.41
Episode : 18287 		 Timestep : 65836800 		 Average Reward : -103.49
Episode : 18291 		 Timestep : 65851200 		 Average Reward : -133.64
Episode : 18295 		 Timestep : 65865600 		 Average Reward : -121.22
Episode : 18299 		 Timestep : 65880000 		 Average Reward : -44.84
Episode : 18303 		 Timestep : 65894400 		 Average Reward : -111.39
Episode : 18307 		 Timestep : 65908800 		 Average Reward : -88.7
Episode : 18311 		 Timestep : 65923200 		 Average Reward : -85.29
Episode : 18315 		 Timestep : 65937600 		 Average Reward : -107.75
Episode : 18319 		 Timestep : 65952000 		 Average Reward : -131.88
Epis

Episode : 18759 		 Timestep : 67536000 		 Average Reward : -36.88
Episode : 18763 		 Timestep : 67550400 		 Average Reward : -79.93
Episode : 18767 		 Timestep : 67564800 		 Average Reward : -112.92
Episode : 18771 		 Timestep : 67579200 		 Average Reward : -16.22
Episode : 18775 		 Timestep : 67593600 		 Average Reward : -109.62
Episode : 18779 		 Timestep : 67608000 		 Average Reward : -63.26
Episode : 18783 		 Timestep : 67622400 		 Average Reward : -26.87
Episode : 18787 		 Timestep : 67636800 		 Average Reward : -28.67
Episode : 18791 		 Timestep : 67651200 		 Average Reward : -80.09
Episode : 18795 		 Timestep : 67665600 		 Average Reward : -123.73
Episode : 18799 		 Timestep : 67680000 		 Average Reward : -68.53
Episode : 18803 		 Timestep : 67694400 		 Average Reward : -44.29
Episode : 18807 		 Timestep : 67708800 		 Average Reward : -104.75
Episode : 18811 		 Timestep : 67723200 		 Average Reward : -71.11
Episode : 18815 		 Timestep : 67737600 		 Average Reward : -54.13
Episod

Episode : 19255 		 Timestep : 69321600 		 Average Reward : -76.45
Episode : 19259 		 Timestep : 69336000 		 Average Reward : -42.71
Episode : 19263 		 Timestep : 69350400 		 Average Reward : -106.69
Episode : 19267 		 Timestep : 69364800 		 Average Reward : -128.98
Episode : 19271 		 Timestep : 69379200 		 Average Reward : -108.51
Episode : 19275 		 Timestep : 69393600 		 Average Reward : -84.4
Episode : 19279 		 Timestep : 69408000 		 Average Reward : -127.6
Episode : 19283 		 Timestep : 69422400 		 Average Reward : -160.43
Episode : 19287 		 Timestep : 69436800 		 Average Reward : -183.24
Episode : 19291 		 Timestep : 69451200 		 Average Reward : 26.85
Episode : 19295 		 Timestep : 69465600 		 Average Reward : -116.61
Episode : 19299 		 Timestep : 69480000 		 Average Reward : -76.79
Episode : 19303 		 Timestep : 69494400 		 Average Reward : -89.58
Episode : 19307 		 Timestep : 69508800 		 Average Reward : -123.18
Episode : 19311 		 Timestep : 69523200 		 Average Reward : -35.21
Episo

Episode : 19751 		 Timestep : 71107200 		 Average Reward : -38.16
Episode : 19755 		 Timestep : 71121600 		 Average Reward : -170.78
Episode : 19759 		 Timestep : 71136000 		 Average Reward : -83.35
Episode : 19763 		 Timestep : 71150400 		 Average Reward : -109.58
Episode : 19767 		 Timestep : 71164800 		 Average Reward : -15.95
Episode : 19771 		 Timestep : 71179200 		 Average Reward : -7.41
Episode : 19775 		 Timestep : 71193600 		 Average Reward : -106.85
Episode : 19779 		 Timestep : 71208000 		 Average Reward : -155.57
Episode : 19783 		 Timestep : 71222400 		 Average Reward : -81.2
Episode : 19787 		 Timestep : 71236800 		 Average Reward : -59.84
Episode : 19791 		 Timestep : 71251200 		 Average Reward : -53.31
Episode : 19795 		 Timestep : 71265600 		 Average Reward : -109.86
Episode : 19799 		 Timestep : 71280000 		 Average Reward : -35.94
Episode : 19803 		 Timestep : 71294400 		 Average Reward : -119.34
Episode : 19807 		 Timestep : 71308800 		 Average Reward : -90.59
Episod

Episode : 20247 		 Timestep : 72892800 		 Average Reward : -209.97
Episode : 20251 		 Timestep : 72907200 		 Average Reward : -68.11
Episode : 20255 		 Timestep : 72921600 		 Average Reward : -77.49
Episode : 20259 		 Timestep : 72936000 		 Average Reward : -94.3
Episode : 20263 		 Timestep : 72950400 		 Average Reward : -64.1
Episode : 20267 		 Timestep : 72964800 		 Average Reward : -74.72
Episode : 20271 		 Timestep : 72979200 		 Average Reward : -68.84
Episode : 20275 		 Timestep : 72993600 		 Average Reward : -87.76
Episode : 20279 		 Timestep : 73008000 		 Average Reward : -90.0
Episode : 20283 		 Timestep : 73022400 		 Average Reward : -134.0
Episode : 20287 		 Timestep : 73036800 		 Average Reward : -85.13
Episode : 20291 		 Timestep : 73051200 		 Average Reward : -102.12
Episode : 20295 		 Timestep : 73065600 		 Average Reward : -126.29
Episode : 20299 		 Timestep : 73080000 		 Average Reward : -125.32
Episode : 20303 		 Timestep : 73094400 		 Average Reward : -96.1
Episode : 

Episode : 20743 		 Timestep : 74678400 		 Average Reward : -76.99
Episode : 20747 		 Timestep : 74692800 		 Average Reward : -87.49
Episode : 20751 		 Timestep : 74707200 		 Average Reward : -32.59
Episode : 20755 		 Timestep : 74721600 		 Average Reward : -74.66
Episode : 20759 		 Timestep : 74736000 		 Average Reward : -49.36
Episode : 20763 		 Timestep : 74750400 		 Average Reward : -65.74
Episode : 20767 		 Timestep : 74764800 		 Average Reward : -131.93
Episode : 20771 		 Timestep : 74779200 		 Average Reward : -64.51
Episode : 20775 		 Timestep : 74793600 		 Average Reward : -76.44
Episode : 20779 		 Timestep : 74808000 		 Average Reward : -5.7
Episode : 20783 		 Timestep : 74822400 		 Average Reward : -82.1
Episode : 20787 		 Timestep : 74836800 		 Average Reward : -99.05
Episode : 20791 		 Timestep : 74851200 		 Average Reward : -102.28
Episode : 20795 		 Timestep : 74865600 		 Average Reward : -134.14
Episode : 20799 		 Timestep : 74880000 		 Average Reward : -60.22
Episode : 

Episode : 21243 		 Timestep : 76478400 		 Average Reward : -50.07
Episode : 21247 		 Timestep : 76492800 		 Average Reward : -55.5
Episode : 21251 		 Timestep : 76507200 		 Average Reward : -17.12
Episode : 21255 		 Timestep : 76521600 		 Average Reward : -23.87
Episode : 21259 		 Timestep : 76536000 		 Average Reward : -139.28
Episode : 21263 		 Timestep : 76550400 		 Average Reward : -75.2
Episode : 21267 		 Timestep : 76564800 		 Average Reward : -61.87
Episode : 21271 		 Timestep : 76579200 		 Average Reward : -65.38
Episode : 21275 		 Timestep : 76593600 		 Average Reward : -122.22
Episode : 21279 		 Timestep : 76608000 		 Average Reward : -33.2
Episode : 21283 		 Timestep : 76622400 		 Average Reward : -125.94
Episode : 21287 		 Timestep : 76636800 		 Average Reward : -50.52
Episode : 21291 		 Timestep : 76651200 		 Average Reward : -91.6
Episode : 21295 		 Timestep : 76665600 		 Average Reward : -79.66
Episode : 21299 		 Timestep : 76680000 		 Average Reward : -134.96
Episode : 

Episode : 21743 		 Timestep : 78278400 		 Average Reward : -56.21
Episode : 21747 		 Timestep : 78292800 		 Average Reward : -127.97
Episode : 21751 		 Timestep : 78307200 		 Average Reward : -81.91
Episode : 21755 		 Timestep : 78321600 		 Average Reward : -139.29
Episode : 21759 		 Timestep : 78336000 		 Average Reward : -68.43
Episode : 21763 		 Timestep : 78350400 		 Average Reward : -94.3
Episode : 21767 		 Timestep : 78364800 		 Average Reward : -63.78
Episode : 21771 		 Timestep : 78379200 		 Average Reward : -89.9
Episode : 21775 		 Timestep : 78393600 		 Average Reward : -79.81
Episode : 21779 		 Timestep : 78408000 		 Average Reward : -41.03
Episode : 21783 		 Timestep : 78422400 		 Average Reward : -8.23
Episode : 21787 		 Timestep : 78436800 		 Average Reward : -91.94
Episode : 21791 		 Timestep : 78451200 		 Average Reward : -95.92
Episode : 21795 		 Timestep : 78465600 		 Average Reward : -76.7
Episode : 21799 		 Timestep : 78480000 		 Average Reward : -25.33
Episode : 21

Episode : 22239 		 Timestep : 80064000 		 Average Reward : -86.07
Episode : 22243 		 Timestep : 80078400 		 Average Reward : -92.98
Episode : 22247 		 Timestep : 80092800 		 Average Reward : -101.59
Episode : 22251 		 Timestep : 80107200 		 Average Reward : -16.41
Episode : 22255 		 Timestep : 80121600 		 Average Reward : -101.66
Episode : 22259 		 Timestep : 80136000 		 Average Reward : -91.9
Episode : 22263 		 Timestep : 80150400 		 Average Reward : -51.16
Episode : 22267 		 Timestep : 80164800 		 Average Reward : -69.77
Episode : 22271 		 Timestep : 80179200 		 Average Reward : -78.42
Episode : 22275 		 Timestep : 80193600 		 Average Reward : -13.96
Episode : 22279 		 Timestep : 80208000 		 Average Reward : -91.89
Episode : 22283 		 Timestep : 80222400 		 Average Reward : -53.32
Episode : 22287 		 Timestep : 80236800 		 Average Reward : -29.75
Episode : 22291 		 Timestep : 80251200 		 Average Reward : -86.08
Episode : 22295 		 Timestep : 80265600 		 Average Reward : -102.29
Episode 

Episode : 22735 		 Timestep : 81849600 		 Average Reward : -17.85
Episode : 22739 		 Timestep : 81864000 		 Average Reward : -139.22
Episode : 22743 		 Timestep : 81878400 		 Average Reward : -90.71
Episode : 22747 		 Timestep : 81892800 		 Average Reward : -147.64
Episode : 22751 		 Timestep : 81907200 		 Average Reward : -100.22
Episode : 22755 		 Timestep : 81921600 		 Average Reward : -55.01
Episode : 22759 		 Timestep : 81936000 		 Average Reward : -92.23
Episode : 22763 		 Timestep : 81950400 		 Average Reward : -93.35
Episode : 22767 		 Timestep : 81964800 		 Average Reward : -82.37
Episode : 22771 		 Timestep : 81979200 		 Average Reward : -139.65
Episode : 22775 		 Timestep : 81993600 		 Average Reward : -143.11
Episode : 22779 		 Timestep : 82008000 		 Average Reward : -121.55
Episode : 22783 		 Timestep : 82022400 		 Average Reward : -122.5
Episode : 22787 		 Timestep : 82036800 		 Average Reward : -96.3
Episode : 22791 		 Timestep : 82051200 		 Average Reward : -134.97
Epis

Episode : 23231 		 Timestep : 83635200 		 Average Reward : -29.83
Episode : 23235 		 Timestep : 83649600 		 Average Reward : -4.57
Episode : 23239 		 Timestep : 83664000 		 Average Reward : -43.35
Episode : 23243 		 Timestep : 83678400 		 Average Reward : -62.08
Episode : 23247 		 Timestep : 83692800 		 Average Reward : -1.57
Episode : 23251 		 Timestep : 83707200 		 Average Reward : -61.12
Episode : 23255 		 Timestep : 83721600 		 Average Reward : -77.27
Episode : 23259 		 Timestep : 83736000 		 Average Reward : -20.24
Episode : 23263 		 Timestep : 83750400 		 Average Reward : -131.38
Episode : 23267 		 Timestep : 83764800 		 Average Reward : -14.77
Episode : 23271 		 Timestep : 83779200 		 Average Reward : -107.89
Episode : 23275 		 Timestep : 83793600 		 Average Reward : -36.65
Episode : 23279 		 Timestep : 83808000 		 Average Reward : -124.93
Episode : 23283 		 Timestep : 83822400 		 Average Reward : -142.5
Episode : 23287 		 Timestep : 83836800 		 Average Reward : -57.38
Episode :

Episode : 23731 		 Timestep : 85435200 		 Average Reward : -15.37
Episode : 23735 		 Timestep : 85449600 		 Average Reward : -44.08
Episode : 23739 		 Timestep : 85464000 		 Average Reward : -37.84
Episode : 23743 		 Timestep : 85478400 		 Average Reward : 14.36
Episode : 23747 		 Timestep : 85492800 		 Average Reward : -116.5
Episode : 23751 		 Timestep : 85507200 		 Average Reward : -38.45
Episode : 23755 		 Timestep : 85521600 		 Average Reward : -56.59
Episode : 23759 		 Timestep : 85536000 		 Average Reward : -91.31
Episode : 23763 		 Timestep : 85550400 		 Average Reward : -66.84
Episode : 23767 		 Timestep : 85564800 		 Average Reward : -113.08
Episode : 23771 		 Timestep : 85579200 		 Average Reward : 22.89
Episode : 23775 		 Timestep : 85593600 		 Average Reward : -83.88
Episode : 23779 		 Timestep : 85608000 		 Average Reward : -48.84
Episode : 23783 		 Timestep : 85622400 		 Average Reward : -13.43
Episode : 23787 		 Timestep : 85636800 		 Average Reward : -82.9
Episode : 23

Episode : 24231 		 Timestep : 87235200 		 Average Reward : -56.05
Episode : 24235 		 Timestep : 87249600 		 Average Reward : -68.68
Episode : 24239 		 Timestep : 87264000 		 Average Reward : -67.03
Episode : 24243 		 Timestep : 87278400 		 Average Reward : -110.32
Episode : 24247 		 Timestep : 87292800 		 Average Reward : -77.75
Episode : 24251 		 Timestep : 87307200 		 Average Reward : -72.62
Episode : 24255 		 Timestep : 87321600 		 Average Reward : -118.42
Episode : 24259 		 Timestep : 87336000 		 Average Reward : -116.19
Episode : 24263 		 Timestep : 87350400 		 Average Reward : -52.56
Episode : 24267 		 Timestep : 87364800 		 Average Reward : -71.28
Episode : 24271 		 Timestep : 87379200 		 Average Reward : 18.52
Episode : 24275 		 Timestep : 87393600 		 Average Reward : 5.89
Episode : 24279 		 Timestep : 87408000 		 Average Reward : -39.27
Episode : 24283 		 Timestep : 87422400 		 Average Reward : -66.25
Episode : 24287 		 Timestep : 87436800 		 Average Reward : -27.72
Episode : 

Episode : 24731 		 Timestep : 89035200 		 Average Reward : -51.29
Episode : 24735 		 Timestep : 89049600 		 Average Reward : -33.23
Episode : 24739 		 Timestep : 89064000 		 Average Reward : -114.79
Episode : 24743 		 Timestep : 89078400 		 Average Reward : -22.08
Episode : 24747 		 Timestep : 89092800 		 Average Reward : -33.61
Episode : 24751 		 Timestep : 89107200 		 Average Reward : -2.33
Episode : 24755 		 Timestep : 89121600 		 Average Reward : -73.85
Episode : 24759 		 Timestep : 89136000 		 Average Reward : -33.24
Episode : 24763 		 Timestep : 89150400 		 Average Reward : -23.15
Episode : 24767 		 Timestep : 89164800 		 Average Reward : -66.15
Episode : 24771 		 Timestep : 89179200 		 Average Reward : -74.17
Episode : 24775 		 Timestep : 89193600 		 Average Reward : -63.51
Episode : 24779 		 Timestep : 89208000 		 Average Reward : -50.12
Episode : 24783 		 Timestep : 89222400 		 Average Reward : -55.5
Episode : 24787 		 Timestep : 89236800 		 Average Reward : -40.26
Episode : 2

Episode : 25215 		 Timestep : 90777600 		 Average Reward : -44.41
Episode : 25219 		 Timestep : 90792000 		 Average Reward : -59.22
Episode : 25223 		 Timestep : 90806400 		 Average Reward : -22.71
Episode : 25227 		 Timestep : 90820800 		 Average Reward : 12.53
Episode : 25231 		 Timestep : 90835200 		 Average Reward : -0.64
Episode : 25235 		 Timestep : 90849600 		 Average Reward : -42.6
Episode : 25239 		 Timestep : 90864000 		 Average Reward : -99.69
Episode : 25243 		 Timestep : 90878400 		 Average Reward : -11.23
Episode : 25247 		 Timestep : 90892800 		 Average Reward : -54.88
Episode : 25251 		 Timestep : 90907200 		 Average Reward : -18.77
Episode : 25255 		 Timestep : 90921600 		 Average Reward : -51.6
Episode : 25259 		 Timestep : 90936000 		 Average Reward : -123.8
Episode : 25263 		 Timestep : 90950400 		 Average Reward : 7.56
Episode : 25267 		 Timestep : 90964800 		 Average Reward : -103.9
Episode : 25271 		 Timestep : 90979200 		 Average Reward : -4.74
Episode : 25275 	

Episode : 25715 		 Timestep : 92577600 		 Average Reward : -84.85
Episode : 25719 		 Timestep : 92592000 		 Average Reward : -132.15
Episode : 25723 		 Timestep : 92606400 		 Average Reward : -30.57
Episode : 25727 		 Timestep : 92620800 		 Average Reward : -28.54
Episode : 25731 		 Timestep : 92635200 		 Average Reward : -57.34
Episode : 25735 		 Timestep : 92649600 		 Average Reward : -17.0
Episode : 25739 		 Timestep : 92664000 		 Average Reward : -33.76
Episode : 25743 		 Timestep : 92678400 		 Average Reward : -35.94
Episode : 25747 		 Timestep : 92692800 		 Average Reward : 7.66
Episode : 25751 		 Timestep : 92707200 		 Average Reward : -29.22
Episode : 25755 		 Timestep : 92721600 		 Average Reward : 31.24
Episode : 25759 		 Timestep : 92736000 		 Average Reward : -50.2
Episode : 25763 		 Timestep : 92750400 		 Average Reward : -34.71
Episode : 25767 		 Timestep : 92764800 		 Average Reward : 27.56
Episode : 25771 		 Timestep : 92779200 		 Average Reward : -42.76
Episode : 25775

Episode : 26215 		 Timestep : 94377600 		 Average Reward : 2.85
Episode : 26219 		 Timestep : 94392000 		 Average Reward : 10.39
Episode : 26223 		 Timestep : 94406400 		 Average Reward : -58.18
Episode : 26227 		 Timestep : 94420800 		 Average Reward : -11.97
Episode : 26231 		 Timestep : 94435200 		 Average Reward : 0.78
Episode : 26235 		 Timestep : 94449600 		 Average Reward : -71.15
Episode : 26239 		 Timestep : 94464000 		 Average Reward : -119.08
Episode : 26243 		 Timestep : 94478400 		 Average Reward : 15.1
Episode : 26247 		 Timestep : 94492800 		 Average Reward : -57.34
Episode : 26251 		 Timestep : 94507200 		 Average Reward : -86.09
Episode : 26255 		 Timestep : 94521600 		 Average Reward : -63.89
Episode : 26259 		 Timestep : 94536000 		 Average Reward : -50.79
Episode : 26263 		 Timestep : 94550400 		 Average Reward : -15.74
Episode : 26267 		 Timestep : 94564800 		 Average Reward : -38.75
Episode : 26271 		 Timestep : 94579200 		 Average Reward : 8.1
Episode : 26275 		 

Episode : 26715 		 Timestep : 96177600 		 Average Reward : -45.2
Episode : 26719 		 Timestep : 96192000 		 Average Reward : -53.79
Episode : 26723 		 Timestep : 96206400 		 Average Reward : -46.32
Episode : 26727 		 Timestep : 96220800 		 Average Reward : -86.84
Episode : 26731 		 Timestep : 96235200 		 Average Reward : -79.76
Episode : 26735 		 Timestep : 96249600 		 Average Reward : 52.55
Episode : 26739 		 Timestep : 96264000 		 Average Reward : 32.47
Episode : 26743 		 Timestep : 96278400 		 Average Reward : -31.37
Episode : 26747 		 Timestep : 96292800 		 Average Reward : 8.46
Episode : 26751 		 Timestep : 96307200 		 Average Reward : -86.41
Episode : 26755 		 Timestep : 96321600 		 Average Reward : -33.47
Episode : 26759 		 Timestep : 96336000 		 Average Reward : -71.26
Episode : 26763 		 Timestep : 96350400 		 Average Reward : -30.03
Episode : 26767 		 Timestep : 96364800 		 Average Reward : -71.97
Episode : 26771 		 Timestep : 96379200 		 Average Reward : -78.43
Episode : 26775

Episode : 27215 		 Timestep : 97977600 		 Average Reward : 13.69
Episode : 27219 		 Timestep : 97992000 		 Average Reward : -38.66
Episode : 27223 		 Timestep : 98006400 		 Average Reward : -36.04
Episode : 27227 		 Timestep : 98020800 		 Average Reward : 52.26
Episode : 27231 		 Timestep : 98035200 		 Average Reward : -13.92
Episode : 27235 		 Timestep : 98049600 		 Average Reward : -100.98
Episode : 27239 		 Timestep : 98064000 		 Average Reward : -33.48
Episode : 27243 		 Timestep : 98078400 		 Average Reward : -20.67
Episode : 27247 		 Timestep : 98092800 		 Average Reward : -10.31
Episode : 27251 		 Timestep : 98107200 		 Average Reward : -2.77
Episode : 27255 		 Timestep : 98121600 		 Average Reward : -67.11
Episode : 27259 		 Timestep : 98136000 		 Average Reward : -46.56
Episode : 27263 		 Timestep : 98150400 		 Average Reward : -82.71
Episode : 27267 		 Timestep : 98164800 		 Average Reward : -39.76
Episode : 27271 		 Timestep : 98179200 		 Average Reward : -81.13
Episode : 27

Episode : 27715 		 Timestep : 99777600 		 Average Reward : -26.27
Episode : 27719 		 Timestep : 99792000 		 Average Reward : 21.85
Episode : 27723 		 Timestep : 99806400 		 Average Reward : -28.6
Episode : 27727 		 Timestep : 99820800 		 Average Reward : -39.48
Episode : 27731 		 Timestep : 99835200 		 Average Reward : 22.28
Episode : 27735 		 Timestep : 99849600 		 Average Reward : 10.57
Episode : 27739 		 Timestep : 99864000 		 Average Reward : 41.29
Episode : 27743 		 Timestep : 99878400 		 Average Reward : 7.87
Episode : 27747 		 Timestep : 99892800 		 Average Reward : 36.03
Episode : 27751 		 Timestep : 99907200 		 Average Reward : -28.8
Episode : 27755 		 Timestep : 99921600 		 Average Reward : -100.03
Episode : 27759 		 Timestep : 99936000 		 Average Reward : -34.27
Episode : 27763 		 Timestep : 99950400 		 Average Reward : -43.8
Episode : 27767 		 Timestep : 99964800 		 Average Reward : -46.31
Episode : 27771 		 Timestep : 99979200 		 Average Reward : -34.01
Episode : 27775 		 

Episode : 28211 		 Timestep : 101563200 		 Average Reward : 34.72
Episode : 28215 		 Timestep : 101577600 		 Average Reward : -79.9
Episode : 28219 		 Timestep : 101592000 		 Average Reward : -43.95
Episode : 28223 		 Timestep : 101606400 		 Average Reward : -4.98
Episode : 28227 		 Timestep : 101620800 		 Average Reward : -37.79
Episode : 28231 		 Timestep : 101635200 		 Average Reward : -24.72
Episode : 28235 		 Timestep : 101649600 		 Average Reward : -113.97
Episode : 28239 		 Timestep : 101664000 		 Average Reward : -48.01
Episode : 28243 		 Timestep : 101678400 		 Average Reward : -51.22
Episode : 28247 		 Timestep : 101692800 		 Average Reward : 14.35
Episode : 28251 		 Timestep : 101707200 		 Average Reward : 7.98
Episode : 28255 		 Timestep : 101721600 		 Average Reward : -115.39
Episode : 28259 		 Timestep : 101736000 		 Average Reward : -93.74
Episode : 28263 		 Timestep : 101750400 		 Average Reward : -128.81
Episode : 28267 		 Timestep : 101764800 		 Average Reward : -30.7

Episode : 28707 		 Timestep : 103348800 		 Average Reward : -68.2
Episode : 28711 		 Timestep : 103363200 		 Average Reward : -90.27
Episode : 28715 		 Timestep : 103377600 		 Average Reward : 20.12
Episode : 28719 		 Timestep : 103392000 		 Average Reward : -17.18
Episode : 28723 		 Timestep : 103406400 		 Average Reward : 9.39
Episode : 28727 		 Timestep : 103420800 		 Average Reward : -102.74
Episode : 28731 		 Timestep : 103435200 		 Average Reward : -30.67
Episode : 28735 		 Timestep : 103449600 		 Average Reward : 10.25
Episode : 28739 		 Timestep : 103464000 		 Average Reward : -99.74
Episode : 28743 		 Timestep : 103478400 		 Average Reward : 1.05
Episode : 28747 		 Timestep : 103492800 		 Average Reward : -61.76
Episode : 28751 		 Timestep : 103507200 		 Average Reward : -37.13
Episode : 28755 		 Timestep : 103521600 		 Average Reward : 18.99
Episode : 28759 		 Timestep : 103536000 		 Average Reward : -81.37
Episode : 28763 		 Timestep : 103550400 		 Average Reward : -100.11
E

Episode : 29203 		 Timestep : 105134400 		 Average Reward : -40.12
Episode : 29207 		 Timestep : 105148800 		 Average Reward : -52.14
Episode : 29211 		 Timestep : 105163200 		 Average Reward : 11.99
Episode : 29215 		 Timestep : 105177600 		 Average Reward : -36.55
Episode : 29219 		 Timestep : 105192000 		 Average Reward : -0.39
Episode : 29223 		 Timestep : 105206400 		 Average Reward : -86.86
Episode : 29227 		 Timestep : 105220800 		 Average Reward : -38.38
Episode : 29231 		 Timestep : 105235200 		 Average Reward : -63.85
Episode : 29235 		 Timestep : 105249600 		 Average Reward : 6.92
Episode : 29239 		 Timestep : 105264000 		 Average Reward : -39.42
Episode : 29243 		 Timestep : 105278400 		 Average Reward : 28.35
Episode : 29247 		 Timestep : 105292800 		 Average Reward : -60.17
Episode : 29251 		 Timestep : 105307200 		 Average Reward : -44.3
Episode : 29255 		 Timestep : 105321600 		 Average Reward : 18.01
Episode : 29259 		 Timestep : 105336000 		 Average Reward : -78.48
Ep

Episode : 29695 		 Timestep : 106905600 		 Average Reward : -74.31
Episode : 29699 		 Timestep : 106920000 		 Average Reward : -62.31
Episode : 29703 		 Timestep : 106934400 		 Average Reward : -58.09
Episode : 29707 		 Timestep : 106948800 		 Average Reward : -91.2
Episode : 29711 		 Timestep : 106963200 		 Average Reward : -13.41
Episode : 29715 		 Timestep : 106977600 		 Average Reward : -2.02
Episode : 29719 		 Timestep : 106992000 		 Average Reward : -105.26
Episode : 29723 		 Timestep : 107006400 		 Average Reward : -44.88
Episode : 29727 		 Timestep : 107020800 		 Average Reward : -79.64
Episode : 29731 		 Timestep : 107035200 		 Average Reward : 36.13
Episode : 29735 		 Timestep : 107049600 		 Average Reward : 21.29
Episode : 29739 		 Timestep : 107064000 		 Average Reward : -4.37
Episode : 29743 		 Timestep : 107078400 		 Average Reward : -11.68
Episode : 29747 		 Timestep : 107092800 		 Average Reward : -13.87
Episode : 29751 		 Timestep : 107107200 		 Average Reward : -43.59

Episode : 30187 		 Timestep : 108676800 		 Average Reward : 2.84
Episode : 30191 		 Timestep : 108691200 		 Average Reward : -0.1
Episode : 30195 		 Timestep : 108705600 		 Average Reward : 31.33
Episode : 30199 		 Timestep : 108720000 		 Average Reward : 3.84
Episode : 30203 		 Timestep : 108734400 		 Average Reward : -36.11
Episode : 30207 		 Timestep : 108748800 		 Average Reward : -15.94
Episode : 30211 		 Timestep : 108763200 		 Average Reward : 30.36
Episode : 30215 		 Timestep : 108777600 		 Average Reward : -41.27
Episode : 30219 		 Timestep : 108792000 		 Average Reward : -70.93
Episode : 30223 		 Timestep : 108806400 		 Average Reward : -122.79
Episode : 30227 		 Timestep : 108820800 		 Average Reward : -44.73
Episode : 30231 		 Timestep : 108835200 		 Average Reward : 40.16
Episode : 30235 		 Timestep : 108849600 		 Average Reward : -32.52
Episode : 30239 		 Timestep : 108864000 		 Average Reward : -48.01
Episode : 30243 		 Timestep : 108878400 		 Average Reward : -16.43
Epi

Episode : 30683 		 Timestep : 110462400 		 Average Reward : -39.28
Episode : 30687 		 Timestep : 110476800 		 Average Reward : -29.12
Episode : 30691 		 Timestep : 110491200 		 Average Reward : -35.51
Episode : 30695 		 Timestep : 110505600 		 Average Reward : 46.53
Episode : 30699 		 Timestep : 110520000 		 Average Reward : -1.7
Episode : 30703 		 Timestep : 110534400 		 Average Reward : -79.29
Episode : 30707 		 Timestep : 110548800 		 Average Reward : -67.94
Episode : 30711 		 Timestep : 110563200 		 Average Reward : -8.84
Episode : 30715 		 Timestep : 110577600 		 Average Reward : -59.98
Episode : 30719 		 Timestep : 110592000 		 Average Reward : -44.97
Episode : 30723 		 Timestep : 110606400 		 Average Reward : -24.87
Episode : 30727 		 Timestep : 110620800 		 Average Reward : -30.37
Episode : 30731 		 Timestep : 110635200 		 Average Reward : -23.86
Episode : 30735 		 Timestep : 110649600 		 Average Reward : 10.54
Episode : 30739 		 Timestep : 110664000 		 Average Reward : 25.67
E

Episode : 31179 		 Timestep : 112248000 		 Average Reward : -26.73
Episode : 31183 		 Timestep : 112262400 		 Average Reward : -46.97
Episode : 31187 		 Timestep : 112276800 		 Average Reward : 17.96
Episode : 31191 		 Timestep : 112291200 		 Average Reward : -88.52
Episode : 31195 		 Timestep : 112305600 		 Average Reward : -52.05
Episode : 31199 		 Timestep : 112320000 		 Average Reward : -27.84
Episode : 31203 		 Timestep : 112334400 		 Average Reward : -79.45
Episode : 31207 		 Timestep : 112348800 		 Average Reward : 24.13
Episode : 31211 		 Timestep : 112363200 		 Average Reward : -60.71
Episode : 31215 		 Timestep : 112377600 		 Average Reward : -1.99
Episode : 31219 		 Timestep : 112392000 		 Average Reward : -56.47
Episode : 31223 		 Timestep : 112406400 		 Average Reward : -51.87
Episode : 31227 		 Timestep : 112420800 		 Average Reward : -17.74
Episode : 31231 		 Timestep : 112435200 		 Average Reward : -37.68
Episode : 31235 		 Timestep : 112449600 		 Average Reward : -6.06

Episode : 31671 		 Timestep : 114019200 		 Average Reward : -42.35
Episode : 31675 		 Timestep : 114033600 		 Average Reward : -26.8
Episode : 31679 		 Timestep : 114048000 		 Average Reward : -111.78
Episode : 31683 		 Timestep : 114062400 		 Average Reward : -5.89
Episode : 31687 		 Timestep : 114076800 		 Average Reward : 13.18
Episode : 31691 		 Timestep : 114091200 		 Average Reward : -118.2
Episode : 31695 		 Timestep : 114105600 		 Average Reward : -75.52
Episode : 31699 		 Timestep : 114120000 		 Average Reward : -36.46
Episode : 31703 		 Timestep : 114134400 		 Average Reward : -80.85
Episode : 31707 		 Timestep : 114148800 		 Average Reward : 21.2
Episode : 31711 		 Timestep : 114163200 		 Average Reward : -38.46
Episode : 31715 		 Timestep : 114177600 		 Average Reward : 3.29
Episode : 31719 		 Timestep : 114192000 		 Average Reward : -42.9
Episode : 31723 		 Timestep : 114206400 		 Average Reward : -84.34
Episode : 31727 		 Timestep : 114220800 		 Average Reward : -31.2
Epi

Episode : 32167 		 Timestep : 115804800 		 Average Reward : -132.73
Episode : 32171 		 Timestep : 115819200 		 Average Reward : -2.9
Episode : 32175 		 Timestep : 115833600 		 Average Reward : -66.23
Episode : 32179 		 Timestep : 115848000 		 Average Reward : -55.34
Episode : 32183 		 Timestep : 115862400 		 Average Reward : -9.44
Episode : 32187 		 Timestep : 115876800 		 Average Reward : -5.07
Episode : 32191 		 Timestep : 115891200 		 Average Reward : -64.62
Episode : 32195 		 Timestep : 115905600 		 Average Reward : -51.98
Episode : 32199 		 Timestep : 115920000 		 Average Reward : -12.89
Episode : 32203 		 Timestep : 115934400 		 Average Reward : -56.15
Episode : 32207 		 Timestep : 115948800 		 Average Reward : 39.71
Episode : 32211 		 Timestep : 115963200 		 Average Reward : -100.28
Episode : 32215 		 Timestep : 115977600 		 Average Reward : -27.68
Episode : 32219 		 Timestep : 115992000 		 Average Reward : -64.11
Episode : 32223 		 Timestep : 116006400 		 Average Reward : 28.41

Episode : 32663 		 Timestep : 117590400 		 Average Reward : -25.64
Episode : 32667 		 Timestep : 117604800 		 Average Reward : -24.35
Episode : 32671 		 Timestep : 117619200 		 Average Reward : -39.46
Episode : 32675 		 Timestep : 117633600 		 Average Reward : -80.32
Episode : 32679 		 Timestep : 117648000 		 Average Reward : 32.94
Episode : 32683 		 Timestep : 117662400 		 Average Reward : -87.36
Episode : 32687 		 Timestep : 117676800 		 Average Reward : 17.81
Episode : 32691 		 Timestep : 117691200 		 Average Reward : -1.61
Episode : 32695 		 Timestep : 117705600 		 Average Reward : -0.07
Episode : 32699 		 Timestep : 117720000 		 Average Reward : 23.0
Episode : 32703 		 Timestep : 117734400 		 Average Reward : -54.45
Episode : 32707 		 Timestep : 117748800 		 Average Reward : -109.09
Episode : 32711 		 Timestep : 117763200 		 Average Reward : -35.66
Episode : 32715 		 Timestep : 117777600 		 Average Reward : -68.15
Episode : 32719 		 Timestep : 117792000 		 Average Reward : -65.52


Episode : 33155 		 Timestep : 119361600 		 Average Reward : -21.58
Episode : 33159 		 Timestep : 119376000 		 Average Reward : -65.06
Episode : 33163 		 Timestep : 119390400 		 Average Reward : -6.68
Episode : 33167 		 Timestep : 119404800 		 Average Reward : -71.86
Episode : 33171 		 Timestep : 119419200 		 Average Reward : -24.66
Episode : 33175 		 Timestep : 119433600 		 Average Reward : -59.37
Episode : 33179 		 Timestep : 119448000 		 Average Reward : -30.97
Episode : 33183 		 Timestep : 119462400 		 Average Reward : -11.89
Episode : 33187 		 Timestep : 119476800 		 Average Reward : -15.67
Episode : 33191 		 Timestep : 119491200 		 Average Reward : -46.18
Episode : 33195 		 Timestep : 119505600 		 Average Reward : -4.88
Episode : 33199 		 Timestep : 119520000 		 Average Reward : -14.25
Episode : 33203 		 Timestep : 119534400 		 Average Reward : -25.23
Episode : 33207 		 Timestep : 119548800 		 Average Reward : -15.49
Episode : 33211 		 Timestep : 119563200 		 Average Reward : -51.

Episode : 33647 		 Timestep : 121132800 		 Average Reward : -18.49
Episode : 33651 		 Timestep : 121147200 		 Average Reward : -14.5
Episode : 33655 		 Timestep : 121161600 		 Average Reward : -86.97
Episode : 33659 		 Timestep : 121176000 		 Average Reward : -49.02
Episode : 33663 		 Timestep : 121190400 		 Average Reward : -21.75
Episode : 33667 		 Timestep : 121204800 		 Average Reward : -25.0
Episode : 33671 		 Timestep : 121219200 		 Average Reward : -36.22
Episode : 33675 		 Timestep : 121233600 		 Average Reward : -23.39
Episode : 33679 		 Timestep : 121248000 		 Average Reward : -53.17
Episode : 33683 		 Timestep : 121262400 		 Average Reward : -52.49
Episode : 33687 		 Timestep : 121276800 		 Average Reward : -20.73
Episode : 33691 		 Timestep : 121291200 		 Average Reward : -61.76
Episode : 33695 		 Timestep : 121305600 		 Average Reward : -24.87
Episode : 33699 		 Timestep : 121320000 		 Average Reward : -58.84
Episode : 33703 		 Timestep : 121334400 		 Average Reward : -77.

Episode : 34139 		 Timestep : 122904000 		 Average Reward : -22.05
Episode : 34143 		 Timestep : 122918400 		 Average Reward : 16.07
Episode : 34147 		 Timestep : 122932800 		 Average Reward : -25.5
Episode : 34151 		 Timestep : 122947200 		 Average Reward : -5.7
Episode : 34155 		 Timestep : 122961600 		 Average Reward : -24.13
Episode : 34159 		 Timestep : 122976000 		 Average Reward : -44.95
Episode : 34163 		 Timestep : 122990400 		 Average Reward : -61.59
Episode : 34167 		 Timestep : 123004800 		 Average Reward : -28.4
Episode : 34171 		 Timestep : 123019200 		 Average Reward : -16.79
Episode : 34175 		 Timestep : 123033600 		 Average Reward : -35.98
Episode : 34179 		 Timestep : 123048000 		 Average Reward : -36.26
Episode : 34183 		 Timestep : 123062400 		 Average Reward : 10.39
Episode : 34187 		 Timestep : 123076800 		 Average Reward : -57.32
Episode : 34191 		 Timestep : 123091200 		 Average Reward : -122.5
Episode : 34195 		 Timestep : 123105600 		 Average Reward : 2.19
Epi

Episode : 34635 		 Timestep : 124689600 		 Average Reward : -0.77
Episode : 34639 		 Timestep : 124704000 		 Average Reward : 17.32
Episode : 34643 		 Timestep : 124718400 		 Average Reward : -30.49
Episode : 34647 		 Timestep : 124732800 		 Average Reward : -56.73
Episode : 34651 		 Timestep : 124747200 		 Average Reward : -11.55
Episode : 34655 		 Timestep : 124761600 		 Average Reward : -5.65
Episode : 34659 		 Timestep : 124776000 		 Average Reward : -22.51
Episode : 34663 		 Timestep : 124790400 		 Average Reward : -30.72
Episode : 34667 		 Timestep : 124804800 		 Average Reward : -9.08
Episode : 34671 		 Timestep : 124819200 		 Average Reward : -29.9
Episode : 34675 		 Timestep : 124833600 		 Average Reward : -50.69
Episode : 34679 		 Timestep : 124848000 		 Average Reward : -79.84
Episode : 34683 		 Timestep : 124862400 		 Average Reward : 15.6
Episode : 34687 		 Timestep : 124876800 		 Average Reward : -5.1
Episode : 34691 		 Timestep : 124891200 		 Average Reward : -25.37
Epis

Episode : 35131 		 Timestep : 126475200 		 Average Reward : 12.78
Episode : 35135 		 Timestep : 126489600 		 Average Reward : -72.72
Episode : 35139 		 Timestep : 126504000 		 Average Reward : -154.34
Episode : 35143 		 Timestep : 126518400 		 Average Reward : -13.54
Episode : 35147 		 Timestep : 126532800 		 Average Reward : -46.55
Episode : 35151 		 Timestep : 126547200 		 Average Reward : -96.67
Episode : 35155 		 Timestep : 126561600 		 Average Reward : -25.47
Episode : 35159 		 Timestep : 126576000 		 Average Reward : -51.18
Episode : 35163 		 Timestep : 126590400 		 Average Reward : -5.08
Episode : 35167 		 Timestep : 126604800 		 Average Reward : -29.34
Episode : 35171 		 Timestep : 126619200 		 Average Reward : -139.51
Episode : 35175 		 Timestep : 126633600 		 Average Reward : -31.66
Episode : 35179 		 Timestep : 126648000 		 Average Reward : -11.59
Episode : 35183 		 Timestep : 126662400 		 Average Reward : -124.8
Episode : 35187 		 Timestep : 126676800 		 Average Reward : -5

Episode : 35623 		 Timestep : 128246400 		 Average Reward : 16.59
Episode : 35627 		 Timestep : 128260800 		 Average Reward : -174.96
Episode : 35631 		 Timestep : 128275200 		 Average Reward : -31.86
Episode : 35635 		 Timestep : 128289600 		 Average Reward : -26.92
Episode : 35639 		 Timestep : 128304000 		 Average Reward : -32.65
Episode : 35643 		 Timestep : 128318400 		 Average Reward : -18.98
Episode : 35647 		 Timestep : 128332800 		 Average Reward : -72.35
Episode : 35651 		 Timestep : 128347200 		 Average Reward : -9.53
Episode : 35655 		 Timestep : 128361600 		 Average Reward : 17.78
Episode : 35659 		 Timestep : 128376000 		 Average Reward : -101.52
Episode : 35663 		 Timestep : 128390400 		 Average Reward : -12.06
Episode : 35667 		 Timestep : 128404800 		 Average Reward : -19.44
Episode : 35671 		 Timestep : 128419200 		 Average Reward : 8.2
Episode : 35675 		 Timestep : 128433600 		 Average Reward : -1.32
Episode : 35679 		 Timestep : 128448000 		 Average Reward : 15.27
E

Episode : 36115 		 Timestep : 130017600 		 Average Reward : -29.03
Episode : 36119 		 Timestep : 130032000 		 Average Reward : -58.12
Episode : 36123 		 Timestep : 130046400 		 Average Reward : -21.97
Episode : 36127 		 Timestep : 130060800 		 Average Reward : 35.74
Episode : 36131 		 Timestep : 130075200 		 Average Reward : -91.37
Episode : 36135 		 Timestep : 130089600 		 Average Reward : 17.78
Episode : 36139 		 Timestep : 130104000 		 Average Reward : -27.0
Episode : 36143 		 Timestep : 130118400 		 Average Reward : -19.72
Episode : 36147 		 Timestep : 130132800 		 Average Reward : -111.45
Episode : 36151 		 Timestep : 130147200 		 Average Reward : -36.55
Episode : 36155 		 Timestep : 130161600 		 Average Reward : 2.79
Episode : 36159 		 Timestep : 130176000 		 Average Reward : -22.31
Episode : 36163 		 Timestep : 130190400 		 Average Reward : -119.82
Episode : 36167 		 Timestep : 130204800 		 Average Reward : -28.19
Episode : 36171 		 Timestep : 130219200 		 Average Reward : -57.9

Episode : 36607 		 Timestep : 131788800 		 Average Reward : -13.17
Episode : 36611 		 Timestep : 131803200 		 Average Reward : 4.01
Episode : 36615 		 Timestep : 131817600 		 Average Reward : 0.1
Episode : 36619 		 Timestep : 131832000 		 Average Reward : -50.05
Episode : 36623 		 Timestep : 131846400 		 Average Reward : -65.58
Episode : 36627 		 Timestep : 131860800 		 Average Reward : 7.12
Episode : 36631 		 Timestep : 131875200 		 Average Reward : -112.22
Episode : 36635 		 Timestep : 131889600 		 Average Reward : -50.24
Episode : 36639 		 Timestep : 131904000 		 Average Reward : -8.19
Episode : 36643 		 Timestep : 131918400 		 Average Reward : -17.54
Episode : 36647 		 Timestep : 131932800 		 Average Reward : -18.2
Episode : 36651 		 Timestep : 131947200 		 Average Reward : -96.24
Episode : 36655 		 Timestep : 131961600 		 Average Reward : -9.45
Episode : 36659 		 Timestep : 131976000 		 Average Reward : -37.26
Episode : 36663 		 Timestep : 131990400 		 Average Reward : 0.11
Episod

Episode : 37103 		 Timestep : 133574400 		 Average Reward : -14.83
Episode : 37107 		 Timestep : 133588800 		 Average Reward : -51.38
Episode : 37111 		 Timestep : 133603200 		 Average Reward : -9.65
Episode : 37115 		 Timestep : 133617600 		 Average Reward : -57.42
Episode : 37119 		 Timestep : 133632000 		 Average Reward : -42.61
Episode : 37123 		 Timestep : 133646400 		 Average Reward : -18.95
Episode : 37127 		 Timestep : 133660800 		 Average Reward : 4.4
Episode : 37131 		 Timestep : 133675200 		 Average Reward : 9.73
Episode : 37135 		 Timestep : 133689600 		 Average Reward : -38.37
Episode : 37139 		 Timestep : 133704000 		 Average Reward : 0.54
Episode : 37143 		 Timestep : 133718400 		 Average Reward : -38.46
Episode : 37147 		 Timestep : 133732800 		 Average Reward : -24.73
Episode : 37151 		 Timestep : 133747200 		 Average Reward : -65.46
Episode : 37155 		 Timestep : 133761600 		 Average Reward : -13.89
Episode : 37159 		 Timestep : 133776000 		 Average Reward : -52.93
Epi

Episode : 37599 		 Timestep : 135360000 		 Average Reward : -43.83
Episode : 37603 		 Timestep : 135374400 		 Average Reward : -35.33
Episode : 37607 		 Timestep : 135388800 		 Average Reward : 18.95
Episode : 37611 		 Timestep : 135403200 		 Average Reward : -50.37
Episode : 37615 		 Timestep : 135417600 		 Average Reward : -5.27
Episode : 37619 		 Timestep : 135432000 		 Average Reward : -44.07
Episode : 37623 		 Timestep : 135446400 		 Average Reward : -42.6
Episode : 37627 		 Timestep : 135460800 		 Average Reward : -34.74
Episode : 37631 		 Timestep : 135475200 		 Average Reward : 39.33
Episode : 37635 		 Timestep : 135489600 		 Average Reward : -134.25
Episode : 37639 		 Timestep : 135504000 		 Average Reward : 14.45
Episode : 37643 		 Timestep : 135518400 		 Average Reward : -54.84
Episode : 37647 		 Timestep : 135532800 		 Average Reward : -72.03
Episode : 37651 		 Timestep : 135547200 		 Average Reward : -2.32
Episode : 37655 		 Timestep : 135561600 		 Average Reward : -56.9
E

Episode : 38095 		 Timestep : 137145600 		 Average Reward : 32.68
Episode : 38099 		 Timestep : 137160000 		 Average Reward : 6.83
Episode : 38103 		 Timestep : 137174400 		 Average Reward : -73.26
Episode : 38107 		 Timestep : 137188800 		 Average Reward : -48.83
Episode : 38111 		 Timestep : 137203200 		 Average Reward : -103.07
Episode : 38115 		 Timestep : 137217600 		 Average Reward : -36.9
Episode : 38119 		 Timestep : 137232000 		 Average Reward : -26.61
Episode : 38123 		 Timestep : 137246400 		 Average Reward : 24.99
Episode : 38127 		 Timestep : 137260800 		 Average Reward : -1.3
Episode : 38131 		 Timestep : 137275200 		 Average Reward : -14.75
Episode : 38135 		 Timestep : 137289600 		 Average Reward : 14.59
Episode : 38139 		 Timestep : 137304000 		 Average Reward : -32.57
Episode : 38143 		 Timestep : 137318400 		 Average Reward : -65.94
Episode : 38147 		 Timestep : 137332800 		 Average Reward : 4.31
Episode : 38151 		 Timestep : 137347200 		 Average Reward : 23.23
Episo

Episode : 38591 		 Timestep : 138931200 		 Average Reward : -100.69
Episode : 38595 		 Timestep : 138945600 		 Average Reward : -90.68
Episode : 38599 		 Timestep : 138960000 		 Average Reward : -8.52
Episode : 38603 		 Timestep : 138974400 		 Average Reward : -70.89
Episode : 38607 		 Timestep : 138988800 		 Average Reward : -41.45
Episode : 38611 		 Timestep : 139003200 		 Average Reward : -20.85
Episode : 38615 		 Timestep : 139017600 		 Average Reward : -62.3
Episode : 38619 		 Timestep : 139032000 		 Average Reward : -64.96
Episode : 38623 		 Timestep : 139046400 		 Average Reward : -1.03
Episode : 38627 		 Timestep : 139060800 		 Average Reward : -18.82
Episode : 38631 		 Timestep : 139075200 		 Average Reward : 15.09
Episode : 38635 		 Timestep : 139089600 		 Average Reward : -3.33
Episode : 38639 		 Timestep : 139104000 		 Average Reward : -66.39
Episode : 38643 		 Timestep : 139118400 		 Average Reward : -43.37
Episode : 38647 		 Timestep : 139132800 		 Average Reward : -80.56

Episode : 39083 		 Timestep : 140702400 		 Average Reward : -34.06
Episode : 39087 		 Timestep : 140716800 		 Average Reward : -40.1
Episode : 39091 		 Timestep : 140731200 		 Average Reward : -69.02
Episode : 39095 		 Timestep : 140745600 		 Average Reward : -12.69
Episode : 39099 		 Timestep : 140760000 		 Average Reward : 38.94
Episode : 39103 		 Timestep : 140774400 		 Average Reward : -71.61


In [None]:
Max_reward = current_ep_reward
checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, "final", run_num_pretrained)
print("--------------------------------------------------------------------------------------------")
print("saving model at : " + checkpoint_path)
ppo_agent.save(checkpoint_path)
print("model saved")
print("Elapsed Time  : ", datetime.now().replace(microsecond=0) - start_time)
print("--------------------------------------------------------------------------------------------")