# Setup

In [1]:
import gym
import tensorflow as tf
import retro

import numpy as np
import random as r

from skimage import transform # to preprocess game frames
from skimage.color import rgb2gray # to convert game frames into grayscale

import matplotlib.pyplot as plt

from collections import deque # to create ordered collections of frames

import warnings # ignore warning messages from skiimage during training

warnings.filterwarnings("ignore")

# Create Env

In [2]:
env = gym.make('SpaceInvaders-v0') # make env

print("Frame Size: ", env.observation_space)
print("Action Space: ", env.action_space.n, " moves")

H = 110 # height of preprocessed image
W = 84 # width of preprocessed image

moves = np.array(np.identity(env.action_space.n, dtype=int).tolist())

print(moves)

Frame Size:  Box(210, 160, 3)
Action Space:  6  moves
[[1 0 0 0 0 0]
 [0 1 0 0 0 0]
 [0 0 1 0 0 0]
 [0 0 0 1 0 0]
 [0 0 0 0 1 0]
 [0 0 0 0 0 1]]


In [3]:
for i_episode in range(1):
    observation = env.reset()
    print("input size", observation.shape)
#     for t in range(1):
#         env.render()
#         print(observation)
#         action = env.action_space.sample()
#         observation, reward, done, info = env.step(action)
#         if done:
#             print("Episode finished after {} timesteps".format(t+1))
#             break

input size (210, 160, 3)


# Hyperparameters

In [4]:
# model
STATE_SIZE = [110, 84, 4] # stack dimensions
STACK_SIZE = 4 # "phi length" - how many frames we consolidate into one training example
ACTION_SIZE = env.action_space.n # 6 possible moves
LEARNING_RATE = 2.5e-4

# training
NUM_GAMES = 1 # how many games we will play for training
MAX_STEPS = 50000 # max number of moves we'll make each game
HIDDEN_UNITS = 512
BATCH_SIZE = 1 # num of states we include for each weight update in gradient descent
TRAINING = True # when you just want agent to play
ENV_RENDER = False # when you want to see env

# explore params
EXP_START = 1.0
EXP_STOP = 0.01
DECAY = 1e-5

# q le table
GAMMA = 0.001 # discount for future rewards in bellman equation

# memory
LOAD_MEM = 64 # num experiences to be stored in memory during initialization
MEMORY_SIZE = int(1e6) # max num experiences stored in memory

# Preprocess raw input from game

In [5]:
def state_to_frame(state):
    
#     print("Input Size: ", state.shape)
    
    gray_doe = rgb2gray(state)
    crop_doe = gray_doe[6:-12, 4:-12]
    normalize_doe = crop_doe / 255.0
    frame = transform.resize(normalize_doe, [H, W])
    
#     print("Output Size: ", frame.shape)
    
    return  frame # Shape = 108 height, 84 width | vals = [0, 1]

In [6]:
def stack_frames(stack_o_frames, state, is_new_game):
    
    frame = state_to_frame(state)
    
    if is_new_game: # if new game, clear stack from last game
        
        for i in range(STACK_SIZE): # in new game, start with copies of OG frame
            stack_o_frames.append(frame)
            
#         print("Stack Size: ", len(stack_o_frames))  
#         print("State Size: ", state.shape)
        
    else: # if we already have stack, append latest frame
        
        stack_o_frames.append(frame) # deque automatically removes oldest frame
        
    state = np.stack(stack_o_frames, axis=2) # input to neural net, shape = (108, 84, 4)
       
#     print("Snapshot: ", state[:5])
        
    return stack_o_frames, state

# Model

In [7]:
class DQN:
    def __init__(self, name="DQN"):
        
        with tf.variable_scope(name): # create model skeleton, using placeholders for data to be fed in
            
            self.inputs_ = tf.placeholder(dtype=tf.float32, 
                                          shape=[None, *STATE_SIZE], 
                                          # *STATE_SIZE means we take each element of STATE_SIZE and input sequentially
                                          # ie. [None, 110, 84, 4] instead of [None, [110, 84, 4]]
                                          name="inputs")
            
            self.actions_ = tf.placeholder(dtype=tf.float32,
                                           shape=[None, ACTION_SIZE],
                                           name="actions")
            
            # Target QVal = Reward (state | actions) + discount * QVal (new_state | total_possible_actions)
            self.target_q = tf.placeholder(dtype=tf.float32,
                                           shape=[None],
                                           name="target_q")
            
            # First Convolution
            self.conv1 = tf.layers.conv2d(inputs=self.inputs_,
                                          filters=32,
                                          kernel_size=[8,8],
                                          strides=[4,4],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name = "conv1")
            
            # Output size = (W−K+2P)/S+1
            # height = (108 - 8 + 2*0) / 4 + 1= 26
            # width = (84 - 8) / 4 + 1= 20
            
            self.conv1_shape = self.conv1.get_shape()
            
            self.conv1_out = tf.nn.elu(self.conv1, name="conv1_out") # shape = (batch_size, 26 ht, 20 wd, 32 filters)
            
            # Second Convolution
            self.conv2 = tf.layers.conv2d(inputs=self.conv1_out,
                                          filters=64,
                                          kernel_size=[4,4],
                                          strides=[2,2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name = "conv2")
            
            self.conv2_shape = self.conv2.get_shape()
            
            self.conv2_out = tf.nn.elu(self.conv2, name="conv2_out") # shape = (batch_size, 12 ht, 9 wd, 64 filters)
            
            # Third Convolution
            self.conv3 = tf.layers.conv2d(inputs=self.conv2_out,
                                          filters=64,
                                          kernel_size=[3,3],
                                          strides=[2,2],
                                          padding="VALID",
                                          kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                                          name = "conv3")
            
            self.conv3_shape = self.conv2.get_shape()
            
            self.conv3_out = tf.nn.elu(self.conv3, name="conv3_out") # shape = (batch size, 12 ht, 9 wd, 64 filters)
            
            # Flat
            self.flat = tf.layers.flatten(self.conv3_out)
            
            self.flat_shape = self.flat.get_shape()
            
            # Hidden Layer
            self.fully_connected = tf.layers.dense(inputs=self.flat,
                                                   units=HIDDEN_UNITS,
                                                   activation=tf.nn.elu,
                                                   kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                                   name="fc1")
            
            self.fully_connected_shape = self.fully_connected.get_shape()
            
            # Linear
            self.output = tf.layers.dense(inputs=self.fully_connected,
                                          units=ACTION_SIZE,
                                          kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                          activation=None)
            
            self.output_shape = self.output.get_shape()
            
            # q val
            self.q = tf.reduce_sum(tf.multiply(self.output, self.actions_))
        
            # loss = (discounted_q_vals_in_future - q_val_of_move)^2
            self.loss = tf.reduce_mean(tf.square(self.target_q - self.q))
            
            # backprop
            self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(self.loss)                    
            
    def print_shapes(self):
        print("c1 ", self.conv1_shape)
        print("c2 ", self.conv2_shape)
        print("c3 ", self.conv3_shape)
        print("flat ", self.fully_connected_shape)
        print("fully_connected ", self.fully_connected_shape)
        print("output ", self.output_shape)
            

In [8]:
tf.reset_default_graph()

DQN = DQN()

DQN.print_shapes()


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
c1  (?, 26, 20, 32)
c2  (?, 12, 9, 64)
c3  (?, 12, 9, 64)
flat  (?, 512)
fully_connected  (?, 512)
output  (?, 6)


# Memory module

In [9]:
class Memory():
    def __init__(self):
        self.buffer = deque(maxlen = MEMORY_SIZE)
        
    def add(self, experience):
        self.buffer.append(experience)
        
    def sample(self):
        buffer_size = len(self.buffer)
        index = np.random.choice(np.arange(buffer_size),
                                 size=BATCH_SIZE,
                                 replace=False)
        
        return [self.buffer[i] for i in index]

# Load memory

In [10]:
memory = Memory()

state = env.reset() # reset game
stack = deque([np.zeros((H,W), dtype=np.int) for i in range(STACK_SIZE)], maxlen=STACK_SIZE) # load deque with zeros

# load first state, create first stack
stack, state = stack_frames(stack, state, is_new_game=True)

for i in range(LOAD_MEM):
    
    choice = r.randint(1, len(moves)) - 1 # random index
    action = moves[choice] # chosen index
    next_state, reward, done, _logs_ = env.step(choice) # act
    
    env.render()
    
    stack, next_state = stack_frames(stack, next_state, is_new_game=False) # save next state
    
    if done: # when we lose, create a 0-matrix for next_state
        next_state = np.zeros(state.shape)
    
    memory.add((state, action, reward, next_state, done)) # add experience to memory 
    
    state = next_state
    
    if done: # start new game
        state = env.reset()
        stack, state = stack_frames(stack, state, is_new_game=True) 

print("Experiences Loaded: ", len(memory.buffer))

env.close()

Experiences Loaded:  64


# Tensorboard

In [11]:
writer = tf.summary.FileWriter("./tensorboard/dqn/1") # write

tf.summary.scalar("Loss", DQN.loss) 

write_op = tf.summary.merge_all() # merge

# Training Runs

In [12]:
def move(state, moves, training_step):
    
    adventure = r.uniform(0, 1)

    explore_prob = EXP_STOP - (EXP_START - EXP_STOP) * np.exp(-DECAY * training_step)

    if explore_prob > adventure: # explore randomly
        choice = r.randint(1, len(moves)) - 1
        action = moves[choice]
        
    else: # use dqn
        dq_table = sesh.run(DQN.output, feed_dict={DQN.inputs_:state.reshape((1, *state.shape))}) # run graph with state as input
        choice = np.argmax(dq_table) # pick move with highest predicted q val
        action = moves[choice]
        
    return action, explore_prob

In [17]:
def learn(DQN, memory, sesh):

    batch = memory.sample()

    # retrieve minibatches from memory
    states_mini = np.array([states[0] for states in batch], ndmin=3)
    actions_mini = np.array([actions[1] for actions in batch])
    rewards_mini = np.array([rewards[2] for rewards in batch])
    next_states_mini = np.array([next_states[3] for next_states in batch], ndmin=3)
    dones_mini = np.array([dones[4] for dones in batch])
    
    target_qs = []
    
    # predict q vals for next state
    qvals_next_state = sesh.run(DQN.output, feed_dict={DQN.inputs_:next_states_mini})
    
    for i in range(0, len(batch)): # loop through batches
        
        gameover = dones_mini[i]
        
        if gameover: # target_q = reward
            target_qs.append(rewards_mini[i])
        
        else: # target_q = reward + gamma * max_q(s', a')
            target = rewards_mini[i] + GAMMA * np.max(qvals_next_state[i])
            target_qs.append(target)
            
    targets_mini = np.array([target for target in target_qs]) # create minibatch of target q vals
    
    # calculate loss and backprop on minibatches
    loss, _ = sesh.run([DQN.loss, DQN.optimizer], feed_dict={DQN.inputs_:states_mini,
                                                             DQN.target_q:targets_mini,
                                                             DQN.actions_:actions_mini})
    
    # wrte to tensorboard during each minibatch
    summary = sesh.run(write_op, feed_dict={DQN.inputs_:states_mini,
                                            DQN.target_q:targets_mini,
                                            DQN.actions_:actions_mini})
    writer.add_summary(summary, game)
    writer.flush()
    
    return loss

In [20]:
saver = tf.train.Saver() 

if TRAINING == True:
    
    with tf.Session() as sesh:
        
        tf.global_variables_initializer().run()
        
        training_step = 0
        
        for game in range(NUM_GAMES): # play games
            
            # initialize
            game_step = 0
            game_rewards = []
            state = env.reset()
            
            stack, state = stack_frames(stack, state, is_new_game=True)
            
            while game_step < MAX_STEPS:
                
                game_step += 1
                training_step += 1
                
                action, explore_prob = move(state, moves, training_step) # choose move
                
                next_state, reward, done, _logs_ = env.step(np.argmax(action)) # act
                
                if ENV_RENDER: # show games
                    env.render()
                    
                game_rewards.append(reward) # track reward
                
                if done: # we've lost
                    next_state = np.zeros(state.shape)
                    stack, next_state = stack_frames(stack, next_state, is_new_game=False) 
                    step = MAX_STEPS
                    total_reward = np.sum(game_rewards)
                    memory.add((state, action, reward, next_state, done))
                    
                    print('Game: {}'.format(game), 
                          'Total reward: {}'.format(total_reward),
                          'Explore P: {}'.format(explore_prob),
                          'Training Loss {}'.format(loss))
        
                else: # we're still alive!
                    stack, next_state = stack_frames(stack, next_state, is_new_game=False)
                    memory.add((state, action, reward, next_state, done))
                    state = next_state
                
                loss = learn(DQN, memory, sesh)
                
            if game % 5 == 0: # save model every five games
                save_path = saver.save(sesh, './models/model.cpkt')
                print('Checkpoint')

Game: 0 Total reward: 40.0 Explore P: -0.9761958277305628 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9761859658215951 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.976176104011246 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9761662422995145 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9761563806863998 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9761465191719005 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.976136657756016 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.976126796438745 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9761169352200868 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9761070741000403 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9760972130786045 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9760873521557785 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.976077491331561 Training Loss None
Gam

Game: 0 Total reward: 40.0 Explore P: -0.975032802973397 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9750229526946188 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9750131025143429 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9750032524325682 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749934024492939 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749835525645189 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749737027782422 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.974963853090463 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749540035011801 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749441540103927 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749343046180996 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749244553242999 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9749146061289928 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9739597021643429 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9739498626165191 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9739400231670903 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9739301838160555 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9739203445634137 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9739105054091639 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9739006663533051 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9738908273958364 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9738809885367569 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9738711497760654 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.973861311113761 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9738514725498428 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9738416340843097 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9727600032928166 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9727501757419216 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9727403482893014 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9727305209349555 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9727206936788825 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9727108665210815 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9727010394615518 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.972691212500292 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9726813856373014 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9726715588725791 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9726617322061236 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9726519056379346 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9726420791680106 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9716893784857481 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9716795616410476 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.971669744894515 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9716599282461493 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9716501116959498 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9716402952439152 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9716304788900445 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.971620662634337 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9716108464767915 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9716010304174072 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9715912144561828 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9715813985931177 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9715715828282107 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9705414735682438 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9705316682025351 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9705218629348794 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.970512057765276 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9705022526937238 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9704924477202218 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.970482642844769 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9704728380673646 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9704630333880073 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9704532288066966 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.970443424323431 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9704336199382098 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9704238156510319 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9694634709574533 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9694536763717168 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9694438818839255 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9694340874940788 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9694242932021753 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9694144990082144 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9694047049121948 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9693949109141158 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9693851170139762 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9693753232117752 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9693655295075118 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9693557359011847 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9693459423927933 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.968327952059433 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9683181688288287 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.968308385696056 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682986026611143 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682888197240026 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682790368847196 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682692541432645 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682594714996363 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682496889538342 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.968239906505857 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682301241557038 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682203419033736 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9682105597488654 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9671742059200981 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9671644342268975 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9671546626314133 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9671448911336445 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9671351197335902 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9671253484312495 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9671155772266213 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9671058061197046 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9670960351104986 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9670862641990021 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9670764933852143 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9670667226691341 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9670569520507606 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9660803834643837 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9660706227093528 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9660608620519292 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9660511014921116 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.966041341029899 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9660315806652906 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9660218203982853 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9660120602288823 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9660023001570804 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9659925401828788 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9659827803062765 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9659730205272724 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9659632608458656 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.964929287891849 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9649195386477164 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9649097895010756 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.964900040451926 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648902915002663 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648805426460957 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648707938894131 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648610452302175 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648512966685082 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648415482042839 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648317998375437 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9648220515682868 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.964812303396512 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9638379783368392 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9638282400057475 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9638185017720388 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9638087636357118 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9637990255967657 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9637892876551994 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9637795498110122 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.963769812064203 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9637600744147706 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9637503368627143 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.963740599408033 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9637308620507259 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9637211247907916 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9627770732387688 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9627673455166752 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9627576178918582 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9627478903643171 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9627381629340507 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.962728435601058 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9627187083653382 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9627089812268904 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9626992541857134 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9626895272418063 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9626798003951682 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9626700736457982 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.962660346993695 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9615910088558027 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615812929942935 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615715772299425 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615618615627486 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615521459927109 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615424305198285 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615327151441002 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615229998655253 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615132846841025 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9615035695998312 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9614938546127102 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9614841397227386 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9614744249299154 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9605422569941824 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9605325516201394 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9605228463431496 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9605131411632122 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9605034360803261 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604937310944902 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604840262057038 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604743214139658 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604646167192752 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604549121216311 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604452076210326 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604355032174784 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9604257989109679 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.959417080704487 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9594073865821506 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.959397692556755 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.959387998628299 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593783047967821 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593686110622028 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593589174245605 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.959349223883854 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593395304400825 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593298370932449 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593201438433403 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593104506903678 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9593007576343262 Training Loss None
Gam

Game: 0 Total reward: 40.0 Explore P: -0.9583319413655609 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9583222580945637 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9583125749203987 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.958302891843065 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582932088625615 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582835259788873 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582738431920417 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582641605020232 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582544779088312 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582447954124648 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582351130129227 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582254307102042 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9582157485043081 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9572189996971053 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9572093275554691 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.957199655510554 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9571899835623586 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9571803117108824 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9571706399561241 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9571609682980828 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9571512967367578 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9571416252721477 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.957131953904252 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9571222826330694 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.957112611458599 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.95710294038084 Training Loss None
Game

Game: 0 Total reward: 40.0 Explore P: -0.9560783543905464 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560686936553063 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560590330166732 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560493724746457 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560397120292232 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560300516804048 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560203914281893 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560107312725759 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9560010712135635 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9559914112511513 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9559817513853383 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9559720916161232 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9559624319435055 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9550162524638718 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9550066023495979 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549969523318246 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.954987302410551 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549776525857759 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549680028574988 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549583532257185 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.954948703690434 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549390542516444 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549294049093486 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549197556635459 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549101065142351 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9549004574614153 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.953916760765406 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9539071216459941 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538974826229728 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538878436963414 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538782048660986 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538685661322437 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538589274947756 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538492889536935 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538396505089963 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.953830012160683 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538203739087527 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538107357532044 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9538010976940373 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9527607543950306 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9527511268356246 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9527414993724935 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9527318720056368 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9527222447350531 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9527126175607418 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9527029904827016 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9526933635009317 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9526837366154313 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.952674109826199 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9526644831332344 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9526548565365361 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9526452300361034 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.951596518387044 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515869024699397 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515772866489942 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515676709242065 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515580552955754 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515484397631002 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515388243267798 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515292089866133 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515195937425998 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515099785947382 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9515003635430276 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.951490748587467 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9514811337280554 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9504528991185847 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.950443294637616 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9504336902526916 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9504240859638106 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.950414481770972 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9504048776741749 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9503952736734183 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9503856697687011 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9503760659600224 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9503664622473815 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9503568586307772 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9503472551102087 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9503376516856746 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9493298263522988 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9493202331020015 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9493106399476363 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9493010468892022 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492914539266982 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492818610601235 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492722682894768 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492626756147573 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492530830359642 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492434905530963 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492338981661528 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9492243058751326 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.949214713680035 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9481793210019468 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9481697392566456 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9481601576071614 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9481505760534931 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.94814099459564 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9481314132336008 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.948121831967375 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9481122507969613 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9481026697223588 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9480930887435665 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9480835078605836 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9480739270734089 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9480643463820417 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9470301954299827 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9470206251758798 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9470110550174788 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9470014849547791 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9469919149877795 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.946982345116479 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9469727753408769 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9469632056609719 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9469536360767632 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.94694406658825 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9469344971954312 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9469249278983058 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9469153586968729 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9458920068539395 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9458824479816654 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9458728892049796 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.945863330523881 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9458537719383688 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9458442134484419 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9458346550540995 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9458250967553405 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.945815538552164 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9458059804445692 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9457964224325549 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9457868645161202 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9457773066952643 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9447360770042385 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.944726529691205 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9447169824736443 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9447074353515552 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9446978883249368 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9446883413937883 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9446787945581087 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.944669247817897 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.944659701173152 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9446501546238731 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9446406081700592 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9446310618117094 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9446215155488227 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9436006168054154 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435910808469272 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435815449837981 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435720092160272 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435624735436136 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435529379665561 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435434024848538 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.943533867098506 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435243318075116 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435147966118697 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9435052615115791 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.943495726506639 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9434861915970486 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9425141315165383 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9425046064228487 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424950814244095 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424855565212199 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424760317132788 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424665070005852 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424569823831384 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424474578609373 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.942437933433981 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424284091022683 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424188848657985 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9424093607245706 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9423998366785837 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9413908277209289 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413813138602211 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413718000946513 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413622864242188 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413527728489226 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413432593687615 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413337459837348 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413242326938415 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413147194990806 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9413052063994513 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9412956933949523 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.941286180485583 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9412766676713423 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9402973571351462 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402878542090896 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402783513780617 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402688486420616 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402593460010885 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402498434551413 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402403410042192 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402308386483209 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402213363874459 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9402118342215928 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.940202332150761 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9401928301749495 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9401833282941572 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9392431122828638 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.939233619899203 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9392241276104655 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9392146354166504 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9392051433177568 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391956513137837 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391861594047302 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391766675905954 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391671758713781 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391576842470776 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391481927176929 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391387012832229 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9391292099436669 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9381995189409197 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381900369931401 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381805551401794 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381710733820369 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381615917187116 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381521101502023 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381426286765082 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381331472976284 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.938123666013562 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381141848243079 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9381047037298652 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9380952227302329 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9380857418254102 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9371854880589217 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9371760162514002 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9371665445385964 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9371570729205092 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9371476013971377 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9371381299684809 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.937128658634538 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.937119187395308 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9371097162507898 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9371002452009826 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9370907742458854 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9370813033854973 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9370718326198174 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9361441568592177 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9361346954649561 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.936125234165308 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9361157729602725 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9361063118498486 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9360968508340352 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9360873899128315 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9360779290862365 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9360684683542495 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9360590077168691 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9360495471740948 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9360400867259254 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.93603062637236 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9350094648223718 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9350000147749739 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349905648220759 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349811149636771 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349716651997764 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349622155303727 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349527659554654 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349433164750534 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349338670891356 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349244177977112 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349149686007793 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9349055194983389 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9348960704903891 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9339516467104906 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9339422072412209 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9339327678663455 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9339233285858632 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9339138893997735 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9339044503080749 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9338950113107669 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9338855724078484 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9338761335993185 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9338666948851762 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9338572562654205 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9338478177400504 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9338383793090653 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9328101559591468 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9328007279047276 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327912999445885 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327818720787284 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327724443071466 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327630166298418 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327535890468136 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327441615580607 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327347341635822 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327253068633772 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327158796574446 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9327064525457837 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9326970255283933 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9318018886114987 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317924706397025 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317830527620855 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.931773634978647 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317642172893856 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317547996943009 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317453821933915 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317359647866567 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317265474740953 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9317171302557069 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.93170771313149 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.931698296101444 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9316888791655676 Training Loss None
Gam

Game: 0 Total reward: 40.0 Explore P: -0.9307946995329356 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.93078529163298 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307758838271027 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307664761153029 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307570684975801 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307476609739328 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307382535443602 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307288462088615 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307194389674357 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9307100318200819 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.930700624766799 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9306912178075862 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9306818109424425 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9297885875703269 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297791897314404 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297697919865319 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297603943356003 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297509967786448 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297415993156645 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297322019466582 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297228046716253 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297134074905645 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9297040104034751 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9296946134103562 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9296852165112066 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9296758197060256 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9286896779103787 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.928680291060534 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286709043045572 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286615176424475 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286521310742041 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286427445998258 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286333582193118 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286239719326611 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286145857398728 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9286051996409459 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9285958136358797 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9285864277246729 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9285770419073248 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9275076737352211 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.927498298705359 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274889237692467 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274795489268833 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274701741782679 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274607995233994 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274514249622771 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274420504948999 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274326761212669 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274233018413772 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274139276552298 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9274045535628237 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9273951795641582 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9263927031425547 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263833392623428 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263739754757692 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263646117828329 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263552481835332 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263458846778689 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263365212658393 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263271579474434 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.92631779472268 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9263084315915485 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9262990685540479 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9262897056101772 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9262803427599354 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9253071173683887 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252977643439803 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252884114131015 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252790585757517 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252697058319297 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252603531816347 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252510006248658 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252416481616219 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252322957919022 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252229435157058 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252135913330316 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9252042392438788 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9251948872482463 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9242321324182338 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.924222790143621 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9242134479624305 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9242041058746614 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241947638803126 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241854219793835 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241760801718728 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241667384577797 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241573968371033 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241480553098427 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241387138759968 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241293725355649 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9241200312885458 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.923074402514919 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9230650718175474 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9230557412134822 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9230464107027228 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9230370802852679 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9230277499611167 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9230184197302684 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9230090895927218 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9229997595484761 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9229904295975305 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9229810997398838 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9229717699755354 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9229624403044842 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9220672261333465 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9220579055076883 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.922048584975236 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9220392645359884 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9220299441899449 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9220206239371044 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9220113037774659 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9220019837110285 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9219926637377913 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9219833438577534 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9219740240709139 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9219647043772717 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.921955384776826 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9210332054999579 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9210238952144545 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9210145850220534 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9210052749227537 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209959649165547 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209866550034551 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209773451834542 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209680354565511 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209587258227449 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209494162820343 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209401068344188 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209307974798974 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9209214882184689 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9200003319926998 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199910320358798 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199817321720588 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.919972432401236 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199631327234105 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199538331385813 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199445336467473 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.919935234247908 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199259349420621 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199166357292088 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9199073366093472 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9198980375824763 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9198887386485953 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9189964738151386 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189871838968502 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189778940714605 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189686043389684 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189593146993733 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189500251526741 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189407356988699 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189314463379598 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189221570699428 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189128678948181 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9189035788125847 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9188942898232415 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9188850009267878 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.9179565802135861 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9179473006941816 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9179380212675718 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.917928741933756 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9179194626927328 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9179101835445018 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9179009044890617 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9178916255264118 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9178823466565508 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9178730678794783 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.917863789195193 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.917854510603694 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9178452321049806 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9168715059069854 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9168622372382698 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9168529686622403 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9168437001788963 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9168344317882364 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9168251634902601 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9168158952849663 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9168066271723541 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9167973591524226 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9167880912251707 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9167788233905978 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9167695556487027 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9167602879994845 Training Loss None

Game: 0 Total reward: 40.0 Explore P: -0.915787700394934 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9157784425642191 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9157691848260823 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9157599271805223 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9157506696275384 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9157414121671295 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9157321547992947 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9157228975240332 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.915713640341344 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.915704383251226 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9156951262536785 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9156858693487007 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9156766125362913 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9147144092916674 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.91470516219381 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146959151884232 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.914686668275506 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146774214550574 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146681747270765 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146589280915626 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146496815485143 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146404350979312 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146311887398121 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146219424741561 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146126963009623 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9146034502202298 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9136700721707672 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9136608355162289 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9136515989540566 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9136423624842495 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9136331261068066 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.913623889821727 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9136146536290098 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9136054175286542 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.913596181520659 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9135869456050234 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9135777097817466 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9135684740508275 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9135592384122653 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9125807842918481 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125715585301339 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125623328606771 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125531072834765 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125438817985311 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125346564058401 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125254311054027 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125162058972177 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9125069807812844 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9124977557576018 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.912488530826169 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.912479305986985 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9124700812400489 Training Loss None
G

Game: 0 Total reward: 40.0 Explore P: -0.9114927810145619 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9114835661328262 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.911474351343239 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.911465136645799 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9114559220405056 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9114467075273579 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9114374931063549 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9114282787774954 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.911419064540779 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9114098503962044 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9114006363437707 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9113914223834771 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9113822085153228 Training Loss None
Ga

Game: 0 Total reward: 40.0 Explore P: -0.9104152649306015 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9104060608239729 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103968568093848 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103876528868364 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103784490563268 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.910369245317855 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103600416714202 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103508381170212 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103416346546575 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103324312843278 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103232280060315 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103140248197674 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9103048217255348 Training Loss None


Game: 0 Total reward: 40.0 Explore P: -0.9093206222069318 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9093114290466756 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9093022359783505 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092930430019557 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092838501174901 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.909274657324953 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092654646243433 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092562720156603 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092470794989027 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092378870740699 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.909228694741161 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092195025001747 Training Loss None
Game: 0 Total reward: 40.0 Explore P: -0.9092103103511107 Training Loss None
G

KeyboardInterrupt: 