In [1]:
import gym
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import random
import cv2
import os
import time
from tqdm import tqdm

## Configuration parameters

In [2]:
screen_width = 84
screen_height = 84
history_length = 4

memory_size = 1000000
batch_size = 32
gamma = 0.99

learning_rate = 0.0005
learning_rate_decay = 0.96
learning_rate_decay_step = 50000

max_steps = 50000000
learn_start = 50000

# Exploration parameters
ep_min = 0.1
ep_start = 1.0
ep_end_time = memory_size

# Update Target network
target_q_update_step = 10000

# Train batch
train_frequency = 4
# Print performance
test_step = 50000

# Clip rewards
min_reward = -1.0
max_reward = 1.0

# How many times should the same action be taken (avoids taking a new decision after every environment iteration)
action_repeat = 4

# Whether or not to render the environment 
display = False

## Load the game environment

In [3]:
class GymEnvironment():
    def __init__(self,name):
        self.env = gym.make(name)
        self.action_repeat = action_repeat
        
    def execute_action(self,action,is_training=True):
        cum_reward = 0
        start_lives = self.env.ale.lives()
        
        for _ in xrange(self.action_repeat):
            screen, reward, terminal, _ = self.env.step(action)
            cum_reward += reward
            
            if is_training and start_lives > self.env.ale.lives():
                cum_reward -= 1
                terminal = True
                
            if terminal:
                break
                
        reward = cum_reward
        
        if display:
            self.env.render()
        
        screen = cv2.resize(cv2.cvtColor(screen,cv2.COLOR_RGB2GRAY)/255.,(screen_width,screen_height))
        return screen, reward, terminal
    
    @property
    def action_size(self):
        return self.env.action_space.n
    
    def new_game(self):
        screen = self.env.reset()
        screen = cv2.resize(cv2.cvtColor(screen,cv2.COLOR_RGB2GRAY)/255.,(screen_width,screen_height))
        
        return screen

In [4]:
'''screen = env.reset()
print(screen.shape)
env.render()
env.render(close=True)
'''

'screen = env.reset()\nprint(screen.shape)\nenv.render()\nenv.render(close=True)\n'

## Experience Replay
This class will allow us to store the experiences and to take random samples to update our target network parameters.

In [5]:
class Experience_Buffer():
    def __init__(self,memory_size = 50000):
        self.memory = []
        self.memory_size = memory_size
        self.actions = np.empty(self.memory_size,dtype=np.uint8)
        self.rewards = np.empty(self.memory_size,dtype=np.integer)
        self.screens = np.empty((self.memory_size, screen_height, screen_width),dtype=np.float16)
        self.terminals = np.empty(self.memory_size,dtype=np.bool)
        self.prestates = np.empty((batch_size,history_length,screen_height, screen_width),dtype=np.float16)
        self.poststates = np.empty((batch_size,history_length,screen_height, screen_width),dtype=np.float16)
        self.current = 0
        self.count = 0
        
    def add(self, screen, reward, action, terminal):
        self.actions[self.current] = action
        self.rewards[self.current] = reward
        self.screens[self.current,...] = screen
        self.terminals[self.current] = terminal
        self.count = max(self.count, self.current+1)
        self.current = (self.current + 1) % self.memory_size
        
    def getState(self,index):
        index = index % self.count
        # If index is not in the beginning, just use simple slicing
        if index >= history_length-1:
            return self.screens[(index-(history_length-1)):(index+1),...]
        # Otherwise determine the list of indexes which need to be returned
        else:
            indexes = [(index-i) % self.count for i in reversed(range(history_length))]
            return self.screens[indexes,...]
        
    def sample_from_replay(self):
        # Sample random indexes
        indexes = []
        while len(indexes) < batch_size:
            while True:
                index = random.randint(history_length,self.count-1)
                # If index wraps over current pointer, get new one
                if index >= self.current and index - history_length < self.current:
                    continue
                # If index wraps over terminal state, get new one
                if self.terminals[(index-history_length):index].any():
                    continue
                # Use the index otherwise
                break
            self.prestates[len(indexes),...] = self.getState(index-1)
            self.poststates[len(indexes),...] = self.getState(index)
            indexes.append(index)
            
        actions = self.actions[indexes]
        rewards = self.rewards[indexes]
        terminals = self.terminals[indexes]
        
        return np.transpose(self.prestates,(0,2,3,1)),actions,rewards,np.transpose(self.poststates,(0,2,3,1)),terminals

## History
This class will allow us to stack the last K screens to use them as the input to the network (history of states).

In [6]:
class History:
    def __init__(self):
        self.history = np.zeros([history_length,screen_height,screen_width],dtype=np.float32)
        
    def add(self, screen):
        self.history[:-1] = self.history[1:]
        self.history[-1] = screen
    
    def reset(self):
        self.history *= 0
        
    def get(self):
        return np.transpose(self.history,(1,2,0))

## Auxiliar functions 

In [7]:
checkpoint_dir = 'Models/'
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)
    
log_dir = 'Models/Logs/'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
    
def conv2d(x,output_dim,kernel_size,stride,
           initializer=tf.contrib.layers.xavier_initializer(),
           activation_fn = tf.nn.relu,
           padding = 'VALID',
           name = 'conv2d'):
    with tf.variable_scope(name):
        stride = [1,stride[0],stride[1],1]
        kernel_shape = [kernel_size[0],kernel_size[1],x.get_shape()[-1],output_dim]
        
        w = tf.get_variable('w',kernel_shape,tf.float32,initializer=initializer)
        conv = tf.nn.conv2d(x,w,stride,padding)
        b = tf.get_variable('biases',[output_dim],initializer=tf.constant_initializer(0.0))
        out = activation_fn(tf.nn.bias_add(conv,b))
        
    return out,w,b       

def linear(input_,output_size,stddev=0.02,bias_start=0.0,activation_fn=None,name='linear'):
    shape = input_.get_shape().as_list()
    
    with tf.variable_scope(name):
        w = tf.get_variable('Matrix',[shape[1],output_size],tf.float32,
                            tf.random_normal_initializer(stddev=stddev))
        b = tf.get_variable('bias',[output_size],initializer=tf.constant_initializer(bias_start))
        
        out = tf.nn.bias_add(tf.matmul(input_,w),b)
        
    if activation_fn!=None:
        return activation_fn(out), w, b
    else:
        return out,w,b

## Learning agent

In [8]:
class Agent():
    def __init__(self,environment,sess):
        self.sess = sess
        self.env = environment
        self.exp_replay = Experience_Buffer(memory_size)
        self.history = History()
        
        with tf.variable_scope('step'):
            self.step_op = tf.Variable(0,trainable=False,name='step')
            self.step_input = tf.placeholder(tf.int32,None,name='step_input')
            self.step_assign_op = self.step_op.assign(self.step_input)
        
        # Build Deep network
        self.build_dqn()
        
    def build_dqn(self):
        self.w = {}
        self.t_w = {}
        
        initializer = tf.truncated_normal_initializer(0, 0.02)
        activation_fn = tf.nn.relu
        
        # TRAINING NETWORK
        with tf.variable_scope('prediction'):
            self.input_state = tf.placeholder(tf.float32,[None, screen_height, screen_width, history_length],
                                              name='input_state')
            self.l1, self.w['l1_w'], self.w['l1_b'] = conv2d(self.input_state,32,[8,8],[4,4],initializer, 
                                                             activation_fn, name='l1')
            self.l2, self.w['l2_w'], self.w['l2_b'] = conv2d(self.l1,64,[4,4],[2,2],initializer, 
                                                             activation_fn, name='l2')
            self.l3, self.w['l3_w'], self.w['l3_b'] = conv2d(self.l2,64,[3,3],[1,1],initializer, 
                                                             activation_fn, name='l3')
            
            shape = self.l3.get_shape().as_list()
            self.l3_flat = tf.reshape(self.l3,[-1,reduce(lambda x,y: x*y,shape[1:])])
            
            # Standard DQN implementation
            self.l4, self.w['l4_w'], self.w['l4_b'] = linear(self.l3_flat,512,activation_fn=activation_fn,name='l4')
            self.q, self.w['q_w'],self.w['q_b'] = linear(self.l4,self.env.action_size,name='q')
            
            self.q_action = tf.argmax(self.q,dimension=1)
            
            # Add output summaries
            q_summary = []
            avg_q = tf.reduce_mean(self.q,0)  # Mean q_value per action for each batch
            for idx in xrange(self.env.action_size):
                q_summary.append(tf.histogram_summary('q/%s' % idx, avg_q[idx]))
            self.q_summary = tf.merge_summary(q_summary,'q_summary')
                           
        # TARGET NETWORK
        with tf.variable_scope('target'):
            self.target_state = tf.placeholder(tf.float32,[None, screen_height, screen_width, history_length],
                                              name='target_state')
            self.target_l1, self.t_w['l1_w'], self.t_w['l1_b'] = conv2d(self.target_state,32,[8,8],[4,4],initializer, 
                                                             activation_fn, name='target_l1')
            self.target_l2, self.t_w['l2_w'], self.t_w['l2_b'] = conv2d(self.target_l1,64,[4,4],[2,2],initializer, 
                                                             activation_fn, name='target_l2')
            self.target_l3, self.t_w['l3_w'], self.t_w['l3_b'] = conv2d(self.target_l2,64,[3,3],[1,1],initializer, 
                                                             activation_fn, name='target_l3')
            
            shape = self.target_l3.get_shape().as_list()
            self.target_l3_flat = tf.reshape(self.target_l3,[-1,reduce(lambda x,y: x*y,shape[1:])])
            
            # Standard DQN
            self.target_l4, self.t_w['l4_w'], self.t_w['l4_b'] = linear(self.target_l3_flat,512,
                                                                    activation_fn=activation_fn,name='target_l4')
            self.target_q, self.t_w['q_w'],self.t_w['q_b'] = linear(self.target_l4,self.env.action_size,
                                                                    name='target_q')
            
            self.target_q_idx = tf.placeholder(tf.int32,[None,None],'outputs_idx')
            self.target_q_with_idx = tf.gather_nd(self.target_q,self.target_q_idx)  # Gets q value according to index
        
        # COPY TRAINING NETWORK INTO TARGET
        with tf.variable_scope('pred_to_target'):
            self.t_w_input = {}
            self.t_w_assign_op = {}
            
            for name in self.w.keys():
                self.t_w_input[name] = tf.placeholder(tf.float32,self.t_w[name].get_shape().as_list(),name=name)
                self.t_w_assign_op[name] = self.t_w[name].assign(self.t_w_input[name])
                    
        # OPTIMIZER
        with tf.variable_scope('optimizer'):
            self.target_q_t = tf.placeholder(tf.float32,[None],name='target_q_t')
            self.action = tf.placeholder(tf.int64,[None],name='action')
            
            # One hot of the action which was taken
            action_one_hot = tf.one_hot(self.action,self.env.action_size,1.0,0.0,name='action_one_hot')
            # Extract the q_value of the action
            q_acted = tf.reduce_sum(self.q * action_one_hot,reduction_indices=1,name='q_acted')
            
            self.delta = self.target_q_t - q_acted      # Error
            self.clipped_delta = tf.clip_by_value(self.delta,-1.0,1.0,name='clipped_delta')
            
            # PAY ATTENTION TO THIS ONE!
            self.global_step = tf.Variable(0,trainable=False)
            
            self.loss = tf.reduce_mean(tf.square(self.clipped_delta),name='loss')
            self.learning_rate_step = tf.placeholder(tf.int64,None,name='learning_rate_step')
            self.learning_rate_op = tf.maximum(0.00025,tf.train.exponential_decay(
                learning_rate,self.learning_rate_step,learning_rate_decay_step,learning_rate_decay,staircase=True))
            self.optim = tf.train.RMSPropOptimizer(self.learning_rate_op,momentum=0.95,epsilon=0.01).minimize(self.loss)
            
        # Add summaries
        with tf.variable_scope('summary'):
            scalar_summary_tags = ['average.reward', 'average.loss', 'average.q', 'episode.max reward', 'episode.min reward',\
                                  'episode.avg reward', 'episode.num of games', 'training.learning_rate']
            self.summary_placeholders = {}
            self.summary_ops = {}
            for tag in scalar_summary_tags:
                self.summary_placeholders[tag] = tf.placeholder(tf.float32,None,name=tag.replace(' ','_'))
                self.summary_ops[tag] = tf.scalar_summary(tag, self.summary_placeholders[tag])
            
            histogram_summary_tags = ['episode.rewards','episode.actions']
            for tag in histogram_summary_tags:
                self.summary_placeholders[tag] = tf.placeholder(tf.float32,None,name=tag.replace(' ','_'))
                self.summary_ops[tag] = tf.histogram_summary(tag, self.summary_placeholders[tag])
                
            self.writer = tf.train.SummaryWriter(log_dir,self.sess.graph)
        
        tf.initialize_all_variables().run()
        self._saver = tf.train.Saver(self.w.values() + [self.step_op], max_to_keep = 10)
        
        self.load_model()
        self.update_target_q_network()
                    
    def update_target_q_network(self):
        for name in self.w.keys():
            self.t_w_assign_op[name].eval({self.t_w_input[name]:self.w[name].eval()})
            
    def load_model(self,step=None):
        print('[*] Loading Checkpoints...')

        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            fname = os.path.join(checkpoint_dir,ckpt_name)
            self._saver.restore(self.sess,fname)
            print('[*] Load succesfull: %s' % fname)
            return True
        else:
            print('[!] Load failed: %s' % checkpoint_dir)
            return False
        
    def save_model(self,step=None):
        print('[*] Saving checkpoints...')
        self._saver.save(self.sess,checkpoint_dir,global_step=step)    
        
    def train(self):
        start_step = self.step_op.eval()
        print('ok')
        # VERIFY THE NEED OF THE FOLLOWING VARIABLES
        num_game, self.update_count, ep_reward = 0, 0, 0.
        total_reward, self.total_loss, self.total_q = 0., 0., 0.
        max_avg_ep_reward = 0
        ep_rewards, actions = [], []
        
        # New game (we could modify this to start randomly -> taking random initial actions)
        screen = self.env.new_game()
        
        # Stacking the screen in the first input buffer
        for _ in range(history_length):
            self.history.add(screen)
            
        # Training:
        for self.step in tqdm(range(start_step,max_steps),ncols=70,initial=start_step):
            if self.step == learn_start:
                num_game, self.update_count, ep_reward = 0, 0, 0.
                total_reward, self.total_loss, self.total_q = 0., 0., 0.
                ep_rewards, actions = [], []
                
            # 1. Predict: Use our training network to select an action
            action = self.predict(self.history.get())
            
            # 2. Execute the action
            screen, reward, terminal = self.env.execute_action(action,is_training=True)
            
            # 3. New observation
            self.observe(screen,reward,action,terminal)
            
            if terminal:
                screen = self.env.new_game()
                num_game += 1
                ep_rewards.append(ep_reward)
                ep_reward = 0
            else:
                ep_reward += reward
                total_reward += reward
                actions.append(action)
                
            actions.append(action)
            
            if self.step >= learn_start:
                if self.step % test_step == test_step - 1:
                    avg_reward = total_reward / test_step
                    avg_loss = self.total_loss / self.update_count
                    avg_q = self.total_q / self.update_count
                    
                    try:
                        max_ep_reward = np.max(ep_rewards)
                        min_ep_reward = np.min(ep_rewards)
                        avg_ep_reward = np.mean(ep_rewards)
                    except:
                        max_ep_reward, min_ep_reward, avg_ep_reward = 0,0,0
                        
                    print('////////////////////////\nAverage reward: %.4f \nAverage loss: %.6f \nAverage Q: %3.6f'\
                         % (avg_reward, avg_loss, avg_q))
                    print('\nAvg. Ep. Reward: %.4f \nMax Ep. Reward: %.4f \nMin Ep. Reward: %.4f \n# Game: %d'\
                         % (avg_ep_reward, max_ep_reward, min_ep_reward, num_game))
                    
                    if max_avg_ep_reward * 0.9 <= avg_ep_reward:
                        self.step_assign_op.eval({self.step_input: self.step+1})
                        self.save_model(self.step+1)
                        max_avg_ep_reward = max(max_avg_ep_reward, avg_ep_reward)
                        
                    if self.step > 180:
                        self.inject_summary({
                                'average.reward' : avg_reward,
                                'average.loss': avg_loss,
                                'average.q': avg_q,
                                'episode.max reward':max_ep_reward,
                                'episode.min reward':min_ep_reward,
                                'episode.avg reward':avg_ep_reward,
                                'episode.num of games':num_game,
                                'episode.rewards': ep_rewards,
                                'episode.actions': actions,
                                'training.learning_rate': self.learning_rate_op.eval({self.learning_rate_step:self.step})
                            })

                    num_game = 0
                    total_reward = 0.
                    self.total_loss = 0.
                    self.total_q = 0.
                    self.update_count = 0
                    ep_reward = 0
                    ep_rewards = []       
                    actions = []
            
    def predict(self,current_state,test_ep=None):
        # Calculate exploration prob. epsilon => This is a decaying epsilon starting at 1 and decreasing until 0.1
        # once the learning process start
        ep = test_ep or (ep_min + max(0.,(ep_start-ep_min)*(ep_end_time-max(0.,self.step-learn_start))/ep_end_time))
        if random.random() < ep:
            # Explore: random action
            action = random.randrange(self.env.action_size)
        else:
            action = self.q_action.eval({self.input_state:[current_state]})[0]
            
        return action
    
    def observe(self,screen,reward,action,terminal):
        # Clip reward
        reward = max(min_reward,min(max_reward,reward))
        
        # Add to history
        self.history.add(screen)
        # Add to exp. replay
        self.exp_replay.add(screen,reward,action,terminal)
        
        if self.step > learn_start:
            # If it is time to train the network
            if self.step % train_frequency == 0:
                self.q_learning_mini_batch()
                
            # If it is time to update Target network
            if self.step % target_q_update_step == target_q_update_step -1:
                self.update_target_q_network()
        
    def q_learning_mini_batch(self):
        if self.exp_replay.count < history_length:
            # Not enough experiences
            return
        else:
            init_state, action, reward, end_state, terminal = self.exp_replay.sample_from_replay()
            
            # Standard DQN implementation
            # Get the Q-value of the next state
            q_t_plus_1 = self.target_q.eval({self.target_state: end_state})
            terminal = np.array(terminal) + 0.
            # Get max Q_t+1
            max_q_t_plus_1 = np.max(q_t_plus_1, axis=1)
            # The target q-value (if is not terminal state) will be:
            target_q_t = (1. - terminal) * gamma * max_q_t_plus_1 + reward

            _, q_t, loss, summary_str = self.sess.run([self.optim,self.q,self.loss,self.q_summary], {
                    self.target_q_t: target_q_t,
                    self.action: action,
                    self.input_state: init_state,
                    self.learning_rate_step: self.step
                })

            self.writer.add_summary(summary_str,self.step)
            self.total_loss += loss
            self.total_q += q_t.mean()
            self.update_count += 1      
            
    def inject_summary(self,tag_dir):
        summary_str_lists = self.sess.run([self.summary_ops[tag] for tag in tag_dir.keys()], {
                self.summary_placeholders[tag]: value for tag , value in tag_dir.items()
            })
        for summary_str in summary_str_lists:
            self.writer.add_summary(summary_str,self.step)
        

## Main code:

In [None]:
'''with tf.Session() as sess:
    env = GymEnvironment('Breakout-v0')
    agent = Agent(env,sess)
    agent.train()'''

In [9]:
is_train = True

gpu_config = tf.ConfigProto()
gpu_config.gpu_options.allow_growth = True
gpu_config.gpu_options.per_process_gpu_memory_fraction = 1.0
gpu_config.log_device_placement = True

with tf.Session(config=gpu_config) as sess:
    env = GymEnvironment('Breakout-v0')
    agent = Agent(env,sess)
    
    if is_train:
        agent.train()
    else:
        agent.play()

[2016-12-02 15:46:20,677] Making new env: Breakout-v0


[*] Loading Checkpoints...
[*] Load succesfull: Models/-100000
ok


  0%|                   | 149997/49900000 [12:32<231:44:33, 59.63it/s]

////////////////////////
Average reward: 0.0232 
Average loss: 0.142165 
Average Q: 0.367866

Avg. Ep. Reward: 0.3036 
Max Ep. Reward: 6.0000 
Min Ep. Reward: 0.0000 
# Game: 3818
[*] Saving checkpoints...


  0%|                   | 199997/49900000 [25:19<223:48:32, 61.68it/s]

////////////////////////
Average reward: 0.0233 
Average loss: 0.140669 
Average Q: 0.602020

Avg. Ep. Reward: 0.3093 
Max Ep. Reward: 5.0000 
Min Ep. Reward: 0.0000 
# Game: 3770
[*] Saving checkpoints...


  1%|                   | 249993/49900000 [38:09<213:31:45, 64.59it/s]

////////////////////////
Average reward: 0.0236 
Average loss: 0.140270 
Average Q: 0.813687

Avg. Ep. Reward: 0.3137 
Max Ep. Reward: 6.0000 
Min Ep. Reward: 0.0000 
# Game: 3761
[*] Saving checkpoints...


  1%|                   | 299993/49900000 [51:10<211:16:54, 65.21it/s]

////////////////////////
Average reward: 0.0233 
Average loss: 0.136213 
Average Q: 1.036969

Avg. Ep. Reward: 0.3108 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 3735
[*] Saving checkpoints...


  1%|                 | 349993/49900000 [1:04:18<231:23:40, 59.48it/s]

////////////////////////
Average reward: 0.0245 
Average loss: 0.122829 
Average Q: 1.274685

Avg. Ep. Reward: 0.3300 
Max Ep. Reward: 5.0000 
Min Ep. Reward: 0.0000 
# Game: 3715
[*] Saving checkpoints...


  1%|▏                | 399993/49900000 [1:17:38<240:59:28, 57.06it/s]

////////////////////////
Average reward: 0.0301 
Average loss: 0.116589 
Average Q: 1.538831

Avg. Ep. Reward: 0.4336 
Max Ep. Reward: 4.0000 
Min Ep. Reward: 0.0000 
# Game: 3469
[*] Saving checkpoints...


  1%|▏                | 449993/49900000 [1:30:56<226:00:54, 60.78it/s]

////////////////////////
Average reward: 0.0331 
Average loss: 0.113940 
Average Q: 1.810254

Avg. Ep. Reward: 0.5118 
Max Ep. Reward: 7.0000 
Min Ep. Reward: 0.0000 
# Game: 3230
[*] Saving checkpoints...


  1%|▏                | 499993/49900000 [1:44:22<231:52:12, 59.18it/s]

////////////////////////
Average reward: 0.0356 
Average loss: 0.110114 
Average Q: 2.037924

Avg. Ep. Reward: 0.5872 
Max Ep. Reward: 6.0000 
Min Ep. Reward: 0.0000 
# Game: 3028
[*] Saving checkpoints...


  1%|▏                | 549997/49900000 [1:57:46<223:04:10, 61.45it/s]

////////////////////////
Average reward: 0.0427 
Average loss: 0.105732 
Average Q: 2.290139

Avg. Ep. Reward: 0.8324 
Max Ep. Reward: 6.0000 
Min Ep. Reward: 0.0000 
# Game: 2565
[*] Saving checkpoints...


  1%|▏                | 599997/49900000 [2:11:26<221:58:06, 61.70it/s]

////////////////////////
Average reward: 0.0451 
Average loss: 0.098451 
Average Q: 2.496039

Avg. Ep. Reward: 0.9230 
Max Ep. Reward: 7.0000 
Min Ep. Reward: 0.0000 
# Game: 2442
[*] Saving checkpoints...


  1%|▏                | 649997/49900000 [2:25:11<218:56:32, 62.48it/s]

////////////////////////
Average reward: 0.0470 
Average loss: 0.096958 
Average Q: 2.705843

Avg. Ep. Reward: 1.0000 
Max Ep. Reward: 6.0000 
Min Ep. Reward: 0.0000 
# Game: 2346
[*] Saving checkpoints...


  1%|▏                | 699997/49900000 [2:39:05<237:26:32, 57.56it/s]

////////////////////////
Average reward: 0.0487 
Average loss: 0.092012 
Average Q: 2.935419

Avg. Ep. Reward: 1.1110 
Max Ep. Reward: 7.0000 
Min Ep. Reward: 0.0000 
# Game: 2189
[*] Saving checkpoints...


  2%|▎                | 749997/49900000 [2:52:59<247:50:22, 55.09it/s]

////////////////////////
Average reward: 0.0512 
Average loss: 0.089019 
Average Q: 3.088274

Avg. Ep. Reward: 1.2126 
Max Ep. Reward: 7.0000 
Min Ep. Reward: 0.0000 
# Game: 2112
[*] Saving checkpoints...


  2%|▎                | 799997/49900000 [3:06:49<229:21:22, 59.47it/s]

////////////////////////
Average reward: 0.0530 
Average loss: 0.086923 
Average Q: 3.231466

Avg. Ep. Reward: 1.3098 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 2024
[*] Saving checkpoints...


  2%|▎                | 849993/49900000 [3:20:46<231:58:14, 58.74it/s]

////////////////////////
Average reward: 0.0544 
Average loss: 0.084959 
Average Q: 3.390613

Avg. Ep. Reward: 1.3926 
Max Ep. Reward: 7.0000 
Min Ep. Reward: 0.0000 
# Game: 1951
[*] Saving checkpoints...


  2%|▎                | 899993/49900000 [3:34:50<174:46:32, 77.88it/s]

////////////////////////
Average reward: 0.0556 
Average loss: 0.081536 
Average Q: 3.541846

Avg. Ep. Reward: 1.5120 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1838
[*] Saving checkpoints...


  2%|▎                | 949993/49900000 [3:49:01<205:41:15, 66.11it/s]

////////////////////////
Average reward: 0.0581 
Average loss: 0.080064 
Average Q: 3.702444

Avg. Ep. Reward: 1.6860 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 1723
[*] Saving checkpoints...


  2%|▎                | 999997/49900000 [4:03:12<225:57:31, 60.11it/s]

////////////////////////
Average reward: 0.0587 
Average loss: 0.078780 
Average Q: 3.916708

Avg. Ep. Reward: 1.7469 
Max Ep. Reward: 8.0000 
Min Ep. Reward: 0.0000 
# Game: 1679
[*] Saving checkpoints...


  2%|▎               | 1049997/49900000 [4:17:24<242:47:26, 55.89it/s]

////////////////////////
Average reward: 0.0609 
Average loss: 0.076771 
Average Q: 4.136729

Avg. Ep. Reward: 2.0300 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1499
[*] Saving checkpoints...


  2%|▎               | 1099995/49900000 [4:31:54<233:10:43, 58.13it/s]

////////////////////////
Average reward: 0.0617 
Average loss: 0.076143 
Average Q: 4.342553

Avg. Ep. Reward: 2.0894 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1476
[*] Saving checkpoints...


  2%|▎               | 1149995/49900000 [4:46:30<205:46:30, 65.81it/s]

////////////////////////
Average reward: 0.0618 
Average loss: 0.073713 
Average Q: 4.568146

Avg. Ep. Reward: 2.1164 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1460
[*] Saving checkpoints...


  2%|▍               | 1199997/49900000 [5:00:51<240:01:14, 56.36it/s]

////////////////////////
Average reward: 0.0620 
Average loss: 0.071256 
Average Q: 4.818509

Avg. Ep. Reward: 2.1307 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1454
[*] Saving checkpoints...


  3%|▍               | 1249993/49900000 [5:15:17<243:05:03, 55.59it/s]

////////////////////////
Average reward: 0.0612 
Average loss: 0.068195 
Average Q: 5.047041

Avg. Ep. Reward: 2.0428 
Max Ep. Reward: 8.0000 
Min Ep. Reward: 0.0000 
# Game: 1497
[*] Saving checkpoints...


  3%|▍               | 1299995/49900000 [5:29:59<236:47:38, 57.01it/s]

////////////////////////
Average reward: 0.0623 
Average loss: 0.064492 
Average Q: 5.267203

Avg. Ep. Reward: 2.1693 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 1435
[*] Saving checkpoints...


  3%|▍               | 1349997/49900000 [5:44:36<231:35:19, 58.23it/s]

////////////////////////
Average reward: 0.0631 
Average loss: 0.062410 
Average Q: 5.565132

Avg. Ep. Reward: 2.2175 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 1421
[*] Saving checkpoints...


  3%|▍               | 1399993/49900000 [5:59:18<249:17:09, 54.04it/s]

////////////////////////
Average reward: 0.0630 
Average loss: 0.060106 
Average Q: 5.807959

Avg. Ep. Reward: 2.1937 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1435
[*] Saving checkpoints...


  3%|▍               | 1449993/49900000 [6:13:32<246:21:21, 54.63it/s]

////////////////////////
Average reward: 0.0629 
Average loss: 0.058011 
Average Q: 6.057569

Avg. Ep. Reward: 2.1947 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1433
[*] Saving checkpoints...


  3%|▍               | 1499993/49900000 [6:27:41<236:46:27, 56.78it/s]

////////////////////////
Average reward: 0.0640 
Average loss: 0.056824 
Average Q: 6.268853

Avg. Ep. Reward: 2.3515 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1360
[*] Saving checkpoints...


  3%|▍               | 1549997/49900000 [6:42:07<210:54:39, 63.68it/s]

////////////////////////
Average reward: 0.0633 
Average loss: 0.057410 
Average Q: 6.475310

Avg. Ep. Reward: 2.3121 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1368
[*] Saving checkpoints...


  3%|▌               | 1599993/49900000 [6:56:20<215:20:00, 62.31it/s]

////////////////////////
Average reward: 0.0620 
Average loss: 0.055855 
Average Q: 6.764640

Avg. Ep. Reward: 2.1567 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1436
[*] Saving checkpoints...


  3%|▌               | 1649999/49900000 [7:10:42<231:45:54, 57.83it/s]

////////////////////////
Average reward: 0.0629 
Average loss: 0.056107 
Average Q: 6.958884

Avg. Ep. Reward: 2.2219 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1415
[*] Saving checkpoints...


  3%|▌               | 1699993/49900000 [7:25:14<229:01:50, 58.46it/s]

////////////////////////
Average reward: 0.0635 
Average loss: 0.053135 
Average Q: 7.121434

Avg. Ep. Reward: 2.2715 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1396
[*] Saving checkpoints...


  4%|▌               | 1749993/49900000 [7:39:48<242:55:38, 55.06it/s]

////////////////////////
Average reward: 0.0630 
Average loss: 0.050960 
Average Q: 7.290835

Avg. Ep. Reward: 2.2895 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1375
[*] Saving checkpoints...


  4%|▌               | 1799994/49900000 [7:54:27<227:22:34, 58.76it/s]

////////////////////////
Average reward: 0.0615 
Average loss: 0.049712 
Average Q: 7.410751

Avg. Ep. Reward: 2.2504 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1366
[*] Saving checkpoints...


  4%|▌               | 1849996/49900000 [8:08:58<221:38:38, 60.22it/s]

////////////////////////
Average reward: 0.0619 
Average loss: 0.050430 
Average Q: 7.571064

Avg. Ep. Reward: 2.3742 
Max Ep. Reward: 8.0000 
Min Ep. Reward: 0.0000 
# Game: 1304
[*] Saving checkpoints...


  4%|▌               | 1899997/49900000 [8:23:35<223:01:55, 59.78it/s]

////////////////////////
Average reward: 0.0622 
Average loss: 0.049518 
Average Q: 7.750849

Avg. Ep. Reward: 2.2967 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1355
[*] Saving checkpoints...


  4%|▋               | 1949999/49900000 [8:38:13<233:49:46, 56.96it/s]

////////////////////////
Average reward: 0.0628 
Average loss: 0.049443 
Average Q: 7.909996

Avg. Ep. Reward: 2.3088 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1360
[*] Saving checkpoints...


  4%|▋               | 1999998/49900000 [8:52:48<242:51:22, 54.79it/s]

////////////////////////
Average reward: 0.0636 
Average loss: 0.049138 
Average Q: 8.058322

Avg. Ep. Reward: 2.3503 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1353
[*] Saving checkpoints...


  4%|▋               | 2049995/49900000 [9:07:26<236:13:47, 56.27it/s]

////////////////////////
Average reward: 0.0627 
Average loss: 0.048198 
Average Q: 8.134550

Avg. Ep. Reward: 2.2659 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1384
[*] Saving checkpoints...


  4%|▋               | 2099993/49900000 [9:22:05<242:49:43, 54.68it/s]

////////////////////////
Average reward: 0.0634 
Average loss: 0.047600 
Average Q: 8.315713

Avg. Ep. Reward: 2.3231 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1365
[*] Saving checkpoints...


  4%|▋               | 2149993/49900000 [9:36:39<235:55:43, 56.22it/s]

////////////////////////
Average reward: 0.0632 
Average loss: 0.047934 
Average Q: 8.497808

Avg. Ep. Reward: 2.2984 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 1374
[*] Saving checkpoints...


  4%|▋               | 2199997/49900000 [9:51:13<235:21:35, 56.30it/s]

////////////////////////
Average reward: 0.0635 
Average loss: 0.047269 
Average Q: 8.623283

Avg. Ep. Reward: 2.2903 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1385
[*] Saving checkpoints...


  5%|▋              | 2249999/49900000 [10:05:44<232:26:21, 56.94it/s]

////////////////////////
Average reward: 0.0626 
Average loss: 0.046461 
Average Q: 8.681039

Avg. Ep. Reward: 2.2646 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1383
[*] Saving checkpoints...


  5%|▋              | 2299997/49900000 [10:20:12<247:33:40, 53.41it/s]

////////////////////////
Average reward: 0.0632 
Average loss: 0.046077 
Average Q: 8.775906

Avg. Ep. Reward: 2.2967 
Max Ep. Reward: 8.0000 
Min Ep. Reward: 0.0000 
# Game: 1375
[*] Saving checkpoints...


  5%|▋              | 2349993/49900000 [10:34:43<243:10:40, 54.32it/s]

////////////////////////
Average reward: 0.0636 
Average loss: 0.044936 
Average Q: 8.872286

Avg. Ep. Reward: 2.2944 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1386
[*] Saving checkpoints...


  5%|▋              | 2399993/49900000 [10:49:23<230:53:18, 57.15it/s]

////////////////////////
Average reward: 0.0641 
Average loss: 0.044799 
Average Q: 8.909322

Avg. Ep. Reward: 2.3421 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1368
[*] Saving checkpoints...


  5%|▋              | 2449997/49900000 [11:04:01<228:29:53, 57.68it/s]

////////////////////////
Average reward: 0.0643 
Average loss: 0.044116 
Average Q: 8.971611

Avg. Ep. Reward: 2.3423 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 1370
[*] Saving checkpoints...


  5%|▊              | 2499993/49900000 [11:18:35<235:46:36, 55.84it/s]

////////////////////////
Average reward: 0.0629 
Average loss: 0.044225 
Average Q: 9.096549

Avg. Ep. Reward: 2.1894 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1436
[*] Saving checkpoints...


  5%|▊              | 2549997/49900000 [11:33:09<234:26:51, 56.10it/s]

////////////////////////
Average reward: 0.0639 
Average loss: 0.044287 
Average Q: 9.193831

Avg. Ep. Reward: 2.2880 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 1396
[*] Saving checkpoints...


  5%|▊              | 2599997/49900000 [11:47:41<250:10:33, 52.52it/s]

////////////////////////
Average reward: 0.0630 
Average loss: 0.043745 
Average Q: 9.258631

Avg. Ep. Reward: 2.1859 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1442
[*] Saving checkpoints...


  5%|▊              | 2649999/49900000 [12:02:18<229:35:08, 57.17it/s]

////////////////////////
Average reward: 0.0640 
Average loss: 0.042963 
Average Q: 9.199065

Avg. Ep. Reward: 2.2479 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1424
[*] Saving checkpoints...


  5%|▊              | 2699996/49900000 [12:16:54<218:40:23, 59.96it/s]

////////////////////////
Average reward: 0.0647 
Average loss: 0.042221 
Average Q: 9.212097

Avg. Ep. Reward: 2.3435 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1380
[*] Saving checkpoints...


  6%|▊              | 2749993/49900000 [12:31:32<233:05:06, 56.19it/s]

////////////////////////
Average reward: 0.0640 
Average loss: 0.040516 
Average Q: 9.238079

Avg. Ep. Reward: 2.1986 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1455
[*] Saving checkpoints...


  6%|▊              | 2799998/49900000 [12:46:04<244:25:56, 53.53it/s]

////////////////////////
Average reward: 0.0642 
Average loss: 0.040890 
Average Q: 9.236434

Avg. Ep. Reward: 2.2302 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1438
[*] Saving checkpoints...


  6%|▊              | 2849996/49900000 [13:00:39<224:50:43, 58.13it/s]

////////////////////////
Average reward: 0.0651 
Average loss: 0.040909 
Average Q: 9.280494

Avg. Ep. Reward: 2.3597 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1379
[*] Saving checkpoints...


  6%|▊              | 2899996/49900000 [13:15:12<222:25:43, 58.70it/s]

////////////////////////
Average reward: 0.0652 
Average loss: 0.041121 
Average Q: 9.355304

Avg. Ep. Reward: 2.4303 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1341
[*] Saving checkpoints...


  6%|▉              | 2949993/49900000 [13:29:49<243:12:32, 53.62it/s]

////////////////////////
Average reward: 0.0653 
Average loss: 0.039788 
Average Q: 9.412079

Avg. Ep. Reward: 2.4424 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1336
[*] Saving checkpoints...


  6%|▉              | 2999993/49900000 [13:44:28<224:50:34, 57.94it/s]

////////////////////////
Average reward: 0.0654 
Average loss: 0.040010 
Average Q: 9.408444

Avg. Ep. Reward: 2.4019 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1361
[*] Saving checkpoints...


  6%|▉              | 3049993/49900000 [13:59:06<224:22:50, 58.00it/s]

////////////////////////
Average reward: 0.0654 
Average loss: 0.039215 
Average Q: 9.414415

Avg. Ep. Reward: 2.3991 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1363
[*] Saving checkpoints...


  6%|▉              | 3099997/49900000 [14:13:34<231:53:45, 56.06it/s]

////////////////////////
Average reward: 0.0659 
Average loss: 0.039096 
Average Q: 9.385376

Avg. Ep. Reward: 2.5338 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1300
[*] Saving checkpoints...


  6%|▉              | 3149993/49900000 [14:28:14<235:15:42, 55.20it/s]

////////////////////////
Average reward: 0.0647 
Average loss: 0.038955 
Average Q: 9.365218

Avg. Ep. Reward: 2.3620 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1370
[*] Saving checkpoints...


  6%|▉              | 3199997/49900000 [14:42:56<233:57:46, 55.45it/s]

////////////////////////
Average reward: 0.0655 
Average loss: 0.039008 
Average Q: 9.368443

Avg. Ep. Reward: 2.4913 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1315
[*] Saving checkpoints...


  7%|▉              | 3249993/49900000 [14:57:30<233:39:38, 55.46it/s]

////////////////////////
Average reward: 0.0664 
Average loss: 0.038016 
Average Q: 9.348012

Avg. Ep. Reward: 2.5967 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1277
[*] Saving checkpoints...


  7%|▉              | 3299997/49900000 [15:12:02<233:16:06, 55.49it/s]

////////////////////////
Average reward: 0.0659 
Average loss: 0.037463 
Average Q: 9.321322

Avg. Ep. Reward: 2.4872 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1324
[*] Saving checkpoints...


  7%|█              | 3349993/49900000 [15:26:37<231:57:57, 55.74it/s]

////////////////////////
Average reward: 0.0663 
Average loss: 0.037526 
Average Q: 9.281428

Avg. Ep. Reward: 2.5965 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1274
[*] Saving checkpoints...


  7%|█              | 3399997/49900000 [15:41:09<223:35:06, 57.77it/s]

////////////////////////
Average reward: 0.0663 
Average loss: 0.037282 
Average Q: 9.257130

Avg. Ep. Reward: 2.5754 
Max Ep. Reward: 9.0000 
Min Ep. Reward: 0.0000 
# Game: 1286
[*] Saving checkpoints...


  7%|█              | 3449993/49900000 [15:55:42<225:39:57, 57.18it/s]

////////////////////////
Average reward: 0.0667 
Average loss: 0.036989 
Average Q: 9.219107

Avg. Ep. Reward: 2.5038 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1332
[*] Saving checkpoints...


  7%|█              | 3499998/49900000 [16:10:15<237:17:40, 54.32it/s]

////////////////////////
Average reward: 0.0671 
Average loss: 0.036324 
Average Q: 9.182218

Avg. Ep. Reward: 2.5683 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1304
[*] Saving checkpoints...


  7%|█              | 3549997/49900000 [16:24:45<218:04:42, 59.04it/s]

////////////////////////
Average reward: 0.0676 
Average loss: 0.037259 
Average Q: 9.209851

Avg. Ep. Reward: 2.6502 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1275
[*] Saving checkpoints...


  7%|█              | 3599998/49900000 [16:39:14<220:23:47, 58.35it/s]

////////////////////////
Average reward: 0.0683 
Average loss: 0.036484 
Average Q: 9.220040

Avg. Ep. Reward: 2.7015 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1263
[*] Saving checkpoints...


  7%|█              | 3649999/49900000 [16:53:53<224:05:51, 57.33it/s]

////////////////////////
Average reward: 0.0676 
Average loss: 0.035913 
Average Q: 9.150815

Avg. Ep. Reward: 2.6199 
Max Ep. Reward: 10.0000 
Min Ep. Reward: 0.0000 
# Game: 1289
[*] Saving checkpoints...


  7%|█              | 3699997/49900000 [17:08:30<229:39:35, 55.88it/s]

////////////////////////
Average reward: 0.0676 
Average loss: 0.035993 
Average Q: 9.121728

Avg. Ep. Reward: 2.6229 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1286
[*] Saving checkpoints...


  8%|█▏             | 3749993/49900000 [17:23:04<210:25:35, 60.92it/s]

////////////////////////
Average reward: 0.0666 
Average loss: 0.035387 
Average Q: 9.087604

Avg. Ep. Reward: 2.5459 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1308
[*] Saving checkpoints...


  8%|█▏             | 3799993/49900000 [17:37:35<233:47:32, 54.77it/s]

////////////////////////
Average reward: 0.0679 
Average loss: 0.035510 
Average Q: 9.099642

Avg. Ep. Reward: 2.6302 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1290
[*] Saving checkpoints...


  8%|█▏             | 3849993/49900000 [17:52:05<238:19:03, 53.67it/s]

////////////////////////
Average reward: 0.0682 
Average loss: 0.035449 
Average Q: 9.080185

Avg. Ep. Reward: 2.7324 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1248
[*] Saving checkpoints...


  8%|█▏             | 3899993/49900000 [18:06:42<226:57:43, 56.30it/s]

////////////////////////
Average reward: 0.0683 
Average loss: 0.035428 
Average Q: 9.052052

Avg. Ep. Reward: 2.6363 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1295
[*] Saving checkpoints...


  8%|█▏             | 3949993/49900000 [18:21:17<231:46:48, 55.07it/s]

////////////////////////
Average reward: 0.0681 
Average loss: 0.034937 
Average Q: 9.028500

Avg. Ep. Reward: 2.6589 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1281
[*] Saving checkpoints...


  8%|█▏             | 3999993/49900000 [18:35:51<225:07:43, 56.63it/s]

////////////////////////
Average reward: 0.0687 
Average loss: 0.034527 
Average Q: 9.012072

Avg. Ep. Reward: 2.6891 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1277
[*] Saving checkpoints...


  8%|█▏             | 4049997/49900000 [18:50:32<235:33:54, 54.07it/s]

////////////////////////
Average reward: 0.0677 
Average loss: 0.034314 
Average Q: 8.967712

Avg. Ep. Reward: 2.5959 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1304
[*] Saving checkpoints...


  8%|█▏             | 4099993/49900000 [19:05:01<220:10:07, 57.78it/s]

////////////////////////
Average reward: 0.0678 
Average loss: 0.034516 
Average Q: 8.896744

Avg. Ep. Reward: 2.6248 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1290
[*] Saving checkpoints...


  8%|█▏             | 4149997/49900000 [19:19:34<229:34:55, 55.35it/s]

////////////////////////
Average reward: 0.0683 
Average loss: 0.034768 
Average Q: 8.878876

Avg. Ep. Reward: 2.6610 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1283
[*] Saving checkpoints...


  8%|█▎             | 4199995/49900000 [19:34:10<211:24:18, 60.05it/s]

////////////////////////
Average reward: 0.0675 
Average loss: 0.034452 
Average Q: 8.859573

Avg. Ep. Reward: 2.6054 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1295
[*] Saving checkpoints...


  9%|█▎             | 4249997/49900000 [19:48:42<227:17:33, 55.79it/s]

////////////////////////
Average reward: 0.0666 
Average loss: 0.034611 
Average Q: 8.882112

Avg. Ep. Reward: 2.5254 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1319
[*] Saving checkpoints...


  9%|█▎             | 4299997/49900000 [20:03:15<216:00:38, 58.64it/s]

////////////////////////
Average reward: 0.0687 
Average loss: 0.034179 
Average Q: 8.856148

Avg. Ep. Reward: 2.6897 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1276
[*] Saving checkpoints...


  9%|█▎             | 4349993/49900000 [20:17:52<218:09:56, 58.00it/s]

////////////////////////
Average reward: 0.0690 
Average loss: 0.033791 
Average Q: 8.829531

Avg. Ep. Reward: 2.7598 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1249
[*] Saving checkpoints...


  9%|█▎             | 4399997/49900000 [20:32:26<217:07:28, 58.21it/s]

////////////////////////
Average reward: 0.0681 
Average loss: 0.034341 
Average Q: 8.786747

Avg. Ep. Reward: 2.5768 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1321
[*] Saving checkpoints...


  9%|█▎             | 4449995/49900000 [20:47:02<230:09:34, 54.85it/s]

////////////////////////
Average reward: 0.0678 
Average loss: 0.034645 
Average Q: 8.743765

Avg. Ep. Reward: 2.6599 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1273
[*] Saving checkpoints...


  9%|█▎             | 4499993/49900000 [21:01:35<216:29:47, 58.25it/s]

////////////////////////
Average reward: 0.0688 
Average loss: 0.034203 
Average Q: 8.664640

Avg. Ep. Reward: 2.6553 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1294
[*] Saving checkpoints...


  9%|█▎             | 4549995/49900000 [21:16:03<223:58:29, 56.24it/s]

////////////////////////
Average reward: 0.0684 
Average loss: 0.034236 
Average Q: 8.625840

Avg. Ep. Reward: 2.6688 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1280
[*] Saving checkpoints...


  9%|█▍             | 4599996/49900000 [21:30:37<208:12:58, 60.43it/s]

////////////////////////
Average reward: 0.0685 
Average loss: 0.034119 
Average Q: 8.589478

Avg. Ep. Reward: 2.6977 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1267
[*] Saving checkpoints...


  9%|█▍             | 4649999/49900000 [21:45:11<212:40:35, 59.10it/s]

////////////////////////
Average reward: 0.0683 
Average loss: 0.034213 
Average Q: 8.553996

Avg. Ep. Reward: 2.7044 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1262
[*] Saving checkpoints...


  9%|█▍             | 4699996/49900000 [21:59:44<206:53:37, 60.69it/s]

////////////////////////
Average reward: 0.0674 
Average loss: 0.034183 
Average Q: 8.447070

Avg. Ep. Reward: 2.5816 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1305
[*] Saving checkpoints...


 10%|█▍             | 4749993/49900000 [22:14:15<223:59:32, 55.99it/s]

////////////////////////
Average reward: 0.0680 
Average loss: 0.034471 
Average Q: 8.393321

Avg. Ep. Reward: 2.6709 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1273
[*] Saving checkpoints...


 10%|█▍             | 4799996/49900000 [22:28:51<204:35:38, 61.23it/s]

////////////////////////
Average reward: 0.0682 
Average loss: 0.034997 
Average Q: 8.354055

Avg. Ep. Reward: 2.6524 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1286
[*] Saving checkpoints...


 10%|█▍             | 4849995/49900000 [22:43:23<219:53:12, 56.91it/s]

////////////////////////
Average reward: 0.0680 
Average loss: 0.033830 
Average Q: 8.312289

Avg. Ep. Reward: 2.6472 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1284
[*] Saving checkpoints...


 10%|█▍             | 4899997/49900000 [22:57:54<220:42:00, 56.64it/s]

////////////////////////
Average reward: 0.0680 
Average loss: 0.034456 
Average Q: 8.238815

Avg. Ep. Reward: 2.6811 
Max Ep. Reward: 11.0000 
Min Ep. Reward: 0.0000 
# Game: 1267
[*] Saving checkpoints...


 10%|█▍             | 4949997/49900000 [23:12:29<227:50:30, 54.80it/s]

////////////////////////
Average reward: 0.0684 
Average loss: 0.034049 
Average Q: 8.168776

Avg. Ep. Reward: 2.7352 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1250
[*] Saving checkpoints...


 10%|█▌             | 4999996/49900000 [23:27:07<212:54:49, 58.58it/s]

////////////////////////
Average reward: 0.0692 
Average loss: 0.033490 
Average Q: 8.133943

Avg. Ep. Reward: 2.7731 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1247
[*] Saving checkpoints...


 10%|█▌             | 5049997/49900000 [23:41:40<224:41:51, 55.44it/s]

////////////////////////
Average reward: 0.0694 
Average loss: 0.033674 
Average Q: 8.111031

Avg. Ep. Reward: 2.8202 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1229
[*] Saving checkpoints...


 10%|█▌             | 5099993/49900000 [23:56:11<216:39:58, 57.44it/s]

////////////////////////
Average reward: 0.0689 
Average loss: 0.034131 
Average Q: 8.068065

Avg. Ep. Reward: 2.8031 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1229
[*] Saving checkpoints...


 10%|█▌             | 5149995/49900000 [24:10:50<216:17:08, 57.47it/s]

////////////////////////
Average reward: 0.0685 
Average loss: 0.033459 
Average Q: 8.014034

Avg. Ep. Reward: 2.7760 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1232
[*] Saving checkpoints...


 10%|█▌             | 5199993/49900000 [24:25:19<228:23:05, 54.37it/s]

////////////////////////
Average reward: 0.0681 
Average loss: 0.033258 
Average Q: 7.987269

Avg. Ep. Reward: 2.7460 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1240
[*] Saving checkpoints...


 11%|█▌             | 5249997/49900000 [24:39:51<225:46:45, 54.93it/s]

////////////////////////
Average reward: 0.0694 
Average loss: 0.033303 
Average Q: 7.983484

Avg. Ep. Reward: 2.8157 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1232
[*] Saving checkpoints...


 11%|█▌             | 5299993/49900000 [24:54:24<212:18:23, 58.35it/s]

////////////////////////
Average reward: 0.0691 
Average loss: 0.033324 
Average Q: 7.982246

Avg. Ep. Reward: 2.7630 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1249
[*] Saving checkpoints...


 11%|█▌             | 5349999/49900000 [25:08:56<202:19:47, 61.16it/s]

////////////////////////
Average reward: 0.0698 
Average loss: 0.033317 
Average Q: 7.953708

Avg. Ep. Reward: 2.8512 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1223
[*] Saving checkpoints...


 11%|█▌             | 5399993/49900000 [25:23:28<209:43:14, 58.94it/s]

////////////////////////
Average reward: 0.0695 
Average loss: 0.032408 
Average Q: 7.932187

Avg. Ep. Reward: 2.8870 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1204
[*] Saving checkpoints...


 11%|█▋             | 5449997/49900000 [25:38:01<216:13:08, 57.11it/s]

////////////////////////
Average reward: 0.0702 
Average loss: 0.033272 
Average Q: 7.885437

Avg. Ep. Reward: 2.8977 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1212
[*] Saving checkpoints...


 11%|█▋             | 5499993/49900000 [25:52:36<213:30:28, 57.77it/s]

////////////////////////
Average reward: 0.0698 
Average loss: 0.033419 
Average Q: 7.840991

Avg. Ep. Reward: 2.8723 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1214
[*] Saving checkpoints...


 11%|█▋             | 5549994/49900000 [26:07:08<227:02:03, 54.26it/s]

////////////////////////
Average reward: 0.0688 
Average loss: 0.033468 
Average Q: 7.824626

Avg. Ep. Reward: 2.7906 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1232
[*] Saving checkpoints...


 11%|█▋             | 5599993/49900000 [26:21:42<221:26:47, 55.57it/s]

////////////////////////
Average reward: 0.0697 
Average loss: 0.032428 
Average Q: 7.815013

Avg. Ep. Reward: 2.8881 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1206
[*] Saving checkpoints...


 11%|█▋             | 5649993/49900000 [26:36:16<226:13:45, 54.33it/s]

////////////////////////
Average reward: 0.0693 
Average loss: 0.032767 
Average Q: 7.758051

Avg. Ep. Reward: 2.8402 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1220
[*] Saving checkpoints...


 11%|█▋             | 5699993/49900000 [26:50:46<221:48:12, 55.35it/s]

////////////////////////
Average reward: 0.0692 
Average loss: 0.033065 
Average Q: 7.759636

Avg. Ep. Reward: 2.8023 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1234
[*] Saving checkpoints...


 12%|█▋             | 5749993/49900000 [27:05:22<219:52:34, 55.78it/s]

////////////////////////
Average reward: 0.0689 
Average loss: 0.032596 
Average Q: 7.726442

Avg. Ep. Reward: 2.8788 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1196
[*] Saving checkpoints...


 12%|█▋             | 5799999/49900000 [27:19:57<205:37:42, 59.57it/s]

////////////////////////
Average reward: 0.0707 
Average loss: 0.032778 
Average Q: 7.669216

Avg. Ep. Reward: 2.9689 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1189
[*] Saving checkpoints...


 12%|█▊             | 5849995/49900000 [27:34:28<210:42:32, 58.07it/s]

////////////////////////
Average reward: 0.0691 
Average loss: 0.031778 
Average Q: 7.639857

Avg. Ep. Reward: 2.7413 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1260
[*] Saving checkpoints...


 12%|█▊             | 5899993/49900000 [27:48:54<201:47:02, 60.57it/s]

////////////////////////
Average reward: 0.0708 
Average loss: 0.032399 
Average Q: 7.621898

Avg. Ep. Reward: 2.9690 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1193
[*] Saving checkpoints...


 12%|█▊             | 5949995/49900000 [28:03:19<209:02:13, 58.40it/s]

////////////////////////
Average reward: 0.0710 
Average loss: 0.031905 
Average Q: 7.605235

Avg. Ep. Reward: 2.9907 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1186
[*] Saving checkpoints...


 12%|█▊             | 5999993/49900000 [28:17:51<224:06:54, 54.41it/s]

////////////////////////
Average reward: 0.0695 
Average loss: 0.031570 
Average Q: 7.590341

Avg. Ep. Reward: 2.8412 
Max Ep. Reward: 12.0000 
Min Ep. Reward: 0.0000 
# Game: 1222
[*] Saving checkpoints...


 12%|█▊             | 6049997/49900000 [28:32:20<213:51:49, 56.95it/s]

////////////////////////
Average reward: 0.0699 
Average loss: 0.031332 
Average Q: 7.595337

Avg. Ep. Reward: 2.8931 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1207
[*] Saving checkpoints...


 12%|█▊             | 6099997/49900000 [28:46:49<213:25:37, 57.01it/s]

////////////////////////
Average reward: 0.0705 
Average loss: 0.031463 
Average Q: 7.600688

Avg. Ep. Reward: 2.9822 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1182
[*] Saving checkpoints...


 12%|█▊             | 6149993/49900000 [29:01:20<226:11:24, 53.73it/s]

////////////////////////
Average reward: 0.0712 
Average loss: 0.031596 
Average Q: 7.581820

Avg. Ep. Reward: 3.0315 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1174
[*] Saving checkpoints...


 12%|█▊             | 6199997/49900000 [29:15:53<213:19:31, 56.90it/s]

////////////////////////
Average reward: 0.0700 
Average loss: 0.031904 
Average Q: 7.543655

Avg. Ep. Reward: 2.9183 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1200
[*] Saving checkpoints...


 13%|█▉             | 6249994/49900000 [29:30:21<222:30:21, 54.49it/s]

////////////////////////
Average reward: 0.0711 
Average loss: 0.031843 
Average Q: 7.516218

Avg. Ep. Reward: 2.9992 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1184
[*] Saving checkpoints...


 13%|█▉             | 6299993/49900000 [29:44:50<213:50:16, 56.64it/s]

////////////////////////
Average reward: 0.0709 
Average loss: 0.031309 
Average Q: 7.501994

Avg. Ep. Reward: 2.9427 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1205
[*] Saving checkpoints...


 13%|█▉             | 6349995/49900000 [29:59:18<201:21:45, 60.08it/s]

////////////////////////
Average reward: 0.0705 
Average loss: 0.030794 
Average Q: 7.481183

Avg. Ep. Reward: 2.9318 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1202
[*] Saving checkpoints...


 13%|█▉             | 6399997/49900000 [30:13:51<212:05:48, 56.97it/s]

////////////////////////
Average reward: 0.0718 
Average loss: 0.031353 
Average Q: 7.463123

Avg. Ep. Reward: 3.1400 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1143
[*] Saving checkpoints...


 13%|█▉             | 6449997/49900000 [30:28:23<208:43:47, 57.82it/s]

////////////////////////
Average reward: 0.0717 
Average loss: 0.031369 
Average Q: 7.472600

Avg. Ep. Reward: 3.1782 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1128
[*] Saving checkpoints...


 13%|█▉             | 6499996/49900000 [30:42:48<202:56:34, 59.40it/s]

////////////////////////
Average reward: 0.0702 
Average loss: 0.030890 
Average Q: 7.476440

Avg. Ep. Reward: 2.9796 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1178
[*] Saving checkpoints...


 13%|█▉             | 6549993/49900000 [30:57:18<215:20:51, 55.92it/s]

////////////////////////
Average reward: 0.0712 
Average loss: 0.030683 
Average Q: 7.487874

Avg. Ep. Reward: 3.1120 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1143
[*] Saving checkpoints...


 13%|█▉             | 6599993/49900000 [31:11:46<214:21:33, 56.11it/s]

////////////////////////
Average reward: 0.0720 
Average loss: 0.031389 
Average Q: 7.483153

Avg. Ep. Reward: 3.1793 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1132
[*] Saving checkpoints...


 13%|█▉             | 6649994/49900000 [31:26:17<221:48:13, 54.16it/s]

////////////////////////
Average reward: 0.0723 
Average loss: 0.031054 
Average Q: 7.477340

Avg. Ep. Reward: 3.1815 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1135
[*] Saving checkpoints...


 13%|██             | 6699993/49900000 [31:40:49<198:47:13, 60.37it/s]

////////////////////////
Average reward: 0.0731 
Average loss: 0.030665 
Average Q: 7.471228

Avg. Ep. Reward: 3.2433 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1126
[*] Saving checkpoints...


 14%|██             | 6749996/49900000 [31:55:18<205:09:16, 58.42it/s]

////////////////////////
Average reward: 0.0733 
Average loss: 0.030236 
Average Q: 7.458659

Avg. Ep. Reward: 3.3128 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1106
[*] Saving checkpoints...


 14%|██             | 6799993/49900000 [32:09:53<219:29:54, 54.54it/s]

////////////////////////
Average reward: 0.0721 
Average loss: 0.030985 
Average Q: 7.480791

Avg. Ep. Reward: 3.1223 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1153
[*] Saving checkpoints...


 14%|██             | 6849997/49900000 [32:24:24<212:42:57, 56.22it/s]

////////////////////////
Average reward: 0.0733 
Average loss: 0.030635 
Average Q: 7.473246

Avg. Ep. Reward: 3.2453 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1129
[*] Saving checkpoints...


 14%|██             | 6899997/49900000 [32:38:51<207:13:39, 57.64it/s]

////////////////////////
Average reward: 0.0716 
Average loss: 0.030316 
Average Q: 7.449569

Avg. Ep. Reward: 3.0599 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1169
[*] Saving checkpoints...


 14%|██             | 6949997/49900000 [32:53:20<220:09:27, 54.19it/s]

////////////////////////
Average reward: 0.0719 
Average loss: 0.030314 
Average Q: 7.425559

Avg. Ep. Reward: 3.1796 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1130
[*] Saving checkpoints...


 14%|██             | 6999995/49900000 [33:07:49<213:02:06, 55.94it/s]

////////////////////////
Average reward: 0.0720 
Average loss: 0.029841 
Average Q: 7.418619

Avg. Ep. Reward: 3.2450 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1110
[*] Saving checkpoints...


 14%|██             | 7049997/49900000 [33:22:23<218:56:40, 54.36it/s]

////////////////////////
Average reward: 0.0727 
Average loss: 0.029486 
Average Q: 7.397222

Avg. Ep. Reward: 3.2157 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1131
[*] Saving checkpoints...


 14%|██▏            | 7099993/49900000 [33:36:49<213:30:03, 55.69it/s]

////////////////////////
Average reward: 0.0723 
Average loss: 0.030553 
Average Q: 7.374756

Avg. Ep. Reward: 3.3208 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1088
[*] Saving checkpoints...


 14%|██▏            | 7149993/49900000 [33:51:17<199:54:56, 59.40it/s]

////////////////////////
Average reward: 0.0720 
Average loss: 0.029949 
Average Q: 7.381688

Avg. Ep. Reward: 3.2039 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1123
[*] Saving checkpoints...


 14%|██▏            | 7199996/49900000 [34:05:50<194:31:47, 60.97it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.029769 
Average Q: 7.349531

Avg. Ep. Reward: 3.3842 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1088
[*] Saving checkpoints...


 15%|██▏            | 7249993/49900000 [34:20:16<199:03:32, 59.52it/s]

////////////////////////
Average reward: 0.0727 
Average loss: 0.029661 
Average Q: 7.332436

Avg. Ep. Reward: 3.1284 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1160
[*] Saving checkpoints...


 15%|██▏            | 7299995/49900000 [34:34:45<198:26:05, 59.63it/s]

////////////////////////
Average reward: 0.0725 
Average loss: 0.029771 
Average Q: 7.339198

Avg. Ep. Reward: 3.1835 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1139
[*] Saving checkpoints...


 15%|██▏            | 7349993/49900000 [34:49:11<205:00:34, 57.65it/s]

////////////////////////
Average reward: 0.0730 
Average loss: 0.029566 
Average Q: 7.353966

Avg. Ep. Reward: 3.2822 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1109
[*] Saving checkpoints...


 15%|██▏            | 7399997/49900000 [35:03:45<213:30:27, 55.29it/s]

////////////////////////
Average reward: 0.0727 
Average loss: 0.029573 
Average Q: 7.393683

Avg. Ep. Reward: 3.2435 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1121
[*] Saving checkpoints...


 15%|██▏            | 7449993/49900000 [35:18:19<207:04:30, 56.94it/s]

////////////////////////
Average reward: 0.0721 
Average loss: 0.029401 
Average Q: 7.410358

Avg. Ep. Reward: 3.2159 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1121
[*] Saving checkpoints...


 15%|██▎            | 7499997/49900000 [35:32:50<202:02:38, 58.29it/s]

////////////////////////
Average reward: 0.0739 
Average loss: 0.029378 
Average Q: 7.401353

Avg. Ep. Reward: 3.4031 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1084
[*] Saving checkpoints...


 15%|██▎            | 7549997/49900000 [35:47:25<201:08:38, 58.48it/s]

////////////////////////
Average reward: 0.0723 
Average loss: 0.029873 
Average Q: 7.413193

Avg. Ep. Reward: 3.2080 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1125
[*] Saving checkpoints...


 15%|██▎            | 7599997/49900000 [36:01:50<206:53:23, 56.79it/s]

////////////////////////
Average reward: 0.0731 
Average loss: 0.029699 
Average Q: 7.415067

Avg. Ep. Reward: 3.2290 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1131
[*] Saving checkpoints...


 15%|██▎            | 7649997/49900000 [36:16:22<201:19:43, 58.29it/s]

////////////////////////
Average reward: 0.0735 
Average loss: 0.029433 
Average Q: 7.430888

Avg. Ep. Reward: 3.2736 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1122
[*] Saving checkpoints...


 15%|██▎            | 7699997/49900000 [36:30:55<198:22:14, 59.09it/s]

////////////////////////
Average reward: 0.0725 
Average loss: 0.030054 
Average Q: 7.430108

Avg. Ep. Reward: 3.2808 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1104
[*] Saving checkpoints...


 16%|██▎            | 7749997/49900000 [36:45:27<200:18:08, 58.45it/s]

////////////////////////
Average reward: 0.0727 
Average loss: 0.029760 
Average Q: 7.466563

Avg. Ep. Reward: 3.2659 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1113
[*] Saving checkpoints...


 16%|██▎            | 7799997/49900000 [37:00:04<194:42:51, 60.06it/s]

////////////////////////
Average reward: 0.0746 
Average loss: 0.029266 
Average Q: 7.467955

Avg. Ep. Reward: 3.5525 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1048
[*] Saving checkpoints...


 16%|██▎            | 7849993/49900000 [37:14:32<201:49:53, 57.87it/s]

////////////////////////
Average reward: 0.0723 
Average loss: 0.029239 
Average Q: 7.477533

Avg. Ep. Reward: 3.2043 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1126
[*] Saving checkpoints...


 16%|██▎            | 7899997/49900000 [37:29:07<199:48:36, 58.39it/s]

////////////////////////
Average reward: 0.0726 
Average loss: 0.029550 
Average Q: 7.478022

Avg. Ep. Reward: 3.2371 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1122
[*] Saving checkpoints...


 16%|██▍            | 7949997/49900000 [37:43:32<202:33:53, 57.53it/s]

////////////////////////
Average reward: 0.0731 
Average loss: 0.029415 
Average Q: 7.496960

Avg. Ep. Reward: 3.2567 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1122
[*] Saving checkpoints...


 16%|██▍            | 7999995/49900000 [37:58:04<197:07:37, 59.04it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.029136 
Average Q: 7.489504

Avg. Ep. Reward: 3.3473 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1100
[*] Saving checkpoints...


 16%|██▍            | 8049993/49900000 [38:12:33<195:25:09, 59.49it/s]

////////////////////////
Average reward: 0.0727 
Average loss: 0.029640 
Average Q: 7.504959

Avg. Ep. Reward: 3.2382 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1121
[*] Saving checkpoints...


 16%|██▍            | 8100006/49900000 [38:27:06<229:57:11, 50.49it/s]

////////////////////////
Average reward: 0.0721 
Average loss: 0.028775 
Average Q: 7.519523

Avg. Ep. Reward: 3.1783 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1133


 16%|██▍            | 8149993/49900000 [38:41:34<197:49:54, 58.62it/s]

////////////////////////
Average reward: 0.0736 
Average loss: 0.029335 
Average Q: 7.511160

Avg. Ep. Reward: 3.4410 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1068
[*] Saving checkpoints...


 16%|██▍            | 8199999/49900000 [38:56:00<200:32:42, 57.76it/s]

////////////////////////
Average reward: 0.0726 
Average loss: 0.029220 
Average Q: 7.507248

Avg. Ep. Reward: 3.2393 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1120
[*] Saving checkpoints...


 17%|██▍            | 8249996/49900000 [39:10:30<184:07:19, 62.84it/s]

////////////////////////
Average reward: 0.0741 
Average loss: 0.030033 
Average Q: 7.522119

Avg. Ep. Reward: 3.4849 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1062
[*] Saving checkpoints...


 17%|██▍            | 8299997/49900000 [39:25:03<215:08:23, 53.71it/s]

////////////////////////
Average reward: 0.0734 
Average loss: 0.029597 
Average Q: 7.527287

Avg. Ep. Reward: 3.3608 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1092
[*] Saving checkpoints...


 17%|██▌            | 8349997/49900000 [39:39:35<202:37:18, 56.96it/s]

////////////////////////
Average reward: 0.0719 
Average loss: 0.029588 
Average Q: 7.528466

Avg. Ep. Reward: 3.2002 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1124
[*] Saving checkpoints...


 17%|██▌            | 8399998/49900000 [39:54:01<216:18:13, 53.29it/s]

////////////////////////
Average reward: 0.0741 
Average loss: 0.029514 
Average Q: 7.559822

Avg. Ep. Reward: 3.5185 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1053
[*] Saving checkpoints...


 17%|██▌            | 8449997/49900000 [40:08:32<198:04:41, 58.13it/s]

////////////////////////
Average reward: 0.0734 
Average loss: 0.029765 
Average Q: 7.584988

Avg. Ep. Reward: 3.3258 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1102
[*] Saving checkpoints...


 17%|██▌            | 8499997/49900000 [40:23:05<205:54:22, 55.85it/s]

////////////////////////
Average reward: 0.0726 
Average loss: 0.030210 
Average Q: 7.606045

Avg. Ep. Reward: 3.2117 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1129
[*] Saving checkpoints...


 17%|██▌            | 8549993/49900000 [40:37:35<197:58:48, 58.02it/s]

////////////////////////
Average reward: 0.0730 
Average loss: 0.029986 
Average Q: 7.635607

Avg. Ep. Reward: 3.2115 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1135
[*] Saving checkpoints...


 17%|██▌            | 8599997/49900000 [40:52:08<203:54:32, 56.26it/s]

////////////////////////
Average reward: 0.0732 
Average loss: 0.030008 
Average Q: 7.645071

Avg. Ep. Reward: 3.3631 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1088
[*] Saving checkpoints...


 17%|██▌            | 8649994/49900000 [41:06:43<212:12:04, 54.00it/s]

////////////////////////
Average reward: 0.0726 
Average loss: 0.030125 
Average Q: 7.634469

Avg. Ep. Reward: 3.2039 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1133
[*] Saving checkpoints...


 17%|██▌            | 8699993/49900000 [41:21:10<186:50:14, 61.25it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.030066 
Average Q: 7.637747

Avg. Ep. Reward: 3.5081 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1049
[*] Saving checkpoints...


 18%|██▋            | 8749997/49900000 [41:35:36<192:23:51, 59.41it/s]

////////////////////////
Average reward: 0.0741 
Average loss: 0.030531 
Average Q: 7.654902

Avg. Ep. Reward: 3.4686 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1067
[*] Saving checkpoints...


 18%|██▋            | 8799993/49900000 [41:50:13<198:59:53, 57.37it/s]

////////////////////////
Average reward: 0.0730 
Average loss: 0.030044 
Average Q: 7.666257

Avg. Ep. Reward: 3.3266 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1096
[*] Saving checkpoints...


 18%|██▋            | 8849997/49900000 [42:04:50<212:36:08, 53.63it/s]

////////////////////////
Average reward: 0.0734 
Average loss: 0.030249 
Average Q: 7.700129

Avg. Ep. Reward: 3.3611 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1091
[*] Saving checkpoints...


 18%|██▋            | 8899993/49900000 [42:19:24<198:18:20, 57.43it/s]

////////////////////////
Average reward: 0.0735 
Average loss: 0.029903 
Average Q: 7.736784

Avg. Ep. Reward: 3.3728 
Max Ep. Reward: 28.0000 
Min Ep. Reward: 0.0000 
# Game: 1089
[*] Saving checkpoints...


 18%|██▋            | 8949996/49900000 [42:33:53<187:05:13, 60.80it/s]

////////////////////////
Average reward: 0.0734 
Average loss: 0.030426 
Average Q: 7.775776

Avg. Ep. Reward: 3.3624 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1090
[*] Saving checkpoints...


 18%|██▋            | 8999998/49900000 [42:48:27<201:55:55, 56.26it/s]

////////////////////////
Average reward: 0.0742 
Average loss: 0.029821 
Average Q: 7.806098

Avg. Ep. Reward: 3.5367 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1049
[*] Saving checkpoints...


 18%|██▋            | 9049996/49900000 [43:02:56<184:22:58, 61.54it/s]

////////////////////////
Average reward: 0.0749 
Average loss: 0.029929 
Average Q: 7.802185

Avg. Ep. Reward: 3.5216 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1064
[*] Saving checkpoints...


 18%|██▋            | 9099997/49900000 [43:17:25<204:50:07, 55.33it/s]

////////////////////////
Average reward: 0.0745 
Average loss: 0.029566 
Average Q: 7.816507

Avg. Ep. Reward: 3.4049 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1094
[*] Saving checkpoints...


 18%|██▊            | 9149993/49900000 [43:31:56<196:48:27, 57.52it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.030552 
Average Q: 7.825061

Avg. Ep. Reward: 3.3525 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1098
[*] Saving checkpoints...


 18%|██▊            | 9199993/49900000 [43:46:24<198:22:47, 56.99it/s]

////////////////////////
Average reward: 0.0746 
Average loss: 0.029481 
Average Q: 7.844539

Avg. Ep. Reward: 3.5061 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1063
[*] Saving checkpoints...


 19%|██▊            | 9249997/49900000 [44:00:48<199:10:12, 56.69it/s]

////////////////////////
Average reward: 0.0745 
Average loss: 0.029650 
Average Q: 7.875949

Avg. Ep. Reward: 3.4850 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1068
[*] Saving checkpoints...


 19%|██▊            | 9299999/49900000 [44:15:19<202:03:00, 55.82it/s]

////////////////////////
Average reward: 0.0730 
Average loss: 0.029759 
Average Q: 7.940134

Avg. Ep. Reward: 3.2283 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1130
[*] Saving checkpoints...


 19%|██▊            | 9349997/49900000 [44:29:54<213:38:41, 52.72it/s]

////////////////////////
Average reward: 0.0730 
Average loss: 0.029668 
Average Q: 7.996176

Avg. Ep. Reward: 3.2972 
Max Ep. Reward: 13.0000 
Min Ep. Reward: 0.0000 
# Game: 1107
[*] Saving checkpoints...


 19%|██▊            | 9399993/49900000 [44:44:30<197:29:43, 56.96it/s]

////////////////////////
Average reward: 0.0742 
Average loss: 0.030049 
Average Q: 8.022258

Avg. Ep. Reward: 3.4530 
Max Ep. Reward: 25.0000 
Min Ep. Reward: 0.0000 
# Game: 1075
[*] Saving checkpoints...


 19%|██▊            | 9449998/49900000 [44:59:02<207:50:45, 54.06it/s]

////////////////////////
Average reward: 0.0730 
Average loss: 0.030271 
Average Q: 8.057731

Avg. Ep. Reward: 3.2589 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1120
[*] Saving checkpoints...


 19%|██▊            | 9499999/49900000 [45:13:38<195:04:12, 57.53it/s]

////////////////////////
Average reward: 0.0741 
Average loss: 0.030401 
Average Q: 8.103188

Avg. Ep. Reward: 3.3929 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1092
[*] Saving checkpoints...


 19%|██▊            | 9549999/49900000 [45:28:15<191:26:51, 58.55it/s]

////////////////////////
Average reward: 0.0730 
Average loss: 0.030756 
Average Q: 8.127315

Avg. Ep. Reward: 3.2169 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1134
[*] Saving checkpoints...


 19%|██▉            | 9599997/49900000 [45:42:44<206:37:23, 54.18it/s]

////////////////////////
Average reward: 0.0739 
Average loss: 0.030002 
Average Q: 8.162812

Avg. Ep. Reward: 3.4404 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1074
[*] Saving checkpoints...


 19%|██▉            | 9649995/49900000 [45:57:17<196:22:04, 56.94it/s]

////////////////////////
Average reward: 0.0736 
Average loss: 0.030468 
Average Q: 8.172533

Avg. Ep. Reward: 3.4083 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1080
[*] Saving checkpoints...


 19%|██▉            | 9699993/49900000 [46:11:47<195:01:58, 57.26it/s]

////////////////////////
Average reward: 0.0738 
Average loss: 0.030258 
Average Q: 8.168396

Avg. Ep. Reward: 3.5311 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1045
[*] Saving checkpoints...


 20%|██▉            | 9749995/49900000 [46:26:21<195:03:58, 57.17it/s]

////////////////////////
Average reward: 0.0738 
Average loss: 0.029678 
Average Q: 8.162366

Avg. Ep. Reward: 3.3875 
Max Ep. Reward: 14.0000 
Min Ep. Reward: 0.0000 
# Game: 1089
[*] Saving checkpoints...


 20%|██▉            | 9799993/49900000 [46:40:46<201:31:37, 55.27it/s]

////////////////////////
Average reward: 0.0738 
Average loss: 0.030006 
Average Q: 8.164784

Avg. Ep. Reward: 3.6035 
Max Ep. Reward: 16.0000 
Min Ep. Reward: 0.0000 
# Game: 1024
[*] Saving checkpoints...


 20%|██▉            | 9849997/49900000 [46:55:15<199:56:39, 55.64it/s]

////////////////////////
Average reward: 0.0741 
Average loss: 0.029775 
Average Q: 8.211530

Avg. Ep. Reward: 3.4589 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1070
[*] Saving checkpoints...


 20%|██▉            | 9899995/49900000 [47:09:48<190:17:43, 58.39it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.030676 
Average Q: 8.281117

Avg. Ep. Reward: 3.3592 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1097
[*] Saving checkpoints...


 20%|██▉            | 9949997/49900000 [47:24:21<201:26:21, 55.09it/s]

////////////////////////
Average reward: 0.0749 
Average loss: 0.030068 
Average Q: 8.319118

Avg. Ep. Reward: 3.5813 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1046
[*] Saving checkpoints...


 20%|███            | 9999997/49900000 [47:38:44<190:30:01, 58.18it/s]

////////////////////////
Average reward: 0.0747 
Average loss: 0.030618 
Average Q: 8.365641

Avg. Ep. Reward: 3.4698 
Max Ep. Reward: 25.0000 
Min Ep. Reward: 0.0000 
# Game: 1075
[*] Saving checkpoints...


 20%|██▊           | 10049998/49900000 [47:53:09<202:32:30, 54.65it/s]

////////////////////////
Average reward: 0.0744 
Average loss: 0.030446 
Average Q: 8.416658

Avg. Ep. Reward: 3.5794 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1039
[*] Saving checkpoints...


 20%|██▊           | 10099995/49900000 [48:07:39<188:33:31, 58.63it/s]

////////////////////////
Average reward: 0.0742 
Average loss: 0.030486 
Average Q: 8.413695

Avg. Ep. Reward: 3.5066 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1058
[*] Saving checkpoints...


 20%|██▊           | 10149999/49900000 [48:22:09<195:30:10, 56.48it/s]

////////////////////////
Average reward: 0.0740 
Average loss: 0.030260 
Average Q: 8.459541

Avg. Ep. Reward: 3.4859 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1062
[*] Saving checkpoints...


 20%|██▊           | 10199993/49900000 [48:36:36<199:26:09, 55.29it/s]

////////////////////////
Average reward: 0.0754 
Average loss: 0.029976 
Average Q: 8.501309

Avg. Ep. Reward: 3.5490 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1062
[*] Saving checkpoints...


 21%|██▉           | 10249993/49900000 [48:51:08<182:50:30, 60.24it/s]

////////////////////////
Average reward: 0.0745 
Average loss: 0.030350 
Average Q: 8.518763

Avg. Ep. Reward: 3.4353 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1082
[*] Saving checkpoints...


 21%|██▉           | 10299995/49900000 [49:05:37<178:48:32, 61.52it/s]

////////////////////////
Average reward: 0.0752 
Average loss: 0.030554 
Average Q: 8.528100

Avg. Ep. Reward: 3.6224 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1038
[*] Saving checkpoints...


 21%|██▉           | 10349993/49900000 [49:20:11<192:48:52, 56.98it/s]

////////////////////////
Average reward: 0.0739 
Average loss: 0.030264 
Average Q: 8.542285

Avg. Ep. Reward: 3.3708 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1095
[*] Saving checkpoints...


 21%|██▉           | 10399993/49900000 [49:34:37<190:22:37, 57.63it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.030643 
Average Q: 8.546980

Avg. Ep. Reward: 3.3967 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1084
[*] Saving checkpoints...


 21%|██▉           | 10449998/49900000 [49:49:02<197:10:34, 55.58it/s]

////////////////////////
Average reward: 0.0728 
Average loss: 0.030742 
Average Q: 8.584287

Avg. Ep. Reward: 3.3045 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1100
[*] Saving checkpoints...


 21%|██▉           | 10499993/49900000 [50:03:32<189:43:25, 57.69it/s]

////////////////////////
Average reward: 0.0757 
Average loss: 0.030041 
Average Q: 8.601635

Avg. Ep. Reward: 3.6567 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1034
[*] Saving checkpoints...


 21%|██▉           | 10549997/49900000 [50:18:04<182:34:05, 59.87it/s]

////////////////////////
Average reward: 0.0749 
Average loss: 0.030424 
Average Q: 8.617434

Avg. Ep. Reward: 3.4648 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1080
[*] Saving checkpoints...


 21%|██▉           | 10599993/49900000 [50:32:33<197:51:44, 55.17it/s]

////////////////////////
Average reward: 0.0751 
Average loss: 0.030695 
Average Q: 8.656571

Avg. Ep. Reward: 3.5439 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1059
[*] Saving checkpoints...


 21%|██▉           | 10649993/49900000 [50:47:03<184:15:30, 59.17it/s]

////////////////////////
Average reward: 0.0745 
Average loss: 0.031533 
Average Q: 8.682515

Avg. Ep. Reward: 3.5423 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1051
[*] Saving checkpoints...


 21%|███           | 10699997/49900000 [51:01:33<183:22:24, 59.38it/s]

////////////////////////
Average reward: 0.0756 
Average loss: 0.030989 
Average Q: 8.690421

Avg. Ep. Reward: 3.6451 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1037
[*] Saving checkpoints...


 22%|███           | 10749995/49900000 [51:15:59<187:04:45, 58.13it/s]

////////////////////////
Average reward: 0.0750 
Average loss: 0.031956 
Average Q: 8.674482

Avg. Ep. Reward: 3.6073 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1039
[*] Saving checkpoints...


 22%|███           | 10799993/49900000 [51:30:32<186:42:30, 58.17it/s]

////////////////////////
Average reward: 0.0757 
Average loss: 0.031341 
Average Q: 8.658985

Avg. Ep. Reward: 3.6053 
Max Ep. Reward: 28.0000 
Min Ep. Reward: 0.0000 
# Game: 1049
[*] Saving checkpoints...


 22%|███           | 10849996/49900000 [51:45:00<182:43:50, 59.36it/s]

////////////////////////
Average reward: 0.0757 
Average loss: 0.031687 
Average Q: 8.658019

Avg. Ep. Reward: 3.5799 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1057
[*] Saving checkpoints...


 22%|███           | 10899993/49900000 [51:59:26<192:16:28, 56.34it/s]

////////////////////////
Average reward: 0.0743 
Average loss: 0.031195 
Average Q: 8.686271

Avg. Ep. Reward: 3.4705 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1069
[*] Saving checkpoints...


 22%|███           | 10949994/49900000 [52:13:59<183:37:19, 58.92it/s]

////////////////////////
Average reward: 0.0749 
Average loss: 0.030938 
Average Q: 8.720163

Avg. Ep. Reward: 3.5464 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1056
[*] Saving checkpoints...


 22%|███           | 10999993/49900000 [52:28:34<199:50:40, 54.07it/s]

////////////////////////
Average reward: 0.0755 
Average loss: 0.030867 
Average Q: 8.733154

Avg. Ep. Reward: 3.5831 
Max Ep. Reward: 32.0000 
Min Ep. Reward: 0.0000 
# Game: 1053
[*] Saving checkpoints...


 22%|███           | 11049997/49900000 [52:43:06<193:11:21, 55.86it/s]

////////////////////////
Average reward: 0.0734 
Average loss: 0.031776 
Average Q: 8.760585

Avg. Ep. Reward: 3.4367 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1067
[*] Saving checkpoints...


 22%|███           | 11099997/49900000 [52:57:40<186:13:16, 57.88it/s]

////////////////////////
Average reward: 0.0748 
Average loss: 0.032050 
Average Q: 8.817034

Avg. Ep. Reward: 3.5718 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1044
[*] Saving checkpoints...


 22%|███▏          | 11149993/49900000 [53:12:13<188:55:20, 56.98it/s]

////////////////////////
Average reward: 0.0752 
Average loss: 0.031617 
Average Q: 8.847281

Avg. Ep. Reward: 3.6709 
Max Ep. Reward: 26.0000 
Min Ep. Reward: 0.0000 
# Game: 1024
[*] Saving checkpoints...


 22%|███▏          | 11199998/49900000 [53:26:51<206:41:39, 52.01it/s]

////////////////////////
Average reward: 0.0744 
Average loss: 0.031552 
Average Q: 8.889278

Avg. Ep. Reward: 3.4887 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1066
[*] Saving checkpoints...


 23%|███▏          | 11249997/49900000 [53:41:20<183:15:18, 58.59it/s]

////////////////////////
Average reward: 0.0754 
Average loss: 0.031200 
Average Q: 8.905626

Avg. Ep. Reward: 3.5794 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1051
[*] Saving checkpoints...


 23%|███▏          | 11299995/49900000 [53:55:45<185:25:33, 57.82it/s]

////////////////////////
Average reward: 0.0756 
Average loss: 0.031125 
Average Q: 8.896858

Avg. Ep. Reward: 3.7092 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1018
[*] Saving checkpoints...


 23%|███▏          | 11349996/49900000 [54:10:12<177:29:10, 60.33it/s]

////////////////////////
Average reward: 0.0753 
Average loss: 0.031398 
Average Q: 8.946265

Avg. Ep. Reward: 3.6189 
Max Ep. Reward: 24.0000 
Min Ep. Reward: 0.0000 
# Game: 1039
[*] Saving checkpoints...


 23%|███▏          | 11399995/49900000 [54:24:40<180:01:50, 59.40it/s]

////////////////////////
Average reward: 0.0739 
Average loss: 0.031616 
Average Q: 8.949186

Avg. Ep. Reward: 3.5325 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1046
[*] Saving checkpoints...


 23%|███▏          | 11449995/49900000 [54:39:09<177:59:27, 60.01it/s]

////////////////////////
Average reward: 0.0747 
Average loss: 0.030189 
Average Q: 8.957316

Avg. Ep. Reward: 3.5933 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1040
[*] Saving checkpoints...


 23%|███▏          | 11499993/49900000 [54:53:39<182:00:05, 58.61it/s]

////////////////////////
Average reward: 0.0751 
Average loss: 0.031426 
Average Q: 8.977270

Avg. Ep. Reward: 3.5973 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1043
[*] Saving checkpoints...


 23%|███▏          | 11549999/49900000 [55:08:04<192:12:00, 55.43it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.032285 
Average Q: 9.012846

Avg. Ep. Reward: 3.4270 
Max Ep. Reward: 15.0000 
Min Ep. Reward: 0.0000 
# Game: 1075
[*] Saving checkpoints...


 23%|███▎          | 11599996/49900000 [55:22:33<171:57:34, 61.87it/s]

////////////////////////
Average reward: 0.0742 
Average loss: 0.031476 
Average Q: 9.021054

Avg. Ep. Reward: 3.5019 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1060
[*] Saving checkpoints...


 23%|███▎          | 11649993/49900000 [55:36:57<186:08:05, 57.08it/s]

////////////////////////
Average reward: 0.0748 
Average loss: 0.031755 
Average Q: 9.018591

Avg. Ep. Reward: 3.6151 
Max Ep. Reward: 25.0000 
Min Ep. Reward: 0.0000 
# Game: 1034
[*] Saving checkpoints...


 23%|███▎          | 11699999/49900000 [55:51:23<183:01:02, 57.98it/s]

////////////////////////
Average reward: 0.0750 
Average loss: 0.032398 
Average Q: 9.065255

Avg. Ep. Reward: 3.6357 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1032
[*] Saving checkpoints...


 24%|███▎          | 11749993/49900000 [56:05:52<181:28:31, 58.39it/s]

////////////////////////
Average reward: 0.0739 
Average loss: 0.032202 
Average Q: 9.079268

Avg. Ep. Reward: 3.5934 
Max Ep. Reward: 17.0000 
Min Ep. Reward: 0.0000 
# Game: 1028
[*] Saving checkpoints...


 24%|███▎          | 11799997/49900000 [56:20:25<186:01:46, 56.89it/s]

////////////////////////
Average reward: 0.0740 
Average loss: 0.031539 
Average Q: 9.097797

Avg. Ep. Reward: 3.5938 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1029
[*] Saving checkpoints...


 24%|███▎          | 11849997/49900000 [56:34:50<200:08:10, 52.81it/s]

////////////////////////
Average reward: 0.0746 
Average loss: 0.031699 
Average Q: 9.144766

Avg. Ep. Reward: 3.6468 
Max Ep. Reward: 19.0000 
Min Ep. Reward: 0.0000 
# Game: 1022
[*] Saving checkpoints...


 24%|███▎          | 11899993/49900000 [56:49:20<181:12:14, 58.25it/s]

////////////////////////
Average reward: 0.0745 
Average loss: 0.031486 
Average Q: 9.135010

Avg. Ep. Reward: 3.6769 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1012
[*] Saving checkpoints...


 24%|███▎          | 11949997/49900000 [57:03:48<194:42:11, 54.14it/s]

////////////////////////
Average reward: 0.0738 
Average loss: 0.031159 
Average Q: 9.172968

Avg. Ep. Reward: 3.4910 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1057
[*] Saving checkpoints...


 24%|███▎          | 11999994/49900000 [57:18:12<200:54:23, 52.40it/s]

////////////////////////
Average reward: 0.0741 
Average loss: 0.032151 
Average Q: 9.215201

Avg. Ep. Reward: 3.5876 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1033
[*] Saving checkpoints...


 24%|███▍          | 12049993/49900000 [57:32:45<174:52:36, 60.12it/s]

////////////////////////
Average reward: 0.0737 
Average loss: 0.031436 
Average Q: 9.222857

Avg. Ep. Reward: 3.4378 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1069
[*] Saving checkpoints...


 24%|███▍          | 12099996/49900000 [57:47:21<171:30:33, 61.22it/s]

////////////////////////
Average reward: 0.0750 
Average loss: 0.031385 
Average Q: 9.210372

Avg. Ep. Reward: 3.5767 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1049
[*] Saving checkpoints...


 24%|███▍          | 12149994/49900000 [58:01:46<177:20:12, 59.13it/s]

////////////////////////
Average reward: 0.0748 
Average loss: 0.031881 
Average Q: 9.254152

Avg. Ep. Reward: 3.6112 
Max Ep. Reward: 26.0000 
Min Ep. Reward: 0.0000 
# Game: 1034
[*] Saving checkpoints...


 24%|███▍          | 12199993/49900000 [58:16:12<188:33:23, 55.54it/s]

////////////////////////
Average reward: 0.0747 
Average loss: 0.031018 
Average Q: 9.247923

Avg. Ep. Reward: 3.6237 
Max Ep. Reward: 24.0000 
Min Ep. Reward: 0.0000 
# Game: 1031
[*] Saving checkpoints...


 25%|███▍          | 12249997/49900000 [58:30:39<181:22:55, 57.66it/s]

////////////////////////
Average reward: 0.0744 
Average loss: 0.030981 
Average Q: 9.284281

Avg. Ep. Reward: 3.5294 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1054
[*] Saving checkpoints...


 25%|███▍          | 12299997/49900000 [58:45:08<181:22:02, 57.59it/s]

////////////////////////
Average reward: 0.0755 
Average loss: 0.030608 
Average Q: 9.306257

Avg. Ep. Reward: 3.7219 
Max Ep. Reward: 28.0000 
Min Ep. Reward: 0.0000 
# Game: 1014
[*] Saving checkpoints...


 25%|███▍          | 12349994/49900000 [58:59:32<186:15:12, 56.00it/s]

////////////////////////
Average reward: 0.0751 
Average loss: 0.031116 
Average Q: 9.318734

Avg. Ep. Reward: 3.6876 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 1018
[*] Saving checkpoints...


 25%|███▍          | 12399998/49900000 [59:14:00<190:47:54, 54.60it/s]

////////////////////////
Average reward: 0.0747 
Average loss: 0.030900 
Average Q: 9.321702

Avg. Ep. Reward: 3.6510 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1023
[*] Saving checkpoints...


 25%|███▍          | 12449997/49900000 [59:28:32<182:13:44, 57.09it/s]

////////////////////////
Average reward: 0.0761 
Average loss: 0.031595 
Average Q: 9.349246

Avg. Ep. Reward: 3.8707 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 982
[*] Saving checkpoints...


 25%|███▌          | 12499998/49900000 [59:43:06<189:41:53, 54.77it/s]

////////////////////////
Average reward: 0.0750 
Average loss: 0.031401 
Average Q: 9.371861

Avg. Ep. Reward: 3.6654 
Max Ep. Reward: 27.0000 
Min Ep. Reward: 0.0000 
# Game: 1022
[*] Saving checkpoints...


 25%|███▌          | 12549997/49900000 [59:57:31<185:55:31, 55.80it/s]

////////////////////////
Average reward: 0.0752 
Average loss: 0.030446 
Average Q: 9.389320

Avg. Ep. Reward: 3.6415 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1032
[*] Saving checkpoints...


 25%|███▌          | 12599999/49900000 [60:11:55<173:29:25, 59.72it/s]

////////////////////////
Average reward: 0.0754 
Average loss: 0.030115 
Average Q: 9.397499

Avg. Ep. Reward: 3.7257 
Max Ep. Reward: 25.0000 
Min Ep. Reward: 0.0000 
# Game: 1010
[*] Saving checkpoints...


 25%|███▌          | 12649995/49900000 [60:26:15<181:40:52, 56.95it/s]

////////////////////////
Average reward: 0.0754 
Average loss: 0.031269 
Average Q: 9.382812

Avg. Ep. Reward: 3.8608 
Max Ep. Reward: 24.0000 
Min Ep. Reward: 0.0000 
# Game: 977
[*] Saving checkpoints...


 25%|███▌          | 12699993/49900000 [60:40:46<181:03:55, 57.07it/s]

////////////////////////
Average reward: 0.0754 
Average loss: 0.031084 
Average Q: 9.382307

Avg. Ep. Reward: 3.8014 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 992
[*] Saving checkpoints...


 26%|███▌          | 12749993/49900000 [60:55:17<178:21:57, 57.86it/s]

////////////////////////
Average reward: 0.0759 
Average loss: 0.031026 
Average Q: 9.386523

Avg. Ep. Reward: 3.8793 
Max Ep. Reward: 22.0000 
Min Ep. Reward: 0.0000 
# Game: 978
[*] Saving checkpoints...


 26%|███▌          | 12799993/49900000 [61:09:47<184:40:25, 55.80it/s]

////////////////////////
Average reward: 0.0750 
Average loss: 0.030697 
Average Q: 9.387070

Avg. Ep. Reward: 3.6353 
Max Ep. Reward: 24.0000 
Min Ep. Reward: 0.0000 
# Game: 1031
[*] Saving checkpoints...


 26%|███▌          | 12849995/49900000 [61:24:18<181:21:42, 56.75it/s]

////////////////////////
Average reward: 0.0759 
Average loss: 0.030219 
Average Q: 9.378396

Avg. Ep. Reward: 3.6880 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1029
[*] Saving checkpoints...


 26%|███▌          | 12899997/49900000 [61:38:50<185:49:20, 55.31it/s]

////////////////////////
Average reward: 0.0748 
Average loss: 0.029776 
Average Q: 9.380309

Avg. Ep. Reward: 3.6703 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1019
[*] Saving checkpoints...


 26%|███▋          | 12949995/49900000 [61:53:20<182:56:06, 56.11it/s]

////////////////////////
Average reward: 0.0757 
Average loss: 0.030223 
Average Q: 9.374284

Avg. Ep. Reward: 3.8107 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 993
[*] Saving checkpoints...


 26%|███▋          | 12999997/49900000 [62:07:50<186:05:25, 55.08it/s]

////////////////////////
Average reward: 0.0754 
Average loss: 0.029768 
Average Q: 9.358501

Avg. Ep. Reward: 3.7475 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1006
[*] Saving checkpoints...


 26%|███▋          | 13049997/49900000 [62:22:13<166:51:10, 61.35it/s]

////////////////////////
Average reward: 0.0750 
Average loss: 0.030962 
Average Q: 9.322546

Avg. Ep. Reward: 3.7658 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 995
[*] Saving checkpoints...


 26%|███▋          | 13099997/49900000 [62:36:41<181:02:24, 56.46it/s]

////////////////////////
Average reward: 0.0753 
Average loss: 0.030790 
Average Q: 9.285750

Avg. Ep. Reward: 3.7819 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 995
[*] Saving checkpoints...


 26%|███▋          | 13149995/49900000 [62:51:11<173:26:53, 58.86it/s]

////////////////////////
Average reward: 0.0747 
Average loss: 0.030387 
Average Q: 9.294637

Avg. Ep. Reward: 3.7452 
Max Ep. Reward: 24.0000 
Min Ep. Reward: 0.0000 
# Game: 997
[*] Saving checkpoints...


 26%|███▋          | 13199997/49900000 [63:05:38<190:18:47, 53.57it/s]

////////////////////////
Average reward: 0.0752 
Average loss: 0.029937 
Average Q: 9.326310

Avg. Ep. Reward: 3.6673 
Max Ep. Reward: 28.0000 
Min Ep. Reward: 0.0000 
# Game: 1025
[*] Saving checkpoints...


 27%|███▋          | 13249993/49900000 [63:20:09<175:48:12, 57.91it/s]

////////////////////////
Average reward: 0.0741 
Average loss: 0.030210 
Average Q: 9.332017

Avg. Ep. Reward: 3.5640 
Max Ep. Reward: 30.0000 
Min Ep. Reward: 0.0000 
# Game: 1039
[*] Saving checkpoints...


 27%|███▋          | 13299993/49900000 [63:34:36<177:08:55, 57.39it/s]

////////////////////////
Average reward: 0.0745 
Average loss: 0.030505 
Average Q: 9.338848

Avg. Ep. Reward: 3.5857 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1038
[*] Saving checkpoints...


 27%|███▋          | 13349999/49900000 [63:49:11<174:44:35, 58.10it/s]

////////////////////////
Average reward: 0.0746 
Average loss: 0.031190 
Average Q: 9.348274

Avg. Ep. Reward: 3.6296 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1026
[*] Saving checkpoints...


 27%|███▊          | 13399997/49900000 [64:03:28<185:08:36, 54.76it/s]

////////////////////////
Average reward: 0.0751 
Average loss: 0.029891 
Average Q: 9.318825

Avg. Ep. Reward: 3.5573 
Max Ep. Reward: 20.0000 
Min Ep. Reward: 0.0000 
# Game: 1055
[*] Saving checkpoints...


 27%|███▊          | 13449993/49900000 [64:17:51<191:17:58, 52.93it/s]

////////////////////////
Average reward: 0.0751 
Average loss: 0.029848 
Average Q: 9.325109

Avg. Ep. Reward: 3.7815 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 993
[*] Saving checkpoints...


 27%|███▊          | 13499996/49900000 [64:32:19<171:51:28, 58.83it/s]

////////////////////////
Average reward: 0.0748 
Average loss: 0.030278 
Average Q: 9.323386

Avg. Ep. Reward: 3.6295 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1031
[*] Saving checkpoints...


 27%|███▊          | 13549997/49900000 [64:46:46<180:03:20, 56.08it/s]

////////////////////////
Average reward: 0.0754 
Average loss: 0.029587 
Average Q: 9.324349

Avg. Ep. Reward: 3.7143 
Max Ep. Reward: 24.0000 
Min Ep. Reward: 0.0000 
# Game: 1015
[*] Saving checkpoints...


 27%|███▊          | 13599997/49900000 [65:01:09<173:40:11, 58.06it/s]

////////////////////////
Average reward: 0.0763 
Average loss: 0.029558 
Average Q: 9.309481

Avg. Ep. Reward: 3.7579 
Max Ep. Reward: 25.0000 
Min Ep. Reward: 0.0000 
# Game: 1012
[*] Saving checkpoints...


 27%|███▊          | 13649997/49900000 [65:15:38<170:22:38, 59.10it/s]

////////////////////////
Average reward: 0.0740 
Average loss: 0.029482 
Average Q: 9.260811

Avg. Ep. Reward: 3.5219 
Max Ep. Reward: 23.0000 
Min Ep. Reward: 0.0000 
# Game: 1050
[*] Saving checkpoints...


 27%|███▊          | 13699996/49900000 [65:30:09<165:41:04, 60.69it/s]

////////////////////////
Average reward: 0.0758 
Average loss: 0.029816 
Average Q: 9.251931

Avg. Ep. Reward: 3.7505 
Max Ep. Reward: 18.0000 
Min Ep. Reward: 0.0000 
# Game: 1010
[*] Saving checkpoints...


 28%|███▊          | 13749997/49900000 [65:44:33<174:55:46, 57.40it/s]

////////////////////////
Average reward: 0.0750 
Average loss: 0.028770 
Average Q: 9.246129

Avg. Ep. Reward: 3.7535 
Max Ep. Reward: 27.0000 
Min Ep. Reward: 0.0000 
# Game: 998
[*] Saving checkpoints...


 28%|███▊          | 13799993/49900000 [65:59:05<169:08:02, 59.29it/s]

////////////////////////
Average reward: 0.0747 
Average loss: 0.029660 
Average Q: 9.229834

Avg. Ep. Reward: 3.5404 
Max Ep. Reward: 21.0000 
Min Ep. Reward: 0.0000 
# Game: 1053
[*] Saving checkpoints...


 28%|███▉          | 13845215/49900000 [66:12:10<174:16:46, 57.47it/s]

KeyboardInterrupt: 