In [1]:
# if colab

# !pip install pybullet
# !pip install gym
# !apt-get install python-opengl -y
# !apt install xvfb -y
# !pip install gym pyvirtualdisplay > /dev/null 2>&1
# !pip install -q git+https://github.com/tensorflow/examples.git

In [2]:
import os
import glob
import cv2
import tensorflow as tf 
from tensorflow.keras import layers, models
import numpy as np 
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
import pybullet_envs
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay

In [3]:
seed = 654765645
np.random.seed(seed)
tf.random.set_seed(seed)

# check if GPU
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# colab

# from google.colab import drive
# drive.mount('/content/drive')

# root_dir = "drive/My Drive/"
# base_dir = root_dir + 'CPCtesting'
# os.makedirs(base_dir,exist_ok=True)

# train_dir = base_dir + '/train'
# os.makedirs(train_dir,exist_ok=True)

# model_dir = base_dir + '/model'
# os.makedirs(model_dir,exist_ok=True)

# if local machine
base_dir = os.getcwd()

train_dir = os.path.join(base_dir , 'train')
os.makedirs(train_dir,exist_ok=True)

model_dir = os.path.join(base_dir , 'model')
os.makedirs(model_dir,exist_ok=True)

# logs_base_dir = os.path.join(base_dir , 'logs')

log_dir = os.path.join(base_dir , 'training_logs_save')
reward_dir = os.path.join(base_dir , 'training_rewards_save')

#remove old logs
fileList1 = glob.glob(os.path.join(log_dir , "events.*"))
fileList2 = glob.glob(os.path.join(reward_dir , "events.*"))

for filePath in fileList1:
    try:
        os.remove(filePath)
    except:
        print("Error while deleting file : ", filePath)
        
for filePath in fileList2:
    try:
        os.remove(filePath)
    except:
        print("Error while deleting file : ", filePath)


# tensorboard directories
# %load_ext tensorboard
os.makedirs(log_dir, exist_ok=True)
os.makedirs(reward_dir,exist_ok=True)
# %tensorboard --logdir {logs_base_dir}

In [5]:
# get data
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

In [6]:
class CPCModel(tf.keras.Model):
    def __init__(self,code_size, predict_terms, terms=4, units=256, image_size=64, channels=3):
        super(CPCModel, self).__init__()
        self.code_size = code_size
        self.predict_terms = predict_terms
        self.terms = terms
        self.units = units
        self.image_size = image_size
        self.channels = channels

        self.conv1 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, activation='linear')
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.lrelu1 = tf.keras.layers.LeakyReLU()
        self.conv2 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, activation='linear')
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.lrelu2 = tf.keras.layers.LeakyReLU()
        self.conv3 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, activation='linear')
        self.bn3 = tf.keras.layers.BatchNormalization()
        self.lrelu3 = tf.keras.layers.LeakyReLU()
        self.conv4 = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, activation='linear')
        self.bn4 = tf.keras.layers.BatchNormalization()
        self.lrelu4 = tf.keras.layers.LeakyReLU()
        self.flatten = tf.keras.layers.Flatten()
        self.dense5 = tf.keras.layers.Dense(units=256, activation='linear')
        self.bn5 = tf.keras.layers.BatchNormalization()
        self.lrelu5 = tf.keras.layers.LeakyReLU()
        self.dense6 = tf.keras.layers.Dense(units=code_size, activation='linear', name='encoder_embedding')

        self.gru = tf.keras.layers.GRU(units, return_sequences=False, name='ar_context')
        self.linear = tf.keras.layers.Dense(predict_terms*code_size, activation='linear')    
   
    def encoding(self,x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.lrelu1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.lrelu2(x)
        x = self.conv3(x)
        x = self.bn3(x)
        x = self.lrelu3(x)
        x = self.conv4(x)
        x = self.bn4(x)
        x = self.lrelu4(x)
        x = self.flatten(x)
        x = self.dense5(x)
        x = self.bn5(x)
        x = self.lrelu5(x)
        z = self.dense6(x)
        return z
  
    def get_context(self, x):
        z = self.encoding(x)
        z = tf.reshape(z, [-1, self.terms, self.code_size])
        c = self.gru(z)
        return c
    def get_prediction(self, x):
        c = self.get_context(x)
        z_hats = self.linear(c)
        z_hat = tf.reshape(z_hats, [-1, self.predict_terms, self.code_size])
        return z_hat

    def optimizer(self):
        pass

    def loss(self,weights,biases,labels,inputs,num_samples,num_classes): 
        loss = tf.nn.nce_loss(
        weights, biases, labels, inputs, num_sampled, num_classes, num_true=1,
        sampled_values=None, remove_accidental_hits=False, name='nce_loss')
        return loss
  
    def call(self,inputs):
        x_tm, x_tp = inputs
        x_tm = tf.reshape(x_tm, [-1, self.image_size, self.image_size, self.channels])
        x_tp = tf.reshape(x_tp, [-1, self.image_size, self.image_size, self.channels])
        z_hat = self.get_prediction(x_tm)
        z_tp = self.encoding(x_tp)
        z_tp = tf.reshape(z_tp, [-1, self.predict_terms, self.code_size])
        dot_prods = tf.reduce_mean(tf.reduce_mean(z_hat*z_tp, axis=-1), axis=-1, keepdims=True)
        probs = tf.sigmoid(dot_prods)
        return probs


  # def save(self):
  #       f1 = os.path.join(folder,'target_actor')
  #       f2 = os.path.join(folder, 'target_critic')
  #       f3 = os.path.join(folder, 'actor')
  #       f4 = os.path.join(folder, 'critic')
  #       self.target_actor.save(f1)
  #       self.target_critic.save(f2)
  #       self.actor.save(f3)
  #       self.critic.save(f4)


  # def load(self):
  #   pass

In [7]:
class ReplayBuffer():
    def __init__(self,state_space,action_space,capacity,batch):
        self.capacity = capacity
        self.batch = batch
        self.elements = 0
        
        self.avaliable_batch = 0
        self.idx = 0
        self.entries = 0 
        
        self.states = np.empty((self.capacity,state_space),dtype = np.float32)
        self.next_states = np.empty((self.capacity,state_space),dtype = np.float32)
        self.actions = np.empty((self.capacity,action_space),dtype = np.float32)
        self.rewards = np.empty((self.capacity,1),dtype = np.float32)
        self.not_dones = np.empty((self.capacity, 1), dtype=np.float32)
        
    def add(self,state,next_state,action,reward,done):
        np.copyto(self.states[self.idx], state)
        np.copyto(self.actions[self.idx], action)
        np.copyto(self.rewards[self.idx], reward)
        np.copyto(self.next_states[self.idx], next_state)
        np.copyto(self.not_dones[self.idx], not done)
        #self.avaliable_batch= (self.avaliable_batch + 1) if self.avaliable_batch < self.batch else self.batch
        #self.entries = (self.entries + 1) if self.entries < self.capacity else self.capacity
        self.idx = (self.idx + 1) % self.capacity
        self.entries = np.minimum(self.entries + 1, self.capacity)
        
    def sample(self):
        num = self.entries
        if(num > self.batch):
            num = self.batch
        #print('avaliable_batch: ',self.avaliable_batch, "entries: ", self.entries,'capacity: ', self.capacity)
        idx = np.random.choice(self.entries,size = num,replace=False)
        #print('test idx: ', idx)
        
        states = tf.convert_to_tensor(self.states[idx])
        next_states = tf.convert_to_tensor(self.next_states[idx])
        actions = tf.convert_to_tensor(self.actions[idx])
        rewards = tf.convert_to_tensor(self.rewards[idx])
        not_dones = tf.convert_to_tensor(self.not_dones[idx])
        
        return states,next_states,actions,rewards,not_dones
    
    def fill_buffer(self,timesteps,state,prev_timesteps):
        print('sim test: ',env._max_episode_steps,":",timesteps)
        for step in range(timesteps):
            action = env.action_space.sample()
            next_state, reward, done, info = env.step(action)
            np.copyto(self.states[step], state)
            np.copyto(self.actions[step], action)
            np.copyto(self.rewards[step], reward)
            np.copyto(self.next_states[step], next_state)
            np.copyto(self.not_dones[step], not done)
            state = next_state
            if(done):
                print("step: ", step)
                state = env.reset()
                print('done seeding replay buffer')            
            
        

class Actor(tf.keras.Model):
    def __init__(self,state_space,action_space,critic,actor_lr = 0.001,variance = 0.2):
        super(Actor,self).__init__()
        
        #params
        self.std = np.sqrt(variance)
        self.noise_flag = 1.0
        self.action_space = action_space
        
        #optimizer
        self.opt = tf.keras.optimizers.Adam(actor_lr)
        self.critic = critic
       
        #model
        self.dense1 = tf.keras.layers.Dense(400,
                                            #input_shape = (1,1,state_space),
                                            activation = 'relu',
                                            #bias_initializer = tf.random_uniform_initializer(minval=-0.003, maxval=0.003),
                                            bias_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                     mode='fan_in', 
                                                                                                     distribution='uniform', 
                                                                                                     seed=seed
                                                                                                    ),
                                            kernel_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                       mode='fan_in', 
                                                                                                       distribution='uniform', 
                                                                                                       seed=seed)
                                           )
        self.dense2 = tf.keras.layers.Dense(300,
                                            activation='relu',
                                            #bias_initializer = tf.random_uniform_initializer(minval=-0.003, maxval=0.003),
                                            bias_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                       mode='fan_in', 
                                                                                                       distribution='uniform', 
                                                                                                       seed=seed),
                                            kernel_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                       mode='fan_in', 
                                                                                                       distribution='uniform', 
                                                                                                       seed=seed)
                                            )
        self.dense3 = tf.keras.layers.Dense(self.action_space,
                                            bias_initializer = tf.random_uniform_initializer(minval=-0.003, 
                                                                                             maxval=0.003,
                                                                                             seed = seed
                                                                                            ),
                                            kernel_initializer = tf.random_uniform_initializer(minval=-0.003, 
                                                                                               maxval=0.003,
                                                                                               seed = seed
                                                                                              )
                                           )
        
    def loss(self,states,actions):
        actions = self(states)
        #stateactions = tf.concat([states,actions],-1)
        #print("state,action shape: ",states.shape,actions.shape)
        Q = self.critic(states,actions)
        loss = - tf.reduce_mean(Q)
        return loss
    
    def update(self,states,actions):
        with tf.GradientTape() as tape:
            loss = self.loss(states,actions)

        grad = tape.gradient(loss,self.trainable_variables)
        self.opt.apply_gradients(zip(grad, self.trainable_variables))
        #print('actor loss: ', loss ,"\n" )
        return loss
    
    def set_noise_flag(self,num):
        self.noise_flag = np.float32(not not num)
    
    def continous_noise(self):
        #num = np.random.normal(0,self.std)
        #result = np.full((self.action_space,),num)
        result = np.random.normal(0,self.std,size=(self.action_space,))
        return self.noise_flag * np.clip(result,a_min = -1.0, a_max = 1.0)
    
    def call(self,x):
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        return x

class Critic(tf.keras.Model):
    def __init__(self,combined_space,critic_lr = 0.001):
        super(Critic,self).__init__()
        
        # optimizer
        self.opt = tf.keras.optimizers.Adam(critic_lr)
        
        # loss
        #self.loss = tf.keras.losses.MSE
        
        
        # layers
        self.dense1 = tf.keras.layers.Dense(400,
                                            #input_shape=(1,1,combined_space),
                                            activation = 'relu',
                                            #bias_initializer = tf.random_uniform_initializer(minval=-0.003, maxval=0.003),
                                            bias_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                     mode='fan_in', 
                                                                                                     distribution='uniform', 
                                                                                                     seed=seed
                                                                                                    ),
                                            kernel_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                       mode='fan_in', 
                                                                                                       distribution='uniform', 
                                                                                                       seed=seed),
                                            kernel_regularizer=tf.keras.regularizers.l2(0.01)
                                           )

        self.dense2 = tf.keras.layers.Dense(300,
                                            activation='relu',
                                            #bias_initializer = tf.random_uniform_initializer(minval=-0.003, maxval=0.003),
                                            bias_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                     mode='fan_in', 
                                                                                                     distribution='uniform', 
                                                                                                     seed=seed
                                                                                                    ),
                                            kernel_initializer = tf.keras.initializers.VarianceScaling(scale=1.0, 
                                                                                                       mode='fan_in', 
                                                                                                       distribution='uniform', 
                                                                                                       seed=seed
                                                                                                      ),
                                            kernel_regularizer=tf.keras.regularizers.l2(0.01)
                                            )
        self.dense3 = tf.keras.layers.Dense(1,
                                            bias_initializer = tf.random_uniform_initializer(minval=-0.003, maxval=0.003),
                                            kernel_initializer = tf.random_uniform_initializer(minval=-0.003, 
                                                                                               maxval=0.003,
                                                                                               seed = seed
                                                                                              ),
                                            kernel_regularizer=tf.keras.regularizers.l2(0.01)
                                            ) 
        
    #loss
    def loss(self,actual,pred):
        result = tf.keras.losses.MSE(actual,pred)
        #print('result: ', result)
        #print('actual: ', actual.shape) # shape (16,1)
        #print('pred: ',pred.shape) # shape (16,1,1,1)
        return result
    
    def update(self,states_i,actions_i,Q_h):
        match = Q_h.shape[0]
        with tf.GradientTape() as tape:
            Q = self.call(states_i,actions_i)
            Q = tf.reshape(Q,(1,1,1,match))
            Q_h = tf.reshape(Q_h,(1,1,1,match))
            loss = self.loss(Q,Q_h)

        grad = tape.gradient(loss,self.trainable_variables)
        #grad_magnitude = tf.reduce_sum(grad)
        self.opt.apply_gradients(zip(grad, self.trainable_variables))
        #print('critic loss: ', loss ,"\n" )
        #print("check exploding gradient: ", grad)
        return loss
    
    #predict
    def call(self,states,actions):
        #x = tf.concat([states,actions],-1)
        x = self.dense1(states)
        x = tf.concat([x,actions],-1)
        x = self.dense2(x)
        x = self.dense3(x)
        return x
    
    
class SAC(tf.keras.Model):
    def __init__(self,
                 state_space,
                 action_space,
                 capacity = 1000,
                 batch = 1, 
                 tau=0.999,
                 gamma=0.99,
                 actor_lr = 0.001, 
                 critic_lr = 0.0001,
                 variance = 1.0):
        super(SAC,self).__init__()
        # tensorboard callbacks
        self.cb = [tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=1/3, patience=2, min_lr=1e-4),
                   tf.keras.callbacks.ModelCheckpoint('weights/weights.{epoch:02d}-{val_binary_accuracy:.2f}.cpkt',
                                          monitor='val_binary_accuracy', save_best_only=True, save_weights_only=True),
                   tf.keras.callbacks.EarlyStopping(monitor='val_binary_accuracy', patience=3),
                   tf.keras.callbacks.TensorBoard()]
        
        
        #hyperparameters
        self.batch = batch
        self.tau = tau
        self.gamma = gamma
        self.actor_lr = actor_lr
        self.critic_lr = critic_lr
        self.noise_flag = 1
        self.std = np.sqrt(variance)
        
        
        #spaces
        self.action_space = action_space
        self.state_space = state_space
        self.combined_space = self.action_space + self.state_space
        
        # replay buffer
        self.replay_buffer = ReplayBuffer(self.state_space,self.action_space,capacity,self.batch)
        
        # models
        self.critic = Critic(combined_space = self.combined_space,
                             critic_lr = critic_lr
                            )        
        self.actor = Actor(self.state_space,
                           self.action_space,
                           actor_lr=actor_lr,
                           critic = self.critic
                          )
        #self.critic.compile(optimizer = self.critic.opt,loss = self.critic.loss)
        #self.actor.compile(optimizer = self.actor.opt,loss = self.actor.loss)
        
        # target models
        self.target_actor = Actor(self.state_space,
                                  self.action_space,
                                  actor_lr=actor_lr,
                                  critic = self.critic
                                 )
        self.target_critic = Critic(combined_space = self.combined_space,
                                    critic_lr = critic_lr
                                   )         
        self.target_actor.set_weights(self.actor.get_weights())
        self.target_critic.set_weights(self.critic.get_weights())
        
        #cpc
        #self.cpc = CPC(code_size=128, predict_terms=4, terms=4, units=256, image_size=64, channels=3)
    
    def store_replay(self,state,next_state,action,reward,done):
        self.replay_buffer.add(state,next_state,action,reward,done)
    
    def set_labels(self,states_i,next_states_i,actions_i,rewards_i,terminal_i):
        mu = self.target_actor(next_states_i)
        #print('ends: ', terminal)
        #print(mu,states)
#         stateactions = tf.concat([states,mu],1)
        Q_h = self.target_critic(next_states_i,mu)
        y = rewards_i + terminal_i*self.gamma * Q_h
        #y = np.concatenate(self.y,0).astype('float32') #.reshape((self.minibatch_size,1,1,1))
        #print('y: ',self.y)
        #y = tf.reshape(y,(self.replay_buffer.batch,1,1,1))
        return y 
    
        
    def discrete_random_noise(self):
        pass
    
    def update_target_weights(self):   
        tgt_critic_weight = self.target_critic.get_weights()
        tgt_actor_weight = self.target_actor.get_weights()
        actor_weight = self.actor.get_weights()
        critic_weight = self.target_actor.get_weights()
        
        for idx,(part_tgt,part_net) in enumerate(zip(tgt_critic_weight,critic_weight)):
            tgt_critic_weight[idx] = self.tau*part_tgt + (1.0-self.tau)*part_net
        
        for idx,(part_tgt,part_net) in enumerate(zip(tgt_actor_weight,actor_weight)):
            tgt_actor_weight[idx] = self.tau*part_tgt + (1.0-self.tau)*part_net
            
        self.target_actor.set_weights(tgt_actor_weight)
        self.target_critic.set_weights(tgt_critic_weight)
            
    def save(self,filename):
        self.actor.save_weights(filename)
        self.critic.save_weights(filename)
        self.target_actor.save_weights(filename)
        self.target_critic.save_weights(filename)
    
    def load(self,filename):
        self.actor.load_weights(filename)
        self.critic.load_weights(filename)
        self.target_actor.load_weights(filename)
        self.target_critic.load_weights(filename)
            

In [8]:
class DataHandler:
    def __init__(self, batch_size, terms, predict_terms=1, image_size=64, color=False, rescale=True, aug=True, is_training=True, method='cpc'):
        self.batch_size = batch_size
        self.terms = terms
        self.predict_terms = predict_terms
        self.image_size = image_size
        self.color = color
        self.rescale = rescale
        self.aug = aug
        self.is_training = is_training
        self.method = method
        self.lena = cv2.imread(os.path.join(base_dir,'lena.jpg'))
        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
        if self.is_training:
            self.x = x_train/255.0
            self.y = y_train
        else:
            self.x = x_test/255.0
            self.y = y_test
        self.idxs = []
        for i in range(10):
            y = y_train if self.is_training else y_test
            self.idxs.append(np.where(y == i)[0])
        self.n_samples = len(self.y)//terms if self.method == 'cpc' else len(self.y)
        self.shape = self.x.shape
        self.n_batches = self.n_samples//batch_size

    def __iter__(self):
        return self

    def __next__(self):
        return self.cpc_batch() if self.method == 'cpc' else self.benchmark_batch()

    def __len__(self):
        return self.n_batches

    def cpc_batch(self):
        img_labels = np.zeros((self.batch_size, self.terms + self.predict_terms))
        sentence_labels = np.ones((self.batch_size, 1)).astype('int32')
        for bi in range(self.batch_size):
            seed = np.random.randint(10)
            sentence = np.arange(seed, seed + self.terms + self.predict_terms) % 10
            if bi < self.batch_size//2:
                num = np.arange(10)
                predicted = sentence[-self.predict_terms:]
                for i, p in enumerate(predicted):
                    predicted[i] = np.random.choice(num[num != p], 1)
                sentence[-self.predict_terms:] = predicted % 10
                sentence_labels[bi, :] = 0
            img_labels[bi, :] = sentence
        images = self.get_samples(img_labels).reshape((self.batch_size, self.terms+self.predict_terms, self.image_size, self.image_size, 3))
        x_images = images[:, :-self.predict_terms, ...]
        y_images = images[:, -self.predict_terms:, ...]
        idx = np.random.choice(self.batch_size, self.batch_size, replace=False)
        return [x_images[idx], y_images[idx]], sentence_labels[idx]

    def get_samples(self, img_labels):
        idx = []
        for label in img_labels.flatten():
            idx.append(np.random.choice(self.idxs[int(label)], 1)[0])
        img_batch = self.x[idx, :, :]
        if self.aug:
            img_batch = self._aug_batch(img_batch)
        return img_batch

    def _aug_batch(self, img_batch):
        if self.image_size != 28:
            resized = []
            for i in range(img_batch.shape[0]):
                resized.append(cv2.resize(img_batch[i], (self.image_size, self.image_size)))
            img_batch = np.stack(resized)
        img_batch = img_batch.reshape((img_batch.shape[0], 1, self.image_size, self.image_size))
        img_batch = np.concatenate([img_batch, img_batch, img_batch], axis=1)

        if self.color:
            img_batch[img_batch >= 0.5] = 1
            img_batch[img_batch < 0.5] = 0
            for i in range(img_batch.shape[0]):
                x_c = np.random.randint(0, self.lena.shape[0] - self.image_size)
                y_c = np.random.randint(0, self.lena.shape[1] - self.image_size)
                img = self.lena[x_c:x_c+self.image_size, y_c:y_c+self.image_size]
                img = np.array(img).transpose((2, 0, 1))/255.0
                for j in range(3):
                    img[j, :, :] = (img[j, :, :] + np.random.uniform(0, 1))/2.0
                img[img_batch[i, :, :, :] == 1] = 1 - img[img_batch[i, :, :, :] == 1]
                img_batch[i, :, :, :] = img

        if self.rescale:
            img_batch = img_batch * 2 - 1
        img_batch = img_batch.transpose((0, 2, 3, 1))
        return img_batch

    def benchmark_batch(self):
        idx = np.random.choice(len(self.x), self.batch_size, replace=False)
        img_batch = self.x[idx]
        label_batch = self.y[idx]
        if self.aug:
            img_batch = self._aug_batch(img_batch)
        label_batch = label_batch.reshape((-1, 1))
        return img_batch, label_batch

In [9]:
# #train loop
# dh_train = DataHandler(64, 4, predict_terms=4, image_size=64, color=True, rescale=True, aug=True, is_training=True, method='cpc')
# dh_test = DataHandler(64, 4, predict_terms=4, image_size=64, color=True, rescale=True, aug=True, is_training=False, method='cpc')
# accuracy_metric_train = tf.keras.metrics.BinaryAccuracy()
# loss_metric_train = tf.keras.metrics.BinaryCrossentropy()
# accuracy_metric_test = tf.keras.metrics.BinaryAccuracy()
# loss_metric_test = tf.keras.metrics.BinaryCrossentropy()
# cpc = CPCModel(code_size=128, predict_terms=4, terms=4, units=256, image_size=64, channels=3)
# optim = tf.keras.optimizers.Adam(1e-3)
# cb = [tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=1/3, patience=2, min_lr=1e-4),
#       tf.keras.callbacks.ModelCheckpoint('weights/weights.{epoch:02d}-{val_binary_accuracy:.2f}.cpkt',
#                                           monitor='val_binary_accuracy', save_best_only=True, save_weights_only=True),
#       tf.keras.callbacks.EarlyStopping(monitor='val_binary_accuracy', patience=3),
#       tf.keras.callbacks.TensorBoard()]
# cpc.compile(optimizer=optim, loss='binary_crossentropy', metrics=['binary_accuracy'])
# cpc.fit(x=dh_train, epochs=10, validation_data=dh_test, steps_per_epoch=60000//64, validation_steps=10000//64, callbacks=cb)


In [10]:
#%tensorboard --logdir {logs_base_dir}

In [11]:
# train loop params


episodes = 200
episode_steps = 1000
buffer_size = 100000
batch_size = 16

# pybullet setup
env = gym.make('HalfCheetahBulletEnv-v0')
env.render(mode = 'human')
env._max_episode_steps = episode_steps





In [12]:
writer = tf.summary.create_file_writer(log_dir)
writer_reward = tf.summary.create_file_writer(reward_dir)

#get spaces
state_space = env.observation_space.shape[0]
action_space = env.action_space.shape[0]
print(state_space,action_space)

26 6


In [None]:
state = env.reset()
sac = SAC(action_space=action_space,
          state_space=state_space,
          capacity = buffer_size,
          batch = batch_size,
          tau = 0.999,
          gamma = 0.99,
          actor_lr = 0.0001,
          critic_lr = 0.001,
          variance = 0.2)

#fill replay buffer
#env._max_episode_steps = buffer_size
#sac.replay_buffer.fill_buffer(buffer_size, state, episode_steps) # self,timesteps,state,prev_timesteps
#env._max_episode_steps = episode_steps


env = gym.wrappers.Monitor(env, "baseline_training", video_callable=lambda episode: True, force="true")
state = env.reset()

for episode in range(episodes):
    sumreward = 0
    for step in range(episode_steps):
        #print(observation)
        print('t: ',step, ' :episode: ',episode)
        #print('state: ',state)
        
        # get action
        state = tf.reshape(state,(1,1,state_space)) #,dtype='float32')
        #print(state)
        tensor_noisy_action = sac.actor(state)+sac.actor.continous_noise()
        #tensor_action = tf.clip_by_value(tensor_action, clip_value_min=-1.0, clip_value_max=1.0)

        noisy_action = tensor_noisy_action[0][0]
        #print('action: ',action)
        
        #get loss
        #q_loss = sac.critic(state,tensor_action)
        
        
        # execute action
        next_state, reward, done, info = env.step(noisy_action)
        sumreward += reward

        # store transitions
        sac.store_replay(state,next_state,noisy_action,reward,done)
        
        #print('state: ',state)
        #print('next_state: ',next_state)
        #print('action: ',action)
        #print('reward: ',reward)

        #sample minibatch from data
        states_i,next_states_i,actions_i,rewards_i,terminal_i = sac.replay_buffer.sample()
        
        #set labels y_i
        y = sac.set_labels(states_i,next_states_i,actions_i,rewards_i,terminal_i)
        #print('y: ',y)
        
        # update critic net
        q_loss = sac.critic.update(states_i, actions_i, y)

        print('q_loss: ', q_loss.numpy())
        with writer.as_default():
            tf.summary.scalar('Squared QLosses (qtarget - qval)^2', q_loss[0][0][0].numpy(),
                              step=episode * episode_steps + step + 1)
        
        #losses[episode*timesteps + t] = loss
        #losses[i_episode*timesteps+] = history.history
        
        #update actor net
        sac.actor.update(states_i, sac.actor(states_i)) #actions)
        #print('weight check: ',rl.actor.get_weights(),'\n')
        
        #update target nets
        sac.update_target_weights()
        
        state = next_state
        if done:
            state = env.reset()
            #rewards[episode] = sumreward
            #sac.save(base_dir+'/baseline_model')
            print("Episode {} finished after {} timesteps with average reward {}".format(episode,step+1,sumreward))
            with writer_reward.as_default():
                tf.summary.scalar('Episode sum reward', sumreward,step=episode)
            break
print('done') 
sac.save(base_dir+'/baseline_model')

t:  0  :episode:  0
q_loss:  [[[0.51214576]]]
t:  1  :episode:  0
q_loss:  [[[0.32530105]]]
t:  2  :episode:  0
q_loss:  [[[0.19346547]]]
t:  3  :episode:  0
q_loss:  [[[0.2349267]]]
t:  4  :episode:  0
q_loss:  [[[0.15801728]]]
t:  5  :episode:  0
q_loss:  [[[0.09860212]]]
t:  6  :episode:  0
q_loss:  [[[0.1298646]]]
t:  7  :episode:  0
q_loss:  [[[0.15899816]]]
t:  8  :episode:  0
q_loss:  [[[0.18582213]]]
t:  9  :episode:  0
q_loss:  [[[0.17287856]]]
t:  10  :episode:  0
q_loss:  [[[0.14272122]]]
t:  11  :episode:  0
q_loss:  [[[0.11208106]]]
t:  12  :episode:  0
q_loss:  [[[0.0888068]]]
t:  13  :episode:  0
q_loss:  [[[0.07490087]]]
t:  14  :episode:  0
q_loss:  [[[0.06721821]]]
t:  15  :episode:  0
q_loss:  [[[0.47394645]]]
t:  16  :episode:  0
q_loss:  [[[0.4625653]]]
t:  17  :episode:  0
q_loss:  [[[0.8621845]]]
t:  18  :episode:  0
q_loss:  [[[0.96047574]]]
t:  19  :episode:  0
q_loss:  [[[1.218885]]]
t:  20  :episode:  0
q_loss:  [[[1.167798]]]
t:  21  :episode:  0
q_loss:  [[

q_loss:  [[[0.1136747]]]
t:  177  :episode:  0
q_loss:  [[[0.06861548]]]
t:  178  :episode:  0
q_loss:  [[[0.49416927]]]
t:  179  :episode:  0
q_loss:  [[[0.1210062]]]
t:  180  :episode:  0
q_loss:  [[[0.04252842]]]
t:  181  :episode:  0
q_loss:  [[[0.04372288]]]
t:  182  :episode:  0
q_loss:  [[[0.14988744]]]
t:  183  :episode:  0
q_loss:  [[[0.18525183]]]
t:  184  :episode:  0
q_loss:  [[[0.301201]]]
t:  185  :episode:  0
q_loss:  [[[0.2611261]]]
t:  186  :episode:  0
q_loss:  [[[0.13142455]]]
t:  187  :episode:  0
q_loss:  [[[0.05618998]]]
t:  188  :episode:  0
q_loss:  [[[0.13297567]]]
t:  189  :episode:  0
q_loss:  [[[0.30005333]]]
t:  190  :episode:  0
q_loss:  [[[0.4395577]]]
t:  191  :episode:  0
q_loss:  [[[0.3201246]]]
t:  192  :episode:  0
q_loss:  [[[0.23862687]]]
t:  193  :episode:  0
q_loss:  [[[0.17535277]]]
t:  194  :episode:  0
q_loss:  [[[0.08263938]]]
t:  195  :episode:  0
q_loss:  [[[0.06569918]]]
t:  196  :episode:  0
q_loss:  [[[0.29899076]]]
t:  197  :episode:  0

q_loss:  [[[0.18571508]]]
t:  351  :episode:  0
q_loss:  [[[0.08135073]]]
t:  352  :episode:  0
q_loss:  [[[0.24254662]]]
t:  353  :episode:  0
q_loss:  [[[0.12684877]]]
t:  354  :episode:  0
q_loss:  [[[0.39597043]]]
t:  355  :episode:  0
q_loss:  [[[0.45330146]]]
t:  356  :episode:  0
q_loss:  [[[0.24661848]]]
t:  357  :episode:  0
q_loss:  [[[0.07059251]]]
t:  358  :episode:  0
q_loss:  [[[0.05086918]]]
t:  359  :episode:  0
q_loss:  [[[0.3070537]]]
t:  360  :episode:  0
q_loss:  [[[0.0923961]]]
t:  361  :episode:  0
q_loss:  [[[0.17109261]]]
t:  362  :episode:  0
q_loss:  [[[0.19715388]]]
t:  363  :episode:  0
q_loss:  [[[0.31088287]]]
t:  364  :episode:  0
q_loss:  [[[0.21144226]]]
t:  365  :episode:  0
q_loss:  [[[0.25664216]]]
t:  366  :episode:  0
q_loss:  [[[0.24182035]]]
t:  367  :episode:  0
q_loss:  [[[0.17585957]]]
t:  368  :episode:  0
q_loss:  [[[0.29477245]]]
t:  369  :episode:  0
q_loss:  [[[0.08915579]]]
t:  370  :episode:  0
q_loss:  [[[0.04178256]]]
t:  371  :episod

q_loss:  [[[0.11917534]]]
t:  524  :episode:  0
q_loss:  [[[0.08146324]]]
t:  525  :episode:  0
q_loss:  [[[0.05423318]]]
t:  526  :episode:  0
q_loss:  [[[0.24237628]]]
t:  527  :episode:  0
q_loss:  [[[0.09645288]]]
t:  528  :episode:  0
q_loss:  [[[0.28715998]]]
t:  529  :episode:  0
q_loss:  [[[0.06887515]]]
t:  530  :episode:  0
q_loss:  [[[0.17986822]]]
t:  531  :episode:  0
q_loss:  [[[0.098179]]]
t:  532  :episode:  0
q_loss:  [[[0.07066942]]]
t:  533  :episode:  0
q_loss:  [[[0.14780523]]]
t:  534  :episode:  0
q_loss:  [[[0.15473802]]]
t:  535  :episode:  0
q_loss:  [[[0.03879286]]]
t:  536  :episode:  0
q_loss:  [[[0.09935959]]]
t:  537  :episode:  0
q_loss:  [[[0.24300298]]]
t:  538  :episode:  0
q_loss:  [[[0.040072]]]
t:  539  :episode:  0
q_loss:  [[[0.30939078]]]
t:  540  :episode:  0
q_loss:  [[[0.05741322]]]
t:  541  :episode:  0
q_loss:  [[[0.2449966]]]
t:  542  :episode:  0
q_loss:  [[[0.04931071]]]
t:  543  :episode:  0
q_loss:  [[[0.06360967]]]
t:  544  :episode: 

t:  697  :episode:  0
q_loss:  [[[0.12697116]]]
t:  698  :episode:  0
q_loss:  [[[0.36982906]]]
t:  699  :episode:  0
q_loss:  [[[0.06739365]]]
t:  700  :episode:  0
q_loss:  [[[0.17727895]]]
t:  701  :episode:  0
q_loss:  [[[0.04546996]]]
t:  702  :episode:  0
q_loss:  [[[0.33565098]]]
t:  703  :episode:  0
q_loss:  [[[0.14910677]]]
t:  704  :episode:  0
q_loss:  [[[0.2802129]]]
t:  705  :episode:  0
q_loss:  [[[0.11113691]]]
t:  706  :episode:  0
q_loss:  [[[0.29497862]]]
t:  707  :episode:  0
q_loss:  [[[0.20365383]]]
t:  708  :episode:  0
q_loss:  [[[0.4209993]]]
t:  709  :episode:  0
q_loss:  [[[0.20478722]]]
t:  710  :episode:  0
q_loss:  [[[0.08837675]]]
t:  711  :episode:  0
q_loss:  [[[0.179288]]]
t:  712  :episode:  0
q_loss:  [[[0.4719761]]]
t:  713  :episode:  0
q_loss:  [[[0.03572735]]]
t:  714  :episode:  0
q_loss:  [[[0.11410885]]]
t:  715  :episode:  0
q_loss:  [[[0.14111272]]]
t:  716  :episode:  0
q_loss:  [[[0.18828443]]]
t:  717  :episode:  0
q_loss:  [[[0.2917118]]

t:  870  :episode:  0
q_loss:  [[[0.40417144]]]
t:  871  :episode:  0
q_loss:  [[[0.5676562]]]
t:  872  :episode:  0
q_loss:  [[[0.43989778]]]
t:  873  :episode:  0
q_loss:  [[[0.5272615]]]
t:  874  :episode:  0
q_loss:  [[[0.80771196]]]
t:  875  :episode:  0
q_loss:  [[[0.28876722]]]
t:  876  :episode:  0
q_loss:  [[[0.4433344]]]
t:  877  :episode:  0
q_loss:  [[[0.93179744]]]
t:  878  :episode:  0
q_loss:  [[[4.5246863]]]
t:  879  :episode:  0
q_loss:  [[[0.4296605]]]
t:  880  :episode:  0
q_loss:  [[[2.7408109]]]
t:  881  :episode:  0
q_loss:  [[[2.2193964]]]
t:  882  :episode:  0
q_loss:  [[[0.5734664]]]
t:  883  :episode:  0
q_loss:  [[[1.1198556]]]
t:  884  :episode:  0
q_loss:  [[[0.7860615]]]
t:  885  :episode:  0
q_loss:  [[[0.1394102]]]
t:  886  :episode:  0
q_loss:  [[[0.62225306]]]
t:  887  :episode:  0
q_loss:  [[[0.64317]]]
t:  888  :episode:  0
q_loss:  [[[0.35608506]]]
t:  889  :episode:  0
q_loss:  [[[0.26189673]]]
t:  890  :episode:  0
q_loss:  [[[0.5914463]]]
t:  891

q_loss:  [[[5.156314]]]
t:  45  :episode:  1
q_loss:  [[[12.850725]]]
t:  46  :episode:  1
q_loss:  [[[1.3968465]]]
t:  47  :episode:  1
q_loss:  [[[1.9760733]]]
t:  48  :episode:  1
q_loss:  [[[14.062187]]]
t:  49  :episode:  1
q_loss:  [[[21.002018]]]
t:  50  :episode:  1
q_loss:  [[[8.87058]]]
t:  51  :episode:  1
q_loss:  [[[4.438919]]]
t:  52  :episode:  1
q_loss:  [[[3.6484988]]]
t:  53  :episode:  1
q_loss:  [[[2.3930821]]]
t:  54  :episode:  1
q_loss:  [[[5.445767]]]
t:  55  :episode:  1
q_loss:  [[[3.4786744]]]
t:  56  :episode:  1
q_loss:  [[[76.77724]]]
t:  57  :episode:  1
q_loss:  [[[1.3711998]]]
t:  58  :episode:  1
q_loss:  [[[5.7507524]]]
t:  59  :episode:  1
q_loss:  [[[2.6085715]]]
t:  60  :episode:  1
q_loss:  [[[118.60094]]]
t:  61  :episode:  1
q_loss:  [[[1.0640218]]]
t:  62  :episode:  1
q_loss:  [[[43.457993]]]
t:  63  :episode:  1
q_loss:  [[[4.0612545]]]
t:  64  :episode:  1
q_loss:  [[[15.90232]]]
t:  65  :episode:  1
q_loss:  [[[5.78113]]]
t:  66  :episode: 

q_loss:  [[[6.272296]]]
t:  223  :episode:  1
q_loss:  [[[3.1622462]]]
t:  224  :episode:  1
q_loss:  [[[2.83368]]]
t:  225  :episode:  1
q_loss:  [[[0.67198944]]]
t:  226  :episode:  1
q_loss:  [[[18.823359]]]
t:  227  :episode:  1
q_loss:  [[[3.7525167]]]
t:  228  :episode:  1
q_loss:  [[[0.70297396]]]
t:  229  :episode:  1
q_loss:  [[[3.479178]]]
t:  230  :episode:  1
q_loss:  [[[2.8217375]]]
t:  231  :episode:  1
q_loss:  [[[7.8781915]]]
t:  232  :episode:  1
q_loss:  [[[4.1015596]]]
t:  233  :episode:  1
q_loss:  [[[1.2546742]]]
t:  234  :episode:  1
q_loss:  [[[1.1565366]]]
t:  235  :episode:  1
q_loss:  [[[1.7441208]]]
t:  236  :episode:  1
q_loss:  [[[61.538654]]]
t:  237  :episode:  1
q_loss:  [[[6.711359]]]
t:  238  :episode:  1
q_loss:  [[[0.6709963]]]
t:  239  :episode:  1
q_loss:  [[[3.056792]]]
t:  240  :episode:  1
q_loss:  [[[2.2289307]]]
t:  241  :episode:  1
q_loss:  [[[4.0029902]]]
t:  242  :episode:  1
q_loss:  [[[49.86375]]]
t:  243  :episode:  1
q_loss:  [[[3.8996

t:  398  :episode:  1
q_loss:  [[[0.90266305]]]
t:  399  :episode:  1
q_loss:  [[[4.0427923]]]
t:  400  :episode:  1
q_loss:  [[[6.7602386]]]
t:  401  :episode:  1
q_loss:  [[[2.782588]]]
t:  402  :episode:  1
q_loss:  [[[0.9536701]]]
t:  403  :episode:  1
q_loss:  [[[1.2498293]]]
t:  404  :episode:  1
q_loss:  [[[9.341126]]]
t:  405  :episode:  1
q_loss:  [[[1.2558991]]]
t:  406  :episode:  1
q_loss:  [[[2.7359335]]]
t:  407  :episode:  1
q_loss:  [[[1.8276253]]]
t:  408  :episode:  1
q_loss:  [[[4.802902]]]
t:  409  :episode:  1
q_loss:  [[[1.8129337]]]
t:  410  :episode:  1
q_loss:  [[[0.45803678]]]
t:  411  :episode:  1
q_loss:  [[[1.6446155]]]
t:  412  :episode:  1
q_loss:  [[[1.3370475]]]
t:  413  :episode:  1
q_loss:  [[[5.9385867]]]
t:  414  :episode:  1
q_loss:  [[[5.775182]]]
t:  415  :episode:  1
q_loss:  [[[3.1966753]]]
t:  416  :episode:  1
q_loss:  [[[1.981069]]]
t:  417  :episode:  1
q_loss:  [[[6.9583097]]]
t:  418  :episode:  1
q_loss:  [[[2.6222093]]]
t:  419  :episod

t:  573  :episode:  1
q_loss:  [[[1.4481509]]]
t:  574  :episode:  1
q_loss:  [[[0.752648]]]
t:  575  :episode:  1
q_loss:  [[[1.9901998]]]
t:  576  :episode:  1
q_loss:  [[[1.9604527]]]
t:  577  :episode:  1
q_loss:  [[[1.2812784]]]
t:  578  :episode:  1
q_loss:  [[[0.31887704]]]
t:  579  :episode:  1
q_loss:  [[[0.97471815]]]
t:  580  :episode:  1
q_loss:  [[[11.037732]]]
t:  581  :episode:  1
q_loss:  [[[0.46378195]]]
t:  582  :episode:  1
q_loss:  [[[2.0957701]]]
t:  583  :episode:  1
q_loss:  [[[0.6479617]]]
t:  584  :episode:  1
q_loss:  [[[3.751173]]]
t:  585  :episode:  1
q_loss:  [[[1.9971035]]]
t:  586  :episode:  1
q_loss:  [[[1.2433987]]]
t:  587  :episode:  1
q_loss:  [[[1.2507343]]]
t:  588  :episode:  1
q_loss:  [[[0.4604906]]]
t:  589  :episode:  1
q_loss:  [[[0.9531274]]]
t:  590  :episode:  1
q_loss:  [[[0.78274286]]]
t:  591  :episode:  1
q_loss:  [[[0.6308057]]]
t:  592  :episode:  1
q_loss:  [[[0.86453927]]]
t:  593  :episode:  1
q_loss:  [[[4.6217346]]]
t:  594  :

q_loss:  [[[0.69444704]]]
t:  748  :episode:  1
q_loss:  [[[0.83184403]]]
t:  749  :episode:  1
q_loss:  [[[2.084481]]]
t:  750  :episode:  1
q_loss:  [[[0.3874405]]]
t:  751  :episode:  1
q_loss:  [[[2.4955485]]]
t:  752  :episode:  1
q_loss:  [[[0.9639109]]]
t:  753  :episode:  1
q_loss:  [[[0.999292]]]
t:  754  :episode:  1
q_loss:  [[[0.42526028]]]
t:  755  :episode:  1
q_loss:  [[[0.44839412]]]
t:  756  :episode:  1
q_loss:  [[[0.49013752]]]
t:  757  :episode:  1
q_loss:  [[[9.420637]]]
t:  758  :episode:  1
q_loss:  [[[2.889222]]]
t:  759  :episode:  1
q_loss:  [[[0.3349394]]]
t:  760  :episode:  1
q_loss:  [[[0.6056597]]]
t:  761  :episode:  1
q_loss:  [[[0.67584085]]]
t:  762  :episode:  1
q_loss:  [[[0.50129557]]]
t:  763  :episode:  1
q_loss:  [[[3.6582043]]]
t:  764  :episode:  1
q_loss:  [[[0.4578827]]]
t:  765  :episode:  1
q_loss:  [[[0.14969234]]]
t:  766  :episode:  1
q_loss:  [[[0.9830275]]]
t:  767  :episode:  1
q_loss:  [[[0.3654333]]]
t:  768  :episode:  1
q_loss:  

t:  923  :episode:  1
q_loss:  [[[0.46889243]]]
t:  924  :episode:  1
q_loss:  [[[0.53468543]]]
t:  925  :episode:  1
q_loss:  [[[0.6563336]]]
t:  926  :episode:  1
q_loss:  [[[0.7545199]]]
t:  927  :episode:  1
q_loss:  [[[1.5621645]]]
t:  928  :episode:  1
q_loss:  [[[0.3382233]]]
t:  929  :episode:  1
q_loss:  [[[0.52081025]]]
t:  930  :episode:  1
q_loss:  [[[1.0019511]]]
t:  931  :episode:  1
q_loss:  [[[1.3320819]]]
t:  932  :episode:  1
q_loss:  [[[2.0202274]]]
t:  933  :episode:  1
q_loss:  [[[1.2954049]]]
t:  934  :episode:  1
q_loss:  [[[2.932711]]]
t:  935  :episode:  1
q_loss:  [[[5.297164]]]
t:  936  :episode:  1
q_loss:  [[[0.8311426]]]
t:  937  :episode:  1
q_loss:  [[[0.6452585]]]
t:  938  :episode:  1
q_loss:  [[[0.8336079]]]
t:  939  :episode:  1
q_loss:  [[[0.31930465]]]
t:  940  :episode:  1
q_loss:  [[[3.168674]]]
t:  941  :episode:  1
q_loss:  [[[0.46174547]]]
t:  942  :episode:  1
q_loss:  [[[0.37179804]]]
t:  943  :episode:  1
q_loss:  [[[1.148076]]]
t:  944  :e

q_loss:  [[[1.5524838]]]
t:  98  :episode:  2
q_loss:  [[[1.8754824]]]
t:  99  :episode:  2
q_loss:  [[[0.22850484]]]
t:  100  :episode:  2
q_loss:  [[[0.35034856]]]
t:  101  :episode:  2
q_loss:  [[[0.578451]]]
t:  102  :episode:  2
q_loss:  [[[0.81392586]]]
t:  103  :episode:  2
q_loss:  [[[6.9777045]]]
t:  104  :episode:  2
q_loss:  [[[1.3312172]]]
t:  105  :episode:  2
q_loss:  [[[0.5166024]]]
t:  106  :episode:  2
q_loss:  [[[0.7178762]]]
t:  107  :episode:  2
q_loss:  [[[0.61337554]]]
t:  108  :episode:  2
q_loss:  [[[3.1213038]]]
t:  109  :episode:  2
q_loss:  [[[1.237303]]]
t:  110  :episode:  2
q_loss:  [[[0.4594041]]]
t:  111  :episode:  2
q_loss:  [[[0.41326565]]]
t:  112  :episode:  2
q_loss:  [[[0.21204653]]]
t:  113  :episode:  2
q_loss:  [[[0.6998254]]]
t:  114  :episode:  2
q_loss:  [[[0.15324102]]]
t:  115  :episode:  2
q_loss:  [[[2.0534592]]]
t:  116  :episode:  2
q_loss:  [[[2.986724]]]
t:  117  :episode:  2
q_loss:  [[[0.9675043]]]
t:  118  :episode:  2
q_loss:  [[

t:  272  :episode:  2
q_loss:  [[[0.562871]]]
t:  273  :episode:  2
q_loss:  [[[2.2328935]]]
t:  274  :episode:  2
q_loss:  [[[1.4881835]]]
t:  275  :episode:  2
q_loss:  [[[0.70942634]]]
t:  276  :episode:  2
q_loss:  [[[0.5926347]]]
t:  277  :episode:  2
q_loss:  [[[0.7035271]]]
t:  278  :episode:  2
q_loss:  [[[1.0404556]]]
t:  279  :episode:  2
q_loss:  [[[2.7812161]]]
t:  280  :episode:  2
q_loss:  [[[0.71558535]]]
t:  281  :episode:  2
q_loss:  [[[0.35186812]]]
t:  282  :episode:  2
q_loss:  [[[0.8477738]]]
t:  283  :episode:  2
q_loss:  [[[0.5283687]]]
t:  284  :episode:  2
q_loss:  [[[0.42321798]]]
t:  285  :episode:  2
q_loss:  [[[1.7312114]]]
t:  286  :episode:  2
q_loss:  [[[2.0266237]]]
t:  287  :episode:  2
q_loss:  [[[0.61920166]]]
t:  288  :episode:  2
q_loss:  [[[0.3714021]]]
t:  289  :episode:  2
q_loss:  [[[2.701519]]]
t:  290  :episode:  2
q_loss:  [[[0.67727816]]]
t:  291  :episode:  2
q_loss:  [[[0.50406337]]]
t:  292  :episode:  2
q_loss:  [[[0.5233905]]]
t:  293 

t:  446  :episode:  2
q_loss:  [[[0.47715148]]]
t:  447  :episode:  2
q_loss:  [[[0.21755552]]]
t:  448  :episode:  2
q_loss:  [[[0.1973567]]]
t:  449  :episode:  2
q_loss:  [[[1.490424]]]
t:  450  :episode:  2
q_loss:  [[[1.3622932]]]
t:  451  :episode:  2
q_loss:  [[[0.3678741]]]
t:  452  :episode:  2
q_loss:  [[[0.2310694]]]
t:  453  :episode:  2
q_loss:  [[[1.5540665]]]
t:  454  :episode:  2
q_loss:  [[[0.2835977]]]
t:  455  :episode:  2
q_loss:  [[[0.22649688]]]
t:  456  :episode:  2
q_loss:  [[[0.1760207]]]
t:  457  :episode:  2
q_loss:  [[[2.0268648]]]
t:  458  :episode:  2
q_loss:  [[[0.94701046]]]
t:  459  :episode:  2
q_loss:  [[[0.79673016]]]
t:  460  :episode:  2
q_loss:  [[[0.55471575]]]
t:  461  :episode:  2
q_loss:  [[[2.2495413]]]
t:  462  :episode:  2
q_loss:  [[[0.5027889]]]
t:  463  :episode:  2
q_loss:  [[[0.4292447]]]
t:  464  :episode:  2
q_loss:  [[[0.41375816]]]
t:  465  :episode:  2
q_loss:  [[[0.263902]]]
t:  466  :episode:  2
q_loss:  [[[4.769446]]]
t:  467  

q_loss:  [[[0.2113404]]]
t:  620  :episode:  2
q_loss:  [[[0.24378602]]]
t:  621  :episode:  2
q_loss:  [[[1.0450853]]]
t:  622  :episode:  2
q_loss:  [[[0.96290725]]]
t:  623  :episode:  2
q_loss:  [[[2.2342496]]]
t:  624  :episode:  2
q_loss:  [[[0.81066775]]]
t:  625  :episode:  2
q_loss:  [[[0.7316923]]]
t:  626  :episode:  2
q_loss:  [[[4.774188]]]
t:  627  :episode:  2
q_loss:  [[[3.0694957]]]
t:  628  :episode:  2
q_loss:  [[[0.9275807]]]
t:  629  :episode:  2
q_loss:  [[[0.37752092]]]
t:  630  :episode:  2
q_loss:  [[[0.35836387]]]
t:  631  :episode:  2
q_loss:  [[[0.43541205]]]
t:  632  :episode:  2
q_loss:  [[[0.82142043]]]
t:  633  :episode:  2
q_loss:  [[[0.59729797]]]
t:  634  :episode:  2
q_loss:  [[[0.33494502]]]
t:  635  :episode:  2
q_loss:  [[[1.108144]]]
t:  636  :episode:  2
q_loss:  [[[3.3432524]]]
t:  637  :episode:  2
q_loss:  [[[0.9776012]]]
t:  638  :episode:  2
q_loss:  [[[0.87208664]]]
t:  639  :episode:  2
q_loss:  [[[3.453167]]]
t:  640  :episode:  2
q_loss

t:  793  :episode:  2
q_loss:  [[[1.0199767]]]
t:  794  :episode:  2
q_loss:  [[[1.2550116]]]
t:  795  :episode:  2
q_loss:  [[[1.1119745]]]
t:  796  :episode:  2
q_loss:  [[[0.5063766]]]
t:  797  :episode:  2
q_loss:  [[[0.29528576]]]
t:  798  :episode:  2
q_loss:  [[[1.027869]]]
t:  799  :episode:  2
q_loss:  [[[0.51469684]]]
t:  800  :episode:  2
q_loss:  [[[2.6798065]]]
t:  801  :episode:  2
q_loss:  [[[0.3170934]]]
t:  802  :episode:  2
q_loss:  [[[0.6868332]]]
t:  803  :episode:  2
q_loss:  [[[0.42152017]]]
t:  804  :episode:  2
q_loss:  [[[0.22934446]]]
t:  805  :episode:  2
q_loss:  [[[1.2708621]]]
t:  806  :episode:  2
q_loss:  [[[0.25941446]]]
t:  807  :episode:  2
q_loss:  [[[0.76536596]]]
t:  808  :episode:  2
q_loss:  [[[0.25206095]]]
t:  809  :episode:  2
q_loss:  [[[0.72731054]]]
t:  810  :episode:  2
q_loss:  [[[3.1586149]]]
t:  811  :episode:  2
q_loss:  [[[0.71527255]]]
t:  812  :episode:  2
q_loss:  [[[1.3990862]]]
t:  813  :episode:  2
q_loss:  [[[1.1553894]]]
t:  8

q_loss:  [[[0.5317212]]]
t:  968  :episode:  2
q_loss:  [[[0.8258068]]]
t:  969  :episode:  2
q_loss:  [[[0.43502808]]]
t:  970  :episode:  2
q_loss:  [[[2.7437184]]]
t:  971  :episode:  2
q_loss:  [[[1.2044343]]]
t:  972  :episode:  2
q_loss:  [[[0.7967684]]]
t:  973  :episode:  2
q_loss:  [[[0.8654299]]]
t:  974  :episode:  2
q_loss:  [[[0.53963333]]]
t:  975  :episode:  2
q_loss:  [[[0.36871073]]]
t:  976  :episode:  2
q_loss:  [[[1.8058373]]]
t:  977  :episode:  2
q_loss:  [[[0.3740472]]]
t:  978  :episode:  2
q_loss:  [[[0.9687015]]]
t:  979  :episode:  2
q_loss:  [[[0.720708]]]
t:  980  :episode:  2
q_loss:  [[[0.9525094]]]
t:  981  :episode:  2
q_loss:  [[[0.6017586]]]
t:  982  :episode:  2
q_loss:  [[[0.69282603]]]
t:  983  :episode:  2
q_loss:  [[[1.2682024]]]
t:  984  :episode:  2
q_loss:  [[[0.09085789]]]
t:  985  :episode:  2
q_loss:  [[[4.1512175]]]
t:  986  :episode:  2
q_loss:  [[[2.4689062]]]
t:  987  :episode:  2
q_loss:  [[[0.5754227]]]
t:  988  :episode:  2
q_loss:  

t:  144  :episode:  3
q_loss:  [[[1.2083443]]]
t:  145  :episode:  3
q_loss:  [[[0.39309502]]]
t:  146  :episode:  3
q_loss:  [[[1.3566042]]]
t:  147  :episode:  3
q_loss:  [[[0.7158628]]]
t:  148  :episode:  3
q_loss:  [[[0.24634685]]]
t:  149  :episode:  3
q_loss:  [[[1.3739152]]]
t:  150  :episode:  3
q_loss:  [[[1.0990367]]]
t:  151  :episode:  3
q_loss:  [[[2.9045076]]]
t:  152  :episode:  3
q_loss:  [[[0.45721325]]]
t:  153  :episode:  3
q_loss:  [[[1.1210942]]]
t:  154  :episode:  3
q_loss:  [[[1.2744905]]]
t:  155  :episode:  3
q_loss:  [[[0.96669984]]]
t:  156  :episode:  3
q_loss:  [[[1.5812966]]]
t:  157  :episode:  3
q_loss:  [[[2.6781616]]]
t:  158  :episode:  3
q_loss:  [[[0.48458493]]]
t:  159  :episode:  3
q_loss:  [[[0.857517]]]
t:  160  :episode:  3
q_loss:  [[[0.48056158]]]
t:  161  :episode:  3
q_loss:  [[[2.0859096]]]
t:  162  :episode:  3
q_loss:  [[[1.0512427]]]
t:  163  :episode:  3
q_loss:  [[[3.5836527]]]
t:  164  :episode:  3
q_loss:  [[[2.942791]]]
t:  165  

t:  319  :episode:  3
q_loss:  [[[2.4260573]]]
t:  320  :episode:  3
q_loss:  [[[1.3512213]]]
t:  321  :episode:  3
q_loss:  [[[0.8914945]]]
t:  322  :episode:  3
q_loss:  [[[1.4752753]]]
t:  323  :episode:  3
q_loss:  [[[1.5029967]]]
t:  324  :episode:  3
q_loss:  [[[0.5680517]]]
t:  325  :episode:  3
q_loss:  [[[2.5655193]]]
t:  326  :episode:  3
q_loss:  [[[0.85277313]]]
t:  327  :episode:  3
q_loss:  [[[0.8379841]]]
t:  328  :episode:  3
q_loss:  [[[0.74537873]]]
t:  329  :episode:  3
q_loss:  [[[1.4736657]]]
t:  330  :episode:  3
q_loss:  [[[0.75257015]]]
t:  331  :episode:  3
q_loss:  [[[2.2757254]]]
t:  332  :episode:  3
q_loss:  [[[6.516586]]]
t:  333  :episode:  3
q_loss:  [[[2.0281131]]]
t:  334  :episode:  3
q_loss:  [[[0.91943014]]]
t:  335  :episode:  3
q_loss:  [[[1.267441]]]
t:  336  :episode:  3
q_loss:  [[[1.905718]]]
t:  337  :episode:  3
q_loss:  [[[1.1961014]]]
t:  338  :episode:  3
q_loss:  [[[0.6188631]]]
t:  339  :episode:  3
q_loss:  [[[1.2126956]]]
t:  340  :ep

t:  495  :episode:  3
q_loss:  [[[0.56734693]]]
t:  496  :episode:  3
q_loss:  [[[0.2631921]]]
t:  497  :episode:  3
q_loss:  [[[2.0856152]]]
t:  498  :episode:  3
q_loss:  [[[1.047912]]]
t:  499  :episode:  3
q_loss:  [[[6.531452]]]
t:  500  :episode:  3
q_loss:  [[[2.0113626]]]
t:  501  :episode:  3
q_loss:  [[[1.517947]]]
t:  502  :episode:  3
q_loss:  [[[0.8887844]]]
t:  503  :episode:  3
q_loss:  [[[1.8931596]]]
t:  504  :episode:  3
q_loss:  [[[1.3503556]]]
t:  505  :episode:  3
q_loss:  [[[1.2444396]]]
t:  506  :episode:  3
q_loss:  [[[1.458667]]]
t:  507  :episode:  3
q_loss:  [[[2.3059988]]]
t:  508  :episode:  3
q_loss:  [[[0.7252639]]]
t:  509  :episode:  3
q_loss:  [[[0.8654685]]]
t:  510  :episode:  3
q_loss:  [[[1.0411347]]]
t:  511  :episode:  3
q_loss:  [[[0.8084367]]]
t:  512  :episode:  3
q_loss:  [[[2.6407597]]]
t:  513  :episode:  3
q_loss:  [[[1.9078859]]]
t:  514  :episode:  3
q_loss:  [[[1.3720367]]]
t:  515  :episode:  3
q_loss:  [[[1.2639883]]]
t:  516  :episod

q_loss:  [[[1.2373539]]]
t:  671  :episode:  3
q_loss:  [[[0.35297763]]]
t:  672  :episode:  3
q_loss:  [[[1.8781209]]]
t:  673  :episode:  3
q_loss:  [[[0.3994401]]]
t:  674  :episode:  3
q_loss:  [[[0.44888484]]]
t:  675  :episode:  3
q_loss:  [[[1.9487503]]]
t:  676  :episode:  3
q_loss:  [[[0.7225177]]]
t:  677  :episode:  3
q_loss:  [[[2.2736268]]]
t:  678  :episode:  3
q_loss:  [[[2.6469123]]]
t:  679  :episode:  3
q_loss:  [[[0.75656855]]]
t:  680  :episode:  3
q_loss:  [[[0.8642267]]]
t:  681  :episode:  3
q_loss:  [[[0.48522526]]]
t:  682  :episode:  3
q_loss:  [[[0.6374787]]]
t:  683  :episode:  3
q_loss:  [[[1.2807322]]]
t:  684  :episode:  3
q_loss:  [[[0.80166036]]]
t:  685  :episode:  3
q_loss:  [[[2.4513927]]]
t:  686  :episode:  3
q_loss:  [[[0.36769274]]]
t:  687  :episode:  3
q_loss:  [[[1.6941202]]]
t:  688  :episode:  3
q_loss:  [[[1.2801665]]]
t:  689  :episode:  3
q_loss:  [[[1.3552125]]]
t:  690  :episode:  3
q_loss:  [[[9.352291]]]
t:  691  :episode:  3
q_loss: 

t:  848  :episode:  3
q_loss:  [[[23.633636]]]
t:  849  :episode:  3
q_loss:  [[[104.937996]]]
t:  850  :episode:  3
q_loss:  [[[4.1461678]]]
t:  851  :episode:  3
q_loss:  [[[4.5197396]]]
t:  852  :episode:  3
q_loss:  [[[15.603197]]]
t:  853  :episode:  3
q_loss:  [[[56.61782]]]
t:  854  :episode:  3
q_loss:  [[[39.258316]]]
t:  855  :episode:  3
q_loss:  [[[8.996292]]]
t:  856  :episode:  3
q_loss:  [[[5.3909955]]]
t:  857  :episode:  3
q_loss:  [[[7.222539]]]
t:  858  :episode:  3
q_loss:  [[[2.837669]]]
t:  859  :episode:  3
q_loss:  [[[11.580843]]]
t:  860  :episode:  3
q_loss:  [[[3.8741927]]]
t:  861  :episode:  3
q_loss:  [[[46.68945]]]
t:  862  :episode:  3
q_loss:  [[[74.28692]]]
t:  863  :episode:  3
q_loss:  [[[2.2490258]]]
t:  864  :episode:  3
q_loss:  [[[36.08909]]]
t:  865  :episode:  3
q_loss:  [[[20.496927]]]
t:  866  :episode:  3
q_loss:  [[[10.644844]]]
t:  867  :episode:  3
q_loss:  [[[5.585964]]]
t:  868  :episode:  3
q_loss:  [[[7.1668267]]]
t:  869  :episode:  

q_loss:  [[[13.441891]]]
t:  24  :episode:  4
q_loss:  [[[18.539305]]]
t:  25  :episode:  4
q_loss:  [[[16.317184]]]
t:  26  :episode:  4
q_loss:  [[[3.9815395]]]
t:  27  :episode:  4
q_loss:  [[[4.071918]]]
t:  28  :episode:  4
q_loss:  [[[24.039867]]]
t:  29  :episode:  4
q_loss:  [[[15.209683]]]
t:  30  :episode:  4
q_loss:  [[[8.188509]]]
t:  31  :episode:  4
q_loss:  [[[1.3554382]]]
t:  32  :episode:  4
q_loss:  [[[8.950752]]]
t:  33  :episode:  4
q_loss:  [[[7.971019]]]
t:  34  :episode:  4
q_loss:  [[[3.112435]]]
t:  35  :episode:  4
q_loss:  [[[10.165173]]]
t:  36  :episode:  4
q_loss:  [[[35.466602]]]
t:  37  :episode:  4
q_loss:  [[[10.7504225]]]
t:  38  :episode:  4
q_loss:  [[[2.564857]]]
t:  39  :episode:  4
q_loss:  [[[2.9325008]]]
t:  40  :episode:  4
q_loss:  [[[13.942058]]]
t:  41  :episode:  4
q_loss:  [[[21.070347]]]
t:  42  :episode:  4
q_loss:  [[[5.00883]]]
t:  43  :episode:  4
q_loss:  [[[21.564255]]]
t:  44  :episode:  4
q_loss:  [[[19.410076]]]
t:  45  :episode

t:  203  :episode:  4
q_loss:  [[[16.637135]]]
t:  204  :episode:  4
q_loss:  [[[10.251956]]]
t:  205  :episode:  4
q_loss:  [[[20.907616]]]
t:  206  :episode:  4
q_loss:  [[[14.629458]]]
t:  207  :episode:  4
q_loss:  [[[24.260574]]]
t:  208  :episode:  4
q_loss:  [[[32.049614]]]
t:  209  :episode:  4
q_loss:  [[[11.892496]]]
t:  210  :episode:  4
q_loss:  [[[21.51716]]]
t:  211  :episode:  4
q_loss:  [[[4.9887867]]]
t:  212  :episode:  4
q_loss:  [[[23.782953]]]
t:  213  :episode:  4
q_loss:  [[[26.780102]]]
t:  214  :episode:  4
q_loss:  [[[3.4406235]]]
t:  215  :episode:  4
q_loss:  [[[3.170455]]]
t:  216  :episode:  4
q_loss:  [[[6.8029118]]]
t:  217  :episode:  4
q_loss:  [[[223.85483]]]
t:  218  :episode:  4
q_loss:  [[[9.580337]]]
t:  219  :episode:  4
q_loss:  [[[47.309116]]]
t:  220  :episode:  4
q_loss:  [[[22.464405]]]
t:  221  :episode:  4
q_loss:  [[[28.324282]]]
t:  222  :episode:  4
q_loss:  [[[10.069196]]]
t:  223  :episode:  4
q_loss:  [[[3.7678072]]]
t:  224  :episod

q_loss:  [[[12.309412]]]
t:  382  :episode:  4
q_loss:  [[[4.0708904]]]
t:  383  :episode:  4
q_loss:  [[[6.307342]]]
t:  384  :episode:  4
q_loss:  [[[5.550993]]]
t:  385  :episode:  4
q_loss:  [[[11.025297]]]
t:  386  :episode:  4
q_loss:  [[[5.0502834]]]
t:  387  :episode:  4
q_loss:  [[[12.247541]]]
t:  388  :episode:  4
q_loss:  [[[56.72727]]]
t:  389  :episode:  4
q_loss:  [[[10.470603]]]
t:  390  :episode:  4
q_loss:  [[[5.5319014]]]
t:  391  :episode:  4
q_loss:  [[[4.821091]]]
t:  392  :episode:  4
q_loss:  [[[11.762714]]]
t:  393  :episode:  4
q_loss:  [[[9.973681]]]
t:  394  :episode:  4
q_loss:  [[[2.4771142]]]
t:  395  :episode:  4
q_loss:  [[[15.417667]]]
t:  396  :episode:  4
q_loss:  [[[3.4817774]]]
t:  397  :episode:  4
q_loss:  [[[4.5443225]]]
t:  398  :episode:  4
q_loss:  [[[1.4284344]]]
t:  399  :episode:  4
q_loss:  [[[7.6509914]]]
t:  400  :episode:  4
q_loss:  [[[9.154765]]]
t:  401  :episode:  4
q_loss:  [[[4.4893336]]]
t:  402  :episode:  4
q_loss:  [[[2.31393

t:  558  :episode:  4
q_loss:  [[[4.988783]]]
t:  559  :episode:  4
q_loss:  [[[4.636493]]]
t:  560  :episode:  4
q_loss:  [[[3.520825]]]
t:  561  :episode:  4
q_loss:  [[[8.111865]]]
t:  562  :episode:  4
q_loss:  [[[17.596771]]]
t:  563  :episode:  4
q_loss:  [[[10.549714]]]
t:  564  :episode:  4
q_loss:  [[[9.81332]]]
t:  565  :episode:  4
q_loss:  [[[6.484771]]]
t:  566  :episode:  4
q_loss:  [[[2.6427693]]]
t:  567  :episode:  4
q_loss:  [[[2.9222116]]]
t:  568  :episode:  4
q_loss:  [[[3.4522636]]]
t:  569  :episode:  4
q_loss:  [[[2.3725648]]]
t:  570  :episode:  4
q_loss:  [[[12.578813]]]
t:  571  :episode:  4
q_loss:  [[[204.45743]]]
t:  572  :episode:  4
q_loss:  [[[29.031734]]]
t:  573  :episode:  4
q_loss:  [[[7.4057236]]]
t:  574  :episode:  4
q_loss:  [[[4.8987417]]]
t:  575  :episode:  4
q_loss:  [[[71.30594]]]
t:  576  :episode:  4
q_loss:  [[[13.949988]]]
t:  577  :episode:  4
q_loss:  [[[12.585639]]]
t:  578  :episode:  4
q_loss:  [[[16.174505]]]
t:  579  :episode:  4

t:  736  :episode:  4
q_loss:  [[[5.333859]]]
t:  737  :episode:  4
q_loss:  [[[9.997695]]]
t:  738  :episode:  4
q_loss:  [[[5.4526596]]]
t:  739  :episode:  4
q_loss:  [[[5.044556]]]
t:  740  :episode:  4
q_loss:  [[[12.495682]]]
t:  741  :episode:  4
q_loss:  [[[8.630595]]]
t:  742  :episode:  4
q_loss:  [[[9.8995]]]
t:  743  :episode:  4
q_loss:  [[[4.3162203]]]
t:  744  :episode:  4
q_loss:  [[[8.421635]]]
t:  745  :episode:  4
q_loss:  [[[6.019252]]]
t:  746  :episode:  4
q_loss:  [[[6.389642]]]
t:  747  :episode:  4
q_loss:  [[[6.303412]]]
t:  748  :episode:  4
q_loss:  [[[3.117579]]]
t:  749  :episode:  4
q_loss:  [[[3.6340044]]]
t:  750  :episode:  4
q_loss:  [[[5.737173]]]
t:  751  :episode:  4
q_loss:  [[[4.156178]]]
t:  752  :episode:  4
q_loss:  [[[2.467215]]]
t:  753  :episode:  4
q_loss:  [[[1.9129112]]]
t:  754  :episode:  4
q_loss:  [[[2.8657959]]]
t:  755  :episode:  4
q_loss:  [[[3.6543875]]]
t:  756  :episode:  4
q_loss:  [[[1.6122191]]]
t:  757  :episode:  4
q_loss

t:  913  :episode:  4
q_loss:  [[[16.904163]]]
t:  914  :episode:  4
q_loss:  [[[17.20767]]]
t:  915  :episode:  4
q_loss:  [[[22.866684]]]
t:  916  :episode:  4
q_loss:  [[[32.208855]]]
t:  917  :episode:  4
q_loss:  [[[4.223397]]]
t:  918  :episode:  4
q_loss:  [[[41.951878]]]
t:  919  :episode:  4
q_loss:  [[[64.959496]]]
t:  920  :episode:  4
q_loss:  [[[1457.7578]]]
t:  921  :episode:  4
q_loss:  [[[12.312863]]]
t:  922  :episode:  4
q_loss:  [[[11.027506]]]
t:  923  :episode:  4
q_loss:  [[[4.7848163]]]
t:  924  :episode:  4
q_loss:  [[[16.967098]]]
t:  925  :episode:  4
q_loss:  [[[24.578552]]]
t:  926  :episode:  4
q_loss:  [[[14.249205]]]
t:  927  :episode:  4
q_loss:  [[[17.135393]]]
t:  928  :episode:  4
q_loss:  [[[18.116198]]]
t:  929  :episode:  4
q_loss:  [[[91.08285]]]
t:  930  :episode:  4
q_loss:  [[[14.744015]]]
t:  931  :episode:  4
q_loss:  [[[11.541025]]]
t:  932  :episode:  4
q_loss:  [[[28.368002]]]
t:  933  :episode:  4
q_loss:  [[[593.52563]]]
t:  934  :episod

t:  90  :episode:  5
q_loss:  [[[38.967663]]]
t:  91  :episode:  5
q_loss:  [[[38.85578]]]
t:  92  :episode:  5
q_loss:  [[[24.021194]]]
t:  93  :episode:  5
q_loss:  [[[4.9720163]]]
t:  94  :episode:  5
q_loss:  [[[23.68758]]]
t:  95  :episode:  5
q_loss:  [[[5.4101706]]]
t:  96  :episode:  5
q_loss:  [[[16.89835]]]
t:  97  :episode:  5
q_loss:  [[[10.526916]]]
t:  98  :episode:  5
q_loss:  [[[9.993879]]]
t:  99  :episode:  5
q_loss:  [[[60.419014]]]
t:  100  :episode:  5
q_loss:  [[[4.3355374]]]
t:  101  :episode:  5
q_loss:  [[[5.4857073]]]
t:  102  :episode:  5
q_loss:  [[[12.748974]]]
t:  103  :episode:  5
q_loss:  [[[5.569597]]]
t:  104  :episode:  5
q_loss:  [[[82.79053]]]
t:  105  :episode:  5
q_loss:  [[[3.5047247]]]
t:  106  :episode:  5
q_loss:  [[[10.800095]]]
t:  107  :episode:  5
q_loss:  [[[18.005213]]]
t:  108  :episode:  5
q_loss:  [[[12.3054905]]]
t:  109  :episode:  5
q_loss:  [[[24.066303]]]
t:  110  :episode:  5
q_loss:  [[[2.2820058]]]
t:  111  :episode:  5
q_loss

q_loss:  [[[16.036919]]]
t:  268  :episode:  5
q_loss:  [[[7.499472]]]
t:  269  :episode:  5
q_loss:  [[[6.15472]]]
t:  270  :episode:  5
q_loss:  [[[9.861309]]]
t:  271  :episode:  5
q_loss:  [[[6.2365556]]]
t:  272  :episode:  5
q_loss:  [[[16.909422]]]
t:  273  :episode:  5
q_loss:  [[[3.3742647]]]
t:  274  :episode:  5
q_loss:  [[[5.1597314]]]
t:  275  :episode:  5
q_loss:  [[[11.130189]]]
t:  276  :episode:  5
q_loss:  [[[4.6783113]]]
t:  277  :episode:  5
q_loss:  [[[22.194288]]]
t:  278  :episode:  5
q_loss:  [[[9.901356]]]
t:  279  :episode:  5
q_loss:  [[[12.063631]]]
t:  280  :episode:  5
q_loss:  [[[21.891499]]]
t:  281  :episode:  5
q_loss:  [[[10.341366]]]
t:  282  :episode:  5
q_loss:  [[[5.687378]]]
t:  283  :episode:  5
q_loss:  [[[4.5803666]]]
t:  284  :episode:  5
q_loss:  [[[11.383767]]]
t:  285  :episode:  5
q_loss:  [[[17.064407]]]
t:  286  :episode:  5
q_loss:  [[[55.998234]]]
t:  287  :episode:  5
q_loss:  [[[9.716635]]]
t:  288  :episode:  5
q_loss:  [[[23.58610

q_loss:  [[[2.364168]]]
t:  444  :episode:  5
q_loss:  [[[2.8753262]]]
t:  445  :episode:  5
q_loss:  [[[0.8539563]]]
t:  446  :episode:  5
q_loss:  [[[18.396608]]]
t:  447  :episode:  5
q_loss:  [[[13.446188]]]
t:  448  :episode:  5
q_loss:  [[[46.051727]]]
t:  449  :episode:  5
q_loss:  [[[4.0494084]]]
t:  450  :episode:  5
q_loss:  [[[56.848835]]]
t:  451  :episode:  5
q_loss:  [[[6.4273567]]]
t:  452  :episode:  5
q_loss:  [[[6.25065]]]
t:  453  :episode:  5
q_loss:  [[[8.428135]]]
t:  454  :episode:  5
q_loss:  [[[25.260187]]]
t:  455  :episode:  5
q_loss:  [[[13.939766]]]
t:  456  :episode:  5
q_loss:  [[[7.111066]]]
t:  457  :episode:  5
q_loss:  [[[6.301954]]]
t:  458  :episode:  5
q_loss:  [[[13.194331]]]
t:  459  :episode:  5
q_loss:  [[[3.1550932]]]
t:  460  :episode:  5
q_loss:  [[[9.81569]]]
t:  461  :episode:  5
q_loss:  [[[14.91765]]]
t:  462  :episode:  5
q_loss:  [[[9.877359]]]
t:  463  :episode:  5
q_loss:  [[[5.8232203]]]
t:  464  :episode:  5
q_loss:  [[[8.136651]]]

t:  621  :episode:  5
q_loss:  [[[3.5291028]]]
t:  622  :episode:  5
q_loss:  [[[7.8518486]]]
t:  623  :episode:  5
q_loss:  [[[2.0459292]]]
t:  624  :episode:  5
q_loss:  [[[8.835192]]]
t:  625  :episode:  5
q_loss:  [[[6.7904725]]]
t:  626  :episode:  5
q_loss:  [[[4.557956]]]
t:  627  :episode:  5
q_loss:  [[[20.28856]]]
t:  628  :episode:  5
q_loss:  [[[4.1880436]]]
t:  629  :episode:  5
q_loss:  [[[3.66401]]]
t:  630  :episode:  5
q_loss:  [[[10.920107]]]
t:  631  :episode:  5
q_loss:  [[[4.8559504]]]
t:  632  :episode:  5
q_loss:  [[[62.2184]]]
t:  633  :episode:  5
q_loss:  [[[3.2934544]]]
t:  634  :episode:  5
q_loss:  [[[4.4763846]]]
t:  635  :episode:  5
q_loss:  [[[5.839701]]]
t:  636  :episode:  5
q_loss:  [[[3.4176161]]]
t:  637  :episode:  5
q_loss:  [[[11.875436]]]
t:  638  :episode:  5
q_loss:  [[[5.761082]]]
t:  639  :episode:  5
q_loss:  [[[3.5180736]]]
t:  640  :episode:  5
q_loss:  [[[2.1187239]]]
t:  641  :episode:  5
q_loss:  [[[3.2713554]]]
t:  642  :episode:  5


t:  799  :episode:  5
q_loss:  [[[2.8812633]]]
t:  800  :episode:  5
q_loss:  [[[6.3499017]]]
t:  801  :episode:  5
q_loss:  [[[8.464905]]]
t:  802  :episode:  5
q_loss:  [[[3.6405163]]]
t:  803  :episode:  5
q_loss:  [[[2.850788]]]
t:  804  :episode:  5
q_loss:  [[[12.547455]]]
t:  805  :episode:  5
q_loss:  [[[4.236847]]]
t:  806  :episode:  5
q_loss:  [[[2.4428096]]]
t:  807  :episode:  5
q_loss:  [[[6.048911]]]
t:  808  :episode:  5
q_loss:  [[[4.2001877]]]
t:  809  :episode:  5
q_loss:  [[[2.526839]]]
t:  810  :episode:  5
q_loss:  [[[5.6265182]]]
t:  811  :episode:  5
q_loss:  [[[7.597021]]]
t:  812  :episode:  5
q_loss:  [[[5.062346]]]
t:  813  :episode:  5
q_loss:  [[[3.9493978]]]
t:  814  :episode:  5
q_loss:  [[[3.6117935]]]
t:  815  :episode:  5
q_loss:  [[[6.07832]]]
t:  816  :episode:  5
q_loss:  [[[2.3950324]]]
t:  817  :episode:  5
q_loss:  [[[3.6573746]]]
t:  818  :episode:  5
q_loss:  [[[3.1304276]]]
t:  819  :episode:  5
q_loss:  [[[10.113522]]]
t:  820  :episode:  5


t:  975  :episode:  5
q_loss:  [[[2.1365738]]]
t:  976  :episode:  5
q_loss:  [[[7.2821712]]]
t:  977  :episode:  5
q_loss:  [[[33.061874]]]
t:  978  :episode:  5
q_loss:  [[[3.6505036]]]
t:  979  :episode:  5
q_loss:  [[[9.947318]]]
t:  980  :episode:  5
q_loss:  [[[4.6367517]]]
t:  981  :episode:  5
q_loss:  [[[4.219117]]]
t:  982  :episode:  5
q_loss:  [[[15.838826]]]
t:  983  :episode:  5
q_loss:  [[[5.4300094]]]
t:  984  :episode:  5
q_loss:  [[[13.391801]]]
t:  985  :episode:  5
q_loss:  [[[17.433132]]]
t:  986  :episode:  5
q_loss:  [[[8.966778]]]
t:  987  :episode:  5
q_loss:  [[[8.6210985]]]
t:  988  :episode:  5
q_loss:  [[[4.6375995]]]
t:  989  :episode:  5
q_loss:  [[[2.8776631]]]
t:  990  :episode:  5
q_loss:  [[[5.555211]]]
t:  991  :episode:  5
q_loss:  [[[4.9824533]]]
t:  992  :episode:  5
q_loss:  [[[9.890238]]]
t:  993  :episode:  5
q_loss:  [[[2.2880497]]]
t:  994  :episode:  5
q_loss:  [[[8.187971]]]
t:  995  :episode:  5
q_loss:  [[[10.224356]]]
t:  996  :episode: 

t:  153  :episode:  6
q_loss:  [[[5.804041]]]
t:  154  :episode:  6
q_loss:  [[[7.0513597]]]
t:  155  :episode:  6
q_loss:  [[[3.3211794]]]
t:  156  :episode:  6
q_loss:  [[[0.727822]]]
t:  157  :episode:  6
q_loss:  [[[7.6059685]]]
t:  158  :episode:  6
q_loss:  [[[4.2633877]]]
t:  159  :episode:  6
q_loss:  [[[3.7869973]]]
t:  160  :episode:  6
q_loss:  [[[9.56776]]]
t:  161  :episode:  6
q_loss:  [[[1.5977226]]]
t:  162  :episode:  6
q_loss:  [[[2.3591576]]]
t:  163  :episode:  6
q_loss:  [[[3.175726]]]
t:  164  :episode:  6
q_loss:  [[[2.2648206]]]
t:  165  :episode:  6
q_loss:  [[[15.133799]]]
t:  166  :episode:  6
q_loss:  [[[2.1345673]]]
t:  167  :episode:  6
q_loss:  [[[2.0245013]]]
t:  168  :episode:  6
q_loss:  [[[4.211805]]]
t:  169  :episode:  6
q_loss:  [[[2.234855]]]
t:  170  :episode:  6
q_loss:  [[[14.298117]]]
t:  171  :episode:  6
q_loss:  [[[5.4234414]]]
t:  172  :episode:  6
q_loss:  [[[7.3970714]]]
t:  173  :episode:  6
q_loss:  [[[42.062714]]]
t:  174  :episode:  

t:  330  :episode:  6
q_loss:  [[[11.162265]]]
t:  331  :episode:  6
q_loss:  [[[7.630707]]]
t:  332  :episode:  6
q_loss:  [[[9.607573]]]
t:  333  :episode:  6
q_loss:  [[[1.9313234]]]
t:  334  :episode:  6
q_loss:  [[[5.528971]]]
t:  335  :episode:  6
q_loss:  [[[7.074072]]]
t:  336  :episode:  6
q_loss:  [[[6.0448475]]]
t:  337  :episode:  6
q_loss:  [[[9.571473]]]
t:  338  :episode:  6
q_loss:  [[[2.567388]]]
t:  339  :episode:  6
q_loss:  [[[2.9767637]]]
t:  340  :episode:  6
q_loss:  [[[20.385342]]]
t:  341  :episode:  6
q_loss:  [[[18.395292]]]
t:  342  :episode:  6
q_loss:  [[[5.02592]]]
t:  343  :episode:  6
q_loss:  [[[8.614785]]]
t:  344  :episode:  6
q_loss:  [[[4.7819567]]]
t:  345  :episode:  6
q_loss:  [[[9.108495]]]
t:  346  :episode:  6
q_loss:  [[[2.853413]]]
t:  347  :episode:  6
q_loss:  [[[12.106641]]]
t:  348  :episode:  6
q_loss:  [[[3.6037588]]]
t:  349  :episode:  6
q_loss:  [[[5.294355]]]
t:  350  :episode:  6
q_loss:  [[[3.9421995]]]
t:  351  :episode:  6
q_l

t:  506  :episode:  6
q_loss:  [[[3.6028295]]]
t:  507  :episode:  6
q_loss:  [[[10.079778]]]
t:  508  :episode:  6
q_loss:  [[[4.253758]]]
t:  509  :episode:  6
q_loss:  [[[1.5959651]]]
t:  510  :episode:  6
q_loss:  [[[5.213842]]]
t:  511  :episode:  6
q_loss:  [[[59.53202]]]
t:  512  :episode:  6
q_loss:  [[[2.784773]]]
t:  513  :episode:  6
q_loss:  [[[32.06755]]]
t:  514  :episode:  6
q_loss:  [[[4.2127047]]]
t:  515  :episode:  6
q_loss:  [[[6.635201]]]
t:  516  :episode:  6
q_loss:  [[[46.323128]]]
t:  517  :episode:  6
q_loss:  [[[9.352539]]]
t:  518  :episode:  6
q_loss:  [[[10.038095]]]
t:  519  :episode:  6
q_loss:  [[[5.2155128]]]
t:  520  :episode:  6
q_loss:  [[[8.767652]]]
t:  521  :episode:  6
q_loss:  [[[7.384415]]]
t:  522  :episode:  6
q_loss:  [[[6.094057]]]
t:  523  :episode:  6
q_loss:  [[[5.2632856]]]
t:  524  :episode:  6
q_loss:  [[[9.75408]]]
t:  525  :episode:  6
q_loss:  [[[7.6763973]]]
t:  526  :episode:  6
q_loss:  [[[5.466541]]]
t:  527  :episode:  6
q_lo

t:  683  :episode:  6
q_loss:  [[[1.2010568]]]
t:  684  :episode:  6
q_loss:  [[[2.6131787]]]
t:  685  :episode:  6
q_loss:  [[[4.2609067]]]
t:  686  :episode:  6
q_loss:  [[[2.3981712]]]
t:  687  :episode:  6
q_loss:  [[[6.948001]]]
t:  688  :episode:  6
q_loss:  [[[5.858518]]]
t:  689  :episode:  6
q_loss:  [[[5.0762806]]]
t:  690  :episode:  6
q_loss:  [[[2.752407]]]
t:  691  :episode:  6
q_loss:  [[[40.551685]]]
t:  692  :episode:  6
q_loss:  [[[4.925484]]]
t:  693  :episode:  6
q_loss:  [[[4.066302]]]
t:  694  :episode:  6
q_loss:  [[[44.028725]]]
t:  695  :episode:  6
q_loss:  [[[19.028969]]]
t:  696  :episode:  6
q_loss:  [[[6.309304]]]
t:  697  :episode:  6
q_loss:  [[[7.2016697]]]
t:  698  :episode:  6
q_loss:  [[[1.721147]]]
t:  699  :episode:  6
q_loss:  [[[2.4758046]]]
t:  700  :episode:  6
q_loss:  [[[26.265736]]]
t:  701  :episode:  6
q_loss:  [[[6.5910616]]]
t:  702  :episode:  6
q_loss:  [[[4.479315]]]
t:  703  :episode:  6
q_loss:  [[[8.38205]]]
t:  704  :episode:  6
q

t:  859  :episode:  6
q_loss:  [[[11.52124]]]
t:  860  :episode:  6
q_loss:  [[[10.833866]]]
t:  861  :episode:  6
q_loss:  [[[3.4135888]]]
t:  862  :episode:  6
q_loss:  [[[3.443943]]]
t:  863  :episode:  6
q_loss:  [[[7.770323]]]
t:  864  :episode:  6
q_loss:  [[[15.003153]]]
t:  865  :episode:  6
q_loss:  [[[2.4759247]]]
t:  866  :episode:  6
q_loss:  [[[21.20833]]]
t:  867  :episode:  6
q_loss:  [[[7.914865]]]
t:  868  :episode:  6
q_loss:  [[[10.258709]]]
t:  869  :episode:  6
q_loss:  [[[4.8032103]]]
t:  870  :episode:  6
q_loss:  [[[12.686108]]]
t:  871  :episode:  6
q_loss:  [[[2.6354227]]]
t:  872  :episode:  6
q_loss:  [[[6.1696606]]]
t:  873  :episode:  6
q_loss:  [[[4.925987]]]
t:  874  :episode:  6
q_loss:  [[[7.5586123]]]
t:  875  :episode:  6
q_loss:  [[[2.5750074]]]
t:  876  :episode:  6
q_loss:  [[[9.29102]]]
t:  877  :episode:  6
q_loss:  [[[3.2408624]]]
t:  878  :episode:  6
q_loss:  [[[2.0378969]]]
t:  879  :episode:  6
q_loss:  [[[7.142074]]]
t:  880  :episode:  6


t:  36  :episode:  7
q_loss:  [[[7.532766]]]
t:  37  :episode:  7
q_loss:  [[[25.262909]]]
t:  38  :episode:  7
q_loss:  [[[14.625098]]]
t:  39  :episode:  7
q_loss:  [[[6.9557548]]]
t:  40  :episode:  7
q_loss:  [[[4.341565]]]
t:  41  :episode:  7
q_loss:  [[[6.0504746]]]
t:  42  :episode:  7
q_loss:  [[[4.1186132]]]
t:  43  :episode:  7
q_loss:  [[[2.0410614]]]
t:  44  :episode:  7
q_loss:  [[[23.28312]]]
t:  45  :episode:  7
q_loss:  [[[7.566648]]]
t:  46  :episode:  7
q_loss:  [[[4.7371264]]]
t:  47  :episode:  7
q_loss:  [[[2.6549313]]]
t:  48  :episode:  7
q_loss:  [[[2.183871]]]
t:  49  :episode:  7
q_loss:  [[[6.9385595]]]
t:  50  :episode:  7
q_loss:  [[[10.25573]]]
t:  51  :episode:  7
q_loss:  [[[18.808502]]]
t:  52  :episode:  7
q_loss:  [[[11.630455]]]
t:  53  :episode:  7
q_loss:  [[[7.1280394]]]
t:  54  :episode:  7
q_loss:  [[[4.445799]]]
t:  55  :episode:  7
q_loss:  [[[8.885364]]]
t:  56  :episode:  7
q_loss:  [[[10.4406]]]
t:  57  :episode:  7
q_loss:  [[[20.813292]]

t:  214  :episode:  7
q_loss:  [[[4.382117]]]
t:  215  :episode:  7
q_loss:  [[[2.1510236]]]
t:  216  :episode:  7
q_loss:  [[[6.0700626]]]
t:  217  :episode:  7
q_loss:  [[[14.281048]]]
t:  218  :episode:  7
q_loss:  [[[10.481172]]]
t:  219  :episode:  7
q_loss:  [[[9.180222]]]
t:  220  :episode:  7
q_loss:  [[[7.5031424]]]
t:  221  :episode:  7
q_loss:  [[[3.7674701]]]
t:  222  :episode:  7
q_loss:  [[[2.6430717]]]
t:  223  :episode:  7
q_loss:  [[[4.4739933]]]
t:  224  :episode:  7
q_loss:  [[[19.887978]]]
t:  225  :episode:  7
q_loss:  [[[5.42833]]]
t:  226  :episode:  7
q_loss:  [[[4.78924]]]
t:  227  :episode:  7
q_loss:  [[[4.3466334]]]
t:  228  :episode:  7
q_loss:  [[[1.2506438]]]
t:  229  :episode:  7
q_loss:  [[[10.468051]]]
t:  230  :episode:  7
q_loss:  [[[3.9545794]]]
t:  231  :episode:  7
q_loss:  [[[18.166903]]]
t:  232  :episode:  7
q_loss:  [[[1.6125941]]]
t:  233  :episode:  7
q_loss:  [[[4.438724]]]
t:  234  :episode:  7
q_loss:  [[[9.197628]]]
t:  235  :episode:  7

t:  390  :episode:  7
q_loss:  [[[103.73222]]]
t:  391  :episode:  7
q_loss:  [[[11.461305]]]
t:  392  :episode:  7
q_loss:  [[[15.482208]]]
t:  393  :episode:  7
q_loss:  [[[50.439705]]]
t:  394  :episode:  7
q_loss:  [[[11.663094]]]
t:  395  :episode:  7
q_loss:  [[[12.414978]]]
t:  396  :episode:  7
q_loss:  [[[7.251338]]]
t:  397  :episode:  7
q_loss:  [[[11.652291]]]
t:  398  :episode:  7
q_loss:  [[[29.938236]]]
t:  399  :episode:  7
q_loss:  [[[26.630085]]]
t:  400  :episode:  7
q_loss:  [[[11.120329]]]
t:  401  :episode:  7
q_loss:  [[[8.3101635]]]
t:  402  :episode:  7
q_loss:  [[[10.314396]]]
t:  403  :episode:  7
q_loss:  [[[28.191896]]]
t:  404  :episode:  7
q_loss:  [[[31.25137]]]
t:  405  :episode:  7
q_loss:  [[[7.606469]]]
t:  406  :episode:  7
q_loss:  [[[16.272472]]]
t:  407  :episode:  7
q_loss:  [[[18.557285]]]
t:  408  :episode:  7
q_loss:  [[[4.5460157]]]
t:  409  :episode:  7
q_loss:  [[[9.860409]]]
t:  410  :episode:  7
q_loss:  [[[3.8205426]]]
t:  411  :episode

t:  568  :episode:  7
q_loss:  [[[104.918304]]]
t:  569  :episode:  7
q_loss:  [[[2534.3188]]]
t:  570  :episode:  7
q_loss:  [[[39.936943]]]
t:  571  :episode:  7
q_loss:  [[[1012.82446]]]
t:  572  :episode:  7
q_loss:  [[[149.26328]]]
t:  573  :episode:  7
q_loss:  [[[2135.7583]]]
t:  574  :episode:  7
q_loss:  [[[228.30154]]]
t:  575  :episode:  7
q_loss:  [[[80.945335]]]
t:  576  :episode:  7
q_loss:  [[[19.18182]]]
t:  577  :episode:  7
q_loss:  [[[37.750435]]]
t:  578  :episode:  7
q_loss:  [[[31.086472]]]
t:  579  :episode:  7
q_loss:  [[[81.86039]]]
t:  580  :episode:  7
q_loss:  [[[60.034363]]]
t:  581  :episode:  7
q_loss:  [[[52.034172]]]
t:  582  :episode:  7
q_loss:  [[[11.799025]]]
t:  583  :episode:  7
q_loss:  [[[32.68597]]]
t:  584  :episode:  7
q_loss:  [[[232.09486]]]
t:  585  :episode:  7
q_loss:  [[[1281.1965]]]
t:  586  :episode:  7
q_loss:  [[[47.752495]]]
t:  587  :episode:  7
q_loss:  [[[491.64496]]]
t:  588  :episode:  7
q_loss:  [[[147.86865]]]
t:  589  :epis

q_loss:  [[[33.944954]]]
t:  745  :episode:  7
q_loss:  [[[894.5197]]]
t:  746  :episode:  7
q_loss:  [[[60.18242]]]
t:  747  :episode:  7
q_loss:  [[[69.78292]]]
t:  748  :episode:  7
q_loss:  [[[46.454475]]]
t:  749  :episode:  7
q_loss:  [[[446.16614]]]
t:  750  :episode:  7
q_loss:  [[[19.565535]]]
t:  751  :episode:  7
q_loss:  [[[19.687809]]]
t:  752  :episode:  7
q_loss:  [[[397.28317]]]
t:  753  :episode:  7
q_loss:  [[[99.97542]]]
t:  754  :episode:  7
q_loss:  [[[310.0509]]]
t:  755  :episode:  7
q_loss:  [[[436.40796]]]
t:  756  :episode:  7
q_loss:  [[[49.94906]]]
t:  757  :episode:  7
q_loss:  [[[25.892189]]]
t:  758  :episode:  7
q_loss:  [[[69.19069]]]
t:  759  :episode:  7
q_loss:  [[[213.35709]]]
t:  760  :episode:  7
q_loss:  [[[54.716347]]]
t:  761  :episode:  7
q_loss:  [[[349.61734]]]
t:  762  :episode:  7
q_loss:  [[[65.118195]]]
t:  763  :episode:  7
q_loss:  [[[39.03227]]]
t:  764  :episode:  7
q_loss:  [[[58.63917]]]
t:  765  :episode:  7
q_loss:  [[[25.341581]

t:  922  :episode:  7
q_loss:  [[[365.045]]]
t:  923  :episode:  7
q_loss:  [[[128.34613]]]
t:  924  :episode:  7
q_loss:  [[[16.875088]]]
t:  925  :episode:  7
q_loss:  [[[128.22963]]]
t:  926  :episode:  7
q_loss:  [[[27.004713]]]
t:  927  :episode:  7
q_loss:  [[[52.66649]]]
t:  928  :episode:  7
q_loss:  [[[55.450165]]]
t:  929  :episode:  7
q_loss:  [[[39.784706]]]
t:  930  :episode:  7
q_loss:  [[[44.312714]]]
t:  931  :episode:  7
q_loss:  [[[393.9177]]]
t:  932  :episode:  7
q_loss:  [[[933.03754]]]
t:  933  :episode:  7
q_loss:  [[[37.027985]]]
t:  934  :episode:  7
q_loss:  [[[43.662964]]]
t:  935  :episode:  7
q_loss:  [[[121.7445]]]
t:  936  :episode:  7
q_loss:  [[[15.870238]]]
t:  937  :episode:  7
q_loss:  [[[76.15117]]]
t:  938  :episode:  7
q_loss:  [[[40.925964]]]
t:  939  :episode:  7
q_loss:  [[[58.080738]]]
t:  940  :episode:  7
q_loss:  [[[19.921814]]]
t:  941  :episode:  7
q_loss:  [[[24.930481]]]
t:  942  :episode:  7
q_loss:  [[[87.98065]]]
t:  943  :episode:  

q_loss:  [[[42.86951]]]
t:  100  :episode:  8
q_loss:  [[[33.354843]]]
t:  101  :episode:  8
q_loss:  [[[1874.0227]]]
t:  102  :episode:  8
q_loss:  [[[600.70764]]]
t:  103  :episode:  8
q_loss:  [[[88.926636]]]
t:  104  :episode:  8
q_loss:  [[[54.85656]]]
t:  105  :episode:  8
q_loss:  [[[65.42536]]]
t:  106  :episode:  8
q_loss:  [[[31.269192]]]
t:  107  :episode:  8
q_loss:  [[[174.38943]]]
t:  108  :episode:  8
q_loss:  [[[51.688736]]]
t:  109  :episode:  8
q_loss:  [[[31.988483]]]
t:  110  :episode:  8
q_loss:  [[[8797.149]]]
t:  111  :episode:  8
q_loss:  [[[27.857365]]]
t:  112  :episode:  8
q_loss:  [[[38.933083]]]
t:  113  :episode:  8
q_loss:  [[[84.56681]]]
t:  114  :episode:  8
q_loss:  [[[111.30815]]]
t:  115  :episode:  8
q_loss:  [[[64.10164]]]
t:  116  :episode:  8
q_loss:  [[[616.42053]]]
t:  117  :episode:  8
q_loss:  [[[108.22851]]]
t:  118  :episode:  8
q_loss:  [[[95.24144]]]
t:  119  :episode:  8
q_loss:  [[[29.910795]]]
t:  120  :episode:  8
q_loss:  [[[86.98677

t:  277  :episode:  8
q_loss:  [[[28.300386]]]
t:  278  :episode:  8
q_loss:  [[[149.81342]]]
t:  279  :episode:  8
q_loss:  [[[363.71884]]]
t:  280  :episode:  8
q_loss:  [[[47.172535]]]
t:  281  :episode:  8
q_loss:  [[[41.643898]]]
t:  282  :episode:  8
q_loss:  [[[102.95231]]]
t:  283  :episode:  8
q_loss:  [[[79.29558]]]
t:  284  :episode:  8
q_loss:  [[[76.93046]]]
t:  285  :episode:  8
q_loss:  [[[115.32455]]]
t:  286  :episode:  8
q_loss:  [[[69.316414]]]
t:  287  :episode:  8
q_loss:  [[[266.41586]]]
t:  288  :episode:  8
q_loss:  [[[36.656876]]]
t:  289  :episode:  8
q_loss:  [[[1076.8549]]]
t:  290  :episode:  8
q_loss:  [[[357.48578]]]
t:  291  :episode:  8
q_loss:  [[[109.83139]]]
t:  292  :episode:  8
q_loss:  [[[86.470116]]]
t:  293  :episode:  8
q_loss:  [[[119.098854]]]
t:  294  :episode:  8
q_loss:  [[[132.46306]]]
t:  295  :episode:  8
q_loss:  [[[41.808666]]]
t:  296  :episode:  8
q_loss:  [[[386.3487]]]
t:  297  :episode:  8
q_loss:  [[[51.90033]]]
t:  298  :episod

q_loss:  [[[12.763246]]]
t:  454  :episode:  8
q_loss:  [[[55.898018]]]
t:  455  :episode:  8
q_loss:  [[[47.065544]]]
t:  456  :episode:  8
q_loss:  [[[81.78363]]]
t:  457  :episode:  8
q_loss:  [[[101.09794]]]
t:  458  :episode:  8
q_loss:  [[[13.230982]]]
t:  459  :episode:  8
q_loss:  [[[60.220722]]]
t:  460  :episode:  8
q_loss:  [[[207.46312]]]
t:  461  :episode:  8
q_loss:  [[[335.46323]]]
t:  462  :episode:  8
q_loss:  [[[13.755442]]]
t:  463  :episode:  8
q_loss:  [[[20.26371]]]
t:  464  :episode:  8
q_loss:  [[[30.119606]]]
t:  465  :episode:  8
q_loss:  [[[14.280203]]]
t:  466  :episode:  8
q_loss:  [[[26.13505]]]
t:  467  :episode:  8
q_loss:  [[[28.675274]]]
t:  468  :episode:  8
q_loss:  [[[72.239975]]]
t:  469  :episode:  8
q_loss:  [[[6.273457]]]
t:  470  :episode:  8
q_loss:  [[[6.3619957]]]
t:  471  :episode:  8
q_loss:  [[[25.213007]]]
t:  472  :episode:  8
q_loss:  [[[797.0822]]]
t:  473  :episode:  8
q_loss:  [[[307.05472]]]
t:  474  :episode:  8
q_loss:  [[[53.322

t:  631  :episode:  8
q_loss:  [[[21.212902]]]
t:  632  :episode:  8
q_loss:  [[[183.99681]]]
t:  633  :episode:  8
q_loss:  [[[48.648254]]]
t:  634  :episode:  8
q_loss:  [[[57.23519]]]
t:  635  :episode:  8
q_loss:  [[[11.006398]]]
t:  636  :episode:  8
q_loss:  [[[20.493423]]]
t:  637  :episode:  8
q_loss:  [[[4.412613]]]
t:  638  :episode:  8
q_loss:  [[[23.923563]]]
t:  639  :episode:  8
q_loss:  [[[68.11484]]]
t:  640  :episode:  8
q_loss:  [[[20.517664]]]
t:  641  :episode:  8
q_loss:  [[[7.028003]]]
t:  642  :episode:  8
q_loss:  [[[22.337246]]]
t:  643  :episode:  8
q_loss:  [[[37.8231]]]
t:  644  :episode:  8
q_loss:  [[[12.05302]]]
t:  645  :episode:  8
q_loss:  [[[22.399086]]]
t:  646  :episode:  8
q_loss:  [[[19.49248]]]
t:  647  :episode:  8
q_loss:  [[[33.203213]]]
t:  648  :episode:  8
q_loss:  [[[32.27384]]]
t:  649  :episode:  8
q_loss:  [[[56.255745]]]
t:  650  :episode:  8
q_loss:  [[[19.57089]]]
t:  651  :episode:  8
q_loss:  [[[41.4525]]]
t:  652  :episode:  8
q_l

t:  809  :episode:  8
q_loss:  [[[101.29788]]]
t:  810  :episode:  8
q_loss:  [[[41.08345]]]
t:  811  :episode:  8
q_loss:  [[[47.542194]]]
t:  812  :episode:  8
q_loss:  [[[68.48775]]]
t:  813  :episode:  8
q_loss:  [[[28.670013]]]
t:  814  :episode:  8
q_loss:  [[[15.260693]]]
t:  815  :episode:  8
q_loss:  [[[38.53501]]]
t:  816  :episode:  8
q_loss:  [[[222.53204]]]
t:  817  :episode:  8
q_loss:  [[[10.481603]]]
t:  818  :episode:  8
q_loss:  [[[41.461353]]]
t:  819  :episode:  8
q_loss:  [[[72.74263]]]
t:  820  :episode:  8
q_loss:  [[[184.9966]]]
t:  821  :episode:  8
q_loss:  [[[25.842875]]]
t:  822  :episode:  8
q_loss:  [[[43.15718]]]
t:  823  :episode:  8
q_loss:  [[[36.283417]]]
t:  824  :episode:  8
q_loss:  [[[18.487873]]]
t:  825  :episode:  8
q_loss:  [[[311.20004]]]
t:  826  :episode:  8
q_loss:  [[[12.507973]]]
t:  827  :episode:  8
q_loss:  [[[25.488628]]]
t:  828  :episode:  8
q_loss:  [[[20.420425]]]
t:  829  :episode:  8
q_loss:  [[[35.99865]]]
t:  830  :episode:  

q_loss:  [[[30.639698]]]
t:  987  :episode:  8
q_loss:  [[[42.131382]]]
t:  988  :episode:  8
q_loss:  [[[23.929276]]]
t:  989  :episode:  8
q_loss:  [[[168.8436]]]
t:  990  :episode:  8
q_loss:  [[[33.601288]]]
t:  991  :episode:  8
q_loss:  [[[199.82355]]]
t:  992  :episode:  8
q_loss:  [[[101.52974]]]
t:  993  :episode:  8
q_loss:  [[[19.817703]]]
t:  994  :episode:  8
q_loss:  [[[35.972565]]]
t:  995  :episode:  8
q_loss:  [[[28.603207]]]
t:  996  :episode:  8
q_loss:  [[[25.588705]]]
t:  997  :episode:  8
q_loss:  [[[16.843424]]]
t:  998  :episode:  8
q_loss:  [[[12.758576]]]
t:  999  :episode:  8
q_loss:  [[[88.18274]]]
Episode 8 finished after 1000 timesteps with average reward -22982.676512203612
t:  0  :episode:  9
q_loss:  [[[26.334827]]]
t:  1  :episode:  9
q_loss:  [[[23.325619]]]
t:  2  :episode:  9
q_loss:  [[[15.8887]]]
t:  3  :episode:  9
q_loss:  [[[16.798803]]]
t:  4  :episode:  9
q_loss:  [[[42.890812]]]
t:  5  :episode:  9
q_loss:  [[[88.74768]]]
t:  6  :episode:  9

q_loss:  [[[20.786192]]]
t:  163  :episode:  9
q_loss:  [[[19.315954]]]
t:  164  :episode:  9
q_loss:  [[[18.471107]]]
t:  165  :episode:  9
q_loss:  [[[227.08821]]]
t:  166  :episode:  9
q_loss:  [[[18.683987]]]
t:  167  :episode:  9
q_loss:  [[[28.123428]]]
t:  168  :episode:  9
q_loss:  [[[32.404476]]]
t:  169  :episode:  9
q_loss:  [[[89.201935]]]
t:  170  :episode:  9
q_loss:  [[[12.873042]]]
t:  171  :episode:  9
q_loss:  [[[27.235935]]]
t:  172  :episode:  9
q_loss:  [[[14.850294]]]
t:  173  :episode:  9
q_loss:  [[[11.641433]]]
t:  174  :episode:  9
q_loss:  [[[28.76638]]]
t:  175  :episode:  9
q_loss:  [[[5.7208977]]]
t:  176  :episode:  9
q_loss:  [[[15.21776]]]
t:  177  :episode:  9
q_loss:  [[[44.6078]]]
t:  178  :episode:  9
q_loss:  [[[39.198536]]]
t:  179  :episode:  9
q_loss:  [[[18.053827]]]
t:  180  :episode:  9
q_loss:  [[[22.389427]]]
t:  181  :episode:  9
q_loss:  [[[102.79067]]]
t:  182  :episode:  9
q_loss:  [[[15.578437]]]
t:  183  :episode:  9
q_loss:  [[[32.96

t:  339  :episode:  9
q_loss:  [[[22.29705]]]
t:  340  :episode:  9
q_loss:  [[[18.432434]]]
t:  341  :episode:  9
q_loss:  [[[37.940468]]]
t:  342  :episode:  9
q_loss:  [[[57.153152]]]
t:  343  :episode:  9
q_loss:  [[[14.38492]]]
t:  344  :episode:  9
q_loss:  [[[16.628437]]]
t:  345  :episode:  9
q_loss:  [[[54.926735]]]
t:  346  :episode:  9
q_loss:  [[[19.5569]]]
t:  347  :episode:  9
q_loss:  [[[43.613514]]]
t:  348  :episode:  9
q_loss:  [[[41.794117]]]
t:  349  :episode:  9
q_loss:  [[[32.00085]]]
t:  350  :episode:  9
q_loss:  [[[8.970104]]]
t:  351  :episode:  9
q_loss:  [[[41.185223]]]
t:  352  :episode:  9
q_loss:  [[[80.64955]]]
t:  353  :episode:  9
q_loss:  [[[72.14132]]]
t:  354  :episode:  9
q_loss:  [[[77.934746]]]
t:  355  :episode:  9
q_loss:  [[[274.857]]]
t:  356  :episode:  9
q_loss:  [[[34.873917]]]
t:  357  :episode:  9
q_loss:  [[[18.869823]]]
t:  358  :episode:  9
q_loss:  [[[42.128548]]]
t:  359  :episode:  9
q_loss:  [[[18.934006]]]
t:  360  :episode:  9
q

t:  515  :episode:  9
q_loss:  [[[80.57861]]]
t:  516  :episode:  9
q_loss:  [[[65.72744]]]
t:  517  :episode:  9
q_loss:  [[[73.84878]]]
t:  518  :episode:  9
q_loss:  [[[11.462407]]]
t:  519  :episode:  9
q_loss:  [[[110.75227]]]
t:  520  :episode:  9
q_loss:  [[[52.786377]]]
t:  521  :episode:  9
q_loss:  [[[17.052166]]]
t:  522  :episode:  9
q_loss:  [[[108.95364]]]
t:  523  :episode:  9
q_loss:  [[[68.26527]]]
t:  524  :episode:  9
q_loss:  [[[22.44429]]]
t:  525  :episode:  9
q_loss:  [[[195.06404]]]
t:  526  :episode:  9
q_loss:  [[[46.2679]]]
t:  527  :episode:  9
q_loss:  [[[324.4018]]]
t:  528  :episode:  9
q_loss:  [[[55.852295]]]
t:  529  :episode:  9
q_loss:  [[[21.012257]]]
t:  530  :episode:  9
q_loss:  [[[582.18054]]]
t:  531  :episode:  9
q_loss:  [[[61.717464]]]
t:  532  :episode:  9
q_loss:  [[[57.402164]]]
t:  533  :episode:  9
q_loss:  [[[247.8483]]]
t:  534  :episode:  9
q_loss:  [[[77.977036]]]
t:  535  :episode:  9
q_loss:  [[[54.664467]]]
t:  536  :episode:  9


t:  691  :episode:  9
q_loss:  [[[19.82352]]]
t:  692  :episode:  9
q_loss:  [[[8.592402]]]
t:  693  :episode:  9
q_loss:  [[[25.591831]]]
t:  694  :episode:  9
q_loss:  [[[11.568922]]]
t:  695  :episode:  9
q_loss:  [[[9.53235]]]
t:  696  :episode:  9
q_loss:  [[[249.91063]]]
t:  697  :episode:  9
q_loss:  [[[5.1299887]]]
t:  698  :episode:  9
q_loss:  [[[25.472786]]]
t:  699  :episode:  9
q_loss:  [[[68.34473]]]
t:  700  :episode:  9
q_loss:  [[[23.52155]]]
t:  701  :episode:  9
q_loss:  [[[80.65556]]]
t:  702  :episode:  9
q_loss:  [[[43.935658]]]
t:  703  :episode:  9
q_loss:  [[[43.999283]]]
t:  704  :episode:  9
q_loss:  [[[18.062548]]]
t:  705  :episode:  9
q_loss:  [[[37.130013]]]
t:  706  :episode:  9
q_loss:  [[[23.863838]]]
t:  707  :episode:  9
q_loss:  [[[158.13148]]]
t:  708  :episode:  9
q_loss:  [[[184.66367]]]
t:  709  :episode:  9
q_loss:  [[[57.76862]]]
t:  710  :episode:  9
q_loss:  [[[96.703354]]]
t:  711  :episode:  9
q_loss:  [[[9.41905]]]
t:  712  :episode:  9
q

t:  868  :episode:  9
q_loss:  [[[38.300133]]]
t:  869  :episode:  9
q_loss:  [[[37.01673]]]
t:  870  :episode:  9
q_loss:  [[[32.887367]]]
t:  871  :episode:  9
q_loss:  [[[28.94125]]]
t:  872  :episode:  9
q_loss:  [[[48.552494]]]
t:  873  :episode:  9
q_loss:  [[[15.220907]]]
t:  874  :episode:  9
q_loss:  [[[57.907013]]]
t:  875  :episode:  9
q_loss:  [[[133.98907]]]
t:  876  :episode:  9
q_loss:  [[[19.94188]]]
t:  877  :episode:  9
q_loss:  [[[24.429722]]]
t:  878  :episode:  9
q_loss:  [[[27.509521]]]
t:  879  :episode:  9
q_loss:  [[[47.469715]]]
t:  880  :episode:  9
q_loss:  [[[13.472707]]]
t:  881  :episode:  9
q_loss:  [[[15.882347]]]
t:  882  :episode:  9
q_loss:  [[[9.9075165]]]
t:  883  :episode:  9
q_loss:  [[[235.79147]]]
t:  884  :episode:  9
q_loss:  [[[39.937393]]]
t:  885  :episode:  9
q_loss:  [[[26.631107]]]
t:  886  :episode:  9
q_loss:  [[[128.69395]]]
t:  887  :episode:  9
q_loss:  [[[31.66439]]]
t:  888  :episode:  9
q_loss:  [[[96.709946]]]
t:  889  :episode

q_loss:  [[[30.606329]]]
t:  44  :episode:  10
q_loss:  [[[29.418663]]]
t:  45  :episode:  10
q_loss:  [[[13.024307]]]
t:  46  :episode:  10
q_loss:  [[[182.03467]]]
t:  47  :episode:  10
q_loss:  [[[8.551002]]]
t:  48  :episode:  10
q_loss:  [[[17.7069]]]
t:  49  :episode:  10
q_loss:  [[[51.475975]]]
t:  50  :episode:  10
q_loss:  [[[23.039509]]]
t:  51  :episode:  10
q_loss:  [[[198.22374]]]
t:  52  :episode:  10
q_loss:  [[[15.673393]]]
t:  53  :episode:  10
q_loss:  [[[24.7685]]]
t:  54  :episode:  10
q_loss:  [[[43.100708]]]
t:  55  :episode:  10
q_loss:  [[[33.560043]]]
t:  56  :episode:  10
q_loss:  [[[21.681952]]]
t:  57  :episode:  10
q_loss:  [[[6.076314]]]
t:  58  :episode:  10
q_loss:  [[[16.602802]]]
t:  59  :episode:  10
q_loss:  [[[28.96302]]]
t:  60  :episode:  10
q_loss:  [[[48.298405]]]
t:  61  :episode:  10
q_loss:  [[[32.961685]]]
t:  62  :episode:  10
q_loss:  [[[27.199232]]]
t:  63  :episode:  10
q_loss:  [[[13.69074]]]
t:  64  :episode:  10
q_loss:  [[[44.20415]

q_loss:  [[[16.937468]]]
t:  217  :episode:  10
q_loss:  [[[27.475111]]]
t:  218  :episode:  10
q_loss:  [[[34.89434]]]
t:  219  :episode:  10
q_loss:  [[[27.165508]]]
t:  220  :episode:  10
q_loss:  [[[60.82834]]]
t:  221  :episode:  10
q_loss:  [[[16.682112]]]
t:  222  :episode:  10
q_loss:  [[[16.63816]]]
t:  223  :episode:  10
q_loss:  [[[23.803297]]]
t:  224  :episode:  10
q_loss:  [[[21.968122]]]
t:  225  :episode:  10
q_loss:  [[[134.92072]]]
t:  226  :episode:  10
q_loss:  [[[10.177105]]]
t:  227  :episode:  10
q_loss:  [[[53.38803]]]
t:  228  :episode:  10
q_loss:  [[[14.085166]]]
t:  229  :episode:  10
q_loss:  [[[107.44829]]]
t:  230  :episode:  10
q_loss:  [[[83.03878]]]
t:  231  :episode:  10
q_loss:  [[[85.84451]]]
t:  232  :episode:  10
q_loss:  [[[325.61563]]]
t:  233  :episode:  10
q_loss:  [[[9.20577]]]
t:  234  :episode:  10
q_loss:  [[[17.376007]]]
t:  235  :episode:  10
q_loss:  [[[34.98644]]]
t:  236  :episode:  10
q_loss:  [[[83.61103]]]
t:  237  :episode:  10
q_

t:  390  :episode:  10
q_loss:  [[[9.928463]]]
t:  391  :episode:  10
q_loss:  [[[25.162846]]]
t:  392  :episode:  10
q_loss:  [[[7.3076696]]]
t:  393  :episode:  10
q_loss:  [[[189.85379]]]
t:  394  :episode:  10
q_loss:  [[[58.025337]]]
t:  395  :episode:  10
q_loss:  [[[23.957298]]]
t:  396  :episode:  10
q_loss:  [[[9.33955]]]
t:  397  :episode:  10
q_loss:  [[[26.440262]]]
t:  398  :episode:  10
q_loss:  [[[46.065136]]]
t:  399  :episode:  10
q_loss:  [[[85.67116]]]
t:  400  :episode:  10
q_loss:  [[[70.97043]]]
t:  401  :episode:  10
q_loss:  [[[14.393775]]]
t:  402  :episode:  10
q_loss:  [[[28.596483]]]
t:  403  :episode:  10
q_loss:  [[[5.435445]]]
t:  404  :episode:  10
q_loss:  [[[15.575763]]]
t:  405  :episode:  10
q_loss:  [[[42.475994]]]
t:  406  :episode:  10
q_loss:  [[[13.182789]]]
t:  407  :episode:  10
q_loss:  [[[38.621735]]]
t:  408  :episode:  10
q_loss:  [[[9.402603]]]
t:  409  :episode:  10
q_loss:  [[[10.372107]]]
t:  410  :episode:  10
q_loss:  [[[29.681892]]]

q_loss:  [[[29.356018]]]
t:  564  :episode:  10
q_loss:  [[[9.010935]]]
t:  565  :episode:  10
q_loss:  [[[5.7229037]]]
t:  566  :episode:  10
q_loss:  [[[19.332087]]]
t:  567  :episode:  10
q_loss:  [[[8.990997]]]
t:  568  :episode:  10
q_loss:  [[[14.001322]]]
t:  569  :episode:  10
q_loss:  [[[11.76427]]]
t:  570  :episode:  10
q_loss:  [[[25.084797]]]
t:  571  :episode:  10
q_loss:  [[[23.706196]]]
t:  572  :episode:  10
q_loss:  [[[9.648082]]]
t:  573  :episode:  10
q_loss:  [[[219.1089]]]
t:  574  :episode:  10
q_loss:  [[[56.936314]]]
t:  575  :episode:  10
q_loss:  [[[26.80027]]]
t:  576  :episode:  10
q_loss:  [[[28.2276]]]
t:  577  :episode:  10
q_loss:  [[[30.75275]]]
t:  578  :episode:  10
q_loss:  [[[52.939266]]]
t:  579  :episode:  10
q_loss:  [[[106.54511]]]
t:  580  :episode:  10
q_loss:  [[[35.95485]]]
t:  581  :episode:  10
q_loss:  [[[46.622494]]]
t:  582  :episode:  10
q_loss:  [[[13.449963]]]
t:  583  :episode:  10
q_loss:  [[[20.947409]]]
t:  584  :episode:  10
q_

q_loss:  [[[53.914276]]]
t:  736  :episode:  10
q_loss:  [[[68.13354]]]
t:  737  :episode:  10
q_loss:  [[[7.3986163]]]
t:  738  :episode:  10
q_loss:  [[[35.285473]]]
t:  739  :episode:  10
q_loss:  [[[51.993416]]]
t:  740  :episode:  10
q_loss:  [[[26.117188]]]
t:  741  :episode:  10
q_loss:  [[[38.811127]]]
t:  742  :episode:  10
q_loss:  [[[37.846306]]]
t:  743  :episode:  10
q_loss:  [[[10.671721]]]
t:  744  :episode:  10
q_loss:  [[[18.88528]]]
t:  745  :episode:  10
q_loss:  [[[12.858509]]]
t:  746  :episode:  10
q_loss:  [[[126.05128]]]
t:  747  :episode:  10
q_loss:  [[[14.060732]]]
t:  748  :episode:  10
q_loss:  [[[54.55648]]]
t:  749  :episode:  10
q_loss:  [[[49.52455]]]
t:  750  :episode:  10
q_loss:  [[[4.548571]]]
t:  751  :episode:  10
q_loss:  [[[80.11608]]]
t:  752  :episode:  10
q_loss:  [[[66.89332]]]
t:  753  :episode:  10
q_loss:  [[[39.793648]]]
t:  754  :episode:  10
q_loss:  [[[48.9016]]]
t:  755  :episode:  10
q_loss:  [[[31.504967]]]
t:  756  :episode:  10
q

t:  909  :episode:  10
q_loss:  [[[37.754314]]]
t:  910  :episode:  10
q_loss:  [[[17.244112]]]
t:  911  :episode:  10
q_loss:  [[[31.518661]]]
t:  912  :episode:  10
q_loss:  [[[5.959915]]]
t:  913  :episode:  10
q_loss:  [[[22.894789]]]
t:  914  :episode:  10
q_loss:  [[[13.935563]]]
t:  915  :episode:  10
q_loss:  [[[42.418613]]]
t:  916  :episode:  10
q_loss:  [[[25.583668]]]
t:  917  :episode:  10
q_loss:  [[[30.046461]]]
t:  918  :episode:  10
q_loss:  [[[12.632872]]]
t:  919  :episode:  10
q_loss:  [[[19.600702]]]
t:  920  :episode:  10
q_loss:  [[[19.3199]]]
t:  921  :episode:  10
q_loss:  [[[37.724407]]]
t:  922  :episode:  10
q_loss:  [[[27.228922]]]
t:  923  :episode:  10
q_loss:  [[[26.460142]]]
t:  924  :episode:  10
q_loss:  [[[20.770588]]]
t:  925  :episode:  10
q_loss:  [[[10.161516]]]
t:  926  :episode:  10
q_loss:  [[[18.974857]]]
t:  927  :episode:  10
q_loss:  [[[12.206769]]]
t:  928  :episode:  10
q_loss:  [[[17.643272]]]
t:  929  :episode:  10
q_loss:  [[[153.5553

t:  82  :episode:  11
q_loss:  [[[35.711365]]]
t:  83  :episode:  11
q_loss:  [[[161.0322]]]
t:  84  :episode:  11
q_loss:  [[[78.604546]]]
t:  85  :episode:  11
q_loss:  [[[16.648602]]]
t:  86  :episode:  11
q_loss:  [[[40.517654]]]
t:  87  :episode:  11
q_loss:  [[[53.050323]]]
t:  88  :episode:  11
q_loss:  [[[70.79172]]]
t:  89  :episode:  11
q_loss:  [[[22.095795]]]
t:  90  :episode:  11
q_loss:  [[[21.893808]]]
t:  91  :episode:  11
q_loss:  [[[103.89809]]]
t:  92  :episode:  11
q_loss:  [[[21.46759]]]
t:  93  :episode:  11
q_loss:  [[[7.4115286]]]
t:  94  :episode:  11
q_loss:  [[[42.57178]]]
t:  95  :episode:  11
q_loss:  [[[9.70646]]]
t:  96  :episode:  11
q_loss:  [[[40.834145]]]
t:  97  :episode:  11
q_loss:  [[[7.970331]]]
t:  98  :episode:  11
q_loss:  [[[271.92035]]]
t:  99  :episode:  11
q_loss:  [[[18.664896]]]
t:  100  :episode:  11
q_loss:  [[[56.79509]]]
t:  101  :episode:  11
q_loss:  [[[34.16626]]]
t:  102  :episode:  11
q_loss:  [[[115.87218]]]
t:  103  :episode: 

t:  256  :episode:  11
q_loss:  [[[38.31173]]]
t:  257  :episode:  11
q_loss:  [[[31.135464]]]
t:  258  :episode:  11
q_loss:  [[[43.612373]]]
t:  259  :episode:  11
q_loss:  [[[65.608696]]]
t:  260  :episode:  11
q_loss:  [[[45.52804]]]
t:  261  :episode:  11
q_loss:  [[[55.545815]]]
t:  262  :episode:  11
q_loss:  [[[115.541695]]]
t:  263  :episode:  11
q_loss:  [[[61.097263]]]
t:  264  :episode:  11
q_loss:  [[[33.145603]]]
t:  265  :episode:  11
q_loss:  [[[21.834253]]]
t:  266  :episode:  11
q_loss:  [[[12.631475]]]
t:  267  :episode:  11
q_loss:  [[[22.441599]]]
t:  268  :episode:  11
q_loss:  [[[73.01595]]]
t:  269  :episode:  11
q_loss:  [[[77.780426]]]
t:  270  :episode:  11
q_loss:  [[[32.544067]]]
t:  271  :episode:  11
q_loss:  [[[11.277407]]]
t:  272  :episode:  11
q_loss:  [[[27.64282]]]
t:  273  :episode:  11
q_loss:  [[[28.121758]]]
t:  274  :episode:  11
q_loss:  [[[33.57441]]]
t:  275  :episode:  11
q_loss:  [[[10.092092]]]
t:  276  :episode:  11
q_loss:  [[[28.75457]

q_loss:  [[[13.1827345]]]
t:  430  :episode:  11
q_loss:  [[[16.585266]]]
t:  431  :episode:  11
q_loss:  [[[27.377842]]]
t:  432  :episode:  11
q_loss:  [[[39.716053]]]
t:  433  :episode:  11
q_loss:  [[[25.262989]]]
t:  434  :episode:  11
q_loss:  [[[109.48511]]]
t:  435  :episode:  11
q_loss:  [[[32.72603]]]
t:  436  :episode:  11
q_loss:  [[[36.016888]]]
t:  437  :episode:  11
q_loss:  [[[48.352707]]]
t:  438  :episode:  11
q_loss:  [[[8.024181]]]
t:  439  :episode:  11
q_loss:  [[[85.12978]]]
t:  440  :episode:  11
q_loss:  [[[18.596725]]]
t:  441  :episode:  11
q_loss:  [[[13.056522]]]
t:  442  :episode:  11
q_loss:  [[[30.300941]]]
t:  443  :episode:  11
q_loss:  [[[34.270123]]]
t:  444  :episode:  11
q_loss:  [[[21.864134]]]
t:  445  :episode:  11
q_loss:  [[[23.830145]]]
t:  446  :episode:  11
q_loss:  [[[54.00074]]]
t:  447  :episode:  11
q_loss:  [[[28.251453]]]
t:  448  :episode:  11
q_loss:  [[[63.487793]]]
t:  449  :episode:  11
q_loss:  [[[356.39163]]]
t:  450  :episode:

q_loss:  [[[52.166756]]]
t:  602  :episode:  11
q_loss:  [[[42.646976]]]
t:  603  :episode:  11
q_loss:  [[[18.523243]]]
t:  604  :episode:  11
q_loss:  [[[101.99388]]]
t:  605  :episode:  11
q_loss:  [[[12.865169]]]
t:  606  :episode:  11
q_loss:  [[[40.467865]]]
t:  607  :episode:  11
q_loss:  [[[123.55504]]]
t:  608  :episode:  11
q_loss:  [[[7.288364]]]
t:  609  :episode:  11
q_loss:  [[[60.237164]]]
t:  610  :episode:  11
q_loss:  [[[65.0359]]]
t:  611  :episode:  11
q_loss:  [[[30.07265]]]
t:  612  :episode:  11
q_loss:  [[[29.30484]]]
t:  613  :episode:  11
q_loss:  [[[41.32432]]]
t:  614  :episode:  11
q_loss:  [[[119.443596]]]
t:  615  :episode:  11
q_loss:  [[[104.91341]]]
t:  616  :episode:  11
q_loss:  [[[65.25998]]]
t:  617  :episode:  11
q_loss:  [[[68.185555]]]
t:  618  :episode:  11
q_loss:  [[[64.78271]]]
t:  619  :episode:  11
q_loss:  [[[29.759535]]]
t:  620  :episode:  11
q_loss:  [[[37.60147]]]
t:  621  :episode:  11
q_loss:  [[[32.687065]]]
t:  622  :episode:  11


q_loss:  [[[20.997238]]]
t:  774  :episode:  11
q_loss:  [[[40.0428]]]
t:  775  :episode:  11
q_loss:  [[[52.84193]]]
t:  776  :episode:  11
q_loss:  [[[293.32507]]]
t:  777  :episode:  11
q_loss:  [[[141.75357]]]
t:  778  :episode:  11
q_loss:  [[[679.4546]]]
t:  779  :episode:  11
q_loss:  [[[117.877884]]]
t:  780  :episode:  11
q_loss:  [[[72.23043]]]
t:  781  :episode:  11
q_loss:  [[[47.81852]]]
t:  782  :episode:  11
q_loss:  [[[1560.021]]]
t:  783  :episode:  11
q_loss:  [[[345.2396]]]
t:  784  :episode:  11
q_loss:  [[[119.09002]]]
t:  785  :episode:  11
q_loss:  [[[21.582233]]]
t:  786  :episode:  11
q_loss:  [[[112.57035]]]
t:  787  :episode:  11
q_loss:  [[[533.8506]]]
t:  788  :episode:  11
q_loss:  [[[39.156715]]]
t:  789  :episode:  11
q_loss:  [[[5602.7188]]]
t:  790  :episode:  11
q_loss:  [[[1226.122]]]
t:  791  :episode:  11
q_loss:  [[[265.97452]]]
t:  792  :episode:  11
q_loss:  [[[282.34497]]]
t:  793  :episode:  11
q_loss:  [[[260.27676]]]
t:  794  :episode:  11
q

 https://datascience.stackexchange.com/questions/13216/intuitive-explanation-of-noise-contrastive-estimation-nce-loss(InfoNCE Loss )
<br>
Representation Learning with Contrastive Predictive Coding
<br>
https://github.com/gdao-research/cpc/blob/master/cpc/data_handler.py (CPC)
<br>
https://github.com/davidtellez/contrastive-predictive-coding/blob/master/train_model.py (CPC)
<br>
https://github.com/MishaLaskin/curl/blob/23b0880708c29b078b0a25e62ff31fb587587b18/utils.py#L123 (replay buffer and SAC)
<br>
https://github.com/marload/DeepRL-TensorFlow2/blob/master/A2C/A2C_Discrete.py (A2C)
<br>
https://github.com/germain-hug/Deep-RL-Keras/blob/master/A3C/a3c.py (A3C)
<br>
https://github.com/tensorflow/agents/blob/v0.5.0/tf_agents/agents/sac/sac_agent.py (SAC)
<br>
https://github.com/cookbenjamin/DDPG/blob/master/networks/critic.py (transfer the action state merge to second layer)
<br>
https://github.com/georgesung/TD3 (check expected results)
<br>
https://github.com/georgesung/TD3/blob/master/DDPG.py (param mistake)