In [1]:
from memory import MemoryBuffer
from policies import PPOTrainer
from utils import gaussian_likelihood, reshape_train_var
import tensorflow as tf
from tensorflow.layers import dense, conv2d, max_pooling2d, flatten
import numpy as np
import time
import gym

In [2]:
class IMTrainer():
    def __init__(self, in_op, out_op, v_out_op, act_type='discrete', sess=None):
        """
        Create a wrapper for RL networks for easy training.
        Args:
            in_op (tf.Placeholder): Observation input to architecture
            out_op (tf.Variable): Action output of architecture
            act_type (string): 'discrete' for a discrete actions space or 'continuous'
                               for a continuous actions space
            sess (tf.Session): A session if you would like to use a custom session,
                               if left none it will be automatically created
        """

        if not sess:
            self.renew_sess()
        
        self.in_op = in_op
        self.out_op = out_op
        self.v_out_op = v_out_op
        
        if act_type in ('discrete', 'd'):
            self.train = self._create_discrete_trainer()
            self.act_type = 'discrete'
        elif act_type in ('continuous', 'c'):
            self.train = self._create_continuous_trainer()
            self.act_type = 'continuous'
        else:
            raise TypeError('act_type must be \'discrete\' or \'continuous\'')
        
    def renew_sess(self):
        """
        Starts a new internal Tensorflow session
        """
        self.sess = tf.Session()
        
    def end_sess(self):
        """
        Ends the internal Tensorflow session if it exists
        """
        if self.sess:
            self.sess.close()
            
    def _create_ICM(self):
        feature_dim = 12
        r_i_scale = 3
        
        # Create placeholder
        self.next_obs_holders = tf.placeholder(tf.float32, shape=self.in_op.shape)
        
        # Observation feature encoder
        with tf.variable_scope('feature_encoder'):
            self.f_obs = dense(self.in_op, feature_dim, activation=tf.nn.tanh, name='fe_dense')
            
        with tf.variable_scope('feature_encoder', reuse=True):
            self.f_obs_next = dense(self.next_obs_holders, feature_dim, activation=tf.nn.tanh, name='fe_dense')
            
        # State predictor forward model
        self.state_act_pair = tf.concat([self.act_holders, self.f_obs], axis=1)
        self.sp_dense = dense(self.state_act_pair, 32, activation=tf.nn.tanh)
        self.f_obs_next_hat = dense(self.sp_dense, feature_dim, activation=tf.nn.tanh)
        
        # Inverse model (predicting action)
        self.state_state_pair = tf.concat([self.f_obs, self.f_obs_next], axis=1)
        self.act_hat = dense(self.state_state_pair, self.out_op.shape[1])
        
        # Calculating intrinsic reward
        self.obs_diff = self.f_obs_next_hat - self.f_obs_next
        self.r_i = r_i_scale * tf.reduce_sum(self.obs_diff ** 2, axis=1)
        
        # Calculating losses
        self.pre_loss_i = tf.reduce_sum((self.act_hat - self.act_holders) ** 2, axis=1)
        self.pre_loss_f = tf.reduce_sum(self.obs_diff ** 2, axis=1)
        
        self.loss_i = 0.5 * self.pre_loss_i
        self.loss_f = 0.5 * self.pre_loss_f
        
    def _create_discrete_trainer(self, optimizer=tf.train.AdamOptimizer()):
        """
        Creates a function for vanilla policy training with a discrete action space
        """
#         self.act_holders = tf.placeholder(tf.int32, shape=[None])
#         self.reward_holders = tf.placeholder(tf.float32, shape=[None])
        
#         self.act_masks = tf.one_hot(self.act_holders, self.out_op.shape[1].value, dtype=tf.float32)
#         self.log_probs = tf.log(self.out_op)
        
#         self.advantages = self.reward_holders - tf.squeeze(self.v_out_op)
        
#         self.resp_acts = tf.reduce_sum(self.act_masks *  self.log_probs, axis=1)
#         self.loss = -tf.reduce_mean(self.resp_acts * self.advantages)
        
#         self.optimizer = optimizer
#         self.actor_update = self.optimizer.minimize(self.loss)
        
#         with tf.control_dependencies([self.actor_update]):
#             self.value_loss = tf.reduce_mean(tf.square(self.reward_holders - tf.squeeze(self.v_out_op)))
#             self.value_update = self.optimizer.minimize(self.value_loss)
        
#         update_func = lambda train_data: self.sess.run([self.actor_update, self.value_update], 
#                                                        feed_dict={self.in_op: reshape_train_var(train_data[:, 0]),
#                                                             self.act_holders: reshape_train_var(train_data[:, 1]),
#                                                             self.reward_holders: train_data[:, 2]})
        
#         self.sess.run(tf.global_variables_initializer())
        
#         return update_func
        
    def _create_continuous_trainer(self, optimizer=tf.train.AdamOptimizer()):
        """
        Creates a function for vanilla policy training with a continuous action space
        """
        self.act_holders = tf.placeholder(tf.float32, shape=[None, self.out_op.shape[1].value])
        self.reward_holders = tf.placeholder(tf.float32, shape=[None])
        
        self.std = tf.Variable(0.5 * np.ones(shape=self.out_op.shape[1].value), dtype=tf.float32)
        self.out_act = self.out_op + tf.random_normal(tf.shape(self.out_op), dtype=tf.float32) * self.std
        
        self.log_probs = gaussian_likelihood(self.act_holders, self.out_op, self.std)
        
        self.advantages = self.reward_holders - tf.squeeze(self.v_out_op)
        
        # Creation of ICM module
        
        self._create_ICM()
        
        self.r_e = tf.reduce_mean(self.log_probs * self.advantages)
        self.r_t = tf.reduce_mean(self.r_e + self.r_i) # Maybe I should reduce_sum or split by rollout
        
        self.lamb = 0.1
        self.beta = 0.2
        
        self.total_loss = -self.lamb * self.r_t + (1.-self.beta) * self.loss_i + self.beta * self.loss_f
        
        self.optimizer = optimizer
        self.actor_update = self.optimizer.minimize(self.total_loss)
        
        with tf.control_dependencies([self.actor_update]):
            self.value_loss = tf.reduce_mean(tf.square(self.reward_holders - tf.squeeze(self.v_out_op)))
            self.value_update = self.optimizer.minimize(self.value_loss)
        
        def update_func(train_data):
            i_reward, _, _ = self.sess.run([self.r_i, self.actor_update, self.value_update], 
                               feed_dict={self.in_op: reshape_train_var(train_data[:, 0]),
                                    self.act_holders: reshape_train_var(train_data[:, 1]),
                                    self.reward_holders: train_data[:, 2],
                                    self.next_obs_holders: reshape_train_var(train_data[:, 3])})
            
            print(f'Intrinsic Reward: {np.mean(i_reward)}')    
        
        self.sess.run(tf.global_variables_initializer())
        
        return update_func
        
    def _gen_discrete_act(self, obs):
        act_probs = self.sess.run(self.out_op, feed_dict={self.in_op: [obs]})
        act = np.random.choice(list(range(len(act_probs[0]))), p=act_probs)
        
        return act
    
    def _gen_continuous_act(self, obs):
        act_vect = self.sess.run(self.out_act, feed_dict={self.in_op: [obs]})[0]
        
        return np.array(act_vect)
        
    def gen_act(self, obs):
        if self.act_type == 'discrete':
            return self._gen_discrete_act(obs)
        else:
            return self._gen_continuous_act(obs)
        
    def train(self, obs, rewards, acts):
        raise RuntimeError('The train method was not properly created')

In [3]:
env = gym.make('BipedalWalker-v2')

obs = tf.placeholder(tf.float32, shape=[None]+list(env.observation_space.shape))
dense1 = dense(obs, 32, activation=tf.tanh)
dense2 = dense(dense1, 32, activation=tf.tanh)
act_probs = dense(dense2, env.action_space.shape[0])

v_dense1 = dense(obs, 32, activation=tf.tanh)
v_dense2 = dense(v_dense1, 32, activation=tf.tanh)
value = dense(v_dense2, 1)

network = IMTrainer(obs, act_probs, value, act_type='c')

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [4]:
n_episodes = 1000000
max_steps = 500
update_freq = 1
print_freq = 1

mb = MemoryBuffer(True)

In [None]:
all_rewards = []

for episode in range(n_episodes):
    ep_reward = 0
    
    mb.start_rollout()
    obs = env.reset()
    for step in range(max_steps):
        obs = obs.squeeze()
        act = network.gen_act(obs)
        
        obs_next, rew, d, _ = env.step(act)
        ep_reward += rew
        
        if False:
            env.render()
            time.sleep(0.02)
        
        mb.record(obs, act, rew, obs_next)
        obs = obs_next
        
        if d:
            break
            
    all_rewards.append(ep_reward)
            
    if episode % update_freq == 0 and episode != 0:
        network.train(mb.to_data())
        
        if episode % (update_freq * print_freq) == 0:
            print(f'Update #{episode // update_freq}, Reward: {np.mean(all_rewards[-update_freq*print_freq:])}')
            print()

Intrinsic Reward: 141.8203582763672
Update #1, Reward: -108.1877613371412

Intrinsic Reward: 142.29800415039062
Update #2, Reward: -104.93920621598326

Intrinsic Reward: 141.77227783203125
Update #3, Reward: -106.67962389740099

Intrinsic Reward: 141.9000701904297
Update #4, Reward: -104.33232116363818

Intrinsic Reward: 140.94427490234375
Update #5, Reward: -107.94131572361849

Intrinsic Reward: 142.08303833007812
Update #6, Reward: -105.70714224652387

Intrinsic Reward: 142.25411987304688
Update #7, Reward: -103.10124309706129

Intrinsic Reward: 142.13668823242188
Update #8, Reward: -105.26173562503979

Intrinsic Reward: 141.6094970703125
Update #9, Reward: -103.0819227867201

Intrinsic Reward: 141.536376953125
Update #10, Reward: -105.80744348400646

Intrinsic Reward: 141.97628784179688
Update #11, Reward: -106.28764034203999

Intrinsic Reward: 141.01806640625
Update #12, Reward: -104.41805487677144

Intrinsic Reward: 141.51329040527344
Update #13, Reward: -104.37899782687549

Intri

Intrinsic Reward: 141.72335815429688
Update #110, Reward: -104.26009215658965

Intrinsic Reward: 141.6574249267578
Update #111, Reward: -107.90752240361336

Intrinsic Reward: 141.63916015625
Update #112, Reward: -106.99899438278501

Intrinsic Reward: 141.7860565185547
Update #113, Reward: -110.31637532760824

Intrinsic Reward: 142.11727905273438
Update #114, Reward: -41.71348057895774

Intrinsic Reward: 141.72140502929688
Update #115, Reward: -104.29485520391228

Intrinsic Reward: 141.54994201660156
Update #116, Reward: -108.84585291585388

Intrinsic Reward: 142.66180419921875
Update #117, Reward: -103.9837869064454

Intrinsic Reward: 141.74974060058594
Update #118, Reward: -106.5858596423405

Intrinsic Reward: 142.6212158203125
Update #119, Reward: -106.89436623358105

Intrinsic Reward: 141.53305053710938
Update #120, Reward: -108.18674954947085

Intrinsic Reward: 142.24517822265625
Update #121, Reward: -104.17497381555847

Intrinsic Reward: 142.01869201660156
Update #122, Reward: -10

Intrinsic Reward: 142.32594299316406
Update #215, Reward: -105.2364306797826

Intrinsic Reward: 143.23440551757812
Update #216, Reward: -105.94360520272826

Intrinsic Reward: 142.1772918701172
Update #217, Reward: -108.86083310614278

Intrinsic Reward: 142.567138671875
Update #218, Reward: -104.47535727840476

Intrinsic Reward: 142.13746643066406
Update #219, Reward: -107.39338784700011

Intrinsic Reward: 142.26535034179688
Update #220, Reward: -104.25057206912153

Intrinsic Reward: 142.28518676757812
Update #221, Reward: -109.19552208778511

Intrinsic Reward: 141.86550903320312
Update #222, Reward: -105.12299286158259

Intrinsic Reward: 142.04820251464844
Update #223, Reward: -104.3727084690885

Intrinsic Reward: 142.39585876464844
Update #224, Reward: -104.62331975010845

Intrinsic Reward: 142.29092407226562
Update #225, Reward: -104.39879182992317

Intrinsic Reward: 142.0066375732422
Update #226, Reward: -102.86811793591703

Intrinsic Reward: 142.55845642089844
Update #227, Reward: 

Intrinsic Reward: 142.36036682128906
Update #322, Reward: -105.89569049177692

Intrinsic Reward: 142.74835205078125
Update #323, Reward: -106.32280151266356

Intrinsic Reward: 142.4604949951172
Update #324, Reward: -103.83930251153558

Intrinsic Reward: 141.96853637695312
Update #325, Reward: -104.41018418573216

Intrinsic Reward: 142.9966278076172
Update #326, Reward: -106.32921150718629

Intrinsic Reward: 142.3785400390625
Update #327, Reward: -104.83515258962537

Intrinsic Reward: 141.86151123046875
Update #328, Reward: -105.18164910485285

Intrinsic Reward: 141.79034423828125
Update #329, Reward: -104.30367566339423

Intrinsic Reward: 142.29542541503906
Update #330, Reward: -105.07885783997179

Intrinsic Reward: 142.53541564941406
Update #331, Reward: -123.33502599776287

Intrinsic Reward: 143.22274780273438
Update #332, Reward: -105.72091999928219

Intrinsic Reward: 141.75975036621094
Update #333, Reward: -103.92336107873669

Intrinsic Reward: 142.54786682128906
Update #334, Rewar

Intrinsic Reward: 142.3104248046875
Update #430, Reward: -104.5389522219915

Intrinsic Reward: 142.68222045898438
Update #431, Reward: -104.6819708557104

Intrinsic Reward: 142.46730041503906
Update #432, Reward: -104.98617675299694

Intrinsic Reward: 142.92706298828125
Update #433, Reward: -107.05343660003754

Intrinsic Reward: 143.05465698242188
Update #434, Reward: -109.65999183366138

Intrinsic Reward: 142.15997314453125
Update #435, Reward: -102.62231514054474

Intrinsic Reward: 143.16653442382812
Update #436, Reward: -105.17222459227158

Intrinsic Reward: 142.51194763183594
Update #437, Reward: -105.39322349084914

Intrinsic Reward: 143.46444702148438
Update #438, Reward: -125.00921107586225

Intrinsic Reward: 143.5236053466797
Update #439, Reward: -103.60572553915097

Intrinsic Reward: 143.19671630859375
Update #440, Reward: -104.1702363601271

Intrinsic Reward: 142.39376831054688
Update #441, Reward: -104.63600510488327

Intrinsic Reward: 142.91026306152344
Update #442, Reward:

Intrinsic Reward: 143.40684509277344
Update #537, Reward: -106.05121858239112

Intrinsic Reward: 143.61695861816406
Update #538, Reward: -103.94512132216866

Intrinsic Reward: 143.275634765625
Update #539, Reward: -114.54525133745558

Intrinsic Reward: 143.53543090820312
Update #540, Reward: -106.25765011193354

Intrinsic Reward: 142.8446502685547
Update #541, Reward: -105.17667199522754

Intrinsic Reward: 143.54420471191406
Update #542, Reward: -105.10751944796554

Intrinsic Reward: 143.39036560058594
Update #543, Reward: -124.89993993538556

Intrinsic Reward: 142.5909423828125
Update #544, Reward: -104.0568271685224

Intrinsic Reward: 142.969482421875
Update #545, Reward: -104.72774245865084

Intrinsic Reward: 143.206787109375
Update #546, Reward: -103.80172402813162

Intrinsic Reward: 143.5924835205078
Update #547, Reward: -108.7198818175892

Intrinsic Reward: 143.3367462158203
Update #548, Reward: -104.74478867854799

Intrinsic Reward: 143.5348358154297
Update #549, Reward: -106.69

Intrinsic Reward: 143.2457275390625
Update #643, Reward: -103.69636152597641

Intrinsic Reward: 143.56504821777344
Update #644, Reward: -106.21254541494946

Intrinsic Reward: 143.04473876953125
Update #645, Reward: -106.78754705220895

Intrinsic Reward: 143.46694946289062
Update #646, Reward: -106.55476469250272

Intrinsic Reward: 142.7855987548828
Update #647, Reward: -105.14055974962935

Intrinsic Reward: 143.1460723876953
Update #648, Reward: -104.96789147145923

Intrinsic Reward: 142.8438262939453
Update #649, Reward: -105.67275261942359

Intrinsic Reward: 143.56654357910156
Update #650, Reward: -107.63480033285543

Intrinsic Reward: 143.557861328125
Update #651, Reward: -105.57775669633348

Intrinsic Reward: 142.79608154296875
Update #652, Reward: -104.52789643639264

Intrinsic Reward: 142.29493713378906
Update #653, Reward: -105.46814128149921

Intrinsic Reward: 142.66635131835938
Update #654, Reward: -105.9285417049136

Intrinsic Reward: 143.45339965820312
Update #655, Reward: -

Intrinsic Reward: 143.63821411132812
Update #748, Reward: -108.69993118795132

Intrinsic Reward: 143.33038330078125
Update #749, Reward: -106.62520719835659

Intrinsic Reward: 143.3009490966797
Update #750, Reward: -106.56911156655165

Intrinsic Reward: 142.34947204589844
Update #751, Reward: -105.30247048601136

Intrinsic Reward: 143.54217529296875
Update #752, Reward: -105.31661712384472

Intrinsic Reward: 143.43592834472656
Update #753, Reward: -105.86206121542926

Intrinsic Reward: 143.59031677246094
Update #754, Reward: -106.97670050823999

Intrinsic Reward: 143.2148895263672
Update #755, Reward: -107.33368720604294

Intrinsic Reward: 142.7252655029297
Update #756, Reward: -113.82936562263905

Intrinsic Reward: 142.77854919433594
Update #757, Reward: -105.02219473942307

Intrinsic Reward: 143.3481903076172
Update #758, Reward: -106.90494114293841

Intrinsic Reward: 142.74314880371094
Update #759, Reward: -104.08104286256557

Intrinsic Reward: 142.76426696777344
Update #760, Reward

Intrinsic Reward: 143.22972106933594
Update #853, Reward: -105.08231590882006

Intrinsic Reward: 143.12069702148438
Update #854, Reward: -127.94960468594792

Intrinsic Reward: 142.32867431640625
Update #855, Reward: -104.37067003368773

Intrinsic Reward: 143.23654174804688
Update #856, Reward: -104.1512945764338

Intrinsic Reward: 142.55551147460938
Update #857, Reward: -104.2621664241093

Intrinsic Reward: 141.8031768798828
Update #858, Reward: -103.91101154917851

Intrinsic Reward: 142.53359985351562
Update #859, Reward: -104.0029145743431

Intrinsic Reward: 142.6914825439453
Update #860, Reward: -104.38736604511614

Intrinsic Reward: 143.25645446777344
Update #861, Reward: -104.25377500546848

Intrinsic Reward: 142.38523864746094
Update #862, Reward: -104.2198638033246

Intrinsic Reward: 142.2842254638672
Update #863, Reward: -102.1625354014306

Intrinsic Reward: 142.5415496826172
Update #864, Reward: -101.11341947254725

Intrinsic Reward: 143.05300903320312
Update #865, Reward: -10

Intrinsic Reward: 143.10910034179688
Update #960, Reward: -106.86841664721693

Intrinsic Reward: 142.82135009765625
Update #961, Reward: -107.50730461212

Intrinsic Reward: 142.341796875
Update #962, Reward: -104.88845407271323

Intrinsic Reward: 142.29388427734375
Update #963, Reward: -104.00792698050104

Intrinsic Reward: 142.0682830810547
Update #964, Reward: -102.9499391096135

Intrinsic Reward: 142.71060180664062
Update #965, Reward: -103.9656799796323

Intrinsic Reward: 142.6759033203125
Update #966, Reward: -105.43331316942783

Intrinsic Reward: 143.02859497070312
Update #967, Reward: -105.20774921686761

Intrinsic Reward: 142.06524658203125
Update #968, Reward: -103.82977590219056

Intrinsic Reward: 142.62664794921875
Update #969, Reward: -102.47760968834783

Intrinsic Reward: 142.53713989257812
Update #970, Reward: -104.48127545628883

Intrinsic Reward: 142.52369689941406
Update #971, Reward: -104.30960318761382

Intrinsic Reward: 143.02635192871094
Update #972, Reward: -106.1

Intrinsic Reward: 142.26446533203125
Update #1067, Reward: -105.53680558989694

Intrinsic Reward: 141.67237854003906
Update #1068, Reward: -103.50708709908525

Intrinsic Reward: 142.30308532714844
Update #1069, Reward: -104.04003543826255

Intrinsic Reward: 142.53799438476562
Update #1070, Reward: -104.09953608950352

Intrinsic Reward: 142.5702362060547
Update #1071, Reward: -105.71213056746126

Intrinsic Reward: 142.13595581054688
Update #1072, Reward: -103.96284830308271

Intrinsic Reward: 143.0892333984375
Update #1073, Reward: -107.42783245398911

Intrinsic Reward: 141.6509552001953
Update #1074, Reward: -105.50710332719112

Intrinsic Reward: 141.73182678222656
Update #1075, Reward: -104.66579030936832

Intrinsic Reward: 141.71542358398438
Update #1076, Reward: -104.12850852381563

Intrinsic Reward: 142.5591278076172
Update #1077, Reward: -104.40106818082928

Intrinsic Reward: 142.52880859375
Update #1078, Reward: -101.36260163045613

Intrinsic Reward: 143.12100219726562
Update #10

Intrinsic Reward: 142.9000244140625
Update #1174, Reward: -105.81141604903092

Intrinsic Reward: 142.60995483398438
Update #1175, Reward: -104.24493811998765

Intrinsic Reward: 141.8542938232422
Update #1176, Reward: -102.08444518176962

Intrinsic Reward: 142.2006072998047
Update #1177, Reward: -102.05172326795757

Intrinsic Reward: 141.99893188476562
Update #1178, Reward: -104.30044710398155

Intrinsic Reward: 142.15452575683594
Update #1179, Reward: -105.85722292649187

Intrinsic Reward: 141.96559143066406
Update #1180, Reward: -103.91350003781666

Intrinsic Reward: 142.98471069335938
Update #1181, Reward: -105.69134173250137

Intrinsic Reward: 142.18206787109375
Update #1182, Reward: -104.10698108707803

Intrinsic Reward: 143.1572723388672
Update #1183, Reward: -106.81270072921303

Intrinsic Reward: 141.6356964111328
Update #1184, Reward: -103.43934392382452

Intrinsic Reward: 143.27088928222656
Update #1185, Reward: -106.49556378046299

Intrinsic Reward: 142.34262084960938
Update #

Intrinsic Reward: 143.10446166992188
Update #1279, Reward: -105.55009923664046

Intrinsic Reward: 142.81427001953125
Update #1280, Reward: -104.2821404391403

Intrinsic Reward: 142.68727111816406
Update #1281, Reward: -105.32416622811742

Intrinsic Reward: 141.55734252929688
Update #1282, Reward: -102.94473844128909

Intrinsic Reward: 142.4490966796875
Update #1283, Reward: -104.26677005377412

Intrinsic Reward: 142.16990661621094
Update #1284, Reward: -102.75409888606829

Intrinsic Reward: 143.23382568359375
Update #1285, Reward: -105.7413564340143

Intrinsic Reward: 142.24240112304688
Update #1286, Reward: -102.43640802341389

Intrinsic Reward: 142.71864318847656
Update #1287, Reward: -104.01636455264749

Intrinsic Reward: 142.2537841796875
Update #1288, Reward: -103.88920381875522

Intrinsic Reward: 142.44252014160156
Update #1289, Reward: -104.64136914235229

Intrinsic Reward: 141.9053192138672
Update #1290, Reward: -105.13075498921982

Intrinsic Reward: 142.91603088378906
Update #

Intrinsic Reward: 143.0191192626953
Update #1385, Reward: -103.77614865885675

Intrinsic Reward: 142.65948486328125
Update #1386, Reward: -104.12432181410615

Intrinsic Reward: 142.3637237548828
Update #1387, Reward: -105.06617017903613

Intrinsic Reward: 141.87545776367188
Update #1388, Reward: -103.4563218819946

Intrinsic Reward: 142.10238647460938
Update #1389, Reward: -103.92073196976942

Intrinsic Reward: 142.3261260986328
Update #1390, Reward: -104.28216222305906

Intrinsic Reward: 142.37783813476562
Update #1391, Reward: -104.16336092653933

Intrinsic Reward: 142.29588317871094
Update #1392, Reward: -103.92725251470009

Intrinsic Reward: 143.0155487060547
Update #1393, Reward: -105.23749303587526

Intrinsic Reward: 142.18783569335938
Update #1394, Reward: -104.37460393407382

Intrinsic Reward: 142.29978942871094
Update #1395, Reward: -104.50396296218597

Intrinsic Reward: 142.14581298828125
Update #1396, Reward: -105.95460494435082

Intrinsic Reward: 142.8638916015625
Update #1

Intrinsic Reward: 142.04013061523438
Update #1492, Reward: -103.75204831031586

Intrinsic Reward: 143.31600952148438
Update #1493, Reward: -106.3698068749973

Intrinsic Reward: 142.56869506835938
Update #1494, Reward: -104.06123310608417

Intrinsic Reward: 142.81277465820312
Update #1495, Reward: -105.61064325555228

Intrinsic Reward: 142.59356689453125
Update #1496, Reward: -105.2434841750823

Intrinsic Reward: 142.46177673339844
Update #1497, Reward: -105.99134566348916

Intrinsic Reward: 143.12680053710938
Update #1498, Reward: -108.2548001743673

Intrinsic Reward: 142.4709014892578
Update #1499, Reward: -104.36874164867712

Intrinsic Reward: 142.63201904296875
Update #1500, Reward: -104.19230164618915

Intrinsic Reward: 143.20263671875
Update #1501, Reward: -105.68076607197648

Intrinsic Reward: 143.0005340576172
Update #1502, Reward: -106.10187765397194

Intrinsic Reward: 143.53099060058594
Update #1503, Reward: -110.94455864409605

Intrinsic Reward: 143.17633056640625
Update #150

Intrinsic Reward: 143.23629760742188
Update #1597, Reward: -105.08910266718455

Intrinsic Reward: 143.1551513671875
Update #1598, Reward: -105.76334678774948

Intrinsic Reward: 142.76797485351562
Update #1599, Reward: -104.29321354707331

Intrinsic Reward: 142.53204345703125
Update #1600, Reward: -104.20361601333258

Intrinsic Reward: 142.18548583984375
Update #1601, Reward: -103.46496477368723

Intrinsic Reward: 142.76416015625
Update #1602, Reward: -103.79218152948158

Intrinsic Reward: 142.19793701171875
Update #1603, Reward: -105.76282037957944

Intrinsic Reward: 142.77134704589844
Update #1604, Reward: -104.06925804764964

Intrinsic Reward: 142.912353515625
Update #1605, Reward: -104.77992128422733

Intrinsic Reward: 142.4049530029297
Update #1606, Reward: -105.13910767688665

Intrinsic Reward: 142.80908203125
Update #1607, Reward: -104.2185724551032

Intrinsic Reward: 142.31033325195312
Update #1608, Reward: -103.93333255220949

Intrinsic Reward: 142.8652801513672
Update #1609, R

Intrinsic Reward: 143.67550659179688
Update #1701, Reward: -109.75528322861716

Intrinsic Reward: 143.5838623046875
Update #1702, Reward: -107.4504762224673

Intrinsic Reward: 143.29229736328125
Update #1703, Reward: -108.15013614846269

Intrinsic Reward: 143.73399353027344
Update #1704, Reward: -108.25335982517464

Intrinsic Reward: 143.23532104492188
Update #1705, Reward: -105.43744981856022

Intrinsic Reward: 143.1685791015625
Update #1706, Reward: -104.9116137502715

Intrinsic Reward: 143.17562866210938
Update #1707, Reward: -105.62511500950468

Intrinsic Reward: 143.70034790039062
Update #1708, Reward: -107.9925805585192

Intrinsic Reward: 143.1258544921875
Update #1709, Reward: -107.94909304816089

Intrinsic Reward: 143.62411499023438
Update #1710, Reward: -111.15358470372732

Intrinsic Reward: 143.55958557128906
Update #1711, Reward: -109.88581886113312

Intrinsic Reward: 142.75450134277344
Update #1712, Reward: -105.48207702066314

Intrinsic Reward: 143.7126007080078
Update #17

Intrinsic Reward: 142.3957061767578
Update #1805, Reward: -104.12437636382195

Intrinsic Reward: 143.23428344726562
Update #1806, Reward: -108.22041739396937

Intrinsic Reward: 142.38827514648438
Update #1807, Reward: -109.20056765394907

Intrinsic Reward: 143.36309814453125
Update #1808, Reward: -110.12309196725116

Intrinsic Reward: 143.52723693847656
Update #1809, Reward: -109.23535974243904

Intrinsic Reward: 143.50579833984375
Update #1810, Reward: -113.55063851178934

Intrinsic Reward: 142.7847137451172
Update #1811, Reward: -109.60667505891372

Intrinsic Reward: 143.00799560546875
Update #1812, Reward: -106.04516189636725

Intrinsic Reward: 143.40953063964844
Update #1813, Reward: -108.23373935830283

Intrinsic Reward: 142.17918395996094
Update #1814, Reward: -105.49954815761683

Intrinsic Reward: 143.10389709472656
Update #1815, Reward: -108.24073212694998

Intrinsic Reward: 143.21649169921875
Update #1816, Reward: -109.43652721232486

Intrinsic Reward: 142.80450439453125
Updat

Intrinsic Reward: 142.7085723876953
Update #1911, Reward: -106.75097174831417

Intrinsic Reward: 143.26657104492188
Update #1912, Reward: -108.72562317273082

Intrinsic Reward: 142.78689575195312
Update #1913, Reward: -106.98366610622281

Intrinsic Reward: 143.46054077148438
Update #1914, Reward: -107.501632055711

Intrinsic Reward: 143.00454711914062
Update #1915, Reward: -103.8467153159945

Intrinsic Reward: 142.40687561035156
Update #1916, Reward: -104.16505473645032

Intrinsic Reward: 142.83773803710938
Update #1917, Reward: -107.15389210606118

Intrinsic Reward: 142.7406463623047
Update #1918, Reward: -103.7213350706001

Intrinsic Reward: 142.92373657226562
Update #1919, Reward: -102.26469001581395

Intrinsic Reward: 143.23709106445312
Update #1920, Reward: -104.61897865018435

Intrinsic Reward: 143.22015380859375
Update #1921, Reward: -108.06732297172397

Intrinsic Reward: 143.62957763671875
Update #1922, Reward: -107.33043068050407

Intrinsic Reward: 142.4286346435547
Update #19

Intrinsic Reward: 142.67555236816406
Update #2017, Reward: -102.84041826534519

Intrinsic Reward: 142.4942169189453
Update #2018, Reward: -107.14621183737367

Intrinsic Reward: 142.69639587402344
Update #2019, Reward: -108.21127507160728

Intrinsic Reward: 142.96873474121094
Update #2020, Reward: -105.89923020338205

Intrinsic Reward: 142.17251586914062
Update #2021, Reward: -103.72270604324775

Intrinsic Reward: 143.13417053222656
Update #2022, Reward: -106.12471073436986

Intrinsic Reward: 142.4358367919922
Update #2023, Reward: -104.36364638612979

Intrinsic Reward: 142.52647399902344
Update #2024, Reward: -105.18182685374903

Intrinsic Reward: 142.791259765625
Update #2025, Reward: -107.19163645954927

Intrinsic Reward: 142.76785278320312
Update #2026, Reward: -105.60892830165227

Intrinsic Reward: 142.94610595703125
Update #2027, Reward: -105.78106055012843

Intrinsic Reward: 143.13890075683594
Update #2028, Reward: -105.33378240650644

Intrinsic Reward: 142.99307250976562
Update 

Intrinsic Reward: 142.22891235351562
Update #2124, Reward: -104.64855401619089

Intrinsic Reward: 142.48162841796875
Update #2125, Reward: -103.213047353878

Intrinsic Reward: 142.36004638671875
Update #2126, Reward: -102.73619404164826

Intrinsic Reward: 142.08872985839844
Update #2127, Reward: -103.41585257106833

Intrinsic Reward: 142.5205078125
Update #2128, Reward: -100.12968343680912

Intrinsic Reward: 142.3298797607422
Update #2129, Reward: -103.82020009072001

Intrinsic Reward: 142.8677978515625
Update #2130, Reward: -102.24132526933712

Intrinsic Reward: 141.9033660888672
Update #2131, Reward: -103.49478776199494

Intrinsic Reward: 142.53875732421875
Update #2132, Reward: -103.47159862209298

Intrinsic Reward: 142.3219451904297
Update #2133, Reward: -102.91470641400106

Intrinsic Reward: 142.33740234375
Update #2134, Reward: -99.71893455536974

Intrinsic Reward: 141.94078063964844
Update #2135, Reward: -103.34296662609776

Intrinsic Reward: 141.97935485839844
Update #2136, Rew

Intrinsic Reward: 143.1002655029297
Update #2231, Reward: -111.68082984395078

Intrinsic Reward: 142.8309783935547
Update #2232, Reward: -101.90431658873955

Intrinsic Reward: 142.62779235839844
Update #2233, Reward: -103.19353357180208

Intrinsic Reward: 142.6785125732422
Update #2234, Reward: -102.63594200590873

Intrinsic Reward: 142.79774475097656
Update #2235, Reward: -101.64614723831912

Intrinsic Reward: 141.96124267578125
Update #2236, Reward: -104.11253051005366

Intrinsic Reward: 142.93212890625
Update #2237, Reward: -102.91078074305443

Intrinsic Reward: 143.42807006835938
Update #2238, Reward: -106.15304847086657

Intrinsic Reward: 142.75048828125
Update #2239, Reward: -102.85627670178562

Intrinsic Reward: 142.13082885742188
Update #2240, Reward: -103.38905155184679

Intrinsic Reward: 142.6376190185547
Update #2241, Reward: -102.53493449057267

Intrinsic Reward: 143.01231384277344
Update #2242, Reward: -100.23830460628929

Intrinsic Reward: 143.19198608398438
Update #2243,

Intrinsic Reward: 142.73495483398438
Update #2337, Reward: -101.9977768077422

Intrinsic Reward: 142.3708953857422
Update #2338, Reward: -103.04125791189199

Intrinsic Reward: 142.58592224121094
Update #2339, Reward: -102.84967065072183

Intrinsic Reward: 142.93283081054688
Update #2340, Reward: -104.04188487188642

Intrinsic Reward: 142.6250457763672
Update #2341, Reward: -103.76432398930751

Intrinsic Reward: 143.03504943847656
Update #2342, Reward: -100.1398080203347

Intrinsic Reward: 142.5108184814453
Update #2343, Reward: -104.30730654500549

Intrinsic Reward: 142.68185424804688
Update #2344, Reward: -102.30656279585324

Intrinsic Reward: 142.66200256347656
Update #2345, Reward: -100.67791499224492

Intrinsic Reward: 143.09446716308594
Update #2346, Reward: -103.94165979379477

Intrinsic Reward: 142.45254516601562
Update #2347, Reward: -103.76051620311662

Intrinsic Reward: 142.79049682617188
Update #2348, Reward: -104.97952758759385

Intrinsic Reward: 143.19619750976562
Update #

Intrinsic Reward: 142.82034301757812
Update #2442, Reward: -101.65016293084436

Intrinsic Reward: 143.10621643066406
Update #2443, Reward: -101.84646761397644

Intrinsic Reward: 143.17721557617188
Update #2444, Reward: -108.05476883213409

Intrinsic Reward: 143.2059326171875
Update #2445, Reward: -105.83531831531165

Intrinsic Reward: 141.90109252929688
Update #2446, Reward: -126.73680478875028

Intrinsic Reward: 143.07810974121094
Update #2447, Reward: -104.46095646047903

Intrinsic Reward: 143.15939331054688
Update #2448, Reward: -101.7108554127117

Intrinsic Reward: 143.19586181640625
Update #2449, Reward: -106.83798916537252

Intrinsic Reward: 142.82693481445312
Update #2450, Reward: -101.8260120208462

Intrinsic Reward: 142.8332977294922
Update #2451, Reward: -106.84560763202174

Intrinsic Reward: 142.944091796875
Update #2452, Reward: -102.09775919400032

Intrinsic Reward: 143.18177795410156
Update #2453, Reward: -102.60549914570836

Intrinsic Reward: 143.29592895507812
Update #2

Intrinsic Reward: 142.9113006591797
Update #2549, Reward: -104.45332380070103

Intrinsic Reward: 142.88072204589844
Update #2550, Reward: -104.47479680223329

Intrinsic Reward: 142.9015655517578
Update #2551, Reward: -106.45175491237268

Intrinsic Reward: 142.93402099609375
Update #2552, Reward: -106.5447968260385

Intrinsic Reward: 142.82632446289062
Update #2553, Reward: -103.11996446654823

Intrinsic Reward: 143.09886169433594
Update #2554, Reward: -107.46920499785679

Intrinsic Reward: 142.83876037597656
Update #2555, Reward: -108.59438866477583

Intrinsic Reward: 143.05987548828125
Update #2556, Reward: -100.45425755743993

Intrinsic Reward: 143.104248046875
Update #2557, Reward: -101.56188462449238

Intrinsic Reward: 142.8417510986328
Update #2558, Reward: -101.95932537447351

Intrinsic Reward: 142.8946990966797
Update #2559, Reward: -104.96670863654279

Intrinsic Reward: 142.80877685546875
Update #2560, Reward: -106.40773097687277

Intrinsic Reward: 143.0885772705078
Update #256

Intrinsic Reward: 142.8575897216797
Update #2654, Reward: -107.88008203128477

Intrinsic Reward: 143.12753295898438
Update #2655, Reward: -105.6538830328416

Intrinsic Reward: 140.18173217773438
Update #2656, Reward: -123.64992815662238

Intrinsic Reward: 142.9836883544922
Update #2657, Reward: -108.01899171272603

Intrinsic Reward: 143.10650634765625
Update #2658, Reward: -107.02357828604616

Intrinsic Reward: 143.2969512939453
Update #2659, Reward: -104.2338150219402

Intrinsic Reward: 143.17588806152344
Update #2660, Reward: -107.95609927526738

Intrinsic Reward: 142.87451171875
Update #2661, Reward: -106.1270742829113

Intrinsic Reward: 141.1292724609375
Update #2662, Reward: -125.49557806330857

Intrinsic Reward: 143.1631317138672
Update #2663, Reward: -106.10317243236304

Intrinsic Reward: 143.02886962890625
Update #2664, Reward: -105.38633524587999

Intrinsic Reward: 143.1820526123047
Update #2665, Reward: -108.11957867487769

Intrinsic Reward: 139.69163513183594
Update #2666, R

Intrinsic Reward: 143.59991455078125
Update #2758, Reward: -107.74667774763704

Intrinsic Reward: 143.3682403564453
Update #2759, Reward: -104.3538684052738

Intrinsic Reward: 143.0745086669922
Update #2760, Reward: -103.67388826019379

Intrinsic Reward: 143.2552490234375
Update #2761, Reward: -107.06096691101293

Intrinsic Reward: 143.34912109375
Update #2762, Reward: -107.84456562790585

Intrinsic Reward: 143.11927795410156
Update #2763, Reward: -103.97530434116287

Intrinsic Reward: 143.3931427001953
Update #2764, Reward: -106.24318053377544

Intrinsic Reward: 143.3448944091797
Update #2765, Reward: -105.64760052971977

Intrinsic Reward: 143.17098999023438
Update #2766, Reward: -103.72360385751662

Intrinsic Reward: 143.32098388671875
Update #2767, Reward: -107.81876828165912

Intrinsic Reward: 143.23155212402344
Update #2768, Reward: -104.99489180656087

Intrinsic Reward: 143.28216552734375
Update #2769, Reward: -108.0829771006275

Intrinsic Reward: 143.53224182128906
Update #2770,

Intrinsic Reward: 143.55450439453125
Update #2864, Reward: -106.80748864819482

Intrinsic Reward: 143.21087646484375
Update #2865, Reward: -58.066861344837186

Intrinsic Reward: 143.7116241455078
Update #2866, Reward: -108.50449097803794

Intrinsic Reward: 143.59317016601562
Update #2867, Reward: -106.07998582321778

Intrinsic Reward: 143.248291015625
Update #2868, Reward: -106.78975259722645

Intrinsic Reward: 143.21380615234375
Update #2869, Reward: -105.11901430187996

Intrinsic Reward: 143.36196899414062
Update #2870, Reward: -105.49313571748262

Intrinsic Reward: 143.15003967285156
Update #2871, Reward: -104.52963849465797

Intrinsic Reward: 143.8134765625
Update #2872, Reward: -111.09296945998011

Intrinsic Reward: 143.3893585205078
Update #2873, Reward: -104.71974083426532

Intrinsic Reward: 143.2411346435547
Update #2874, Reward: -106.46287975804943

Intrinsic Reward: 143.44500732421875
Update #2875, Reward: -106.22378322130504

Intrinsic Reward: 143.2124481201172
Update #2876,

Intrinsic Reward: 143.48561096191406
Update #2970, Reward: -128.62079658414237

Intrinsic Reward: 143.68759155273438
Update #2971, Reward: -116.04256283379408

Intrinsic Reward: 143.7285614013672
Update #2972, Reward: -109.11926393265216

Intrinsic Reward: 143.7838134765625
Update #2973, Reward: -115.17046679495648

Intrinsic Reward: 143.58514404296875
Update #2974, Reward: -107.27802878033805

Intrinsic Reward: 143.83877563476562
Update #2975, Reward: -111.48561878465365

Intrinsic Reward: 143.78683471679688
Update #2976, Reward: -109.42800863134923

Intrinsic Reward: 143.83187866210938
Update #2977, Reward: -111.34128480965954

Intrinsic Reward: 143.7582550048828
Update #2978, Reward: -113.35623687352427

Intrinsic Reward: 143.68699645996094
Update #2979, Reward: -115.85282636189461

Intrinsic Reward: 143.72927856445312
Update #2980, Reward: -116.77532829782305

Intrinsic Reward: 143.64100646972656
Update #2981, Reward: -116.92498508301563

Intrinsic Reward: 143.7646026611328
Update 

Intrinsic Reward: 143.351806640625
Update #3074, Reward: -127.2209777814081

Intrinsic Reward: 143.5922393798828
Update #3075, Reward: -108.63076329310735

Intrinsic Reward: 143.12132263183594
Update #3076, Reward: -110.51037926247157

Intrinsic Reward: 143.51708984375
Update #3077, Reward: -109.5498460706547

Intrinsic Reward: 143.75123596191406
Update #3078, Reward: -108.88473905183312

Intrinsic Reward: 142.6567840576172
Update #3079, Reward: -123.26472914422737

Intrinsic Reward: 143.75119018554688
Update #3080, Reward: -111.17117575552066

Intrinsic Reward: 143.59805297851562
Update #3081, Reward: -109.89837801828794

Intrinsic Reward: 143.7297821044922
Update #3082, Reward: -109.35838346585446

Intrinsic Reward: 143.71417236328125
Update #3083, Reward: -109.7857812041479

Intrinsic Reward: 142.9281005859375
Update #3084, Reward: -123.88119002385437

Intrinsic Reward: 143.65255737304688
Update #3085, Reward: -115.15772974607223

Intrinsic Reward: 143.32525634765625
Update #3086, R

Intrinsic Reward: 143.7006378173828
Update #3181, Reward: -111.86387602885564

Intrinsic Reward: 143.40402221679688
Update #3182, Reward: -111.0134428825099

Intrinsic Reward: 143.0308837890625
Update #3183, Reward: -122.52377703789064

Intrinsic Reward: 143.2740020751953
Update #3184, Reward: -108.59171311816822

Intrinsic Reward: 143.70957946777344
Update #3185, Reward: -108.57287684989721

Intrinsic Reward: 142.69989013671875
Update #3186, Reward: -123.36052403855572

Intrinsic Reward: 143.74267578125
Update #3187, Reward: -109.64486690602513

Intrinsic Reward: 143.6276397705078
Update #3188, Reward: -107.37328399410906

Intrinsic Reward: 143.72991943359375
Update #3189, Reward: -113.06858471315292

Intrinsic Reward: 143.5808563232422
Update #3190, Reward: -108.6781985660214

Intrinsic Reward: 143.75421142578125
Update #3191, Reward: -112.7439563056895

Intrinsic Reward: 143.60244750976562
Update #3192, Reward: -117.28231856814585

Intrinsic Reward: 142.8386688232422
Update #3193, R

Intrinsic Reward: 143.7336883544922
Update #3286, Reward: -113.86970147267915

Intrinsic Reward: 143.4692840576172
Update #3287, Reward: -108.90555952314908

Intrinsic Reward: 143.5991668701172
Update #3288, Reward: -108.18411380328239

Intrinsic Reward: 143.6190948486328
Update #3289, Reward: -108.55052356200355

Intrinsic Reward: 143.565185546875
Update #3290, Reward: -107.66220136454825

Intrinsic Reward: 143.44784545898438
Update #3291, Reward: -108.17776072557457

Intrinsic Reward: 143.43667602539062
Update #3292, Reward: -108.64387786289491

Intrinsic Reward: 143.50804138183594
Update #3293, Reward: -108.33500715998747

Intrinsic Reward: 143.14715576171875
Update #3294, Reward: -108.96629483146842

Intrinsic Reward: 143.7069091796875
Update #3295, Reward: -108.69332367311667

Intrinsic Reward: 143.73300170898438
Update #3296, Reward: -109.97330711330225

Intrinsic Reward: 143.3504180908203
Update #3297, Reward: -106.46662188814642

Intrinsic Reward: 143.34164428710938
Update #329

Intrinsic Reward: 143.7305450439453
Update #3391, Reward: -109.75528392562146

Intrinsic Reward: 143.42245483398438
Update #3392, Reward: -108.08753897470235

Intrinsic Reward: 143.3214569091797
Update #3393, Reward: -107.9539356201825

Intrinsic Reward: 143.75376892089844
Update #3394, Reward: -114.94610514631867

Intrinsic Reward: 143.74334716796875
Update #3395, Reward: -110.98749430804452

Intrinsic Reward: 143.35244750976562
Update #3396, Reward: -109.01839926981492

Intrinsic Reward: 143.1784210205078
Update #3397, Reward: -109.5064351328841

Intrinsic Reward: 143.76852416992188
Update #3398, Reward: -114.61026768777086

Intrinsic Reward: 143.49073791503906
Update #3399, Reward: -109.6311125755906

Intrinsic Reward: 143.7269287109375
Update #3400, Reward: -108.284969090083

Intrinsic Reward: 143.69296264648438
Update #3401, Reward: -110.04224370915132

Intrinsic Reward: 143.61705017089844
Update #3402, Reward: -108.39595203650805

Intrinsic Reward: 143.58326721191406
Update #3403

Intrinsic Reward: 143.0130157470703
Update #3499, Reward: -114.85777127496961

Intrinsic Reward: 143.7408905029297
Update #3500, Reward: -110.1207239690243

Intrinsic Reward: 143.44850158691406
Update #3501, Reward: -108.80042176917445

Intrinsic Reward: 143.74000549316406
Update #3502, Reward: -110.77740143079497

Intrinsic Reward: 143.4340362548828
Update #3503, Reward: -112.44478706869545

Intrinsic Reward: 143.72415161132812
Update #3504, Reward: -110.21860135823427

Intrinsic Reward: 143.30227661132812
Update #3505, Reward: -108.27648914375654

Intrinsic Reward: 143.6298370361328
Update #3506, Reward: -108.81997229345764

Intrinsic Reward: 143.46817016601562
Update #3507, Reward: -108.62144596789405

Intrinsic Reward: 143.7714080810547
Update #3508, Reward: -110.91412233625476

Intrinsic Reward: 143.67922973632812
Update #3509, Reward: -108.77533027022456

Intrinsic Reward: 143.4534149169922
Update #3510, Reward: -107.7296814908317

Intrinsic Reward: 143.41009521484375
Update #351

Intrinsic Reward: 143.6241455078125
Update #3606, Reward: -106.53618717120465

Intrinsic Reward: 143.84751892089844
Update #3607, Reward: -110.02741009753446

Intrinsic Reward: 143.7311553955078
Update #3608, Reward: -109.98274707993616

Intrinsic Reward: 143.7867889404297
Update #3609, Reward: -110.33111866625585

Intrinsic Reward: 143.58294677734375
Update #3610, Reward: -106.95276133539465

Intrinsic Reward: 143.7901153564453
Update #3611, Reward: -107.6833910390983

Intrinsic Reward: 143.7605438232422
Update #3612, Reward: -109.37882682756955

Intrinsic Reward: 143.47157287597656
Update #3613, Reward: -107.43064840564132

Intrinsic Reward: 143.71400451660156
Update #3614, Reward: -108.37438237511181

Intrinsic Reward: 143.83505249023438
Update #3615, Reward: -112.32810160900529

Intrinsic Reward: 143.70306396484375
Update #3616, Reward: -108.4567031618425

Intrinsic Reward: 143.7889404296875
Update #3617, Reward: -111.92569607064004

Intrinsic Reward: 143.80093383789062
Update #361

Intrinsic Reward: 143.7038116455078
Update #3713, Reward: -115.78806184104707

Intrinsic Reward: 143.7978515625
Update #3714, Reward: -110.00527992161861

Intrinsic Reward: 143.76806640625
Update #3715, Reward: -108.3274586212424

Intrinsic Reward: 143.70849609375
Update #3716, Reward: -109.14594614721463

Intrinsic Reward: 143.5170135498047
Update #3717, Reward: -107.82909100324599

Intrinsic Reward: 143.67433166503906
Update #3718, Reward: -107.1654398847806

Intrinsic Reward: 143.7096405029297
Update #3719, Reward: -109.83047458249455

Intrinsic Reward: 143.64808654785156
Update #3720, Reward: -106.30530489549352

Intrinsic Reward: 143.63131713867188
Update #3721, Reward: -107.18526557316942

Intrinsic Reward: 143.81707763671875
Update #3722, Reward: -113.14917422302315

Intrinsic Reward: 143.797119140625
Update #3723, Reward: -111.98370761795839

Intrinsic Reward: 143.80191040039062
Update #3724, Reward: -111.78029744293852

Intrinsic Reward: 143.6331024169922
Update #3725, Reward:

Intrinsic Reward: 143.62135314941406
Update #3817, Reward: -106.90561563111966

Intrinsic Reward: 143.76242065429688
Update #3818, Reward: -107.06259398102388

Intrinsic Reward: 143.65802001953125
Update #3819, Reward: -107.70234144884907

Intrinsic Reward: 143.753173828125
Update #3820, Reward: -110.94198144753712

Intrinsic Reward: 143.64373779296875
Update #3821, Reward: -110.19176429853712

Intrinsic Reward: 143.48126220703125
Update #3822, Reward: -107.61273439687243

Intrinsic Reward: 143.72671508789062
Update #3823, Reward: -109.47270715722566

Intrinsic Reward: 143.69174194335938
Update #3824, Reward: -107.97554555882824

Intrinsic Reward: 143.52957153320312
Update #3825, Reward: -107.8872717657586

Intrinsic Reward: 143.42628479003906
Update #3826, Reward: -107.39900098977424

Intrinsic Reward: 143.4667205810547
Update #3827, Reward: -107.99778400368493

Intrinsic Reward: 143.65765380859375
Update #3828, Reward: -106.74757016151958

Intrinsic Reward: 143.7626190185547
Update #

Intrinsic Reward: 143.63955688476562
Update #3923, Reward: -105.0473743378669

Intrinsic Reward: 143.71710205078125
Update #3924, Reward: -105.3987195261338

Intrinsic Reward: 143.63265991210938
Update #3925, Reward: -110.58849138015074

Intrinsic Reward: 143.6202392578125
Update #3926, Reward: -107.09633202508712

Intrinsic Reward: 143.7094268798828
Update #3927, Reward: -108.55375416470568

Intrinsic Reward: 143.75148010253906
Update #3928, Reward: -107.82968698875109

Intrinsic Reward: 143.52923583984375
Update #3929, Reward: -107.38903905798992

Intrinsic Reward: 143.5572509765625
Update #3930, Reward: -106.99906523869497

Intrinsic Reward: 143.54769897460938
Update #3931, Reward: -107.99134130913578

Intrinsic Reward: 143.48095703125
Update #3932, Reward: -107.58830683236631

Intrinsic Reward: 143.6429443359375
Update #3933, Reward: -107.8972100433285

Intrinsic Reward: 143.4324493408203
Update #3934, Reward: -108.72801537474741

Intrinsic Reward: 143.44178771972656
Update #3935, 

Intrinsic Reward: 143.76809692382812
Update #4031, Reward: -110.82767489144454

Intrinsic Reward: 143.677490234375
Update #4032, Reward: -107.158899326376

Intrinsic Reward: 143.74240112304688
Update #4033, Reward: -108.2925545173095

Intrinsic Reward: 143.71746826171875
Update #4034, Reward: -108.90582381331673

Intrinsic Reward: 143.58580017089844
Update #4035, Reward: -106.99199532003267

Intrinsic Reward: 143.721923828125
Update #4036, Reward: -107.73995854271638

Intrinsic Reward: 143.6231689453125
Update #4037, Reward: -106.60669551924802

Intrinsic Reward: 143.6919708251953
Update #4038, Reward: -110.52795576541189

Intrinsic Reward: 143.61749267578125
Update #4039, Reward: -107.12897355008312

Intrinsic Reward: 143.2926788330078
Update #4040, Reward: -108.3534899860217

Intrinsic Reward: 143.69322204589844
Update #4041, Reward: -108.17918145171491

Intrinsic Reward: 143.80667114257812
Update #4042, Reward: -109.27975979113951

Intrinsic Reward: 143.7678985595703
Update #4043, R

Intrinsic Reward: 143.7759246826172
Update #4139, Reward: -109.33187239777918

Intrinsic Reward: 143.8309783935547
Update #4140, Reward: -108.98252478400245

Intrinsic Reward: 143.79074096679688
Update #4141, Reward: -110.81721310112749

Intrinsic Reward: 143.72250366210938
Update #4142, Reward: -108.11470608781838

Intrinsic Reward: 143.85975646972656
Update #4143, Reward: -112.02441298945931

Intrinsic Reward: 143.85638427734375
Update #4144, Reward: -111.24158823003992

Intrinsic Reward: 143.64108276367188
Update #4145, Reward: -113.40159980565807

Intrinsic Reward: 143.84140014648438
Update #4146, Reward: -114.56902221050859

Intrinsic Reward: 143.7170867919922
Update #4147, Reward: -111.76818359732752

Intrinsic Reward: 143.67745971679688
Update #4148, Reward: -107.79006929428876

Intrinsic Reward: 143.82247924804688
Update #4149, Reward: -114.4500400258607

Intrinsic Reward: 143.6426544189453
Update #4150, Reward: -107.96545361833026

Intrinsic Reward: 143.81822204589844
Update #

Intrinsic Reward: 143.79922485351562
Update #4246, Reward: -113.69048948590148

Intrinsic Reward: 143.80628967285156
Update #4247, Reward: -117.30896263252075

Intrinsic Reward: 143.79502868652344
Update #4248, Reward: -114.45366028679163

Intrinsic Reward: 143.62890625
Update #4249, Reward: -119.05249023080431

Intrinsic Reward: 143.87095642089844
Update #4250, Reward: -114.71575662936891

Intrinsic Reward: 143.76087951660156
Update #4251, Reward: -106.91843341392155

Intrinsic Reward: 143.83705139160156
Update #4252, Reward: -109.71035989553917

Intrinsic Reward: 143.7759552001953
Update #4253, Reward: -106.9556807143384

Intrinsic Reward: 143.5121612548828
Update #4254, Reward: -107.41642476720611

Intrinsic Reward: 143.8834686279297
Update #4255, Reward: -112.04334457584471

Intrinsic Reward: 143.7683868408203
Update #4256, Reward: -110.00373161082715

Intrinsic Reward: 143.6409912109375
Update #4257, Reward: -107.84943149188781

Intrinsic Reward: 143.7359161376953
Update #4258, Re

Intrinsic Reward: 143.81568908691406
Update #4350, Reward: -110.62911607092433

Intrinsic Reward: 143.38673400878906
Update #4351, Reward: -106.28146099384874

Intrinsic Reward: 143.5639190673828
Update #4352, Reward: -106.52720148077793

Intrinsic Reward: 143.44813537597656
Update #4353, Reward: -107.11290277072105

Intrinsic Reward: 143.7545623779297
Update #4354, Reward: -107.11243613396212

Intrinsic Reward: 143.74072265625
Update #4355, Reward: -107.80231317933215

Intrinsic Reward: 143.65533447265625
Update #4356, Reward: -104.64546508620617

Intrinsic Reward: 143.5497589111328
Update #4357, Reward: -108.81316647190786

Intrinsic Reward: 143.70590209960938
Update #4358, Reward: -106.52799226698404

Intrinsic Reward: 143.48056030273438
Update #4359, Reward: -104.52869107091738

Intrinsic Reward: 143.5721435546875
Update #4360, Reward: -106.38609544980153

Intrinsic Reward: 143.4903106689453
Update #4361, Reward: -106.9565894308413

Intrinsic Reward: 143.75985717773438
Update #4362

Intrinsic Reward: 143.85830688476562
Update #4455, Reward: -115.43231668502776

Intrinsic Reward: 143.8841094970703
Update #4456, Reward: -110.7236043922628

Intrinsic Reward: 143.48831176757812
Update #4457, Reward: -109.28585014979106

Intrinsic Reward: 143.7547149658203
Update #4458, Reward: -109.50907238830564

Intrinsic Reward: 143.70596313476562
Update #4459, Reward: -107.55892231353062

Intrinsic Reward: 143.7211151123047
Update #4460, Reward: -108.56596859447473

Intrinsic Reward: 143.75924682617188
Update #4461, Reward: -106.57217162563217

Intrinsic Reward: 143.62689208984375
Update #4462, Reward: -106.25647294306445

Intrinsic Reward: 143.74649047851562
Update #4463, Reward: -109.16536963422162

Intrinsic Reward: 143.71958923339844
Update #4464, Reward: -108.78497554786689

Intrinsic Reward: 143.7100067138672
Update #4465, Reward: -109.5915667286509

Intrinsic Reward: 143.55262756347656
Update #4466, Reward: -107.51188461422176

Intrinsic Reward: 143.76060485839844
Update #4

Intrinsic Reward: 143.34576416015625
Update #4560, Reward: -109.34618924040038

Intrinsic Reward: 143.5470428466797
Update #4561, Reward: -109.33568203595156

Intrinsic Reward: 143.82321166992188
Update #4562, Reward: -115.83834582400446

Intrinsic Reward: 143.64260864257812
Update #4563, Reward: -108.05728046159881

Intrinsic Reward: 143.79067993164062
Update #4564, Reward: -107.0548024478176

Intrinsic Reward: 143.84068298339844
Update #4565, Reward: -111.26442174481166

Intrinsic Reward: 143.48828125
Update #4566, Reward: -110.3370916572567

Intrinsic Reward: 143.80892944335938
Update #4567, Reward: -110.45132671603561

Intrinsic Reward: 143.30323791503906
Update #4568, Reward: -110.65473923109721

Intrinsic Reward: 143.66510009765625
Update #4569, Reward: -106.15359985279788

Intrinsic Reward: 143.7964324951172
Update #4570, Reward: -107.65155230488007

Intrinsic Reward: 143.79434204101562
Update #4571, Reward: -109.17424193638128

Intrinsic Reward: 143.72593688964844
Update #4572,

Intrinsic Reward: 143.83013916015625
Update #4666, Reward: -111.97810078388514

Intrinsic Reward: 143.79376220703125
Update #4667, Reward: -108.67863829844941

Intrinsic Reward: 143.67816162109375
Update #4668, Reward: -106.77545371592666

Intrinsic Reward: 143.6452178955078
Update #4669, Reward: -107.72241743236222

Intrinsic Reward: 143.79449462890625
Update #4670, Reward: -109.9597909236867

Intrinsic Reward: 143.5401611328125
Update #4671, Reward: -105.01818277284193

Intrinsic Reward: 143.79498291015625
Update #4672, Reward: -106.80046947795338

Intrinsic Reward: 143.80075073242188
Update #4673, Reward: -107.60713094185975

Intrinsic Reward: 143.6495819091797
Update #4674, Reward: -106.87972225519891

Intrinsic Reward: 143.4864044189453
Update #4675, Reward: -123.1034811119251

Intrinsic Reward: 143.85853576660156
Update #4676, Reward: -112.56881772265893

Intrinsic Reward: 143.82131958007812
Update #4677, Reward: -109.6668735175617

Intrinsic Reward: 143.7733154296875
Update #467

Intrinsic Reward: 143.86807250976562
Update #4771, Reward: -110.30974040227818

Intrinsic Reward: 143.87391662597656
Update #4772, Reward: -113.59114146544474

Intrinsic Reward: 143.7777557373047
Update #4773, Reward: -106.56891864906811

Intrinsic Reward: 143.87184143066406
Update #4774, Reward: -110.23048687873346

Intrinsic Reward: 143.8692626953125
Update #4775, Reward: -109.52401122902707

Intrinsic Reward: 143.79774475097656
Update #4776, Reward: -108.20232991566758

Intrinsic Reward: 143.80941772460938
Update #4777, Reward: -108.7771281545156

Intrinsic Reward: 143.56997680664062
Update #4778, Reward: -108.4150884115013

Intrinsic Reward: 143.53306579589844
Update #4779, Reward: -103.24171016530444

Intrinsic Reward: 143.8291473388672
Update #4780, Reward: -111.29020791367255

Intrinsic Reward: 143.88540649414062
Update #4781, Reward: -116.09292940383901

Intrinsic Reward: 143.61940002441406
Update #4782, Reward: -107.97257026870611

Intrinsic Reward: 143.70516967773438
Update #

Intrinsic Reward: 143.748046875
Update #4874, Reward: -108.87788153723689

Intrinsic Reward: 143.81243896484375
Update #4875, Reward: -108.24930071820009

Intrinsic Reward: 143.88735961914062
Update #4876, Reward: -112.09783431902156

Intrinsic Reward: 143.78038024902344
Update #4877, Reward: -105.61775293196241

Intrinsic Reward: 143.83517456054688
Update #4878, Reward: -107.78545229072186

Intrinsic Reward: 143.689697265625
Update #4879, Reward: -107.72962233039489

Intrinsic Reward: 143.87637329101562
Update #4880, Reward: -111.24872254878221

Intrinsic Reward: 143.6652374267578
Update #4881, Reward: -109.51416329874533

Intrinsic Reward: 143.87335205078125
Update #4882, Reward: -115.87209461072895

Intrinsic Reward: 143.81568908691406
Update #4883, Reward: -108.93554392080382

Intrinsic Reward: 143.6215057373047
Update #4884, Reward: -105.94986726006617

Intrinsic Reward: 143.85824584960938
Update #4885, Reward: -113.96628185382113

Intrinsic Reward: 143.88714599609375
Update #4886

Intrinsic Reward: 143.75982666015625
Update #4981, Reward: -106.48341955948

Intrinsic Reward: 143.65386962890625
Update #4982, Reward: -107.63518396356827

Intrinsic Reward: 143.6756134033203
Update #4983, Reward: -109.07071919931968

Intrinsic Reward: 143.62103271484375
Update #4984, Reward: -107.50018510088945

Intrinsic Reward: 143.7888946533203
Update #4985, Reward: -108.67911161872496

Intrinsic Reward: 143.5806427001953
Update #4986, Reward: -106.41005482474962

Intrinsic Reward: 143.8367462158203
Update #4987, Reward: -108.06077325712455

Intrinsic Reward: 143.76168823242188
Update #4988, Reward: -108.65457953550977

Intrinsic Reward: 143.74249267578125
Update #4989, Reward: -106.79196494506547

Intrinsic Reward: 143.62261962890625
Update #4990, Reward: -108.6519809824607

Intrinsic Reward: 143.51707458496094
Update #4991, Reward: -106.92227099897029

Intrinsic Reward: 143.52792358398438
Update #4992, Reward: -109.910392159552

Intrinsic Reward: 143.66696166992188
Update #4993,

Intrinsic Reward: 143.730224609375
Update #5085, Reward: -107.48005611808473

Intrinsic Reward: 143.79275512695312
Update #5086, Reward: -108.10327049182231

Intrinsic Reward: 143.75604248046875
Update #5087, Reward: -108.37232882097673

Intrinsic Reward: 143.8092498779297
Update #5088, Reward: -110.67540856876411

Intrinsic Reward: 143.7902069091797
Update #5089, Reward: -112.67835210353385

Intrinsic Reward: 143.74221801757812
Update #5090, Reward: -108.10511101070605

Intrinsic Reward: 143.64059448242188
Update #5091, Reward: -108.58976786958178

Intrinsic Reward: 143.78489685058594
Update #5092, Reward: -107.39861071498196

Intrinsic Reward: 143.62916564941406
Update #5093, Reward: -108.12700101988142

Intrinsic Reward: 143.6680908203125
Update #5094, Reward: -107.02060297620235

Intrinsic Reward: 143.81092834472656
Update #5095, Reward: -107.90275676103992

Intrinsic Reward: 143.7600860595703
Update #5096, Reward: -108.62099469800107

Intrinsic Reward: 143.6796875
Update #5097, Re

Intrinsic Reward: 143.8098602294922
Update #5190, Reward: -112.44878030485846

Intrinsic Reward: 143.8404998779297
Update #5191, Reward: -110.09410569940943

Intrinsic Reward: 143.6434783935547
Update #5192, Reward: -108.43964402190969

Intrinsic Reward: 143.78355407714844
Update #5193, Reward: -107.72561655704801

Intrinsic Reward: 143.64328002929688
Update #5194, Reward: -109.20753783056762

Intrinsic Reward: 143.76947021484375
Update #5195, Reward: -107.52156184850136

Intrinsic Reward: 143.87451171875
Update #5196, Reward: -110.425688734555

Intrinsic Reward: 143.8060760498047
Update #5197, Reward: -107.44028051592471

Intrinsic Reward: 143.83099365234375
Update #5198, Reward: -108.01910562708849

Intrinsic Reward: 143.7848358154297
Update #5199, Reward: -107.19886739394441

Intrinsic Reward: 143.7998809814453
Update #5200, Reward: -110.30145210903325

Intrinsic Reward: 143.83712768554688
Update #5201, Reward: -109.1898076045718

Intrinsic Reward: 143.75001525878906
Update #5202, R

Intrinsic Reward: 143.86326599121094
Update #5296, Reward: -114.12460901834629

Intrinsic Reward: 143.8341064453125
Update #5297, Reward: -108.764237563792

Intrinsic Reward: 143.83859252929688
Update #5298, Reward: -115.31458302411376

Intrinsic Reward: 143.853271484375
Update #5299, Reward: -115.45158426933612

Intrinsic Reward: 143.7323760986328
Update #5300, Reward: -124.65031273056319

Intrinsic Reward: 143.26036071777344
Update #5301, Reward: -122.33624696220954

Intrinsic Reward: 143.4452667236328
Update #5302, Reward: -123.52631815246183

Intrinsic Reward: 143.76260375976562
Update #5303, Reward: -117.98157162094799

Intrinsic Reward: 143.7957000732422
Update #5304, Reward: -115.48425319604887

Intrinsic Reward: 143.76065063476562
Update #5305, Reward: -110.31258913512279

Intrinsic Reward: 143.86410522460938
Update #5306, Reward: -111.70813946134845

Intrinsic Reward: 143.84121704101562
Update #5307, Reward: -108.92998706490918

Intrinsic Reward: 143.78421020507812
Update #530

Intrinsic Reward: 143.8351593017578
Update #5402, Reward: -108.28119705544599

Intrinsic Reward: 143.83596801757812
Update #5403, Reward: -109.21533173718117

Intrinsic Reward: 143.88389587402344
Update #5404, Reward: -112.45747261280133

Intrinsic Reward: 143.69073486328125
Update #5405, Reward: -108.36602444646384

Intrinsic Reward: 143.7872314453125
Update #5406, Reward: -107.21086948474

Intrinsic Reward: 143.8341522216797
Update #5407, Reward: -108.0985589960081

Intrinsic Reward: 143.7852325439453
Update #5408, Reward: -108.74684312549172

Intrinsic Reward: 143.8187713623047
Update #5409, Reward: -108.87027611266697

Intrinsic Reward: 143.8099365234375
Update #5410, Reward: -108.39323176727133

Intrinsic Reward: 143.7787322998047
Update #5411, Reward: -106.66073114006221

Intrinsic Reward: 143.81736755371094
Update #5412, Reward: -108.9868681191417

Intrinsic Reward: 143.69056701660156
Update #5413, Reward: -108.68215407715924

Intrinsic Reward: 143.63803100585938
Update #5414, R

Intrinsic Reward: 143.82444763183594
Update #5510, Reward: -107.82574269563953

Intrinsic Reward: 143.78257751464844
Update #5511, Reward: -108.13672411254669

Intrinsic Reward: 143.80296325683594
Update #5512, Reward: -108.9992643400443

Intrinsic Reward: 143.78744506835938
Update #5513, Reward: -107.01902994192206

Intrinsic Reward: 143.8106231689453
Update #5514, Reward: -108.21835049333237

Intrinsic Reward: 143.792236328125
Update #5515, Reward: -106.89866027252128

Intrinsic Reward: 143.71795654296875
Update #5516, Reward: -106.57516045734783

Intrinsic Reward: 143.77337646484375
Update #5517, Reward: -107.34363467587531

Intrinsic Reward: 143.8802947998047
Update #5518, Reward: -113.0006393843436

Intrinsic Reward: 143.6957550048828
Update #5519, Reward: -107.54747796345006

Intrinsic Reward: 143.72088623046875
Update #5520, Reward: -107.10818857397201

Intrinsic Reward: 143.74964904785156
Update #5521, Reward: -108.533465801863

Intrinsic Reward: 143.87252807617188
Update #5522

Intrinsic Reward: 143.5983123779297
Update #5615, Reward: -108.30716402386687

Intrinsic Reward: 143.8924560546875
Update #5616, Reward: -112.90723431664458

Intrinsic Reward: 143.88751220703125
Update #5617, Reward: -113.63545850960725

Intrinsic Reward: 143.71487426757812
Update #5618, Reward: -107.52052698074282

Intrinsic Reward: 143.88490295410156
Update #5619, Reward: -111.95093810311147

Intrinsic Reward: 143.87840270996094
Update #5620, Reward: -109.97778995172679

Intrinsic Reward: 143.88687133789062
Update #5621, Reward: -113.32488998574019

Intrinsic Reward: 143.8050994873047
Update #5622, Reward: -107.58580221534956

Intrinsic Reward: 143.82431030273438
Update #5623, Reward: -108.44567323415976

Intrinsic Reward: 143.9133758544922
Update #5624, Reward: -111.92782295365755

Intrinsic Reward: 143.64593505859375
Update #5625, Reward: -123.6529942317158

Intrinsic Reward: 143.897216796875
Update #5626, Reward: -112.46773051283571

Intrinsic Reward: 143.8894500732422
Update #562

Intrinsic Reward: 143.63864135742188
Update #5719, Reward: -121.97185774475895

Intrinsic Reward: 143.8490753173828
Update #5720, Reward: -120.22634573676065

Intrinsic Reward: 143.48593139648438
Update #5721, Reward: -122.93391098293912

Intrinsic Reward: 143.50274658203125
Update #5722, Reward: -122.35929503086396

Intrinsic Reward: 143.89511108398438
Update #5723, Reward: -116.07939140747798

Intrinsic Reward: 143.7489471435547
Update #5724, Reward: -125.03970317443213

Intrinsic Reward: 143.6298828125
Update #5725, Reward: -124.43326516572075

Intrinsic Reward: 143.92478942871094
Update #5726, Reward: -114.99242580623118

Intrinsic Reward: 143.7845916748047
Update #5727, Reward: -125.32085803389053

Intrinsic Reward: 143.92721557617188
Update #5728, Reward: -114.06899193938821

Intrinsic Reward: 143.36036682128906
Update #5729, Reward: -123.88342525228981

Intrinsic Reward: 143.89723205566406
Update #5730, Reward: -118.23844825126852

Intrinsic Reward: 143.51394653320312
Update #57

Intrinsic Reward: 143.89256286621094
Update #5826, Reward: -108.48336941659451

Intrinsic Reward: 143.83612060546875
Update #5827, Reward: -106.42498411827845

Intrinsic Reward: 143.7864532470703
Update #5828, Reward: -106.12670797637975

Intrinsic Reward: 143.83868408203125
Update #5829, Reward: -107.88782689254235

Intrinsic Reward: 143.7498321533203
Update #5830, Reward: -108.15507689139682

Intrinsic Reward: 143.89837646484375
Update #5831, Reward: -113.94117788656678

Intrinsic Reward: 143.87986755371094
Update #5832, Reward: -107.60258826717734

Intrinsic Reward: 143.6984405517578
Update #5833, Reward: -107.83853618426808

Intrinsic Reward: 143.86988830566406
Update #5834, Reward: -107.58532983819643

Intrinsic Reward: 143.76992797851562
Update #5835, Reward: -106.49356688676215

Intrinsic Reward: 143.88150024414062
Update #5836, Reward: -109.00639599408086

Intrinsic Reward: 143.58804321289062
Update #5837, Reward: -108.73037945308536

Intrinsic Reward: 143.72396850585938
Update

Intrinsic Reward: 143.9037628173828
Update #5930, Reward: -114.03887390717492

Intrinsic Reward: 143.7825927734375
Update #5931, Reward: -107.64566494601716

Intrinsic Reward: 143.84063720703125
Update #5932, Reward: -108.97276880368528

Intrinsic Reward: 143.89073181152344
Update #5933, Reward: -110.66362837496462

Intrinsic Reward: 143.79287719726562
Update #5934, Reward: -107.93148646948673

Intrinsic Reward: 143.87625122070312
Update #5935, Reward: -110.5859717128401

Intrinsic Reward: 143.62631225585938
Update #5936, Reward: -107.6231908453647

Intrinsic Reward: 143.9076385498047
Update #5937, Reward: -116.60636120079023

Intrinsic Reward: 143.84457397460938
Update #5938, Reward: -111.63974654445363

Intrinsic Reward: 143.91636657714844
Update #5939, Reward: -113.00513929962554

Intrinsic Reward: 143.88818359375
Update #5940, Reward: -110.73985350762369

Intrinsic Reward: 143.86605834960938
Update #5941, Reward: -110.09817622247836

Intrinsic Reward: 143.7845458984375
Update #5942

Intrinsic Reward: 143.89784240722656
Update #6036, Reward: -112.16205715405818

Intrinsic Reward: 143.87234497070312
Update #6037, Reward: -117.16202551373343

Intrinsic Reward: 143.8278045654297
Update #6038, Reward: -107.03590736439142

Intrinsic Reward: 143.8772735595703
Update #6039, Reward: -109.82063088490938

Intrinsic Reward: 143.8964080810547
Update #6040, Reward: -111.778772814136

Intrinsic Reward: 143.8390350341797
Update #6041, Reward: -108.4961685753471

Intrinsic Reward: 143.89283752441406
Update #6042, Reward: -109.69462739251429

Intrinsic Reward: 143.81610107421875
Update #6043, Reward: -107.44462601321315

Intrinsic Reward: 143.89996337890625
Update #6044, Reward: -112.3013820039425

Intrinsic Reward: 143.90353393554688
Update #6045, Reward: -112.67027201176249

Intrinsic Reward: 143.87557983398438
Update #6046, Reward: -109.28097650211677

Intrinsic Reward: 143.86587524414062
Update #6047, Reward: -110.17007849202182

Intrinsic Reward: 143.8557586669922
Update #6048

Intrinsic Reward: 143.88653564453125
Update #6143, Reward: -107.86277138978429

Intrinsic Reward: 143.88931274414062
Update #6144, Reward: -112.04952545391396

Intrinsic Reward: 143.90538024902344
Update #6145, Reward: -110.45503605124354

Intrinsic Reward: 143.8488311767578
Update #6146, Reward: -108.2319863847134

Intrinsic Reward: 143.9227294921875
Update #6147, Reward: -115.16954322045297

Intrinsic Reward: 143.89013671875
Update #6148, Reward: -108.85133292532154

Intrinsic Reward: 143.93658447265625
Update #6149, Reward: -111.59287564446716

Intrinsic Reward: 143.90707397460938
Update #6150, Reward: -112.95782198620836

Intrinsic Reward: 143.86148071289062
Update #6151, Reward: -119.90319370017573

Intrinsic Reward: 143.75250244140625
Update #6152, Reward: -106.13017922926508

Intrinsic Reward: 143.9158935546875
Update #6153, Reward: -113.08868093102922

Intrinsic Reward: 143.93362426757812
Update #6154, Reward: -113.06129906455863

Intrinsic Reward: 143.91134643554688
Update #61

Intrinsic Reward: 143.8240966796875
Update #6247, Reward: -107.32045986008954

Intrinsic Reward: 143.7980194091797
Update #6248, Reward: -107.63512729085981

Intrinsic Reward: 143.9116973876953
Update #6249, Reward: -112.23887662542673

Intrinsic Reward: 143.8314666748047
Update #6250, Reward: -108.37697466387228

Intrinsic Reward: 143.88864135742188
Update #6251, Reward: -116.22989656082603

Intrinsic Reward: 143.7276611328125
Update #6252, Reward: -108.34098979607845

Intrinsic Reward: 143.7932891845703
Update #6253, Reward: -105.27827641172831

Intrinsic Reward: 143.7456512451172
Update #6254, Reward: -107.44616495459402

Intrinsic Reward: 143.88494873046875
Update #6255, Reward: -109.69958765380706

Intrinsic Reward: 143.7201385498047
Update #6256, Reward: -106.83452858293616

Intrinsic Reward: 143.86920166015625
Update #6257, Reward: -110.00671914730407

Intrinsic Reward: 143.86407470703125
Update #6258, Reward: -110.0900050217292

Intrinsic Reward: 143.81251525878906
Update #6259

Intrinsic Reward: 143.7129364013672
Update #6352, Reward: -121.81110157035354

Intrinsic Reward: 143.9116668701172
Update #6353, Reward: -112.11465133520154

Intrinsic Reward: 143.88851928710938
Update #6354, Reward: -108.87117547153

Intrinsic Reward: 143.9418182373047
Update #6355, Reward: -114.6147762608907

Intrinsic Reward: 143.92376708984375
Update #6356, Reward: -110.46137575128985

Intrinsic Reward: 143.93511962890625
Update #6357, Reward: -115.34348691546545

Intrinsic Reward: 143.88531494140625
Update #6358, Reward: -108.94042282135102

Intrinsic Reward: 143.94578552246094
Update #6359, Reward: -114.96649918973011

Intrinsic Reward: 143.81463623046875
Update #6360, Reward: -126.76349495301767

Intrinsic Reward: 143.87789916992188
Update #6361, Reward: -109.79906587877187

Intrinsic Reward: 143.91241455078125
Update #6362, Reward: -115.72058785880978

Intrinsic Reward: 143.87611389160156
Update #6363, Reward: -110.04948407289076

Intrinsic Reward: 143.68148803710938
Update #63

Intrinsic Reward: 143.68692016601562
Update #6456, Reward: -125.0875697478559

Intrinsic Reward: 143.9307861328125
Update #6457, Reward: -116.15105434504213

Intrinsic Reward: 143.94680786132812
Update #6458, Reward: -112.64117647592226

Intrinsic Reward: 143.93907165527344
Update #6459, Reward: -114.6781739977027

Intrinsic Reward: 143.6730194091797
Update #6460, Reward: -124.6891782024776

Intrinsic Reward: 143.92225646972656
Update #6461, Reward: -111.62760382912805

Intrinsic Reward: 143.67904663085938
Update #6462, Reward: -121.18634399461374

Intrinsic Reward: 143.8700408935547
Update #6463, Reward: -119.2748425035427

Intrinsic Reward: 143.91900634765625
Update #6464, Reward: -116.70760209385182

Intrinsic Reward: 143.93777465820312
Update #6465, Reward: -107.96926385905655

Intrinsic Reward: 143.95095825195312
Update #6466, Reward: -110.44192814277113

Intrinsic Reward: 143.7723846435547
Update #6467, Reward: -119.86348920854802

Intrinsic Reward: 143.93069458007812
Update #646

Intrinsic Reward: 143.93402099609375
Update #6562, Reward: -108.79016656649858

Intrinsic Reward: 143.9314422607422
Update #6563, Reward: -108.57078049351462

Intrinsic Reward: 143.9438018798828
Update #6564, Reward: -108.84206613751935

Intrinsic Reward: 143.9293975830078
Update #6565, Reward: -107.39147051119929

Intrinsic Reward: 143.9497833251953
Update #6566, Reward: -112.06868072223354

Intrinsic Reward: 143.92910766601562
Update #6567, Reward: -107.8052005327108

Intrinsic Reward: 143.9471893310547
Update #6568, Reward: -114.14781582319117

Intrinsic Reward: 143.9508514404297
Update #6569, Reward: -108.54838507859347

Intrinsic Reward: 143.95176696777344
Update #6570, Reward: -108.81773806799576

Intrinsic Reward: 143.90325927734375
Update #6571, Reward: -108.6791275159878

Intrinsic Reward: 143.9497833251953
Update #6572, Reward: -112.64389324163211

Intrinsic Reward: 143.9156036376953
Update #6573, Reward: -109.89514990483598

Intrinsic Reward: 143.9537353515625
Update #6574, 

Intrinsic Reward: 143.812255859375
Update #6668, Reward: -52.1596933795058

Intrinsic Reward: 143.53659057617188
Update #6669, Reward: -124.11769834975588

Intrinsic Reward: 143.87742614746094
Update #6670, Reward: -121.1450234840947

Intrinsic Reward: 143.8952178955078
Update #6671, Reward: -126.57203335503054

Intrinsic Reward: 143.68467712402344
Update #6672, Reward: -123.9727841350579

Intrinsic Reward: 143.63827514648438
Update #6673, Reward: -123.75098582764093

Intrinsic Reward: 143.80490112304688
Update #6674, Reward: -124.44569838622586

Intrinsic Reward: 143.7677459716797
Update #6675, Reward: -124.64779719669559

Intrinsic Reward: 143.74205017089844
Update #6676, Reward: -125.07663503243961

Intrinsic Reward: 143.5199737548828
Update #6677, Reward: -123.65694335803825

Intrinsic Reward: 143.77767944335938
Update #6678, Reward: -124.09852158512795

Intrinsic Reward: 143.84793090820312
Update #6679, Reward: -127.64833690161197

Intrinsic Reward: 143.7471923828125
Update #6680,

Intrinsic Reward: 143.8724365234375
Update #6772, Reward: -52.10451172396165

Intrinsic Reward: 143.9644012451172
Update #6773, Reward: -110.13663281886714

Intrinsic Reward: 143.86886596679688
Update #6774, Reward: -53.979509755187365

Intrinsic Reward: 143.9630889892578
Update #6775, Reward: -111.92822423710798

Intrinsic Reward: 143.91082763671875
Update #6776, Reward: -119.8511774389955

Intrinsic Reward: 143.97091674804688
Update #6777, Reward: -112.50857904992937

Intrinsic Reward: 143.96401977539062
Update #6778, Reward: -110.72553371726535

Intrinsic Reward: 143.9702606201172
Update #6779, Reward: -110.65526353832769

Intrinsic Reward: 143.9503631591797
Update #6780, Reward: -110.06573422365325

Intrinsic Reward: 143.96815490722656
Update #6781, Reward: -111.3087560357588

Intrinsic Reward: 143.71311950683594
Update #6782, Reward: -123.28565011783566

Intrinsic Reward: 143.82363891601562
Update #6783, Reward: -125.21415286695586

Intrinsic Reward: 143.89248657226562
Update #678

Intrinsic Reward: 143.8780059814453
Update #6878, Reward: -131.05933727017728

Intrinsic Reward: 143.9473876953125
Update #6879, Reward: -117.67105852709898

Intrinsic Reward: 143.96768188476562
Update #6880, Reward: -112.00372797562802

Intrinsic Reward: 143.9632568359375
Update #6881, Reward: -111.40184794912611

Intrinsic Reward: 143.95977783203125
Update #6882, Reward: -109.95955293681162

Intrinsic Reward: 143.96682739257812
Update #6883, Reward: -110.14709483696024

Intrinsic Reward: 143.96661376953125
Update #6884, Reward: -112.19388780494903

Intrinsic Reward: 143.7101287841797
Update #6885, Reward: -123.15845987405318

Intrinsic Reward: 143.96890258789062
Update #6886, Reward: -110.39220737821857

Intrinsic Reward: 143.96810913085938
Update #6887, Reward: -110.00362805696204

Intrinsic Reward: 143.9488525390625
Update #6888, Reward: -118.08257250011899

Intrinsic Reward: 143.95376586914062
Update #6889, Reward: -113.0860328153465

Intrinsic Reward: 143.9637908935547
Update #68

Intrinsic Reward: 143.96347045898438
Update #6984, Reward: -112.5681774665018

Intrinsic Reward: 143.96812438964844
Update #6985, Reward: -114.07052107878837

Intrinsic Reward: 143.9683074951172
Update #6986, Reward: -108.31109932731837

Intrinsic Reward: 143.97164916992188
Update #6987, Reward: -112.79015142055663

Intrinsic Reward: 143.974365234375
Update #6988, Reward: -113.95180262366559

Intrinsic Reward: 143.95423889160156
Update #6989, Reward: -116.76034346547598

Intrinsic Reward: 143.9683074951172
Update #6990, Reward: -111.26712359449206

Intrinsic Reward: 143.96231079101562
Update #6991, Reward: -112.53207347230365

Intrinsic Reward: 143.97262573242188
Update #6992, Reward: -112.84984318228376

Intrinsic Reward: 143.739501953125
Update #6993, Reward: -123.81473748485494

Intrinsic Reward: 143.840087890625
Update #6994, Reward: -124.93244494234212

Intrinsic Reward: 143.87013244628906
Update #6995, Reward: -125.95609270722048

Intrinsic Reward: 143.9682159423828
Update #6996,

Intrinsic Reward: 143.96559143066406
Update #7091, Reward: -109.30671788942877

Intrinsic Reward: 143.9582061767578
Update #7092, Reward: -116.41488343334757

Intrinsic Reward: 143.7359619140625
Update #7093, Reward: -122.5303349970368

Intrinsic Reward: 143.97386169433594
Update #7094, Reward: -111.8300633850079

Intrinsic Reward: 143.8599395751953
Update #7095, Reward: -124.4756243723159

Intrinsic Reward: 143.9512481689453
Update #7096, Reward: -112.51389477600965

Intrinsic Reward: 143.9720916748047
Update #7097, Reward: -111.2714380072821

Intrinsic Reward: 143.96847534179688
Update #7098, Reward: -115.50842731622978

Intrinsic Reward: 143.96673583984375
Update #7099, Reward: -114.1602826138232

Intrinsic Reward: 143.968017578125
Update #7100, Reward: -112.37806679434702

Intrinsic Reward: 143.91212463378906
Update #7101, Reward: -119.7321395948548

Intrinsic Reward: 143.72267150878906
Update #7102, Reward: -121.40897191951301

Intrinsic Reward: 143.72698974609375
Update #7103, Re

Intrinsic Reward: 143.96693420410156
Update #7195, Reward: -109.57410445786702

Intrinsic Reward: 143.93109130859375
Update #7196, Reward: -108.69511297274133

Intrinsic Reward: 143.9631805419922
Update #7197, Reward: -108.31491803688309

Intrinsic Reward: 143.9654998779297
Update #7198, Reward: -109.48502217018418

Intrinsic Reward: 143.9501190185547
Update #7199, Reward: -106.89441384964995

Intrinsic Reward: 143.96603393554688
Update #7200, Reward: -108.91524778682118

Intrinsic Reward: 143.9632568359375
Update #7201, Reward: -109.27193641656886

Intrinsic Reward: 143.96949768066406
Update #7202, Reward: -111.15152369780155

Intrinsic Reward: 143.92352294921875
Update #7203, Reward: -105.44912621930862

Intrinsic Reward: 143.9114990234375
Update #7204, Reward: -108.48607630888745

Intrinsic Reward: 143.9170684814453
Update #7205, Reward: -106.406840073814

Intrinsic Reward: 143.95494079589844
Update #7206, Reward: -107.72054993415438

Intrinsic Reward: 143.9390411376953
Update #7207

Intrinsic Reward: 143.9354705810547
Update #7299, Reward: -105.94002119604187

Intrinsic Reward: 143.96351623535156
Update #7300, Reward: -111.77993848585959

Intrinsic Reward: 143.9591827392578
Update #7301, Reward: -106.69854275671827

Intrinsic Reward: 143.93011474609375
Update #7302, Reward: -105.3355582019066

Intrinsic Reward: 143.9670867919922
Update #7303, Reward: -113.42967763788253

Intrinsic Reward: 143.95895385742188
Update #7304, Reward: -109.4464072782925

Intrinsic Reward: 143.9672088623047
Update #7305, Reward: -110.388974641166

Intrinsic Reward: 143.9689178466797
Update #7306, Reward: -111.60504936045098

Intrinsic Reward: 143.96139526367188
Update #7307, Reward: -109.04765046746036

Intrinsic Reward: 143.97293090820312
Update #7308, Reward: -112.1584969421855

Intrinsic Reward: 143.90640258789062
Update #7309, Reward: -120.58184030883262

Intrinsic Reward: 143.95526123046875
Update #7310, Reward: -106.52669854818222

Intrinsic Reward: 143.969482421875
Update #7311, R

Intrinsic Reward: 143.96873474121094
Update #7403, Reward: -111.34834616835664

Intrinsic Reward: 143.95245361328125
Update #7404, Reward: -110.39237231368323

Intrinsic Reward: 143.9400177001953
Update #7405, Reward: -108.38982498590151

Intrinsic Reward: 143.96055603027344
Update #7406, Reward: -110.53148399184272

Intrinsic Reward: 143.96864318847656
Update #7407, Reward: -109.92790959369019

Intrinsic Reward: 143.9664764404297
Update #7408, Reward: -108.76114884335858

Intrinsic Reward: 143.9467010498047
Update #7409, Reward: -106.54706718102781

Intrinsic Reward: 143.9485321044922
Update #7410, Reward: -111.64958847695216

Intrinsic Reward: 143.9714813232422
Update #7411, Reward: -109.54305774916274

Intrinsic Reward: 143.95413208007812
Update #7412, Reward: -113.98269039696144

Intrinsic Reward: 143.95867919921875
Update #7413, Reward: -109.81445521587133

Intrinsic Reward: 143.9410400390625
Update #7414, Reward: -117.52743708526157

Intrinsic Reward: 143.96640014648438
Update #7