In [1]:
import gym
import numpy as np
import copy, random, os, subprocess, cv2
import tensorflow as tf
import keras.backend as K
from tensorflow.keras import layers, models, regularizers

  import imp
  'nearest': pil_image.NEAREST,
  'bilinear': pil_image.BILINEAR,
  'bicubic': pil_image.BICUBIC,
  if hasattr(pil_image, 'HAMMING'):
  if hasattr(pil_image, 'BOX'):
  if hasattr(pil_image, 'LANCZOS'):


In [2]:
MODEL_NAME = 'model06'
BUCKET = 'gs://etsuji-car-racing-v2'
os.environ['BUCKET'] = BUCKET
!gsutil mb -c regional -l us-west1 $BUCKET
!gsutil ls $BUCKET

Creating gs://etsuji-car-racing-v2/...
ServiceException: 409 A Cloud Storage bucket named 'etsuji-car-racing-v2' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.
gs://etsuji-car-racing-v2/model04/
gs://etsuji-car-racing-v2/model05/
gs://etsuji-car-racing-v2/model06/
gs://etsuji-car-racing-v2/model07/


In [3]:
class ApplyL1Weight(layers.Layer):
    def __init__(self, l1=0.01, **kwargs):
        self.filter_shape = None
        self.l1 = l1
        super(ApplyL1Weight, self).__init__(**kwargs)
        
    def build(self, input_shape):
        self.kernel = self.add_weight(name='weights', shape=[input_shape[3]],
                                      regularizer=regularizers.L1(self.l1))
        self.filter_shape = input_shape

    def get_config(self):
        config = super().get_config()
        config.update({
            'l1': self.l1,
        })
        return config
    
    def call(self, inputs, **kwargs):
        return inputs * kernel
    
# Base model
class QValue:
    def __init__(self):
        self.model = self.build_model()

    def build_model(self):
        cnn_input = layers.Input(shape=(48, 48, 3), name='cnn_input')
        cnn1 = layers.Conv2D(16, (5, 5), padding='same',
                             use_bias=True, activation='relu',
                             name='cnn1')(cnn_input)
        pool1 = layers.MaxPooling2D((2, 2), name='pool1')(cnn1)
        cnn2 = layers.Conv2D(32, (5, 5), padding='same',
                             use_bias=True, activation='relu',
                             name='cnn2')(pool1)        
        pool2 = layers.MaxPooling2D((2, 2), name='pool2')(cnn2)
        weighted_filters = ApplyL1Weight(name='weighted_filters')(pool2)

        cnn_flatten = layers.Flatten(name='flatten')(weighted_filters)
        action_input = layers.Input(shape=(5,), name='action_input')
        combined = layers.concatenate([cnn_flatten, action_input], name='concat')
        hidden1 = layers.Dense(1024, activation='relu', name='dense1')(combined)
        hidden2 = layers.Dense(512, activation='relu', name='dense2')(hidden1)
        q_value = layers.Dense(1, name='output')(hidden2)

        model = models.Model(inputs=[cnn_input, action_input], outputs=q_value)
        model.compile(loss='mse')
        return model

    def get_action(self, state):
        states = []
        actions = []
        for a in range(5):
            states.append(np.array(state))
            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
  
        q_values = self.model.predict([np.array(states), np.array(actions)])
        optimal_action = np.argmax(q_values)
        return optimal_action, q_values[optimal_action][0]

In [4]:
def join_frames(o0, o1, o2):
    gray_image0 = cv2.cvtColor(cv2.resize(o0, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image1 = cv2.cvtColor(cv2.resize(o1, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image2 = cv2.cvtColor(cv2.resize(o2, (48, 48)), cv2.COLOR_RGB2GRAY)
    
    return np.array(
        [gray_image0.transpose(),
         gray_image1.transpose(),
         gray_image2.transpose()]).transpose()

In [5]:
def get_episode(environ, q_value, epsilon):
    episode = []
    o0 = environ.reset()
    o1 = copy.deepcopy(o0)
    o2 = copy.deepcopy(o0)
    total_r = 0

    if epsilon > 0:
        keep_count = 3
    else:
        keep_count = 1

    c = 0
    while True:
        if c % keep_count == 0: # Get new action
            if np.random.random() < epsilon:
                a = np.random.randint(5)
            else:
                a, _ = q_value.get_action(join_frames(o0, o1, o2))
        c += 1
        o_new, r, done, inf = environ.step(a)                
        total_r += r

        # Terminate episode when total reward becomes negative
        if total_r < 0:
            done = 1

        if done:
            # Terminal state is to achive more than 990 or get out of the field.
            if total_r > 990 or r < -99:
                episode.append((join_frames(o0, o1, o2), a, r, None))
            break
        else:
            episode.append((join_frames(o0, o1, o2), a, r, join_frames(o1, o2, o_new)))
        o0, o1, o2 = o1, o2, o_new

    print('epsilon={}, episode length={}, total rewards={}'.format(epsilon, len(episode), total_r))
    return episode, total_r

In [6]:
def train(environ, q_value, epsilon, checkpoint=0):
    if checkpoint > 0:
        filename = 'car-racing-v2-{}-{}.hd5'.format(checkpoint, MODEL_NAME)
        subprocess.run(['gsutil', 'cp', '{}/{}/{}'.format(BUCKET, MODEL_NAME, filename), './'])
        print('load model {}'.format(filename))
        q_value.model = models.load_model(filename)
        os.remove(filename)

    experience = []
    good_experience = []
    best_r = [-100, -100, -100]

    for n in range(checkpoint + 1, checkpoint + 1000):
        print('iteration {}'.format(n))

        total_len = 0
        if n % 3 == 0:
            print('Testing the current performance...')
            episode, total_r = get_episode(environ, q_value, epsilon=0)
            with open('result.txt', 'a') as f:
                f.write('{},{},{},{}\n'.format(n, epsilon, len(episode), total_r))
            filename = 'car-racing-v2-{}-{}.hd5'.format(n, MODEL_NAME)
            q_value.model.save(filename, save_format='h5')
            subprocess.run(['gsutil', '-m', 'cp',
                            '{}'.format(filename), '{}/{}/'.format(BUCKET, MODEL_NAME)])
            os.remove(filename)
            experience += episode
            total_len += len(episode)

        while total_len < 500:
            episode, total_r = get_episode(environ, q_value, epsilon)
            total_len += len(episode)
            experience += episode

            # Keep the top 3 episodes
            if total_r > min(best_r):
                best_r = best_r[1:] + [total_r]
                good_experience += episode
                if len(good_experience) > 999 * 3:
                    good_experience = good_experience[-999 * 3:]

            
        if len(experience) > 999 * 5: # remember last 5 episodes
            experience = experience[-999 * 5:]

        epsilon = (epsilon - 0.2) * 0.99 + 0.2

        print('Training the model...')
        # Use latest episode + past episodes (sampling) + top 3 episode (sampling)
        latest_experience = experience[-total_len:]
        past_experience = experience[:-total_len]
        examples = latest_experience + \
            random.sample(past_experience, min(len(past_experience), 999)) + \
            random.sample(good_experience, min(len(good_experience), 999))
        
        # Show some statistics
        print('experience length={}'.format(len(experience)))
        print('number of examples={}'.format(len(examples)))
        print('best total reward = ', best_r)
        np.random.shuffle(examples)
                        
        states, actions, labels = [], [], []
        for state, a, r, state_new in examples:
            states.append(np.array(state))

            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
            
            if state_new is None:   # Terminal state
                q_new = 0
            else:
                _, q_new = q_value.get_action(state_new)
            labels.append(np.array(r + q_new))

        hist = q_value.model.fit(
            [np.array(states), np.array(actions)], np.array(labels),
            batch_size=50, epochs=10, verbose=0)
        print('loss = {}'.format(hist.history['loss']))

In [7]:
env = gym.make("CarRacing-v2", continuous=False)
q_value = QValue()
q_value.model.summary()

  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
  "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
2022-08-01 23:46:48.868330: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-01 23:46:48.881243: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-01 23:46:48.881906: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at l

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 cnn_input (InputLayer)         [(None, 48, 48, 3)]  0           []                               
                                                                                                  
 cnn1 (Conv2D)                  (None, 48, 48, 16)   1216        ['cnn_input[0][0]']              
                                                                                                  
 pool1 (MaxPooling2D)           (None, 24, 24, 16)   0           ['cnn1[0][0]']                   
                                                                                                  
 cnn2 (Conv2D)                  (None, 24, 24, 32)   12832       ['pool1[0][0]']                  
                                                                                              

2022-08-01 23:46:48.883051: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-01 23:46:48.884062: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-01 23:46:48.884753: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-01 23:46:48.885356: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [None]:
train(env, q_value, epsilon=1.0, checkpoint=0)

iteration 1
epsilon=1.0, episode length=790, total rewards=-0.04861660079024974
Training the model...
experience length=790
number of examples=1580
best total reward =  [-100, -100, -0.04861660079024974]


2022-08-01 23:46:57.596929: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8200


loss = [30063.845703125, 18.222278594970703, 5.5592942237854, 1.5709571838378906, 0.6793814897537231, 0.6274317502975464, 0.6696242094039917, 0.7548159956932068, 0.5571935772895813, 0.6653337478637695]
iteration 2
epsilon=0.992, episode length=131, total rewards=-0.04210526315789062
epsilon=0.992, episode length=111, total rewards=-0.005970149253715834
epsilon=0.992, episode length=182, total rewards=-0.051824817518234534
epsilon=0.992, episode length=185, total rewards=-0.08148148148143197
Training the model...
experience length=1399
number of examples=2398
best total reward =  [-0.04861660079024974, -0.04210526315789062, -0.005970149253715834]
loss = [0.4442785680294037, 0.44378870725631714, 0.3842591643333435, 0.371902734041214, 0.3642255961894989, 0.3419865667819977, 0.34576311707496643, 0.32436221837997437, 0.3210403025150299, 0.31514447927474976]
iteration 3
Testing the current performance...
epsilon=0, episode length=999, total rewards=60.00000000000463


Copying file://car-racing-v2-3-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=2398
number of examples=2997
best total reward =  [-0.04861660079024974, -0.04210526315789062, -0.005970149253715834]
loss = [0.46547800302505493, 0.39634308218955994, 0.3981923460960388, 0.3745150566101074, 0.35997647047042847, 0.38221824169158936, 0.345010906457901, 0.36476361751556396, 0.3412971794605255, 0.34975534677505493]
iteration 4
epsilon=0.9762392000000002, episode length=69, total rewards=-0.006993006992998846
epsilon=0.9762392000000002, episode length=95, total rewards=-0.015335463258780296
epsilon=0.9762392000000002, episode length=168, total rewards=-0.06498316498312318
epsilon=0.9762392000000002, episode length=198, total rewards=-0.0324503311257984
Training the model...
experience length=2928
number of examples=2528
best total reward =  [-0.005970149253715834, -0.006993006992998846, -0.015335463258780296]
loss = [0.3834962546825409, 0.35106992721557617, 0.2941764295101166, 0.30434924364089966, 0.28610384464263916, 0.2967178523540

Copying file://car-racing-v2-6-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.9607920399200003, episode length=144, total rewards=-0.0072463768115753335
epsilon=0.9607920399200003, episode length=306, total rewards=-0.048659003831331676
Training the model...
experience length=4186
number of examples=2693
best total reward =  [-0.012658227848073994, -0.011036789297652144, -0.0072463768115753335]
loss = [4.150639533996582, 4.036187648773193, 3.9332058429718018, 3.673279047012329, 3.303196430206299, 3.1231229305267334, 2.3631932735443115, 2.5621514320373535, 2.0435471534729004, 2.0155444145202637]
iteration 7
epsilon=0.9531841195208004, episode length=273, total rewards=-0.002739726027337158
epsilon=0.9531841195208004, episode length=256, total rewards=-0.05897435897434744
Training the model...
experience length=4715
number of examples=2527
best total reward =  [-0.011036789297652144, -0.0072463768115753335, -0.002739726027337158]
loss = [0.41914811730384827, 0.3715205490589142, 0.33286213874816895, 0.29146328568458557, 0.3110632300376892, 0.3127620518207

Copying file://car-racing-v2-9-model06.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.9381957555423366, episode length=138, total rewards=-0.011111111111093502
epsilon=0.9381957555423366, episode length=149, total rewards=-0.07462686567162957
Training the model...
experience length=4995
number of examples=2547
best total reward =  [-0.0072463768115753335, -0.002739726027337158, -0.010989010988991604]
loss = [0.4702812135219574, 0.37688755989074707, 0.3720483183860779, 0.3327919542789459, 0.31305959820747375, 0.292953759431839, 0.3163324296474457, 0.29355233907699585, 0.27923133969306946, 0.2933659255504608]
iteration 10
epsilon=0.9308137979869133, episode length=82, total rewards=-0.03553719008263842
epsilon=0.9308137979869133, episode length=133, total rewards=-0.06666666666666354
epsilon=0.9308137979869133, episode length=766, total rewards=-0.07164750957864396
Training the model...
experience length=4995
number of examples=2979
best total reward =  [-0.0072463768115753335, -0.002739726027337158, -0.010989010988991604]
loss = [2.9393301010131836, 1.914083123

Copying file://car-racing-v2-12-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.9162706034069739, episode length=436, total rewards=-0.07583892617439791
Training the model...
experience length=4995
number of examples=2814
best total reward =  [-0.0072463768115753335, -0.002739726027337158, -0.010989010988991604]
loss = [4.253825664520264, 3.8444957733154297, 3.2180612087249756, 2.7653369903564453, 1.5345488786697388, 2.7556636333465576, 1.4793692827224731, 0.3703393340110779, 1.6093699932098389, 0.4444044232368469]
iteration 13
epsilon=0.9091078973729041, episode length=809, total rewards=-0.0140845070420543
Training the model...
experience length=4995
number of examples=2807
best total reward =  [-0.0072463768115753335, -0.002739726027337158, -0.010989010988991604]
loss = [0.5396531820297241, 0.46899518370628357, 0.4042087197303772, 0.37675702571868896, 0.3824038505554199, 0.34561577439308167, 0.3446112275123596, 0.3036534786224365, 0.316459596157074, 0.29633650183677673]
iteration 14
epsilon=0.902016818399175, episode length=206, total rewards=-0.05103

Copying file://car-racing-v2-15-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.8949966502151834, episode length=306, total rewards=-0.08775510204073211
Training the model...
experience length=4995
number of examples=2774
best total reward =  [-0.0072463768115753335, -0.002739726027337158, -0.010989010988991604]
loss = [8.315507888793945, 6.264093399047852, 4.837151527404785, 4.389252185821533, 5.986559867858887, 4.451865196228027, 4.111301422119141, 5.080657005310059, 4.063793659210205, 4.49527645111084]
iteration 16
epsilon=0.8880466837130316, episode length=196, total rewards=-0.0921568627450633
epsilon=0.8880466837130316, episode length=594, total rewards=-0.09405940594070583
Training the model...
experience length=4995
number of examples=2788
best total reward =  [-0.0072463768115753335, -0.002739726027337158, -0.010989010988991604]
loss = [0.6374443173408508, 0.511408805847168, 0.47866103053092957, 0.4756714403629303, 0.4314931035041809, 0.42121621966362, 0.39591142535209656, 0.39976951479911804, 0.37846800684928894, 0.3492388427257538]
iteration 1

Copying file://car-racing-v2-18-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.8743545547071423, episode length=397, total rewards=-0.06490066225153446
Training the model...
experience length=4995
number of examples=2726
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [0.5783881545066833, 0.4713136553764343, 0.4400988221168518, 0.3928547203540802, 0.3801286518573761, 0.4042336940765381, 0.3544534146785736, 0.33215636014938354, 0.328170508146286, 0.3499358296394348]
iteration 19
epsilon=0.8676110091600708, episode length=294, total rewards=-0.08823529411761055
epsilon=0.8676110091600708, episode length=996, total rewards=-0.06900369003673767
Training the model...
experience length=4995
number of examples=3288
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [3.9152092933654785, 3.6770591735839844, 3.541332244873047, 3.417829751968384, 3.2662065029144287, 3.031237840652466, 2.8332834243774414, 2.6392812728881836, 2.1443214416503906, 1.7466093301773071]
itera

Copying file://car-racing-v2-21-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2853
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [0.8294024467468262, 0.6329306960105896, 0.5575414896011353, 0.5756491422653198, 0.5563976168632507, 0.5260708332061768, 0.503087043762207, 0.47980156540870667, 0.44304975867271423, 0.4218588173389435]
iteration 22
epsilon=0.8477822945770077, episode length=214, total rewards=-0.0932721712538147
epsilon=0.8477822945770077, episode length=289, total rewards=-0.06109324758837187
Training the model...
experience length=4995
number of examples=2501
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [0.5770189166069031, 0.4965059757232666, 0.44674152135849, 0.3942188620567322, 0.4492581784725189, 0.36995118856430054, 0.3899955749511719, 0.3269583582878113, 0.3380366265773773, 0.36380043625831604]
iteration 23
epsilon=0.8413044716312377, episode length=782, total rewards=-0.0687074

Copying file://car-racing-v2-24-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.8348914269149252, episode length=644, total rewards=-0.09322033898284876
Training the model...
experience length=4995
number of examples=3010
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [0.613553524017334, 0.5117899775505066, 0.4503917694091797, 0.46669310331344604, 0.4067569077014923, 0.42961838841438293, 0.3753403425216675, 0.3702278137207031, 0.36163511872291565, 0.33313608169555664]
iteration 25
epsilon=0.8285425126457759, episode length=129, total rewards=-0.05501618122975249
epsilon=0.8285425126457759, episode length=280, total rewards=-0.029824561403477307
epsilon=0.8285425126457759, episode length=490, total rewards=-0.020245398773052176
Training the model...
experience length=4995
number of examples=2897
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [0.45677295327186584, 0.3738757371902466, 0.37043771147727966, 0.3706120550632477, 0.32191556692123413, 0.330915808

Copying file://car-racing-v2-27-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [0.5614864230155945, 0.4616073966026306, 0.4199035167694092, 0.3881368041038513, 0.3866051435470581, 0.34236520528793335, 0.3552025258541107, 0.32693588733673096, 0.33143970370292664, 0.3173627555370331]
iteration 28
epsilon=0.809874171477684, episode length=209, total rewards=-0.09407665505220852
epsilon=0.809874171477684, episode length=646, total rewards=-0.07414965986392841
Training the model...
experience length=4995
number of examples=2853
best total reward =  [-0.002739726027337158, -0.010989010988991604, -0.004379562043676372]
loss = [0.5209338665008545, 0.4489767551422119, 0.3976927101612091, 0.36762699484825134, 0.35856303572654724, 0.3475815951824188, 0.2902606427669525, 0.3519960641860962, 0.31997066736221313, 0.2877705991268158]
iteration 29
epsilon=0.8037754297629072, episode length=690, total rewards=-0.009

Copying file://car-racing-v2-30-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.010989010988991604, -0.004379562043676372, -0.009090909091193228]
loss = [0.6783135533332825, 0.5743443965911865, 0.4995131492614746, 0.5127346515655518, 0.44320929050445557, 0.4565943777561188, 0.38902968168258667, 0.41572657227516174, 0.3808935880661011, 0.35217806696891785]
iteration 31
epsilon=0.7917602987106254, episode length=798, total rewards=-0.05209125475311294
Training the model...
experience length=4995
number of examples=2796
best total reward =  [-0.010989010988991604, -0.004379562043676372, -0.009090909091193228]
loss = [0.6548994779586792, 0.5189070105552673, 0.4321545660495758, 0.4402216970920563, 0.41199547052383423, 0.4097311794757843, 0.39038336277008057, 0.3592381775379181, 0.352779746055603, 0.3454912602901459]
iteration 32
epsilon=0.7858426957235192, episode length=203, total rewards=-0.05116279069764684
epsilon=0.7858426957235192, episode length=132, total rewards=-0.05

Copying file://car-racing-v2-33-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.7799842687662841, episode length=816, total rewards=-0.0006535947712127121
Training the model...
experience length=4995
number of examples=3240
best total reward =  [-0.004379562043676372, -0.009090909091193228, -0.0006535947712127121]
loss = [0.6411551237106323, 0.5132789611816406, 0.4561484754085541, 0.41133034229278564, 0.3755084276199341, 0.35439538955688477, 0.34294456243515015, 0.3439512252807617, 0.29165661334991455, 0.3076072335243225]
iteration 34
epsilon=0.7741844260786213, episode length=148, total rewards=-0.01904761904759164
epsilon=0.7741844260786213, episode length=662, total rewards=-0.09790940766537415
Training the model...
experience length=4995
number of examples=2808
best total reward =  [-0.004379562043676372, -0.009090909091193228, -0.0006535947712127121]
loss = [0.6098421812057495, 0.4823814034461975, 0.41497841477394104, 0.39992281794548035, 0.35772499442100525, 0.3424169421195984, 0.35553422570228577, 0.3495742380619049, 0.30356693267822266, 0.2935472

Copying file://car-racing-v2-36-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.7627581559996568, episode length=607, total rewards=-0.09712460063881001
Training the model...
experience length=4995
number of examples=2972
best total reward =  [-0.004379562043676372, -0.009090909091193228, -0.0006535947712127121]
loss = [0.579818069934845, 0.47326305508613586, 0.42805734276771545, 0.3849622309207916, 0.3673669099807739, 0.33464160561561584, 0.3208383619785309, 0.32390719652175903, 0.3123093247413635, 0.2927834987640381]
iteration 37
epsilon=0.7571305744396604, episode length=579, total rewards=-0.028985507246591186
Training the model...
experience length=4995
number of examples=2577
best total reward =  [-0.004379562043676372, -0.009090909091193228, -0.0006535947712127121]
loss = [0.6101902723312378, 0.48866844177246094, 0.43866339325904846, 0.42608511447906494, 0.38510236144065857, 0.3884965479373932, 0.35165899991989136, 0.3345538079738617, 0.3492569327354431, 0.3220289349555969]
iteration 38
epsilon=0.7515592686952639, episode length=999, total rewards

Copying file://car-racing-v2-39-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.7460436760083113, episode length=167, total rewards=-0.07759197324411127
epsilon=0.7460436760083113, episode length=236, total rewards=-0.05135135135128377
Training the model...
experience length=4995
number of examples=2686
best total reward =  [-0.009090909091193228, -0.0006535947712127121, 35.5311355311348]
loss = [0.7921314239501953, 0.6166911125183105, 0.526552140712738, 0.5269452333450317, 0.446590781211853, 0.49042457342147827, 0.45977506041526794, 0.4188418388366699, 0.42026522755622864, 0.38593292236328125]
iteration 40
epsilon=0.7405832392482283, episode length=812, total rewards=-0.027915194346566724
Training the model...
experience length=4995
number of examples=2810
best total reward =  [-0.009090909091193228, -0.0006535947712127121, 35.5311355311348]
loss = [0.6854109764099121, 0.5416182279586792, 0.5114990472793579, 0.48854342103004456, 0.4516918659210205, 0.41369932889938354, 0.34919074177742004, 0.38181033730506897, 0.3952670991420746, 0.34743329882621765]
it

Copying file://car-racing-v2-42-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2814
best total reward =  [-0.009090909091193228, -0.0006535947712127121, 35.5311355311348]
loss = [0.6814352869987488, 0.5277125835418701, 0.4851400852203369, 0.47395703196525574, 0.41801196336746216, 0.4339030086994171, 0.4057998061180115, 0.3739470839500427, 0.35747137665748596, 0.38726872205734253]
iteration 43
epsilon=0.724527376459317, episode length=507, total rewards=-0.006349206349058195
Training the model...
experience length=4995
number of examples=2505
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.8136566877365112, 0.5465270280838013, 0.4860951900482178, 0.4505341947078705, 0.47263962030410767, 0.4076591730117798, 0.39080557227134705, 0.4218839704990387, 0.36378443241119385, 0.3420284390449524]
iteration 44
epsilon=0.7192821026947238, episode length=300, total rewards=-0.09999999999999054
epsilon=0.7192821026947238, episode length=503, total rewards=-0.08553459

Copying file://car-racing-v2-45-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.7365416884422302, 0.5835001468658447, 0.5544053316116333, 0.5102822780609131, 0.43935611844062805, 0.4421503245830536, 0.4086344242095947, 0.45405715703964233, 0.3886300027370453, 0.35896289348602295]
iteration 46
epsilon=0.708948388851099, episode length=331, total rewards=-0.08741721854294515
epsilon=0.708948388851099, episode length=975, total rewards=-0.03902439024455015
Training the model...
experience length=4995
number of examples=3304
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.6763566732406616, 0.5226677656173706, 0.474352091550827, 0.45622217655181885, 0.4442254602909088, 0.38724684715270996, 0.38235026597976685, 0.3914208710193634, 0.35822242498397827, 0.3391481637954712]
iteration 47
epsilon=0.7038589049625881, episode length=810, total rewards=-0.01891891891

Copying file://car-racing-v2-48-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.6988203159129622, episode length=163, total rewards=-0.060130718954219065
epsilon=0.6988203159129622, episode length=294, total rewards=-0.08823529411756437
Training the model...
experience length=4995
number of examples=2756
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.6604371666908264, 0.4801346957683563, 0.482278048992157, 0.399055153131485, 0.39409226179122925, 0.37231191992759705, 0.34616097807884216, 0.3439142405986786, 0.38228175044059753, 0.33123764395713806]
iteration 49
epsilon=0.6938321127538326, episode length=526, total rewards=-0.06842105263157205
Training the model...
experience length=4995
number of examples=2524
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.6367822289466858, 0.5412265658378601, 0.488509863615036, 0.40793663263320923, 0.4030856490135193, 0.3726714849472046, 0.36725327372550964, 0.36080753803253174, 0.3136751055717468, 0.3371932804584503]
iter

Copying file://car-racing-v2-51-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.6840048537100314, episode length=496, total rewards=-0.010559006211020988
Training the model...
experience length=4995
number of examples=2946
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.5326343774795532, 0.4330631196498871, 0.3900790512561798, 0.3206292688846588, 0.32885921001434326, 0.31484559178352356, 0.30699050426483154, 0.2810581922531128, 0.2741917669773102, 0.2574458420276642]
iteration 52
epsilon=0.6791648051729311, episode length=454, total rewards=-0.04545454545456737
epsilon=0.6791648051729311, episode length=664, total rewards=-0.09375000000021783
Training the model...
experience length=4995
number of examples=3116
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.6124061346054077, 0.5346034169197083, 0.4403207004070282, 0.4054529070854187, 0.41462916135787964, 0.34385353326797485, 0.3496454656124115, 0.3489449620246887, 0.3257426619529724, 0.31761786341667175]
ite

Copying file://car-racing-v2-54-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.8446446061134338, 0.6837624311447144, 0.6284584403038025, 0.5897873640060425, 0.530989944934845, 0.5274844169616699, 0.4890543222427368, 0.4895722568035126, 0.4687771797180176, 0.4156705439090729]
iteration 55
epsilon=0.6649331312944898, episode length=741, total rewards=-0.006451612903089571
Training the model...
experience length=4995
number of examples=2739
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.5848914980888367, 0.4945056736469269, 0.5011361837387085, 0.4014459252357483, 0.40548932552337646, 0.3489188551902771, 0.3712240755558014, 0.34380510449409485, 0.361686646938324, 0.3100137710571289]
iteration 56
epsilon=0.660283799981545, episode length=851, total rewards=-0.014814814815609284
Training the model...
experience length=4995
number of examples=2849
best total

Copying file://car-racing-v2-57-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2921
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [6.222336769104004, 5.854999542236328, 5.68986177444458, 5.567278861999512, 5.518417835235596, 5.321741580963135, 5.345385551452637, 5.096558094024658, 5.091236591339111, 5.012245178222656]
iteration 58
epsilon=0.6511241523619121, episode length=952, total rewards=-0.061904761905226424
Training the model...
experience length=4995
number of examples=2950
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.7227540016174316, 0.6206318140029907, 0.5689921975135803, 0.5352919101715088, 0.5021789073944092, 0.42810994386672974, 0.48913806676864624, 0.4268123507499695, 0.4518185257911682, 0.4205543100833893]
iteration 59
epsilon=0.646612910838293, episode length=462, total rewards=-0.09537953795371945
epsilon=0.646612910838293, episode length=655, total rewards=-0.08275862068945838
Trainin

Copying file://car-racing-v2-60-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [5.748531818389893, 5.577633857727051, 5.191765785217285, 5.240199089050293, 4.797599792480469, 4.793340682983398, 4.3357014656066895, 4.181509494781494, 3.981619358062744, 3.723957061767578]
iteration 61
epsilon=0.6377253139126109, episode length=820, total rewards=-0.010447761194037869
Training the model...
experience length=4995
number of examples=2818
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.902852475643158, 0.7186838984489441, 0.6195793151855469, 0.6217625737190247, 0.5606200098991394, 0.5460308790206909, 0.5130860209465027, 0.4543667137622833, 0.507167398929596, 0.4208377003669739]
iteration 62
epsilon=0.6333480607734847, episode length=758, total rewards=-0.08772563176951675
Training the model...
experience length=4995
number of examples=2756
best total reward =  

Copying file://car-racing-v2-63-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2947
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.674137532711029, 0.5681722164154053, 0.534966230392456, 0.5040367841720581, 0.4699209928512573, 0.4365580677986145, 0.4169699549674988, 0.5635605454444885, 0.3827044665813446, 0.4138052761554718]
iteration 64
epsilon=0.6247244343640923, episode length=769, total rewards=-0.07692307692316658
Training the model...
experience length=4995
number of examples=2767
best total reward =  [-0.0006535947712127121, 35.5311355311348, -0.006349206349058195]
loss = [0.6603520512580872, 0.6011454463005066, 0.48715710639953613, 0.5332121849060059, 0.4950215816497803, 0.4447750747203827, 0.4368939697742462, 0.41096529364585876, 0.40337735414505005, 0.41848137974739075]
iteration 65
epsilon=0.6204771900204513, episode length=999, total rewards=11.913357400721253
Training the model...
experience length=4995
number of examples=2997
best total 

Copying file://car-racing-v2-66-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [35.5311355311348, -0.006349206349058195, 11.913357400721253]
loss = [0.9164456725120544, 0.7206480503082275, 0.6927318572998047, 0.6368924379348755, 0.6597279906272888, 0.5656477808952332, 0.5504983067512512, 0.5538687109947205, 0.6478548645973206, 0.487093448638916]
iteration 67
epsilon=0.6121096939390444, episode length=388, total rewards=-0.06504854368923607
epsilon=0.6121096939390444, episode length=485, total rewards=-0.05631067961159597
Training the model...
experience length=4995
number of examples=2871
best total reward =  [35.5311355311348, -0.006349206349058195, 11.913357400721253]
loss = [0.7139108180999756, 0.607184648513794, 0.5383758544921875, 0.5175121426582336, 0.49759718775749207, 0.4818493127822876, 0.45942822098731995, 0.42660951614379883, 0.4017673134803772, 0.44830742478370667]
iteration 68
epsilon=0.6079885969996539, episode length=180, total rewards=-0.049458483754501986
ep

Copying file://car-racing-v2-69-model06.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.6039087110296574, episode length=755, total rewards=-0.060431654676510904
Training the model...
experience length=4995
number of examples=3185
best total reward =  [35.5311355311348, -0.006349206349058195, 11.913357400721253]
loss = [0.7909789085388184, 0.6673054695129395, 0.5923768281936646, 0.560785174369812, 0.5244524478912354, 0.49209460616111755, 0.4900643229484558, 0.4526292085647583, 0.45024698972702026, 0.4265575706958771]
iteration 70
epsilon=0.5998696239193608, episode length=273, total rewards=-0.09624573378833223
epsilon=0.5998696239193608, episode length=659, total rewards=-0.06593406593395668
Training the model...
experience length=4995
number of examples=2930
best total reward =  [35.5311355311348, -0.006349206349058195, 11.913357400721253]
loss = [0.7184732556343079, 0.6357997059822083, 0.5299932360649109, 0.5553203821182251, 0.5032899975776672, 0.47652533650398254, 0.4522034823894501, 0.41374948620796204, 0.4162532687187195, 0.40441277623176575]
iteration 71


Copying file://car-racing-v2-72-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2524
best total reward =  [-0.006349206349058195, 11.913357400721253, 9.677419354837854]
loss = [0.5768212080001831, 0.5110935568809509, 0.4555548131465912, 0.4234532117843628, 0.40101075172424316, 0.4328034818172455, 0.3875102400779724, 0.35574042797088623, 0.3663612902164459, 0.36950263381004333]
iteration 73
epsilon=0.5879930962193319, episode length=445, total rewards=-0.014012738853366374
epsilon=0.5879930962193319, episode length=284, total rewards=-0.01898734177208336
Training the model...
experience length=4995
number of examples=2727
best total reward =  [-0.006349206349058195, 11.913357400721253, 9.677419354837854]
loss = [0.586750864982605, 0.567553699016571, 0.4648979902267456, 0.45330387353897095, 0.4265406131744385, 0.42173904180526733, 0.36177176237106323, 0.3738540709018707, 0.3528372645378113, 0.3732204735279083]
iteration 74
epsilon=0.5841131652571386, episode length=719, total rewards=-0.0575539568347356

Copying file://car-racing-v2-75-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.006349206349058195, 11.913357400721253, 9.677419354837854]
loss = [0.8532876968383789, 0.6827554702758789, 0.6629897952079773, 0.6255668997764587, 0.5678185820579529, 0.571444034576416, 0.5628812313079834, 0.5375576615333557, 0.5075986385345459, 0.520789384841919]
iteration 76
epsilon=0.5764693132685216, episode length=206, total rewards=-0.010344827586173028
epsilon=0.5764693132685216, episode length=278, total rewards=-0.02543554006962198
epsilon=0.5764693132685216, episode length=999, total rewards=20.320855614972565
Training the model...
experience length=4995
number of examples=3481
best total reward =  [11.913357400721253, 9.677419354837854, 20.320855614972565]
loss = [0.7929775714874268, 0.6720759272575378, 0.6353310346603394, 0.5749087929725647, 0.5333032608032227, 0.5345621705055237, 0.5143616795539856, 0.5209140181541443, 0.5056770443916321, 0.45282623171806335]
iteration 77
epsilon=

Copying file://car-racing-v2-78-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2898
best total reward =  [9.677419354837854, 20.320855614972565, 38.181818181817086]
loss = [0.7352060079574585, 0.5709024667739868, 0.5436264276504517, 0.5376970171928406, 0.47616103291511536, 0.47677502036094666, 0.46437522768974304, 0.468923956155777, 0.41342464089393616, 0.4532274901866913]
iteration 79
epsilon=0.5652877981951332, episode length=372, total rewards=-0.011864406779659137
epsilon=0.5652877981951332, episode length=793, total rewards=-0.08965517241368315
Training the model...
experience length=4995
number of examples=3163
best total reward =  [9.677419354837854, 20.320855614972565, 38.181818181817086]
loss = [0.6876298785209656, 0.5572368502616882, 0.5629429221153259, 0.522502064704895, 0.49987688660621643, 0.5303455591201782, 0.45234519243240356, 0.4359329640865326, 0.5819045901298523, 0.415105402469635]
iteration 80
epsilon=0.5616349202131818, episode length=999, total rewards=9.022556390976273
Training

Copying file://car-racing-v2-81-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2752
best total reward =  [9.677419354837854, 20.320855614972565, 38.181818181817086]
loss = [0.6950080394744873, 0.5172616839408875, 0.5286985635757446, 0.4856013357639313, 0.45036399364471436, 0.43257856369018555, 0.517968475818634, 0.39519238471984863, 0.5250517725944519, 0.35505709052085876]
iteration 82
epsilon=0.5544383853009396, episode length=203, total rewards=-0.0610169491524879
epsilon=0.5544383853009396, episode length=999, total rewards=21.951219512194783
Training the model...
experience length=4995
number of examples=3200
best total reward =  [20.320855614972565, 38.181818181817086, 21.951219512194783]
loss = [0.7770990133285522, 0.6148084402084351, 0.5739671587944031, 0.519636332988739, 0.48572486639022827, 0.4852694571018219, 0.46253326535224915, 0.43245643377304077, 0.4623347520828247, 0.40382933616638184]
iteration 83
epsilon=0.5508940014479302, episode length=999, total rewards=37.03703703703605
Training

Copying file://car-racing-v2-84-model06.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [38.181818181817086, 21.951219512194783, 37.03703703703605]
loss = [0.8089649081230164, 0.7744573354721069, 0.6633975505828857, 0.6468821167945862, 0.6214065551757812, 0.5715329647064209, 0.5865382552146912, 0.5167329907417297, 0.5474519729614258, 0.6165112257003784]
iteration 85
epsilon=0.5439112108191164, episode length=999, total rewards=78.83211678832154
Training the model...
experience length=4995
number of examples=2997
best total reward =  [21.951219512194783, 37.03703703703605, 78.83211678832154]
loss = [0.8318730592727661, 0.7226622104644775, 0.6793670058250427, 0.598008930683136, 0.5466441512107849, 0.5313959121704102, 0.5358220934867859, 0.5271917581558228, 0.43429797887802124, 0.4920451045036316]
iteration 86
epsilon=0.5404720987109253, episode length=809, total rewards=-0.014084507042832345
Training the model...
experience length=4995
number of examples=2807
best total reward =  [21.9

Copying file://car-racing-v2-87-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.537067377723816, episode length=999, total rewards=43.81270903009982
Training the model...
experience length=4995
number of examples=3436
best total reward =  [37.03703703703605, 78.83211678832154, 43.81270903009982]
loss = [0.7874187231063843, 0.6335775256156921, 0.6038026809692383, 0.5320084095001221, 0.5403532981872559, 0.5069811344146729, 0.5616165399551392, 0.4420388638973236, 0.4851312041282654, 0.44169291853904724]
iteration 88
epsilon=0.5336967039465779, episode length=845, total rewards=-0.041176470588765585
Training the model...
experience length=4995
number of examples=2843
best total reward =  [37.03703703703605, 78.83211678832154, 43.81270903009982]
loss = [0.7794044017791748, 0.7453293800354004, 0.6266770958900452, 0.5696775913238525, 0.5780045390129089, 0.5690792798995972, 0.5594614148139954, 0.5092487931251526, 0.4524211287498474, 0.4536404609680176]
iteration 89
epsilon=0.530359736907112, episode length=999, total rewards=42.85714285714161
Training the model.

Copying file://car-racing-v2-90-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


epsilon=0.5270561395380409, episode length=596, total rewards=-0.040909090909054785
Training the model...
experience length=4995
number of examples=3022
best total reward =  [78.83211678832154, 43.81270903009982, 42.85714285714161]
loss = [8.098682403564453, 7.546570777893066, 7.135784149169922, 6.735123634338379, 6.470636367797852, 6.319621562957764, 6.018686771392822, 5.769958019256592, 5.484965801239014, 5.1880202293396]
iteration 91
epsilon=0.5237855781426605, episode length=338, total rewards=-0.05384615384609437
epsilon=0.5237855781426605, episode length=256, total rewards=-0.058974358974287044
Training the model...
experience length=4995
number of examples=2592
best total reward =  [78.83211678832154, 43.81270903009982, 42.85714285714161]
loss = [0.9012643098831177, 0.7194453477859497, 0.6964389681816101, 0.5991319417953491, 0.5983218550682068, 0.5339277386665344, 0.5627130270004272, 0.5205431580543518, 0.5126414895057678, 0.5190519094467163]
iteration 92
epsilon=0.5205477223612

Copying file://car-racing-v2-93-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2725
best total reward =  [78.83211678832154, 43.81270903009982, 42.85714285714161]
loss = [1.330172061920166, 1.017057180404663, 0.9741456508636475, 0.8563681840896606, 0.7781124711036682, 0.6847438812255859, 0.7468416094779968, 0.6369783282279968, 0.6359172463417053, 0.6173810362815857]
iteration 94
epsilon=0.5141688226862453, episode length=999, total rewards=33.80281690140726
Training the model...
experience length=4995
number of examples=2997
best total reward =  [78.83211678832154, 43.81270903009982, 42.85714285714161]
loss = [1.3232836723327637, 1.0289615392684937, 0.7700468897819519, 0.8745443224906921, 0.603251039981842, 0.6891610622406006, 0.6054996848106384, 0.6712292432785034, 0.6195706725120544, 0.5846118330955505]
iteration 95
epsilon=0.5110271344593829, episode length=999, total rewards=49.99999999999932
Training the model...
experience length=4995
number of examples=2997
best total reward =  [43.81270903009

Copying file://car-racing-v2-96-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [43.81270903009982, 42.85714285714161, 49.99999999999932]
loss = [1.1487303972244263, 0.9415118098258972, 0.9287521839141846, 0.8936593532562256, 0.8359024524688721, 0.8484187126159668, 0.7506958246231079, 0.8602489829063416, 0.7327570915222168, 0.6984815001487732]
iteration 97
epsilon=0.5048376944836411, episode length=999, total rewards=75.8241758241761
Training the model...
experience length=4995
number of examples=2997
best total reward =  [42.85714285714161, 49.99999999999932, 75.8241758241761]
loss = [1.0474739074707031, 0.9063884615898132, 0.8116224408149719, 0.818298876285553, 0.7555112838745117, 0.7308470010757446, 0.7279167175292969, 0.6472570896148682, 0.6728848218917847, 0.6474005579948425]
iteration 98
epsilon=0.5017893175388047, episode length=813, total rewards=-0.044067796610513826
Training the model...
experience length=4995
number of examples=2811
best total reward =  [42.8571428

Copying file://car-racing-v2-99-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2782
best total reward =  [42.85714285714161, 49.99999999999932, 75.8241758241761]
loss = [0.8250061869621277, 0.7215029001235962, 0.6336342096328735, 0.6062661409378052, 0.5610814690589905, 0.5648084282875061, 0.5897161960601807, 0.5199533104896545, 0.49868154525756836, 0.4849863648414612]
iteration 100
epsilon=0.49578371011978256, episode length=517, total rewards=-0.007171314741371421
Training the model...
experience length=4995
number of examples=2515
best total reward =  [42.85714285714161, 49.99999999999932, 75.8241758241761]
loss = [0.7971972227096558, 0.653573751449585, 0.6376944184303284, 0.62629634141922, 0.5346733927726746, 0.5231881141662598, 0.5232399702072144, 0.5105169415473938, 0.5080896615982056, 0.48123466968536377]
iteration 101
epsilon=0.4928258730185847, episode length=696, total rewards=-0.07974683544344541
Training the model...
experience length=4995
number of examples=2694
best total reward =  [42.8

Copying file://car-racing-v2-102-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2921
best total reward =  [42.85714285714161, 49.99999999999932, 75.8241758241761]
loss = [0.8680525422096252, 0.7913216948509216, 0.6709553003311157, 0.6196747422218323, 0.6221833825111389, 0.6536035537719727, 0.5699947476387024, 0.5287713408470154, 0.5544650554656982, 0.572211503982544]
iteration 103
epsilon=0.48699863814551486, episode length=209, total rewards=-0.02097902097899615
epsilon=0.48699863814551486, episode length=247, total rewards=-0.06501766784445917
epsilon=0.48699863814551486, episode length=999, total rewards=54.71698113207639
Training the model...
experience length=4995
number of examples=3453
best total reward =  [49.99999999999932, 75.8241758241761, 54.71698113207639]
loss = [0.9095603823661804, 0.7853384613990784, 0.8675017952919006, 0.7111285328865051, 0.6533448696136475, 0.6651745438575745, 0.6000620126724243, 0.5940203666687012, 0.5335365533828735, 0.5324053168296814]
iteration 104
epsilon=0.4841

Copying file://car-racing-v2-105-model06.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [49.99999999999932, 75.8241758241761, 54.71698113207639]
loss = [0.9534872770309448, 0.8290906548500061, 0.7920162081718445, 0.7096987962722778, 0.6927475333213806, 0.6784857511520386, 0.6655001640319824, 0.627031147480011, 0.678392231464386, 0.59388667345047]
iteration 106
epsilon=0.47847449159395494, episode length=999, total rewards=67.74193548387012
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.8241758241761, 54.71698113207639, 67.74193548387012]
loss = [0.8917317390441895, 0.779947817325592, 0.7187188863754272, 0.6782950162887573, 0.6218833327293396, 0.6272607445716858, 0.5960825085639954, 0.5746347904205322, 0.6388381719589233, 0.565887451171875]
iteration 107
epsilon=0.4756897466780154, episode length=753, total rewards=-0.057534246575557946
Training the model...
experience length=4995
number of examples=2751
best total reward =  [75.824175824

Copying file://car-racing-v2-108-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.8241758241761, 54.71698113207639, 67.74193548387012]
loss = [1.0784265995025635, 0.9186563491821289, 0.7964959740638733, 0.8430178165435791, 0.7381064891815186, 0.7514351010322571, 0.6821909546852112, 0.6774178743362427, 0.7041122913360596, 0.6247429847717285]
iteration 109
epsilon=0.47020352071912286, episode length=158, total rewards=-0.026984126984092444
epsilon=0.47020352071912286, episode length=833, total rewards=-0.06666666666663201
Training the model...
experience length=4995
number of examples=2989
best total reward =  [75.8241758241761, 54.71698113207639, 67.74193548387012]
loss = [0.9265037178993225, 0.8249462842941284, 0.7742115259170532, 0.7087330222129822, 0.6761126518249512, 0.6075710654258728, 0.6008515357971191, 0.7144332528114319, 0.5387784242630005, 0.5705565214157104]
iteration 110
epsilon=0.4675014855119316, episode length=254, total rewards=-0.045454545454471446
epsilon=0

Copying file://car-racing-v2-111-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.8241758241761, 54.71698113207639, 67.74193548387012]
loss = [1.2312461137771606, 1.1335177421569824, 1.0097181797027588, 0.9521694779396057, 0.9027965068817139, 0.9404961466789246, 0.7947161197662354, 0.855462908744812, 0.7672082781791687, 0.6994041800498962]
iteration 112
epsilon=0.4621782059502441, episode length=999, total rewards=127.27272727273109
Training the model...
experience length=4995
number of examples=2997
best total reward =  [54.71698113207639, 67.74193548387012, 127.27272727273109]
loss = [1.0014336109161377, 0.9104234576225281, 0.9716223478317261, 0.8246981501579285, 0.772426962852478, 0.7555969953536987, 0.7308986783027649, 0.713407039642334, 0.7141113877296448, 0.7536974549293518]
iteration 113
epsilon=0.4595564238907417, episode length=999, total rewards=107.4688796680519
Training the model...
experience length=4995
number of examples=2997
best total reward =  [67.74193548

Copying file://car-racing-v2-114-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.225071668624878, 1.1234080791473389, 1.0178464651107788, 0.9651645421981812, 0.9002410769462585, 0.9204059839248657, 0.8797377347946167, 0.8400879502296448, 0.7867814898490906, 0.8176303505897522]
iteration 115
epsilon=0.4543912510553159, episode length=734, total rewards=-0.07342657342710734
Training the model...
experience length=4995
number of examples=2732
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.0012229681015015, 0.8600596189498901, 0.8236687183380127, 0.8303963541984558, 0.687872052192688, 0.8231415748596191, 0.6368726491928101, 0.6603979468345642, 0.6861391067504883, 0.645812451839447]
iteration 116
epsilon=0.4518473385447628, episode length=999, total rewards=53.310104529615764
Training the model...
experience length=4995
number of examples=2997
best total reward =  [67.741

Copying file://car-racing-v2-117-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.4284563064575195, 1.3175328969955444, 1.1749794483184814, 1.1516673564910889, 1.0961575508117676, 1.052231788635254, 1.0147478580474854, 1.022017002105713, 0.9593495726585388, 0.9266947507858276]
iteration 118
epsilon=0.446835576507722, episode length=68, total rewards=-0.09727891156461888
epsilon=0.446835576507722, episode length=999, total rewards=27.586206896550657
Training the model...
experience length=4995
number of examples=3065
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.2561737298965454, 1.116670846939087, 0.966525673866272, 0.9572433829307556, 0.8711400032043457, 0.8698744177818298, 0.8976905941963196, 0.7477245926856995, 0.7447662949562073, 0.7848595380783081]
iteration 119
epsilon=0.4443672207426448, episode length=999, total rewards=7.526881720429275
Training the model...

Copying file://car-racing-v2-120-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.5270675420761108, 1.299275517463684, 1.1928050518035889, 1.2037135362625122, 1.169481873512268, 0.9994884729385376, 1.0191951990127563, 0.9638463258743286, 1.000592827796936, 0.9928728938102722]
iteration 121
epsilon=0.43950431304986615, episode length=760, total rewards=-0.013043478260976488
Training the model...
experience length=4995
number of examples=2758
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.1275135278701782, 1.0015939474105835, 0.9850789904594421, 0.8804864287376404, 0.8171724081039429, 0.7809693217277527, 0.7530190944671631, 0.7328358888626099, 0.7102782726287842, 0.7015315294265747]
iteration 122
epsilon=0.4371092699193675, episode length=335, total rewards=-0.06341463414626536
epsilon=0.4371092699193675, episode length=393, total rewards=-0.055737704918081166
Training 

Copying file://car-racing-v2-123-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2823
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.5280250310897827, 1.2730841636657715, 0.9789153337478638, 0.921424925327301, 0.8710264563560486, 0.7548027038574219, 0.7863236665725708, 0.7720628380775452, 0.6850852370262146, 0.696255087852478]
iteration 124
epsilon=0.4323907954479721, episode length=999, total rewards=19.85018726591701
Training the model...
experience length=4995
number of examples=2997
best total reward =  [67.74193548387012, 127.27272727273109, 107.4688796680519]
loss = [1.2119929790496826, 1.1710630655288696, 0.8764704465866089, 0.9809200763702393, 0.8363980650901794, 0.8544954657554626, 0.8288036584854126, 0.8125695586204529, 0.7131791710853577, 0.7371968030929565]
iteration 125
epsilon=0.43006688749349237, episode length=999, total rewards=122.22222222222294
Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.272

Copying file://car-racing-v2-126-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727273109, 107.4688796680519, 122.22222222222294]
loss = [1.5299127101898193, 1.3621870279312134, 1.2101236581802368, 1.1072558164596558, 1.0277453660964966, 1.0323600769042969, 0.9644889235496521, 0.9555788636207581, 0.8885297775268555, 0.8896551132202148]
iteration 127
epsilon=0.4254885564323719, episode length=754, total rewards=-0.0901639344262446
Training the model...
experience length=4995
number of examples=2752
best total reward =  [127.27272727273109, 107.4688796680519, 122.22222222222294]
loss = [1.2015799283981323, 1.0179177522659302, 0.8953061699867249, 0.8611487746238708, 0.7677679657936096, 0.8390288352966309, 0.6938076019287109, 0.7337554097175598, 0.7043935060501099, 0.7111986875534058]
iteration 128
epsilon=0.4232336708680482, episode length=999, total rewards=111.26760563380624
Training the model...
experience length=4995
number of examples=2997
best total reward =  [10

Copying file://car-racing-v2-129-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [107.4688796680519, 122.22222222222294, 111.26760563380624]
loss = [1.4783626794815063, 1.2857586145401, 1.235511302947998, 1.1749101877212524, 1.1259583234786987, 1.0954256057739258, 1.077553391456604, 1.0569745302200317, 0.9719272255897522, 1.010672688484192]
iteration 130
epsilon=0.41879132081777404, episode length=999, total rewards=60.30534351145405
Training the model...
experience length=4995
number of examples=2997
best total reward =  [107.4688796680519, 122.22222222222294, 111.26760563380624]
loss = [1.241408348083496, 1.0739773511886597, 0.997341513633728, 0.960418164730072, 0.9179664850234985, 0.8616898059844971, 0.8960729837417603, 0.7938467264175415, 0.8528164625167847, 0.8068278431892395]
iteration 131
epsilon=0.4166034076095963, episode length=582, total rewards=-0.08082191780852566
Training the model...
experience length=4995
number of examples=2580
best total reward =  [107.468879

Copying file://car-racing-v2-132-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [107.4688796680519, 122.22222222222294, 111.26760563380624]
loss = [1.7734622955322266, 1.445020079612732, 1.483267903327942, 1.3255021572113037, 1.2174206972122192, 1.1846574544906616, 1.2129194736480713, 1.244517207145691, 0.9953189492225647, 1.02327299118042]
iteration 133
epsilon=0.4122929997981653, episode length=999, total rewards=72.29729729729982
Training the model...
experience length=4995
number of examples=2997
best total reward =  [107.4688796680519, 122.22222222222294, 111.26760563380624]
loss = [1.280310869216919, 1.1228731870651245, 1.042531967163086, 1.0877050161361694, 0.8878359794616699, 0.9312220811843872, 0.9090465903282166, 0.8671163320541382, 0.8405671119689941, 0.8397202491760254]
iteration 134
epsilon=0.41017006980018367, episode length=999, total rewards=24.999999999998707
Training the model...
experience length=4995
number of examples=2997
best total reward =  [107.468879

Copying file://car-racing-v2-135-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [107.4688796680519, 122.22222222222294, 111.26760563380624]
loss = [2.1326699256896973, 1.9347552061080933, 1.7858015298843384, 1.637479305267334, 1.6008901596069336, 1.7484636306762695, 1.4141583442687988, 1.3795967102050781, 1.361985683441162, 1.2950950860977173]
iteration 136
epsilon=0.40598768541116, episode length=542, total rewards=-0.03643410852724502
Training the model...
experience length=4995
number of examples=2540
best total reward =  [107.4688796680519, 122.22222222222294, 111.26760563380624]
loss = [1.488328218460083, 1.3147536516189575, 1.1558359861373901, 1.0061300992965698, 1.0902303457260132, 0.9581507444381714, 0.9583572149276733, 0.9358258247375488, 0.8469160199165344, 0.838830292224884]
iteration 137
epsilon=0.40392780855704835, episode length=999, total rewards=142.95774647887595
Training the model...
experience length=4995
number of examples=2997
best total reward =  [122.22

Copying file://car-racing-v2-138-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2626
best total reward =  [122.22222222222294, 111.26760563380624, 142.95774647887595]
loss = [1.2915222644805908, 1.0752756595611572, 0.985633373260498, 0.86280757188797, 0.9230958819389343, 0.8109357953071594, 0.8104410767555237, 0.9052199125289917, 0.7630637288093567, 0.6958659291267395]
iteration 139
epsilon=0.3998696451667631, episode length=699, total rewards=-0.06993006993058626
Training the model...
experience length=4995
number of examples=2697
best total reward =  [122.22222222222294, 111.26760563380624, 142.95774647887595]
loss = [1.3715769052505493, 1.196639060974121, 1.0969913005828857, 0.9690255522727966, 0.8584182858467102, 0.9524128437042236, 0.9865806698799133, 0.9063529372215271, 0.8264362812042236, 0.787742555141449]
iteration 140
epsilon=0.39787094871509543, episode length=326, total rewards=-0.02026143790854215
epsilon=0.39787094871509543, episode length=771, total rewards=-0.007017543860237313
Trainin

Copying file://car-racing-v2-141-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [122.22222222222294, 111.26760563380624, 142.95774647887595]
loss = [1.8542722463607788, 1.5735691785812378, 1.595441460609436, 1.384717345237732, 1.351919174194336, 1.3106054067611694, 1.1782981157302856, 1.2463184595108032, 1.1799192428588867, 1.470790147781372]
iteration 142
epsilon=0.393933316835665, episode length=999, total rewards=122.22222222222359
Training the model...
experience length=4995
number of examples=2997
best total reward =  [111.26760563380624, 142.95774647887595, 122.22222222222359]
loss = [1.5162465572357178, 1.3187735080718994, 1.2660634517669678, 1.1076656579971313, 1.4203184843063354, 1.0130689144134521, 1.0828044414520264, 1.1434372663497925, 1.087880253791809, 0.9278572201728821]
iteration 143
epsilon=0.39199398366730837, episode length=999, total rewards=39.19413919413977
Training the model...
experience length=4995
number of examples=2997
best total reward =  [111.267

Copying file://car-racing-v2-144-model06.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [111.26760563380624, 142.95774647887595, 122.22222222222359]
loss = [1.6783549785614014, 1.4463458061218262, 1.4180335998535156, 1.2984707355499268, 1.3153741359710693, 1.3155864477157593, 1.097956895828247, 1.1551748514175415, 1.120187759399414, 1.0511901378631592]
iteration 145
epsilon=0.3881733033923289, episode length=543, total rewards=-0.05217391304390273
Training the model...
experience length=4995
number of examples=2541
best total reward =  [111.26760563380624, 142.95774647887595, 122.22222222222359]
loss = [1.2138996124267578, 1.0867440700531006, 1.1232249736785889, 0.9432855844497681, 1.0172572135925293, 0.8525222539901733, 0.892157793045044, 0.8572173118591309, 0.7495611310005188, 0.8386890292167664]
iteration 146
epsilon=0.38629157035840567, episode length=999, total rewards=103.70370370370601
Training the model...
experience length=4995
number of examples=2997
best total reward =  [1

Copying file://car-racing-v2-147-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [111.26760563380624, 142.95774647887595, 122.22222222222359]
loss = [1.6603918075561523, 1.4435285329818726, 1.2519735097885132, 1.28904390335083, 1.1590708494186401, 1.1424261331558228, 1.1449556350708008, 1.1025077104568481, 1.1212518215179443, 1.1073368787765503]
iteration 148
epsilon=0.3825843681082734, episode length=999, total rewards=146.79487179487492
Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.5766993761062622, 1.2884703874588013, 1.1860629320144653, 1.133726954460144, 1.1499853134155273, 1.0248252153396606, 1.09588623046875, 0.9690157175064087, 1.0947428941726685, 0.8886516690254211]
iteration 149
epsilon=0.38075852442719066, episode length=999, total rewards=50.00000000000047
Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95

Copying file://car-racing-v2-150-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.6024627685546875, 1.489911675453186, 1.300431728363037, 1.3323479890823364, 1.3154661655426025, 1.1451976299285889, 1.2436209917068481, 1.1469074487686157, 1.0520565509796143, 1.0640151500701904]
iteration 151
epsilon=0.3771614297910896, episode length=999, total rewards=69.06474820144314
Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.5404447317123413, 1.2895146608352661, 1.3735237121582031, 1.1830466985702515, 1.081294298171997, 1.1612772941589355, 0.9975757598876953, 0.9996909499168396, 1.0279438495635986, 0.9581430554389954]
iteration 152
epsilon=0.3753898154931787, episode length=486, total rewards=-0.0677811550151095
epsilon=0.3753898154931787, episode length=999, total rewards=41.479099678456386
Training the m

Copying file://car-racing-v2-153-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.6325854063034058, 1.3531769514083862, 1.1555521488189697, 1.1130332946777344, 1.0713753700256348, 1.0463248491287231, 1.1248456239700317, 0.8835245370864868, 0.8635633587837219, 0.8814151883125305]
iteration 154
epsilon=0.3718995581648644, episode length=591, total rewards=-0.009968847352110205
Training the model...
experience length=4995
number of examples=2589
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.091910481452942, 0.9712747931480408, 0.9626131057739258, 0.861729621887207, 0.8268213272094727, 0.8682781457901001, 0.7420642971992493, 0.714465320110321, 0.7118102312088013, 0.8449392914772034]
iteration 155
epsilon=0.37018056258321574, episode length=674, total rewards=-0.015337423312993453
Training the model...
experience length=4995
number of examples=2672
best total reward =

Copying file://car-racing-v2-156-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.6903438568115234, 1.4818824529647827, 1.6855963468551636, 1.2754924297332764, 1.3125286102294922, 1.3339102268218994, 1.1407403945922852, 1.2488447427749634, 1.1167248487472534, 1.0824142694473267]
iteration 157
epsilon=0.3667939693878097, episode length=571, total rewards=-0.057142857143167136
Training the model...
experience length=4995
number of examples=2569
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.4169586896896362, 1.032791018486023, 0.9552736282348633, 0.999579668045044, 0.9621444940567017, 0.8412535786628723, 0.8345816731452942, 0.8257553577423096, 0.74512779712677, 0.7592736482620239]
iteration 158
epsilon=0.3651260296939316, episode length=673, total rewards=-0.05993265993297325
Training the model...
experience length=4995
number of examples=2671
best total reward =  [

Copying file://car-racing-v2-159-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.8625831604003906, 1.5834693908691406, 1.5656224489212036, 1.4336066246032715, 1.3210217952728271, 1.2690397500991821, 1.2349059581756592, 1.1937415599822998, 1.1749705076217651, 1.2136764526367188]
iteration 160
epsilon=0.3618400217030224, episode length=999, total rewards=48.854961832062315
Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.3253117799758911, 1.2579057216644287, 1.298046588897705, 1.2494109869003296, 1.0150423049926758, 0.9684652090072632, 1.0264277458190918, 0.9448269605636597, 0.9947875738143921, 0.822318434715271]
iteration 161
epsilon=0.3602216214859921, episode length=999, total rewards=68.91891891892102
Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.

Copying file://car-racing-v2-162-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.480142593383789, 1.779833436012268, 1.2497731447219849, 1.231669545173645, 1.129136085510254, 1.1506279706954956, 1.0266485214233398, 0.9948644042015076, 0.9564331769943237, 0.991378903388977]
iteration 163
epsilon=0.3570332112184209, episode length=72, total rewards=-0.05362318840578806
epsilon=0.3570332112184209, episode length=314, total rewards=-0.03146853146843506
epsilon=0.3570332112184209, episode length=227, total rewards=-0.07272727272720769
Training the model...
experience length=4995
number of examples=2611
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.3468292951583862, 1.1730495691299438, 1.1583112478256226, 1.0347445011138916, 1.050458312034607, 1.0981481075286865, 0.8614773750305176, 0.9322991967201233, 0.8169112801551819, 0.8394115567207336]
iteration 164
epsilon=0.35

Copying file://car-racing-v2-165-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.552110195159912, 1.4846656322479248, 1.3074591159820557, 1.2654331922531128, 1.2642605304718018, 1.1741490364074707, 1.1423851251602173, 1.2645419836044312, 1.0964637994766235, 1.1280581951141357]
iteration 166
epsilon=0.35236916781202265, episode length=999, total rewards=37.54646840148572
Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
loss = [1.541200876235962, 1.2677712440490723, 1.1061948537826538, 1.1015833616256714, 1.0619280338287354, 0.9422764778137207, 0.9437036514282227, 0.9532618522644043, 0.9840382933616638, 0.8801000118255615]
iteration 167
epsilon=0.35084547613390243, episode length=919, total rewards=-0.04597701149501285
Training the model...
experience length=4995
number of examples=2917
best total reward =  [

Copying file://car-racing-v2-168-model06.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 40.2 MiB/ 40.2 MiB] 100% Done                                    
Operation completed over 1 objects/40.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [142.95774647887595, 122.22222222222359, 146.79487179487492]
