In [2]:
import gym
import numpy as np
import copy, random, os, subprocess, cv2
from tensorflow.keras import layers, models

  import imp
  'nearest': pil_image.NEAREST,
  'bilinear': pil_image.BILINEAR,
  'bicubic': pil_image.BICUBIC,
  if hasattr(pil_image, 'HAMMING'):
  if hasattr(pil_image, 'BOX'):
  if hasattr(pil_image, 'LANCZOS'):


In [3]:
MODEL_NAME = 'model05'
BUCKET = 'gs://etsuji-car-racing-v2'
os.environ['BUCKET'] = BUCKET
!gsutil mb -c regional -l us-west1 $BUCKET
!gsutil ls $BUCKET

Creating gs://etsuji-car-racing-v2/...
ServiceException: 409 A Cloud Storage bucket named 'etsuji-car-racing-v2' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.
gs://etsuji-car-racing-v2/model04/


In [4]:
# Base model
class QValue:
    def __init__(self):
        self.model = self.build_model()

    def build_model(self):
        cnn_input = layers.Input(shape=(48, 48, 3), name='cnn_input')
        cnn1 = layers.Conv2D(32, (5, 5), padding='same',
                         use_bias=True, activation='relu',
                        name='cnn1')(cnn_input)
        pool1 = layers.MaxPooling2D((2, 2), name='pool1')(cnn1)
        cnn2 = layers.Conv2D(64, (5, 5), padding='same',
                         use_bias=True, activation='relu',
                        name='cnn2')(pool1)
        pool2 = layers.MaxPooling2D((2, 2), name='pool2')(cnn2)

        cnn_flatten = layers.Flatten(name='flatten')(pool2)
        action_input = layers.Input(shape=(5,), name='action_input')
        combined = layers.concatenate([cnn_flatten, action_input], name='concat')
        hidden1 = layers.Dense(1024, activation='relu', name='dense1')(combined)
        hidden2 = layers.Dense(512, activation='relu', name='dense2')(hidden1)
        q_value = layers.Dense(1, name='output')(hidden2)

        model = models.Model(inputs=[cnn_input, action_input], outputs=q_value)
        model.compile(loss='mse')
        return model

    def get_action(self, state):
        states = []
        actions = []
        for a in range(5):
            states.append(np.array(state))
            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
  
        q_values = self.model.predict([np.array(states), np.array(actions)])
        optimal_action = np.argmax(q_values)
        return optimal_action, q_values[optimal_action][0]

In [15]:
def join_frames(o0, o1, o2):
    gray_image0 = cv2.cvtColor(cv2.resize(o0, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image1 = cv2.cvtColor(cv2.resize(o1, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image2 = cv2.cvtColor(cv2.resize(o2, (48, 48)), cv2.COLOR_RGB2GRAY)
    
    return np.array(
        [gray_image0.transpose(),
         gray_image1.transpose(),
         gray_image2.transpose()]).transpose()

In [11]:
def get_episode(environ, q_value, epsilon):
    episode = []
    o0 = environ.reset()
    o1 = copy.deepcopy(o0)
    o2 = copy.deepcopy(o0)
    total_r = 0

    if epsilon > 0:
        keep_count = 3
    else:
        keep_count = 1

    c = 0
    while True:
        if c % keep_count == 0: # Get new action
            if np.random.random() < epsilon:
                a = np.random.randint(5)
            else:
                a, _ = q_value.get_action(join_frames(o0, o1, o2))
        c += 1
        o_new, r, done, inf = environ.step(a)                
        total_r += r

        # Terminate episode when total reward becomes negative
        if total_r < 0:
            done = 1

        if done:
            # Terminal state is to achieve more than 990 or get out of the field.
            if total_r > 990 or r < -99:
                episode.append((join_frames(o0, o1, o2), a, r, None))
            break
        else:
            episode.append((join_frames(o0, o1, o2), a, r, join_frames(o1, o2, o_new)))
        o0, o1, o2 = o1, o2, o_new

    print('epsilon={}, episode length={}, total rewards={}'.format(epsilon, len(episode), total_r))
    return episode, total_r

In [8]:
def train(environ, q_value, epsilon, checkpoint=0):
    if checkpoint > 0:
        filename = 'car-racing-v2-{}-{}.hd5'.format(checkpoint, MODEL_NAME)
        subprocess.run(['gsutil', 'cp', '{}/{}/{}'.format(BUCKET, MODEL_NAME, filename), './'])
        print('load model {}'.format(filename))
        q_value.model = models.load_model(filename)
        os.remove(filename)

    experience = []
    good_experience = []
    best_r = [-100, -100, -100]

    for n in range(checkpoint + 1, checkpoint + 1000):
        print('iteration {}'.format(n))

        total_len = 0
        if n % 3 == 0:
            print('Testing the current performance...')
            episode, total_r = get_episode(environ, q_value, epsilon=0)
            with open('result.txt', 'a') as f:
                f.write('{},{},{},{}\n'.format(n, epsilon, len(episode), total_r))
            filename = 'car-racing-v2-{}-{}.hd5'.format(n, MODEL_NAME)
            q_value.model.save(filename, save_format='h5')
            subprocess.run(['gsutil', '-m', 'cp',
                            '{}'.format(filename), '{}/{}/'.format(BUCKET, MODEL_NAME)])
            os.remove(filename)
            experience += episode
            total_len += len(episode)

        while total_len < 500:
            episode, total_r = get_episode(environ, q_value, epsilon)
            total_len += len(episode)
            experience += episode

            # Keep the top 3 episodes
            if total_r > min(best_r):
                best_r = best_r[1:] + [total_r]
                good_experience += episode
                if len(good_experience) > 999 * 3:
                    good_experience = good_experience[-999 * 3:]

            
        if len(experience) > 999 * 5: # remember last 5 episodes
            experience = experience[-999 * 5:]

        epsilon = (epsilon - 0.2) * 0.99 + 0.2

        print('Training the model...')
        # Use latest episode + past episodes (sampling) + top 3 episode (sampling)
        latest_experience = experience[-total_len:]
        past_experience = experience[:-total_len]
        examples = latest_experience + \
            random.sample(past_experience, min(len(past_experience), 999)) + \
            random.sample(good_experience, min(len(good_experience), 999))
        
        # Show some statistics
        print('experience length={}'.format(len(experience)))
        print('number of examples={}'.format(len(examples)))
        print('best total reward = ', best_r)
        np.random.shuffle(examples)
                        
        states, actions, labels = [], [], []
        for state, a, r, state_new in examples:
            states.append(np.array(state))

            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
            
            if state_new is None:   # Terminal state
                q_new = 0
            else:
                _, q_new = q_value.get_action(state_new)
            labels.append(np.array(r + q_new))

        hist = q_value.model.fit(
            [np.array(states), np.array(actions)], np.array(labels),
            batch_size=50, epochs=10, verbose=0)
        print('loss = {}'.format(hist.history['loss']))

In [9]:
env = gym.make("CarRacing-v2", continuous=False)
q_value = QValue()
q_value.model.summary()

  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
  "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 cnn_input (InputLayer)         [(None, 48, 48, 3)]  0           []                               
                                                                                                  
 cnn1 (Conv2D)                  (None, 48, 48, 32)   2432        ['cnn_input[0][0]']              
                                                                                                  
 pool1 (MaxPooling2D)           (None, 24, 24, 32)   0           ['cnn1[0][0]']                   
                                                                                                  
 cnn2 (Conv2D)                  (None, 24, 24, 64)   51264       ['pool1[0][0]']                  
                                                                                              

2022-07-31 04:28:35.903310: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-31 04:28:36.019379: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-31 04:28:36.020346: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-31 04:28:36.023118: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [None]:
train(env, q_value, epsilon=1.0, checkpoint=0)

iteration 1
epsilon=1.0, episode length=388, total rewards=-0.030742049469842686
epsilon=1.0, episode length=103, total rewards=-0.019377162629740513
epsilon=1.0, episode length=500, total rewards=-0.09999999999991549
Training the model...
experience length=991
number of examples=1982
best total reward =  [-0.030742049469842686, -0.019377162629740513, -0.09999999999991549]


2022-07-31 04:32:17.516918: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8200


loss = [497956.53125, 73.32007598876953, 77.51648712158203, 92.5329818725586, 120.36689758300781, 170.72515869140625, 140.1612548828125, 72.56980895996094, 43.81856155395508, 40.56475830078125]
iteration 2
epsilon=0.992, episode length=190, total rewards=-0.08859315589349515
epsilon=0.992, episode length=325, total rewards=-0.02671009771977781
Training the model...
experience length=1506
number of examples=2505
best total reward =  [-0.09999999999991549, -0.08859315589349515, -0.02671009771977781]
loss = [21.975669860839844, 20.485933303833008, 17.40968132019043, 18.9661922454834, 14.760263442993164, 14.251534461975098, 11.81113052368164, 11.811808586120605, 10.329019546508789, 10.2379732131958]
iteration 3
Testing the current performance...
epsilon=0, episode length=71, total rewards=-0.05714285714284828


Copying file://car-racing-v2-3-model05.hd5 [Content-Type=application/octet-stream]...
\ [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.9840800000000001, episode length=123, total rewards=-0.05432098765429427
epsilon=0.9840800000000001, episode length=203, total rewards=-0.06101694915252076
epsilon=0.9840800000000001, episode length=132, total rewards=-0.0986798679867793
Training the model...
experience length=2035
number of examples=2527
best total reward =  [-0.02671009771977781, -0.05432098765429427, -0.06101694915252076]
loss = [7.227925777435303, 7.030678749084473, 6.519842624664307, 6.486823558807373, 6.387738227844238, 5.618881702423096, 5.925346374511719, 5.600024223327637, 5.260457992553711, 4.725531101226807]
iteration 4
epsilon=0.9762392000000002, episode length=147, total rewards=-0.09411764705880343
epsilon=0.9762392000000002, episode length=176, total rewards=-0.09436619718307004
epsilon=0.9762392000000002, episode length=247, total rewards=-0.06501766784446716
Training the model...
experience length=2605
number of examples=2568
best total reward =  [-0.02671009771977781, -0.05432098765429427, -

Copying file://car-racing-v2-6-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.9607920399200003, episode length=272, total rewards=-0.0626459143968241
epsilon=0.9607920399200003, episode length=251, total rewards=-0.02014388489201943
Training the model...
experience length=3782
number of examples=2588
best total reward =  [-0.04913294797687426, -0.0024896265559901376, -0.02014388489201943]
loss = [2.2289304733276367, 2.0383129119873047, 4.150683403015137, 2.642827272415161, 2.4622960090637207, 2.098698377609253, 2.0010030269622803, 2.07883620262146, 1.789202094078064, 1.8957163095474243]
iteration 7
epsilon=0.9531841195208004, episode length=179, total rewards=-0.014388489208586125
epsilon=0.9531841195208004, episode length=349, total rewards=-0.034965034964938385
Training the model...
experience length=4310
number of examples=2526
best total reward =  [-0.0024896265559901376, -0.02014388489201943, -0.014388489208586125]
loss = [2.349485397338867, 1.7809693813323975, 1.8405282497406006, 1.6899019479751587, 1.695328712463379, 1.595164179801941, 1.7519478

Copying file://car-racing-v2-9-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.9381957555423366, episode length=217, total rewards=-0.060869565217330335
epsilon=0.9381957555423366, episode length=861, total rewards=-0.057677902621463106
Training the model...
experience length=4995
number of examples=3139
best total reward =  [-0.02014388489201943, -0.014388489208586125, -0.011111111111096167]
loss = [1.5311943292617798, 1.3956103324890137, 1.5761418342590332, 1.4862141609191895, 1.3325836658477783, 1.3423850536346436, 1.4518455266952515, 1.1571041345596313, 1.1944279670715332, 1.2500207424163818]
iteration 10
epsilon=0.9308137979869133, episode length=393, total rewards=-0.029921259842400988
epsilon=0.9308137979869133, episode length=95, total rewards=-0.0761904761904646
epsilon=0.9308137979869133, episode length=401, total rewards=-0.05401459854001911
Training the model...
experience length=4995
number of examples=2887
best total reward =  [-0.02014388489201943, -0.014388489208586125, -0.011111111111096167]
loss = [1.4084522724151611, 1.386745572090149

Copying file://car-racing-v2-12-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.9162706034069739, episode length=92, total rewards=-0.09754601226993587
epsilon=0.9162706034069739, episode length=216, total rewards=-0.039350180505358184
Training the model...
experience length=4995
number of examples=2661
best total reward =  [-0.014388489208586125, -0.011111111111096167, -0.00959409594083846]
loss = [3.5219626426696777, 3.640045166015625, 3.338906764984131, 3.5557608604431152, 3.4159698486328125, 3.273927688598633, 3.0187182426452637, 3.2236390113830566, 2.755357027053833, 2.8076605796813965]
iteration 13
epsilon=0.9091078973729041, episode length=209, total rewards=-0.0419161676646371
epsilon=0.9091078973729041, episode length=142, total rewards=-0.014285714285690448
epsilon=0.9091078973729041, episode length=487, total rewards=-0.019512195121794224
Training the model...
experience length=4995
number of examples=2836
best total reward =  [-0.011111111111096167, -0.00959409594083846, -0.014285714285690448]
loss = [1.0836595296859741, 1.1006426811218262, 1

Copying file://car-racing-v2-15-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.8949966502151834, episode length=205, total rewards=-0.05205479452051365
Training the model...
experience length=4995
number of examples=2556
best total reward =  [-0.011111111111096167, -0.00959409594083846, -0.014285714285690448]
loss = [3.3432400226593018, 3.2490830421447754, 3.1052892208099365, 3.021732807159424, 2.8177032470703125, 2.9533467292785645, 2.4820895195007324, 2.6450557708740234, 2.2355740070343018, 2.151355266571045]
iteration 16
epsilon=0.8880466837130316, episode length=999, total rewards=3.333333333333576
Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.00959409594083846, -0.014285714285690448, 3.333333333333576]
loss = [2.2911741733551025, 1.3311773538589478, 1.4010298252105713, 1.1001911163330078, 0.8507466316223145, 1.079924464225769, 0.8647130727767944, 0.8291235566139221, 1.0499423742294312, 0.8274676203727722]
iteration 17
epsilon=0.8811662168759014, episode length=361, total rewards=-0.015789473684113292


Copying file://car-racing-v2-18-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.8743545547071423, episode length=253, total rewards=-0.0031746031745331915
Training the model...
experience length=4995
number of examples=2606
best total reward =  [-0.014285714285690448, 3.333333333333576, -0.0031746031745331915]
loss = [3.8154795169830322, 1.1525673866271973, 1.0234028100967407, 1.3634904623031616, 1.0095628499984741, 0.8586108088493347, 1.0233110189437866, 0.9764636754989624, 0.943540632724762, 0.8475664854049683]
iteration 19
epsilon=0.8676110091600708, episode length=777, total rewards=-0.06148409893968432
Training the model...
experience length=4995
number of examples=2775
best total reward =  [-0.014285714285690448, 3.333333333333576, -0.0031746031745331915]
loss = [0.7608402967453003, 0.6898556351661682, 0.6981018781661987, 0.6045088768005371, 0.78958660364151, 0.6534606218338013, 0.7417399287223816, 0.6071124076843262, 0.7928460836410522, 0.5897749066352844]
iteration 20
epsilon=0.8609348990684702, episode length=729, total rewards=-0.08333333333348

Copying file://car-racing-v2-21-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.8543255500777855, episode length=664, total rewards=-0.05481727574736728
Training the model...
experience length=4995
number of examples=2918
best total reward =  [-0.014285714285690448, 3.333333333333576, -0.0031746031745331915]
loss = [3.498852491378784, 3.061861515045166, 2.808983325958252, 2.6224515438079834, 2.5817816257476807, 2.3466579914093018, 2.382550001144409, 1.8228368759155273, 1.78472900390625, 1.3287131786346436]
iteration 22
epsilon=0.8477822945770077, episode length=601, total rewards=-0.07341772151927839
Training the model...
experience length=4995
number of examples=2599
best total reward =  [-0.014285714285690448, 3.333333333333576, -0.0031746031745331915]
loss = [0.5700555443763733, 0.5158267617225647, 0.5279706716537476, 0.4570757746696472, 0.4953783452510834, 0.47050103545188904, 0.4821925163269043, 0.47463035583496094, 0.4964725077152252, 0.4764481782913208]
iteration 23
epsilon=0.8413044716312377, episode length=555, total rewards=-0.04444444444453774

Copying file://car-racing-v2-24-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.8348914269149252, episode length=196, total rewards=-0.09215686274507662
epsilon=0.8348914269149252, episode length=999, total rewards=17.64705882352842
Training the model...
experience length=4995
number of examples=3445
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [2.0893714427948, 1.836175560951233, 1.653775691986084, 1.110142707824707, 1.5690406560897827, 1.2177097797393799, 1.2272710800170898, 1.1527700424194336, 1.0528391599655151, 1.181265950202942]
iteration 25
epsilon=0.8285425126457759, episode length=903, total rewards=-0.07741935483879847
Training the model...
experience length=4995
number of examples=2901
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [0.5569828748703003, 0.55074143409729, 0.5211448669433594, 0.5547581315040588, 0.5651679635047913, 0.5194813013076782, 0.5576494336128235, 0.49884557723999023, 0.5587831139564514, 0.523740828037262]
iteration 26
epsilon=0.82225

Copying file://car-racing-v2-27-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.8160345166441252, episode length=449, total rewards=-0.017301038062172952
Training the model...
experience length=4995
number of examples=2801
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [1.2828444242477417, 0.5553992986679077, 0.8409850597381592, 0.9471083879470825, 0.6410599946975708, 0.9679919481277466, 0.686739981174469, 0.9326237440109253, 0.8590971231460571, 0.6780830025672913]
iteration 28
epsilon=0.809874171477684, episode length=374, total rewards=-0.08503401360536267
epsilon=0.809874171477684, episode length=699, total rewards=-0.06993006992983486
Training the model...
experience length=4995
number of examples=3071
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [0.4929082691669464, 0.45262351632118225, 0.4994295835494995, 0.4733004570007324, 0.4631505608558655, 0.46609950065612793, 0.4338272511959076, 0.5610145926475525, 0.4225981533527374, 0.4856003522872925]
iteration 29
eps

Copying file://car-racing-v2-30-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.7977376754652781, episode length=130, total rewards=-0.028104575163371254
epsilon=0.7977376754652781, episode length=363, total rewards=-0.09636963696360579
Training the model...
experience length=4995
number of examples=2846
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [3.324721097946167, 1.6101053953170776, 1.581192135810852, 1.5835926532745361, 0.5496014952659607, 0.6603431105613708, 0.7827950716018677, 0.5346421003341675, 0.7206186056137085, 0.588638424873352]
iteration 31
epsilon=0.7917602987106254, episode length=483, total rewards=-0.012903225806327273
epsilon=0.7917602987106254, episode length=761, total rewards=-0.07543252595170924
Training the model...
experience length=4995
number of examples=3242
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [2.1590194702148438, 0.953299343585968, 1.1209083795547485, 0.5729138851165771, 0.5765265822410583, 0.7249730825424194, 0.6315956115722

Copying file://car-racing-v2-33-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.7799842687662841, episode length=186, total rewards=-0.06645962732915778
epsilon=0.7799842687662841, episode length=372, total rewards=-0.011864406779595188
Training the model...
experience length=4995
number of examples=2799
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [1.106715202331543, 1.5159608125686646, 1.128113031387329, 0.9893078804016113, 0.7380363345146179, 1.000219702720642, 0.8274413347244263, 0.6331974864006042, 0.9074938893318176, 0.5788218975067139]
iteration 34
epsilon=0.7741844260786213, episode length=818, total rewards=-0.04946619217120338
Training the model...
experience length=4995
number of examples=2816
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [1.0009433031082153, 0.46019288897514343, 0.47809189558029175, 0.7262386679649353, 0.4608810245990753, 0.7272069454193115, 0.46138399839401245, 0.6038469076156616, 0.5989439487457275, 0.5241782665252686]
iteration 35
ep

Copying file://car-racing-v2-36-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.7627581559996568, episode length=529, total rewards=-0.019867549668988377
Training the model...
experience length=4995
number of examples=2789
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [2.052180767059326, 1.1438207626342773, 1.1189733743667603, 1.2974244356155396, 1.0722784996032715, 0.9953585863113403, 0.9373793005943298, 1.0073274374008179, 0.9171675443649292, 0.9196226000785828]
iteration 37
epsilon=0.7571305744396604, episode length=655, total rewards=-0.026229508196537327
Training the model...
experience length=4995
number of examples=2653
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [0.9423685669898987, 0.6156235337257385, 0.919951856136322, 0.5402907729148865, 0.8183456659317017, 0.5875948667526245, 0.6755968332290649, 0.66484135389328, 0.7215917110443115, 0.5538152456283569]
iteration 38
epsilon=0.7515592686952639, episode length=501, total rewards=-0.032775919732317166
Trai

Copying file://car-racing-v2-39-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.7460436760083113, episode length=666, total rewards=-0.03333333333378727
Training the model...
experience length=4995
number of examples=3018
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [1.203965425491333, 1.3404018878936768, 1.0889496803283691, 0.6857919096946716, 0.9323981404304504, 1.0184180736541748, 0.7164741158485413, 0.6458435654640198, 1.1103367805480957, 0.7898804545402527]
iteration 40
epsilon=0.7405832392482283, episode length=736, total rewards=-0.015789473684877126
Training the model...
experience length=4995
number of examples=2734
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [0.8567829728126526, 0.6283351182937622, 0.62548828125, 0.5876368284225464, 0.5494902729988098, 0.4896707236766815, 0.5108447074890137, 0.48688775300979614, 0.8548729419708252, 0.778900682926178]
iteration 41
epsilon=0.7351774068557462, episode length=684, total rewards=-0.09609120521152659
Training

Copying file://car-racing-v2-42-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.7298256327871888, episode length=390, total rewards=-0.012052117263739975
Training the model...
experience length=4995
number of examples=2744
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [3.5851495265960693, 1.2891441583633423, 0.9889609813690186, 1.2554634809494019, 0.8458636403083801, 1.051673412322998, 1.6923084259033203, 0.7302728891372681, 1.3783308267593384, 1.4622836112976074]
iteration 43
epsilon=0.724527376459317, episode length=633, total rewards=-0.06666666666652543
Training the model...
experience length=4995
number of examples=2631
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [1.3099040985107422, 0.8268033266067505, 0.857232391834259, 1.19942045211792, 0.470805287361145, 0.5120420455932617, 0.9131293892860413, 0.5395517349243164, 0.6112053990364075, 0.6905163526535034]
iteration 44
epsilon=0.7192821026947238, episode length=772, total rewards=-0.07992277992350805
Training

Copying file://car-racing-v2-45-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.7140892816677766, episode length=285, total rewards=-0.028571428571424223
Training the model...
experience length=4995
number of examples=2637
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [2.282278537750244, 1.7183496952056885, 1.3603235483169556, 1.3728967905044556, 1.432475209236145, 1.0070205926895142, 1.0239571332931519, 1.0507137775421143, 1.1516783237457275, 0.7539210915565491]
iteration 46
epsilon=0.708948388851099, episode length=494, total rewards=-0.030035335689182235
epsilon=0.708948388851099, episode length=793, total rewards=-0.03492063492067851
Training the model...
experience length=4995
number of examples=3285
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [1.1824954748153687, 0.820344090461731, 0.6116275787353516, 0.5438768863677979, 0.9391489624977112, 0.6724061369895935, 0.7729023694992065, 0.6326490044593811, 0.6505194902420044, 0.5584200620651245]
iteration 47
epsilo

Copying file://car-racing-v2-48-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.6988203159129622, episode length=728, total rewards=-0.05231788079446287
Training the model...
experience length=4995
number of examples=3081
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [0.9222967028617859, 0.7779058814048767, 0.6868749260902405, 0.6489223837852478, 0.6236778497695923, 0.6372956037521362, 0.5806983709335327, 0.6552021503448486, 0.5813583731651306, 0.5542186498641968]
iteration 49
epsilon=0.6938321127538326, episode length=167, total rewards=-0.02147651006708637
epsilon=0.6938321127538326, episode length=606, total rewards=-0.006358381502683502
Training the model...
experience length=4995
number of examples=2771
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [0.7729648351669312, 0.6282298564910889, 0.5026937127113342, 0.5311183333396912, 0.5799582600593567, 0.5058860778808594, 0.5049196481704712, 0.5277943015098572, 0.47475188970565796, 0.3911004662513733]
iteration 50
e

Copying file://car-racing-v2-51-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.6840048537100314, episode length=519, total rewards=-0.05194805194838037
Training the model...
experience length=4995
number of examples=2768
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [2.413257598876953, 1.6421233415603638, 1.4514409303665161, 1.3834761381149292, 1.2604293823242188, 1.1966006755828857, 1.0661933422088623, 1.2724143266677856, 1.0104107856750488, 1.0220869779586792]
iteration 52
epsilon=0.6791648051729311, episode length=820, total rewards=-0.010447761194570776
Training the model...
experience length=4995
number of examples=2818
best total reward =  [3.333333333333576, -0.0031746031745331915, 17.64705882352842]
loss = [1.1072735786437988, 1.0421606302261353, 0.9051010608673096, 0.8380135893821716, 0.7370418906211853, 0.8292121291160583, 0.6405931711196899, 0.867093563079834, 0.8648719787597656, 0.8486160039901733]
iteration 53
epsilon=0.6743731571212017, episode length=624, total rewards=-3.030631301470521e-13
Tr

Copying file://car-racing-v2-54-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.6696294255499897, episode length=688, total rewards=-0.05942028985559955
Training the model...
experience length=4995
number of examples=2953
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [1.8008753061294556, 1.5165185928344727, 1.2620971202850342, 1.1646499633789062, 1.2383530139923096, 0.9206383228302002, 1.0276527404785156, 0.8763939142227173, 0.8529763221740723, 0.924213707447052]
iteration 55
epsilon=0.6649331312944898, episode length=953, total rewards=-0.005263157895606357
Training the model...
experience length=4995
number of examples=2951
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [1.1728489398956299, 0.7753567099571228, 0.6772794127464294, 0.7325641512870789, 0.7908435463905334, 0.6780239343643188, 0.5996738076210022, 0.766233503818512, 0.6808677315711975, 0.7692837119102478]
iteration 56
epsilon=0.660283799981545, episode length=827, total rewards=-0.0932330827075

Copying file://car-racing-v2-57-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.6556809619817294, episode length=685, total rewards=-0.051612903226421425
Training the model...
experience length=4995
number of examples=2939
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [1.4431109428405762, 0.9853038787841797, 0.9201428294181824, 1.0914009809494019, 0.8256886601448059, 0.9125093817710876, 0.9255020618438721, 1.000316858291626, 0.8312716484069824, 0.8740122318267822]
iteration 58
epsilon=0.6511241523619121, episode length=558, total rewards=-0.01764705882335635
Training the model...
experience length=4995
number of examples=2556
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [0.7061001062393188, 0.901468813419342, 0.7041938900947571, 0.6896976232528687, 0.6382238268852234, 0.6195440888404846, 0.6651093363761902, 0.6369986534118652, 0.7288380265235901, 0.5194176435470581]
iteration 59
epsilon=0.646612910838293, episode length=466, total rewards=-0.0333333333334

Copying file://car-racing-v2-60-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.64214678172991, episode length=660, total rewards=-0.033933933934400445
Training the model...
experience length=4995
number of examples=3013
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [1.3153389692306519, 1.004672646522522, 0.9659603834152222, 0.8369982242584229, 0.7605209946632385, 0.9953383803367615, 0.6779316067695618, 0.6969565153121948, 0.8652116656303406, 0.6838048100471497]
iteration 61
epsilon=0.6377253139126109, episode length=699, total rewards=-0.06993006993059336
Training the model...
experience length=4995
number of examples=2697
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [0.6274849772453308, 0.6472163200378418, 0.5637319087982178, 0.5875768065452576, 0.5523058772087097, 0.599419891834259, 0.501526415348053, 0.5713607668876648, 0.4921880066394806, 0.5513977408409119]
iteration 62
epsilon=0.6333480607734847, episode length=864, total rewards=-0.033834586467024

Copying file://car-racing-v2-63-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.6290145801657498, episode length=942, total rewards=-0.037704918033150764
Training the model...
experience length=4995
number of examples=3298
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [1.357857584953308, 1.0291118621826172, 0.9143283367156982, 0.8102661371231079, 1.0333223342895508, 0.6959748864173889, 0.9062903523445129, 0.7842885851860046, 0.7842459678649902, 0.8756207227706909]
iteration 64
epsilon=0.6247244343640923, episode length=546, total rewards=-0.09249146757692484
Training the model...
experience length=4995
number of examples=2544
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [0.8364916443824768, 0.7968882322311401, 0.7606201767921448, 0.6576167941093445, 0.6669857501983643, 0.6550719738006592, 0.7007762789726257, 0.7172747254371643, 0.6181891560554504, 0.6612228155136108]
iteration 65
epsilon=0.6204771900204513, episode length=729, total rewards=-0.00729927007

Copying file://car-racing-v2-66-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.6162724181202468, episode length=359, total rewards=-0.02877697841730617
Training the model...
experience length=4995
number of examples=2788
best total reward =  [-0.0031746031745331915, 17.64705882352842, -3.030631301470521e-13]
loss = [2.36440110206604, 1.6316767930984497, 1.5366228818893433, 1.118787407875061, 0.9730510115623474, 1.2869445085525513, 1.272790789604187, 1.2634072303771973, 1.0234261751174927, 0.9538617730140686]
iteration 67
epsilon=0.6121096939390444, episode length=999, total rewards=41.89189189189365
Training the model...
experience length=4995
number of examples=2997
best total reward =  [17.64705882352842, -3.030631301470521e-13, 41.89189189189365]
loss = [1.7540923357009888, 1.3141512870788574, 1.4048393964767456, 1.155868411064148, 1.086592197418213, 1.0247374773025513, 0.9340475797653198, 1.0346732139587402, 0.9885275363922119, 0.8506898283958435]
iteration 68
epsilon=0.6079885969996539, episode length=705, total rewards=-0.08717948717980417
Trainin

Copying file://car-racing-v2-69-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.6039087110296574, episode length=414, total rewards=-0.09872611464973713
Training the model...
experience length=4995
number of examples=2677
best total reward =  [17.64705882352842, -3.030631301470521e-13, 41.89189189189365]
loss = [2.9328453540802, 2.1211721897125244, 1.4519202709197998, 1.353356957435608, 1.2018076181411743, 1.3872435092926025, 1.2926874160766602, 1.1416856050491333, 1.0525473356246948, 1.0038280487060547]
iteration 70
epsilon=0.5998696239193608, episode length=513, total rewards=-0.030136986301625174
Training the model...
experience length=4995
number of examples=2511
best total reward =  [17.64705882352842, -3.030631301470521e-13, 41.89189189189365]
loss = [1.1513715982437134, 0.9982993602752686, 0.928219735622406, 0.8962406516075134, 0.8869704604148865, 0.6542935371398926, 0.7734091877937317, 0.7130682468414307, 0.6641874313354492, 0.7075362205505371]
iteration 71
epsilon=0.5958709276801673, episode length=516, total rewards=-0.03948339483415461
Trainin

Copying file://car-racing-v2-72-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2714
best total reward =  [17.64705882352842, -3.030631301470521e-13, 41.89189189189365]
loss = [2.104163408279419, 1.7689769268035889, 1.3555935621261597, 1.423176884651184, 1.328313946723938, 1.2288455963134766, 1.243077278137207, 1.106361985206604, 1.1354289054870605, 1.2675607204437256]
iteration 73
epsilon=0.5879930962193319, episode length=415, total rewards=-0.09056603773605443
epsilon=0.5879930962193319, episode length=851, total rewards=-0.09361702127697069
Training the model...
experience length=4995
number of examples=3264
best total reward =  [17.64705882352842, -3.030631301470521e-13, 41.89189189189365]
loss = [1.7782223224639893, 1.4496519565582275, 1.197866439819336, 1.1099380254745483, 0.9288415908813477, 1.1519393920898438, 1.0499207973480225, 0.8738952279090881, 0.9343211650848389, 0.6870802044868469]
iteration 74
epsilon=0.5841131652571386, episode length=698, total rewards=-0.04705882352992427
Training 

Copying file://car-racing-v2-75-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.5802720336045673, episode length=869, total rewards=-0.043478260869897783
Training the model...
experience length=4995
number of examples=3239
best total reward =  [17.64705882352842, -3.030631301470521e-13, 41.89189189189365]
loss = [1.8665882349014282, 1.7633237838745117, 1.4061888456344604, 1.154890775680542, 1.485243320465088, 1.111777663230896, 1.2312378883361816, 1.0365945100784302, 1.310936450958252, 1.1688286066055298]
iteration 76
epsilon=0.5764693132685216, episode length=999, total rewards=5.072463768115721
Training the model...
experience length=4995
number of examples=2997
best total reward =  [-3.030631301470521e-13, 41.89189189189365, 5.072463768115721]
loss = [1.2337592840194702, 0.8754615783691406, 0.8127235174179077, 0.8055478930473328, 0.7102243900299072, 0.6862766146659851, 0.7283341288566589, 0.7856983542442322, 0.5601568222045898, 0.6459941267967224]
iteration 77
epsilon=0.5727046201358363, episode length=999, total rewards=1.5037593984952315
Training th

Copying file://car-racing-v2-78-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.568977573934478, episode length=698, total rewards=-0.0470588235299598
Training the model...
experience length=4995
number of examples=3107
best total reward =  [41.89189189189365, 5.072463768115721, 1.5037593984952315]
loss = [2.9470980167388916, 2.4280292987823486, 1.9776955842971802, 1.8173413276672363, 1.6572802066802979, 1.396962285041809, 1.4268356561660767, 1.2478773593902588, 1.148766040802002, 1.0445984601974487]
iteration 79
epsilon=0.5652877981951332, episode length=701, total rewards=-0.024561403509211405
Training the model...
experience length=4995
number of examples=2699
best total reward =  [41.89189189189365, 5.072463768115721, 1.5037593984952315]
loss = [1.4021410942077637, 1.2901729345321655, 1.0496515035629272, 1.22639799118042, 0.9184755086898804, 0.9132993221282959, 0.9421916604042053, 0.8074791431427002, 0.8354675769805908, 0.7869747877120972]
iteration 80
epsilon=0.5616349202131818, episode length=600, total rewards=-0.0293286219084454
Training the mode

Copying file://car-racing-v2-81-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.55801857101105, episode length=365, total rewards=-0.05514950166116006
Training the model...
experience length=4995
number of examples=2822
best total reward =  [41.89189189189365, 5.072463768115721, 1.5037593984952315]
loss = [2.983386754989624, 2.0824921131134033, 1.8411246538162231, 1.612134575843811, 1.6710290908813477, 1.15904700756073, 1.3630141019821167, 1.1173675060272217, 1.0236567258834839, 1.1933257579803467]
iteration 82
epsilon=0.5544383853009396, episode length=684, total rewards=-0.09609120521227621
Training the model...
experience length=4995
number of examples=2682
best total reward =  [41.89189189189365, 5.072463768115721, 1.5037593984952315]
loss = [2.3328142166137695, 1.3390486240386963, 1.2999790906906128, 0.9765086770057678, 0.9073173403739929, 0.8747922778129578, 0.8502081036567688, 0.7949488162994385, 0.7880374193191528, 0.6792415976524353]
iteration 83
epsilon=0.5508940014479302, episode length=999, total rewards=24.08759124087468
Training the model..

Copying file://car-racing-v2-84-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [5.072463768115721, 1.5037593984952315, 24.08759124087468]
loss = [1.1857795715332031, 1.0014183521270752, 0.9478272199630737, 0.8002886772155762, 0.7952078580856323, 0.7609488368034363, 0.6593554019927979, 0.6956826448440552, 0.6520251631736755, 0.605766773223877]
iteration 85
epsilon=0.5439112108191164, episode length=788, total rewards=-0.046953405018642175
Training the model...
experience length=4995
number of examples=2786
best total reward =  [5.072463768115721, 1.5037593984952315, 24.08759124087468]
loss = [1.070356011390686, 0.9284873604774475, 0.7632641196250916, 0.689586877822876, 0.7077963352203369, 0.5624104142189026, 0.5868628621101379, 0.4697819948196411, 0.5141298174858093, 0.5126610398292542]
iteration 86
epsilon=0.5404720987109253, episode length=999, total rewards=75.25773195876428
Training the model...
experience length=4995
number of examples=2997
best total reward =  [1.503759

Copying file://car-racing-v2-87-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [1.5037593984952315, 24.08759124087468, 75.25773195876428]
loss = [1.139431357383728, 0.9153953194618225, 0.784826397895813, 0.7377802729606628, 0.6538912653923035, 0.6121968030929565, 0.577833354473114, 0.5778941512107849, 0.533507227897644, 0.5311519503593445]
iteration 88
epsilon=0.5336967039465779, episode length=638, total rewards=-0.0702127659578704
Training the model...
experience length=4995
number of examples=2636
best total reward =  [1.5037593984952315, 24.08759124087468, 75.25773195876428]
loss = [0.9458646774291992, 0.7275800108909607, 0.6662336587905884, 0.5717129707336426, 0.5131760835647583, 0.47600749135017395, 0.45295384526252747, 0.44231364130973816, 0.4208388328552246, 0.40465235710144043]
iteration 89
epsilon=0.530359736907112, episode length=627, total rewards=-0.09372937293768158
Training the model...
experience length=4995
number of examples=2625
best total reward =  [1.503

Copying file://car-racing-v2-90-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2834
best total reward =  [1.5037593984952315, 24.08759124087468, 75.25773195876428]
loss = [1.030571460723877, 0.7508442401885986, 0.6996337175369263, 0.594640851020813, 0.5266038179397583, 0.48246464133262634, 0.5001101493835449, 0.45066016912460327, 0.4129554331302643, 0.40563371777534485]
iteration 91
epsilon=0.5237855781426605, episode length=999, total rewards=73.91304347826282
Training the model...
experience length=4995
number of examples=2997
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.1510365009307861, 0.8683539628982544, 0.709740161895752, 0.6369318962097168, 0.6118302345275879, 0.5639458298683167, 0.5279794931411743, 0.5061966180801392, 0.4687653183937073, 0.44851934909820557]
iteration 92
epsilon=0.5205477223612338, episode length=902, total rewards=-0.022222222223136273
Training the model...
experience length=4995
number of examples=2900
best total reward =  [24.08

Copying file://car-racing-v2-93-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.0430362224578857, 0.7841914296150208, 0.6771115064620972, 0.6513165831565857, 0.5629785060882568, 0.5369572639465332, 0.48484814167022705, 0.4661869406700134, 0.3939433693885803, 0.4263576865196228]
iteration 94
epsilon=0.5141688226862453, episode length=999, total rewards=9.02255639097653
Training the model...
experience length=4995
number of examples=2997
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [0.9517137408256531, 0.7326025366783142, 0.6251583695411682, 0.5427402257919312, 0.44847798347473145, 0.4411737620830536, 0.41061297059059143, 0.38557910919189453, 0.3644402325153351, 0.3679525852203369]
iteration 95
epsilon=0.5110271344593829, episode length=673, total rewards=-0.05993265993316865
Training the model...
experience length=4995
number of examples=2671
best total reward =  [24.08

Copying file://car-racing-v2-96-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.1478826999664307, 0.8075554370880127, 0.6544278860092163, 0.5930456519126892, 0.5266713500022888, 0.4669381380081177, 0.48426562547683716, 0.39825892448425293, 0.4049897789955139, 0.3835131824016571]
iteration 97
epsilon=0.5048376944836411, episode length=709, total rewards=-0.054054054054682554
Training the model...
experience length=4995
number of examples=2707
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.1285171508789062, 0.8113027215003967, 0.7423256635665894, 0.6160546541213989, 0.5734800100326538, 0.5450025200843811, 0.5313317775726318, 0.4868442714214325, 0.4560778737068176, 0.4137338101863861]
iteration 98
epsilon=0.5017893175388047, episode length=999, total rewards=9.803921568626425
Training the model...
experience length=4995
number of examples=2997
best total reward =  [24.08

Copying file://car-racing-v2-99-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2755
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.0190335512161255, 0.7394989728927612, 0.6281096935272217, 0.520791232585907, 0.48209598660469055, 0.45443734526634216, 0.4094417691230774, 0.4234924018383026, 0.3854541480541229, 0.3737197518348694]
iteration 100
epsilon=0.49578371011978256, episode length=613, total rewards=-0.050306748466696805
Training the model...
experience length=4995
number of examples=2611
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.0320736169815063, 0.6917626261711121, 0.6215695738792419, 0.5353121161460876, 0.44915297627449036, 0.383126437664032, 0.40804997086524963, 0.38721776008605957, 0.35270798206329346, 0.327851802110672]
iteration 101
epsilon=0.4928258730185847, episode length=659, total rewards=-0.0659340659345713
Training the model...
experience length=4995
number of examples=2657
best total reward =  

Copying file://car-racing-v2-102-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2637
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [6.639316082000732, 4.810122966766357, 4.6358866691589355, 4.121555328369141, 4.2683587074279785, 3.7927939891815186, 3.50598406791687, 3.1804959774017334, 3.6041171550750732, 2.9251856803894043]
iteration 103
epsilon=0.48699863814551486, episode length=613, total rewards=-0.050306748466632856
Training the model...
experience length=4995
number of examples=2611
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.060950756072998, 0.7047986388206482, 0.6382108330726624, 0.5335926413536072, 0.5773703455924988, 0.48359331488609314, 0.44051486253738403, 0.3894259035587311, 0.3936831057071686, 0.36227861046791077]
iteration 104
epsilon=0.4841286517640597, episode length=583, total rewards=-0.06666666666698373
Training the model...
experience length=4995
number of examples=2581
best total reward =  [24.0

Copying file://car-racing-v2-105-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.1928399801254272, 0.8991645574569702, 0.7199198007583618, 0.6277778148651123, 0.6064481735229492, 0.5364620089530945, 0.540812611579895, 0.49658483266830444, 0.4575369358062744, 0.4233574867248535]
iteration 106
epsilon=0.47847449159395494, episode length=827, total rewards=-0.06618705036039896
Training the model...
experience length=4995
number of examples=2825
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.0868890285491943, 0.8676477074623108, 0.7054765820503235, 0.5999616384506226, 0.545459508895874, 0.48096367716789246, 0.5092123746871948, 0.41219305992126465, 0.430565744638443, 0.3592899739742279]
iteration 107
epsilon=0.4756897466780154, episode length=477, total rewards=-0.005882352941349839
epsilon=0.4756897466780154, episode length=719, total rewards=-0.08219178082257028
Training 

Copying file://car-racing-v2-108-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.5076899528503418, 1.0656784772872925, 0.8947538137435913, 0.7998509407043457, 0.7143205404281616, 0.6665051579475403, 0.6264547109603882, 0.5920619368553162, 0.5514963865280151, 0.5058534145355225]
iteration 109
epsilon=0.47020352071912286, episode length=642, total rewards=-0.07981651376197729
Training the model...
experience length=4995
number of examples=2640
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.1804931163787842, 0.7681870460510254, 0.6372041702270508, 0.5436990261077881, 0.5184032320976257, 0.48588359355926514, 0.4974319040775299, 0.3891914486885071, 0.41445618867874146, 0.37808969616889954]
iteration 110
epsilon=0.4675014855119316, episode length=574, total rewards=-0.028735632184188037
Training the model...
experience length=4995
number of examples=2572
best total reward = 

Copying file://car-racing-v2-111-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2624
best total reward =  [24.08759124087468, 75.25773195876428, 73.91304347826282]
loss = [1.1341708898544312, 0.8174511194229126, 0.6725425720214844, 0.5715374946594238, 0.4883216619491577, 0.4898691475391388, 0.43475770950317383, 0.44127872586250305, 0.4064972698688507, 0.40870192646980286]
iteration 112
epsilon=0.4621782059502441, episode length=999, total rewards=152.6690391459114
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.931598424911499, 1.3692060708999634, 1.1010397672653198, 0.951572835445404, 0.8644137978553772, 0.8270089626312256, 0.7258343696594238, 0.6455089449882507, 0.6446697115898132, 0.5882770419120789]
iteration 113
epsilon=0.4595564238907417, episode length=641, total rewards=-0.09743589743644887
Training the model...
experience length=4995
number of examples=2639
best total reward =  [75.25

Copying file://car-racing-v2-114-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2668
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.4268943071365356, 1.0561143159866333, 0.8206850290298462, 0.7634483575820923, 0.6331902742385864, 0.6356934905052185, 0.59759920835495, 0.5323843955993652, 0.5950927734375, 0.5131136178970337]
iteration 115
epsilon=0.4543912510553159, episode length=714, total rewards=-0.071428571429075
Training the model...
experience length=4995
number of examples=2712
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.3162355422973633, 0.9536028504371643, 0.7159766554832458, 0.6736997365951538, 0.601542592048645, 0.5650665163993835, 0.47706085443496704, 0.4812612235546112, 0.4102972149848938, 0.46052420139312744]
iteration 116
epsilon=0.4518473385447628, episode length=722, total rewards=-0.09783393501854332
Training the model...
experience length=4995
number of examples=2720
best total reward =  [75.257731

Copying file://car-racing-v2-117-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.8076190948486328, 1.3515044450759888, 1.087653398513794, 0.909282922744751, 0.820083737373352, 0.7040430903434753, 0.654452919960022, 0.637204110622406, 0.5571933388710022, 0.5377086400985718]
iteration 118
epsilon=0.446835576507722, episode length=670, total rewards=-0.007348242811944433
Training the model...
experience length=4995
number of examples=2668
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.3569386005401611, 0.9533999562263489, 0.8488834500312805, 0.6981498003005981, 0.5888993740081787, 0.5594190359115601, 0.5133169293403625, 0.5122829079627991, 0.4363058805465698, 0.4389726221561432]
iteration 119
epsilon=0.4443672207426448, episode length=513, total rewards=-0.0166007905141978
Training the model...
experience length=4995
number of examples=2511
best total reward =  [75.257731

Copying file://car-racing-v2-120-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.482116937637329, 1.031431794166565, 0.8275991082191467, 0.6927410960197449, 0.6419629454612732, 0.5548098087310791, 0.4936597943305969, 0.5530625581741333, 0.4221251308917999, 0.42635297775268555]
iteration 121
epsilon=0.43950431304986615, episode length=999, total rewards=9.374999999998783
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.4258646965026855, 1.0352153778076172, 0.8325256705284119, 0.6535027027130127, 0.6235899925231934, 0.563169002532959, 0.49682125449180603, 0.4855974018573761, 0.44340842962265015, 0.45449772477149963]
iteration 122
epsilon=0.4371092699193675, episode length=999, total rewards=0.9771986970673213
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.257

Copying file://car-racing-v2-123-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2755
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.3810844421386719, 1.0531948804855347, 0.7926797866821289, 0.7133623361587524, 0.615498960018158, 0.5556087493896484, 0.541887104511261, 0.594642162322998, 0.4330329895019531, 0.43321937322616577]
iteration 124
epsilon=0.4323907954479721, episode length=627, total rewards=-0.0937293729377171
Training the model...
experience length=4995
number of examples=2625
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.2576857805252075, 0.8132943511009216, 0.6940861344337463, 0.607728123664856, 0.5757874250411987, 0.49041131138801575, 0.48288270831108093, 0.4478297531604767, 0.47367173433303833, 0.3899587094783783]
iteration 125
epsilon=0.43006688749349237, episode length=397, total rewards=-0.06490066225176716
epsilon=0.43006688749349237, episode length=816, total rewards=-0.0006535947715289037
Training

Copying file://car-racing-v2-126-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [2.140963315963745, 1.573423147201538, 1.2478764057159424, 1.044353723526001, 0.9669548273086548, 0.8036229014396667, 0.7006946802139282, 0.7115846872329712, 0.6391270756721497, 0.5908447504043579]
iteration 127
epsilon=0.4254885564323719, episode length=285, total rewards=-0.02857142857134784
epsilon=0.4254885564323719, episode length=357, total rewards=-0.08571428571436823
Training the model...
experience length=4995
number of examples=2640
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.3546862602233887, 0.9200774431228638, 0.691228449344635, 0.6061144471168518, 0.5213446617126465, 0.4826376140117645, 0.4604887068271637, 0.45222556591033936, 0.4103226065635681, 0.3390599191188812]
iteration 128
epsilon=0.4232336708680482, episode length=345, total rewards=-0.008805031446443595
epsilon=0.423

Copying file://car-racing-v2-129-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [2.1691322326660156, 1.474553108215332, 1.157832384109497, 1.0204238891601562, 0.8601197004318237, 0.751265823841095, 0.6771012544631958, 0.6547584533691406, 0.6106274127960205, 0.5798192024230957]
iteration 130
epsilon=0.41879132081777404, episode length=999, total rewards=25.82781456953513
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.8375117778778076, 1.2505472898483276, 1.002120018005371, 0.869202196598053, 0.7792233228683472, 0.7140499949455261, 0.677060604095459, 0.5752940773963928, 0.5781600475311279, 0.5572290420532227]
iteration 131
epsilon=0.4166034076095963, episode length=999, total rewards=27.208480565369634
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.2577319587

Copying file://car-racing-v2-132-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2747
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.6263149976730347, 1.1309148073196411, 0.8566509485244751, 0.7691370248794556, 0.6544361710548401, 0.6206356883049011, 0.5990090370178223, 0.564493715763092, 0.5564634799957275, 0.47417983412742615]
iteration 133
epsilon=0.4122929997981653, episode length=939, total rewards=-0.04026845637664886
Training the model...
experience length=4995
number of examples=2937
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.3748220205307007, 1.002288579940796, 0.863208532333374, 0.7295663952827454, 0.6507718563079834, 0.597630500793457, 0.49672067165374756, 0.521742582321167, 0.4719234108924866, 0.4445982277393341]
iteration 134
epsilon=0.41017006980018367, episode length=836, total rewards=-0.07630662020971468
Training the model...
experience length=4995
number of examples=2834
best total reward =  [75.25

Copying file://car-racing-v2-135-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.272135615348816, 0.9752482175827026, 0.7724495530128479, 0.7428632378578186, 0.6973618865013123, 0.6120471954345703, 0.5475561022758484, 0.5496432185173035, 0.5276427268981934, 0.4573637843132019]
iteration 136
epsilon=0.40598768541116, episode length=999, total rewards=29.41176470588225
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.317747712135315, 1.0023869276046753, 0.8506779670715332, 0.7288777232170105, 0.6888731718063354, 0.6081501245498657, 0.5881330370903015, 0.5018438696861267, 0.5443405508995056, 0.5454287528991699]
iteration 137
epsilon=0.40392780855704835, episode length=683, total rewards=-0.07701863354093885
Training the model...
experience length=4995
number of examples=2681
best total reward =  [75.257731

Copying file://car-racing-v2-138-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2750
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.1603069305419922, 0.8031254410743713, 0.6992073655128479, 0.6040629148483276, 0.4686689078807831, 0.465989887714386, 0.44609034061431885, 0.4546511769294739, 0.3699447214603424, 0.46759411692619324]
iteration 139
epsilon=0.3998696451667631, episode length=592, total rewards=-0.08947368421099466
Training the model...
experience length=4995
number of examples=2590
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.154991626739502, 0.8089195489883423, 0.7501609921455383, 0.6157647967338562, 0.5231108069419861, 0.5213884711265564, 0.5021880269050598, 0.46617940068244934, 0.4412212669849396, 0.41120821237564087]
iteration 140
epsilon=0.39787094871509543, episode length=653, total rewards=-0.04052287581753797
Training the model...
experience length=4995
number of examples=2651
best total reward =  [

Copying file://car-racing-v2-141-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2727
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.5560064315795898, 1.0215868949890137, 0.8695216774940491, 0.7133852243423462, 0.6128798127174377, 0.6323032379150391, 0.5366612672805786, 0.5531967878341675, 0.4726918339729309, 0.45792990922927856]
iteration 142
epsilon=0.393933316835665, episode length=573, total rewards=-0.05232974910440302
Training the model...
experience length=4995
number of examples=2571
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.1544065475463867, 0.8548988103866577, 0.6864573359489441, 0.5811874866485596, 0.4989851713180542, 0.5216990113258362, 0.412926584482193, 0.40706416964530945, 0.41901031136512756, 0.405527800321579]
iteration 143
epsilon=0.39199398366730837, episode length=434, total rewards=-0.021739130434928117
epsilon=0.39199398366730837, episode length=607, total rewards=-0.00972644376949569
Training

Copying file://car-racing-v2-144-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.2533504962921143, 0.8743680119514465, 0.7020566463470459, 0.622241199016571, 0.549788773059845, 0.5313137173652649, 0.4917841851711273, 0.49101153016090393, 0.45918339490890503, 0.4344579875469208]
iteration 145
epsilon=0.3881733033923289, episode length=999, total rewards=9.090909090909644
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.5441209077835083, 1.0475990772247314, 0.9424029588699341, 0.7362011075019836, 0.6227644681930542, 0.6001958847045898, 0.502267599105835, 0.5385076403617859, 0.46386587619781494, 0.5216278433799744]
iteration 146
epsilon=0.38629157035840567, episode length=709, total rewards=-0.05405405405466834
Training the model...
experience length=4995
number of examples=2707
best total reward =  [75.25

Copying file://car-racing-v2-147-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2860
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [2.0746326446533203, 1.429121494293213, 1.1657214164733887, 1.0233227014541626, 0.8524881601333618, 0.7547666430473328, 0.6959413886070251, 0.6011554598808289, 0.5898230075836182, 0.5445877313613892]
iteration 148
epsilon=0.3825843681082734, episode length=999, total rewards=11.11111111111209
Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.8402435779571533, 1.1568657159805298, 0.9488109350204468, 0.8733465075492859, 0.6776414513587952, 0.6521179676055908, 0.5931077003479004, 0.5045831203460693, 0.5466151237487793, 0.5211262106895447]
iteration 149
epsilon=0.38075852442719066, episode length=919, total rewards=-0.045977011495140746
Training the model...
experience length=4995
number of examples=2917
best total reward =  [75.25

Copying file://car-racing-v2-150-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [75.25773195876428, 73.91304347826282, 152.6690391459114]
loss = [1.5243598222732544, 1.0361104011535645, 0.8799396753311157, 0.7113568782806396, 0.6305214166641235, 0.6360960602760315, 0.5697787404060364, 0.5284315347671509, 0.5135882496833801, 0.48115143179893494]
iteration 151
epsilon=0.3771614297910896, episode length=999, total rewards=91.72932330827456
Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [1.829496145248413, 1.2900629043579102, 1.2809841632843018, 0.9673483967781067, 0.8708583116531372, 0.7508760094642639, 0.6882330179214478, 0.6751083731651306, 0.6685245037078857, 0.5985960364341736]
iteration 152
epsilon=0.3753898154931787, episode length=746, total rewards=-0.024675324675926563
Training the model...
experience length=4995
number of examples=2744
best total reward =  [73.91

Copying file://car-racing-v2-153-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2553
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [1.4319441318511963, 1.0783714056015015, 0.8139504790306091, 0.6852425336837769, 0.6770604848861694, 0.5482031106948853, 0.49584248661994934, 0.5130893588066101, 0.6047360897064209, 0.3941842317581177]
iteration 154
epsilon=0.3718995581648644, episode length=239, total rewards=-0.027397260273930274
epsilon=0.3718995581648644, episode length=481, total rewards=-0.00722891566297898
Training the model...
experience length=4995
number of examples=2718
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [4.369564056396484, 2.89276123046875, 2.2896506786346436, 1.9774900674819946, 1.720872163772583, 1.4535927772521973, 1.337352991104126, 1.219805121421814, 1.2208874225616455, 1.056881070137024]
iteration 155
epsilon=0.37018056258321574, episode length=999, total rewards=31.75675675675796
Training the model

Copying file://car-racing-v2-156-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [3.4782040119171143, 2.4764626026153564, 1.8724501132965088, 1.6062936782836914, 1.419724464416504, 1.2428101301193237, 1.0393720865249634, 0.9904287457466125, 0.9267544150352478, 0.8109138011932373]
iteration 157
epsilon=0.3667939693878097, episode length=990, total rewards=-0.02879256966044716
Training the model...
experience length=4995
number of examples=2988
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.4762158393859863, 1.790851354598999, 1.4134714603424072, 1.0810062885284424, 1.0084251165390015, 0.8406563997268677, 0.7745055556297302, 0.7201212644577026, 0.6613888144493103, 0.6322588324546814]
iteration 158
epsilon=0.3651260296939316, episode length=999, total rewards=69.23076923077046
Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304

Copying file://car-racing-v2-159-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [3.3635833263397217, 2.226428985595703, 1.716568112373352, 1.5229487419128418, 1.1983271837234497, 1.108383297920227, 1.0242289304733276, 0.8651761412620544, 0.8608453273773193, 0.8021299242973328]
iteration 160
epsilon=0.3618400217030224, episode length=799, total rewards=-7.329414852819127e-13
Training the model...
experience length=4995
number of examples=2797
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.421161651611328, 1.643596887588501, 1.312626838684082, 1.0486317873001099, 0.8697054386138916, 0.8375006914138794, 0.785038948059082, 0.759766161441803, 0.6743930578231812, 0.6848164796829224]
iteration 161
epsilon=0.3602216214859921, episode length=557, total rewards=-0.03791821561381534
Training the model...
experience length=4995
number of examples=2555
best total reward =  [73.913043

Copying file://car-racing-v2-162-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2815
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [1.7487295866012573, 1.2302321195602417, 0.9023054242134094, 0.8261368274688721, 0.7601382732391357, 0.6992889046669006, 0.6998132467269897, 0.6281905174255371, 0.5910331606864929, 0.5214632153511047]
iteration 163
epsilon=0.3570332112184209, episode length=780, total rewards=-0.03308550185945444
Training the model...
experience length=4995
number of examples=2778
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [1.7629382610321045, 1.1829798221588135, 0.9998125433921814, 0.8211488127708435, 0.7318783402442932, 0.6674777269363403, 0.6644459962844849, 0.6180412173271179, 0.5456271171569824, 0.5606416463851929]
iteration 164
epsilon=0.3554628791062367, episode length=687, total rewards=-0.071477663230753
Training the model...
experience length=4995
number of examples=2685
best total reward =  [73.91

Copying file://car-racing-v2-165-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.1057803630828857, 1.4040809869766235, 1.087026596069336, 1.0297836065292358, 0.8232790231704712, 0.7690690159797668, 0.7025633454322815, 0.7298285365104675, 0.6593638062477112, 0.5927238464355469]
iteration 166
epsilon=0.35236916781202265, episode length=696, total rewards=-0.0030303030308982126
Training the model...
experience length=4995
number of examples=2694
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [1.7078580856323242, 1.1650885343551636, 0.9336650967597961, 0.76528400182724, 0.7299075722694397, 0.6507054567337036, 0.5466790795326233, 0.5206111073493958, 0.5283284783363342, 0.48013848066329956]
iteration 167
epsilon=0.35084547613390243, episode length=700, total rewards=-0.036305732484650494
Training the model...
experience length=4995
number of examples=2698
best total reward =  [

Copying file://car-racing-v2-168-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [3.164069652557373, 1.9997191429138184, 1.5945098400115967, 1.4307795763015747, 1.1993528604507446, 1.0321706533432007, 0.9667755365371704, 0.8648139238357544, 0.7475517392158508, 0.744491457939148]
iteration 169
epsilon=0.34784365115883775, episode length=999, total rewards=28.301886792454106
Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.2038841247558594, 1.49929678440094, 1.2645628452301025, 1.056747555732727, 0.8859207630157471, 0.864189624786377, 0.7866703867912292, 0.7230390906333923, 0.7148630619049072, 0.6410773992538452]
iteration 170
epsilon=0.34636521464724934, episode length=553, total rewards=-0.01538461538493971
Training the model...
experience length=4995
number of examples=2551
best total reward =  [73.913043

Copying file://car-racing-v2-171-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [3.285597801208496, 2.2934906482696533, 1.9252337217330933, 1.5892267227172852, 1.3667057752609253, 1.2093051671981812, 1.0848067998886108, 1.0117921829223633, 0.9700334072113037, 0.813295304775238]
iteration 172
epsilon=0.3434525468757691, episode length=706, total rewards=-0.06802973977754445
Training the model...
experience length=4995
number of examples=2704
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.202746868133545, 1.4572935104370117, 1.219610333442688, 0.9740487933158875, 0.8264188170433044, 0.8650000691413879, 0.6912790536880493, 0.7396464347839355, 0.6664881110191345, 0.6195645928382874]
iteration 173
epsilon=0.34201802140701143, episode length=590, total rewards=-0.07222222222270003
Training the model...
experience length=4995
number of examples=2588
best total reward =  [73.913

Copying file://car-racing-v2-174-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [3.0801217555999756, 2.282042980194092, 1.8932875394821167, 1.524375319480896, 1.407930612564087, 1.2038997411727905, 1.1132704019546509, 1.111411690711975, 1.0148663520812988, 0.9155839681625366]
iteration 175
epsilon=0.33919186278101193, episode length=699, total rewards=-0.06993006993061823
Training the model...
experience length=4995
number of examples=2697
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.4006435871124268, 1.5821149349212646, 1.305027723312378, 1.1388273239135742, 1.0141881704330444, 0.9839016795158386, 0.929658055305481, 0.8050780892372131, 0.7164949178695679, 0.6971663236618042]
iteration 176
epsilon=0.3377999441532018, episode length=788, total rewards=-95.66750902527117
Training the model...
experience length=4995
number of examples=2786
best total reward =  [73.9130434

Copying file://car-racing-v2-177-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.974863290786743, 2.0879507064819336, 1.6391669511795044, 1.4048372507095337, 1.3551994562149048, 1.268775463104248, 1.1142672300338745, 1.0304837226867676, 0.9736603498458862, 0.9505938291549683]
iteration 178
epsilon=0.335057725264553, episode length=542, total rewards=-0.08313253012088606
Training the model...
experience length=4995
number of examples=2540
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.5530385971069336, 1.742264986038208, 1.2992600202560425, 1.1639561653137207, 1.0142358541488647, 0.9262828230857849, 0.8149410486221313, 0.774014949798584, 0.8511953353881836, 0.7169889807701111]
iteration 179
epsilon=0.3337071480119075, episode length=902, total rewards=-0.0472924187730481
Training the model...
experience length=4995
number of examples=2900
best total reward =  [73.913043

Copying file://car-racing-v2-180-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [3.155683994293213, 2.2374799251556396, 1.7671703100204468, 1.3987431526184082, 1.3629579544067383, 1.2407547235488892, 1.053694486618042, 1.0734071731567383, 0.9817141890525818, 0.9011819362640381]
iteration 181
epsilon=0.33104637576647056, episode length=795, total rewards=-0.015224913495459819
Training the model...
experience length=4995
number of examples=2793
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.4950790405273438, 1.5964080095291138, 1.4172228574752808, 1.1784336566925049, 1.0308053493499756, 0.9821369051933289, 0.8447822332382202, 0.9516935348510742, 0.7912801504135132, 0.7514464855194092]
iteration 182
epsilon=0.3297359120088058, episode length=581, total rewards=-0.01818181818227879
Training the model...
experience length=4995
number of examples=2579
best total reward =  [73.

Copying file://car-racing-v2-183-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.334299087524414, 1.6164218187332153, 1.3301764726638794, 1.155916452407837, 1.0370017290115356, 0.9527666568756104, 0.91585773229599, 0.8207958340644836, 0.843853771686554, 0.7461945414543152]
iteration 184
epsilon=0.3271541673598306, episode length=903, total rewards=-0.07741935483962803
Training the model...
experience length=4995
number of examples=2901
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.3551220893859863, 1.7501965761184692, 1.3721686601638794, 1.057976484298706, 1.0217002630233765, 1.1491307020187378, 0.8575817942619324, 0.7892702221870422, 0.6723652482032776, 0.7311376929283142]
iteration 185
epsilon=0.3258826256862323, episode length=618, total rewards=-0.010749185668204236
Training the model...
experience length=4995
number of examples=2616
best total reward =  [73.91304

Copying file://car-racing-v2-186-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.3108725547790527, 1.7186943292617798, 1.4338804483413696, 1.29298996925354, 1.1405271291732788, 1.0799375772476196, 1.0395258665084839, 0.9109302759170532, 0.905738353729248, 0.8591124415397644]
iteration 187
epsilon=0.32337756143507623, episode length=915, total rewards=-0.07457627118712762
Training the model...
experience length=4995
number of examples=2913
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [2.600104808807373, 1.6786315441131592, 1.356380820274353, 1.1791465282440186, 1.1267940998077393, 1.1276077032089233, 0.8957350850105286, 0.874752402305603, 0.8699058890342712, 0.8239172101020813]
iteration 188
epsilon=0.32214378582072545, episode length=747, total rewards=-0.06690391459145784
Training the model...
experience length=4995
number of examples=2745
best total reward =  [73.9130

Copying file://car-racing-v2-189-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [73.91304347826282, 152.6690391459114, 91.72932330827456]
loss = [3.5992181301116943, 2.330528497695923, 1.8742836713790894, 1.7597382068634033, 1.3534663915634155, 1.3141705989837646, 1.1637623310089111, 1.0842821598052979, 0.967388927936554, 0.956950306892395]
iteration 190
epsilon=0.319713124482893, episode length=999, total rewards=86.5671641791071
Training the model...
experience length=4995
number of examples=2997
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [3.6000864505767822, 2.313015937805176, 1.9474269151687622, 1.637481927871704, 1.4143741130828857, 1.2511730194091797, 1.152806043624878, 1.0473206043243408, 1.0880165100097656, 0.9403066039085388]
iteration 191
epsilon=0.3185159932380641, episode length=999, total rewards=13.207547169810558
Training the model...
experience length=4995
number of examples=2997
best total reward =  [152.6690391459114

Copying file://car-racing-v2-192-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.31733083330568346, episode length=999, total rewards=27.659574468083775
Training the model...
experience length=4995
number of examples=3287
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [10.850943565368652, 8.38315200805664, 7.0879364013671875, 6.310129165649414, 5.923567295074463, 5.198509216308594, 4.838589668273926, 4.400611400604248, 3.855299472808838, 3.474484920501709]
iteration 193
epsilon=0.3161575249726266, episode length=543, total rewards=-87.79375000000054
Training the model...
experience length=4995
number of examples=2541
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [14.709819793701172, 12.03351879119873, 9.434746742248535, 8.697474479675293, 6.911134719848633, 6.624709606170654, 6.007226943969727, 5.164710521697998, 4.850130081176758, 3.9493191242218018]
iteration 194
epsilon=0.31499594972290035, episode length=729, total rewards=-0.08333333333399051
Training the model...
experience

Copying file://car-racing-v2-195-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.31384599022567133, episode length=749, total rewards=-6.121492202026957e-13
Training the model...
experience length=4995
number of examples=3148
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [9.564370155334473, 6.7194085121154785, 5.596277713775635, 4.500685691833496, 3.851344108581543, 3.4872026443481445, 3.0321545600891113, 1.6851823329925537, 1.9032500982284546, 1.3856052160263062]
iteration 196
epsilon=0.31270753032341464, episode length=706, total rewards=-0.02862190812788265
Training the model...
experience length=4995
number of examples=2704
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [6.3714470863342285, 3.062748908996582, 3.8999392986297607, 2.296269416809082, 2.2637786865234375, 2.060297966003418, 1.5376081466674805, 1.4240961074829102, 1.5241471529006958, 1.3445146083831787]
iteration 197
epsilon=0.3115804550201805, episode length=777, total rewards=-0.06148409894069684
Training the mod

Copying file://car-racing-v2-198-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2513
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [13.625189781188965, 8.525350570678711, 7.90167760848999, 7.098214149475098, 6.900543689727783, 6.299736976623535, 5.83449125289917, 4.7903361320495605, 4.699406147003174, 2.8112497329711914]
iteration 199
epsilon=0.3093600039652789, episode length=729, total rewards=-0.08333333333404025
Training the model...
experience length=4995
number of examples=2727
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [4.3382697105407715, 3.066485643386841, 1.998840093612671, 1.4430903196334839, 1.894931435585022, 1.5407367944717407, 1.1725056171417236, 1.5534536838531494, 1.1416895389556885, 1.5786842107772827]
iteration 200
epsilon=0.3082664039256261, episode length=999, total rewards=6.312292358802994
Training the model...
experience length=4995
number of examples=2997
best total reward =  [152.6690391459114, 9

Copying file://car-racing-v2-201-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2633
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [7.883615016937256, 5.455199241638184, 3.8706884384155273, 3.030719518661499, 2.4605438709259033, 1.7596298456192017, 1.4973584413528442, 1.2541834115982056, 1.6144877672195435, 1.0909795761108398]
iteration 202
epsilon=0.30611190248750614, episode length=743, total rewards=-0.07567567567630218
Training the model...
experience length=4995
number of examples=2741
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [7.012628078460693, 3.20931077003479, 2.598137378692627, 1.4989840984344482, 2.1527838706970215, 1.4058290719985962, 1.5852723121643066, 1.1651806831359863, 1.3646436929702759, 1.3796045780181885]
iteration 203
epsilon=0.3050507834626311, episode length=999, total rewards=19.999999999999066
Training the model...
experience length=4995
number of examples=2997
best total reward =  [152.669039145

Copying file://car-racing-v2-204-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2499
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [5.360953330993652, 3.530078887939453, 2.384992837905884, 2.5748536586761475, 2.0321946144104004, 2.007324457168579, 1.775685429573059, 1.597687840461731, 1.3895512819290161, 1.7477694749832153]
iteration 205
epsilon=0.3029602728717248, episode length=736, total rewards=-0.015789473684912653
Training the model...
experience length=4995
number of examples=2734
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [2.5288186073303223, 1.6781262159347534, 1.3591554164886475, 1.5629621744155884, 1.2623019218444824, 1.2265452146530151, 1.0215517282485962, 1.3422489166259766, 0.8953169584274292, 0.7783674001693726]
iteration 206
epsilon=0.30193067014300756, episode length=993, total rewards=-0.06225165563023549
Training the model...
experience length=4995
number of examples=2991
best total reward =  [152.66903

Copying file://car-racing-v2-207-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.3009113634415775, episode length=895, total rewards=-0.09382716049474138
Training the model...
experience length=4995
number of examples=3221
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [5.521160125732422, 3.63250470161438, 2.25933575630188, 2.50425124168396, 2.294508218765259, 2.1552886962890625, 1.7693053483963013, 1.5571463108062744, 1.735709309577942, 1.4516040086746216]
iteration 208
epsilon=0.29990224980716174, episode length=999, total rewards=56.996587030720896
Training the model...
experience length=4995
number of examples=2997
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [4.8843607902526855, 3.2490293979644775, 2.622763156890869, 2.3116343021392822, 1.8238787651062012, 1.7304171323776245, 1.4739078283309937, 2.0720691680908203, 1.465455174446106, 1.3576743602752686]
iteration 209
epsilon=0.29890322730909014, episode length=999, total rewards=63.194444444448315
Training the model...
expe

Copying file://car-racing-v2-210-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [3.5169765949249268, 2.310636043548584, 1.9252934455871582, 1.850530982017517, 1.5652856826782227, 1.4476077556610107, 1.2065528631210327, 1.2949081659317017, 1.1964694261550903, 1.2152557373046875]
iteration 211
epsilon=0.2969350530856393, episode length=630, total rewards=-0.03693693693743791
Training the model...
experience length=4995
number of examples=2628
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [3.1965932846069336, 2.1541900634765625, 2.0121190547943115, 1.747780680656433, 1.7921797037124634, 1.320353388786316, 1.49696683883667, 1.1657488346099854, 1.4640147686004639, 1.112752914428711]
iteration 212
epsilon=0.2959657025547829, episode length=990, total rewards=-0.058466453674627106
Training the model...
experience length=4995
number of examples=2988
best total reward =  [152.6690391

Copying file://car-racing-v2-213-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [152.6690391459114, 91.72932330827456, 86.5671641791071]
loss = [3.87359619140625, 2.694331407546997, 2.3344595432281494, 1.9778906106948853, 1.8887815475463867, 1.6444333791732788, 1.580410122871399, 1.348365068435669, 1.4716646671295166, 1.2338948249816895]
iteration 214
epsilon=0.2940559850739427, episode length=999, total rewards=211.5942028985524
Training the model...
experience length=4995
number of examples=2997
best total reward =  [91.72932330827456, 86.5671641791071, 211.5942028985524]
loss = [4.06450891494751, 3.002437114715576, 2.507751941680908, 2.014604091644287, 1.7707029581069946, 1.5705519914627075, 1.4523426294326782, 1.3702079057693481, 1.3033384084701538, 1.3828879594802856]
iteration 215
epsilon=0.2931154252232033, episode length=999, total rewards=45.510835913314416
Training the model...
experience length=4995
number of examples=2997
best total reward =  [91.72932330827456, 8

Copying file://car-racing-v2-216-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.2921842709709712, episode length=999, total rewards=121.12211221122556
Training the model...
experience length=4995
number of examples=3488
best total reward =  [86.5671641791071, 211.5942028985524, 121.12211221122556]
loss = [11.46253776550293, 8.645593643188477, 7.679253101348877, 6.2319111824035645, 4.934104919433594, 4.866926670074463, 2.691462278366089, 3.301388740539551, 2.479060649871826, 1.9719654321670532]
iteration 217
epsilon=0.2912624282612615, episode length=999, total rewards=30.718954248364803
Training the model...
experience length=4995
number of examples=2997
best total reward =  [86.5671641791071, 211.5942028985524, 121.12211221122556]
loss = [3.8554441928863525, 2.9176454544067383, 2.1894423961639404, 1.9836260080337524, 1.6611846685409546, 1.5613129138946533, 1.531025767326355, 1.305500864982605, 1.3081680536270142, 1.2295242547988892]
iteration 218
epsilon=0.29034980397864885, episode length=999, total rewards=116.7832167832199
Training the model...
exper

Copying file://car-racing-v2-219-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2886
best total reward =  [211.5942028985524, 121.12211221122556, 116.7832167832199]
loss = [15.38322925567627, 5.77067232131958, 3.8945748805999756, 3.074688673019409, 2.6619765758514404, 1.9145174026489258, 2.4752588272094727, 1.625767469406128, 1.6963976621627808, 2.317979335784912]
iteration 220
epsilon=0.2885518428794737, episode length=999, total rewards=292.0265780730829
Training the model...
experience length=4995
number of examples=2997
best total reward =  [121.12211221122556, 116.7832167832199, 292.0265780730829]
loss = [6.239496231079102, 4.597565174102783, 3.2644643783569336, 2.9468202590942383, 2.427520513534546, 2.165046453475952, 2.3814985752105713, 1.9925390481948853, 1.9740877151489258, 1.6456050872802734]
iteration 221
epsilon=0.28766632445067897, episode length=999, total rewards=45.11041009463834
Training the model...
experience length=4995
number of examples=2997
best total reward =  [121.122112211225

Copying file://car-racing-v2-222-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [121.12211221122556, 116.7832167832199, 292.0265780730829]
loss = [4.937504768371582, 3.3342831134796143, 2.5491185188293457, 2.4481282234191895, 2.0075631141662598, 1.7061586380004883, 1.7085107564926147, 1.5174723863601685, 1.5886503458023071, 1.4610199928283691]
iteration 223
epsilon=0.28592176459411045, episode length=999, total rewards=160.41666666666887
Training the model...
experience length=4995
number of examples=2997
best total reward =  [116.7832167832199, 292.0265780730829, 160.41666666666887]
loss = [5.318639755249023, 3.1508848667144775, 2.6559226512908936, 2.20491886138916, 1.9686294794082642, 1.9284486770629883, 1.9930206537246704, 1.6800156831741333, 1.481971263885498, 1.734098196029663]
iteration 224
epsilon=0.28506254694816935, episode length=999, total rewards=71.0037174721236
Training the model...
experience length=4995
number of examples=2997
best total reward =  [116.7832167

Copying file://car-racing-v2-225-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [116.7832167832199, 292.0265780730829, 160.41666666666887]
loss = [4.509082794189453, 3.0037577152252197, 2.3971118927001953, 1.9049458503723145, 1.8136552572250366, 1.511248230934143, 1.449825644493103, 1.275705099105835, 1.3040566444396973, 1.0828564167022705]
iteration 226
epsilon=0.2833698022639008, episode length=999, total rewards=167.32673267326896
Training the model...
experience length=4995
number of examples=2997
best total reward =  [292.0265780730829, 160.41666666666887, 167.32673267326896]
loss = [4.374678134918213, 2.91894268989563, 2.32358980178833, 1.9147378206253052, 1.7266230583190918, 1.6519672870635986, 1.5115007162094116, 1.4049029350280762, 1.3614901304244995, 1.4216173887252808]
iteration 227
epsilon=0.2825361042412618, episode length=999, total rewards=136.30136986301764
Training the model...
experience length=4995
number of examples=2997
best total reward =  [292.026578073

Copying file://car-racing-v2-228-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2569
best total reward =  [292.0265780730829, 160.41666666666887, 167.32673267326896]
loss = [7.508143901824951, 7.697579383850098, 4.174933910369873, 3.765063524246216, 1.7027157545089722, 2.2216014862060547, 1.844918131828308, 2.250373363494873, 4.673914432525635, 1.1775628328323364]
iteration 229
epsilon=0.2808936357668607, episode length=878, total rewards=-0.0621621621629711
Training the model...
experience length=4995
number of examples=2876
best total reward =  [292.0265780730829, 160.41666666666887, 167.32673267326896]
loss = [3.3071811199188232, 2.149099349975586, 1.7237679958343506, 1.5080121755599976, 5.7530317306518555, 1.0984551906585693, 1.2526657581329346, 1.4276446104049683, 1.0463311672210693, 2.9728939533233643]
iteration 230
epsilon=0.2800846994091921, episode length=999, total rewards=182.05128205128622
Training the model...
experience length=4995
number of examples=2997
best total reward =  [160.416666

Copying file://car-racing-v2-231-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.2792838524151002, episode length=999, total rewards=56.86274509804199
Training the model...
experience length=4995
number of examples=3392
best total reward =  [160.41666666666887, 167.32673267326896, 182.05128205128622]
loss = [4.78996467590332, 3.1936981678009033, 3.154780864715576, 2.395827054977417, 1.9292585849761963, 2.0970964431762695, 1.6351343393325806, 1.4495322704315186, 3.651339054107666, 1.4527863264083862]
iteration 232
epsilon=0.27849101389094916, episode length=999, total rewards=139.28571428571914
Training the model...
experience length=4995
number of examples=2997
best total reward =  [160.41666666666887, 167.32673267326896, 182.05128205128622]
loss = [3.885042905807495, 2.6175906658172607, 2.311694622039795, 1.6670557260513306, 1.8755384683609009, 1.4181263446807861, 1.4418843984603882, 2.8469150066375732, 1.0394797325134277, 1.0614428520202637]
iteration 233
epsilon=0.27770610375203963, episode length=999, total rewards=133.21554770318224
Training the mode

Copying file://car-racing-v2-234-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2862
best total reward =  [160.41666666666887, 167.32673267326896, 182.05128205128622]
loss = [13.60085678100586, 10.209778785705566, 8.39789867401123, 7.443601131439209, 6.930002689361572, 5.975379943847656, 6.766822814941406, 5.35172176361084, 5.309146404266357, 4.964600563049316]
iteration 235
epsilon=0.27615975228737405, episode length=999, total rewards=82.9652996845437
Training the model...
experience length=4995
number of examples=2997
best total reward =  [160.41666666666887, 167.32673267326896, 182.05128205128622]
loss = [3.8696203231811523, 2.892451763153076, 2.1958932876586914, 1.6794586181640625, 2.0679142475128174, 1.2580029964447021, 1.6597975492477417, 1.1416577100753784, 1.4887957572937012, 1.079563856124878]
iteration 236
epsilon=0.2753981547645003, episode length=999, total rewards=168.65671641791485
Training the model...
experience length=4995
number of examples=2997
best total reward =  [167.32673267326

Copying file://car-racing-v2-237-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2577
best total reward =  [167.32673267326896, 182.05128205128622, 168.65671641791485]
loss = [10.158795356750488, 6.781628131866455, 5.844723224639893, 5.216931343078613, 3.150294065475464, 7.14906120300293, 1.3350681066513062, 2.3824410438537598, 1.250127911567688, 1.8057836294174194]
iteration 238
epsilon=0.27389773148468677, episode length=999, total rewards=168.81720430107947
Training the model...
experience length=4995
number of examples=2997
best total reward =  [182.05128205128622, 168.65671641791485, 168.81720430107947]
loss = [7.725337505340576, 6.564803123474121, 2.31143856048584, 2.1758997440338135, 2.011270761489868, 1.8343851566314697, 1.8514292240142822, 1.3846420049667358, 1.8206212520599365, 1.410492181777954]
iteration 239
epsilon=0.2731587541698399, episode length=999, total rewards=13.47517730496492
Training the model...
experience length=4995
number of examples=2997
best total reward =  [182.0512820512

Copying file://car-racing-v2-240-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2717
best total reward =  [182.05128205128622, 168.65671641791485, 168.81720430107947]
loss = [6.519869327545166, 3.6982486248016357, 2.790792942047119, 2.2194247245788574, 1.7296415567398071, 2.2503273487091064, 1.6949177980422974, 1.733201503753662, 1.5045335292816162, 1.3163713216781616]
iteration 241
epsilon=0.2717028949618601, episode length=999, total rewards=131.83391003460557
Training the model...
experience length=4995
number of examples=2997
best total reward =  [182.05128205128622, 168.65671641791485, 168.81720430107947]
loss = [3.8976101875305176, 4.344901084899902, 2.0701990127563477, 1.9148019552230835, 1.686060905456543, 1.5408787727355957, 6.920621871948242, 1.0746877193450928, 1.2832921743392944, 1.1971455812454224]
iteration 242
epsilon=0.2709858660122415, episode length=999, total rewards=212.69349845201677
Training the model...
experience length=4995
number of examples=2997
best total reward =  [168.656

Copying file://car-racing-v2-243-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2701
best total reward =  [168.65671641791485, 168.81720430107947, 212.69349845201677]
loss = [7.546799659729004, 4.494481086730957, 3.3794474601745605, 1.9360491037368774, 4.15701150894165, 2.0337653160095215, 1.7440271377563477, 3.2189109325408936, 2.0012612342834473, 1.024560809135437]
iteration 244
epsilon=0.2695732472785979, episode length=999, total rewards=229.07348242811986
Training the model...
experience length=4995
number of examples=2997
best total reward =  [168.81720430107947, 212.69349845201677, 229.07348242811986]
loss = [8.104907989501953, 6.153772830963135, 4.031822681427002, 3.3891754150390625, 3.766676902770996, 2.8330533504486084, 2.9288227558135986, 2.3591809272766113, 2.7122254371643066, 1.9972829818725586]
iteration 245
epsilon=0.2688775148058119, episode length=999, total rewards=80.07662835249515
Training the model...
experience length=4995
number of examples=2997
best total reward =  [168.8172043

Copying file://car-racing-v2-246-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [168.81720430107947, 212.69349845201677, 229.07348242811986]
loss = [5.06120491027832, 3.2092201709747314, 2.5943856239318848, 2.2001760005950928, 2.0642666816711426, 1.8150601387023926, 1.6230885982513428, 1.3868759870529175, 1.3232778310775757, 1.408657193183899]
iteration 247
epsilon=0.26750685226117626, episode length=999, total rewards=164.7058823529437
Training the model...
experience length=4995
number of examples=2997
best total reward =  [168.81720430107947, 212.69349845201677, 229.07348242811986]
loss = [5.015689373016357, 3.2722034454345703, 2.5591859817504883, 2.2868447303771973, 1.9328514337539673, 1.8048683404922485, 1.7337183952331543, 2.2970669269561768, 1.399487018585205, 1.5933599472045898]
iteration 248
epsilon=0.26683178373856453, episode length=999, total rewards=290.41095890410315
Training the model...
experience length=4995
number of examples=2997
best total reward =  [212.6

Copying file://car-racing-v2-249-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [212.69349845201677, 229.07348242811986, 290.41095890410315]
loss = [6.86725378036499, 4.486087799072266, 3.5114052295684814, 2.8244597911834717, 2.538936138153076, 2.1205949783325195, 1.8115001916885376, 1.7279999256134033, 1.5867646932601929, 1.9824047088623047]
iteration 250
epsilon=0.2655018312421671, episode length=999, total rewards=279.310344827576
Training the model...
experience length=4995
number of examples=2997
best total reward =  [229.07348242811986, 290.41095890410315, 279.310344827576]
loss = [5.155695915222168, 3.592639446258545, 2.636976718902588, 2.126951217651367, 1.862931489944458, 1.8410841226577759, 1.5889381170272827, 1.5134027004241943, 1.5191446542739868, 1.349900245666504]
iteration 251
epsilon=0.26484681292974543, episode length=999, total rewards=159.61538461538913
Training the model...
experience length=4995
number of examples=2997
best total reward =  [229.0734824281

Copying file://car-racing-v2-252-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [229.07348242811986, 290.41095890410315, 279.310344827576]
loss = [6.85711145401001, 3.8222835063934326, 3.430223226547241, 2.562286138534546, 2.2154712677001953, 2.015796661376953, 1.884558916091919, 1.6172701120376587, 1.6610136032104492, 1.5541197061538696]
iteration 253
epsilon=0.2635563613524435, episode length=999, total rewards=189.03654485050225
Training the model...
experience length=4995
number of examples=2997
best total reward =  [229.07348242811986, 290.41095890410315, 279.310344827576]
loss = [5.208257675170898, 3.5366573333740234, 2.774831533432007, 2.326425790786743, 1.9669946432113647, 2.1570827960968018, 1.69303297996521, 1.6224653720855713, 1.6096787452697754, 1.5609374046325684]
iteration 254
epsilon=0.2629207977389191, episode length=999, total rewards=396.37681159419907
Training the model...
experience length=4995
number of examples=2997
best total reward =  [290.410958904103

Copying file://car-racing-v2-255-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [290.41095890410315, 279.310344827576, 396.37681159419907]
loss = [4.414884567260742, 2.968489408493042, 2.4290614128112793, 1.8161568641662598, 1.7513222694396973, 1.8054516315460205, 1.3491586446762085, 1.5288738012313843, 1.2164509296417236, 1.2462642192840576]
iteration 256
epsilon=0.26166867386391457, episode length=999, total rewards=154.90196078431805
Training the model...
experience length=4995
number of examples=2997
best total reward =  [290.41095890410315, 279.310344827576, 396.37681159419907]
loss = [4.107914924621582, 3.023460865020752, 2.5102121829986572, 2.195345640182495, 2.277844190597534, 1.5731734037399292, 1.6138055324554443, 1.6262061595916748, 1.409464955329895, 1.607308268547058]
iteration 257
epsilon=0.26105198712527544, episode length=999, total rewards=296.5014577259464
Training the model...
experience length=4995
number of examples=2997
best total reward =  [279.31034482

Copying file://car-racing-v2-258-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2981
best total reward =  [279.310344827576, 396.37681159419907, 296.5014577259464]
loss = [11.164591789245605, 7.187561511993408, 4.815587997436523, 3.6639091968536377, 3.554931640625, 2.8392598628997803, 2.515171766281128, 2.859036684036255, 2.224316120147705, 2.5652565956115723]
iteration 259
epsilon=0.25983705258148243, episode length=999, total rewards=282.97872340425073
Training the model...
experience length=4995
number of examples=2997
best total reward =  [396.37681159419907, 296.5014577259464, 282.97872340425073]
loss = [6.110864639282227, 4.40675163269043, 3.2684950828552246, 2.7520666122436523, 2.4000227451324463, 2.198951005935669, 2.074984550476074, 1.9279046058654785, 1.839158535003662, 1.7464231252670288]
iteration 260
epsilon=0.2592386820556676, episode length=999, total rewards=137.2881355932239
Training the model...
experience length=4995
number of examples=2997
best total reward =  [396.37681159419907, 

Copying file://car-racing-v2-261-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [396.37681159419907, 296.5014577259464, 282.97872340425073]
loss = [6.064323425292969, 3.944399833679199, 3.4911046028137207, 2.715344190597534, 2.5953004360198975, 2.2967379093170166, 2.2524707317352295, 2.0900461673736572, 2.1990506649017334, 1.7974672317504883]
iteration 262
epsilon=0.2580598322827598, episode length=999, total rewards=401.792114695339
Training the model...
experience length=4995
number of examples=2997
best total reward =  [296.5014577259464, 282.97872340425073, 401.792114695339]
loss = [5.618377685546875, 3.768204689025879, 3.336411237716675, 2.3239760398864746, 2.3091137409210205, 2.1193230152130127, 6.008462429046631, 1.598178744316101, 1.9126955270767212, 1.9638417959213257]
iteration 263
epsilon=0.2574792339599322, episode length=999, total rewards=65.51724137931467
Training the model...
experience length=4995
number of examples=2997
best total reward =  [296.501457725946

Copying file://car-racing-v2-264-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [296.5014577259464, 282.97872340425073, 401.792114695339]
loss = [5.8054656982421875, 3.886814594268799, 3.125520706176758, 2.8607983589172363, 2.3900938034057617, 2.0179190635681152, 1.8878284692764282, 1.8358334302902222, 1.71857488155365, 1.8959671258926392]
iteration 265
epsilon=0.25633539720412957, episode length=999, total rewards=420.134228187909
Training the model...
experience length=4995
number of examples=2997
best total reward =  [282.97872340425073, 401.792114695339, 420.134228187909]
loss = [7.058922290802002, 4.681288242340088, 3.378700017929077, 3.028794527053833, 2.599275588989258, 2.3717849254608154, 2.1277105808258057, 2.033200263977051, 2.017296314239502, 1.629888892173767]
iteration 266
epsilon=0.25577204323208824, episode length=904, total rewards=-55.68461538461255
Training the model...
experience length=4995
number of examples=2902
best total reward =  [282.97872340425073, 

Copying file://car-racing-v2-267-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [282.97872340425073, 401.792114695339, 420.134228187909]
loss = [9.79921817779541, 5.322723388671875, 4.150643348693848, 3.776998996734619, 3.55053973197937, 2.8451948165893555, 2.641197681427002, 2.6923837661743164, 2.400118589401245, 2.4663960933685303]
iteration 268
epsilon=0.2546621795717697, episode length=999, total rewards=375.0830564783973
Training the model...
experience length=4995
number of examples=2997
best total reward =  [401.792114695339, 420.134228187909, 375.0830564783973]
loss = [7.0861334800720215, 4.935572624206543, 5.091352462768555, 3.150458812713623, 3.095621109008789, 2.6206812858581543, 2.585503101348877, 2.419461727142334, 2.050177812576294, 2.4404001235961914]
iteration 269
epsilon=0.254115557776052, episode length=999, total rewards=505.63380281689143
Training the model...
experience length=4995
number of examples=2997
best total reward =  [420.134228187909, 375.083056

Copying file://car-racing-v2-270-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [420.134228187909, 375.0830564783973, 505.63380281689143]
loss = [8.648662567138672, 5.951032638549805, 4.824203968048096, 4.023968696594238, 3.183250665664673, 2.9693408012390137, 2.89980149269104, 2.6796295642852783, 2.5496582984924316, 2.367645740509033]
iteration 271
epsilon=0.25303865817630855, episode length=999, total rewards=428.36879432623533
Training the model...
experience length=4995
number of examples=2997
best total reward =  [375.0830564783973, 505.63380281689143, 428.36879432623533]
loss = [5.932701587677002, 4.155456066131592, 3.7654144763946533, 2.9264883995056152, 2.6630053520202637, 2.5820677280426025, 2.3905177116394043, 2.0924360752105713, 2.20813250541687, 2.1799001693725586]
iteration 272
epsilon=0.2525082715945455, episode length=999, total rewards=314.9659863945498
Training the model...
experience length=4995
number of examples=2997
best total reward =  [375.0830564783973

Copying file://car-racing-v2-273-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2554
best total reward =  [375.0830564783973, 505.63380281689143, 428.36879432623533]
loss = [9.639116287231445, 6.869977951049805, 5.162199974060059, 4.167790412902832, 3.415726900100708, 2.8776261806488037, 2.574047327041626, 2.7307145595550537, 2.445641279220581, 1.9577677249908447]
iteration 274
epsilon=0.25146335698981404, episode length=999, total rewards=128.57142857143248
Training the model...
experience length=4995
number of examples=2997
best total reward =  [375.0830564783973, 505.63380281689143, 428.36879432623533]
loss = [5.143599510192871, 3.563943862915039, 3.42916202545166, 2.7792699337005615, 2.386610746383667, 2.126060724258423, 2.1442642211914062, 2.1297497749328613, 1.8446083068847656, 1.9845523834228516]
iteration 275
epsilon=0.2509487234199159, episode length=999, total rewards=196.6360856269162
Training the model...
experience length=4995
number of examples=2997
best total reward =  [375.083056478397

Copying file://car-racing-v2-276-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [375.0830564783973, 505.63380281689143, 428.36879432623533]
loss = [7.550224781036377, 5.0965256690979, 3.7816073894500732, 3.640575408935547, 3.1227450370788574, 2.736142635345459, 2.5308468341827393, 2.927834987640381, 2.21272349357605, 2.443389892578125]
iteration 277
epsilon=0.24993484382385955, episode length=999, total rewards=240.13605442176444
Training the model...
experience length=4995
number of examples=2997
best total reward =  [375.0830564783973, 505.63380281689143, 428.36879432623533]
loss = [6.240758895874023, 4.075380802154541, 3.307141065597534, 3.2098381519317627, 2.8270788192749023, 2.6421008110046387, 2.2845497131347656, 2.1250908374786377, 2.137819290161133, 2.1877870559692383]
iteration 278
epsilon=0.24943549538562096, episode length=999, total rewards=412.9151291512843
Training the model...
experience length=4995
number of examples=2997
best total reward =  [505.633802816891

Copying file://car-racing-v2-279-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [505.63380281689143, 428.36879432623533, 412.9151291512843]
loss = [4.653014659881592, 3.3661978244781494, 3.02052640914917, 2.5459132194519043, 2.4478940963745117, 2.2222976684570312, 2.004443645477295, 1.9891504049301147, 1.8347429037094116, 2.0715558528900146]
iteration 280
epsilon=0.24845172902744708, episode length=999, total rewards=458.8235294117607
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [5.334341049194336, 3.839541435241699, 3.2209255695343018, 2.908470392227173, 2.583312511444092, 2.531270980834961, 2.3884165287017822, 2.3514833450317383, 1.9840506315231323, 2.1368167400360107]
iteration 281
epsilon=0.24796721173717262, episode length=648, total rewards=3.7210526315813013
Training the model...
experience length=4995
number of examples=2646
best total reward =  [428.36879432

Copying file://car-racing-v2-282-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2524
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [11.211054801940918, 7.259082317352295, 4.640863418579102, 3.956571102142334, 3.2909293174743652, 2.9752097129821777, 3.115349531173706, 2.348036050796509, 3.661367177963257, 2.5000808238983154]
iteration 283
epsilon=0.2470126642236029, episode length=735, total rewards=-0.0705882352947722
Training the model...
experience length=4995
number of examples=2733
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [4.705402374267578, 3.200268507003784, 2.755755662918091, 2.8471007347106934, 2.330763578414917, 2.2371182441711426, 2.299769401550293, 1.903080940246582, 2.120073080062866, 1.8698456287384033]
iteration 284
epsilon=0.24654253758136688, episode length=999, total rewards=322.53521126759324
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.368794326235

Copying file://car-racing-v2-285-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2526
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [12.753658294677734, 8.522157669067383, 6.500118255615234, 4.770941257476807, 3.9516172409057617, 3.149029016494751, 2.290498971939087, 2.2911744117736816, 2.707869291305542, 2.256056785583496]
iteration 286
epsilon=0.24561634108349767, episode length=999, total rewards=168.37060702875857
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [6.626152992248535, 4.640212059020996, 3.9983677864074707, 3.962040424346924, 3.4561121463775635, 2.8669803142547607, 2.4957261085510254, 2.6570916175842285, 2.6524150371551514, 2.036853790283203]
iteration 287
epsilon=0.24516017767266268, episode length=999, total rewards=264.77987421384086
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.3687943262

Copying file://car-racing-v2-288-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [7.678726673126221, 5.384105205535889, 4.341264724731445, 4.185922622680664, 3.225841999053955, 2.8572115898132324, 2.7879374027252197, 2.502385139465332, 2.602966785430908, 2.3693666458129883]
iteration 289
epsilon=0.2442614901369767, episode length=999, total rewards=31.20567375886639
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [9.458452224731445, 4.407021999359131, 3.307541608810425, 2.989654064178467, 2.7883870601654053, 2.4228522777557373, 2.203200340270996, 2.308739423751831, 2.1642160415649414, 1.94307279586792]
iteration 290
epsilon=0.24381887523560694, episode length=999, total rewards=233.33333333333783
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 

Copying file://car-racing-v2-291-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.24338068648325087, episode length=999, total rewards=183.08823529412186
Training the model...
experience length=4995
number of examples=3472
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [27.27790641784668, 16.51205825805664, 13.318389892578125, 10.565122604370117, 10.483759880065918, 8.403282165527344, 8.069180488586426, 6.732051849365234, 6.540959358215332, 5.852104187011719]
iteration 292
epsilon=0.24294687961841838, episode length=999, total rewards=353.376205787776
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [7.991931438446045, 6.106972694396973, 4.957935810089111, 4.746392726898193, 6.368388652801514, 3.511932134628296, 3.609511137008667, 3.443052053451538, 3.2762656211853027, 2.896541118621826]
iteration 293
epsilon=0.2425174108222342, episode length=999, total rewards=132.63888888889363
Training the model...
experienc

Copying file://car-racing-v2-294-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.24209223671401184, episode length=999, total rewards=109.72644376900129
Training the model...
experience length=4995
number of examples=3293
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [18.322317123413086, 12.982230186462402, 18.423311233520508, 8.948190689086914, 9.18932819366455, 7.785010814666748, 4.481208324432373, 3.0956766605377197, 3.5694587230682373, 3.4453768730163574]
iteration 295
epsilon=0.24167131434687172, episode length=999, total rewards=65.01650165016703
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [6.129881858825684, 4.596248149871826, 3.9091386795043945, 3.746910572052002, 4.1135172843933105, 3.517319679260254, 2.7536215782165527, 3.164857864379883, 2.6404001712799072, 2.4506351947784424]
iteration 296
epsilon=0.241254601203403, episode length=999, total rewards=105.88235294118047
Training the model...
exp

Copying file://car-racing-v2-297-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2512
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [22.058124542236328, 16.946273803710938, 11.809793472290039, 9.268977165222168, 2.9299447536468506, 3.1564013957977295, 3.9188849925994873, 2.7699031829833984, 2.6067824363708496, 3.2709202766418457]
iteration 298
epsilon=0.24043363463945527, episode length=999, total rewards=320.13888888888545
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [9.350180625915527, 5.689618110656738, 4.927953243255615, 3.9878029823303223, 3.873692035675049, 3.648925542831421, 3.9823622703552246, 3.008687734603882, 2.992506265640259, 3.2290596961975098]
iteration 299
epsilon=0.24002929829306072, episode length=999, total rewards=243.51145038167792
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.3687943

Copying file://car-racing-v2-300-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [10.644331932067871, 7.195060729980469, 5.395909309387207, 4.149945259094238, 4.180046558380127, 3.491948366165161, 3.331132650375366, 3.1249380111694336, 3.1018619537353516, 3.087989091873169]
iteration 301
epsilon=0.2392327152570288, episode length=999, total rewards=272.8813559322068
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [9.500093460083008, 5.0315470695495605, 4.1317548751831055, 4.0200395584106445, 3.2410120964050293, 3.1918444633483887, 3.2033207416534424, 2.9681103229522705, 2.576927900314331, 2.7247862815856934]
iteration 302
epsilon=0.2388403881044585, episode length=999, total rewards=150.83612040134196
Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623

Copying file://car-racing-v2-303-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [428.36879432623533, 412.9151291512843, 458.8235294117607]
loss = [6.585061073303223, 4.679040908813477, 4.3074116706848145, 3.6949217319488525, 3.1150877475738525, 2.968414068222046, 2.8903145790100098, 2.393906354904175, 2.562837600708008, 2.6011435985565186]
iteration 304
epsilon=0.2380674643811798, episode length=999, total rewards=474.46808510637254
Training the model...
experience length=4995
number of examples=2997
best total reward =  [412.9151291512843, 458.8235294117607, 474.46808510637254]
loss = [9.01379680633545, 6.201435089111328, 5.280611038208008, 4.006303310394287, 3.834820508956909, 3.577892303466797, 3.072704792022705, 3.235445737838745, 2.901905059814453, 2.8553497791290283]
iteration 305
epsilon=0.23768678973736798, episode length=999, total rewards=410.4166666666609
Training the model...
experience length=4995
number of examples=2997
best total reward =  [412.9151291512843, 4

Copying file://car-racing-v2-306-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2838
best total reward =  [412.9151291512843, 458.8235294117607, 474.46808510637254]
loss = [11.75999641418457, 8.360794067382812, 5.19780158996582, 5.273709774017334, 4.31441593170166, 3.454261541366577, 3.3486738204956055, 2.765078067779541, 3.1711385250091553, 3.0429611206054688]
iteration 307
epsilon=0.23693682262159435, episode length=999, total rewards=728.6852589641359
Training the model...
experience length=4995
number of examples=2997
best total reward =  [458.8235294117607, 474.46808510637254, 728.6852589641359]
loss = [9.27065658569336, 6.104935646057129, 5.322549343109131, 4.529474258422852, 3.6859750747680664, 3.9828014373779297, 3.186289072036743, 3.5314319133758545, 3.125084161758423, 2.9918551445007324]
iteration 308
epsilon=0.2365674543953784, episode length=999, total rewards=232.22591362126775
Training the model...
experience length=4995
number of examples=2997
best total reward =  [458.8235294117607, 47

Copying file://car-racing-v2-309-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [458.8235294117607, 474.46808510637254, 728.6852589641359]
loss = [8.927865028381348, 6.108809947967529, 5.042445659637451, 5.163394451141357, 3.583676815032959, 3.4283111095428467, 3.383890390396118, 3.4753928184509277, 2.785522937774658, 2.9738266468048096]
iteration 310
epsilon=0.23583976205291038, episode length=999, total rewards=492.3344947735103
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [8.092629432678223, 6.53542947769165, 4.520315170288086, 4.30068302154541, 3.9302260875701904, 3.7122650146484375, 3.4528932571411133, 3.1767427921295166, 3.2113728523254395, 3.000692367553711]
iteration 311
epsilon=0.23548136443238127, episode length=999, total rewards=362.406015037587
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 

Copying file://car-racing-v2-312-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2539
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [13.345776557922363, 8.937498092651367, 5.970963478088379, 6.867417335510254, 3.9873993396759033, 3.6111762523651123, 3.479914426803589, 3.271320104598999, 3.4189488887786865, 2.7280378341674805]
iteration 313
epsilon=0.23477528528017688, episode length=999, total rewards=232.203389830513
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [11.379858016967773, 6.232601165771484, 5.142349720001221, 4.839292049407959, 4.31943941116333, 3.8218977451324463, 3.369955539703369, 3.2241764068603516, 3.128981113433838, 3.107919454574585]
iteration 314
epsilon=0.23442753242737513, episode length=999, total rewards=326.6211604095572
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254,

Copying file://car-racing-v2-315-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [9.631107330322266, 8.124750137329102, 5.639931678771973, 5.431299686431885, 4.499927043914795, 4.346014976501465, 3.678203821182251, 3.419725179672241, 3.1139864921569824, 3.0568153858184814]
iteration 316
epsilon=0.23374242453207036, episode length=999, total rewards=278.88198757764224
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [8.0327787399292, 5.594496250152588, 4.883405685424805, 3.568403720855713, 5.7987518310546875, 3.012457847595215, 3.1534183025360107, 2.9991068840026855, 2.759688138961792, 3.510512113571167]
iteration 317
epsilon=0.23340500028674965, episode length=999, total rewards=242.6966292134882
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 7

Copying file://car-racing-v2-318-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [8.729926109313965, 9.409906387329102, 4.65031623840332, 3.927030324935913, 3.8835625648498535, 3.0166049003601074, 3.6003665924072266, 2.8067846298217773, 2.773498058319092, 2.731502056121826]
iteration 319
epsilon=0.23274024078104333, episode length=999, total rewards=446.5838509316694
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [8.636553764343262, 6.15131139755249, 5.567974090576172, 4.648523330688477, 4.058825492858887, 3.7071962356567383, 3.645434856414795, 3.2343955039978027, 3.7767996788024902, 2.8788771629333496]
iteration 320
epsilon=0.2324128383732329, episode length=999, total rewards=342.8571428571351
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 

Copying file://car-racing-v2-321-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [13.309990882873535, 8.585955619812012, 7.104246616363525, 6.655025959014893, 5.573605537414551, 5.304263114929199, 4.123225212097168, 3.944200038909912, 4.419590950012207, 3.5748212337493896]
iteration 322
epsilon=0.23176782288960557, episode length=999, total rewards=54.09836065574008
Training the model...
experience length=4995
number of examples=2997
best total reward =  [474.46808510637254, 728.6852589641359, 492.3344947735103]
loss = [13.40256118774414, 8.589253425598145, 7.141876697540283, 6.36475944519043, 4.765539169311523, 4.88848352432251, 3.8589322566986084, 3.9279017448425293, 3.6468963623046875, 3.729468822479248]
iteration 323
epsilon=0.2314501446607095, episode length=999, total rewards=709.5238095238021
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.

Copying file://car-racing-v2-324-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [10.946407318115234, 7.9578399658203125, 6.679181098937988, 5.139514446258545, 5.052485466003418, 4.477835655212402, 4.090760231018066, 4.0172834396362305, 3.32230806350708, 7.078414440155029]
iteration 325
epsilon=0.23082428678196137, episode length=999, total rewards=367.3202614379042
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [9.413361549377441, 6.930604934692383, 6.4065632820129395, 5.003548622131348, 4.570117473602295, 4.283291339874268, 3.7480459213256836, 3.858285903930664, 3.5700876712799072, 3.4278523921966553]
iteration 326
epsilon=0.23051604391414177, episode length=999, total rewards=359.45945945945317
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 4

Copying file://car-racing-v2-327-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [10.282182693481445, 7.96444034576416, 6.502971649169922, 8.910576820373535, 4.905785083770752, 5.055173873901367, 4.314322471618652, 5.336523532867432, 3.338557720184326, 3.8398332595825195]
iteration 328
epsilon=0.22990877464025034, episode length=999, total rewards=361.8320610687049
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [10.25543212890625, 7.947099208831787, 6.016724586486816, 5.504332542419434, 4.980016231536865, 4.1595964431762695, 4.361783504486084, 3.5393495559692383, 4.233853816986084, 3.642895460128784]
iteration 329
epsilon=0.22960968689384784, episode length=999, total rewards=179.86348122867352
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.

Copying file://car-racing-v2-330-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [11.225208282470703, 8.450529098510742, 6.753507614135742, 6.297032833099365, 7.154341697692871, 4.670450210571289, 5.363761901855469, 3.906240463256836, 3.80256986618042, 3.987638473510742]
iteration 331
epsilon=0.22902045412466027, episode length=999, total rewards=408.305647840526
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [10.360372543334961, 7.146200180053711, 6.3608808517456055, 5.371167182922363, 5.02900505065918, 5.2318291664123535, 4.036722183227539, 4.428660869598389, 4.704485893249512, 3.462283134460449]
iteration 332
epsilon=0.22873024958341368, episode length=999, total rewards=375.6944444444407
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.334

Copying file://car-racing-v2-333-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [44.53326416015625, 8.453150749206543, 7.422415256500244, 6.904504299163818, 6.023172855377197, 21.872270584106445, 4.039896488189697, 5.306557655334473, 4.8024773597717285, 3.755009174346924]
iteration 334
epsilon=0.22815851761670375, episode length=999, total rewards=360.26490066224636
Training the model...
experience length=4995
number of examples=2997
best total reward =  [728.6852589641359, 492.3344947735103, 709.5238095238021]
loss = [8.559979438781738, 6.891619682312012, 5.3783278465271, 4.685591697692871, 5.084589004516602, 4.23214054107666, 3.737940788269043, 4.4811692237854, 3.8403193950653076, 4.851566314697266]
iteration 335
epsilon=0.22787693244053672, episode length=999, total rewards=520.8178438661604
Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 709.52380

Copying file://car-racing-v2-336-model05.hd5 [Content-Type=application/octet-stream]...
\ [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 709.5238095238021, 520.8178438661604]
loss = [13.218494415283203, 8.838120460510254, 7.564863204956055, 6.406482696533203, 6.491943836212158, 5.468796730041504, 4.762940883636475, 4.920272350311279, 4.1665215492248535, 3.8511173725128174]
iteration 337
epsilon=0.22732218148497002, episode length=999, total rewards=214.0243902439077
Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 709.5238095238021, 520.8178438661604]
loss = [16.972763061523438, 10.432402610778809, 6.742830276489258, 6.045625686645508, 5.857810974121094, 5.688381195068359, 4.528194427490234, 4.117099285125732, 4.479449272155762, 5.721538066864014]
iteration 338
epsilon=0.22704895967012031, episode length=999, total rewards=454.38596491227094
Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 70

Copying file://car-racing-v2-339-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 709.5238095238021, 520.8178438661604]
loss = [16.231739044189453, 10.735726356506348, 9.372146606445312, 8.073925971984863, 7.223562717437744, 6.421531677246094, 6.162381649017334, 5.287189483642578, 6.2465410232543945, 4.9101104736328125]
iteration 340
epsilon=0.22651068537268493, episode length=999, total rewards=281.11888111888277
Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 709.5238095238021, 520.8178438661604]
loss = [9.255475997924805, 7.997110843658447, 5.598695278167725, 5.695643424987793, 4.795641899108887, 7.095136642456055, 4.231806755065918, 4.690280914306641, 5.0013580322265625, 4.080370903015137]
iteration 341
epsilon=0.22624557851895807, episode length=999, total rewards=247.5177304964503
Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 70

Copying file://car-racing-v2-342-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 709.5238095238021, 520.8178438661604]
loss = [14.19568157196045, 7.807529449462891, 7.7831315994262695, 6.385868072509766, 5.968879699707031, 4.8817644119262695, 5.64765739440918, 4.097393035888672, 5.769506931304932, 4.026528358459473]
iteration 343
epsilon=0.2257232915064308, episode length=999, total rewards=138.5620915032728
Training the model...
experience length=4995
number of examples=2997
best total reward =  [492.3344947735103, 709.5238095238021, 520.8178438661604]
loss = [21.454801559448242, 8.347345352172852, 6.992221832275391, 5.904675006866455, 5.38169527053833, 5.158895015716553, 4.880960464477539, 4.537415504455566, 4.1761651039123535, 3.685988426208496]
iteration 344
epsilon=0.2254660585913665, episode length=999, total rewards=766.3793103448132
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178

Copying file://car-racing-v2-345-model05.hd5 [Content-Type=application/octet-stream]...
\ [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [16.11965560913086, 11.316847801208496, 8.970074653625488, 8.245357513427734, 6.698031425476074, 6.541296005249023, 5.767343044281006, 5.545032024383545, 5.314937114715576, 4.846542835235596]
iteration 346
epsilon=0.2249592840253983, episode length=999, total rewards=332.6923076923084
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [12.789434432983398, 10.037226676940918, 8.270234107971191, 6.87761116027832, 6.599942207336426, 5.6252031326293945, 5.447629451751709, 5.7784013748168945, 4.410337924957275, 4.484092712402344]
iteration 347
epsilon=0.22470969118514433, episode length=999, total rewards=366.9260700388964
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8

Copying file://car-racing-v2-348-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [11.937126159667969, 9.705368995666504, 7.281642436981201, 6.238990306854248, 5.721302509307861, 5.463473320007324, 5.546113967895508, 4.461874485015869, 5.656260967254639, 4.30785608291626]
iteration 349
epsilon=0.22421796833055999, episode length=999, total rewards=443.20987654320453
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [13.233933448791504, 10.108823776245117, 8.04571533203125, 7.2861104011535645, 6.077099323272705, 6.406868934631348, 17.687238693237305, 3.846108913421631, 5.2670488357543945, 10.620104789733887]
iteration 350
epsilon=0.22397578864725437, episode length=999, total rewards=360.7142857142735
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 52

Copying file://car-racing-v2-351-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [14.996051788330078, 11.393157005310059, 8.990471839904785, 8.967026710510254, 6.3777241706848145, 6.092921257019043, 5.8864521980285645, 6.152037620544434, 5.374660015106201, 4.8912739753723145]
iteration 352
epsilon=0.223498670453174, episode length=999, total rewards=133.44947735191852
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [11.051645278930664, 7.739686489105225, 6.849343299865723, 6.148468971252441, 5.431714057922363, 4.847590446472168, 6.012487888336182, 4.210787296295166, 4.5246758460998535, 4.911030292510986]
iteration 353
epsilon=0.22326368374864228, episode length=410, total rewards=-6.571641791043675
epsilon=0.22326368374864228, episode length=473, total rewards=10.854711246201916
Training the model...
experie

Copying file://car-racing-v2-354-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [27.73299789428711, 20.049436569213867, 16.2194766998291, 12.360584259033203, 9.966545104980469, 7.875850677490234, 6.783883571624756, 8.102566719055176, 5.603507041931152, 5.975597381591797]
iteration 355
epsilon=0.2228007364420443, episode length=999, total rewards=276.3440860215062
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [23.750185012817383, 15.495226860046387, 12.292978286743164, 9.482587814331055, 7.977319240570068, 7.854647159576416, 6.353163242340088, 6.740192890167236, 6.518833160400391, 5.7843475341796875]
iteration 356
epsilon=0.22257272907762385, episode length=999, total rewards=395.35603715169873
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520

Copying file://car-racing-v2-357-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [20.676607131958008, 11.656535148620605, 9.403907775878906, 9.048980712890625, 6.92902946472168, 7.1409912109375, 6.729747295379639, 5.752525806427002, 7.022794246673584, 7.0490641593933105]
iteration 358
epsilon=0.22212353176897912, episode length=999, total rewards=475.7575757575627
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [13.059144973754883, 10.257317543029785, 11.557830810546875, 7.356577396392822, 7.030060768127441, 6.365079402923584, 5.655329704284668, 5.473479270935059, 10.357076644897461, 4.824936389923096]
iteration 359
epsilon=0.22190229645128934, episode length=999, total rewards=363.1901840490749
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.

Copying file://car-racing-v2-360-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [14.237435340881348, 10.330628395080566, 9.107108116149902, 8.245193481445312, 7.381278991699219, 7.011811256408691, 6.189183235168457, 5.885349750518799, 5.982849597930908, 4.358201026916504]
iteration 361
epsilon=0.2214664407519087, episode length=999, total rewards=355.14950166112794
Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [10.460747718811035, 9.760322570800781, 6.351073741912842, 6.145778656005859, 5.399648189544678, 6.371671676635742, 4.30795955657959, 5.065861225128174, 4.684673309326172, 4.3139190673828125]
iteration 362
epsilon=0.2212517763443896, episode length=517, total rewards=-0.02006472491947983
Training the model...
experience length=4995
number of examples=2515
best total reward =  [709.5238095238021, 520

Copying file://car-racing-v2-363-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [709.5238095238021, 520.8178438661604, 766.3793103448132]
loss = [71.84764862060547, 9.7205171585083, 8.374101638793945, 7.884340763092041, 7.188078880310059, 5.869349002838135, 6.0846476554870605, 6.669846057891846, 5.436264514923096, 5.7507219314575195]
iteration 364
epsilon=0.22082886599513626, episode length=999, total rewards=605.4263565891384
Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 766.3793103448132, 605.4263565891384]
loss = [11.15756607055664, 8.397217750549316, 6.858098983764648, 7.503746509552002, 5.011287212371826, 5.033812522888184, 5.231035232543945, 4.307102680206299, 4.415106773376465, 4.682556629180908]
iteration 365
epsilon=0.2206205773351849, episode length=999, total rewards=514.3344709897516
Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 766.37931

Copying file://car-racing-v2-366-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 766.3793103448132, 605.4263565891384]
loss = [18.2371883392334, 12.963582038879395, 11.011487007141113, 9.174983978271484, 10.381805419921875, 7.3108649253845215, 5.738766670227051, 5.692319393157959, 5.964347839355469, 5.365161895751953]
iteration 367
epsilon=0.22021022784621472, episode length=999, total rewards=352.76872964168655
Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 766.3793103448132, 605.4263565891384]
loss = [14.25131893157959, 10.253153800964355, 8.165349006652832, 7.00703239440918, 7.157048225402832, 5.472581386566162, 6.0640130043029785, 5.814847946166992, 5.284112930297852, 5.402139663696289]
iteration 368
epsilon=0.22000812556775257, episode length=999, total rewards=33.550488599347474
Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 76

Copying file://car-racing-v2-369-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 766.3793103448132, 605.4263565891384]
loss = [14.285507202148438, 10.243886947631836, 11.734694480895996, 6.876484394073486, 7.356693744659424, 6.442778587341309, 5.575343608856201, 5.28700065612793, 5.197825908660889, 5.140280246734619]
iteration 370
epsilon=0.2196099638689543, episode length=999, total rewards=223.62459546926033
Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 766.3793103448132, 605.4263565891384]
loss = [12.739081382751465, 9.271669387817383, 7.002523422241211, 6.268700122833252, 5.949261665344238, 5.439829349517822, 4.930388450622559, 5.505465984344482, 4.626467704772949, 4.385186195373535]
iteration 371
epsilon=0.21941386423026474, episode length=999, total rewards=387.8048780487758
Training the model...
experience length=4995
number of examples=2997
best total reward =  [520.8178438661604, 766.3

Copying file://car-racing-v2-372-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.2192197255879621, episode length=999, total rewards=330.97643097642384
Training the model...
experience length=4995
number of examples=3278
best total reward =  [520.8178438661604, 766.3793103448132, 605.4263565891384]
loss = [23.078929901123047, 19.147966384887695, 16.50225257873535, 13.910981178283691, 11.93467903137207, 10.818814277648926, 10.004316329956055, 7.698182582855225, 6.935023784637451, 6.561105251312256]
iteration 373
epsilon=0.21902752833208247, episode length=792, total rewards=-33.11459854014231
Training the model...
experience length=4995
number of examples=2790
best total reward =  [520.8178438661604, 766.3793103448132, 605.4263565891384]
loss = [41.77531051635742, 27.73844337463379, 22.0599308013916, 15.08813190460205, 14.671479225158691, 10.813404083251953, 8.956212043762207, 8.53342056274414, 21.093273162841797, 9.008414268493652]
iteration 374
epsilon=0.21883725304876164, episode length=999, total rewards=543.0976430976342
Training the model...
experien

Copying file://car-racing-v2-375-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.21864888051827402, episode length=999, total rewards=49.305555555559074
Training the model...
experience length=4995
number of examples=3272
best total reward =  [766.3793103448132, 605.4263565891384, 543.0976430976342]
loss = [32.66337585449219, 22.68218421936035, 16.261524200439453, 11.4127197265625, 10.524504661560059, 9.678215026855469, 8.22301197052002, 10.388609886169434, 6.909420967102051, 7.501959800720215]
iteration 376
epsilon=0.2184623917130913, episode length=999, total rewards=470.49180327868305
Training the model...
experience length=4995
number of examples=2997
best total reward =  [766.3793103448132, 605.4263565891384, 543.0976430976342]
loss = [16.8717098236084, 10.720057487487793, 8.521231651306152, 7.571847915649414, 10.63170337677002, 7.041130542755127, 5.759970188140869, 5.2296462059021, 5.699225425720215, 5.477023124694824]
iteration 377
epsilon=0.21827776779596036, episode length=999, total rewards=349.3243243243229
Training the model...
experience leng

Copying file://car-racing-v2-378-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2783
best total reward =  [766.3793103448132, 605.4263565891384, 543.0976430976342]
loss = [22.850725173950195, 15.343371391296387, 14.028284072875977, 11.88345718383789, 8.473973274230957, 8.342544555664062, 5.717928409576416, 5.787513256072998, 5.7258501052856445, 5.299815654754639]
iteration 379
epsilon=0.21791404021682076, episode length=999, total rewards=359.18367346938055
Training the model...
experience length=4995
number of examples=2997
best total reward =  [766.3793103448132, 605.4263565891384, 543.0976430976342]
loss = [11.140931129455566, 9.644671440124512, 6.8206467628479, 6.552046775817871, 5.214531898498535, 5.300912380218506, 5.409256935119629, 4.884852409362793, 6.478518009185791, 4.201320171356201]
iteration 380
epsilon=0.21773489981465255, episode length=999, total rewards=186.62420382166005
Training the model...
experience length=4995
number of examples=2997
best total reward =  [766.3793103448132, 605

Copying file://car-racing-v2-381-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [766.3793103448132, 605.4263565891384, 543.0976430976342]
loss = [17.863855361938477, 12.968653678894043, 11.09889030456543, 19.518268585205078, 7.776895523071289, 7.779870986938477, 7.648569107055664, 6.348871231079102, 7.863302707672119, 5.46548318862915]
iteration 382
epsilon=0.21738197530834097, episode length=999, total rewards=437.2670807453349
Training the model...
experience length=4995
number of examples=2997
best total reward =  [766.3793103448132, 605.4263565891384, 543.0976430976342]
loss = [11.399106979370117, 8.806474685668945, 7.322982311248779, 6.583927631378174, 5.972981929779053, 5.832222938537598, 5.778472423553467, 5.105179309844971, 4.9254069328308105, 5.057513236999512]
iteration 383
epsilon=0.21720815555525755, episode length=999, total rewards=396.40287769783646
Training the model...
experience length=4995
number of examples=2997
best total reward =  [766.3793103448132, 605

Copying file://car-racing-v2-384-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [766.3793103448132, 605.4263565891384, 543.0976430976342]
loss = [15.062651634216309, 11.816790580749512, 9.585911750793457, 8.783480644226074, 11.5836763381958, 5.561357021331787, 6.913235664367676, 6.465515613555908, 5.892213821411133, 6.077451229095459]
iteration 385
epsilon=0.21686571325970794, episode length=999, total rewards=544.4444444444337
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.4263565891384, 543.0976430976342, 544.4444444444337]
loss = [14.047286033630371, 10.09988784790039, 8.941230773925781, 7.627806186676025, 6.382144451141357, 6.376842975616455, 6.731542110443115, 4.949481010437012, 5.50549840927124, 5.236914157867432]
iteration 386
epsilon=0.21669705612711085, episode length=999, total rewards=347.2049689440956
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.4263565891384, 543.097

Copying file://car-racing-v2-387-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.4263565891384, 543.0976430976342, 544.4444444444337]
loss = [14.663032531738281, 10.296954154968262, 8.971519470214844, 7.417478561401367, 7.386307716369629, 6.335437774658203, 5.702396392822266, 5.9645915031433105, 5.104735851287842, 5.059688091278076]
iteration 388
epsilon=0.21636478471018133, episode length=999, total rewards=50.32679738562452
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.4263565891384, 543.0976430976342, 544.4444444444337]
loss = [17.537900924682617, 11.951526641845703, 10.044398307800293, 8.221356391906738, 8.185586929321289, 7.104747295379639, 6.336333751678467, 6.636773586273193, 5.914021015167236, 5.630584239959717]
iteration 389
epsilon=0.2162011368630795, episode length=999, total rewards=153.90625000000486
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.4263565891384, 54

Copying file://car-racing-v2-390-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.4263565891384, 543.0976430976342, 544.4444444444337]
loss = [14.929898262023926, 11.30475902557373, 12.439764976501465, 8.065042495727539, 7.3000593185424805, 6.949606418609619, 6.336779594421387, 6.0211968421936035, 5.714669227600098, 5.097921371459961]
iteration 391
epsilon=0.2158787342395042, episode length=999, total rewards=431.2499999999938
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.4263565891384, 543.0976430976342, 544.4444444444337]
loss = [12.307833671569824, 9.047064781188965, 8.28411865234375, 6.589945316314697, 7.2849040031433105, 5.602209568023682, 5.538482666015625, 4.9040703773498535, 5.0987372398376465, 7.048969745635986]
iteration 392
epsilon=0.21571994689710916, episode length=999, total rewards=678.625954198463
Training the model...
experience length=4995
number of examples=2997
best total reward =  [543.0976430976342, 544

Copying file://car-racing-v2-393-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [543.0976430976342, 544.4444444444337, 678.625954198463]
loss = [15.359478950500488, 10.91772747039795, 9.766350746154785, 9.21274185180664, 6.554996490478516, 10.598719596862793, 5.443422794342041, 6.207245826721191, 6.0881147384643555, 5.622819900512695]
iteration 394
epsilon=0.2154071199538567, episode length=999, total rewards=211.04651162791097
Training the model...
experience length=4995
number of examples=2997
best total reward =  [543.0976430976342, 544.4444444444337, 678.625954198463]
loss = [15.01313591003418, 10.591353416442871, 9.709329605102539, 8.29659366607666, 7.021782875061035, 6.123718738555908, 6.102072715759277, 6.497471332550049, 5.56614351272583, 5.490043640136719]
iteration 395
epsilon=0.21525304875431814, episode length=999, total rewards=599.6699669966937
Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.62595

Copying file://car-racing-v2-396-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [15.350144386291504, 11.667219161987305, 12.211627960205078, 9.244147300720215, 8.463940620422363, 7.531235218048096, 7.1059184074401855, 7.92120885848999, 7.2007737159729, 6.0837812423706055]
iteration 397
epsilon=0.2149495130841072, episode length=999, total rewards=283.38658146965156
Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [12.755990028381348, 9.47408390045166, 7.235924243927002, 6.784380912780762, 8.783143997192383, 4.929823398590088, 5.717185974121094, 4.917014122009277, 4.904209136962891, 4.867231845855713]
iteration 398
epsilon=0.21480001795326614, episode length=999, total rewards=269.86301369862423
Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625

Copying file://car-racing-v2-399-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [19.55653953552246, 14.432698249816895, 12.038254737854004, 10.883399963378906, 9.988675117492676, 10.079391479492188, 6.609670162200928, 7.278717517852783, 7.301122188568115, 6.461310863494873]
iteration 400
epsilon=0.21450549759599616, episode length=999, total rewards=242.50764525993358
Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [16.83600616455078, 13.013343811035156, 10.073787689208984, 8.461195945739746, 7.477972030639648, 8.045672416687012, 6.794766902923584, 6.16170597076416, 5.957669258117676, 6.365283489227295]
iteration 401
epsilon=0.2143604426200362, episode length=999, total rewards=15.384615384615875
Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.

Copying file://car-racing-v2-402-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [17.18490219116211, 14.760102272033691, 12.051164627075195, 10.675833702087402, 8.033839225769043, 8.167738914489746, 7.343997001647949, 6.0083794593811035, 6.3769211769104, 6.440400123596191]
iteration 403
epsilon=0.21407466981189746, episode length=999, total rewards=430.9090909090823
Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [14.189242362976074, 10.457730293273926, 8.526487350463867, 7.5587286949157715, 6.530575275421143, 10.402615547180176, 6.321136474609375, 5.770596027374268, 5.892522811889648, 5.012724876403809]
iteration 404
epsilon=0.21393392311377848, episode length=621, total rewards=15.12177121771441
Training the model...
experience length=4995
number of examples=2619
best total reward =  [544.4444444444337, 678.

Copying file://car-racing-v2-405-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [16.98265266418457, 11.99487018585205, 11.066967010498047, 9.369166374206543, 7.674950122833252, 7.6703572273254395, 7.3079681396484375, 6.375682353973389, 6.774559020996094, 5.734001159667969]
iteration 406
epsilon=0.2136566380438143, episode length=999, total rewards=228.41328413284538
Training the model...
experience length=4995
number of examples=2997
best total reward =  [544.4444444444337, 678.625954198463, 599.6699669966937]
loss = [13.82413387298584, 52.4474983215332, 17.718002319335938, 8.388681411743164, 7.529083251953125, 6.6895952224731445, 6.8677592277526855, 6.3629631996154785, 5.42153263092041, 5.545581817626953]
iteration 407
epsilon=0.21352007166337617, episode length=999, total rewards=562.9629629629509
Training the model...
experience length=4995
number of examples=2997
best total reward =  [678.625954198463, 599.66

Copying file://car-racing-v2-408-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.2133848709467424, episode length=774, total rewards=-0.05892255892325804
Training the model...
experience length=4995
number of examples=3036
best total reward =  [678.625954198463, 599.6699669966937, 562.9629629629509]
loss = [26.970489501953125, 18.181617736816406, 14.682485580444336, 9.36128044128418, 7.736989498138428, 7.805840492248535, 7.015453815460205, 6.584253787994385, 6.3044657707214355, 5.76011323928833]
iteration 409
epsilon=0.213251022237275, episode length=443, total rewards=9.133333333334335
epsilon=0.213251022237275, episode length=999, total rewards=420.1342281879143
Training the model...
experience length=4995
number of examples=3440
best total reward =  [678.625954198463, 599.6699669966937, 562.9629629629509]
loss = [31.041772842407227, 21.92871856689453, 20.732301712036133, 14.943018913269043, 12.631156921386719, 10.353622436523438, 10.20545768737793, 8.580230712890625, 7.2862420082092285, 7.25946569442749]
iteration 410
epsilon=0.21311851201490223, episo

Copying file://car-racing-v2-411-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2931
best total reward =  [678.625954198463, 599.6699669966937, 562.9629629629509]
loss = [33.026607513427734, 17.202392578125, 13.782430648803711, 10.685948371887207, 9.072558403015137, 11.422274589538574, 7.100162982940674, 7.182852268218994, 7.490772724151611, 6.354918956756592]
iteration 412
epsilon=0.21285745362580566, episode length=999, total rewards=343.6619718309785
Training the model...
experience length=4995
number of examples=2997
best total reward =  [678.625954198463, 599.6699669966937, 562.9629629629509]
loss = [18.99601173400879, 14.949406623840332, 10.797574043273926, 8.946054458618164, 7.299647331237793, 6.957381248474121, 7.013630390167236, 7.5938029289245605, 5.094383239746094, 5.871640682220459]
iteration 413
epsilon=0.2127288790895476, episode length=999, total rewards=592.0289855072346
Training the model...
experience length=4995
number of examples=2997
best total reward =  [599.6699669966937, 562.96

Copying file://car-racing-v2-414-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [599.6699669966937, 562.9629629629509, 592.0289855072346]
loss = [17.17584991455078, 11.455955505371094, 9.743365287780762, 9.080146789550781, 8.726641654968262, 8.1216402053833, 6.698187351226807, 7.884164810180664, 6.2748517990112305, 6.590676784515381]
iteration 415
epsilon=0.2124755743956656, episode length=999, total rewards=403.2467532467488
Training the model...
experience length=4995
number of examples=2997
best total reward =  [599.6699669966937, 562.9629629629509, 592.0289855072346]
loss = [14.205163955688477, 11.370534896850586, 9.592012405395508, 9.005922317504883, 7.187421798706055, 8.660325050354004, 6.248562335968018, 6.439769268035889, 6.148064136505127, 5.577662467956543]
iteration 416
epsilon=0.21235081865170896, episode length=999, total rewards=496.55172413791615
Training the model...
experience length=4995
number of examples=2997
best total reward =  [599.6699669966937, 562.96

Copying file://car-racing-v2-417-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [599.6699669966937, 562.9629629629509, 592.0289855072346]
loss = [17.043119430541992, 12.604449272155762, 11.657512664794922, 9.816792488098145, 8.984050750732422, 7.813708305358887, 7.242619037628174, 9.027715682983398, 6.647726058959961, 7.651766777038574]
iteration 418
epsilon=0.21210503736053993, episode length=456, total rewards=-59.4784946236558
epsilon=0.21210503736053993, episode length=415, total rewards=11.856704980844157
Training the model...
experience length=4995
number of examples=2869
best total reward =  [599.6699669966937, 562.9629629629509, 592.0289855072346]
loss = [64.00639343261719, 48.2641487121582, 39.66240310668945, 35.61186981201172, 32.76827621459961, 30.801441192626953, 30.601797103881836, 29.840877532958984, 27.27875328063965, 28.400035858154297]
iteration 419
epsilon=0.21198398698693452, episode length=999, total rewards=373.3542319749124
Training the model...
experien

Copying file://car-racing-v2-420-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [599.6699669966937, 562.9629629629509, 592.0289855072346]
loss = [15.394865989685059, 9.108179092407227, 8.79854965209961, 8.014354705810547, 7.911803722381592, 6.787431240081787, 7.179455280303955, 11.358067512512207, 5.864273548126221, 5.599917411804199]
iteration 421
epsilon=0.2117455056458945, episode length=999, total rewards=694.5205479451922
Training the model...
experience length=4995
number of examples=2997
best total reward =  [562.9629629629509, 592.0289855072346, 694.5205479451922]
loss = [30.516691207885742, 22.702754974365234, 21.826526641845703, 20.242835998535156, 19.511016845703125, 17.527849197387695, 17.013072967529297, 15.257627487182617, 16.887054443359375, 13.92994213104248]
iteration 422
epsilon=0.21162805058943557, episode length=999, total rewards=568.9895470383174
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346,

Copying file://car-racing-v2-423-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [14.99835205078125, 10.843910217285156, 22.231380462646484, 7.6086602210998535, 7.982944011688232, 7.063408374786377, 6.368256092071533, 6.075192451477051, 6.8968892097473145, 6.167535305023193]
iteration 424
epsilon=0.2113966523827058, episode length=999, total rewards=458.2089552238739
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [11.586736679077148, 7.908020496368408, 7.115443706512451, 7.06903600692749, 8.181437492370605, 5.252824306488037, 5.812512397766113, 5.415896892547607, 5.3255414962768555, 4.86186408996582]
iteration 425
epsilon=0.21128268585887874, episode length=999, total rewards=352.8985507246311
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5

Copying file://car-racing-v2-426-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [12.9107027053833, 11.66968822479248, 9.133440017700195, 7.908468246459961, 7.126315593719482, 6.482199668884277, 7.056589603424072, 6.267277240753174, 6.500293254852295, 5.730660915374756]
iteration 427
epsilon=0.21105816041028705, episode length=999, total rewards=386.2068965517162
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [12.95859432220459, 9.67187786102295, 8.728753089904785, 8.895041465759277, 7.4536919593811035, 7.0044732093811035, 6.393274784088135, 6.2513933181762695, 5.959830284118652, 5.754197120666504]
iteration 428
epsilon=0.21094757880618417, episode length=999, total rewards=309.0909090909097
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.520

Copying file://car-racing-v2-429-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [11.055283546447754, 7.722424030303955, 7.187110424041748, 6.948004722595215, 6.891077041625977, 5.006420612335205, 5.3491973876953125, 5.144765377044678, 6.0077223777771, 5.195641040802002]
iteration 430
epsilon=0.2107297219879411, episode length=999, total rewards=287.09677419355194
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [12.840155601501465, 10.552969932556152, 18.280487060546875, 6.944916725158691, 7.709307670593262, 6.9371337890625, 6.725864410400391, 5.735057830810547, 6.5299763679504395, 5.695268630981445]
iteration 431
epsilon=0.2106224247680617, episode length=999, total rewards=374.7474747474659
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.520

Copying file://car-racing-v2-432-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [16.395339965820312, 11.694496154785156, 10.159687042236328, 9.019363403320312, 7.722672462463379, 7.788303375244141, 8.371578216552734, 7.0752034187316895, 5.984869003295898, 5.819376468658447]
iteration 433
epsilon=0.21041103851517728, episode length=999, total rewards=514.5454545454445
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [16.155569076538086, 13.99655818939209, 9.78633975982666, 7.615062236785889, 7.792037487030029, 7.436388969421387, 6.63954496383667, 14.052213668823242, 5.388040065765381, 33.08468246459961]
iteration 434
epsilon=0.21030692813002552, episode length=999, total rewards=564.1509433962167
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.

Copying file://car-racing-v2-435-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [16.98240089416504, 11.937134742736816, 22.896818161010742, 9.238072395324707, 10.070389747619629, 8.809998512268066, 8.242883682250977, 7.491684913635254, 7.273772239685059, 7.3398332595825195]
iteration 436
epsilon=0.21010182026023802, episode length=999, total rewards=91.69329073482768
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [26.256406784057617, 19.47533416748047, 15.607060432434082, 13.218199729919434, 11.990649223327637, 11.891551971435547, 10.499398231506348, 9.82620620727539, 10.017834663391113, 8.639016151428223]
iteration 437
epsilon=0.21000080205763563, episode length=999, total rewards=86.8852459016436
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346,

Copying file://car-racing-v2-438-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [15.99058723449707, 14.252335548400879, 11.39999008178711, 10.4754056930542, 9.6826810836792, 8.706124305725098, 8.489924430847168, 9.179291725158691, 8.221640586853027, 7.53243350982666]
iteration 439
epsilon=0.20980178609668867, episode length=999, total rewards=54.882154882157785
Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [21.38947868347168, 12.511292457580566, 11.712515830993652, 10.308009147644043, 10.96263599395752, 9.252147674560547, 8.360450744628906, 11.716404914855957, 6.297145366668701, 7.837442874908447]
iteration 440
epsilon=0.2097037682357218, episode length=484, total rewards=-18.91994609164243
epsilon=0.2097037682357218, episode length=960, total rewards=-0.014590747331362425
Training the model...
experience

Copying file://car-racing-v2-441-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [592.0289855072346, 694.5205479451922, 568.9895470383174]
loss = [25.402650833129883, 17.72659683227539, 14.942380905151367, 12.774628639221191, 18.752269744873047, 9.53495979309082, 10.134970664978027, 9.38900375366211, 8.577775001525879, 8.729257583618164]
iteration 442
epsilon=0.2095106632478309, episode length=999, total rewards=812.4087591240716
Training the model...
experience length=4995
number of examples=2997
best total reward =  [694.5205479451922, 568.9895470383174, 812.4087591240716]
loss = [16.85190773010254, 11.919591903686523, 10.416160583496094, 10.785155296325684, 8.482094764709473, 7.472480773925781, 6.933629035949707, 6.3351731300354, 6.358363628387451, 6.419369220733643]
iteration 443
epsilon=0.2094155566153526, episode length=999, total rewards=570.0336700336613
Training the model...
experience length=4995
number of examples=2997
best total reward =  [568.9895470383174, 812.40

Copying file://car-racing-v2-444-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [568.9895470383174, 812.4087591240716, 570.0336700336613]
loss = [23.886821746826172, 14.052422523498535, 12.90501594543457, 10.135095596313477, 53.88528060913086, 10.385750770568848, 9.0714693069458, 8.407485961914062, 9.298487663269043, 8.647343635559082]
iteration 445
epsilon=0.2092281870387071, episode length=999, total rewards=570.2127659574361
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [14.016765594482422, 11.85556697845459, 9.93911361694336, 9.54449462890625, 9.913419723510742, 9.388934135437012, 6.947770595550537, 7.862689971923828, 6.770748138427734, 6.777304649353027]
iteration 446
epsilon=0.20913590516832004, episode length=999, total rewards=254.60992907801915
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.033

Copying file://car-racing-v2-447-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [11.06262493133545, 8.971577644348145, 7.916159629821777, 6.369472503662109, 8.20749282836914, 6.7058186531066895, 6.316709518432617, 5.842313766479492, 5.851603984832764, 5.262143611907959]
iteration 448
epsilon=0.20895410065547046, episode length=999, total rewards=468.10631229235315
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [13.132722854614258, 8.944042205810547, 9.481181144714355, 9.05362606048584, 7.4442830085754395, 7.395688056945801, 15.851057052612305, 6.830829620361328, 6.640292644500732, 5.904510498046875]
iteration 449
epsilon=0.20886455964891576, episode length=999, total rewards=361.5384615384575
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0

Copying file://car-racing-v2-450-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2960
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [13.019451141357422, 10.82183837890625, 9.485356330871582, 8.011937141418457, 22.47057342529297, 7.069310188293457, 6.938239097595215, 6.488762378692627, 6.879509925842285, 5.471476078033447]
iteration 451
epsilon=0.20868815491190235, episode length=999, total rewards=294.53124999998715
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [12.19642162322998, 9.003509521484375, 8.389792442321777, 6.9478983879089355, 6.941577434539795, 6.365467548370361, 6.352088928222656, 5.835029602050781, 5.359518527984619, 5.477950572967529]
iteration 452
epsilon=0.20860127336278333, episode length=429, total rewards=-27.15374149659783
epsilon=0.20860127336278333, episode length=999, total rewards=469.5364238410487
Training the model...
experience 

Copying file://car-racing-v2-453-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


epsilon=0.2085152606291555, episode length=999, total rewards=290.24390243902343
Training the model...
experience length=4995
number of examples=3456
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [21.165111541748047, 15.768102645874023, 13.929007530212402, 12.79754638671875, 9.719000816345215, 9.451606750488281, 8.318368911743164, 8.752659797668457, 6.943731784820557, 7.004895210266113]
iteration 454
epsilon=0.20843010802286396, episode length=999, total rewards=406.0728744939146
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [12.002260208129883, 10.305598258972168, 9.148628234863281, 7.515412330627441, 7.68778133392334, 7.483238220214844, 6.246555328369141, 6.501706123352051, 5.655028820037842, 7.8753228187561035]
iteration 455
epsilon=0.20834580694263533, episode length=702, total rewards=-85.5929577464793
Training the model...
experience

Copying file://car-racing-v2-456-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2692
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [19.720457077026367, 10.818985939025879, 11.829384803771973, 8.610769271850586, 7.795614719390869, 7.538156509399414, 80.80574798583984, 6.825305461883545, 7.089534759521484, 8.1650390625]
iteration 457
epsilon=0.20817972538447688, episode length=999, total rewards=512.7946127946028
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [13.589278221130371, 11.031089782714844, 9.3643217086792, 9.386489868164062, 7.621216297149658, 6.9651103019714355, 7.7111077308654785, 8.001964569091797, 7.009838581085205, 5.955363750457764]
iteration 458
epsilon=0.2080979281306321, episode length=999, total rewards=130.7692307692353
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.03367

Copying file://car-racing-v2-459-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2910
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [16.23153305053711, 13.149070739746094, 11.070709228515625, 9.451810836791992, 9.345076560974121, 8.257328987121582, 8.245355606079102, 15.372178077697754, 5.963006019592285, 7.081136703491211]
iteration 460
epsilon=0.20793677936083252, episode length=999, total rewards=269.774919614149
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [27.232254028320312, 19.593547821044922, 14.233838081359863, 15.523763656616211, 11.746200561523438, 12.29541015625, 9.85053825378418, 9.637451171875, 9.279335975646973, 10.95975112915039]
iteration 461
epsilon=0.2078574115672242, episode length=999, total rewards=494.0594059405842
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.03367

Copying file://car-racing-v2-462-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2808
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [16.34341049194336, 12.878691673278809, 11.304755210876465, 10.773991584777832, 9.212403297424316, 8.682916641235352, 8.101716995239258, 7.529818058013916, 8.631113052368164, 6.941976070404053]
iteration 463
epsilon=0.20770104907703643, episode length=999, total rewards=333.33333333332195
Training the model...
experience length=4995
number of examples=2997
best total reward =  [812.4087591240716, 570.0336700336613, 570.2127659574361]
loss = [16.359743118286133, 12.780652046203613, 11.593149185180664, 10.031407356262207, 9.384615898132324, 8.582403182983398, 8.308958053588867, 7.432584285736084, 7.104299068450928, 7.145389080047607]
iteration 464
epsilon=0.20762403858626607, episode length=999, total rewards=605.8823529411641
Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.0336700336613, 

Copying file://car-racing-v2-465-model05.hd5 [Content-Type=application/octet-stream]...
\ [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.0336700336613, 570.2127659574361, 605.8823529411641]
loss = [17.100505828857422, 13.252067565917969, 12.2863187789917, 9.958537101745605, 9.673583984375, 9.465580940246582, 8.937248229980469, 7.9164276123046875, 9.345296859741211, 7.93959379196167]
iteration 466
epsilon=0.2074723202183994, episode length=999, total rewards=294.46366782006936
Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.0336700336613, 570.2127659574361, 605.8823529411641]
loss = [15.662125587463379, 12.4794921875, 13.502999305725098, 9.812357902526855, 8.837061882019043, 8.687467575073242, 8.7176513671875, 7.850981712341309, 7.102389335632324, 12.125120162963867]
iteration 467
epsilon=0.2073975970162154, episode length=999, total rewards=412.4555160142236
Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.0336700336613, 570.2127659574

Copying file://car-racing-v2-468-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.0336700336613, 570.2127659574361, 605.8823529411641]
loss = [16.984066009521484, 14.464082717895508, 11.465639114379883, 9.777077674865723, 9.385971069335938, 8.593839645385742, 8.308018684387207, 7.185032844543457, 7.752760410308838, 6.855831623077393]
iteration 469
epsilon=0.20725038483559272, episode length=999, total rewards=796.153846153833
Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.2127659574361, 605.8823529411641, 796.153846153833]
loss = [14.489020347595215, 12.090651512145996, 11.054460525512695, 10.367722511291504, 8.253724098205566, 8.209246635437012, 8.036884307861328, 9.903160095214844, 7.815536975860596, 7.259716033935547]
iteration 470
epsilon=0.2071778809872368, episode length=999, total rewards=497.7443609022445
Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.2127659574361, 605.

Copying file://car-racing-v2-471-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [570.2127659574361, 605.8823529411641, 796.153846153833]
loss = [15.896888732910156, 15.146742820739746, 12.752758026123047, 10.645755767822266, 9.236120223999023, 9.415586471557617, 17.272239685058594, 8.153044700622559, 7.629211902618408, 8.116525650024414]
iteration 472
epsilon=0.2070350411555908, episode length=963, total rewards=-0.05448504983470426
Training the model...
experience length=4995
number of examples=2961
best total reward =  [570.2127659574361, 605.8823529411641, 796.153846153833]
loss = [15.419672012329102, 12.724838256835938, 11.11973762512207, 9.668473243713379, 9.098362922668457, 7.777329444885254, 8.418601036071777, 9.64703369140625, 7.339520454406738, 7.327830791473389]
iteration 473
epsilon=0.2069646907440349, episode length=999, total rewards=597.761194029839
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.8823529411641, 796.

Copying file://car-racing-v2-474-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.8823529411641, 796.153846153833, 597.761194029839]
loss = [18.292734146118164, 20.596933364868164, 10.887402534484863, 10.939190864562988, 10.035735130310059, 9.884364128112793, 11.035412788391113, 8.317919731140137, 8.078739166259766, 7.415548801422119]
iteration 475
epsilon=0.2068260933982286, episode length=999, total rewards=471.4285714285592
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.8823529411641, 796.153846153833, 597.761194029839]
loss = [13.180017471313477, 9.858786582946777, 10.192899703979492, 11.358945846557617, 7.237159252166748, 7.7733025550842285, 6.917756080627441, 7.707747459411621, 7.43565034866333, 6.143120765686035]
iteration 476
epsilon=0.2067578324642463, episode length=999, total rewards=442.3728813559276
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.8823529411641, 796.1

Copying file://car-racing-v2-477-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.8823529411641, 796.153846153833, 597.761194029839]
loss = [19.80006217956543, 13.842491149902344, 13.68328857421875, 10.759350776672363, 10.424991607666016, 9.199482917785645, 9.778705596923828, 8.408838272094727, 11.630888938903809, 7.977903842926025]
iteration 478
epsilon=0.2066233515982078, episode length=999, total rewards=332.6923076923003
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.8823529411641, 796.153846153833, 597.761194029839]
loss = [12.713961601257324, 10.956890106201172, 10.042634963989258, 8.675984382629395, 7.599737644195557, 7.913522720336914, 7.224248886108398, 7.299107551574707, 7.000335693359375, 6.216514587402344]
iteration 479
epsilon=0.2065571180822257, episode length=999, total rewards=524.9999999999925
Training the model...
experience length=4995
number of examples=2997
best total reward =  [605.8823529411641, 796.153

Copying file://car-racing-v2-480-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2794
best total reward =  [605.8823529411641, 796.153846153833, 597.761194029839]
loss = [17.001739501953125, 14.119606018066406, 11.28896427154541, 10.752750396728516, 9.644301414489746, 8.360167503356934, 10.001928329467773, 10.674379348754883, 8.79561996459961, 6.860110282897949]
iteration 481
epsilon=0.20642663143238943, episode length=999, total rewards=790.1515151515008
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [13.337398529052734, 11.204293251037598, 9.914467811584473, 9.07548999786377, 8.130390167236328, 8.27326488494873, 8.129698753356934, 8.883040428161621, 7.26747989654541, 6.573174953460693]
iteration 482
epsilon=0.20636236511806552, episode length=999, total rewards=237.57961783439217
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.76119

Copying file://car-racing-v2-483-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [16.153915405273438, 14.310602188110352, 10.928869247436523, 10.142329216003418, 10.392187118530273, 9.126198768615723, 8.6716947555542, 7.885421276092529, 7.469874382019043, 7.469554424285889]
iteration 484
epsilon=0.20623575405221603, episode length=999, total rewards=499.26470588234747
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [14.985122680664062, 11.997574806213379, 10.220714569091797, 9.40196418762207, 9.107933044433594, 8.437766075134277, 7.855900287628174, 7.542497634887695, 9.557965278625488, 6.3162689208984375]
iteration 485
epsilon=0.20617339651169386, episode length=474, total rewards=-84.4257485029946
epsilon=0.20617339651169386, episode length=999, total rewards=17.117117117115846
Training the model...
experience 

Copying file://car-racing-v2-486-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [20.043514251708984, 14.655474662780762, 12.820511817932129, 15.143131256103516, 10.383498191833496, 10.049373626708984, 10.180780410766602, 8.748069763183594, 8.742929458618164, 6.942484378814697]
iteration 487
epsilon=0.20605054592111116, episode length=999, total rewards=452.8169014084448
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [15.079181671142578, 10.867774963378906, 13.188057899475098, 9.330607414245605, 9.90630054473877, 8.146428108215332, 6.836575984954834, 7.948335647583008, 7.3936004638671875, 7.127172946929932]
iteration 488
epsilon=0.20599004046190006, episode length=591, total rewards=-23.963503649632585
Training the model...
experience length=4995
number of examples=2589
best total reward =  [796.153846153833, 5

Copying file://car-racing-v2-489-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2541
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [19.247358322143555, 13.052982330322266, 14.208076477050781, 9.608323097229004, 9.376599311828613, 11.818325996398926, 9.581636428833008, 8.744184494018555, 8.532458305358887, 7.5434160232543945]
iteration 490
epsilon=0.20587083865670824, episode length=999, total rewards=376.4890282131584
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [17.823863983154297, 14.582794189453125, 13.291171073913574, 10.530526161193848, 10.197688102722168, 8.764355659484863, 9.277112007141113, 8.509716033935547, 8.876572608947754, 7.509140491485596]
iteration 491
epsilon=0.20581213027014117, episode length=999, total rewards=330.4207119741109
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597

Copying file://car-racing-v2-492-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [35.97808837890625, 15.250117301940918, 14.271045684814453, 11.391294479370117, 10.69239330291748, 10.067827224731445, 8.742194175720215, 11.507000923156738, 7.972259521484375, 7.806808948516846]
iteration 493
epsilon=0.20569646887776535, episode length=999, total rewards=163.6655948553101
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [15.527749061584473, 11.594783782958984, 12.22220516204834, 9.884799003601074, 8.961009979248047, 9.707192420959473, 7.5071916580200195, 8.059905052185059, 7.606751441955566, 9.169432640075684]
iteration 494
epsilon=0.2056395041889877, episode length=999, total rewards=568.8102893890598
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.76

Copying file://car-racing-v2-495-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2874
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [25.800722122192383, 17.925735473632812, 17.09634017944336, 14.890589714050293, 15.280364036560059, 34.887290954589844, 11.457817077636719, 9.677083015441895, 12.353724479675293, 9.171297073364258]
iteration 496
epsilon=0.20552727805562684, episode length=999, total rewards=356.0260586319152
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [68.0848159790039, 17.719532012939453, 29.032976150512695, 14.417534828186035, 14.647974967956543, 12.089896202087402, 13.288363456726074, 11.318703651428223, 10.918869018554688, 9.52116870880127]
iteration 497
epsilon=0.20547200527507056, episode length=999, total rewards=590.9090909090793
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 

Copying file://car-racing-v2-498-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [20.78148651123047, 15.62661075592041, 41.91443634033203, 12.285229682922363, 12.614826202392578, 11.482070922851562, 12.555752754211426, 10.09270191192627, 9.70621395111084, 10.49939250946045]
iteration 499
epsilon=0.20536311237009666, episode length=999, total rewards=295.97315436240706
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [20.297025680541992, 15.705665588378906, 14.276721000671387, 13.68313217163086, 16.50881004333496, 10.51121711730957, 11.858474731445312, 10.797130584716797, 15.225261688232422, 9.61425495147705]
iteration 500
epsilon=0.2053094812463957, episode length=999, total rewards=478.3972125435477
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.7

Copying file://car-racing-v2-501-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [20.109426498413086, 15.872455596923828, 12.669788360595703, 13.966268539428711, 11.859637260437012, 9.927165031433105, 12.621654510498047, 9.607949256896973, 8.980591773986816, 10.674473762512207]
iteration 502
epsilon=0.2052038225695924, episode length=999, total rewards=589.2857142857013
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [17.459918975830078, 13.87270450592041, 13.297417640686035, 11.634661674499512, 11.660978317260742, 10.616381645202637, 10.783117294311523, 8.94731616973877, 8.942784309387207, 9.35761833190918]
iteration 503
epsilon=0.2051517843438965, episode length=999, total rewards=530.2816901408346
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.

Copying file://car-racing-v2-504-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [19.720333099365234, 17.4588623046875, 14.480956077575684, 11.966252326965332, 51.03679656982422, 10.224534034729004, 10.1442232131958, 9.34521198272705, 9.56691837310791, 17.433940887451172]
iteration 505
epsilon=0.20504926383545297, episode length=999, total rewards=508.3916083915988
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [130.12046813964844, 17.54073143005371, 36.75170135498047, 15.126005172729492, 13.544686317443848, 13.808273315429688, 15.588457107543945, 10.002676963806152, 11.640721321105957, 9.515745162963867]
iteration 506
epsilon=0.20499877119709844, episode length=366, total rewards=20.496587030718075
epsilon=0.20499877119709844, episode length=999, total rewards=441.21863799283085
Training the model...
experienc

Copying file://car-racing-v2-507-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [22.93088150024414, 19.135128021240234, 15.158265113830566, 15.94523811340332, 12.16310977935791, 12.984858512878418, 10.952688217163086, 10.252104759216309, 10.165600776672363, 10.80186653137207]
iteration 508
epsilon=0.20489929565027618, episode length=999, total rewards=272.023809523812
Training the model...
experience length=4995
number of examples=2997
best total reward =  [796.153846153833, 597.761194029839, 790.1515151515008]
loss = [17.697460174560547, 15.375504493713379, 12.845293045043945, 11.522716522216797, 10.298935890197754, 12.54681396484375, 9.33298110961914, 10.094447135925293, 8.989033699035645, 8.654091835021973]
iteration 509
epsilon=0.2048503026937734, episode length=999, total rewards=750.7462686567039
Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029839, 790.

Copying file://car-racing-v2-510-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029839, 790.1515151515008, 750.7462686567039]
loss = [20.088817596435547, 18.49949073791504, 15.197643280029297, 12.960549354553223, 12.2915678024292, 13.081013679504395, 10.36672592163086, 10.04076099395752, 10.318005561828613, 9.424884796142578]
iteration 511
epsilon=0.20475378167016733, episode length=576, total rewards=-36.810344827584125
Training the model...
experience length=4995
number of examples=2574
best total reward =  [597.761194029839, 790.1515151515008, 750.7462686567039]
loss = [24.990074157714844, 19.939197540283203, 18.118274688720703, 14.607629776000977, 13.979896545410156, 12.655354499816895, 12.002883911132812, 11.424830436706543, 10.410075187683105, 10.928961753845215]
iteration 512
epsilon=0.20470624385346567, episode length=999, total rewards=198.4126984127022
Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029

Copying file://car-racing-v2-513-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029839, 790.1515151515008, 750.7462686567039]
loss = [22.41435432434082, 18.635210037231445, 16.733139038085938, 15.326617240905762, 14.384572982788086, 11.8331937789917, 13.066092491149902, 11.938275337219238, 10.754467964172363, 11.012728691101074]
iteration 514
epsilon=0.20461258960078169, episode length=999, total rewards=413.79310344826223
Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029839, 790.1515151515008, 750.7462686567039]
loss = [18.747873306274414, 14.700261116027832, 13.032047271728516, 11.159309387207031, 10.847208976745605, 10.07747745513916, 10.065681457519531, 8.705581665039062, 11.911310195922852, 8.376543998718262]
iteration 515
epsilon=0.20456646370477388, episode length=825, total rewards=168.15350553504788
Training the model...
experience length=4995
number of examples=2823
best total reward =  [597.761194029

Copying file://car-racing-v2-516-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029839, 790.1515151515008, 750.7462686567039]
loss = [21.683401107788086, 15.528409957885742, 14.292512893676758, 15.391136169433594, 10.822954177856445, 12.449285507202148, 11.059289932250977, 11.539695739746094, 9.602621078491211, 10.255187034606934]
iteration 517
epsilon=0.20447559107704888, episode length=999, total rewards=534.1463414634043
Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029839, 790.1515151515008, 750.7462686567039]
loss = [35.64896011352539, 32.04753112792969, 25.163307189941406, 23.157428741455078, 19.196155548095703, 18.711183547973633, 17.066938400268555, 30.190265655517578, 13.191025733947754, 13.256834983825684]
iteration 518
epsilon=0.20443083516627839, episode length=999, total rewards=125.98870056497572
Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.7611940

Copying file://car-racing-v2-519-model05.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 76.5 MiB/ 76.5 MiB] 100% Done                                    
Operation completed over 1 objects/76.5 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [597.761194029839, 790.1515151515008, 750.7462686567039]
loss = [27.88681983947754, 15.317206382751465, 13.458620071411133, 13.091867446899414, 11.23193645477295, 11.433625221252441, 12.933049201965332, 10.367365837097168, 45.96946334838867, 7.707971096038818]
iteration 520
epsilon=0.20434266154646946, episode length=999, total rewards=675.0865051903039
Training the model...
experience length=4995
number of examples=2997
best total reward =  [790.1515151515008, 750.7462686567039, 675.0865051903039]
loss = [16.926267623901367, 14.409167289733887, 12.227670669555664, 11.827457427978516, 11.072009086608887, 9.856014251708984, 9.720939636230469, 10.744915962219238, 9.62158203125, 10.041657447814941]
iteration 521
epsilon=0.20429923493100477, episode length=999, total rewards=247.22222222222726
Training the model...
experience length=4995
number of examples=2997
best total reward =  [790.1515151515008,