In [1]:
import gym
import numpy as np
import copy, random, os, subprocess, cv2
import tensorflow as tf
import keras.backend as K
from tensorflow.keras import layers, models, regularizers

  import imp
  'nearest': pil_image.NEAREST,
  'bilinear': pil_image.BILINEAR,
  'bicubic': pil_image.BICUBIC,
  if hasattr(pil_image, 'HAMMING'):
  if hasattr(pil_image, 'BOX'):
  if hasattr(pil_image, 'LANCZOS'):


In [2]:
MODEL_NAME = 'model09'
BUCKET = 'gs://etsuji-car-racing-v2'
os.environ['BUCKET'] = BUCKET
!gsutil mb -c regional -l us-west1 $BUCKET
!gsutil ls $BUCKET

Creating gs://etsuji-car-racing-v2/...
ServiceException: 409 A Cloud Storage bucket named 'etsuji-car-racing-v2' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.
gs://etsuji-car-racing-v2/model04/
gs://etsuji-car-racing-v2/model05/
gs://etsuji-car-racing-v2/model06/
gs://etsuji-car-racing-v2/model07/
gs://etsuji-car-racing-v2/model08/


In [3]:
class ApplySoftMaxWeight(layers.Layer):
    def __init__(self, **kwargs):
        self.filter_shape = None
        super(ApplySoftMaxWeight, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(name='weights', shape=[input_shape[3]])
        self.filter_shape = input_shape

    def get_config(self):
        config = super().get_config()
        return config
    
    def call(self, inputs, **kwargs):
        return inputs * tf.nn.softmax(self.kernel)
    
# Base model
class QValue:
    def __init__(self):
        self.model = self.build_model()

    def build_model(self):
        cnn_input = layers.Input(shape=(48, 48, 3), name='cnn_input')
        cnn1 = layers.Conv2D(16, (5, 5), padding='same',
                             use_bias=True, activation='relu',
                             name='cnn1')(cnn_input)
        pool1 = layers.MaxPooling2D((2, 2), name='pool1')(cnn1)
        cnn2 = layers.Conv2D(16, (5, 5), padding='same',
                             use_bias=True, activation='relu',
                             name='cnn2')(pool1)        
        pool2 = layers.MaxPooling2D((2, 2), name='pool2')(cnn2)
        weighted_filters = ApplySoftMaxWeight(name='weighted_filters')(pool2)

        cnn_flatten = layers.Flatten(name='flatten')(weighted_filters)
        action_input = layers.Input(shape=(5,), name='action_input')
        combined = layers.concatenate([cnn_flatten, action_input], name='concat')
        hidden1 = layers.Dense(2048, activation='relu', name='dense1')(combined)
        hidden2 = layers.Dense(1024, activation='relu', name='dense2')(hidden1)
        hidden3 = layers.Dense(512, activation='relu', name='dense3')(hidden2)
        q_value = layers.Dense(1, name='output')(hidden3)

        model = models.Model(inputs=[cnn_input, action_input], outputs=q_value)
        model.compile(loss='mse')
        return model

    def get_action(self, state):
        states = []
        actions = []
        for a in range(5):
            states.append(np.array(state))
            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
  
        q_values = self.model.predict([np.array(states), np.array(actions)])
        optimal_action = np.argmax(q_values)
        return optimal_action, q_values[optimal_action][0]

In [4]:
q_value = QValue()
q_value.model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 cnn_input (InputLayer)         [(None, 48, 48, 3)]  0           []                               
                                                                                                  
 cnn1 (Conv2D)                  (None, 48, 48, 16)   1216        ['cnn_input[0][0]']              
                                                                                                  
 pool1 (MaxPooling2D)           (None, 24, 24, 16)   0           ['cnn1[0][0]']                   
                                                                                                  
 cnn2 (Conv2D)                  (None, 24, 24, 16)   6416        ['pool1[0][0]']                  
                                                                                              

2022-08-05 05:08:57.788088: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-08-05 05:08:57.788136: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (tensorflow-2-8-20220801-151053): /proc/driver/nvidia/version does not exist
2022-08-05 05:08:57.789541: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
def join_frames(o0, o1, o2):
    gray_image0 = cv2.cvtColor(cv2.resize(o0, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image1 = cv2.cvtColor(cv2.resize(o1, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image2 = cv2.cvtColor(cv2.resize(o2, (48, 48)), cv2.COLOR_RGB2GRAY)
    
    return np.array(
        [gray_image0.transpose(),
         gray_image1.transpose(),
         gray_image2.transpose()]).transpose()

In [6]:
def get_episode(environ, q_value, epsilon):
    episode = []
    o0 = environ.reset()
    o1 = copy.deepcopy(o0)
    o2 = copy.deepcopy(o0)
    total_r = 0

    if epsilon > 0:
        keep_count = 3
    else:
        keep_count = 1

    c = 0
    while True:
        if c % keep_count == 0: # Get new action
            if np.random.random() < epsilon:
                a = np.random.randint(5)
            else:
                a, _ = q_value.get_action(join_frames(o0, o1, o2))
        c += 1
        o_new, r, done, inf = environ.step(a)                
        total_r += r

        # Terminate episode when total reward becomes negative
        if total_r < 0:
            done = 1

        if done:
            # Terminal state is to achive more than 990 or get out of the field.
            if total_r > 990 or r < -99:
                episode.append((join_frames(o0, o1, o2), a, r, None))
            break
        else:
            episode.append((join_frames(o0, o1, o2), a, r, join_frames(o1, o2, o_new)))
        o0, o1, o2 = o1, o2, o_new

    print('epsilon={}, episode length={}, total rewards={}'.format(epsilon, len(episode), total_r))
    return episode, total_r

In [7]:
def train(environ, q_value, epsilon, checkpoint=0):
    if checkpoint > 0:
        filename = 'car-racing-v2-{}-{}.hd5'.format(checkpoint, MODEL_NAME)
        subprocess.run(['gsutil', 'cp', '{}/{}/{}'.format(BUCKET, MODEL_NAME, filename), './'])
        print('load model {}'.format(filename))
        q_value.model = models.load_model(filename)
        os.remove(filename)

    experience = []
    good_experience = []
    best_r = [-100, -100, -100]

    for n in range(checkpoint + 1, checkpoint + 1000):
        print('iteration {}'.format(n))

        total_len = 0
        if n % 3 == 0:
            print('Testing the current performance...')
            episode, total_r = get_episode(environ, q_value, epsilon=0)
            with open('result.txt', 'a') as f:
                f.write('{},{},{},{}\n'.format(n, epsilon, len(episode), total_r))
            filename = 'car-racing-v2-{}-{}.hd5'.format(n, MODEL_NAME)
            q_value.model.save(filename, save_format='h5')
            subprocess.run(['gsutil', '-m', 'cp',
                            '{}'.format(filename), '{}/{}/'.format(BUCKET, MODEL_NAME)])
            os.remove(filename)
            experience += episode
            total_len += len(episode)

        while total_len < 500:
            episode, total_r = get_episode(environ, q_value, epsilon)
            total_len += len(episode)
            experience += episode

            # Keep the top 3 episodes
            if total_r > min(best_r):
                best_r = best_r[1:] + [total_r]
                good_experience += episode
                if len(good_experience) > 999 * 3:
                    good_experience = good_experience[-999 * 3:]

            
        if len(experience) > 999 * 5: # remember last 5 episodes
            experience = experience[-999 * 5:]

        epsilon = (epsilon - 0.2) * 0.99 + 0.2

        print('Training the model...')
        # Use latest episode + past episodes (sampling) + top 3 episode (sampling)
        latest_experience = experience[-total_len:]
        past_experience = experience[:-total_len]
        examples = latest_experience + \
            random.sample(past_experience, min(len(past_experience), 999)) + \
            random.sample(good_experience, min(len(good_experience), 999))
        
        # Show some statistics
        print('experience length={}'.format(len(experience)))
        print('number of examples={}'.format(len(examples)))
        print('best total reward = ', best_r)
        np.random.shuffle(examples)
                        
        states, actions, labels = [], [], []
        for state, a, r, state_new in examples:
            states.append(np.array(state))

            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
            
            if state_new is None:   # Terminal state
                q_new = 0
            else:
                _, q_new = q_value.get_action(state_new)
            labels.append(np.array(r + q_new))

        hist = q_value.model.fit(
            [np.array(states), np.array(actions)], np.array(labels),
            batch_size=50, epochs=10, verbose=0)
        print('loss = {}'.format(hist.history['loss']))

In [8]:
env = gym.make("CarRacing-v2", continuous=False)
q_value = QValue()
q_value.model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 cnn_input (InputLayer)         [(None, 48, 48, 3)]  0           []                               
                                                                                                  
 cnn1 (Conv2D)                  (None, 48, 48, 16)   1216        ['cnn_input[0][0]']              
                                                                                                  
 pool1 (MaxPooling2D)           (None, 24, 24, 16)   0           ['cnn1[0][0]']                   
                                                                                                  
 cnn2 (Conv2D)                  (None, 24, 24, 16)   6416        ['pool1[0][0]']                  
                                                                                            

  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
  "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."


In [None]:
train(env, q_value, epsilon=1.0, checkpoint=0)

iteration 1
epsilon=1.0, episode length=134, total rewards=-0.0771812080536709
epsilon=1.0, episode length=147, total rewards=-0.09411764705879855
epsilon=1.0, episode length=212, total rewards=-0.023404255319128903
epsilon=1.0, episode length=107, total rewards=-0.04731182795697572
Training the model...
experience length=600
number of examples=1200
best total reward =  [-0.09411764705879855, -0.023404255319128903, -0.04731182795697572]
loss = [4670.14404296875, 8.497062683105469, 6.520956039428711, 0.2938860058784485, 0.2749646306037903, 0.3223553001880646, 0.32706350088119507, 0.37113627791404724, 0.36666688323020935, 0.26872125267982483]
iteration 2
epsilon=0.992, episode length=187, total rewards=-0.07340823970033594
epsilon=0.992, episode length=186, total rewards=-0.06645962732914446
epsilon=0.992, episode length=107, total rewards=-0.08571428571427231
epsilon=0.992, episode length=135, total rewards=-0.08648648648645474
Training the model...
experience length=1215
number of exam

Copying file://car-racing-v2-3-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.9840800000000001, episode length=101, total rewards=-0.06486486486484577
epsilon=0.9840800000000001, episode length=97, total rewards=-0.028013029315954813
epsilon=0.9840800000000001, episode length=179, total rewards=-0.014388489208591454
epsilon=0.9840800000000001, episode length=206, total rewards=-0.01034482758616237
Training the model...
experience length=1873
number of examples=2656
best total reward =  [-0.028013029315954813, -0.014388489208591454, -0.01034482758616237]
loss = [0.22956529259681702, 0.2536509037017822, 0.2236800193786621, 0.21344134211540222, 0.22508974373340607, 0.2061450034379959, 0.20475661754608154, 0.20286844670772552, 0.19038625061511993, 0.20572708547115326]
iteration 4
epsilon=0.9762392000000002, episode length=101, total rewards=-0.030508474576256023
epsilon=0.9762392000000002, episode length=98, total rewards=-0.06393442622948986
epsilon=0.9762392000000002, episode length=91, total rewards=-0.05365853658536346
epsilon=0.9762392000000002, episo

Copying file://car-racing-v2-6-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.9607920399200003, episode length=326, total rewards=-0.020261437908432017
epsilon=0.9607920399200003, episode length=114, total rewards=-0.005747126436758537
Training the model...
experience length=3749
number of examples=2570
best total reward =  [-0.01034482758616237, -0.012987012987008856, -0.005747126436758537]
loss = [0.2860679030418396, 0.23285672068595886, 0.2230360209941864, 0.21395818889141083, 0.20462679862976074, 0.1867469996213913, 0.1832372099161148, 0.1872856467962265, 0.17411649227142334, 0.16755561530590057]
iteration 7
epsilon=0.9531841195208004, episode length=221, total rewards=-0.04810126582273597
epsilon=0.9531841195208004, episode length=132, total rewards=-0.09867986798679174
epsilon=0.9531841195208004, episode length=170, total rewards=-0.03515358361771856
Training the model...
experience length=4272
number of examples=2521
best total reward =  [-0.01034482758616237, -0.012987012987008856, -0.005747126436758537]
loss = [0.3321106731891632, 0.2796126008

Copying file://car-racing-v2-9-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.9381957555423366, episode length=191, total rewards=-0.03067092651753109
epsilon=0.9381957555423366, episode length=250, total rewards=-0.09999999999994569
Training the model...
experience length=4995
number of examples=2580
best total reward =  [-0.012987012987008856, -0.005747126436758537, -0.010989010988996045]
loss = [0.382656991481781, 0.2897765338420868, 0.2796346843242645, 0.25184473395347595, 0.24222570657730103, 0.25400981307029724, 0.2369934618473053, 0.21535755693912506, 0.22266559302806854, 0.2003284990787506]
iteration 10
epsilon=0.9308137979869133, episode length=132, total rewards=-0.010963455149491902
epsilon=0.9308137979869133, episode length=140, total rewards=-0.015492957746462838
epsilon=0.9308137979869133, episode length=445, total rewards=-0.06558704453430253
Training the model...
experience length=4995
number of examples=2715
best total reward =  [-0.005747126436758537, -0.010989010988996045, -0.010963455149491902]
loss = [0.4052134156227112, 0.29377588

Copying file://car-racing-v2-12-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.9162706034069739, episode length=126, total rewards=-0.04177215189872571
epsilon=0.9162706034069739, episode length=61, total rewards=-0.08379204892965864
epsilon=0.9162706034069739, episode length=311, total rewards=-0.05813148788919631
Training the model...
experience length=4995
number of examples=2766
best total reward =  [-0.005747126436758537, -0.010989010988996045, -0.010963455149491902]
loss = [0.5300875306129456, 0.40425366163253784, 0.3679558038711548, 0.3458160161972046, 0.32007500529289246, 0.3183176815509796, 0.2845984697341919, 0.28665563464164734, 0.2625437080860138, 0.2587946653366089]
iteration 13
epsilon=0.9091078973729041, episode length=131, total rewards=-0.04210526315786753
epsilon=0.9091078973729041, episode length=258, total rewards=-0.09354838709672841
epsilon=0.9091078973729041, episode length=276, total rewards=-0.018339100345941634
Training the model...
experience length=4995
number of examples=2663
best total reward =  [-0.005747126436758537, -0.0

Copying file://car-racing-v2-15-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.8949966502151834, episode length=631, total rewards=-0.042105263157746736
Training the model...
experience length=4995
number of examples=2879
best total reward =  [-0.005747126436758537, -0.010989010988996045, -0.010963455149491902]
loss = [0.5711642503738403, 0.4244801104068756, 0.37647899985313416, 0.34975627064704895, 0.3348754942417145, 0.3164043724536896, 0.2964693009853363, 0.28336411714553833, 0.2921264171600342, 0.2571047246456146]
iteration 16
epsilon=0.8880466837130316, episode length=190, total rewards=-0.052380952380948864
epsilon=0.8880466837130316, episode length=215, total rewards=-0.017266187050304554
epsilon=0.8880466837130316, episode length=323, total rewards=-0.025899280575511358
Training the model...
experience length=4995
number of examples=2726
best total reward =  [-0.005747126436758537, -0.010989010988996045, -0.010963455149491902]
loss = [0.5961735844612122, 0.444284588098526, 0.4015086889266968, 0.3840551972389221, 0.36417123675346375, 0.3469314873

Copying file://car-racing-v2-18-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.8743545547071423, episode length=302, total rewards=-0.09865771812073443
Training the model...
experience length=4995
number of examples=2655
best total reward =  [-0.005747126436758537, -0.010989010988996045, -0.010963455149491902]
loss = [4.691947937011719, 4.539913654327393, 4.378929138183594, 4.4237189292907715, 4.191781044006348, 4.369113922119141, 4.2367634773254395, 4.18009090423584, 4.157971382141113, 4.154293537139893]
iteration 19
epsilon=0.8676110091600708, episode length=141, total rewards=-0.06572438162542693
epsilon=0.8676110091600708, episode length=400, total rewards=-0.0999999999999599
Training the model...
experience length=4995
number of examples=2539
best total reward =  [-0.005747126436758537, -0.010989010988996045, -0.010963455149491902]
loss = [0.45053955912590027, 0.3585107624530792, 0.3567837178707123, 0.30578097701072693, 0.29339438676834106, 0.2788359522819519, 0.2849767804145813, 0.27254393696784973, 0.2593746781349182, 0.2628391981124878]
iteratio

Copying file://car-racing-v2-21-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.8543255500777855, episode length=433, total rewards=-0.07870036101071531
Training the model...
experience length=4995
number of examples=2675
best total reward =  [-0.010989010988996045, -0.010963455149491902, -0.0043795620437207805]
loss = [4.004319667816162, 2.7846410274505615, 4.422743797302246, 2.5704758167266846, 2.949105978012085, 2.3345088958740234, 2.3342883586883545, 1.175535798072815, 1.394765853881836, 4.243708610534668]
iteration 22
epsilon=0.8477822945770077, episode length=343, total rewards=-0.03573883161501465
epsilon=0.8477822945770077, episode length=604, total rewards=-0.09731543624147432
Training the model...
experience length=4995
number of examples=2945
best total reward =  [-0.010989010988996045, -0.010963455149491902, -0.0043795620437207805]
loss = [6.343634605407715, 4.738544940948486, 0.6238279342651367, 3.6025876998901367, 0.48140987753868103, 1.3352304697036743, 1.0115615129470825, 1.638847827911377, 0.8465099930763245, 1.4690688848495483]
iteratio

Copying file://car-racing-v2-24-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.8348914269149252, episode length=250, total rewards=-0.010394265232935157
Training the model...
experience length=4995
number of examples=2627
best total reward =  [-0.010963455149491902, -0.0043795620437207805, -0.010394265232935157]
loss = [5.706193447113037, 5.973152160644531, 3.2880923748016357, 3.2886927127838135, 2.049574136734009, 3.2782623767852783, 2.42425274848938, 1.0383596420288086, 3.1933560371398926, 2.4392035007476807]
iteration 25
epsilon=0.8285425126457759, episode length=679, total rewards=-0.03883495145611812
Training the model...
experience length=4995
number of examples=2677
best total reward =  [-0.010963455149491902, -0.0043795620437207805, -0.010394265232935157]
loss = [2.993753433227539, 1.2406193017959595, 1.4135420322418213, 1.2960593700408936, 0.9854450225830078, 1.4317052364349365, 1.009763479232788, 0.6567979454994202, 1.0798367261886597, 0.9866025447845459]
iteration 26
epsilon=0.8222570875193183, episode length=99, total rewards=-0.033222591362

Copying file://car-racing-v2-27-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.8160345166441252, episode length=618, total rewards=-0.010749185667808109
Training the model...
experience length=4995
number of examples=3071
best total reward =  [-0.0043795620437207805, -0.010394265232935157, -0.006249999999883488]
loss = [4.377895832061768, 3.1097264289855957, 2.31019926071167, 2.5927155017852783, 3.410390853881836, 2.101060152053833, 1.7320865392684937, 1.3760912418365479, 1.7659392356872559, 2.1835968494415283]
iteration 28
epsilon=0.809874171477684, episode length=537, total rewards=-0.036559139784779066
Training the model...
experience length=4995
number of examples=2535
best total reward =  [-0.0043795620437207805, -0.010394265232935157, -0.006249999999883488]
loss = [3.2757728099823, 0.9789324402809143, 0.798179030418396, 1.5843074321746826, 0.9695957899093628, 1.0070234537124634, 0.5530701875686646, 0.5220404863357544, 1.205777883529663, 0.40041568875312805]
iteration 29
epsilon=0.8037754297629072, episode length=165, total rewards=-0.0983498349834

Copying file://car-racing-v2-30-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.7977376754652781, episode length=726, total rewards=-0.09273927392791248
Training the model...
experience length=4995
number of examples=3222
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [6.498471736907959, 5.5571160316467285, 5.6270670890808105, 4.391630172729492, 4.3442254066467285, 3.6884989738464355, 2.961413860321045, 2.645156145095825, 2.507843494415283, 1.6411340236663818]
iteration 31
epsilon=0.7917602987106254, episode length=323, total rewards=-0.0258992805754705
epsilon=0.7917602987106254, episode length=404, total rewards=-0.09595959595953141
Training the model...
experience length=4995
number of examples=2725
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [1.8965188264846802, 0.8434029221534729, 1.173973560333252, 0.83404141664505, 0.8273553848266602, 0.7233107089996338, 0.6486920118331909, 0.5050587058067322, 1.577437162399292, 0.7921031713485718]
iteration 32
epsil

Copying file://car-racing-v2-33-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [1.291761040687561, 0.6840479969978333, 0.48802080750465393, 0.5168023705482483, 0.451964795589447, 0.49550846219062805, 0.40942370891571045, 0.3932945728302002, 0.3870457112789154, 0.36011406779289246]
iteration 34
epsilon=0.7741844260786213, episode length=260, total rewards=-0.04136807817582808
epsilon=0.7741844260786213, episode length=268, total rewards=-0.08007662835247698
Training the model...
experience length=4995
number of examples=2526
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.5517561435699463, 0.43839624524116516, 0.37603759765625, 0.36295199394226074, 0.3832947015762329, 0.35739845037460327, 0.30814874172210693, 0.3092752993106842, 0.30324292182922363, 0.2892172336578369]
iteration 35
epsilon=0.7684425818178351, episode length=689, total rewards=-0.03448275

Copying file://car-racing-v2-36-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2507
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.5357738137245178, 0.4606533348560333, 0.3835769295692444, 0.3944418728351593, 0.3626071810722351, 0.3537819981575012, 0.35990697145462036, 0.29943832755088806, 0.31148386001586914, 0.3134121298789978]
iteration 37
epsilon=0.7571305744396604, episode length=114, total rewards=-0.07142857142856207
epsilon=0.7571305744396604, episode length=738, total rewards=-0.0992619926200764
Training the model...
experience length=4995
number of examples=2850
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.5236021280288696, 0.43966299295425415, 0.3818727731704712, 0.38777390122413635, 0.35003551840782166, 0.34075412154197693, 0.3270278871059418, 0.30004826188087463, 0.3224502205848694, 0.2804740369319916]
iteration 38
epsilon=0.7515592686952639, episode length=644, total rewards=-0.093220

Copying file://car-racing-v2-39-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.872490644454956, 0.6376544833183289, 0.5895623564720154, 0.5666605234146118, 0.49703502655029297, 0.5290766954421997, 0.4420883357524872, 0.46237844228744507, 0.4594278931617737, 0.393181174993515]
iteration 40
epsilon=0.7405832392482283, episode length=492, total rewards=-0.0575757575755953
epsilon=0.7405832392482283, episode length=124, total rewards=-0.07763975155278735
Training the model...
experience length=4995
number of examples=2614
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.6564000844955444, 0.5537481307983398, 0.4938856065273285, 0.45905807614326477, 0.5141453146934509, 0.40491440892219543, 0.39194247126579285, 0.3920321762561798, 0.373121976852417, 0.3651792109012604]
iteration 41
epsilon=0.7351774068557462, episode length=657, total rewards=-0.056055363321

Copying file://car-racing-v2-42-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.7298256327871888, episode length=766, total rewards=-0.044947735192139876
Training the model...
experience length=4995
number of examples=3030
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.6396328806877136, 0.6870590448379517, 0.4980151653289795, 0.5231123566627502, 0.47174087166786194, 0.4381859004497528, 0.42606836557388306, 0.4709434509277344, 0.40824219584465027, 0.41130468249320984]
iteration 43
epsilon=0.724527376459317, episode length=498, total rewards=-0.06611295681051765
epsilon=0.724527376459317, episode length=250, total rewards=-0.09999999999999232
Training the model...
experience length=4995
number of examples=2746
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.6536651849746704, 0.5576050877571106, 0.5598188638687134, 0.494795024394989, 0.45213553309440613, 0.42816421389579773, 0.399184912443161, 0.46589967608451843, 0.3948008120059967, 0.3574700653553009]
iter

Copying file://car-racing-v2-45-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.9845167994499207, 0.7349854111671448, 0.7566704154014587, 0.6651336550712585, 0.6664837598800659, 0.6084010601043701, 0.5926775336265564, 0.6007025837898254, 0.5582659244537354, 0.5592896938323975]
iteration 46
epsilon=0.708948388851099, episode length=666, total rewards=-0.03333333333367003
Training the model...
experience length=4995
number of examples=2664
best total reward =  [-0.010394265232935157, -0.006249999999883488, 1.6949152542371344]
loss = [0.6800087690353394, 0.5324490070343018, 0.570703387260437, 0.5745065808296204, 0.42778873443603516, 0.4347239136695862, 0.5605226159095764, 0.4266667664051056, 0.4005781412124634, 0.37373673915863037]
iteration 47
epsilon=0.7038589049625881, episode length=999, total rewards=23.67491166077695
Training the model...
experience length=4995
number of examples=2997
best total r

Copying file://car-racing-v2-48-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2674
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.7146697640419006, 0.5819615125656128, 0.5733518600463867, 0.5510789155960083, 0.5117146372795105, 0.5847658514976501, 0.6467679738998413, 0.4798166751861572, 0.4950740933418274, 0.4642728269100189]
iteration 49
epsilon=0.6938321127538326, episode length=297, total rewards=-0.09702970297020852
epsilon=0.6938321127538326, episode length=750, total rewards=-0.09999999999978759
Training the model...
experience length=4995
number of examples=3045
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.8268657326698303, 0.6844457387924194, 0.6384868025779724, 0.6166021823883057, 0.5707229375839233, 0.539545476436615, 0.5941146016120911, 0.5359224081039429, 0.4650321304798126, 0.47846269607543945]
iteration 50
epsilon=0.6888937916262943, episode length=322, total rewards=-0.04193548387088272
eps

Copying file://car-racing-v2-51-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2535
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [6.87520170211792, 6.570331573486328, 6.188348293304443, 5.820117473602295, 5.629787445068359, 5.397922992706299, 5.1703572273254395, 4.906548023223877, 4.907143592834473, 4.560151100158691]
iteration 52
epsilon=0.6791648051729311, episode length=698, total rewards=-0.047058823529512156
Training the model...
experience length=4995
number of examples=2696
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.9250875115394592, 0.8219373822212219, 0.6196426153182983, 0.641686737537384, 0.7067137360572815, 0.580897867679596, 0.607862114906311, 0.5573809742927551, 0.6001991629600525, 0.5862836241722107]
iteration 53
epsilon=0.6743731571212017, episode length=415, total rewards=-0.0775086505191045
epsilon=0.6743731571212017, episode length=370, total rewards=-0.06296296296285608
Training the mod

Copying file://car-racing-v2-54-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2551
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.8192819356918335, 0.7119447588920593, 0.6452131867408752, 0.5947602391242981, 0.6061350107192993, 0.5901057124137878, 0.5757148265838623, 0.5376889705657959, 0.5440928339958191, 0.5091583728790283]
iteration 55
epsilon=0.6649331312944898, episode length=738, total rewards=-0.09926199261969981
Training the model...
experience length=4995
number of examples=2736
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.7741946578025818, 0.6676084995269775, 0.6660226583480835, 0.6001577377319336, 0.6832510828971863, 0.5299984812736511, 0.5014938116073608, 0.4535984694957733, 0.4983888566493988, 0.4919092357158661]
iteration 56
epsilon=0.660283799981545, episode length=449, total rewards=-0.05617977528109219
epsilon=0.660283799981545, episode length=532, total rewards=-0.06806083650196304
Train

Copying file://car-racing-v2-57-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.6556809619817294, episode length=454, total rewards=-0.04545454545448033
Training the model...
experience length=4995
number of examples=2822
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.7505693435668945, 0.6420417428016663, 0.6325793266296387, 0.5408596396446228, 0.5869265198707581, 0.5275754928588867, 0.5138826966285706, 0.4973830282688141, 0.47714418172836304, 0.5115242004394531]
iteration 58
epsilon=0.6511241523619121, episode length=355, total rewards=-0.012811387900266896
epsilon=0.6511241523619121, episode length=416, total rewards=-0.033333333333209064
Training the model...
experience length=4995
number of examples=2769
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.7963941097259521, 0.61960369348526, 0.7761248350143433, 0.5305917859077454, 0.512040913105011, 0.5106053948402405, 0.5809378623962402, 0.5230209231376648, 0.4564245939254761, 0.4915066659450531]
iteration 59
eps

Copying file://car-racing-v2-60-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2840
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [5.306717872619629, 5.132063388824463, 4.903147220611572, 4.738912105560303, 4.705507755279541, 4.343051433563232, 4.359453201293945, 4.046311378479004, 3.9556362628936768, 3.8168821334838867]
iteration 61
epsilon=0.6377253139126109, episode length=447, total rewards=-0.07156549520757705
epsilon=0.6377253139126109, episode length=365, total rewards=-0.014634146341436977
Training the model...
experience length=4995
number of examples=2810
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.7122781872749329, 0.6453551054000854, 0.6020594239234924, 0.5403184294700623, 0.5791386961936951, 0.45539528131484985, 0.5096660256385803, 0.4945586323738098, 0.43150413036346436, 0.47638148069381714]
iteration 62
epsilon=0.6333480607734847, episode length=467, total rewards=-0.03741007194255028
epsilon

Copying file://car-racing-v2-63-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.6290145801657498, episode length=456, total rewards=-0.0859649122809405
Training the model...
experience length=4995
number of examples=2906
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.8289624452590942, 0.745888352394104, 0.6579135060310364, 0.6265273690223694, 0.6656981706619263, 0.5433207154273987, 0.5434761047363281, 0.5496118068695068, 0.492240846157074, 0.5317110419273376]
iteration 64
epsilon=0.6247244343640923, episode length=413, total rewards=-0.020689655172515503
epsilon=0.6247244343640923, episode length=370, total rewards=-0.06296296296286852
Training the model...
experience length=4995
number of examples=2781
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.7367360591888428, 0.667593777179718, 0.5875180959701538, 0.584541916847229, 0.5528475642204285, 0.5249641537666321, 0.4938293397426605, 0.582410454750061, 0.5029661059379578, 0.4879342317581177]
iteration 65
epsilon=

Copying file://car-racing-v2-66-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.6162724181202468, episode length=620, total rewards=-0.008496732026018722
Training the model...
experience length=4995
number of examples=2971
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [1.0184663534164429, 0.8042960166931152, 0.6563801169395447, 0.6714605689048767, 0.6542313694953918, 0.5792004466056824, 0.5764523148536682, 0.8891252279281616, 0.5031744837760925, 0.5615625381469727]
iteration 67
epsilon=0.6121096939390444, episode length=698, total rewards=-0.04705882352973598
Training the model...
experience length=4995
number of examples=2696
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [0.7974976897239685, 0.7385421991348267, 0.6764478087425232, 0.6534709334373474, 0.7674073576927185, 0.5301023125648499, 0.5772927403450012, 0.5165776610374451, 0.537830114364624, 0.6003148555755615]
iteration 68
epsilon=0.6079885969996539, episode length=566, total rewards=-0.09622641509424423
Tra

Copying file://car-racing-v2-69-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2629
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [6.8643293380737305, 6.092202186584473, 5.865428447723389, 5.6846771240234375, 5.34736442565918, 5.323902130126953, 5.020486831665039, 4.993673324584961, 4.608901023864746, 4.272120952606201]
iteration 70
epsilon=0.5998696239193608, episode length=415, total rewards=-0.07750865051891975
epsilon=0.5998696239193608, episode length=167, total rewards=-0.08690807799438649
Training the model...
experience length=4995
number of examples=2580
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [4.084051609039307, 3.409533977508545, 3.169874668121338, 2.3549768924713135, 2.1512789726257324, 1.223581075668335, 1.1040877103805542, 1.3090553283691406, 0.7102609276771545, 0.6940319538116455]
iteration 71
epsilon=0.5958709276801673, episode length=688, total rewards=-0.047540983606329607
Training the mo

Copying file://car-racing-v2-72-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [2.052128791809082, 1.662509799003601, 1.4436167478561401, 1.3646620512008667, 1.1716033220291138, 1.1276850700378418, 1.1930409669876099, 1.0623112916946411, 1.0079345703125, 1.047108769416809]
iteration 73
epsilon=0.5879930962193319, episode length=726, total rewards=-0.03564013840830002
Training the model...
experience length=4995
number of examples=2724
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [1.3572574853897095, 1.0902938842773438, 0.9590805172920227, 0.9605735540390015, 0.8409146666526794, 0.8399959206581116, 0.7599741816520691, 0.7948305606842041, 0.7790560722351074, 0.7254937291145325]
iteration 74
epsilon=0.5841131652571386, episode length=686, total rewards=-0.07254901960795945
Training the model...
experience length=4995
number of examples=2684
best total reward =  [-

Copying file://car-racing-v2-75-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.006249999999883488, 1.6949152542371344, 23.67491166077695]
loss = [2.1591618061065674, 1.7003637552261353, 1.5018688440322876, 1.3598780632019043, 1.4282972812652588, 1.2237486839294434, 1.156898021697998, 1.1764105558395386, 1.1076552867889404, 1.0932470560073853]
iteration 76
epsilon=0.5764693132685216, episode length=999, total rewards=9.634551495015508
Training the model...
experience length=4995
number of examples=2997
best total reward =  [1.6949152542371344, 23.67491166077695, 9.634551495015508]
loss = [1.3715215921401978, 1.1151584386825562, 1.092045545578003, 1.0182526111602783, 0.913235068321228, 0.8934653997421265, 0.8371784090995789, 0.7978164553642273, 0.7439979314804077, 0.7991209626197815]
iteration 77
epsilon=0.5727046201358363, episode length=791, total rewards=-0.06330935251850289
Training the model...
experience length=4995
number of examples=2789
best total reward =  [1.694

Copying file://car-racing-v2-78-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [1.6949152542371344, 23.67491166077695, 9.634551495015508]
loss = [2.3545994758605957, 1.975167989730835, 1.8132634162902832, 1.6798816919326782, 1.5404905080795288, 1.4060496091842651, 1.5158973932266235, 1.2887177467346191, 1.2748268842697144, 1.2679171562194824]
iteration 79
epsilon=0.5652877981951332, episode length=622, total rewards=-0.016262975778972705
Training the model...
experience length=4995
number of examples=2620
best total reward =  [1.6949152542371344, 23.67491166077695, 9.634551495015508]
loss = [1.3451557159423828, 1.1692255735397339, 1.020113229751587, 0.9644368886947632, 0.9148367643356323, 0.8875772356987, 0.8312468528747559, 0.876232922077179, 0.7461544871330261, 0.8236163854598999]
iteration 80
epsilon=0.5616349202131818, episode length=666, total rewards=-0.03333333333376595
Training the model...
experience length=4995
number of examples=2664
best total reward =  [1.694915

Copying file://car-racing-v2-81-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [1.6949152542371344, 23.67491166077695, 9.634551495015508]
loss = [3.168860912322998, 2.7131729125976562, 2.808896064758301, 2.3552169799804688, 2.552189350128174, 2.2236905097961426, 2.3954646587371826, 1.8371481895446777, 2.0323832035064697, 1.9334758520126343]
iteration 82
epsilon=0.5544383853009396, episode length=778, total rewards=-0.07898832684883783
Training the model...
experience length=4995
number of examples=2776
best total reward =  [1.6949152542371344, 23.67491166077695, 9.634551495015508]
loss = [1.9937903881072998, 1.9066455364227295, 1.4111076593399048, 1.362650752067566, 1.240671992301941, 1.198577642440796, 1.1385278701782227, 1.0893635749816895, 1.1091147661209106, 1.0444952249526978]
iteration 83
epsilon=0.5508940014479302, episode length=986, total rewards=-0.06054421768798446
Training the model...
experience length=4995
number of examples=2984
best total reward =  [1.6949152

Copying file://car-racing-v2-84-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [1.6949152542371344, 23.67491166077695, 9.634551495015508]
loss = [2.6740853786468506, 2.4176738262176514, 2.077864408493042, 2.046456813812256, 2.0989725589752197, 2.2910196781158447, 1.785312533378601, 1.567520022392273, 1.654050350189209, 2.2771289348602295]
iteration 85
epsilon=0.5439112108191164, episode length=343, total rewards=-0.035738831615035965
epsilon=0.5439112108191164, episode length=999, total rewards=48.760330578511216
Training the model...
experience length=4995
number of examples=3340
best total reward =  [23.67491166077695, 9.634551495015508, 48.760330578511216]
loss = [2.13087797164917, 2.000352144241333, 1.6900887489318848, 1.8789851665496826, 1.8626974821090698, 2.4953110218048096, 1.5997400283813477, 1.5856386423110962, 1.5187097787857056, 1.4799890518188477]
iteration 86
epsilon=0.5404720987109253, episode length=999, total rewards=74.79674796747963
Training the model...
e

Copying file://car-racing-v2-87-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [9.634551495015508, 48.760330578511216, 74.79674796747963]
loss = [2.640571117401123, 2.2366485595703125, 2.0887203216552734, 2.283088207244873, 2.008620500564575, 1.9939314126968384, 2.2013301849365234, 1.57469642162323, 1.7797284126281738, 1.7957842350006104]
iteration 88
epsilon=0.5336967039465779, episode length=999, total rewards=68.5393258426956
Training the model...
experience length=4995
number of examples=2997
best total reward =  [48.760330578511216, 74.79674796747963, 68.5393258426956]
loss = [2.0532989501953125, 1.8724673986434937, 1.7043858766555786, 1.5527067184448242, 1.5271679162979126, 1.5719956159591675, 1.3638931512832642, 1.4745917320251465, 1.3546470403671265, 1.3696719408035278]
iteration 89
epsilon=0.530359736907112, episode length=799, total rewards=-3.8477554475946363e-13
Training the model...
experience length=4995
number of examples=2797
best total reward =  [48.76033057

Copying file://car-racing-v2-90-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [48.760330578511216, 74.79674796747963, 68.5393258426956]
loss = [2.4199845790863037, 1.8947292566299438, 1.9026024341583252, 1.7381649017333984, 1.717725157737732, 1.6045011281967163, 1.4639968872070312, 1.5964908599853516, 1.4121441841125488, 1.4424240589141846]
iteration 91
epsilon=0.5237855781426605, episode length=999, total rewards=50.3267973856198
Training the model...
experience length=4995
number of examples=2997
best total reward =  [74.79674796747963, 68.5393258426956, 50.3267973856198]
loss = [2.101505756378174, 2.429283380508423, 1.7550908327102661, 1.6151187419891357, 1.7967737913131714, 1.4273499250411987, 1.384971022605896, 1.4141628742218018, 1.2799243927001953, 1.298803687095642]
iteration 92
epsilon=0.5205477223612338, episode length=999, total rewards=88.81118881118927
Training the model...
experience length=4995
number of examples=2997
best total reward =  [68.5393258426956, 5

Copying file://car-racing-v2-93-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [68.5393258426956, 50.3267973856198, 88.81118881118927]
loss = [3.0290143489837646, 2.640099048614502, 2.291266441345215, 2.236238956451416, 2.2244174480438232, 2.058105707168579, 1.8753728866577148, 1.854135274887085, 1.7699954509735107, 1.796852707862854]
iteration 94
epsilon=0.5141688226862453, episode length=999, total rewards=97.80219780219817
Training the model...
experience length=4995
number of examples=2997
best total reward =  [50.3267973856198, 88.81118881118927, 97.80219780219817]
loss = [2.2990264892578125, 2.739629030227661, 1.7203983068466187, 1.8439944982528687, 1.7261176109313965, 1.6407725811004639, 1.5668457746505737, 1.7099303007125854, 1.3697822093963623, 1.4370547533035278]
iteration 95
epsilon=0.5110271344593829, episode length=999, total rewards=107.27272727272899
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 9

Copying file://car-racing-v2-96-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [3.1971983909606934, 2.8240320682525635, 2.5672991275787354, 2.5075125694274902, 2.259315252304077, 2.3778040409088135, 2.8059332370758057, 1.9537514448165894, 1.9835524559020996, 1.9931329488754272]
iteration 97
epsilon=0.5048376944836411, episode length=999, total rewards=83.45323741007223
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.5619142055511475, 2.191514253616333, 1.896000862121582, 1.9135277271270752, 2.5358757972717285, 1.8177080154418945, 1.598626971244812, 1.7527858018875122, 1.4691729545593262, 1.57919442653656]
iteration 98
epsilon=0.5017893175388047, episode length=999, total rewards=35.03649635036396
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.811188811189

Copying file://car-racing-v2-99-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.620004653930664, 2.5517280101776123, 2.1397385597229004, 1.8659462928771973, 1.8761391639709473, 1.7478567361831665, 1.7257436513900757, 1.891983985900879, 1.6594003438949585, 1.6242694854736328]
iteration 100
epsilon=0.49578371011978256, episode length=999, total rewards=77.53623188406092
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.061455249786377, 1.951008677482605, 1.7166813611984253, 1.6142501831054688, 1.6255028247833252, 1.5657694339752197, 1.4228218793869019, 1.5471982955932617, 1.3629183769226074, 1.3368504047393799]
iteration 101
epsilon=0.4928258730185847, episode length=999, total rewards=31.034482758619973
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.811188

Copying file://car-racing-v2-102-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [3.3836936950683594, 2.538053035736084, 2.613008737564087, 2.375579357147217, 2.2831249237060547, 2.0058257579803467, 2.0091540813446045, 1.733275055885315, 1.733290433883667, 1.6906421184539795]
iteration 103
epsilon=0.48699863814551486, episode length=787, total rewards=-0.03287671232939113
Training the model...
experience length=4995
number of examples=2785
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.3776609897613525, 2.436992883682251, 2.0485270023345947, 1.6364840269088745, 1.757003664970398, 1.6916757822036743, 1.59771728515625, 2.2674033641815186, 1.4890377521514893, 1.952741265296936]
iteration 104
epsilon=0.4841286517640597, episode length=718, total rewards=-0.004575163399281851
Training the model...
experience length=4995
number of examples=2716
best total reward =  [88.811188

Copying file://car-racing-v2-105-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2670
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.076727867126465, 2.5583348274230957, 1.6662285327911377, 1.546604871749878, 1.7636380195617676, 1.739646553993225, 1.811282753944397, 1.4092319011688232, 1.426238775253296, 1.586596965789795]
iteration 106
epsilon=0.47847449159395494, episode length=999, total rewards=80.50541516245482
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.333791732788086, 1.8185858726501465, 2.1548006534576416, 1.752670168876648, 1.5668612718582153, 2.1098337173461914, 1.4309475421905518, 1.4742014408111572, 1.368123173713684, 1.8188574314117432]
iteration 107
epsilon=0.4756897466780154, episode length=638, total rewards=-0.07021276595777093
Training the model...
experience length=4995
number of examples=2636
best total reward =  [88.811188811

Copying file://car-racing-v2-108-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.3042852878570557, 2.0320658683776855, 1.7854318618774414, 1.6609058380126953, 1.806378960609436, 1.6596626043319702, 1.679978609085083, 1.5264437198638916, 2.232487201690674, 1.4729721546173096]
iteration 109
epsilon=0.47020352071912286, episode length=999, total rewards=27.340823970036638
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [1.925912857055664, 1.5242853164672852, 2.164337158203125, 1.3847593069076538, 1.5687987804412842, 1.339914321899414, 1.4199663400650024, 1.6780612468719482, 1.4319226741790771, 1.187862515449524]
iteration 110
epsilon=0.4675014855119316, episode length=999, total rewards=51.51515151515031
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.811188811

Copying file://car-racing-v2-111-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.2116849422454834, 1.9229693412780762, 1.7478712797164917, 1.9256222248077393, 1.712477445602417, 1.7403481006622314, 1.56063711643219, 1.555420994758606, 1.6730363368988037, 1.4197107553482056]
iteration 112
epsilon=0.4621782059502441, episode length=217, total rewards=-0.06086956521737297
epsilon=0.4621782059502441, episode length=591, total rewards=-0.009968847352422844
Training the model...
experience length=4995
number of examples=2806
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.2123324871063232, 1.507332444190979, 1.525511622428894, 1.7562479972839355, 1.3217999935150146, 1.442004919052124, 1.4255967140197754, 1.2058441638946533, 1.4975690841674805, 1.1753278970718384]
iteration 113
epsilon=0.4595564238907417, episode length=999, total rewards=40.24390243902357
Training the model

Copying file://car-racing-v2-114-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.792414903640747, 1.9686139822006226, 1.8131208419799805, 1.5884478092193604, 1.5954853296279907, 1.4997583627700806, 1.5164159536361694, 1.3803303241729736, 1.40797758102417, 1.2945221662521362]
iteration 115
epsilon=0.4543912510553159, episode length=999, total rewards=61.49068322981345
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [1.8986027240753174, 1.6611181497573853, 1.5648242235183716, 1.4480608701705933, 1.5105475187301636, 1.526603102684021, 1.4453918933868408, 1.2994136810302734, 1.3824127912521362, 1.2948784828186035]
iteration 116
epsilon=0.4518473385447628, episode length=185, total rewards=-0.012639405204411619
epsilon=0.4518473385447628, episode length=999, total rewards=53.02491103202753
Training the model

Copying file://car-racing-v2-117-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.428687810897827, 2.2443923950195312, 1.911204218864441, 4.14487361907959, 1.5526084899902344, 2.2756552696228027, 1.5836814641952515, 1.6424500942230225, 2.0590977668762207, 1.4726983308792114]
iteration 118
epsilon=0.446835576507722, episode length=869, total rewards=-0.04347826087035253
Training the model...
experience length=4995
number of examples=2867
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.0640223026275635, 1.4990944862365723, 1.5868107080459595, 1.4412504434585571, 1.3797743320465088, 1.3606817722320557, 1.363476037979126, 1.2141058444976807, 1.3532382249832153, 1.2111926078796387]
iteration 119
epsilon=0.4443672207426448, episode length=999, total rewards=88.35616438356456
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.8111888

Copying file://car-racing-v2-120-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.4067580699920654, 2.227271795272827, 2.160731315612793, 2.0370988845825195, 1.8401504755020142, 1.8996788263320923, 2.0582833290100098, 1.8249001502990723, 1.7670397758483887, 1.7261697053909302]
iteration 121
epsilon=0.43950431304986615, episode length=413, total rewards=-0.04661654135357304
epsilon=0.43950431304986615, episode length=862, total rewards=-0.09310344827634265
Training the model...
experience length=4995
number of examples=3273
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.139991521835327, 1.7674124240875244, 1.5896837711334229, 1.6893693208694458, 1.6160563230514526, 1.7171286344528198, 1.6848326921463013, 1.3610213994979858, 1.3112013339996338, 1.3406261205673218]
iteration 122
epsilon=0.4371092699193675, episode length=999, total rewards=73.01038062284037
Training the 

Copying file://car-racing-v2-123-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.1104416847229004, 2.0219473838806152, 1.8921136856079102, 1.7848355770111084, 2.3113462924957275, 2.0157182216644287, 1.6035711765289307, 1.9646739959716797, 1.6614514589309692, 1.6057175397872925]
iteration 124
epsilon=0.4323907954479721, episode length=999, total rewards=52.24913494809803
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [1.9079521894454956, 1.6888747215270996, 1.5459810495376587, 1.3387279510498047, 1.520965337753296, 1.3600022792816162, 1.51497220993042, 1.4080324172973633, 1.3265600204467773, 1.2486556768417358]
iteration 125
epsilon=0.43006688749349237, episode length=707, total rewards=-0.09292929292951624
Training the model...
experience length=4995
number of examples=2705
best total reward =  [88.811

Copying file://car-racing-v2-126-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.115967273712158, 2.128180503845215, 1.9642637968063354, 1.8005369901657104, 1.775730013847351, 1.7036893367767334, 1.6874785423278809, 1.588441252708435, 1.67470121383667, 1.6506526470184326]
iteration 127
epsilon=0.4254885564323719, episode length=999, total rewards=63.4615384615374
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.041935682296753, 1.617904543876648, 1.7507156133651733, 1.5595405101776123, 1.537097692489624, 1.5422470569610596, 1.4190927743911743, 1.4032437801361084, 1.4295293092727661, 1.4018604755401611]
iteration 128
epsilon=0.4232336708680482, episode length=999, total rewards=4.529616724738039
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927

Copying file://car-racing-v2-129-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.2423102855682373, 1.9127299785614014, 1.8875118494033813, 2.1135568618774414, 1.751463532447815, 1.518314242362976, 1.6366206407546997, 1.6251599788665771, 1.3870151042938232, 1.561519980430603]
iteration 130
epsilon=0.41879132081777404, episode length=785, total rewards=-0.028571428572086804
Training the model...
experience length=4995
number of examples=2783
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [1.8714927434921265, 1.7372214794158936, 1.5670312643051147, 1.5067218542099, 1.575531005859375, 1.8214764595031738, 1.3619561195373535, 1.4755305051803589, 1.3808220624923706, 1.4722661972045898]
iteration 131
epsilon=0.4166034076095963, episode length=999, total rewards=45.21452145214454
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.811188

Copying file://car-racing-v2-132-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.1109871864318848, 1.972904920578003, 1.7484612464904785, 1.828346610069275, 2.2555525302886963, 1.6734092235565186, 1.8401317596435547, 1.6830703020095825, 1.5022304058074951, 1.5900086164474487]
iteration 133
epsilon=0.4122929997981653, episode length=330, total rewards=-0.06696696696688975
epsilon=0.4122929997981653, episode length=999, total rewards=70.56856187290904
Training the model...
experience length=4995
number of examples=3327
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [1.9200443029403687, 1.9129037857055664, 1.6082408428192139, 1.7328417301177979, 1.6437612771987915, 1.39853835105896, 1.5316964387893677, 1.4581115245819092, 1.4932293891906738, 1.375145435333252]
iteration 134
epsilon=0.41017006980018367, episode length=999, total rewards=42.85714285714167
Training the model.

Copying file://car-racing-v2-135-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.4352962970733643, 2.120201349258423, 2.165492057800293, 2.4938583374023438, 1.7613259553909302, 2.183298349380493, 1.721933364868164, 1.6727585792541504, 1.7422378063201904, 1.5607507228851318]
iteration 136
epsilon=0.40598768541116, episode length=999, total rewards=68.45878136200653
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.8816006183624268, 1.615639567375183, 1.898332118988037, 2.084519386291504, 1.5262668132781982, 1.5739867687225342, 1.5022526979446411, 1.4805995225906372, 1.4534049034118652, 1.5989060401916504]
iteration 137
epsilon=0.40392780855704835, episode length=999, total rewards=51.079136690649776
Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118

Copying file://car-racing-v2-138-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [88.81118881118927, 97.80219780219817, 107.27272727272899]
loss = [2.6533477306365967, 2.2106223106384277, 2.081380605697632, 1.9392606019973755, 2.284925699234009, 1.7279624938964844, 1.7477341890335083, 1.7797492742538452, 1.7372572422027588, 1.701098918914795]
iteration 139
epsilon=0.3998696451667631, episode length=999, total rewards=96.49122807017821
Training the model...
experience length=4995
number of examples=2997
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [2.1673853397369385, 1.8424323797225952, 1.9213979244232178, 2.3988771438598633, 1.5392545461654663, 1.7356950044631958, 1.5564014911651611, 1.6150633096694946, 1.5844566822052002, 1.3952586650848389]
iteration 140
epsilon=0.39787094871509543, episode length=299, total rewards=-0.09966777408630034
epsilon=0.39787094871509543, episode length=427, total rewards=-0.09537366548029103
Training the 

Copying file://car-racing-v2-141-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [2.2832436561584473, 2.105569362640381, 1.9161853790283203, 1.934709906578064, 1.7329052686691284, 1.738490104675293, 1.67402184009552, 1.7312203645706177, 1.7835444211959839, 1.4714659452438354]
iteration 142
epsilon=0.393933316835665, episode length=798, total rewards=-0.05209125475359966
Training the model...
experience length=4995
number of examples=2796
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [1.8176968097686768, 1.7185232639312744, 1.505583643913269, 1.4045597314834595, 1.9336234331130981, 1.4294477701187134, 1.4194151163101196, 1.3507176637649536, 1.2913233041763306, 1.6350394487380981]
iteration 143
epsilon=0.39199398366730837, episode length=705, total rewards=-0.011764705882798071
Training the model...
experience length=4995
number of examples=2703
best total reward =  [97.802

Copying file://car-racing-v2-144-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [2.293098211288452, 1.9094582796096802, 1.73868989944458, 1.750761866569519, 2.0235416889190674, 1.5824401378631592, 1.6709847450256348, 1.6429394483566284, 1.7726030349731445, 1.4611921310424805]
iteration 145
epsilon=0.3881733033923289, episode length=112, total rewards=-0.03239436619716171
epsilon=0.3881733033923289, episode length=999, total rewards=31.498470948013832
Training the model...
experience length=4995
number of examples=3109
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [1.7508409023284912, 2.1275930404663086, 1.666215181350708, 1.7063182592391968, 1.4935568571090698, 1.2873144149780273, 1.473594307899475, 1.316813349723816, 1.295939564704895, 1.2176891565322876]
iteration 146
epsilon=0.38629157035840567, episode length=999, total rewards=77.25752508361498
Training the model...

Copying file://car-racing-v2-147-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2767
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [1.9093517065048218, 1.7125155925750732, 1.8357703685760498, 1.5374083518981934, 1.8471235036849976, 1.404255986213684, 1.4083091020584106, 1.3716932535171509, 1.382683515548706, 1.3316318988800049]
iteration 148
epsilon=0.3825843681082734, episode length=418, total rewards=-0.08815331010454597
epsilon=0.3825843681082734, episode length=671, total rewards=-0.08590604026882187
Training the model...
experience length=4995
number of examples=3087
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [1.693884015083313, 1.5324475765228271, 1.3797873258590698, 1.3368264436721802, 1.4006273746490479, 1.8931143283843994, 1.2510511875152588, 1.2883758544921875, 1.2319344282150269, 1.2222532033920288]
iteration 149
epsilon=0.38075852442719066, episode length=287, total rewards=-0.023021582733805362
epsilon=0.

Copying file://car-racing-v2-150-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [97.80219780219817, 107.27272727272899, 96.49122807017821]
loss = [2.043351888656616, 2.409595251083374, 1.8502038717269897, 1.7187652587890625, 1.8675767183303833, 1.8349987268447876, 1.824794888496399, 1.6293593645095825, 1.5383129119873047, 1.7587388753890991]
iteration 151
epsilon=0.3771614297910896, episode length=999, total rewards=130.76923076923484
Training the model...
experience length=4995
number of examples=2997
best total reward =  [107.27272727272899, 96.49122807017821, 130.76923076923484]
loss = [2.0645272731781006, 1.784651517868042, 1.6152702569961548, 1.7153996229171753, 1.4131104946136475, 2.8487987518310547, 1.4037179946899414, 1.4526714086532593, 1.5769685506820679, 3.2438011169433594]
iteration 152
epsilon=0.3753898154931787, episode length=999, total rewards=127.27272727272945
Training the model...
experience length=4995
number of examples=2997
best total reward =  [96.49122

Copying file://car-racing-v2-153-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [96.49122807017821, 130.76923076923484, 127.27272727272945]
loss = [2.7631137371063232, 2.302340030670166, 2.2465827465057373, 2.253915309906006, 2.2117738723754883, 1.956387996673584, 2.2724108695983887, 1.849381923675537, 1.8910179138183594, 1.8220936059951782]
iteration 154
epsilon=0.3718995581648644, episode length=999, total rewards=71.32867132867132
Training the model...
experience length=4995
number of examples=2997
best total reward =  [96.49122807017821, 130.76923076923484, 127.27272727272945]
loss = [2.0241951942443848, 1.852837085723877, 1.7739640474319458, 1.6626031398773193, 1.7040663957595825, 1.4652249813079834, 2.360609769821167, 1.5798804759979248, 1.4255688190460205, 1.834910273551941]
iteration 155
epsilon=0.37018056258321574, episode length=851, total rewards=-0.09361702127682858
Training the model...
experience length=4995
number of examples=2849
best total reward =  [96.49122

Copying file://car-racing-v2-156-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [96.49122807017821, 130.76923076923484, 127.27272727272945]
loss = [2.4773659706115723, 2.3764045238494873, 2.1440577507019043, 2.0529582500457764, 2.229876756668091, 1.804999828338623, 1.8329172134399414, 1.8046963214874268, 1.99697744846344, 1.8329850435256958]
iteration 157
epsilon=0.3667939693878097, episode length=999, total rewards=129.81366459627506
Training the model...
experience length=4995
number of examples=2997
best total reward =  [130.76923076923484, 127.27272727272945, 129.81366459627506]
loss = [2.2147364616394043, 1.8433938026428223, 1.739778995513916, 1.8050537109375, 1.5563008785247803, 1.6300424337387085, 1.6309505701065063, 1.586267113685608, 1.6395065784454346, 1.4509267807006836]
iteration 158
epsilon=0.3651260296939316, episode length=461, total rewards=-0.09048991354454938
epsilon=0.3651260296939316, episode length=367, total rewards=-0.010702341137141164
Training the mod

Copying file://car-racing-v2-159-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [130.76923076923484, 127.27272727272945, 129.81366459627506]
loss = [2.9596025943756104, 2.480361223220825, 2.4598207473754883, 2.1666486263275146, 2.496279001235962, 1.963138222694397, 2.499743938446045, 1.9558933973312378, 1.9062024354934692, 1.7619640827178955]
iteration 160
epsilon=0.3618400217030224, episode length=999, total rewards=189.28571428571857
Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [2.388922691345215, 2.013720989227295, 1.9437042474746704, 1.8432281017303467, 1.766941785812378, 1.7352374792099, 1.7982887029647827, 1.6114931106567383, 1.7484890222549438, 1.5620691776275635]
iteration 161
epsilon=0.3602216214859921, episode length=922, total rewards=-0.04907749077571319
Training the model...
experience length=4995
number of examples=2920
best total reward =  [127.27272

Copying file://car-racing-v2-162-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [2.809384346008301, 2.3009183406829834, 2.324065685272217, 2.4108521938323975, 1.9610717296600342, 2.115332841873169, 1.8692169189453125, 1.9586812257766724, 2.0165703296661377, 1.8576769828796387]
iteration 163
epsilon=0.3570332112184209, episode length=999, total rewards=34.18530351437577
Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [1.9212055206298828, 2.1681857109069824, 1.6815775632858276, 1.8077284097671509, 1.5363869667053223, 1.6078667640686035, 1.6049920320510864, 1.5776301622390747, 1.4263752698898315, 1.4065011739730835]
iteration 164
epsilon=0.3554628791062367, episode length=999, total rewards=100.81967213114821
Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.2

Copying file://car-racing-v2-165-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [2.4816513061523438, 2.238710403442383, 1.999599575996399, 1.9639166593551636, 1.9285528659820557, 1.889937162399292, 1.6607741117477417, 1.7510546445846558, 1.7915598154067993, 1.6485217809677124]
iteration 166
epsilon=0.35236916781202265, episode length=999, total rewards=125.49019607843428
Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [1.9045313596725464, 2.197880983352661, 1.6673717498779297, 1.864089846611023, 1.6655198335647583, 1.659863829612732, 1.5908492803573608, 1.5534918308258057, 1.6255048513412476, 1.3950798511505127]
iteration 167
epsilon=0.35084547613390243, episode length=999, total rewards=55.11551155115389
Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27

Copying file://car-racing-v2-168-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [2.6080446243286133, 2.211667776107788, 2.0926437377929688, 2.034864664077759, 1.84132719039917, 1.8803781270980835, 1.894095540046692, 1.6722867488861084, 1.7204666137695312, 1.7289175987243652]
iteration 169
epsilon=0.34784365115883775, episode length=772, total rewards=-0.07992277992346541
Training the model...
experience length=4995
number of examples=2770
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [2.7950685024261475, 1.621333122253418, 1.5889698266983032, 1.544073462486267, 1.440394639968872, 1.4757128953933716, 1.4914568662643433, 1.6976908445358276, 1.2843587398529053, 1.3819791078567505]
iteration 170
epsilon=0.34636521464724934, episode length=992, total rewards=-0.06335877862690009
Training the model...
experience length=4995
number of examples=2990
best total reward =  [127

Copying file://car-racing-v2-171-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [127.27272727272945, 129.81366459627506, 189.28571428571857]
loss = [2.1789684295654297, 2.0616278648376465, 1.9720596075057983, 1.776081919670105, 1.798797607421875, 1.6959202289581299, 1.8193211555480957, 1.6095510721206665, 1.9864211082458496, 1.727042317390442]
iteration 172
epsilon=0.3434525468757691, episode length=999, total rewards=158.89967637540775
Training the model...
experience length=4995
number of examples=2997
best total reward =  [129.81366459627506, 189.28571428571857, 158.89967637540775]
loss = [1.9439386129379272, 1.8730255365371704, 1.8613331317901611, 1.632378339767456, 1.6799930334091187, 1.693841814994812, 1.5529885292053223, 1.585453987121582, 1.515642762184143, 1.485962986946106]
iteration 173
epsilon=0.34201802140701143, episode length=999, total rewards=127.45098039215765
Training the model...
experience length=4995
number of examples=2997
best total reward =  [129.8136

Copying file://car-racing-v2-174-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [129.81366459627506, 189.28571428571857, 158.89967637540775]
loss = [2.2236952781677246, 2.1848807334899902, 2.1790313720703125, 1.9698065519332886, 2.0910072326660156, 1.9413866996765137, 1.8670817613601685, 1.7807106971740723, 1.8417097330093384, 1.615870475769043]
iteration 175
epsilon=0.33919186278101193, episode length=999, total rewards=164.28571428571695
Training the model...
experience length=4995
number of examples=2997
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.049628973007202, 1.8129642009735107, 1.8312389850616455, 1.643467903137207, 1.805324673652649, 1.559848427772522, 1.603200912475586, 1.5138485431671143, 1.5969796180725098, 1.6567407846450806]
iteration 176
epsilon=0.3377999441532018, episode length=93, total rewards=-0.05420560747663239
epsilon=0.3377999441532018, episode length=968, total rewards=-0.014186851211449553
Training the

Copying file://car-racing-v2-177-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2674
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [22.550228118896484, 21.351484298706055, 20.12578773498535, 18.523910522460938, 18.42827606201172, 16.30445671081543, 16.089353561401367, 14.666889190673828, 12.731718063354492, 12.24854564666748]
iteration 178
epsilon=0.335057725264553, episode length=522, total rewards=-0.06119402985112399
Training the model...
experience length=4995
number of examples=2520
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [13.074162483215332, 10.846365928649902, 8.89559268951416, 7.454529285430908, 6.602199554443359, 6.20360803604126, 6.161165237426758, 5.9418535232543945, 5.480594158172607, 4.902553558349609]
iteration 179
epsilon=0.3337071480119075, episode length=703, total rewards=-0.06360856269171333
Training the model...
experience length=4995
number of examples=2701
best total reward =  [189.2857142

Copying file://car-racing-v2-180-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2630
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.721897602081299, 2.617604970932007, 2.2323832511901855, 2.1480464935302734, 1.8700954914093018, 2.148030996322632, 1.9221611022949219, 1.8698318004608154, 1.8487493991851807, 1.694057822227478]
iteration 181
epsilon=0.33104637576647056, episode length=253, total rewards=-0.0031746031745331915
epsilon=0.33104637576647056, episode length=209, total rewards=-0.09407665505220852
epsilon=0.33104637576647056, episode length=424, total rewards=-0.01633986928113737
Training the model...
experience length=4995
number of examples=2884
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [8.718106269836426, 3.651576042175293, 2.349550724029541, 2.461822032928467, 1.7628214359283447, 2.2844655513763428, 1.717066764831543, 1.9780782461166382, 1.9132866859436035, 1.936165690422058]
iteration 182
epsilon=0.

Copying file://car-racing-v2-183-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.32843855288871776, episode length=909, total rewards=-0.0909090909100112
Training the model...
experience length=4995
number of examples=3317
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.0259783267974854, 1.974551796913147, 1.9001507759094238, 1.711300015449524, 1.7060433626174927, 1.6918431520462036, 1.6638927459716797, 1.7079120874404907, 1.5684475898742676, 1.55183744430542]
iteration 184
epsilon=0.3271541673598306, episode length=424, total rewards=-0.09717314487649742
epsilon=0.3271541673598306, episode length=474, total rewards=-0.04237288135608461
Training the model...
experience length=4995
number of examples=2896
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [1.802565097808838, 1.808485984802246, 1.7660831212997437, 1.6498924493789673, 1.6608049869537354, 1.567838430404663, 1.5179548263549805, 1.5312398672103882, 1.4252922534942627, 1.7783143520355225]
iteration 185
epsilon=0.32

Copying file://car-racing-v2-186-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.4140987396240234, 2.0647106170654297, 1.9657936096191406, 1.8436150550842285, 2.0047595500946045, 1.8130571842193604, 1.8688998222351074, 1.7471946477890015, 1.8386889696121216, 1.879427433013916]
iteration 187
epsilon=0.32337756143507623, episode length=696, total rewards=-0.013588850174614836
Training the model...
experience length=4995
number of examples=2694
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [1.7571873664855957, 2.070101499557495, 1.705497145652771, 1.5764933824539185, 1.74272620677948, 1.6428104639053345, 1.514513373374939, 1.465987205505371, 1.5549514293670654, 1.6038507223129272]
iteration 188
epsilon=0.32214378582072545, episode length=553, total rewards=-0.06403162055377831
Training the model...
experience length=4995
number of examples=2551
best total reward =  [1

Copying file://car-racing-v2-189-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2860
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.0668857097625732, 1.9007912874221802, 1.918480396270752, 1.6406052112579346, 1.6334952116012573, 1.5913872718811035, 1.5531127452850342, 1.7163097858428955, 1.3814109563827515, 1.5119529962539673]
iteration 190
epsilon=0.319713124482893, episode length=709, total rewards=-0.012345679012219807
Training the model...
experience length=4995
number of examples=2707
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.285382032394409, 2.002807140350342, 1.8354556560516357, 1.7583736181259155, 1.580824375152588, 1.6858364343643188, 1.5515700578689575, 1.4344749450683594, 1.5670523643493652, 1.426042079925537]
iteration 191
epsilon=0.3185159932380641, episode length=999, total rewards=31.20567375886396
Training the model...
experience length=4995
number of examples=2997
best total reward =  [189.2

Copying file://car-racing-v2-192-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.31733083330568346, episode length=627, total rewards=-0.08222996515692782
Training the model...
experience length=4995
number of examples=2806
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [1.8116782903671265, 1.690242052078247, 1.5916215181350708, 1.627641201019287, 1.558509349822998, 1.8097940683364868, 1.4579991102218628, 1.4528018236160278, 1.3867194652557373, 1.4680489301681519]
iteration 193
epsilon=0.3161575249726266, episode length=165, total rewards=-0.04370860927148909
epsilon=0.3161575249726266, episode length=385, total rewards=-0.01479099678447568
Training the model...
experience length=4995
number of examples=2548
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [1.7283015251159668, 1.4334347248077393, 1.6062074899673462, 1.5931061506271362, 1.4921770095825195, 1.4873448610305786, 1.4724148511886597, 1.4940987825393677, 1.307495355606079, 1.519836664199829]
iteration 194
epsilon=0

Copying file://car-racing-v2-195-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.31384599022567133, episode length=999, total rewards=50.00000000000166
Training the model...
experience length=4995
number of examples=3394
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.083970308303833, 1.7799021005630493, 1.8077932596206665, 1.6385747194290161, 1.7307106256484985, 1.6793848276138306, 1.573410987854004, 1.5950841903686523, 1.5146093368530273, 1.4609876871109009]
iteration 196
epsilon=0.31270753032341464, episode length=999, total rewards=74.4680851063823
Training the model...
experience length=4995
number of examples=2997
best total reward =  [189.28571428571857, 158.89967637540775, 164.28571428571695]
loss = [2.087956190109253, 1.9147653579711914, 1.8763923645019531, 1.7739747762680054, 1.7159620523452759, 1.6721566915512085, 1.7911714315414429, 1.6764525175094604, 1.5125447511672974, 1.735974907875061]
iteration 197
epsilon=0.3115804550201805, episode length=999, total rewards=177.5919732441492
Training the mode

Copying file://car-racing-v2-198-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [2.1746177673339844, 1.994947075843811, 1.9641884565353394, 1.7563743591308594, 1.9756183624267578, 1.9364302158355713, 1.7741646766662598, 1.8456058502197266, 1.6269423961639404, 1.7981199026107788]
iteration 199
epsilon=0.3093600039652789, episode length=610, total rewards=-0.006752411576038869
Training the model...
experience length=4995
number of examples=2608
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [1.8535385131835938, 1.9367045164108276, 1.784559965133667, 1.8966044187545776, 1.7915728092193604, 1.6124674081802368, 1.9251312017440796, 1.687690019607544, 1.5844743251800537, 1.4704468250274658]
iteration 200
epsilon=0.3082664039256261, episode length=243, total rewards=-0.09444444444441413
epsilon=0.3082664039256261, episode length=833, total rewards=-0.06666666666738164
Training 

Copying file://car-racing-v2-201-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.3071837398863698, episode length=620, total rewards=-0.008496732026537418
Training the model...
experience length=4995
number of examples=2967
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [20.889421463012695, 19.9490966796875, 19.728151321411133, 19.243314743041992, 18.078737258911133, 17.57840347290039, 16.70036506652832, 16.298925399780273, 14.342445373535156, 13.697555541992188]
iteration 202
epsilon=0.30611190248750614, episode length=703, total rewards=-0.08750000000057523
Training the model...
experience length=4995
number of examples=2701
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [1.7047960758209229, 1.8817847967147827, 1.7539805173873901, 1.7086161375045776, 1.6505587100982666, 1.5249013900756836, 1.590255618095398, 1.5602896213531494, 1.565108060836792, 1.4365198612213135]
iteration 203
epsilon=0.3050507834626311, episode length=999, total rewards=26.934984520122825
Training the 

Copying file://car-racing-v2-204-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2655
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [22.653310775756836, 22.02345085144043, 21.603700637817383, 21.413272857666016, 20.71033477783203, 20.40106964111328, 20.192914962768555, 19.619234085083008, 19.56867027282715, 18.467605590820312]
iteration 205
epsilon=0.3029602728717248, episode length=999, total rewards=68.91891891891896
Training the model...
experience length=4995
number of examples=2997
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [15.239880561828613, 13.294710159301758, 11.173688888549805, 9.850265502929688, 8.704049110412598, 6.9659013748168945, 7.4580607414245605, 4.3972272872924805, 3.507120132446289, 3.115234375]
iteration 206
epsilon=0.30193067014300756, episode length=999, total rewards=48.4098939929315
Training the model...
experience length=4995
number of examples=2997
best total reward =  [158.89967637540775,

Copying file://car-racing-v2-207-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.3009113634415775, episode length=999, total rewards=63.26530612245243
Training the model...
experience length=4995
number of examples=3299
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [18.034326553344727, 17.510889053344727, 17.215532302856445, 17.058895111083984, 16.955766677856445, 16.87537384033203, 15.773983001708984, 16.390756607055664, 15.900650978088379, 16.347820281982422]
iteration 208
epsilon=0.29990224980716174, episode length=865, total rewards=-0.09480968858189098
Training the model...
experience length=4995
number of examples=2863
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [1.83321213722229, 1.9600404500961304, 2.103780508041382, 1.9403307437896729, 1.8007491827011108, 1.7422893047332764, 1.6891133785247803, 1.7458996772766113, 1.6402761936187744, 1.590279459953308]
iteration 209
epsilon=0.29890322730909014, episode length=784, total rewards=-0.0017064846423562041
Training th

Copying file://car-racing-v2-210-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [17.42337989807129, 16.179155349731445, 16.042171478271484, 15.039933204650879, 13.98465347290039, 13.776165962219238, 12.679300308227539, 11.991565704345703, 11.62809944152832, 10.232840538024902]
iteration 211
epsilon=0.2969350530856393, episode length=999, total rewards=61.016949152546246
Training the model...
experience length=4995
number of examples=2997
best total reward =  [158.89967637540775, 164.28571428571695, 177.5919732441492]
loss = [2.4498462677001953, 2.2797093391418457, 2.0808897018432617, 2.1796352863311768, 1.8395617008209229, 1.9417986869812012, 1.8828502893447876, 1.8983696699142456, 1.89060378074646, 1.793839693069458]
iteration 212
epsilon=0.2959657025547829, episode length=999, total rewards=178.91156462585397
Training the model...
experience length=4995
number of examples=2997
best total reward =  [164.28571

Copying file://car-racing-v2-213-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [164.28571428571695, 177.5919732441492, 178.91156462585397]
loss = [2.654730796813965, 2.852606773376465, 2.702956438064575, 2.4714345932006836, 2.434074640274048, 2.5186548233032227, 2.312195062637329, 2.708864450454712, 2.2367348670959473, 2.5645554065704346]
iteration 214
epsilon=0.2940559850739427, episode length=694, total rewards=-0.013595166163090172
Training the model...
experience length=4995
number of examples=2692
best total reward =  [164.28571428571695, 177.5919732441492, 178.91156462585397]
loss = [2.4964516162872314, 2.1162405014038086, 1.9680705070495605, 2.346942901611328, 2.0458340644836426, 2.015885353088379, 1.8904670476913452, 1.9194562435150146, 1.6759430170059204, 1.9746639728546143]
iteration 215
epsilon=0.2931154252232033, episode length=999, total rewards=248.53420195440074
Training the model...
experience length=4995
number of examples=2997
best total reward =  [177.5919

Copying file://car-racing-v2-216-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [177.5919732441492, 178.91156462585397, 248.53420195440074]
loss = [2.5978479385375977, 2.467684030532837, 2.548616409301758, 2.371522903442383, 3.264719247817993, 2.194742202758789, 2.3357315063476562, 2.3321971893310547, 2.2108206748962402, 2.20396089553833]
iteration 217
epsilon=0.2912624282612615, episode length=999, total rewards=216.66666666667052
Training the model...
experience length=4995
number of examples=2997
best total reward =  [178.91156462585397, 248.53420195440074, 216.66666666667052]
loss = [2.792133331298828, 2.4107072353363037, 2.6205296516418457, 2.2291407585144043, 2.0858571529388428, 2.189007043838501, 2.197479009628296, 2.2985219955444336, 2.139146089553833, 1.8158069849014282]
iteration 218
epsilon=0.29034980397864885, episode length=641, total rewards=-0.09743589743626413
Training the model...
experience length=4995
number of examples=2639
best total reward =  [178.911564

Copying file://car-racing-v2-219-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [178.91156462585397, 248.53420195440074, 216.66666666667052]
loss = [2.461148262023926, 2.6574106216430664, 2.3273966312408447, 2.4443535804748535, 2.1705596446990967, 3.0194759368896484, 2.2389519214630127, 2.113800287246704, 2.1641311645507812, 2.0120184421539307]
iteration 220
epsilon=0.2885518428794737, episode length=999, total rewards=187.67123287671558
Training the model...
experience length=4995
number of examples=2997
best total reward =  [248.53420195440074, 216.66666666667052, 187.67123287671558]
loss = [2.5994277000427246, 2.6548376083374023, 2.4753000736236572, 2.137744426727295, 2.2015817165374756, 2.1865973472595215, 2.7970340251922607, 2.052142858505249, 2.0166985988616943, 1.9371082782745361]
iteration 221
epsilon=0.28766632445067897, episode length=999, total rewards=111.53846153846355
Training the model...
experience length=4995
number of examples=2997
best total reward =  [248.

Copying file://car-racing-v2-222-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [248.53420195440074, 216.66666666667052, 187.67123287671558]
loss = [2.740773916244507, 2.7684295177459717, 2.6133370399475098, 2.343761444091797, 2.532214403152466, 2.431131601333618, 2.6992571353912354, 1.9677180051803589, 2.3104448318481445, 2.2382936477661133]
iteration 223
epsilon=0.28592176459411045, episode length=704, total rewards=-0.030201342282432736
Training the model...
experience length=4995
number of examples=2702
best total reward =  [248.53420195440074, 216.66666666667052, 187.67123287671558]
loss = [2.3898468017578125, 2.1826303005218506, 1.989553451538086, 2.130078077316284, 2.030937910079956, 2.4066920280456543, 1.9076251983642578, 1.9167550802230835, 1.9146183729171753, 1.81050443649292]
iteration 224
epsilon=0.28506254694816935, episode length=496, total rewards=-0.05460992907833284
epsilon=0.28506254694816935, episode length=999, total rewards=86.61971830985934
Training the 

Copying file://car-racing-v2-225-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [248.53420195440074, 216.66666666667052, 187.67123287671558]
loss = [2.548220157623291, 2.6132819652557373, 2.409574508666992, 2.329044818878174, 2.3556253910064697, 2.290769577026367, 2.2976269721984863, 2.127307653427124, 2.2365376949310303, 2.116839647293091]
iteration 226
epsilon=0.2833698022639008, episode length=999, total rewards=101.41342756183887
Training the model...
experience length=4995
number of examples=2997
best total reward =  [248.53420195440074, 216.66666666667052, 187.67123287671558]
loss = [2.9343760013580322, 2.1045711040496826, 2.208861827850342, 2.114304304122925, 2.27805757522583, 1.9911528825759888, 2.5570380687713623, 2.160604238510132, 1.8889392614364624, 2.0621516704559326]
iteration 227
epsilon=0.2825361042412618, episode length=999, total rewards=285.82677165354676
Training the model...
experience length=4995
number of examples=2997
best total reward =  [216.66666666

Copying file://car-racing-v2-228-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [216.66666666667052, 187.67123287671558, 285.82677165354676]
loss = [2.948643207550049, 2.7498905658721924, 2.7362523078918457, 2.582925319671631, 2.5398130416870117, 2.4109742641448975, 2.6027069091796875, 2.3014237880706787, 2.253251075744629, 2.292937994003296]
iteration 229
epsilon=0.2808936357668607, episode length=999, total rewards=126.22950819672252
Training the model...
experience length=4995
number of examples=2997
best total reward =  [216.66666666667052, 187.67123287671558, 285.82677165354676]
loss = [2.510714054107666, 2.232365608215332, 2.411149024963379, 2.479513168334961, 2.0418128967285156, 2.1416261196136475, 2.0623207092285156, 1.9747216701507568, 2.1546573638916016, 1.8867566585540771]
iteration 230
epsilon=0.2800846994091921, episode length=999, total rewards=192.60450160771998
Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67123

Copying file://car-racing-v2-231-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67123287671558, 285.82677165354676, 192.60450160771998]
loss = [2.777470827102661, 2.6230742931365967, 2.7497611045837402, 2.4754786491394043, 2.393563747406006, 2.496206283569336, 2.3531291484832764, 2.2229034900665283, 2.5214173793792725, 2.0632901191711426]
iteration 232
epsilon=0.27849101389094916, episode length=999, total rewards=154.29553264605266
Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67123287671558, 285.82677165354676, 192.60450160771998]
loss = [2.322335720062256, 2.260230779647827, 2.2474687099456787, 2.1965291500091553, 2.1134347915649414, 2.0660438537597656, 1.9241158962249756, 2.0504143238067627, 2.0309526920318604, 1.95136559009552]
iteration 233
epsilon=0.27770610375203963, episode length=999, total rewards=101.38888888889198
Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67

Copying file://car-racing-v2-234-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67123287671558, 285.82677165354676, 192.60450160771998]
loss = [2.2839972972869873, 2.345769166946411, 2.1534321308135986, 2.083726644515991, 2.1353392601013184, 2.0964298248291016, 1.9059224128723145, 2.230530023574829, 1.8797487020492554, 1.8990262746810913]
iteration 235
epsilon=0.27615975228737405, episode length=999, total rewards=159.9277978339376
Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67123287671558, 285.82677165354676, 192.60450160771998]
loss = [2.2741847038269043, 1.972362995147705, 2.1042897701263428, 2.057220697402954, 1.8919788599014282, 1.852535367012024, 1.9566500186920166, 1.9279446601867676, 1.7683546543121338, 1.9612946510314941]
iteration 236
epsilon=0.2753981547645003, episode length=999, total rewards=8.303249097472689
Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.6712

Copying file://car-racing-v2-237-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67123287671558, 285.82677165354676, 192.60450160771998]
loss = [2.1662638187408447, 2.386493682861328, 2.425910711288452, 2.3510947227478027, 2.042027473449707, 2.2184441089630127, 2.09733247756958, 2.2058825492858887, 2.003251075744629, 2.065101385116577]
iteration 238
epsilon=0.27389773148468677, episode length=664, total rewards=-0.05481727574804052
Training the model...
experience length=4995
number of examples=2662
best total reward =  [187.67123287671558, 285.82677165354676, 192.60450160771998]
loss = [2.062953472137451, 1.8268170356750488, 1.927533745765686, 1.7483693361282349, 1.7176945209503174, 1.7705892324447632, 1.7753653526306152, 1.7258614301681519, 1.7861270904541016, 1.6024113893508911]
iteration 239
epsilon=0.2731587541698399, episode length=804, total rewards=-0.00464396284901733
Training the model...
experience length=4995
number of examples=2802
best total reward =  [187.6

Copying file://car-racing-v2-240-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [187.67123287671558, 285.82677165354676, 192.60450160771998]
loss = [2.4578135013580322, 2.684990644454956, 2.332876443862915, 2.2123801708221436, 2.2576231956481934, 2.1840929985046387, 2.352410078048706, 2.268839120864868, 2.2436654567718506, 2.1578776836395264]
iteration 241
epsilon=0.2717028949618601, episode length=999, total rewards=232.16783216783574
Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.4957756996154785, 2.321632146835327, 2.265139579772949, 2.2161905765533447, 2.12534236907959, 1.958444595336914, 2.11859130859375, 1.959904432296753, 2.021407127380371, 2.028160572052002]
iteration 242
epsilon=0.2709858660122415, episode length=999, total rewards=81.52866242038317
Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.826771653546

Copying file://car-racing-v2-243-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.625551462173462, 2.831284523010254, 2.2807464599609375, 2.319734811782837, 2.428452491760254, 2.456364631652832, 2.226719617843628, 2.1857314109802246, 2.280594825744629, 2.2820310592651367]
iteration 244
epsilon=0.2695732472785979, episode length=655, total rewards=-0.08275862069007656
Training the model...
experience length=4995
number of examples=2653
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.2925491333007812, 2.157731533050537, 2.33913254737854, 1.944167137145996, 2.055664539337158, 1.9346115589141846, 1.9190119504928589, 1.9416046142578125, 1.985127568244934, 2.0099587440490723]
iteration 245
epsilon=0.2688775148058119, episode length=999, total rewards=16.2790697674414
Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.8267716535

Copying file://car-racing-v2-246-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.3217482566833496, 2.049483299255371, 2.153660774230957, 2.159916639328003, 2.059727668762207, 1.9791513681411743, 2.06555438041687, 1.8519827127456665, 1.9823557138442993, 1.860994815826416]
iteration 247
epsilon=0.26750685226117626, episode length=851, total rewards=-0.026498422713609332
Training the model...
experience length=4995
number of examples=2849
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.0607283115386963, 1.9173288345336914, 2.134122610092163, 1.8396718502044678, 1.719571590423584, 1.9235739707946777, 1.7676416635513306, 1.8580821752548218, 1.7435215711593628, 1.636128544807434]
iteration 248
epsilon=0.26683178373856453, episode length=999, total rewards=190.00000000000327
Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82

Copying file://car-racing-v2-249-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.291454553604126, 2.305079460144043, 1.9988682270050049, 2.07916522026062, 1.9992104768753052, 1.9168046712875366, 2.0561578273773193, 2.4316976070404053, 1.7926487922668457, 1.9196767807006836]
iteration 250
epsilon=0.2655018312421671, episode length=999, total rewards=156.14035087719628
Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.171536684036255, 2.113025188446045, 2.094538450241089, 2.0688695907592773, 2.1788957118988037, 2.0080864429473877, 1.9985768795013428, 1.9792433977127075, 1.7584315538406372, 2.0545268058776855]
iteration 251
epsilon=0.26484681292974543, episode length=999, total rewards=58.57605177993819
Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.8267

Copying file://car-racing-v2-252-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.3583502769470215, 2.44091534614563, 2.238243341445923, 2.310821771621704, 2.202000617980957, 2.1540462970733643, 2.28255295753479, 2.0147812366485596, 2.1426379680633545, 2.2160489559173584]
iteration 253
epsilon=0.2635563613524435, episode length=103, total rewards=-0.09072164948452088
epsilon=0.2635563613524435, episode length=999, total rewards=31.410256410255215
Training the model...
experience length=4995
number of examples=3100
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [1.881102204322815, 1.949625015258789, 2.0032646656036377, 1.7390882968902588, 1.8188064098358154, 1.881793737411499, 1.7850767374038696, 1.62154221534729, 1.720036506652832, 1.6809455156326294]
iteration 254
epsilon=0.2629207977389191, episode length=999, total rewards=12.759643916914252
Training the model...


Copying file://car-racing-v2-255-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.146376132965088, 2.0774319171905518, 2.009654998779297, 1.9792917966842651, 1.8806989192962646, 1.9068490266799927, 1.9585191011428833, 1.8095275163650513, 1.7669984102249146, 1.9259982109069824]
iteration 256
epsilon=0.26166867386391457, episode length=797, total rewards=-0.08985507246451019
Training the model...
experience length=4995
number of examples=2795
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [1.9492037296295166, 1.9301769733428955, 1.7837752103805542, 1.8432008028030396, 1.7479718923568726, 1.7564136981964111, 1.8039721250534058, 1.7603733539581299, 1.7316752672195435, 1.7321727275848389]
iteration 257
epsilon=0.26105198712527544, episode length=681, total rewards=-0.018181818182413795
Training the model...
experience length=4995
number of examples=2679
best total reward 

Copying file://car-racing-v2-258-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [285.82677165354676, 192.60450160771998, 232.16783216783574]
loss = [2.668285369873047, 2.4008982181549072, 2.3409035205841064, 2.393587350845337, 2.2231459617614746, 2.2249770164489746, 2.2995944023132324, 2.0946502685546875, 2.169294595718384, 2.3083362579345703]
iteration 259
epsilon=0.25983705258148243, episode length=999, total rewards=340.43321299638836
Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [2.564932346343994, 2.4967715740203857, 2.5733513832092285, 2.4190704822540283, 2.197018623352051, 2.421192169189453, 2.2018940448760986, 2.125141143798828, 2.2223522663116455, 2.0842740535736084]
iteration 260
epsilon=0.2592386820556676, episode length=612, total rewards=-0.00967741935525021
Training the model...
experience length=4995
number of examples=2610
best total reward =  [192.6

Copying file://car-racing-v2-261-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [2.5279700756073, 2.4609382152557373, 2.333340644836426, 2.3037288188934326, 2.104038953781128, 2.2894515991210938, 2.3526453971862793, 2.1930272579193115, 2.0486745834350586, 2.300943613052368]
iteration 262
epsilon=0.2580598322827598, episode length=999, total rewards=102.89855072464104
Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [2.4543521404266357, 2.4499261379241943, 2.3348703384399414, 2.9754879474639893, 2.209502935409546, 2.3420088291168213, 3.36628794670105, 2.1529974937438965, 2.3050804138183594, 2.036409854888916]
iteration 263
epsilon=0.2574792339599322, episode length=999, total rewards=60.642570281128556
Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.6045016

Copying file://car-racing-v2-264-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2890
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [28.296186447143555, 26.91908836364746, 27.55211067199707, 26.753128051757812, 26.656627655029297, 26.308870315551758, 26.404682159423828, 25.778974533081055, 25.436281204223633, 25.911697387695312]
iteration 265
epsilon=0.25633539720412957, episode length=999, total rewards=42.85714285714535
Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [2.16733980178833, 1.957094430923462, 1.9584282636642456, 2.1185617446899414, 1.9675254821777344, 2.074687957763672, 1.8744144439697266, 1.9379984140396118, 1.8098013401031494, 1.7620102167129517]
iteration 266
epsilon=0.25577204323208824, episode length=999, total rewards=67.76315789473746
Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.604

Copying file://car-racing-v2-267-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [2.6231632232666016, 3.327930212020874, 2.2538726329803467, 2.473862886428833, 2.7160532474517822, 2.0292766094207764, 2.309011697769165, 2.452646017074585, 2.0620782375335693, 2.2349510192871094]
iteration 268
epsilon=0.2546621795717697, episode length=999, total rewards=137.41007194244767
Training the model...
experience length=4995
number of examples=2997
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [2.570843458175659, 2.6306889057159424, 2.363612413406372, 2.4326088428497314, 2.237199544906616, 2.2705211639404297, 2.262232542037964, 2.1257271766662598, 2.0977115631103516, 2.0793938636779785]
iteration 269
epsilon=0.254115557776052, episode length=719, total rewards=-0.08219178082252765
Training the model...
experience length=4995
number of examples=2717
best total reward =  [192.6045

Copying file://car-racing-v2-270-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2818
best total reward =  [192.60450160771998, 232.16783216783574, 340.43321299638836]
loss = [2.1039910316467285, 2.117685079574585, 2.078510284423828, 1.8609740734100342, 1.8653607368469238, 1.768615484237671, 1.9494620561599731, 1.807022213935852, 1.8005363941192627, 1.8587851524353027]
iteration 271
epsilon=0.25303865817630855, episode length=999, total rewards=286.97318007663023
Training the model...
experience length=4995
number of examples=2997
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [2.924671173095703, 2.5135204792022705, 2.7163496017456055, 2.423400402069092, 2.3106844425201416, 2.336150884628296, 2.2987992763519287, 2.42415189743042, 2.2887275218963623, 2.2265536785125732]
iteration 272
epsilon=0.2525082715945455, episode length=529, total rewards=-93.7937888198762
Training the model...
experience length=4995
number of examples=2527
best total reward =  [232.167832

Copying file://car-racing-v2-273-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [2.956014633178711, 2.9189085960388184, 2.635162830352783, 2.4254348278045654, 2.3570163249969482, 2.3365261554718018, 2.506514072418213, 2.3359851837158203, 2.2964131832122803, 2.1546645164489746]
iteration 274
epsilon=0.25146335698981404, episode length=864, total rewards=-0.08024691358105795
Training the model...
experience length=4995
number of examples=2862
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [24.735668182373047, 24.079879760742188, 23.363887786865234, 22.63338851928711, 22.27203369140625, 21.385549545288086, 20.82013511657715, 20.41107940673828, 19.619815826416016, 19.0052433013916]
iteration 275
epsilon=0.2509487234199159, episode length=999, total rewards=43.859649122809174
Training the model...
experience length=4995
number of examples=2997
best total reward =  [232.167

Copying file://car-racing-v2-276-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.25043923618571673, episode length=673, total rewards=-0.059932659933225496
Training the model...
experience length=4995
number of examples=3005
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [24.109336853027344, 23.389286041259766, 21.708566665649414, 21.422048568725586, 20.171533584594727, 19.400043487548828, 18.362699508666992, 18.15641212463379, 16.506757736206055, 15.43429946899414]
iteration 277
epsilon=0.24993484382385955, episode length=680, total rewards=-0.07278911564677881
Training the model...
experience length=4995
number of examples=2678
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [2.966421604156494, 2.6279408931732178, 2.5211851596832275, 2.264749050140381, 2.116081953048706, 2.1766512393951416, 1.9036645889282227, 2.15417218208313, 1.9941834211349487, 1.8396333456039429]
iteration 278
epsilon=0.24943549538562096, episode length=707, total rewards=-0.060450160772299144
Trainin

Copying file://car-racing-v2-279-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [7.4477620124816895, 5.771434307098389, 5.027577877044678, 4.010275363922119, 2.8743538856506348, 3.9901773929595947, 2.974398136138916, 3.192941427230835, 2.903242826461792, 3.2725353240966797]
iteration 280
epsilon=0.24845172902744708, episode length=999, total rewards=179.1519434629017
Training the model...
experience length=4995
number of examples=2997
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [4.520496845245361, 3.243035078048706, 2.8827931880950928, 4.038120269775391, 2.6208131313323975, 2.6736080646514893, 3.1414177417755127, 2.5268948078155518, 3.0039656162261963, 2.776289224624634]
iteration 281
epsilon=0.24796721173717262, episode length=999, total rewards=71.87500000000446
Training the model...
experience length=4995
number of examples=2997
best total reward =  [232.1678321

Copying file://car-racing-v2-282-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [232.16783216783574, 340.43321299638836, 286.97318007663023]
loss = [3.641288995742798, 3.4197754859924316, 3.4630379676818848, 2.9233155250549316, 3.0457773208618164, 2.9861984252929688, 3.1670687198638916, 2.842792272567749, 2.932171106338501, 2.865245819091797]
iteration 283
epsilon=0.2470126642236029, episode length=999, total rewards=241.21621621622086
Training the model...
experience length=4995
number of examples=2997
best total reward =  [340.43321299638836, 286.97318007663023, 241.21621621622086]
loss = [3.3690595626831055, 2.974785327911377, 3.007233142852783, 2.777064085006714, 2.6844775676727295, 2.4773430824279785, 2.8636293411254883, 2.311675786972046, 2.640401840209961, 2.6483867168426514]
iteration 284
epsilon=0.24654253758136688, episode length=711, total rewards=-0.013559322034567972
Training the model...
experience length=4995
number of examples=2709
best total reward =  [340.43

Copying file://car-racing-v2-285-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [340.43321299638836, 286.97318007663023, 241.21621621622086]
loss = [3.183535099029541, 2.8536033630371094, 2.78454327583313, 2.6981379985809326, 2.705381393432617, 2.6549618244171143, 2.9080967903137207, 2.50689435005188, 2.3766586780548096, 2.3286516666412354]
iteration 286
epsilon=0.24561634108349767, episode length=999, total rewards=229.93197278911884
Training the model...
experience length=4995
number of examples=2997
best total reward =  [340.43321299638836, 286.97318007663023, 241.21621621622086]
loss = [2.89762282371521, 2.720791816711426, 2.6933987140655518, 2.538586139678955, 2.5127243995666504, 2.3251423835754395, 2.5052573680877686, 2.3228907585144043, 2.32344126701355, 2.3222854137420654]
iteration 287
epsilon=0.24516017767266268, episode length=328, total rewards=-0.005263157894792786
epsilon=0.24516017767266268, episode length=999, total rewards=125.60975609756123
Training the mode

Copying file://car-racing-v2-288-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [340.43321299638836, 286.97318007663023, 241.21621621622086]
loss = [3.12406325340271, 3.000471591949463, 3.2111122608184814, 2.726245403289795, 3.197584867477417, 2.762477397918701, 2.8225417137145996, 2.6335697174072266, 2.63405442237854, 2.5601518154144287]
iteration 289
epsilon=0.2442614901369767, episode length=999, total rewards=77.99352750809462
Training the model...
experience length=4995
number of examples=2997
best total reward =  [340.43321299638836, 286.97318007663023, 241.21621621622086]
loss = [2.6175944805145264, 2.5533359050750732, 4.445248603820801, 2.1993608474731445, 2.282230854034424, 2.2755229473114014, 2.2518224716186523, 2.259007692337036, 2.312138319015503, 2.2770183086395264]
iteration 290
epsilon=0.24381887523560694, episode length=999, total rewards=367.6258992805707
Training the model...
experience length=4995
number of examples=2997
best total reward =  [286.9731800766

Copying file://car-racing-v2-291-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [286.97318007663023, 241.21621621622086, 367.6258992805707]
loss = [2.9333608150482178, 2.829853057861328, 3.135000467300415, 2.8831734657287598, 2.692033290863037, 2.4621729850769043, 2.7614519596099854, 2.7018232345581055, 2.6323039531707764, 2.571437358856201]
iteration 292
epsilon=0.24294687961841838, episode length=714, total rewards=-0.07142857142922421
Training the model...
experience length=4995
number of examples=2712
best total reward =  [286.97318007663023, 241.21621621622086, 367.6258992805707]
loss = [2.7779152393341064, 2.5618348121643066, 2.293166399002075, 2.5500786304473877, 2.255232572555542, 2.2849557399749756, 2.284707546234131, 2.28542423248291, 2.2223706245422363, 2.831470489501953]
iteration 293
epsilon=0.2425174108222342, episode length=999, total rewards=26.315789473682976
Training the model...
experience length=4995
number of examples=2997
best total reward =  [286.973180

Copying file://car-racing-v2-294-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2605
best total reward =  [286.97318007663023, 241.21621621622086, 367.6258992805707]
loss = [28.848430633544922, 26.854496002197266, 25.559267044067383, 23.924760818481445, 21.8558406829834, 21.02265739440918, 18.86587905883789, 17.022031784057617, 15.478001594543457, 14.998275756835938]
iteration 295
epsilon=0.24167131434687172, episode length=999, total rewards=214.38127090301464
Training the model...
experience length=4995
number of examples=2997
best total reward =  [286.97318007663023, 241.21621621622086, 367.6258992805707]
loss = [12.322558403015137, 6.6204729080200195, 4.381776332855225, 2.9883339405059814, 3.061084270477295, 3.1379799842834473, 2.8804924488067627, 3.1955227851867676, 2.836366891860962, 2.7582108974456787]
iteration 296
epsilon=0.241254601203403, episode length=999, total rewards=159.25925925926256
Training the model...
experience length=4995
number of examples=2997
best total reward =  [286.973180

Copying file://car-racing-v2-297-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [286.97318007663023, 241.21621621622086, 367.6258992805707]
loss = [3.8131563663482666, 3.324570894241333, 3.2012288570404053, 3.0263309478759766, 2.9385459423065186, 3.1666154861450195, 2.875042676925659, 2.799098491668701, 2.967278480529785, 2.8192405700683594]
iteration 298
epsilon=0.24043363463945527, episode length=999, total rewards=245.63758389261847
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [4.180102348327637, 3.3547470569610596, 5.940077304840088, 3.2037353515625, 2.764864444732666, 3.8439340591430664, 2.6962697505950928, 4.379073619842529, 2.6856508255004883, 4.119019508361816]
iteration 299
epsilon=0.24002929829306072, episode length=999, total rewards=219.14893617021647
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.216216216

Copying file://car-racing-v2-300-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.60396409034729, 3.189732789993286, 3.2931747436523438, 3.2968997955322266, 3.1179943084716797, 2.883420467376709, 3.0618247985839844, 2.8457119464874268, 3.2007782459259033, 2.7431488037109375]
iteration 301
epsilon=0.2392327152570288, episode length=999, total rewards=221.16788321168357
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.0705177783966064, 2.8038413524627686, 2.834033250808716, 2.756380319595337, 2.451057195663452, 2.6487908363342285, 4.3286027908325195, 2.6113367080688477, 2.5344510078430176, 2.6418607234954834]
iteration 302
epsilon=0.2388403881044585, episode length=323, total rewards=-0.047058823529405575
epsilon=0.2388403881044585, episode length=999, total rewards=111.92052980132544
Training the mode

Copying file://car-racing-v2-303-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.293982744216919, 3.462726354598999, 3.141059398651123, 3.14843487739563, 3.0981905460357666, 2.933831214904785, 4.761734485626221, 2.9908673763275146, 2.8512165546417236, 2.99039626121521]
iteration 304
epsilon=0.2380674643811798, episode length=999, total rewards=45.21452145214758
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [6.463705062866211, 4.785494804382324, 4.606503963470459, 4.212021827697754, 4.233819961547852, 4.0384521484375, 3.736894369125366, 4.031137466430664, 3.6802773475646973, 3.679110527038574]
iteration 305
epsilon=0.23768678973736798, episode length=999, total rewards=74.06143344710298
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.

Copying file://car-racing-v2-306-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.4980111122131348, 3.5648794174194336, 3.0529286861419678, 2.9829487800598145, 3.0963969230651855, 2.9322574138641357, 2.86975359916687, 2.8766982555389404, 2.9974780082702637, 2.6133344173431396]
iteration 307
epsilon=0.23693682262159435, episode length=999, total rewards=150.8833922261524
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.1699626445770264, 2.9955155849456787, 2.988469362258911, 2.6651723384857178, 2.741807222366333, 2.657071113586426, 2.6319916248321533, 2.6303937435150146, 2.6015405654907227, 2.463505268096924]
iteration 308
epsilon=0.2365674543953784, episode length=999, total rewards=39.13043478260797
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.216216

Copying file://car-racing-v2-309-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.7151989936828613, 3.485166549682617, 3.366997241973877, 3.1151702404022217, 3.3607728481292725, 3.897029161453247, 3.0183541774749756, 3.1281545162200928, 3.0337612628936768, 2.6980783939361572]
iteration 310
epsilon=0.23583976205291038, episode length=999, total rewards=21.527777777778773
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.4419760704040527, 2.7967965602874756, 2.7321054935455322, 2.5441548824310303, 2.6546998023986816, 2.3925750255584717, 2.8037943840026855, 2.3828959465026855, 2.4565906524658203, 2.3963663578033447]
iteration 311
epsilon=0.23548136443238127, episode length=999, total rewards=171.52317880795107
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.

Copying file://car-racing-v2-312-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.785562515258789, 3.298766851425171, 3.570235252380371, 3.316206455230713, 3.1715240478515625, 3.2227871417999268, 2.9609038829803467, 2.9406845569610596, 3.198746919631958, 3.0388803482055664]
iteration 313
epsilon=0.23477528528017688, episode length=999, total rewards=134.2657342657388
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [5.375965595245361, 4.861063003540039, 4.572714805603027, 3.9571850299835205, 4.225035667419434, 3.5908620357513428, 3.5986392498016357, 3.7211599349975586, 3.8636767864227295, 3.312422752380371]
iteration 314
epsilon=0.23442753242737513, episode length=818, total rewards=-0.04946619217164391
Training the model...
experience length=4995
number of examples=2816
best total reward =  [241.216216

Copying file://car-racing-v2-315-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.4625301361083984, 3.057534694671631, 3.217045545578003, 3.270000457763672, 3.17698073387146, 2.885474681854248, 3.1503121852874756, 2.9191675186157227, 3.1034491062164307, 2.7750234603881836]
iteration 316
epsilon=0.23374242453207036, episode length=482, total rewards=-0.024137931034541732
epsilon=0.23374242453207036, episode length=999, total rewards=111.80555555555735
Training the model...
experience length=4995
number of examples=3479
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.0675110816955566, 2.658808946609497, 2.8505640029907227, 2.6007742881774902, 2.6976261138916016, 2.525092601776123, 2.728501081466675, 2.4576122760772705, 2.5513100624084473, 2.472480058670044]
iteration 317
epsilon=0.23340500028674965, episode length=999, total rewards=64.38356164383468
Training the model

Copying file://car-racing-v2-318-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.228228807449341, 2.8512415885925293, 2.8007280826568604, 2.8324296474456787, 2.814040184020996, 2.5169007778167725, 2.8581762313842773, 2.6567914485931396, 2.776843786239624, 2.498936653137207]
iteration 319
epsilon=0.23274024078104333, episode length=999, total rewards=179.661016949157
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [2.856252670288086, 2.993610143661499, 2.716491222381592, 2.9448487758636475, 2.7653164863586426, 3.273378849029541, 2.455866575241089, 2.4888319969177246, 2.6048195362091064, 2.6682093143463135]
iteration 320
epsilon=0.2324128383732329, episode length=999, total rewards=160.60606060606483
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.216216216

Copying file://car-racing-v2-321-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.506941795349121, 3.627458095550537, 3.0044217109680176, 3.12827730178833, 2.880999803543091, 3.0459117889404297, 2.740719795227051, 2.897975206375122, 2.732959032058716, 2.8056576251983643]
iteration 322
epsilon=0.23176782288960557, episode length=999, total rewards=145.61403508772122
Training the model...
experience length=4995
number of examples=2997
best total reward =  [241.21621621622086, 367.6258992805707, 245.63758389261847]
loss = [3.0759623050689697, 2.721959352493286, 3.1216909885406494, 2.7637293338775635, 2.8373892307281494, 2.712787389755249, 2.9050662517547607, 2.5856211185455322, 2.7251782417297363, 3.1202898025512695]
iteration 323
epsilon=0.2314501446607095, episode length=999, total rewards=338.4057971014476
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.625899280

Copying file://car-racing-v2-324-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [3.4686906337738037, 3.4281084537506104, 3.34322190284729, 3.40702748298645, 3.00913143157959, 3.1025872230529785, 3.255396604537964, 3.1641805171966553, 2.9283254146575928, 3.047950029373169]
iteration 325
epsilon=0.23082428678196137, episode length=99, total rewards=-0.03322259136211089
epsilon=0.23082428678196137, episode length=999, total rewards=86.37992831541683
Training the model...
experience length=4995
number of examples=3096
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [3.0119147300720215, 2.8634631633758545, 2.7239480018615723, 3.003675699234009, 2.672515869140625, 2.5336666107177734, 2.6588683128356934, 2.7909982204437256, 2.501193046569824, 2.5745089054107666]
iteration 326
epsilon=0.23051604391414177, episode length=999, total rewards=76.27118644067893
Training the model...
ex

Copying file://car-racing-v2-327-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [3.0823841094970703, 2.9548301696777344, 3.1941041946411133, 2.912783145904541, 2.9300696849823, 2.9946508407592773, 2.799567461013794, 2.9788143634796143, 2.929755926132202, 2.5521645545959473]
iteration 328
epsilon=0.22990877464025034, episode length=999, total rewards=20.129870129871186
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [2.7200045585632324, 2.6543428897857666, 2.4486348628997803, 2.4592270851135254, 2.5273730754852295, 2.5074942111968994, 2.2858266830444336, 2.4611499309539795, 2.2851858139038086, 2.22766375541687]
iteration 329
epsilon=0.22960968689384784, episode length=999, total rewards=140.60150375940287
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992

Copying file://car-racing-v2-330-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [3.245004415512085, 3.0012917518615723, 2.9847073554992676, 3.1663219928741455, 4.030142307281494, 2.9645416736602783, 2.5277085304260254, 3.0529677867889404, 2.698315382003784, 2.7308311462402344]
iteration 331
epsilon=0.22902045412466027, episode length=999, total rewards=223.8434163701091
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [2.8760905265808105, 2.8312289714813232, 2.855250835418701, 2.659590005874634, 2.7041819095611572, 2.8786628246307373, 3.5569019317626953, 2.708660840988159, 2.6712443828582764, 2.5529255867004395]
iteration 332
epsilon=0.22873024958341368, episode length=999, total rewards=153.4722222222258
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992

Copying file://car-racing-v2-333-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [3.426560640335083, 3.151397466659546, 3.0125906467437744, 3.132815361022949, 3.328296422958374, 2.909865379333496, 3.0394818782806396, 2.9015424251556396, 3.006800651550293, 3.1566002368927]
iteration 334
epsilon=0.22815851761670375, episode length=999, total rewards=40.6844106463905
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [2.5070321559906006, 2.5541648864746094, 2.5793261528015137, 2.4370949268341064, 2.504267930984497, 2.424130439758301, 2.5887651443481445, 2.2970447540283203, 2.136125326156616, 2.2960457801818848]
iteration 335
epsilon=0.22787693244053672, episode length=999, total rewards=180.48780487805334
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707

Copying file://car-racing-v2-336-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [2.7788257598876953, 3.012140989303589, 2.656217098236084, 2.901108741760254, 2.576965808868408, 2.6745548248291016, 2.746396541595459, 2.5028584003448486, 2.8184125423431396, 2.5965328216552734]
iteration 337
epsilon=0.22732218148497002, episode length=999, total rewards=146.42857142857412
Training the model...
experience length=4995
number of examples=2997
best total reward =  [367.6258992805707, 245.63758389261847, 338.4057971014476]
loss = [2.8803131580352783, 2.692758321762085, 2.745318651199341, 2.454233407974243, 2.739252805709839, 3.2682836055755615, 2.5860824584960938, 2.5952274799346924, 3.231356382369995, 2.520284652709961]
iteration 338
epsilon=0.22704895967012031, episode length=999, total rewards=348.27586206896297
Training the model...
experience length=4995
number of examples=2997
best total reward =  [245.6375838926

Copying file://car-racing-v2-339-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [245.63758389261847, 338.4057971014476, 348.27586206896297]
loss = [3.45216965675354, 3.2049474716186523, 3.6536941528320312, 2.9658617973327637, 3.2491049766540527, 3.0284616947174072, 3.6977758407592773, 2.9597651958465576, 3.0367956161499023, 2.9889137744903564]
iteration 340
epsilon=0.22651068537268493, episode length=999, total rewards=244.48160535117566
Training the model...
experience length=4995
number of examples=2997
best total reward =  [245.63758389261847, 338.4057971014476, 348.27586206896297]
loss = [3.1696507930755615, 3.219076156616211, 3.002495765686035, 2.9504616260528564, 2.9373176097869873, 2.8125553131103516, 3.0198237895965576, 2.5830395221710205, 2.904628276824951, 2.7540526390075684]
iteration 341
epsilon=0.22624557851895807, episode length=999, total rewards=278.67647058823746
Training the model...
experience length=4995
number of examples=2997
best total reward =  [338.40

Copying file://car-racing-v2-342-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [338.4057971014476, 348.27586206896297, 278.67647058823746]
loss = [3.658965826034546, 3.4414169788360596, 3.3755950927734375, 3.1751608848571777, 3.165809392929077, 3.1941494941711426, 3.3402349948883057, 3.029689311981201, 3.164332866668701, 3.114696502685547]
iteration 343
epsilon=0.2257232915064308, episode length=999, total rewards=141.9354838709682
Training the model...
experience length=4995
number of examples=2997
best total reward =  [338.4057971014476, 348.27586206896297, 278.67647058823746]
loss = [3.118514060974121, 3.3296542167663574, 3.0158073902130127, 3.279637098312378, 2.9399709701538086, 2.8990654945373535, 2.980963706970215, 2.9066834449768066, 2.7352261543273926, 2.912449598312378]
iteration 344
epsilon=0.2254660585913665, episode length=999, total rewards=73.20261437908918
Training the model...
experience length=4995
number of examples=2997
best total reward =  [338.4057971014

Copying file://car-racing-v2-345-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [338.4057971014476, 348.27586206896297, 278.67647058823746]
loss = [3.437873125076294, 3.2127819061279297, 3.079500675201416, 3.0506160259246826, 2.9810285568237305, 2.8667759895324707, 3.108022451400757, 3.04888653755188, 2.7951157093048096, 2.8960721492767334]
iteration 346
epsilon=0.2249592840253983, episode length=912, total rewards=-0.08378378378444862
Training the model...
experience length=4995
number of examples=2910
best total reward =  [338.4057971014476, 348.27586206896297, 278.67647058823746]
loss = [2.7989487648010254, 2.4304044246673584, 2.7029740810394287, 2.5712947845458984, 2.665334939956665, 2.3411922454833984, 2.6010544300079346, 2.5105671882629395, 2.9935545921325684, 2.5147931575775146]
iteration 347
epsilon=0.22470969118514433, episode length=999, total rewards=339.73941368078044
Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27

Copying file://car-racing-v2-348-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586206896297, 278.67647058823746, 339.73941368078044]
loss = [3.2203683853149414, 3.251488447189331, 3.148141622543335, 2.9640679359436035, 2.978391170501709, 3.0197196006774902, 2.834597587585449, 2.9169116020202637, 2.6364901065826416, 2.658205986022949]
iteration 349
epsilon=0.22421796833055999, episode length=999, total rewards=138.46153846154144
Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586206896297, 278.67647058823746, 339.73941368078044]
loss = [3.3592443466186523, 2.9178733825683594, 2.855898141860962, 3.130598783493042, 3.2122082710266113, 2.6303136348724365, 3.0323739051818848, 2.843900203704834, 2.715785026550293, 2.802638530731201]
iteration 350
epsilon=0.22397578864725437, episode length=999, total rewards=190.00000000000455
Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586

Copying file://car-racing-v2-351-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586206896297, 278.67647058823746, 339.73941368078044]
loss = [3.2848124504089355, 3.220296859741211, 3.0868759155273438, 3.0797150135040283, 2.873365879058838, 2.952587366104126, 2.9753823280334473, 2.915116310119629, 2.8406822681427, 2.5745508670806885]
iteration 352
epsilon=0.223498670453174, episode length=652, total rewards=-0.08260869565263929
Training the model...
experience length=4995
number of examples=2650
best total reward =  [348.27586206896297, 278.67647058823746, 339.73941368078044]
loss = [3.307053327560425, 2.8237948417663574, 2.70735764503479, 2.8760929107666016, 2.689696788787842, 2.658773183822632, 2.6817140579223633, 2.724292278289795, 2.70440936088562, 2.622539520263672]
iteration 353
epsilon=0.22326368374864228, episode length=999, total rewards=132.83582089552652
Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586206896

Copying file://car-racing-v2-354-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586206896297, 278.67647058823746, 339.73941368078044]
loss = [2.777698040008545, 2.9088027477264404, 2.814175844192505, 2.831305980682373, 2.621837854385376, 2.770314931869507, 2.6653130054473877, 2.716167449951172, 2.6828904151916504, 2.5271494388580322]
iteration 355
epsilon=0.2228007364420443, episode length=999, total rewards=48.28897338403391
Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586206896297, 278.67647058823746, 339.73941368078044]
loss = [2.944344997406006, 2.981137275695801, 2.91556715965271, 2.7955641746520996, 2.933715343475342, 2.798248052597046, 2.9306225776672363, 2.716130256652832, 2.8061270713806152, 2.72208571434021]
iteration 356
epsilon=0.22257272907762385, episode length=999, total rewards=177.19298245614465
Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.275862068962

Copying file://car-racing-v2-357-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [348.27586206896297, 278.67647058823746, 339.73941368078044]
loss = [3.2037642002105713, 3.2316203117370605, 3.006624221801758, 3.0077905654907227, 3.083259344100952, 2.906726598739624, 2.8882033824920654, 2.7259984016418457, 2.7600269317626953, 2.7730324268341064]
iteration 358
epsilon=0.22212353176897912, episode length=999, total rewards=292.9824561403516
Training the model...
experience length=4995
number of examples=2997
best total reward =  [278.67647058823746, 339.73941368078044, 292.9824561403516]
loss = [3.328632354736328, 3.4887163639068604, 3.1372873783111572, 3.0340332984924316, 3.1999244689941406, 2.988267660140991, 2.8606204986572266, 3.1805734634399414, 2.8743112087249756, 3.2969932556152344]
iteration 359
epsilon=0.22190229645128934, episode length=999, total rewards=128.12500000000085
Training the model...
experience length=4995
number of examples=2997
best total reward =  [278.67

Copying file://car-racing-v2-360-model09.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [278.67647058823746, 339.73941368078044, 292.9824561403516]
loss = [3.9383950233459473, 4.017098903656006, 3.6606879234313965, 3.6608831882476807, 3.6237549781799316, 3.5642404556274414, 3.4878082275390625, 3.5648252964019775, 3.448155164718628, 3.3782155513763428]
iteration 361
epsilon=0.2214664407519087, episode length=999, total rewards=308.9219330855014
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.645542621612549, 3.5826966762542725, 3.5208346843719482, 3.379584550857544, 3.304068088531494, 3.175732135772705, 3.5302469730377197, 3.033393383026123, 3.1612777709960938, 3.1267075538635254]
iteration 362
epsilon=0.2212517763443896, episode length=999, total rewards=244.15584415584811
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368

Copying file://car-racing-v2-363-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.843446731567383, 3.2122507095336914, 3.3745272159576416, 3.344902753829956, 3.5899412631988525, 3.4633049964904785, 3.165166139602661, 3.228517532348633, 2.9499213695526123, 3.284358263015747]
iteration 364
epsilon=0.22082886599513626, episode length=999, total rewards=114.02214022140598
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.2209458351135254, 3.391683340072632, 2.9868550300598145, 3.25291109085083, 3.007132053375244, 2.9800033569335938, 2.958127498626709, 3.186833620071411, 2.9039864540100098, 3.0051920413970947]
iteration 365
epsilon=0.2206205773351849, episode length=594, total rewards=-0.09405940594103979
Training the model...
experience length=4995
number of examples=2592
best total reward =  [339.739413680

Copying file://car-racing-v2-366-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.5023858547210693, 3.416091203689575, 3.2534172534942627, 3.5064404010772705, 3.4938952922821045, 3.0572264194488525, 3.198472023010254, 3.348129987716675, 3.1841111183166504, 3.0955779552459717]
iteration 367
epsilon=0.22021022784621472, episode length=999, total rewards=249.67320261438243
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.2563745975494385, 3.0201339721679688, 3.1129283905029297, 3.0885469913482666, 3.067260980606079, 2.793247938156128, 2.812913179397583, 2.9881250858306885, 2.7691028118133545, 2.8121562004089355]
iteration 368
epsilon=0.22000812556775257, episode length=999, total rewards=275.79617834395185
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941

Copying file://car-racing-v2-369-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.798971176147461, 3.5808894634246826, 3.805030345916748, 3.6675291061401367, 3.5750861167907715, 3.4956555366516113, 3.608154773712158, 3.4905989170074463, 3.3150508403778076, 3.2931973934173584]
iteration 370
epsilon=0.2196099638689543, episode length=999, total rewards=164.08450704225527
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.4837846755981445, 3.3361446857452393, 3.0007364749908447, 3.2898454666137695, 3.008383274078369, 3.209926128387451, 3.0363497734069824, 2.8599987030029297, 2.975294351577759, 3.0096964836120605]
iteration 371
epsilon=0.21941386423026474, episode length=999, total rewards=224.32432432432927
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.739413

Copying file://car-racing-v2-372-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.5922889709472656, 3.990405797958374, 3.6289374828338623, 3.600062847137451, 3.2803497314453125, 3.457974910736084, 3.7362818717956543, 2.9245691299438477, 3.609821319580078, 3.5948848724365234]
iteration 373
epsilon=0.21902752833208247, episode length=999, total rewards=84.818481848189
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.1655755043029785, 3.1519129276275635, 3.025503158569336, 3.206942081451416, 3.108520746231079, 3.070016860961914, 3.0169780254364014, 3.094526529312134, 3.0298800468444824, 2.8461036682128906]
iteration 374
epsilon=0.21883725304876164, episode length=999, total rewards=146.10591900311977
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078

Copying file://car-racing-v2-375-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.775965452194214, 3.518608808517456, 3.74943208694458, 3.514453887939453, 3.485088348388672, 3.563483715057373, 3.548807382583618, 3.2442500591278076, 3.6938650608062744, 3.3135905265808105]
iteration 376
epsilon=0.2184623917130913, episode length=999, total rewards=85.07462686567294
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.304748773574829, 3.12526273727417, 3.191295862197876, 2.827465534210205, 2.946415424346924, 2.7930104732513428, 2.956181049346924, 2.9544501304626465, 2.802809000015259, 2.7759110927581787]
iteration 377
epsilon=0.21827776779596036, episode length=999, total rewards=62.544169611310615
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 2

Copying file://car-racing-v2-378-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [4.170889854431152, 3.895979642868042, 3.8873653411865234, 3.714294672012329, 3.411482095718384, 3.465815782546997, 3.3674840927124023, 3.4426867961883545, 3.218010663986206, 3.172297239303589]
iteration 379
epsilon=0.21791404021682076, episode length=999, total rewards=215.9509202454019
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.679853916168213, 3.2255191802978516, 3.4825246334075928, 3.3809072971343994, 3.1890628337860107, 3.37123703956604, 3.1167380809783936, 3.103140354156494, 3.216972589492798, 3.1255502700805664]
iteration 380
epsilon=0.21773489981465255, episode length=999, total rewards=267.0411985018743
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.7394136807804

Copying file://car-racing-v2-381-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2965
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [46.046512603759766, 44.24314880371094, 42.89369583129883, 42.594547271728516, 39.97822570800781, 40.68159103393555, 38.44552230834961, 37.011940002441406, 37.80597686767578, 35.969478607177734]
iteration 382
epsilon=0.21738197530834097, episode length=999, total rewards=278.9473684210539
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.394462823867798, 3.493769884109497, 3.382167339324951, 3.103400707244873, 3.2903382778167725, 3.244814395904541, 3.189795970916748, 3.169276714324951, 3.192811965942383, 3.114851713180542]
iteration 383
epsilon=0.21720815555525755, episode length=999, total rewards=123.21428571428689
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044,

Copying file://car-racing-v2-384-model09.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [7.521159648895264, 4.877452850341797, 5.310811996459961, 4.112927436828613, 5.603460311889648, 4.059048175811768, 4.192379951477051, 4.259861946105957, 4.3823561668396, 3.938472270965576]
iteration 385
epsilon=0.21686571325970794, episode length=999, total rewards=32.352941176472974
Training the model...
experience length=4995
number of examples=2997
best total reward =  [339.73941368078044, 292.9824561403516, 308.9219330855014]
loss = [3.434981107711792, 2.984114646911621, 3.1241986751556396, 3.0823638439178467, 2.934863567352295, 2.7383484840393066, 2.8384063243865967, 2.7126660346984863, 2.7843167781829834, 2.849518060684204]
iteration 386
epsilon=0.21669705612711085, episode length=999, total rewards=492.30769230768504
Training the model...
experience length=4995
number of examples=2997
best total reward =  [292.9824561403516, 