In [1]:
import gym
import numpy as np
import copy, random, os, subprocess, cv2
import tensorflow as tf
import keras.backend as K
from tensorflow.keras import layers, models, regularizers

  import imp
  'nearest': pil_image.NEAREST,
  'bilinear': pil_image.BILINEAR,
  'bicubic': pil_image.BICUBIC,
  if hasattr(pil_image, 'HAMMING'):
  if hasattr(pil_image, 'BOX'):
  if hasattr(pil_image, 'LANCZOS'):


In [2]:
MODEL_NAME = 'model10'
BUCKET = 'gs://etsuji-car-racing-v2'
os.environ['BUCKET'] = BUCKET
!gsutil mb -c regional -l us-west1 $BUCKET
!gsutil ls $BUCKET

Creating gs://etsuji-car-racing-v2/...
ServiceException: 409 A Cloud Storage bucket named 'etsuji-car-racing-v2' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.
gs://etsuji-car-racing-v2/model04/
gs://etsuji-car-racing-v2/model05/
gs://etsuji-car-racing-v2/model06/
gs://etsuji-car-racing-v2/model07/
gs://etsuji-car-racing-v2/model08/
gs://etsuji-car-racing-v2/model09/
gs://etsuji-car-racing-v2/model10/
gs://etsuji-car-racing-v2/model11/


In [3]:
class ApplySoftMaxWeight(layers.Layer):
    def __init__(self, **kwargs):
        self.filter_shape = None
        super(ApplySoftMaxWeight, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(name='weights', shape=[input_shape[3]])
        self.filter_shape = input_shape

    def get_config(self):
        config = super().get_config()
        return config
    
    def call(self, inputs, **kwargs):
        return inputs * tf.nn.softmax(self.kernel)
    
# Base model
class QValue:
    def __init__(self):
        self.model = self.build_model()

    def build_model(self):
        cnn_input = layers.Input(shape=(48, 48, 3), name='cnn_input')
        cnn1 = layers.Conv2D(16, (5, 5), padding='same',
                             use_bias=True, activation='relu',
                             name='cnn1')(cnn_input)
        pool1 = layers.MaxPooling2D((2, 2), name='pool1')(cnn1)
        cnn2 = layers.Conv2D(16, (5, 5), padding='same',
                             use_bias=True, activation='relu',
                             name='cnn2')(pool1)        
        pool2 = layers.MaxPooling2D((2, 2), name='pool2')(cnn2)
        weighted_filters = ApplySoftMaxWeight(name='weighted_filters')(pool2)

        cnn_flatten = layers.Flatten(name='flatten')(weighted_filters)
        action_input = layers.Input(shape=(5,), name='action_input')
        combined = layers.concatenate([cnn_flatten, action_input], name='concat')
        hidden1 = layers.Dense(2048, activation='relu', name='dense1')(combined)
        hidden2 = layers.Dense(1024, activation='relu', name='dense2')(hidden1)
        hidden3 = layers.Dense(512, activation='relu', name='dense3')(hidden2)
        q_value = layers.Dense(1, name='output')(hidden3)

        model = models.Model(inputs=[cnn_input, action_input], outputs=q_value)
        model.compile(loss='mse')
        return model

    def get_action(self, state):
        states = []
        actions = []
        for a in range(5):
            states.append(np.array(state))
            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
  
        q_values = self.model.predict([np.array(states), np.array(actions)])
        optimal_action = np.argmax(q_values)
        return optimal_action, q_values[optimal_action][0]

In [4]:
q_value = QValue()
q_value.model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 cnn_input (InputLayer)         [(None, 48, 48, 3)]  0           []                               
                                                                                                  
 cnn1 (Conv2D)                  (None, 48, 48, 16)   1216        ['cnn_input[0][0]']              
                                                                                                  
 pool1 (MaxPooling2D)           (None, 24, 24, 16)   0           ['cnn1[0][0]']                   
                                                                                                  
 cnn2 (Conv2D)                  (None, 24, 24, 16)   6416        ['pool1[0][0]']                  
                                                                                              

2022-08-07 21:16:25.091352: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-08-07 21:16:25.091401: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (tensorflow-2-8-20220801-173936): /proc/driver/nvidia/version does not exist
2022-08-07 21:16:25.092638: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
def join_frames(o0, o1, o2):
    gray_image0 = cv2.cvtColor(cv2.resize(o0, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image1 = cv2.cvtColor(cv2.resize(o1, (48, 48)), cv2.COLOR_RGB2GRAY)
    gray_image2 = cv2.cvtColor(cv2.resize(o2, (48, 48)), cv2.COLOR_RGB2GRAY)
    
    return np.array(
        [gray_image0.transpose(),
         gray_image1.transpose(),
         gray_image2.transpose()]).transpose()

In [6]:
def get_episode(environ, q_value, epsilon):
    episode = []
    o0 = environ.reset()
    o1 = copy.deepcopy(o0)
    o2 = copy.deepcopy(o0)
    total_r = 0

    if epsilon > 0:
        keep_count = 3
    else:
        keep_count = 1

    c = 0
    while True:
        if c % keep_count == 0: # Get new action
            if np.random.random() < epsilon:
                a = np.random.randint(5)
            else:
                a, _ = q_value.get_action(join_frames(o0, o1, o2))
        c += 1
        o_new, r, done, inf = environ.step(a)                
        total_r += r

        # Terminate episode when total reward becomes negative
        if total_r < 0:
            done = 1

        if done:
            # Terminal state is to achive more than 990 or get out of the field.
            if total_r > 990 or r < -99:
                episode.append((join_frames(o0, o1, o2), a, r, None))
            break
        else:
            episode.append((join_frames(o0, o1, o2), a, r, join_frames(o1, o2, o_new)))
        o0, o1, o2 = o1, o2, o_new

    print('epsilon={}, episode length={}, total rewards={}'.format(epsilon, len(episode), total_r))
    return episode, total_r

In [7]:
def train(environ, q_value, epsilon, checkpoint=0):
    gamma = 0.99
        
    if checkpoint > 0:
        filename = 'car-racing-v2-{}-{}.hd5'.format(checkpoint, MODEL_NAME)
        subprocess.run(['gsutil', 'cp', '{}/{}/{}'.format(BUCKET, MODEL_NAME, filename), './'])
        print('load model {}'.format(filename))
        q_value.model = models.load_model(filename)
        os.remove(filename)

    experience = []
    good_experience = []
    best_r = [-100, -100, -100]

    for n in range(checkpoint + 1, checkpoint + 1000):
        print('iteration {}'.format(n))

        total_len = 0
        if n % 3 == 0:
            print('Testing the current performance...')
            episode, total_r = get_episode(environ, q_value, epsilon=0)
            with open('result.txt', 'a') as f:
                f.write('{},{},{},{}\n'.format(n, epsilon, len(episode), total_r))
            filename = 'car-racing-v2-{}-{}.hd5'.format(n, MODEL_NAME)
            q_value.model.save(filename, save_format='h5')
            subprocess.run(['gsutil', '-m', 'cp',
                            '{}'.format(filename), '{}/{}/'.format(BUCKET, MODEL_NAME)])
            os.remove(filename)
            experience += episode
            total_len += len(episode)

        while total_len < 500:
            episode, total_r = get_episode(environ, q_value, epsilon)
            total_len += len(episode)
            experience += episode

            # Keep the top 3 episodes
            if total_r > min(best_r):
                best_r = best_r[1:] + [total_r]
                good_experience += episode
                if len(good_experience) > 999 * 3:
                    good_experience = good_experience[-999 * 3:]

            
        if len(experience) > 999 * 5: # remember last 5 episodes
            experience = experience[-999 * 5:]

        epsilon = (epsilon - 0.2) * 0.99 + 0.2

        print('Training the model...')
        # Use latest episode + past episodes (sampling) + top 3 episode (sampling)
        latest_experience = experience[-total_len:]
        past_experience = experience[:-total_len]
        examples = latest_experience + \
            random.sample(past_experience, min(len(past_experience), 999)) + \
            random.sample(good_experience, min(len(good_experience), 999))
        
        # Show some statistics
        print('experience length={}'.format(len(experience)))
        print('number of examples={}'.format(len(examples)))
        print('best total reward = ', best_r)
        np.random.shuffle(examples)
           
        states, actions, labels = [], [], []
        for state, a, r, state_new in examples:
            states.append(np.array(state))

            action_onehot = np.zeros(5)
            action_onehot[a] = 1
            actions.append(action_onehot)
            
            if state_new is None:   # Terminal state
                q_new = 0
            else:
                _, q_new = q_value.get_action(state_new)
            labels.append(np.array(r + gamma * q_new))

        hist = q_value.model.fit(
            [np.array(states), np.array(actions)], np.array(labels),
            batch_size=50, epochs=10, verbose=0)
        print('loss = {}'.format(hist.history['loss']))

In [8]:
env = gym.make("CarRacing-v2", continuous=False)
q_value = QValue()
q_value.model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 cnn_input (InputLayer)         [(None, 48, 48, 3)]  0           []                               
                                                                                                  
 cnn1 (Conv2D)                  (None, 48, 48, 16)   1216        ['cnn_input[0][0]']              
                                                                                                  
 pool1 (MaxPooling2D)           (None, 24, 24, 16)   0           ['cnn1[0][0]']                   
                                                                                                  
 cnn2 (Conv2D)                  (None, 24, 24, 16)   6416        ['pool1[0][0]']                  
                                                                                            

  "Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."
  "Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future."


In [None]:
train(env, q_value, epsilon=1.0, checkpoint=0)

iteration 1
epsilon=1.0, episode length=92, total rewards=-0.06923076923075944
epsilon=1.0, episode length=67, total rewards=-0.02033898305084067
epsilon=1.0, episode length=247, total rewards=-0.06501766784445562
epsilon=1.0, episode length=101, total rewards=-0.09898989898988408
Training the model...
experience length=507
number of examples=913
best total reward =  [-0.06923076923075944, -0.02033898305084067, -0.06501766784445562]
loss = [3356.1015625, 1.6678630113601685, 1.0135606527328491, 0.4260576069355011, 0.30588632822036743, 0.3382064402103424, 0.36367982625961304, 0.23736059665679932, 0.3600166440010071, 0.27549752593040466]
iteration 2
epsilon=0.992, episode length=595, total rewards=-0.02553191489366144
Training the model...
experience length=1102
number of examples=2101
best total reward =  [-0.02033898305084067, -0.06501766784445562, -0.02553191489366144]
loss = [0.5754793286323547, 0.4101273715496063, 0.3794131278991699, 0.4025336802005768, 0.5930966734886169, 0.33036795

Copying file://car-racing-v2-3-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.9840800000000001, episode length=78, total rewards=-0.08749999999998903
epsilon=0.9840800000000001, episode length=91, total rewards=-0.08145896656534224
epsilon=0.9840800000000001, episode length=151, total rewards=-0.04848484848481968
epsilon=0.9840800000000001, episode length=153, total rewards=-0.06257668711653808
Training the model...
experience length=1646
number of examples=2542
best total reward =  [-0.02553191489366144, -0.04848484848481968, -0.06257668711653808]
loss = [0.3237617313861847, 0.28124645352363586, 0.28037041425704956, 0.27460169792175293, 0.25909319519996643, 0.26945605874061584, 0.24346671998500824, 0.25470221042633057, 0.24108807742595673, 0.24178534746170044]
iteration 4
epsilon=0.9762392000000002, episode length=146, total rewards=-0.04798534798531337
epsilon=0.9762392000000002, episode length=218, total rewards=-0.08181818181815648
epsilon=0.9762392000000002, episode length=94, total rewards=-0.0362776025236535
epsilon=0.9762392000000002, episode l

Copying file://car-racing-v2-6-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=3789
number of examples=2622
best total reward =  [-0.0030075187969604567, -0.006329113924047308, -0.007317073170687055]
loss = [0.2677551209926605, 0.24869705736637115, 0.23781681060791016, 0.23458141088485718, 0.24464690685272217, 0.21639211475849152, 0.2262457311153412, 0.2250456064939499, 0.22089137136936188, 0.21392670273780823]
iteration 7
epsilon=0.9531841195208004, episode length=191, total rewards=-0.09171974522290927
epsilon=0.9531841195208004, episode length=107, total rewards=-0.04731182795696684
epsilon=0.9531841195208004, episode length=250, total rewards=-0.09999999999994924
Training the model...
experience length=4337
number of examples=2546
best total reward =  [-0.0030075187969604567, -0.006329113924047308, -0.007317073170687055]
loss = [0.29807958006858826, 0.2691815197467804, 0.2453012466430664, 0.2409779131412506, 0.2538585960865021, 0.22817566990852356, 0.24041618406772614, 0.21936140954494476, 0.2256382405757904, 0.21613465

Copying file://car-racing-v2-9-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.9381957555423366, episode length=522, total rewards=-0.03519163763054431
Training the model...
experience length=4995
number of examples=2875
best total reward =  [-0.0030075187969604567, -0.006329113924047308, -0.007317073170687055]
loss = [3.8854660987854004, 3.8127329349517822, 3.8241889476776123, 3.7380309104919434, 3.701789379119873, 3.605788469314575, 3.5447826385498047, 3.2741711139678955, 3.021646022796631, 2.525888442993164]
iteration 10
epsilon=0.9308137979869133, episode length=104, total rewards=-0.010489510489503945
epsilon=0.9308137979869133, episode length=159, total rewards=-0.07643312101910626
epsilon=0.9308137979869133, episode length=111, total rewards=-0.005970149253723828
epsilon=0.9308137979869133, episode length=102, total rewards=-0.06109215017062852
epsilon=0.9308137979869133, episode length=229, total rewards=-0.09923664122130949
Training the model...
experience length=4995
number of examples=2703
best total reward =  [-0.006329113924047308, -0.00731

Copying file://car-racing-v2-12-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2514
best total reward =  [-0.006329113924047308, -0.007317073170687055, -0.005970149253723828]
loss = [4.648839950561523, 4.461066246032715, 4.353189945220947, 4.438372611999512, 4.1957597732543945, 4.141355991363525, 3.8942015171051025, 3.7971909046173096, 3.2348251342773438, 2.9531874656677246]
iteration 13
epsilon=0.9091078973729041, episode length=288, total rewards=-0.053846153846071276
epsilon=0.9091078973729041, episode length=200, total rewards=-0.09999999999998654
epsilon=0.9091078973729041, episode length=270, total rewards=-0.07297297297289629
Training the model...
experience length=4995
number of examples=2756
best total reward =  [-0.006329113924047308, -0.007317073170687055, -0.005970149253723828]
loss = [1.9820282459259033, 1.230678677558899, 0.48832404613494873, 0.983504056930542, 0.7279045581817627, 0.6818157434463501, 0.7505677342414856, 0.5628650784492493, 0.7583051919937134, 0.4133731424808502]
iterati

Copying file://car-racing-v2-15-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.8949966502151834, episode length=181, total rewards=-0.0840579710144509
epsilon=0.8949966502151834, episode length=719, total rewards=-0.05755395683431641
Training the model...
experience length=4995
number of examples=3072
best total reward =  [-0.006329113924047308, -0.007317073170687055, -0.005970149253723828]
loss = [1.4428061246871948, 0.6008784174919128, 0.9959518909454346, 0.8324798941612244, 0.8814418911933899, 0.8250826001167297, 0.9319138526916504, 0.5655932426452637, 1.1261106729507446, 0.38184699416160583]
iteration 16
epsilon=0.8880466837130316, episode length=129, total rewards=-0.09677419354838457
epsilon=0.8880466837130316, episode length=278, total rewards=-0.02543554006963797
epsilon=0.8880466837130316, episode length=557, total rewards=-0.006008583690982422
Training the model...
experience length=4995
number of examples=2962
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [0.5328864455223083, 0.4447085261344

Copying file://car-racing-v2-18-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2585
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [5.179413795471191, 4.565739154815674, 4.434499263763428, 4.353696346282959, 4.287121772766113, 4.276195049285889, 4.168797969818115, 4.330286979675293, 4.140720844268799, 3.971278429031372]
iteration 19
epsilon=0.8676110091600708, episode length=745, total rewards=-0.02372881355911291
Training the model...
experience length=4995
number of examples=2743
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [0.5565003156661987, 0.40966248512268066, 0.3655273914337158, 0.35714292526245117, 0.3351980447769165, 0.2984216511249542, 0.3003655672073364, 0.2656745910644531, 0.27036920189857483, 0.23424719274044037]
iteration 20
epsilon=0.8609348990684702, episode length=145, total rewards=-0.06511627906975481
epsilon=0.8609348990684702, episode length=595, total rewards=-0.0388714733540

Copying file://car-racing-v2-21-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.8543255500777855, episode length=793, total rewards=-0.03492063492134287
Training the model...
experience length=4995
number of examples=3245
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [0.6203969717025757, 0.8159828782081604, 0.4096890389919281, 0.7205432653427124, 0.55555659532547, 0.6504605412483215, 0.4964849650859833, 0.356693297624588, 0.6097515225410461, 0.47211962938308716]
iteration 22
epsilon=0.8477822945770077, episode length=711, total rewards=-0.0256227758009768
Training the model...
experience length=4995
number of examples=2709
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [0.7435622215270996, 0.8873055577278137, 1.1433746814727783, 0.5502933859825134, 0.6900088787078857, 0.7535433769226074, 0.40203261375427246, 0.4876009523868561, 0.731067955493927, 0.5906347036361694]
iteration 23
epsilon=0.8413044716312377, episode length=705, total rewards=-0.0117647058

Copying file://car-racing-v2-24-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2668
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [0.9426298141479492, 0.8937938213348389, 1.417515754699707, 0.6902919411659241, 1.0078028440475464, 1.2119288444519043, 0.5350185632705688, 0.9018219113349915, 1.990094542503357, 0.5894021391868591]
iteration 25
epsilon=0.8285425126457759, episode length=135, total rewards=-0.08648648648646273
epsilon=0.8285425126457759, episode length=160, total rewards=-0.022829581993544873
epsilon=0.8285425126457759, episode length=645, total rewards=-0.08387096774208769
Training the model...
experience length=4995
number of examples=2938
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [0.5293329358100891, 0.46060651540756226, 0.40307050943374634, 0.3792966902256012, 0.34515154361724854, 0.33267462253570557, 0.2988937199115753, 0.31469544768333435, 0.2952490448951721, 0.2809775769710541

Copying file://car-racing-v2-27-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2600
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [4.7371296882629395, 4.519229888916016, 4.368491172790527, 4.36504602432251, 4.256899356842041, 4.257503032684326, 4.221034526824951, 4.156318187713623, 4.188222408294678, 4.153416633605957]
iteration 28
epsilon=0.809874171477684, episode length=505, total rewards=-0.05848375451250232
Training the model...
experience length=4995
number of examples=2503
best total reward =  [-0.007317073170687055, -0.005970149253723828, -0.006008583690982422]
loss = [0.7999849915504456, 0.5089099407196045, 0.3925097584724426, 0.37036436796188354, 0.32068803906440735, 0.3541235029697418, 0.31094062328338623, 0.28150349855422974, 0.32793134450912476, 0.3057633936405182]
iteration 29
epsilon=0.8037754297629072, episode length=364, total rewards=-0.003649635036399007
epsilon=0.8037754297629072, episode length=89, total rewards=-0.0447761194029

Copying file://car-racing-v2-30-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2521
best total reward =  [-0.006008583690982422, -0.003649635036399007, -0.0014598540145655814]
loss = [8.92186164855957, 8.754793167114258, 8.390751838684082, 8.217443466186523, 7.887367248535156, 7.661123275756836, 7.098696708679199, 6.125341892242432, 5.380017280578613, 5.27309513092041]
iteration 31
epsilon=0.7917602987106254, episode length=112, total rewards=-0.06404494382021153
epsilon=0.7917602987106254, episode length=485, total rewards=-0.09253731343271565
Training the model...
experience length=4995
number of examples=2595
best total reward =  [-0.006008583690982422, -0.003649635036399007, -0.0014598540145655814]
loss = [0.6299135684967041, 0.5099126696586609, 0.4561477303504944, 0.4574659466743469, 0.40077269077301025, 0.40186578035354614, 0.3486000597476959, 0.41007205843925476, 0.32306215167045593, 0.3328947126865387]
iteration 32
epsilon=0.7858426957235192, episode length=315, total rewards=-0.0210526315788

Copying file://car-racing-v2-33-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.006008583690982422, -0.003649635036399007, -0.0014598540145655814]
loss = [1.0232809782028198, 1.2223671674728394, 0.9141932725906372, 0.8828828930854797, 0.9481813311576843, 0.8408598899841309, 0.8911157250404358, 0.7797671556472778, 0.9377263784408569, 0.6979605555534363]
iteration 34
epsilon=0.7741844260786213, episode length=359, total rewards=-0.028776978417164062
epsilon=0.7741844260786213, episode length=333, total rewards=-0.06666666666659293
Training the model...
experience length=4995
number of examples=2690
best total reward =  [-0.006008583690982422, -0.003649635036399007, -0.0014598540145655814]
loss = [0.6276446580886841, 0.5569459795951843, 0.49330949783325195, 0.4569827914237976, 0.4502936005592346, 0.42899179458618164, 0.43337759375572205, 0.3708038926124573, 0.36902204155921936, 0.35343998670578003]
iteration 35
epsilon=0.7684425818178351, episode length=727, total rewards=-0

Copying file://car-racing-v2-36-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2635
best total reward =  [-0.003649635036399007, -0.0014598540145655814, -0.0030651340993703424]
loss = [0.690569281578064, 0.5542462468147278, 0.46695539355278015, 0.45804882049560547, 0.4111361503601074, 0.4052294194698334, 0.446702241897583, 0.33708205819129944, 0.37984949350357056, 0.32500019669532776]
iteration 37
epsilon=0.7571305744396604, episode length=150, total rewards=-0.03975903614454809
epsilon=0.7571305744396604, episode length=347, total rewards=-0.09968454258664594
epsilon=0.7571305744396604, episode length=653, total rewards=-0.04052287581725553
Training the model...
experience length=4995
number of examples=3148
best total reward =  [-0.003649635036399007, -0.0014598540145655814, -0.0030651340993703424]
loss = [0.594902753829956, 0.47225064039230347, 0.4366598129272461, 0.4760119318962097, 0.3776272237300873, 0.3715122640132904, 0.33967098593711853, 0.3185337483882904, 0.3285236954689026, 0.310353249311

Copying file://car-racing-v2-39-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.003649635036399007, -0.0014598540145655814, -0.0030651340993703424]
loss = [0.7417274713516235, 0.6212920546531677, 0.5485603213310242, 0.5061814785003662, 0.4899563491344452, 0.4707616865634918, 0.44844508171081543, 0.44579312205314636, 0.46876969933509827, 0.42719966173171997]
iteration 40
epsilon=0.7405832392482283, episode length=999, total rewards=9.034267912772757
Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.6009690761566162, 0.507444441318512, 0.4841298460960388, 0.4454111158847809, 0.43774834275245667, 0.39329463243484497, 0.3919175863265991, 0.38899385929107666, 0.36793237924575806, 0.36531686782836914]
iteration 41
epsilon=0.7351774068557462, episode length=213, total rewards=-0.04768683274015936
epsilon=0.7351774068557462, episode length=185, total rewards=-0.02

Copying file://car-racing-v2-42-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.7298256327871888, episode length=111, total rewards=-0.005970149253714058
Training the model...
experience length=4995
number of examples=2585
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.7606561779975891, 0.5242488384246826, 0.4957568049430847, 0.4421844780445099, 0.428676038980484, 0.4204403758049011, 0.4100208580493927, 0.35582682490348816, 0.372635155916214, 0.3509676456451416]
iteration 43
epsilon=0.724527376459317, episode length=344, total rewards=-0.017241379310289434
epsilon=0.724527376459317, episode length=757, total rewards=-0.090220820189024
Training the model...
experience length=4995
number of examples=3099
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.6660347580909729, 0.5419049859046936, 0.5064699053764343, 0.5059487819671631, 0.44804832339286804, 0.4589810371398926, 0.39201971888542175, 0.4080899953842163, 0.3855039179325104, 0.37182486057281494]
iterat

Copying file://car-racing-v2-45-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


epsilon=0.7140892816677766, episode length=168, total rewards=-0.06498316498314893
epsilon=0.7140892816677766, episode length=374, total rewards=-0.04681647940063202
Training the model...
experience length=4995
number of examples=2789
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.47220495343208313, 0.40564289689064026, 0.4085257947444916, 0.3607223629951477, 0.6660831570625305, 0.31425535678863525, 0.29917991161346436, 0.31400272250175476, 0.290287047624588, 0.38322117924690247]
iteration 46
epsilon=0.708948388851099, episode length=392, total rewards=-0.014285714285638934
epsilon=0.708948388851099, episode length=506, total rewards=-0.02432432432418727
Training the model...
experience length=4995
number of examples=2896
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.4743025004863739, 0.40259209275245667, 0.3966499865055084, 0.391533225774765, 0.3674027621746063, 0.3271332085132599,

Copying file://car-racing-v2-48-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2714
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.6120557188987732, 0.45980972051620483, 0.43450847268104553, 0.39302507042884827, 0.35206785798072815, 0.33055591583251953, 0.33846816420555115, 0.3385612368583679, 0.28469541668891907, 0.31594955921173096]
iteration 49
epsilon=0.6938321127538326, episode length=372, total rewards=-0.011864406779547226
epsilon=0.6938321127538326, episode length=268, total rewards=-0.054362416107323286
Training the model...
experience length=4995
number of examples=2638
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.5068265199661255, 0.4265839159488678, 0.3750580847263336, 0.3498745858669281, 0.3287738859653473, 0.332723468542099, 0.29862481355667114, 0.29421424865722656, 0.27450552582740784, 0.2671199440956116]
iteration 50
epsilon=0.6888937916262943, episode length=342, total rewards=-0

Copying file://car-racing-v2-51-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.6767411828041077, 0.562832772731781, 0.4924353361129761, 0.43622922897338867, 0.40669354796409607, 0.38506171107292175, 0.3614252805709839, 0.34852197766304016, 0.34444695711135864, 0.3154367208480835]
iteration 52
epsilon=0.6791648051729311, episode length=281, total rewards=-0.07499999999993467
epsilon=0.6791648051729311, episode length=224, total rewards=-0.06410256410252946
Training the model...
experience length=4995
number of examples=2503
best total reward =  [-0.0014598540145655814, -0.0030651340993703424, 9.034267912772757]
loss = [0.5074697136878967, 0.39014604687690735, 0.3716522455215454, 0.32758042216300964, 0.30738186836242676, 0.2794840931892395, 0.3137452304363251, 0.2595943212509155, 0.2819177806377411, 0.24517607688903809]
iteration 53
epsilon=0.6743731571212017, episode length=999, total rewards=18.466

Copying file://car-racing-v2-54-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [-0.0030651340993703424, 9.034267912772757, 18.4668989547025]
loss = [0.9022960662841797, 0.8151700496673584, 0.6845849752426147, 0.6204648017883301, 0.6342450976371765, 0.5476468205451965, 0.5015154480934143, 0.45508846640586853, 0.44852328300476074, 0.425759494304657]
iteration 55
epsilon=0.6649331312944898, episode length=999, total rewards=39.44223107569623
Training the model...
experience length=4995
number of examples=2997
best total reward =  [9.034267912772757, 18.4668989547025, 39.44223107569623]
loss = [0.7086427211761475, 0.6182815432548523, 0.4925297200679779, 0.4898886978626251, 0.43041783571243286, 0.40866819024086, 0.38050368428230286, 0.38166677951812744, 0.3695971667766571, 0.36556902527809143]
iteration 56
epsilon=0.660283799981545, episode length=303, total rewards=-0.0048632218844393005
epsilon=0.660283799981545, episode length=307, total rewards=-0.08327645051193966
Training t

Copying file://car-racing-v2-57-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [9.034267912772757, 18.4668989547025, 39.44223107569623]
loss = [0.7136030197143555, 0.5639026761054993, 0.47421959042549133, 0.46830862760543823, 0.4499392509460449, 0.5641134977340698, 0.40037909150123596, 0.37656888365745544, 0.35820072889328003, 0.34568437933921814]
iteration 58
epsilon=0.6511241523619121, episode length=999, total rewards=53.84615384615471
Training the model...
experience length=4995
number of examples=2997
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.9351816177368164, 0.6517082452774048, 0.6030204892158508, 0.5194892287254333, 0.46135413646698, 0.4513357877731323, 0.41223809123039246, 0.46835190057754517, 0.360114723443985, 0.3889588713645935]
iteration 59
epsilon=0.646612910838293, episode length=636, total rewards=-0.005732484076376582
Training the model...
experience length=4995
number of examples=2634
best total reward =  [18.46

Copying file://car-racing-v2-60-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.6535731554031372, 0.5101306438446045, 0.4421033263206482, 0.42563992738723755, 0.37551480531692505, 0.3286557197570801, 0.3536422848701477, 0.3002239763736725, 0.31207770109176636, 0.31510481238365173]
iteration 61
epsilon=0.6377253139126109, episode length=160, total rewards=-0.07435897435897387
epsilon=0.6377253139126109, episode length=210, total rewards=-0.07897897897892325
epsilon=0.6377253139126109, episode length=445, total rewards=-0.06558704453429276
Training the model...
experience length=4995
number of examples=2813
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.6639795303344727, 0.5077164769172668, 0.4197435677051544, 0.4140585958957672, 0.3546563982963562, 0.3489436209201813, 0.35078221559524536, 0.3207298815250397, 0.2780398428440094, 0.2995363175868988]
iteration 62
epsilon=0.

Copying file://car-racing-v2-63-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.8632882833480835, 0.6710798740386963, 0.5899394154548645, 0.5175529718399048, 0.4587164521217346, 0.412085622549057, 0.408567875623703, 0.37243467569351196, 0.3555225431919098, 0.33637312054634094]
iteration 64
epsilon=0.6247244343640923, episode length=418, total rewards=-0.09935691318325124
epsilon=0.6247244343640923, episode length=840, total rewards=-0.029203539823776853
Training the model...
experience length=4995
number of examples=3256
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.6857093572616577, 0.7697018384933472, 0.4844263195991516, 0.4380344748497009, 0.42767494916915894, 0.3962180018424988, 0.38126814365386963, 0.31694889068603516, 0.3195197284221649, 0.3058515787124634]
iteration 65
epsilon=0.6204771900204513, episode length=314, total rewards=-0.03146853146850612
epsilon=0.6

Copying file://car-racing-v2-66-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.9505347609519958, 0.7652289867401123, 0.6679019927978516, 0.6176163554191589, 0.5534117817878723, 0.520290732383728, 0.4819117486476898, 0.4585707485675812, 0.4480646550655365, 0.4136994779109955]
iteration 67
epsilon=0.6121096939390444, episode length=607, total rewards=-0.08571428571409823
Training the model...
experience length=4995
number of examples=2605
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.73070228099823, 0.5424148440361023, 0.46814194321632385, 0.44713321328163147, 0.3672774136066437, 0.3909277319908142, 0.33505845069885254, 0.32218146324157715, 0.3214942514896393, 0.3236711323261261]
iteration 68
epsilon=0.6079885969996539, episode length=104, total rewards=-0.047038327526121665
epsilon=0.6079885969996539, episode length=104, total rewards=-0.08333333333332793
epsilon=0.607

Copying file://car-racing-v2-69-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [1.138671636581421, 0.8547357320785522, 0.7330402731895447, 0.6464151740074158, 0.565762460231781, 0.5473372340202332, 0.4976203739643097, 0.4674012064933777, 0.4360269606113434, 0.43327686190605164]
iteration 70
epsilon=0.5998696239193608, episode length=825, total rewards=-0.03119266055100775
Training the model...
experience length=4995
number of examples=2823
best total reward =  [18.4668989547025, 39.44223107569623, 53.84615384615471]
loss = [0.8261699080467224, 0.6361379027366638, 0.4949760437011719, 0.5237575769424438, 0.4272535443305969, 0.3982488512992859, 0.40028151869773865, 0.3414947986602783, 0.35886281728744507, 0.34407344460487366]
iteration 71
epsilon=0.5958709276801673, episode length=136, total rewards=-0.0013698630136797785
epsilon=0.5958709276801673, episode length=999, total rewards=49.85590778097862
Training the m

Copying file://car-racing-v2-72-model10.hd5 [Content-Type=application/octet-stream]...
- [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2539
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.7257824540138245, 0.560602068901062, 0.48204538226127625, 0.4606860876083374, 0.3993523418903351, 0.37081897258758545, 0.4225373864173889, 0.3279890716075897, 0.34219175577163696, 0.33098670840263367]
iteration 73
epsilon=0.5879930962193319, episode length=734, total rewards=-0.07342657342637371
Training the model...
experience length=4995
number of examples=2732
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.6357814073562622, 0.512525737285614, 0.4270249605178833, 0.39904043078422546, 0.3868246376514435, 0.3837602734565735, 0.4150729179382324, 0.3258674740791321, 0.30028653144836426, 0.33004945516586304]
iteration 74
epsilon=0.5841131652571386, episode length=462, total rewards=-0.09537953795366971
epsilon=0.5841131652571386, episode length=326, total rewards=-0.020261437908402707
Trainin

Copying file://car-racing-v2-75-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.6786156892776489, 0.5375087261199951, 0.48694244027137756, 0.4355156421661377, 0.42615482211112976, 0.34558263421058655, 0.4927719533443451, 0.34646663069725037, 0.35870689153671265, 0.334248811006546]
iteration 76
epsilon=0.5764693132685216, episode length=688, total rewards=-0.04754098360685896
Training the model...
experience length=4995
number of examples=2686
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.608008623123169, 0.49394139647483826, 0.4889868199825287, 0.37494558095932007, 0.3799781799316406, 0.34565845131874084, 0.3380918800830841, 0.32772231101989746, 0.3100969195365906, 0.2959252595901489]
iteration 77
epsilon=0.5727046201358363, episode length=285, total rewards=-0.028571428571363383
epsilon=0.5727046201358363, episode length=539, total rewards=-0.0317460317458835
Traini

Copying file://car-racing-v2-78-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.8680939078330994, 0.7029879689216614, 0.639191210269928, 0.5544512867927551, 0.5402981042861938, 0.4873291254043579, 0.457379549741745, 0.4680476784706116, 0.4446582794189453, 0.42019355297088623]
iteration 79
epsilon=0.5652877981951332, episode length=999, total rewards=8.108108108107023
Training the model...
experience length=4995
number of examples=2997
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.6951237320899963, 0.5626992583274841, 0.5024874210357666, 0.43951940536499023, 0.4251699149608612, 0.38500791788101196, 0.4497198164463043, 0.3242715299129486, 0.3553963601589203, 0.3564733862876892]
iteration 80
epsilon=0.5616349202131818, episode length=684, total rewards=-0.006849315069016287
Training the model...
experience length=4995
number of examples=2682
best total reward =  [39.442

Copying file://car-racing-v2-81-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2655
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.7472199201583862, 0.4990912973880768, 0.49399882555007935, 0.39650148153305054, 0.3961038589477539, 0.3335897624492645, 0.32406753301620483, 0.3430142104625702, 0.30133119225502014, 0.3048293888568878]
iteration 82
epsilon=0.5544383853009396, episode length=817, total rewards=-0.015613382900341949
Training the model...
experience length=4995
number of examples=2815
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.671235978603363, 0.5285197496414185, 0.4517378509044647, 0.4198106527328491, 0.5187810063362122, 0.40467777848243713, 0.32497328519821167, 0.3479909598827362, 0.33315861225128174, 0.3979910612106323]
iteration 83
epsilon=0.5508940014479302, episode length=827, total rewards=-0.09323308270758557
Training the model...
experience length=4995
number of examples=2825
best total reward = 

Copying file://car-racing-v2-84-model10.hd5 [Content-Type=application/octet-stream]...
/ [1/1 files][ 56.2 MiB/ 56.2 MiB] 100% Done                                    
Operation completed over 1 objects/56.2 MiB.                                     


Training the model...
experience length=4995
number of examples=2997
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [1.170850157737732, 0.8774429559707642, 0.7583107352256775, 0.700752317905426, 0.5909109115600586, 0.615998387336731, 0.5106540322303772, 0.5424823760986328, 0.47290703654289246, 0.43147191405296326]
iteration 85
epsilon=0.5439112108191164, episode length=773, total rewards=-0.0006191950468174834
Training the model...
experience length=4995
number of examples=2771
best total reward =  [39.44223107569623, 53.84615384615471, 49.85590778097862]
loss = [0.7999150156974792, 0.5659582018852234, 0.5065125226974487, 0.43928417563438416, 0.4087236225605011, 0.40213948488235474, 0.3598531484603882, 0.3908293545246124, 0.3456452786922455, 0.3198377192020416]
iteration 86
epsilon=0.5404720987109253, episode length=748, total rewards=-0.049700598802943946
Training the model...
experience length=4995
number of examples=2746
best total reward =  [3