In [4]:
# Import the required packages
%matplotlib inline
import matplotlib.pyplot as plt
import os
# Both environment variables are set before the imports below so that the
# libraries read them when they are loaded.
os.environ["SDL_VIDEODRIVER"] = "dummy"  # keeps the pop-out game window from appearing
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # make only GPU 0 visible to CUDA
import tensorflow as tf
import numpy as np
import skimage.color
import skimage.transform
from ple.games.flappybird import FlappyBird
from ple import PLE
# Create the game and wrap it in the PLE environment without opening a display.
game = FlappyBird()
env = PLE(game, fps=30, display_screen=False)  # environment interface to game

pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html
couldn't import doomish
Couldn't import doom


In [5]:
# game.actions is a dictionary whose keys are action descriptions and whose
# values are the corresponding action values
print(game.actions)
# getActionSet() returns the list of action values (including None)
num_action = len(env.getActionSet())
# Bucket width per state feature: preprocess() divides each feature value by
# its width here to obtain a discrete bucket index.
bucket_range_per_feature = {
  'next_next_pipe_bottom_y': 40,
  'next_next_pipe_dist_to_player': 512,
  'next_next_pipe_top_y': 40,
  'next_pipe_bottom_y': 20,
  'next_pipe_dist_to_player': 20,
  'next_pipe_top_y': 20,
  'player_vel': 4,
  'player_y': 16
}
print(num_action)
game.getGameState()  # a dictionary describing the current game state

{'up': 119}
2


{'player_y': 256,
 'player_vel': 0,
 'next_pipe_dist_to_player': 309.0,
 'next_pipe_top_y': 29,
 'next_pipe_bottom_y': 129,
 'next_next_pipe_dist_to_player': 453.0,
 'next_next_pipe_top_y': 107,
 'next_next_pipe_bottom_y': 207}

In [6]:
# Input dimensions. num_feature and num_stack give the shape of the network
# input placeholder: (batch, num_feature, num_stack).
screen_width = screen_height = 80
num_stack, num_feature = 4, 8

In [7]:
# Feature names in sorted order; this fixes the position of every feature in
# the list returned by preprocess().
state_key = sorted(bucket_range_per_feature)


def preprocess(state):
    """Convert a raw game-state dict into a list of bucket indices.

    The four pipe y-coordinates are first made relative to the player by
    subtracting ``player_y``. Every feature is then divided by its bucket
    width from ``bucket_range_per_feature`` and truncated to an int.

    Args:
        state: dict containing every key of ``bucket_range_per_feature``
            (e.g. the dict returned by ``game.getGameState()``). It is not
            modified.

    Returns:
        list of int, one entry per feature, ordered as in ``state_key``.
    """
    # A shallow copy is sufficient because the values are only rebound below,
    # never mutated in place. It also removes the dependency on the `copy`
    # module, which this notebook imports only in a later cell.
    state = dict(state)
    player_y = state['player_y']
    for key in ('next_next_pipe_bottom_y', 'next_next_pipe_top_y',
                'next_pipe_bottom_y', 'next_pipe_top_y'):
        state[key] -= player_y
    # int() truncates toward zero, so offsets in (-width, width) all land in
    # bucket 0. NOTE(review): confirm that this is intended rather than a
    # floor-based bucketing.
    return [int(state[key] / bucket_range_per_feature[key]) for key in state_key]

In [8]:
import math
import copy
from collections import defaultdict
# Lower bound for the exploring rate decayed in Agent.update_parameters.
# Note that 10e-4 is equal to 1e-3.
MIN_EXPLORING_RATE = 10e-4


class Agent:
    """Q-network agent with epsilon-greedy action selection (TensorFlow 1.x).

    The network is built inside ``tf.variable_scope(name)``. Its input is a
    stack of feature vectors of shape ``(batch, num_feature, num_stack)``
    (``num_feature`` and ``num_stack`` are module-level constants) and its
    output is one Q-value per action.
    """

    def __init__(self, name, num_action, t=0, discount_factor=0.99):
        """Store the hyper-parameters and build the graph.

        Args:
            name: variable-scope name; also used as a substring filter to
                select this agent's variables for optimisation.
            num_action: number of discrete actions (width of the Q output).
            t: unused; kept so existing callers keep working.
            discount_factor: discount applied to the bootstrapped target Q.
        """
        self.exploring_rate = 0.1
        self.discount_factor = discount_factor
        self.num_action = num_action
        self.name = name
        with tf.variable_scope(name):
            self.build_model()

    def build_model(self):
        """Create placeholders, the Q-network, the loss and the train op."""

        # Inputs: stacked state features, selected actions and rewards.
        self.input_screen = tf.placeholder(
            tf.float32, shape=[None, num_feature, num_stack])
        self.action = tf.placeholder(tf.int32, [None])
        self.reward = tf.placeholder(tf.float32, [None])
        # Fed by callers but not consumed by any layer of the network below.
        self.is_training = tf.placeholder(tf.bool, shape=[])

        def net(screen, reuse=False):
            """Fully connected network mapping the input stack to Q-values."""
            with tf.variable_scope(
                    "layers",
                    reuse=reuse,
                    initializer=tf.truncated_normal_initializer(stddev=1e-2)):

                hidden = tf.contrib.layers.flatten(screen)
                # Layers are created in this fixed order; get_update_ops()
                # pairs online/target variables by position.
                for units in (1024, 512, 256, 128):
                    hidden = tf.layers.dense(
                        inputs=hidden, units=units, activation=tf.nn.relu)
                Q = tf.layers.dense(
                    inputs=hidden, units=self.num_action, activation=None)

                return Q

        # Q(s, a; theta) for all a, shape (batch_size, num_action)
        self.output = net(self.input_screen)
        index = tf.stack([tf.range(tf.shape(self.action)[0]), self.action],
                         axis=1)
        # Q(s, a; theta) for the selected action, shape (batch_size,)
        self.esti_Q = tf.gather_nd(self.output, index)

        # max_a' Q(s', a'), shape (batch_size,)
        self.max_Q = tf.reduce_max(self.output, axis=1)
        # Bootstrapped next-state value, computed by the caller and fed in.
        self.tar_Q = tf.placeholder(tf.float32, [None])

        # loss = E[(r + gamma * tar_Q - Q(s, a; theta))^2]
        self.loss = tf.reduce_mean(
            tf.square(self.reward + self.discount_factor * self.tar_Q -
                      self.esti_Q))

        optimizer = tf.train.AdamOptimizer(learning_rate=1e-5)
        # NOTE(review): the filter is a substring match on the variable name,
        # so agent names that are substrings of each other would share
        # variables.
        self.g_gvs = optimizer.compute_gradients(
            self.loss,
            var_list=[v for v in tf.global_variables() if self.name in v.name])
        self.train_op = optimizer.apply_gradients(self.g_gvs)
        # Action with the highest action-value; only used in inference.
        self.pred = tf.argmax(self.output, axis=1)

    def select_action(self, input_screen, sess):
        """Pick an action with the epsilon-greedy rule.

        Args:
            input_screen: sequence of ``num_stack`` feature vectors, i.e. an
                array-like of shape ``(num_stack, num_feature)``.
            sess: TensorFlow session used to evaluate the network.

        Returns:
            Index of the chosen action.
        """
        if np.random.rand() < self.exploring_rate:
            action = np.random.choice(self.num_action)  # random action
        else:
            # (num_stack, num_feature) -> (num_feature, num_stack)
            input_screen = np.array(input_screen).transpose([1, 0])
            feed_dict = {
                self.input_screen: input_screen[None, :],
                self.is_training: False,
            }
            # Greedy action: the one with the highest Q-value.
            action = sess.run(self.pred, feed_dict=feed_dict)[0]
        return action

    def update_policy(self, input_screens, actions, rewards,
                      input_screens_plum, terminal, target_netwrok, sess):
        """Run one optimisation step on a batch of transitions.

        Args:
            input_screens: batch of states, shape
                ``(batch, num_stack, num_feature)``.
            actions: batch of action indices.
            rewards: batch of rewards.
            input_screens_plum: batch of next states, same shape as
                ``input_screens``.
            terminal: batch of flags, truthy where the transition ended the
                episode.
            target_netwrok: agent whose ``max_Q`` provides the bootstrap
                value.
            sess: TensorFlow session.

        Returns:
            The loss value for this batch.
        """
        # Use the max-Q estimate of the target network to update this one.
        feed_dict = {
            target_netwrok.input_screen:
            np.array(input_screens_plum).transpose([0, 2, 1]),
            target_netwrok.is_training:
            True,
        }
        max_Q = sess.run(target_netwrok.max_Q, feed_dict=feed_dict)
        # Zero the bootstrap value of terminal transitions. logical_not on an
        # explicit bool array also works for 0/1 integer flags, for which the
        # bitwise `~` operator would yield -1/-2 instead of a 0/1 mask.
        not_terminal = np.logical_not(np.asarray(terminal, dtype=bool))
        max_Q = max_Q * not_terminal
        feed_dict = {
            self.input_screen: np.array(input_screens).transpose([0, 2, 1]),
            self.tar_Q: max_Q,
            self.action: actions,
            self.reward: rewards,
            self.is_training: True,
        }
        loss, _ = sess.run([self.loss, self.train_op], feed_dict=feed_dict)
        return loss

    def update_parameters(self, episode):
        """Linearly decay the exploring rate towards ``MIN_EXPLORING_RATE``.

        Args:
            episode: unused; kept for interface compatibility.
        """
        # The guard leaves a rate that is already at or below the floor
        # untouched (e.g. 0 after shutdown_explore()); the max() stops the
        # last decay step from undershooting the floor.
        if self.exploring_rate > MIN_EXPLORING_RATE:
            step = (0.1 - MIN_EXPLORING_RATE) / 30000
            self.exploring_rate = max(MIN_EXPLORING_RATE,
                                      self.exploring_rate - step)

    def shutdown_explore(self):
        """Make action selection purely greedy."""
        self.exploring_rate = 0


In [9]:
def get_update_ops():
  """Build the ops that copy the online network's variables into the target.

  Variables are selected by substring match on their names ('online' /
  'target') and paired by position, so both agents must have created their
  variables in the same order.
  NOTE(review): the name filter presumably also matches optimizer variables
  created under those scopes - confirm that copying them is intended.

  Returns:
    list of assign ops, one per (online, target) variable pair.

  Raises:
    ValueError: if the two networks do not have the same number of variables;
      zip() would otherwise silently leave part of the target unsynchronised.
  """
  src_vars = [v for v in tf.global_variables() if 'online' in v.name]
  tar_vars = [v for v in tf.global_variables() if 'target' in v.name]
  if len(src_vars) != len(tar_vars):
    raise ValueError(
        "online/target variable count mismatch: {} vs {}".format(
            len(src_vars), len(tar_vars)))
  return [
      tar_var.assign(src_var) for src_var, tar_var in zip(src_vars, tar_vars)
  ]


def update_target(update_ops, sess):
  """Execute `update_ops` in `sess`, overwriting the target network's variables."""
  sess.run(update_ops)

In [10]:
# Build both agents in a fresh default graph.
tf.reset_default_graph()
num_action = len(env.getActionSet())

# Online agent: the network that is updated frequently.
online_agent = Agent('online', num_action)

# Target agent: updated slowly, by running update_ops, which assign the
# online variables to the target variables.
target_agent = Agent('target', num_action)
update_ops = get_update_ops()

In [11]:
class Replay_buffer():
  """Bounded first-in-first-out store of experiences with random sampling.

  Each experience is indexable; positions 0-4 are read by sample() as
  (state, action, reward, next state, terminal flag).
  """

  def __init__(self, buffer_size=50000):
    self.buffer_size = buffer_size
    self.experiences = []

  def add(self, experience):
    """Append `experience`, dropping the oldest entry first when full."""
    is_full = len(self.experiences) >= self.buffer_size
    if is_full:
      self.experiences.pop(0)
    self.experiences.append(experience)

  def sample(self, size):
    """Sample `size` experiences from the buffer.

    Sampling is without replacement when the buffer holds at least `size`
    entries, and with replacement otherwise.

    Returns:
      tuple of five lists: (screens, actions, rewards, screens_plum, terminal).
    """
    stored = len(self.experiences)
    if size <= stored:
      chosen = np.random.choice(stored, size=size, replace=False)
    else:
      chosen = np.random.choice(stored, size=size)
    # Gather the sampled experiences, then split them field by field.
    batch = [self.experiences[chosen[k]] for k in range(size)]
    screens = [item[0] for item in batch]
    actions = [item[1] for item in batch]
    rewards = [item[2] for item in batch]
    screens_plum = [item[3] for item in batch]
    terminal = [item[4] for item in batch]
    return screens, actions, rewards, screens_plum, terminal

In [12]:
# Create the replay buffer with its default capacity.
buffer = Replay_buffer()

In [13]:
def make_anim(images, fps=60, true_image=False):
  """Build a moviepy VideoClip that plays `images` at `fps` frames per second.

  Args:
    images: non-empty sequence of frames; each frame must support `.astype`
      (e.g. a numpy array).
    fps: frames per second; the clip lasts len(images) / fps seconds.
    true_image: if True, frames are cast to uint8 unchanged. Otherwise each
      frame is rescaled with (x + 1) / 2 * 255 before the cast (so -1 maps
      to 0 and 1 maps to 255).

  Returns:
    The VideoClip, with its `fps` attribute set.

  Raises:
    ValueError: if `images` is empty.
  """
  if len(images) == 0:
    raise ValueError("make_anim needs at least one frame")

  import moviepy.editor as mpy

  duration = len(images) / fps
  last_index = len(images) - 1

  def make_frame(t):
    # Clamp to the final frame explicitly (t == duration would index one past
    # the end) instead of hiding every possible error behind a bare `except`.
    frame_index = min(int(len(images) / duration * t), last_index)
    x = images[frame_index]

    if true_image:
      return x.astype(np.uint8)
    else:
      return ((x + 1) / 2 * 255).astype(np.uint8)

  clip = mpy.VideoClip(make_frame, duration=duration)
  clip.fps = fps
  return clip

In [14]:
# Create the TensorFlow session and initialise all global variables.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory usage as needed
sess = tf.InteractiveSession(config=config)
sess.run(tf.global_variables_initializer())

In [16]:
from IPython.display import Image, display

update_every_t_step = 3
print_every_episode = 10
save_video_every_episode = 100
NUM_EPISODE = 30000
NUM_EXPLORE = 20  # no training happens during the first NUM_EXPLORE episodes

# Directory that receives the recorded videos; created up front so that
# write_videofile does not fail when it is missing.
VIDEO_DIR = "movie"
os.makedirs(VIDEO_DIR, exist_ok=True)

# The default reward function can be redefined here.
reward_values = {
    "positive": 1,  # reward for passing a pipe
    "tick": 0.1,  # reward per time step
    "loss": -1,  # reward on game over
}

# Reset the environment.
game = FlappyBird()
# For demo purposes the RNG is seeded, so training always runs in the same
# scene.
env = PLE(
    game,
    fps=30,
    display_screen=False,
    reward_values=reward_values,
    rng=np.random.RandomState(1))

for episode in range(0, NUM_EPISODE + 1):

    env.reset_game()
    env.act(0)  # dummy input to make sure the first observation is correct

    # Record frames only for episodes that are saved as a video.
    if episode % save_video_every_episode == 0:
        frames = [env.getScreenRGB()]

    # Preprocessed state features for this episode. The history starts with
    # num_stack copies of the initial state so a full stack is available from
    # the first step.
    input_screens = [preprocess(game.getGameState())] * num_stack

    # Cumulative reward for this episode.
    cum_reward = 0

    t = 0
    while not env.game_over():

        # Feed the latest num_stack states and select an action.
        action = online_agent.select_action(input_screens[-num_stack:], sess)

        # Execute the action and get the reward.
        reward = env.act(env.getActionSet()[action])

        # Record the frame.
        if episode % save_video_every_episode == 0:
            frames.append(env.getScreenRGB())

        cum_reward += reward

        # Append the new preprocessed state.
        input_screens.append(preprocess(game.getGameState()))

        # Store the transition (s, a, r, s', terminal), where s and s' are
        # the stacks before and after the action.
        buffer.add((input_screens[-(num_stack + 1):-1], action, reward,
                    input_screens[-num_stack:], env.game_over()))
        t += 1

    # Update the agent.
    # NOTE(review): these two steps run once per episode (after the while
    # loop), not once per game step, although the `t % update_every_t_step`
    # test suggests a per-step update may have been intended. Behaviour is
    # left unchanged - confirm.
    if episode > NUM_EXPLORE:
        train_screens, train_actions, train_rewards, train_screens_plum, terminal = buffer.sample(32)
        loss = online_agent.update_policy(train_screens, train_actions,
                                          train_rewards, train_screens_plum,
                                          terminal, target_agent, sess)
    if t % update_every_t_step == 0 and episode > NUM_EXPLORE:
        update_target(update_ops, sess)

    # Decay the exploring rate of both agents.
    online_agent.update_parameters(episode)
    target_agent.update_parameters(episode)

    if episode % print_every_episode == 0 and episode > NUM_EXPLORE:
        print(
            "[{}] time live:{}, cumulated reward: {}, exploring rate: {}, loss: {}"
            .format(episode, t, cum_reward, target_agent.exploring_rate, loss))

    # Every save_video_every_episode episodes, write the recorded frames to
    # a video file.
    if episode % save_video_every_episode == 0:
        clip = make_anim(frames, fps=60, true_image=True).rotate(-90)
        clip.write_videofile(
            os.path.join(VIDEO_DIR, "DQN-{}.webm".format(episode)), fps=60)

[MoviePy] >>>> Building video movie/DQN-0.webm
[MoviePy] Writing video movie/DQN-0.webm


100%|██████████| 63/63 [00:00<00:00, 63.16it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-0.webm 






[30] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09989440000000008, loss: 0.03494509682059288
[40] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0998614000000001, loss: 0.009967807680368423
[50] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09982840000000012, loss: 0.00999574363231659
[60] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09979540000000015, loss: 0.009932776913046837
[70] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09976240000000017, loss: 0.009953608736395836
[80] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.09972940000000019, loss: 0.009977791458368301
[90] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.09969640000000021, loss: 0.009904456324875355
[100] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09966340000000024, loss: 0.03517235815525055
[MoviePy] >>>> Building video movie/DQN-10

100%|██████████| 63/63 [00:00<00:00, 64.32it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-100.webm 






[110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09963040000000026, loss: 0.010157255455851555
[120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09959740000000028, loss: 0.009463757276535034
[130] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.0995644000000003, loss: 0.010001882910728455
[140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09953140000000033, loss: 0.009811398573219776
[150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09949840000000035, loss: 0.010317946784198284
[160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09946540000000037, loss: 0.03366576135158539
[170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09943240000000039, loss: 0.03545934706926346
[180] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09939940000000042, loss: 0.0630265474319458
[190] time live:61, cumulated reward:

100%|██████████| 63/63 [00:01<00:00, 58.90it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-200.webm 

[210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09930040000000048, loss: 0.008313116617500782
[220] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.0992674000000005, loss: 0.008184343576431274
[230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09923440000000053, loss: 0.07298912107944489
[240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09920140000000055, loss: 0.037058740854263306
[250] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09916840000000057, loss: 0.024569127708673477
[260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0991354000000006, loss: 0.02460087090730667
[270] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.09910240000000062, loss: 0.04592917859554291
[280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09906940000000064,

100%|██████████| 63/63 [00:00<00:00, 65.27it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-300.webm 






[310] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09897040000000071, loss: 0.008658107370138168
[320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09893740000000073, loss: 0.03646782785654068
[330] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09890440000000075, loss: 0.05088042467832565
[340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09887140000000078, loss: 0.031370941549539566
[350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0988384000000008, loss: 0.05718212574720383
[360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09880540000000082, loss: 0.062284186482429504
[370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09877240000000084, loss: 0.040150389075279236
[380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09873940000000087, loss: 0.07102090120315552
[390] time live:61, cumulated reward:

100%|██████████| 63/63 [00:00<00:00, 73.03it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-400.webm 






[410] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.09864040000000093, loss: 0.05327853932976723
[420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09860740000000096, loss: 0.058961160480976105
[430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09857440000000098, loss: 0.09097111225128174
[440] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.098541400000001, loss: 0.001973884180188179
[450] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09850840000000102, loss: 0.013451168313622475
[460] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09847540000000105, loss: 0.013781456276774406
[470] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09844240000000107, loss: 0.0009458832209929824
[480] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09840940000000109, loss: 0.04659253731369972
[490] time live:61, cumulated reward

100%|██████████| 63/63 [00:00<00:00, 70.75it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-500.webm 






[510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09831040000000116, loss: 0.11737236380577087
[520] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09827740000000118, loss: 0.020391302183270454
[530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0982444000000012, loss: 0.050247617065906525
[540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09821140000000123, loss: 0.009304661303758621
[550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09817840000000125, loss: 0.009440362453460693
[560] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09814540000000127, loss: 0.06568849086761475
[570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09811240000000129, loss: 0.05359408259391785
[580] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09807940000000132, loss: 0.004737760405987501
[590] time live:61, cumulated reward

100%|██████████| 63/63 [00:00<00:00, 70.19it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-600.webm 

[610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09798040000000138, loss: 0.0060385484248399734
[620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0979474000000014, loss: 0.0042359717190265656
[630] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09791440000000143, loss: 0.0035078609362244606
[640] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09788140000000145, loss: 0.0013882608618587255
[650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09784840000000147, loss: 0.005007256753742695
[660] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.0978154000000015, loss: 0.0014165451284497976
[670] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09778240000000152, loss: 0.0013272694777697325
[680] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.097749400

 98%|█████████▊| 60/61 [00:01<00:00, 57.26it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-700.webm 






[710] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.09765040000000161, loss: 0.0010762804886326194
[720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09761740000000163, loss: 0.0010854005813598633
[730] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09758440000000165, loss: 0.033670809119939804
[740] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09755140000000168, loss: 0.0007669334299862385
[750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0975184000000017, loss: 0.0012507385108619928
[760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09748540000000172, loss: 0.0008417492499575019
[770] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.09745240000000174, loss: 0.05343294516205788
[780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09741940000000177, loss: 0.08116316050291061
[790] time live:61, cumulated 

100%|██████████| 63/63 [00:00<00:00, 70.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-800.webm 






[810] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.09732040000000183, loss: 0.0013800251763314009
[820] time live:53, cumulated reward: 4.299999999999997, exploring rate: 0.09728740000000186, loss: 0.02699938416481018
[830] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09725440000000188, loss: 0.02697516605257988
[840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0972214000000019, loss: 0.004663033410906792
[850] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09718840000000192, loss: 0.004982227459549904
[860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09715540000000195, loss: 0.0012838027905672789
[870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09712240000000197, loss: 0.002108505694195628
[880] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09708940000000199, loss: 0.0009936690330505371
[890] time live:65, cumulated re

100%|██████████| 63/63 [00:00<00:00, 69.58it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-900.webm 

[910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09699040000000206, loss: 0.002426174469292164
[920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09695740000000208, loss: 0.0024552149698138237
[930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0969244000000021, loss: 0.050123684108257294
[940] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09689140000000213, loss: 0.0007083234377205372
[950] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.09685840000000215, loss: 0.0012117673177272081
[960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09682540000000217, loss: 0.0501229465007782
[970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0967924000000022, loss: 0.000857076607644558
[980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09675940000000

100%|██████████| 63/63 [00:01<00:00, 52.43it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1000.webm 






[1010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09666040000000228, loss: 0.02512967586517334
[1020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0966274000000023, loss: 0.001999444793909788
[1030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09659440000000233, loss: 0.04306995868682861
[1040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09656140000000235, loss: 0.0020755878649652004
[1050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09652840000000237, loss: 0.002088455716148019
[1060] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0964954000000024, loss: 0.0016392110846936703
[1070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09646240000000242, loss: 0.036924924701452255
[1080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09642940000000244, loss: 0.0010749584762379527
[1090] time live:61, cumu

100%|██████████| 63/63 [00:00<00:00, 63.70it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1100.webm 






[1110] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.09633040000000251, loss: 0.08618385344743729
[1120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09629740000000253, loss: 0.0017214927356690168
[1130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09626440000000255, loss: 0.0017154825618490577
[1140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09623140000000258, loss: 0.0021316809579730034
[1150] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.0961984000000026, loss: 0.001633125706575811
[1160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09616540000000262, loss: 0.0022978726774454117
[1170] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.09613240000000264, loss: 0.08882252126932144
[1180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09609940000000267, loss: 0.003317886497825384
[1190] time live:61,

 99%|█████████▊| 67/68 [00:00<00:00, 77.29it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1200.webm 






[1210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09600040000000273, loss: 0.001805833075195551
[1220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09596740000000276, loss: 0.0014281307812780142
[1230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09593440000000278, loss: 0.0034632361494004726
[1240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0959014000000028, loss: 0.0531051941215992
[1250] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.09586840000000282, loss: 0.027272198349237442
[1260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09583540000000285, loss: 0.002204740419983864
[1270] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09580240000000287, loss: 0.17106619477272034
[1280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09576940000000289, loss: 0.0011121048592031002
[1290] time live:61, cumu

100%|██████████| 63/63 [00:00<00:00, 70.76it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1300.webm 






[1310] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.09567040000000296, loss: 0.002574748359620571
[1320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09563740000000298, loss: 0.005903549958020449
[1330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.095604400000003, loss: 0.0031137114856392145
[1340] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.09557140000000303, loss: 0.001948416349478066
[1350] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.09553840000000305, loss: 0.048312827944755554
[1360] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.09550540000000307, loss: 0.003031478263437748
[1370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0954724000000031, loss: 0.0010693621588870883
[1380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09543940000000312, loss: 0.001445447443984449
[1390] time live:61, cumu

 98%|█████████▊| 52/53 [00:00<00:00, 67.43it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1400.webm 

[1410] time live:48, cumulated reward: 3.799999999999999, exploring rate: 0.09534040000000318, loss: 0.02564212679862976
[1420] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.0953074000000032, loss: 0.041474319994449615
[1430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09527440000000323, loss: 0.0017892700852826238
[1440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09524140000000325, loss: 1.1332772970199585
[1450] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09520840000000327, loss: 0.015979373827576637
[1460] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.0951754000000033, loss: 0.0035461820662021637
[1470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09514240000000332, loss: 0.23292212188243866
[1480] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.0951094

100%|██████████| 63/63 [00:00<00:00, 63.29it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1500.webm 






[1510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09501040000000341, loss: 0.025551844388246536
[1520] time live:49, cumulated reward: 3.8999999999999986, exploring rate: 0.09497740000000343, loss: 0.0018810246838256717
[1530] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09494440000000345, loss: 0.023225119337439537
[1540] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.09491140000000348, loss: 0.0012668652925640345
[1550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0948784000000035, loss: 0.002172605600208044
[1560] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.09484540000000352, loss: 0.03217870369553566
[1570] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.09481240000000354, loss: 0.0027522961609065533
[1580] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.09477940000000357, loss: 1.1821022033691406
[1590] time live:61, c

 98%|█████████▊| 63/64 [00:00<00:00, 76.55it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1600.webm 






[1610] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.09468040000000363, loss: 0.020258048549294472
[1620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09464740000000366, loss: 0.021194536238908768
[1630] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09461440000000368, loss: 0.045083194971084595
[1640] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.0945814000000037, loss: 0.07882282882928848
[1650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09454840000000372, loss: 0.11645210534334183
[1660] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.09451540000000375, loss: 0.04783155769109726
[1670] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.09448240000000377, loss: 0.004426519386470318
[1680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09444940000000379, loss: 1.2152388095855713
[1690] time live:61, cumula

 98%|█████████▊| 58/59 [00:00<00:00, 66.58it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1700.webm 

[1710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09435040000000386, loss: 0.2664649486541748
[1720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09431740000000388, loss: 1.9866222143173218
[1730] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.0942844000000039, loss: 0.2742999494075775
[1740] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.09425140000000393, loss: 0.00928286463022232
[1750] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.09421840000000395, loss: 0.009445981122553349
[1760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09418540000000397, loss: 0.004693132825195789
[1770] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.094152400000004, loss: 0.038527749478816986
[1780] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.094119400000004

100%|██████████| 63/63 [00:00<00:00, 64.04it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1800.webm 






[1810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09402040000000408, loss: 0.005310674663633108
[1820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0939874000000041, loss: 0.006104331463575363
[1830] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09395440000000413, loss: 0.0552656315267086
[1840] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09392140000000415, loss: 0.1305675357580185
[1850] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.09388840000000417, loss: 0.025874672457575798
[1860] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.0938554000000042, loss: 0.17119576036930084
[1870] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.09382240000000422, loss: 0.004666443448513746
[1880] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.09378940000000424, loss: 0.019569512456655502
[1890] time live:61, cumulate

 98%|█████████▊| 61/62 [00:00<00:00, 70.89it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-1900.webm 

[1910] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.09369040000000431, loss: 0.0027381074614822865
[1920] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.09365740000000433, loss: 0.002213675994426012
[1930] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.09362440000000435, loss: 0.04664500430226326
[1940] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.09359140000000438, loss: 0.003780701430514455
[1950] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.0935584000000044, loss: 0.02289312519133091
[1960] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.09352540000000442, loss: 0.004407089669257402
[1970] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.09349240000000444, loss: 0.0043257297948002815
[1980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0

 98%|█████████▊| 57/58 [00:00<00:00, 74.97it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2000.webm 






[2010] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.09336040000000453, loss: 0.7110528945922852
[2020] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.09332740000000456, loss: 0.04011014848947525
[2030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09329440000000458, loss: 0.008412518538534641
[2040] time live:52, cumulated reward: 4.1999999999999975, exploring rate: 0.0932614000000046, loss: 0.031093621626496315
[2050] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09322840000000462, loss: 0.030311698094010353
[2060] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.09319540000000465, loss: 0.012492816895246506
[2070] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.09316240000000467, loss: 0.006442307494580746
[2080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09312940000000469, loss: 0.037867818027734756
[2090] time live:61, cum

 98%|█████████▊| 61/62 [00:00<00:00, 72.57it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2100.webm 






[2110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09303040000000476, loss: 0.007143231574445963
[2120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09299740000000478, loss: 0.0054861148819327354
[2130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0929644000000048, loss: 0.002979415701702237
[2140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09293140000000483, loss: 0.007977191358804703
[2150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09289840000000485, loss: 0.01616598293185234
[2160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09286540000000487, loss: 0.0028108737897127867
[2170] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.0928324000000049, loss: 0.017160385847091675
[2180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09279940000000492, loss: 0.01355652790516615
[2190] time live:61, cumul

100%|██████████| 63/63 [00:00<00:00, 68.65it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2200.webm 






[2210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09270040000000498, loss: 0.006713526789098978
[2220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.092667400000005, loss: 0.004458841867744923
[2230] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.09263440000000503, loss: 0.005027439445257187
[2240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09260140000000505, loss: 0.003256989410147071
[2250] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.09256840000000507, loss: 0.004110274836421013
[2260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0925354000000051, loss: 0.0037463114131242037
[2270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09250240000000512, loss: 0.03664258494973183
[2280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09246940000000514, loss: 0.06500717997550964
[2290] time live:61, cumulat

100%|██████████| 63/63 [00:00<00:00, 67.61it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2300.webm 






[2310] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09237040000000521, loss: 0.0021107858046889305
[2320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09233740000000523, loss: 0.9213386178016663
[2330] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.09230440000000525, loss: 0.45095548033714294
[2340] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.09227140000000528, loss: 0.45383164286613464
[2350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0922384000000053, loss: 0.037364616990089417
[2360] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09220540000000532, loss: 0.0042259953916072845
[2370] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.09217240000000534, loss: 0.01567823998630047
[2380] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.09213940000000537, loss: 0.12114106118679047
[2390] time live:61, cumulat

100%|██████████| 63/63 [00:01<00:00, 60.21it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2400.webm 






[2410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09204040000000543, loss: 0.007140251807868481
[2420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09200740000000546, loss: 0.005786830093711615
[2430] time live:48, cumulated reward: 3.799999999999999, exploring rate: 0.09197440000000548, loss: 0.0075172665528953075
[2440] time live:36, cumulated reward: 2.6000000000000023, exploring rate: 0.0919414000000055, loss: 0.00222259433940053
[2450] time live:60, cumulated reward: 4.999999999999995, exploring rate: 0.09190840000000552, loss: 0.003407346550375223
[2460] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.09187540000000555, loss: 0.003374590538442135
[2470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09184240000000557, loss: 0.012893830426037312
[2480] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09180940000000559, loss: 0.5530239939689636
[2490] time live:61, cumu

100%|██████████| 63/63 [00:01<00:00, 40.90it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2500.webm 






[2510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09171040000000566, loss: 0.016583379358053207
[2520] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.09167740000000568, loss: 0.09714610129594803
[2530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0916444000000057, loss: 0.19258825480937958
[2540] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.09161140000000573, loss: 0.08382812142372131
[2550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09157840000000575, loss: 0.8599003553390503
[2560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09154540000000577, loss: 0.0068550570867955685
[2570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0915124000000058, loss: 0.008470901288092136
[2580] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.09147940000000582, loss: 0.003801086451858282
[2590] time live:86, cumulated

 99%|█████████▊| 72/73 [00:00<00:00, 79.04it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2600.webm 






[2610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09138040000000588, loss: 0.005446592811495066
[2620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09134740000000591, loss: 0.004301534499973059
[2630] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09131440000000593, loss: 0.00407420564442873
[2640] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.09128140000000595, loss: 0.007388426922261715
[2650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09124840000000597, loss: 0.003185983980074525
[2660] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.091215400000006, loss: 0.003949853125959635
[2670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09118240000000602, loss: 0.014593739993870258
[2680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09114940000000604, loss: 0.014078985899686813
[2690] time live:77, cumula

100%|██████████| 63/63 [00:00<00:00, 68.41it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2700.webm 






[2710] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.09105040000000611, loss: 0.39126086235046387
[2720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09101740000000613, loss: 0.4913805425167084
[2730] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09098440000000615, loss: 0.017572784796357155
[2740] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.09095140000000618, loss: 0.006929374299943447
[2750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0909184000000062, loss: 0.019974108785390854
[2760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09088540000000622, loss: 0.017040561884641647
[2770] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09085240000000624, loss: 0.07023990154266357
[2780] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.09081940000000627, loss: 0.31392940878868103
[2790] time live:61, cumulated

100%|██████████| 63/63 [00:00<00:00, 64.97it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2800.webm 






[2810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09072040000000633, loss: 0.011448530480265617
[2820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09068740000000636, loss: 0.018004991114139557
[2830] time live:52, cumulated reward: 4.1999999999999975, exploring rate: 0.09065440000000638, loss: 0.4097287356853485
[2840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0906214000000064, loss: 0.00806442927569151
[2850] time live:52, cumulated reward: 4.1999999999999975, exploring rate: 0.09058840000000642, loss: 0.005912636406719685
[2860] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.09055540000000645, loss: 0.003490100847557187
[2870] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.09052240000000647, loss: 0.006997162476181984
[2880] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.09048940000000649, loss: 0.10730535537004471
[2890] time live:61, cumula

100%|██████████| 63/63 [00:01<00:00, 60.91it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-2900.webm 






[2910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09039040000000656, loss: 0.278713583946228
[2920] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.09035740000000658, loss: 0.04276939481496811
[2930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0903244000000066, loss: 0.09845447540283203
[2940] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09029140000000663, loss: 0.15438216924667358
[2950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09025840000000665, loss: 0.008407216519117355
[2960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09022540000000667, loss: 0.017504803836345673
[2970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0901924000000067, loss: 0.059078775346279144
[2980] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.09015940000000672, loss: 0.06508985906839371
[2990] time live:61, cumulated rew

 98%|█████████▊| 56/57 [00:00<00:00, 70.44it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3000.webm 






[3010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09006040000000679, loss: 0.013236074708402157
[3020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.09002740000000681, loss: 0.0081595154479146
[3030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08999440000000683, loss: 0.02208196185529232
[3040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08996140000000685, loss: 0.005444550421088934
[3050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08992840000000688, loss: 0.036042481660842896
[3060] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.0898954000000069, loss: 0.006631731055676937
[3070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08986240000000692, loss: 0.009674238041043282
[3080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08982940000000694, loss: 0.005438179709017277
[3090] time live:61, cumulat

100%|██████████| 63/63 [00:00<00:00, 74.27it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3100.webm 






[3110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08973040000000701, loss: 0.008475703187286854
[3120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08969740000000703, loss: 0.2478637397289276
[3130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08966440000000706, loss: 0.005026283673942089
[3140] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.08963140000000708, loss: 0.005403583403676748
[3150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0895984000000071, loss: 0.045784156769514084
[3160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08956540000000712, loss: 0.03948104381561279
[3170] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.08953240000000715, loss: 0.1402054727077484
[3180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.08949940000000717, loss: 0.0033717043697834015
[3190] time live:61, cumulate

100%|██████████| 63/63 [00:00<00:00, 66.87it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3200.webm 






[3210] time live:88, cumulated reward: 8.799999999999985, exploring rate: 0.08940040000000724, loss: 0.07679077982902527
[3220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08936740000000726, loss: 0.010432001203298569
[3230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08933440000000728, loss: 0.004672141745686531
[3240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0893014000000073, loss: 0.004492606036365032
[3250] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.08926840000000733, loss: 0.04124993085861206
[3260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08923540000000735, loss: 0.006498155649751425
[3270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08920240000000737, loss: 0.04976534843444824
[3280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08916940000000739, loss: 0.004499446600675583
[3290] time live:65, cumulat

100%|██████████| 63/63 [00:00<00:00, 71.25it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3300.webm 






[3310] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08907040000000746, loss: 0.13223153352737427
[3320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08903740000000748, loss: 0.0052093882113695145
[3330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0890044000000075, loss: 0.010941180400550365
[3340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08897140000000753, loss: 0.02080943062901497
[3350] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.08893840000000755, loss: 0.017240062355995178
[3360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08890540000000757, loss: 0.06868241727352142
[3370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0888724000000076, loss: 0.007507605943828821
[3380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08883940000000762, loss: 0.016268469393253326
[3390] time live:61, cumulat

 95%|█████████▌| 19/20 [00:00<00:00, 65.19it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3400.webm 






[3410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08874040000000769, loss: 0.13309484720230103
[3420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08870740000000771, loss: 0.01096600666642189
[3430] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.08867440000000773, loss: 0.020613403990864754
[3440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08864140000000775, loss: 0.00646460335701704
[3450] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.08860840000000778, loss: 0.003935714717954397
[3460] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0885754000000078, loss: 0.004827587399631739
[3470] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.08854240000000782, loss: 0.12796810269355774
[3480] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08850940000000784, loss: 0.03125951439142227
[3490] time live:61, cumulat

 99%|█████████▉| 109/110 [00:01<00:00, 66.31it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3500.webm 

[3510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08841040000000791, loss: 0.12033707648515701
[3520] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08837740000000793, loss: 0.011398077011108398
[3530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08834440000000796, loss: 0.04326808825135231
[3540] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.08831140000000798, loss: 0.06148567423224449
[3550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.088278400000008, loss: 0.009974262677133083
[3560] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.08824540000000802, loss: 0.005397716071456671
[3570] time live:35, cumulated reward: 2.500000000000002, exploring rate: 0.08821240000000805, loss: 0.008612368255853653
[3580] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08817940

100%|██████████| 63/63 [00:00<00:00, 66.26it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3600.webm 






[3610] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.08808040000000814, loss: 0.014664854854345322
[3620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08804740000000816, loss: 0.0070179784670472145
[3630] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.08801440000000818, loss: 0.00832885317504406
[3640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0879814000000082, loss: 0.0037327499594539404
[3650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08794840000000823, loss: 0.011174555867910385
[3660] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08791540000000825, loss: 0.046100467443466187
[3670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08788240000000827, loss: 0.006955986842513084
[3680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0878494000000083, loss: 0.014620944857597351
[3690] time live:53, cumul

 95%|█████████▌| 19/20 [00:00<00:00, 76.60it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3700.webm 






[3710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08775040000000836, loss: 0.3900722861289978
[3720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08771740000000838, loss: 0.031187091022729874
[3730] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.0876844000000084, loss: 0.5602245330810547
[3740] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08765140000000843, loss: 0.08726735413074493
[3750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08761840000000845, loss: 0.007702855858951807
[3760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08758540000000847, loss: 0.005603572353720665
[3770] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.0875524000000085, loss: 0.009160333313047886
[3780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08751940000000852, loss: 0.09481169283390045
[3790] time live:61, cumulated 

 98%|█████████▊| 64/65 [00:01<00:00, 65.38it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3800.webm 






[3810] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.08742040000000859, loss: 0.44680917263031006
[3820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08738740000000861, loss: 0.007263518404215574
[3830] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.08735440000000863, loss: 0.005802728235721588
[3840] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.08732140000000865, loss: 0.031358882784843445
[3850] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.08728840000000868, loss: 0.008507923223078251
[3860] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.0872554000000087, loss: 0.01403726264834404
[3870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08722240000000872, loss: 0.130059152841568
[3880] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.08718940000000874, loss: 0.009339374490082264
[3890] time live:34, cumula

100%|██████████| 63/63 [00:01<00:00, 60.54it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-3900.webm 






[3910] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.08709040000000881, loss: 0.21150051057338715
[3920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08705740000000883, loss: 0.030049314722418785
[3930] time live:22, cumulated reward: 1.200000000000001, exploring rate: 0.08702440000000886, loss: 0.007738340646028519
[3940] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.08699140000000888, loss: 0.008062474429607391
[3950] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.0869584000000089, loss: 0.48981791734695435
[3960] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.08692540000000892, loss: 0.011877551674842834
[3970] time live:49, cumulated reward: 3.8999999999999986, exploring rate: 0.08689240000000895, loss: 0.07744104415178299
[3980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08685940000000897, loss: 0.006618344224989414
[3990] time live:61, cumu

100%|██████████| 63/63 [00:01<00:00, 53.92it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4000.webm 






[4010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08676040000000904, loss: 0.004530302248895168
[4020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08672740000000906, loss: 0.04676502197980881
[4030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08669440000000908, loss: 0.1778091937303543
[4040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0866614000000091, loss: 0.010465019382536411
[4050] time live:21, cumulated reward: 1.100000000000001, exploring rate: 0.08662840000000913, loss: 0.010627700947225094
[4060] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.08659540000000915, loss: 0.07613860070705414
[4070] time live:43, cumulated reward: 3.3000000000000007, exploring rate: 0.08656240000000917, loss: 0.043488599359989166
[4080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0865294000000092, loss: 0.004853305406868458
[4090] time live:37, cumulate

 98%|█████████▊| 63/64 [00:01<00:00, 61.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4100.webm 






[4110] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.08643040000000926, loss: 0.012757831253111362
[4120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08639740000000928, loss: 0.007806189823895693
[4130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0863644000000093, loss: 0.16591736674308777
[4140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08633140000000933, loss: 0.004501934163272381
[4150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08629840000000935, loss: 0.20351703464984894
[4160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08626540000000937, loss: 0.28736209869384766
[4170] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.0862324000000094, loss: 0.011282579973340034
[4180] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.08619940000000942, loss: 0.006092175841331482
[4190] time live:68, cumulat

100%|██████████| 63/63 [00:00<00:00, 63.68it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4200.webm 






[4210] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.08610040000000949, loss: 0.16555750370025635
[4220] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.08606740000000951, loss: 0.005174866411834955
[4230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08603440000000953, loss: 0.24794165790081024
[4240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08600140000000955, loss: 0.3391963243484497
[4250] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.08596840000000958, loss: 0.2491958886384964
[4260] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0859354000000096, loss: 0.29082080721855164
[4270] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.08590240000000962, loss: 0.008973021060228348
[4280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08586940000000964, loss: 0.1656605452299118
[4290] time live:61, cumulated rewa

100%|██████████| 63/63 [00:00<00:00, 72.54it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4300.webm 






[4310] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08577040000000971, loss: 0.04219109192490578
[4320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08573740000000973, loss: 0.00782858394086361
[4330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08570440000000976, loss: 0.00598120829090476
[4340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08567140000000978, loss: 0.06174859777092934
[4350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0856384000000098, loss: 0.0025811041705310345
[4360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08560540000000982, loss: 0.007463871967047453
[4370] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.08557240000000985, loss: 0.2572334408760071
[4380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08553940000000987, loss: 0.34603938460350037
[4390] time live:66, cumulated

100%|██████████| 63/63 [00:00<00:00, 76.49it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4400.webm 






[4410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08544040000000994, loss: 0.003413191996514797
[4420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08540740000000996, loss: 0.009626674465835094
[4430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08537440000000998, loss: 0.010554601438343525
[4440] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.08534140000001, loss: 0.010374734178185463
[4450] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.08530840000001003, loss: 0.04169154167175293
[4460] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08527540000001005, loss: 0.013720696792006493
[4470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08524240000001007, loss: 0.007871199399232864
[4480] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0852094000000101, loss: 0.004138639196753502
[4490] time live:61, cumulat

100%|██████████| 63/63 [00:00<00:00, 65.51it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4500.webm 






[4510] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.08511040000001016, loss: 0.009766394272446632
[4520] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.08507740000001018, loss: 0.03428572416305542
[4530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0850444000000102, loss: 0.4582440257072449
[4540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08501140000001023, loss: 0.016725454479455948
[4550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08497840000001025, loss: 0.012588388286530972
[4560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08494540000001027, loss: 0.005353870335966349
[4570] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.0849124000000103, loss: 0.014846880920231342
[4580] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.08487940000001032, loss: 0.09902308881282806
[4590] time live:61, cumulate

100%|██████████| 63/63 [00:00<00:00, 67.23it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4600.webm 






[4610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08478040000001039, loss: 0.008943076245486736
[4620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08474740000001041, loss: 0.13821405172348022
[4630] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08471440000001043, loss: 0.009278953075408936
[4640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08468140000001045, loss: 0.006332363002002239
[4650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08464840000001048, loss: 0.04447251185774803
[4660] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.0846154000000105, loss: 0.00967847928404808
[4670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08458240000001052, loss: 0.2812170088291168
[4680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08454940000001054, loss: 0.011831045150756836
[4690] time live:74, cumulated

 99%|█████████▊| 74/75 [00:01<00:00, 69.80it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4700.webm 






[4710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08445040000001061, loss: 0.010501530021429062
[4720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08441740000001063, loss: 0.038022685796022415
[4730] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.08438440000001066, loss: 0.017219940200448036
[4740] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.08435140000001068, loss: 0.01370670460164547
[4750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0843184000000107, loss: 0.014730687253177166
[4760] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.08428540000001072, loss: 0.4713165760040283
[4770] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.08425240000001075, loss: 0.012436110526323318
[4780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08421940000001077, loss: 0.01537057850509882
[4790] time live:71, cumulated

100%|██████████| 63/63 [00:00<00:00, 66.70it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4800.webm 






[4810] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.08412040000001084, loss: 0.12338225543498993
[4820] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.08408740000001086, loss: 0.029508918523788452
[4830] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.08405440000001088, loss: 0.010977599769830704
[4840] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.0840214000000109, loss: 0.007962498813867569
[4850] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.08398840000001093, loss: 0.04168594628572464
[4860] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.08395540000001095, loss: 0.04220516234636307
[4870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08392240000001097, loss: 0.03511485084891319
[4880] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.083889400000011, loss: 0.014064842835068703
[4890] time live:61, cumulated

 98%|█████████▊| 52/53 [00:00<00:00, 62.07it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-4900.webm 






[4910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08379040000001106, loss: 0.014912234619259834
[4920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08375740000001108, loss: 0.3517816364765167
[4930] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.0837244000000111, loss: 0.009564308449625969
[4940] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.08369140000001113, loss: 0.005392719060182571
[4950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08365840000001115, loss: 0.013020388782024384
[4960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08362540000001117, loss: 0.005997408181428909
[4970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0835924000000112, loss: 0.004247071221470833
[4980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08355940000001122, loss: 0.007583959493786097
[4990] time live:18, cumulate

 98%|█████████▊| 64/65 [00:00<00:00, 76.24it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5000.webm 






[5010] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.08346040000001129, loss: 0.17579476535320282
[5020] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.08342740000001131, loss: 0.02359943464398384
[5030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08339440000001133, loss: 0.15495671331882477
[5040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08336140000001135, loss: 0.02047584019601345
[5050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08332840000001138, loss: 0.03839592635631561
[5060] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0832954000000114, loss: 0.12585391104221344
[5070] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.08326240000001142, loss: 0.012309371493756771
[5080] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.08322940000001144, loss: 0.1471852958202362
[5090] time live:61, cumulated rew

 99%|█████████▊| 66/67 [00:01<00:00, 63.99it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5100.webm 






[5110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08313040000001151, loss: 0.017502065747976303
[5120] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.08309740000001153, loss: 0.021328389644622803
[5130] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.08306440000001156, loss: 0.017885031178593636
[5140] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.08303140000001158, loss: 0.022240594029426575
[5150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0829984000000116, loss: 0.08166192471981049
[5160] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.08296540000001162, loss: 0.13267040252685547
[5170] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.08293240000001165, loss: 0.011911056935787201
[5180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08289940000001167, loss: 0.03378354385495186
[5190] time live:67, cumulated

100%|██████████| 63/63 [00:00<00:00, 69.70it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5200.webm 






[5210] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.08280040000001174, loss: 0.013962039723992348
[5220] time live:46, cumulated reward: 3.5999999999999996, exploring rate: 0.08276740000001176, loss: 0.09153427928686142
[5230] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.08273440000001178, loss: 0.007215338759124279
[5240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0827014000000118, loss: 0.012779430486261845
[5250] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08266840000001183, loss: 0.08388659358024597
[5260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08263540000001185, loss: 0.017174873501062393
[5270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08260240000001187, loss: 0.007362536154687405
[5280] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0825694000000119, loss: 0.013684454374015331
[5290] time live:97, cumul

 98%|█████████▊| 64/65 [00:00<00:00, 68.90it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5300.webm 






[5310] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08247040000001196, loss: 0.13340964913368225
[5320] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.08243740000001198, loss: 0.017163822427392006
[5330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.082404400000012, loss: 0.016400201246142387
[5340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08237140000001203, loss: 0.08126602321863174
[5350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08233840000001205, loss: 0.010914766229689121
[5360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08230540000001207, loss: 0.011656174436211586
[5370] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.0822724000000121, loss: 0.01657206565141678
[5380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08223940000001212, loss: 0.01315462775528431
[5390] time live:61, cumulated

100%|██████████| 63/63 [00:01<00:00, 54.14it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5400.webm 






[5410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08214040000001219, loss: 0.06732414662837982
[5420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08210740000001221, loss: 0.009905136190354824
[5430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08207440000001223, loss: 0.09291531890630722
[5440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08204140000001225, loss: 0.01748034358024597
[5450] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.08200840000001228, loss: 0.08220458775758743
[5460] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0819754000000123, loss: 0.15124331414699554
[5470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08194240000001232, loss: 0.010278427973389626
[5480] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.08190940000001234, loss: 0.061775483191013336
[5490] time live:73, cumulated

100%|██████████| 63/63 [00:01<00:00, 51.13it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5500.webm 






[5510] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.08181040000001241, loss: 0.011825134977698326
[5520] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08177740000001243, loss: 0.00682845851406455
[5530] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.08174440000001246, loss: 0.007269238121807575
[5540] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.08171140000001248, loss: 0.18614962697029114
[5550] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.0816784000000125, loss: 0.01076430082321167
[5560] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.08164540000001252, loss: 0.08546675741672516
[5570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08161240000001255, loss: 0.03317797929048538
[5580] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08157940000001257, loss: 0.11876687407493591
[5590] time live:69, cumulated 

100%|██████████| 63/63 [00:01<00:00, 57.51it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5600.webm 

[5610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08148040000001264, loss: 0.014332943595945835
[5620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08144740000001266, loss: 0.03593706712126732
[5630] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08141440000001268, loss: 0.021791940554976463
[5640] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0813814000000127, loss: 0.12691421806812286
[5650] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.08134840000001273, loss: 0.00908590853214264
[5660] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.08131540000001275, loss: 0.011335060000419617
[5670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08128240000001277, loss: 0.01575533114373684
[5680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08124940

100%|██████████| 63/63 [00:01<00:00, 48.23it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5700.webm 






[5710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08115040000001286, loss: 0.1094454750418663
[5720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08111740000001288, loss: 0.039404191076755524
[5730] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.08108440000001291, loss: 0.011096651665866375
[5740] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.08105140000001293, loss: 0.04305693879723549
[5750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08101840000001295, loss: 0.007230085786432028
[5760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08098540000001297, loss: 0.04990753158926964
[5770] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.080952400000013, loss: 0.007244484964758158
[5780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08091940000001302, loss: 0.015134826302528381
[5790] time live:61, cumulated

100%|██████████| 63/63 [00:01<00:00, 58.47it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5800.webm 






[5810] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.08082040000001309, loss: 0.11024849116802216
[5820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08078740000001311, loss: 0.007298966869711876
[5830] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08075440000001313, loss: 0.03304458409547806
[5840] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.08072140000001315, loss: 0.013547481968998909
[5850] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08068840000001318, loss: 0.004779560025781393
[5860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0806554000000132, loss: 0.02674712985754013
[5870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08062240000001322, loss: 0.014839885756373405
[5880] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.08058940000001324, loss: 0.012413914315402508
[5890] time live:61, cumulate

100%|██████████| 78/78 [00:01<00:00, 73.89it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-5900.webm 






[5910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08049040000001331, loss: 0.06556817144155502
[5920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08045740000001333, loss: 0.06858502328395844
[5930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08042440000001336, loss: 0.0059754494577646255
[5940] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.08039140000001338, loss: 0.007671535015106201
[5950] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.0803584000000134, loss: 0.10344401001930237
[5960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08032540000001342, loss: 0.011510614305734634
[5970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08029240000001345, loss: 0.007894005626440048
[5980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08025940000001347, loss: 0.03420772776007652
[5990] time live:61, cumulat

100%|██████████| 63/63 [00:01<00:00, 38.54it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6000.webm 






[6010] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.08016040000001354, loss: 0.016593798995018005
[6020] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.08012740000001356, loss: 0.031332168728113174
[6030] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.08009440000001358, loss: 0.014269430190324783
[6040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0800614000000136, loss: 0.03743857890367508
[6050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.08002840000001363, loss: 0.05239371955394745
[6060] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.07999540000001365, loss: 0.04044633358716965
[6070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07996240000001367, loss: 0.004067329689860344
[6080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0799294000000137, loss: 0.009044967591762543
[6090] time live:61, cumula

100%|██████████| 63/63 [00:00<00:00, 66.83it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6100.webm 






[6110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07983040000001376, loss: 0.037785377353429794
[6120] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07979740000001379, loss: 0.02956838347017765
[6130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07976440000001381, loss: 0.053913746029138565
[6140] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07973140000001383, loss: 0.01089697889983654
[6150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07969840000001385, loss: 0.014578137546777725
[6160] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.07966540000001388, loss: 0.008862799033522606
[6170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0796324000000139, loss: 0.040405403822660446
[6180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07959940000001392, loss: 0.012326684780418873
[6190] time live:61, cumula

100%|██████████| 63/63 [00:00<00:00, 77.81it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6200.webm 






[6210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07950040000001399, loss: 0.00891096144914627
[6220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07946740000001401, loss: 0.009449145756661892
[6230] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.07943440000001403, loss: 0.008545736782252789
[6240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07940140000001406, loss: 0.007277614437043667
[6250] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.07936840000001408, loss: 0.011105186305940151
[6260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0793354000000141, loss: 0.010322405956685543
[6270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07930240000001412, loss: 0.05230981484055519
[6280] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.07926940000001415, loss: 0.011476265266537666
[6290] time live:68, cumula

100%|██████████| 63/63 [00:00<00:00, 64.06it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6300.webm 






[6310] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.07917040000001421, loss: 0.09000912308692932
[6320] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.07913740000001424, loss: 0.00986158661544323
[6330] time live:45, cumulated reward: 3.5, exploring rate: 0.07910440000001426, loss: 0.009436121210455894
[6340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07907140000001428, loss: 0.010666903108358383
[6350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0790384000000143, loss: 0.032350458204746246
[6360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07900540000001433, loss: 0.008401969447731972
[6370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07897240000001435, loss: 0.017974600195884705
[6380] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07893940000001437, loss: 0.007857273332774639
[6390] time live:63, cumulated reward: 5.

100%|██████████| 63/63 [00:00<00:00, 64.37it/s]

[MoviePy] Done.





[MoviePy] >>>> Video ready: movie/DQN-6400.webm 

[6410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07884040000001444, loss: 0.02145984396338463
[6420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07880740000001446, loss: 0.015088379383087158
[6430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07877440000001448, loss: 0.03647402673959732
[6440] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.0787414000000145, loss: 0.008040469139814377
[6450] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07870840000001453, loss: 0.014721203595399857
[6460] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07867540000001455, loss: 0.02867760881781578
[6470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07864240000001457, loss: 0.02967488020658493
[6480] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0786094000000146, loss: 0

 99%|█████████▉| 98/99 [00:01<00:00, 79.85it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6500.webm 






[6510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07851040000001466, loss: 0.0086862463504076
[6520] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.07847740000001469, loss: 0.012336427345871925
[6530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07844440000001471, loss: 0.021363938227295876
[6540] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.07841140000001473, loss: 0.039042480289936066
[6550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07837840000001475, loss: 0.005896736402064562
[6560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07834540000001478, loss: 0.0055594476871192455
[6570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0783124000000148, loss: 0.008867344819009304
[6580] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07827940000001482, loss: 0.009011175483465195
[6590] time live:97, cumu

 99%|█████████▊| 67/68 [00:01<00:00, 59.86it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6600.webm 

[6610] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.07818040000001489, loss: 0.008012782782316208
[6620] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.07814740000001491, loss: 0.0033288344275206327
[6630] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07811440000001493, loss: 0.03034854866564274
[6640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07808140000001496, loss: 0.03259248286485672
[6650] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.07804840000001498, loss: 0.01689450442790985
[6660] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.078015400000015, loss: 0.006675173994153738
[6670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07798240000001502, loss: 0.004279723856598139
[6680] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.0779494

100%|██████████| 63/63 [00:01<00:00, 56.67it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6700.webm 






[6710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07785040000001511, loss: 0.007806858979165554
[6720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07781740000001514, loss: 0.005473836325109005
[6730] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07778440000001516, loss: 0.007684511132538319
[6740] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07775140000001518, loss: 0.04297574236989021
[6750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0777184000000152, loss: 0.017991239205002785
[6760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07768540000001523, loss: 0.010212069377303123
[6770] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07765240000001525, loss: 0.019872400909662247
[6780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07761940000001527, loss: 0.005735625978559256
[6790] time live:69, cumul

 99%|█████████▊| 72/73 [00:00<00:00, 85.08it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6800.webm 






[6810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07752040000001534, loss: 0.06645350158214569
[6820] time live:56, cumulated reward: 4.599999999999996, exploring rate: 0.07748740000001536, loss: 0.0988842099905014
[6830] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07745440000001538, loss: 0.01837906613945961
[6840] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.0774214000000154, loss: 0.03910597041249275
[6850] time live:53, cumulated reward: 4.299999999999997, exploring rate: 0.07738840000001543, loss: 0.0344543419778347
[6860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07735540000001545, loss: 0.004346857313066721
[6870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07732240000001547, loss: 0.0033964512404054403
[6880] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0772894000000155, loss: 0.012271607294678688
[6890] time live:61, cumulated r

 98%|█████████▊| 54/55 [00:01<00:00, 51.51it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-6900.webm 






[6910] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07719040000001556, loss: 0.006403517909348011
[6920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07715740000001559, loss: 0.005052079446613789
[6930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07712440000001561, loss: 0.007339997682720423
[6940] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.07709140000001563, loss: 0.0037216851487755775
[6950] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.07705840000001565, loss: 0.002539898967370391
[6960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07702540000001568, loss: 0.03553811088204384
[6970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0769924000000157, loss: 0.05251501128077507
[6980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07695940000001572, loss: 0.010476657189428806
[6990] time live:61, cumu

100%|██████████| 63/63 [00:00<00:00, 76.75it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7000.webm 






[7010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07686040000001579, loss: 0.045771874487400055
[7020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07682740000001581, loss: 0.005546878091990948
[7030] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.07679440000001583, loss: 0.047485221177339554
[7040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07676140000001586, loss: 0.006481090560555458
[7050] time live:48, cumulated reward: 3.799999999999999, exploring rate: 0.07672840000001588, loss: 0.02632024884223938
[7060] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.0766954000000159, loss: 0.0034395852126181126
[7070] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07666240000001592, loss: 0.04773338511586189
[7080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07662940000001595, loss: 0.020498527213931084
[7090] time live:61, cumu

100%|██████████| 63/63 [00:00<00:00, 72.38it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7100.webm 






[7110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07653040000001601, loss: 0.03588620200753212
[7120] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07649740000001604, loss: 0.0044178650714457035
[7130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07646440000001606, loss: 0.004723767749965191
[7140] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.07643140000001608, loss: 0.037023309618234634
[7150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0763984000000161, loss: 0.002723544603213668
[7160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07636540000001613, loss: 0.004342149011790752
[7170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07633240000001615, loss: 0.028907114639878273
[7180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07629940000001617, loss: 0.01695164106786251
[7190] time live:61, cumul

100%|██████████| 63/63 [00:00<00:00, 65.12it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7200.webm 

[7210] time live:44, cumulated reward: 3.4000000000000004, exploring rate: 0.07620040000001624, loss: 0.012044250965118408
[7220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07616740000001626, loss: 0.005377080291509628
[7230] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.07613440000001628, loss: 0.04285407066345215
[7240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0761014000000163, loss: 0.021752461791038513
[7250] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07606840000001633, loss: 0.03940065950155258
[7260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07603540000001635, loss: 0.014158057980239391
[7270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07600240000001637, loss: 0.019707627594470978
[7280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.075969

100%|██████████| 63/63 [00:00<00:00, 73.96it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7300.webm 






[7310] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.07587040000001646, loss: 0.005781193263828754
[7320] time live:43, cumulated reward: 3.3000000000000007, exploring rate: 0.07583740000001649, loss: 0.03674866259098053
[7330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07580440000001651, loss: 0.0018343966221436858
[7340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07577140000001653, loss: 0.0051008956506848335
[7350] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.07573840000001655, loss: 0.018754806369543076
[7360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07570540000001658, loss: 0.0064783766865730286
[7370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0756724000000166, loss: 0.03619726374745369
[7380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07563940000001662, loss: 0.0054800305515527725
[7390] time live:61, cu

100%|██████████| 63/63 [00:00<00:00, 72.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7400.webm 






[7410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07554040000001669, loss: 0.0034590866416692734
[7420] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.07550740000001671, loss: 0.004824596922844648
[7430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07547440000001673, loss: 0.005855954717844725
[7440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07544140000001676, loss: 0.017969558015465736
[7450] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.07540840000001678, loss: 0.008024071343243122
[7460] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0753754000000168, loss: 0.05221351608633995
[7470] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.07534240000001682, loss: 0.027784207835793495
[7480] time live:54, cumulated reward: 4.399999999999997, exploring rate: 0.07530940000001685, loss: 0.017002670094370842
[7490] time live:63, cumu

100%|██████████| 63/63 [00:00<00:00, 67.32it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7500.webm 






[7510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07521040000001691, loss: 0.005316229071468115
[7520] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07517740000001694, loss: 0.03939889743924141
[7530] time live:93, cumulated reward: 9.299999999999983, exploring rate: 0.07514440000001696, loss: 0.034408263862133026
[7540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07511140000001698, loss: 0.004535309970378876
[7550] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.075078400000017, loss: 0.0065847765654325485
[7560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07504540000001703, loss: 0.003585131373256445
[7570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07501240000001705, loss: 0.002539405133575201
[7580] time live:42, cumulated reward: 3.200000000000001, exploring rate: 0.07497940000001707, loss: 0.04660942032933235
[7590] time live:61, cumul

100%|██████████| 63/63 [00:00<00:00, 71.13it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7600.webm 






[7610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07488040000001714, loss: 0.0432233028113842
[7620] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.07484740000001716, loss: 0.019267642870545387
[7630] time live:52, cumulated reward: 4.1999999999999975, exploring rate: 0.07481440000001718, loss: 0.004128113389015198
[7640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0747814000000172, loss: 0.014889046549797058
[7650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07474840000001723, loss: 0.004232232458889484
[7660] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07471540000001725, loss: 0.003423606511205435
[7670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07468240000001727, loss: 0.0362045057117939
[7680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0746494000000173, loss: 0.01753908582031727
[7690] time live:61, cumulated

 98%|█████████▊| 61/62 [00:00<00:00, 65.67it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7700.webm 






[7710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07455040000001736, loss: 0.014317762106657028
[7720] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.07451740000001739, loss: 0.0033882628194987774
[7730] time live:49, cumulated reward: 3.8999999999999986, exploring rate: 0.07448440000001741, loss: 0.019718194380402565
[7740] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07445140000001743, loss: 0.05486863851547241
[7750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07441840000001745, loss: 0.04677949100732803
[7760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07438540000001748, loss: 0.009592393413186073
[7770] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0743524000000175, loss: 0.019281573593616486
[7780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07431940000001752, loss: 0.004378315526992083
[7790] time live:61, cumu

100%|██████████| 63/63 [00:00<00:00, 72.75it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7800.webm 






[7810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07422040000001759, loss: 0.013888922519981861
[7820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07418740000001761, loss: 0.00325645855627954
[7830] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.07415440000001763, loss: 0.014219846576452255
[7840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07412140000001766, loss: 0.00906328298151493
[7850] time live:46, cumulated reward: 3.5999999999999996, exploring rate: 0.07408840000001768, loss: 0.043920788913965225
[7860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0740554000000177, loss: 0.008638450875878334
[7870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07402240000001772, loss: 0.004310877062380314
[7880] time live:52, cumulated reward: 4.1999999999999975, exploring rate: 0.07398940000001775, loss: 0.004684843588620424
[7890] time live:61, cumu

100%|██████████| 63/63 [00:00<00:00, 71.98it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-7900.webm 






[7910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07389040000001781, loss: 0.005523725412786007
[7920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07385740000001784, loss: 0.026878807693719864
[7930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07382440000001786, loss: 0.014185229316353798
[7940] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07379140000001788, loss: 0.03355582803487778
[7950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0737584000000179, loss: 0.016028225421905518
[7960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07372540000001793, loss: 0.004046372137963772
[7970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07369240000001795, loss: 0.027585865929722786
[7980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07365940000001797, loss: 0.013547198846936226
[7990] time live:61, cumul

 98%|█████████▊| 65/66 [00:00<00:00, 80.21it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8000.webm 

[8010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07356040000001804, loss: 0.0018050774233415723
[8020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07352740000001806, loss: 0.036280203610658646
[8030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07349440000001808, loss: 0.02205723524093628
[8040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0734614000000181, loss: 0.0038323472253978252
[8050] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.07342840000001813, loss: 0.00547612551599741
[8060] time live:50, cumulated reward: 3.9999999999999982, exploring rate: 0.07339540000001815, loss: 0.0033045548480004072
[8070] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07336240000001817, loss: 0.0329398512840271
[8080] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07332

100%|██████████| 63/63 [00:00<00:00, 71.59it/s]

[MoviePy] Done.





[MoviePy] >>>> Video ready: movie/DQN-8100.webm 

[8110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07323040000001826, loss: 0.0028308569453656673
[8120] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07319740000001829, loss: 0.002340547274798155
[8130] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.07316440000001831, loss: 0.030900543555617332
[8140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07313140000001833, loss: 0.11389587819576263
[8150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07309840000001835, loss: 0.010670887306332588
[8160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07306540000001838, loss: 0.012359376065433025
[8170] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.0730324000000184, loss: 0.003010805696249008
[8180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07299940000001842, los

100%|██████████| 63/63 [00:00<00:00, 73.11it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8200.webm 






[8210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07290040000001849, loss: 0.004548267461359501
[8220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07286740000001851, loss: 0.043757423758506775
[8230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07283440000001853, loss: 0.016407756134867668
[8240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07280140000001856, loss: 0.023710457608103752
[8250] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07276840000001858, loss: 0.0020178775303065777
[8260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0727354000000186, loss: 0.002366000320762396
[8270] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.07270240000001862, loss: 0.019043272361159325
[8280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07266940000001865, loss: 0.004332693759351969
[8290] time live:63, c

100%|██████████| 63/63 [00:00<00:00, 71.11it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8300.webm 






[8310] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.07257040000001871, loss: 0.0035236836411058903
[8320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07253740000001874, loss: 0.006720329634845257
[8330] time live:58, cumulated reward: 4.799999999999995, exploring rate: 0.07250440000001876, loss: 0.012647273018956184
[8340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07247140000001878, loss: 0.07561424374580383
[8350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0724384000000188, loss: 0.016389671713113785
[8360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07240540000001883, loss: 0.0038575506769120693
[8370] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.07237240000001885, loss: 0.025778379291296005
[8380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07233940000001887, loss: 0.041472822427749634
[8390] time live:61, cum

100%|██████████| 63/63 [00:01<00:00, 40.67it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8400.webm 






[8410] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.07224040000001894, loss: 0.023976735770702362
[8420] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07220740000001896, loss: 0.027334850281476974
[8430] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.07217440000001898, loss: 0.044850222766399384
[8440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07214140000001901, loss: 0.005737926810979843
[8450] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.07210840000001903, loss: 0.038953494280576706
[8460] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07207540000001905, loss: 0.005267012864351273
[8470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07204240000001907, loss: 0.006662076339125633
[8480] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0720094000000191, loss: 0.06248920410871506
[8490] time live:61, cumula

100%|██████████| 63/63 [00:00<00:00, 68.09it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8500.webm 






[8510] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.07191040000001916, loss: 0.0034096611198037863
[8520] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.07187740000001919, loss: 0.004514451138675213
[8530] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.07184440000001921, loss: 0.06112400069832802
[8540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07181140000001923, loss: 0.0022353518288582563
[8550] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07177840000001925, loss: 0.0021812766790390015
[8560] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.07174540000001928, loss: 0.002724181395024061
[8570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0717124000000193, loss: 0.014576511457562447
[8580] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07167940000001932, loss: 0.05741896107792854
[8590] time live:73, cumu

100%|██████████| 63/63 [00:00<00:00, 69.90it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8600.webm 






[8610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07158040000001939, loss: 0.023194728419184685
[8620] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.07154740000001941, loss: 0.031290676444768906
[8630] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.07151440000001943, loss: 0.009648837149143219
[8640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07148140000001946, loss: 0.003886542981490493
[8650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07144840000001948, loss: 0.00455380417406559
[8660] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.0714154000000195, loss: 0.018364418298006058
[8670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07138240000001952, loss: 0.0023367227986454964
[8680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07134940000001955, loss: 0.04141407832503319
[8690] time live:61, cumu

 99%|█████████▊| 76/77 [00:01<00:00, 65.06it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8700.webm 






[8710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07125040000001961, loss: 0.09635069966316223
[8720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07121740000001964, loss: 0.005894941743463278
[8730] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.07118440000001966, loss: 0.0029421215876936913
[8740] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.07115140000001968, loss: 0.052063219249248505
[8750] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.0711184000000197, loss: 0.009106593206524849
[8760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07108540000001973, loss: 0.0044808415696024895
[8770] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.07105240000001975, loss: 0.0026954803615808487
[8780] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.07101940000001977, loss: 0.002167247235774994
[8790] time live:74, 

 99%|█████████▊| 74/75 [00:01<00:00, 64.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8800.webm 






[8810] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.07092040000001984, loss: 0.01791040226817131
[8820] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.07088740000001986, loss: 0.07683020085096359
[8830] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.07085440000001988, loss: 0.08922383189201355
[8840] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.07082140000001991, loss: 0.012129321694374084
[8850] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07078840000001993, loss: 0.011106673628091812
[8860] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.07075540000001995, loss: 0.0366833321750164
[8870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07072240000001997, loss: 0.0034546407405287027
[8880] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.07068940000002, loss: 0.0021251323632895947
[8890] time live:67, cumulate

 98%|█████████▊| 64/65 [00:00<00:00, 73.36it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-8900.webm 






[8910] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.07059040000002006, loss: 0.004659817088395357
[8920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07055740000002009, loss: 0.014173141680657864
[8930] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.07052440000002011, loss: 0.024579863995313644
[8940] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07049140000002013, loss: 0.0030660824850201607
[8950] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.07045840000002015, loss: 0.004615399986505508
[8960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07042540000002018, loss: 0.0020531732589006424
[8970] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.0703924000000202, loss: 0.006545661017298698
[8980] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.07035940000002022, loss: 0.06690214574337006
[8990] time live:61, c

100%|██████████| 63/63 [00:01<00:00, 42.80it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9000.webm 






[9010] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.07026040000002029, loss: 0.0014508056920021772
[9020] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.07022740000002031, loss: 0.056984588503837585
[9030] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.07019440000002033, loss: 0.034965503960847855
[9040] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.07016140000002036, loss: 0.002311405260115862
[9050] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.07012840000002038, loss: 0.03817950561642647
[9060] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0700954000000204, loss: 0.03522897884249687
[9070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.07006240000002043, loss: 0.006120556499809027
[9080] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.07002940000002045, loss: 0.002490321407094598
[9090] time live:97, cumu

100%|██████████| 63/63 [00:01<00:00, 48.54it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9100.webm 






[9110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06993040000002052, loss: 0.007066849619150162
[9120] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06989740000002054, loss: 0.11724895983934402
[9130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06986440000002056, loss: 0.001411345205269754
[9140] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.06983140000002058, loss: 0.06445764750242233
[9150] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.0697984000000206, loss: 0.02431265451014042
[9160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06976540000002063, loss: 0.056608084589242935
[9170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06973240000002065, loss: 0.003996462095528841
[9180] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.06969940000002067, loss: 0.0016339917201548815
[9190] time live:61, cumul

100%|██████████| 63/63 [00:00<00:00, 65.29it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9200.webm 






[9210] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.06960040000002074, loss: 0.04359981790184975
[9220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06956740000002076, loss: 0.04964579641819
[9230] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.06953440000002079, loss: 0.005291602574288845
[9240] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.06950140000002081, loss: 0.0017898190999403596
[9250] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06946840000002083, loss: 0.03719155862927437
[9260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06943540000002085, loss: 0.017575304955244064
[9270] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.06940240000002088, loss: 0.034169476479291916
[9280] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.0693694000000209, loss: 0.025533286854624748
[9290] time live:69, cumulated r

100%|██████████| 63/63 [00:00<00:00, 87.57it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9300.webm 






[9310] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.06927040000002097, loss: 0.018960490822792053
[9320] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06923740000002099, loss: 0.055309515446424484
[9330] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.06920440000002101, loss: 0.10459693521261215
[9340] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.06917140000002103, loss: 0.056384868919849396
[9350] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.06913840000002106, loss: 0.004726676270365715
[9360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06910540000002108, loss: 0.0030879247933626175
[9370] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0690724000000211, loss: 0.0018057769630104303
[9380] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.06903940000002112, loss: 0.010426435619592667
[9390] time live:63, cum

100%|██████████| 63/63 [00:01<00:00, 49.93it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9400.webm 






[9410] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06894040000002119, loss: 0.06493397057056427
[9420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06890740000002121, loss: 0.015724601224064827
[9430] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.06887440000002124, loss: 0.06647170335054398
[9440] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.06884140000002126, loss: 0.03875173628330231
[9450] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.06880840000002128, loss: 0.002737457863986492
[9460] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.0687754000000213, loss: 0.004098875913769007
[9470] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06874240000002133, loss: 0.002556202467530966
[9480] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06870940000002135, loss: 0.03845198079943657
[9490] time live:99, cumulate

 99%|█████████▊| 73/74 [00:00<00:00, 77.28it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9500.webm 






[9510] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06861040000002142, loss: 0.016876529902219772
[9520] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06857740000002144, loss: 0.08824780583381653
[9530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06854440000002146, loss: 0.0026507466100156307
[9540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06851140000002148, loss: 0.014311387203633785
[9550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0684784000000215, loss: 0.07689763605594635
[9560] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.06844540000002153, loss: 0.025072425603866577
[9570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06841240000002155, loss: 0.06066954508423805
[9580] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06837940000002157, loss: 0.030802661553025246
[9590] time live:61, cumula

100%|██████████| 63/63 [00:00<00:00, 75.84it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9600.webm 






[9610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06828040000002164, loss: 0.003687302116304636
[9620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06824740000002166, loss: 0.012284111231565475
[9630] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.06821440000002169, loss: 0.0035496996715664864
[9640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06818140000002171, loss: 0.06834456324577332
[9650] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06814840000002173, loss: 0.1594661921262741
[9660] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.06811540000002175, loss: 0.007433050777763128
[9670] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.06808240000002178, loss: 0.02687864564359188
[9680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0680494000000218, loss: 0.10366994142532349
[9690] time live:102, cumu

 99%|█████████▉| 98/99 [00:01<00:00, 77.90it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9700.webm 






[9710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06795040000002187, loss: 0.00229255435988307
[9720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06791740000002189, loss: 0.03569532185792923
[9730] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06788440000002191, loss: 0.1061602532863617
[9740] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06785140000002193, loss: 0.002893067430704832
[9750] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06781840000002196, loss: 0.16197898983955383
[9760] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.06778540000002198, loss: 0.006003757938742638
[9770] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.067752400000022, loss: 0.0027651332784444094
[9780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06771940000002202, loss: 0.04344077780842781
[9790] time live:61, cumulate

100%|██████████| 63/63 [00:00<00:00, 72.16it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9800.webm 

[9810] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.06762040000002209, loss: 0.03169409930706024
[9820] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.06758740000002211, loss: 0.00984519999474287
[9830] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06755440000002214, loss: 0.003070143749937415
[9840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06752140000002216, loss: 0.08156079053878784
[9850] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06748840000002218, loss: 0.0031511231791228056
[9860] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.0674554000000222, loss: 0.03534969314932823
[9870] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06742240000002223, loss: 0.00254066102206707
[9880] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.0673894

100%|██████████| 63/63 [00:00<00:00, 71.72it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-9900.webm 






[9910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06729040000002232, loss: 0.09470335394144058
[9920] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06725740000002234, loss: 0.023987073451280594
[9930] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06722440000002236, loss: 0.0010262189898639917
[9940] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06719140000002238, loss: 0.03282327950000763
[9950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0671584000000224, loss: 0.10645022988319397
[9960] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.06712540000002243, loss: 0.003592134453356266
[9970] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06709240000002245, loss: 0.07033824175596237
[9980] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.06705940000002247, loss: 0.06418951600790024
[9990] time live:97, cumulat

 99%|█████████▊| 68/69 [00:00<00:00, 83.64it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10000.webm 






[10010] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.06696040000002254, loss: 0.004546545911580324
[10020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06692740000002256, loss: 0.008787404745817184
[10030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06689440000002259, loss: 0.015237024053931236
[10040] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.06686140000002261, loss: 0.003555258736014366
[10050] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.06682840000002263, loss: 0.03170820698142052
[10060] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.06679540000002265, loss: 0.010671904310584068
[10070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06676240000002268, loss: 0.006284801755100489
[10080] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.0667294000000227, loss: 0.12695921957492828
[10090] time live:8

 99%|█████████▉| 111/112 [00:01<00:00, 79.65it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10100.webm 

[10110] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.06663040000002277, loss: 0.004367731511592865
[10120] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06659740000002279, loss: 0.046886276453733444
[10130] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.06656440000002281, loss: 0.05843306705355644
[10140] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.06653140000002283, loss: 0.06754279881715775
[10150] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06649840000002286, loss: 0.0033176406286656857
[10160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06646540000002288, loss: 0.004313766956329346
[10170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0664324000000229, loss: 0.11707492917776108
[10180] time live:97, cumulated reward: 9.699999999999982, exploring ra

100%|██████████| 63/63 [00:00<00:00, 76.84it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10200.webm 






[10210] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06630040000002299, loss: 0.0038948371075093746
[10220] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.06626740000002301, loss: 0.09225286543369293
[10230] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06623440000002304, loss: 0.0044944449327886105
[10240] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.06620140000002306, loss: 0.07574455440044403
[10250] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.06616840000002308, loss: 0.0033131781965494156
[10260] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.0661354000000231, loss: 0.002161592012271285
[10270] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.06610240000002313, loss: 0.07371785491704941
[10280] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.06606940000002315, loss: 0.14284488558769226
[10290] time liv

 99%|█████████▊| 78/79 [00:01<00:00, 75.00it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10300.webm 






[10310] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.06597040000002322, loss: 0.06623203307390213
[10320] time live:147, cumulated reward: 16.69999999999998, exploring rate: 0.06593740000002324, loss: 0.12187926471233368
[10330] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06590440000002326, loss: 0.062837615609169
[10340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06587140000002328, loss: 0.0036354383919388056
[10350] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.0658384000000233, loss: 0.018157251179218292
[10360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06580540000002333, loss: 0.09400001168251038
[10370] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.06577240000002335, loss: 0.11197546124458313
[10380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06573940000002337, loss: 0.004944909829646349
[10390] time live:73,

100%|██████████| 63/63 [00:00<00:00, 79.72it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10400.webm 






[10410] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.06564040000002344, loss: 0.036110252141952515
[10420] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06560740000002346, loss: 0.0028298520483076572
[10430] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.06557440000002349, loss: 0.0052705006673932076
[10440] time live:125, cumulated reward: 13.499999999999972, exploring rate: 0.06554140000002351, loss: 0.005186696536839008
[10450] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.06550840000002353, loss: 0.04767657443881035
[10460] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06547540000002355, loss: 0.018872467800974846
[10470] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06544240000002358, loss: 0.005358362104743719
[10480] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.0654094000000236, loss: 0.03203267231583595
[10490] time

100%|██████████| 63/63 [00:00<00:00, 70.55it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10500.webm 






[10510] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.06531040000002367, loss: 0.1255301684141159
[10520] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06527740000002369, loss: 0.007421491201967001
[10530] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.06524440000002371, loss: 0.004001740366220474
[10540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06521140000002373, loss: 0.04165833815932274
[10550] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.06517840000002376, loss: 0.01944361999630928
[10560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06514540000002378, loss: 0.08787217736244202
[10570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0651124000000238, loss: 0.12190372496843338
[10580] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06507940000002382, loss: 0.027490073814988136
[10590] time live:10

 99%|█████████▊| 78/79 [00:00<00:00, 86.85it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10600.webm 






[10610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06498040000002389, loss: 0.08966530114412308
[10620] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.06494740000002391, loss: 0.01675923727452755
[10630] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06491440000002394, loss: 0.10775689780712128
[10640] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06488140000002396, loss: 0.03850450739264488
[10650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06484840000002398, loss: 0.04010394588112831
[10660] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.064815400000024, loss: 0.046484582126140594
[10670] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06478240000002403, loss: 0.1336584836244583
[10680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06474940000002405, loss: 0.004136804491281509
[10690] time live:133

100%|██████████| 63/63 [00:01<00:00, 41.85it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10700.webm 






[10710] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.06465040000002412, loss: 0.00511488551273942
[10720] time live:134, cumulated reward: 14.399999999999968, exploring rate: 0.06461740000002414, loss: 0.1036122515797615
[10730] time live:258, cumulated reward: 30.80000000000014, exploring rate: 0.06458440000002416, loss: 0.10741329938173294
[10740] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.06455140000002418, loss: 0.06997458636760712
[10750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0645184000000242, loss: 0.13822925090789795
[10760] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06448540000002423, loss: 0.01862686313688755
[10770] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06445240000002425, loss: 0.008101517334580421
[10780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06441940000002427, loss: 0.019174763932824135
[10790] time live:

 99%|█████████▊| 72/73 [00:01<00:00, 67.58it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10800.webm 






[10810] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06432040000002434, loss: 0.07883788645267487
[10820] time live:168, cumulated reward: 18.80000000000001, exploring rate: 0.06428740000002436, loss: 0.017269328236579895
[10830] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.06425440000002439, loss: 0.004019309300929308
[10840] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06422140000002441, loss: 0.009768681600689888
[10850] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.06418840000002443, loss: 0.005007721483707428
[10860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06415540000002445, loss: 0.010343794710934162
[10870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06412240000002448, loss: 0.010473608039319515
[10880] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0640894000000245, loss: 0.021895848214626312
[10890] time liv

 99%|█████████▊| 69/70 [00:00<00:00, 79.16it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-10900.webm 

[10910] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.06399040000002457, loss: 0.02734624594449997
[10920] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.06395740000002459, loss: 0.04328607767820358
[10930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06392440000002461, loss: 0.008833250030875206
[10940] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06389140000002463, loss: 0.16826985776424408
[10950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06385840000002466, loss: 0.0034077786840498447
[10960] time live:131, cumulated reward: 14.09999999999997, exploring rate: 0.06382540000002468, loss: 0.07273086160421371
[10970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0637924000000247, loss: 0.043692782521247864
[10980] time live:97, cumulated reward: 9.699999999999982, exploring rate

 98%|█████████▊| 64/65 [00:00<00:00, 65.35it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11000.webm 






[11010] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.06366040000002479, loss: 0.1649208664894104
[11020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06362740000002481, loss: 0.06329799443483353
[11030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06359440000002484, loss: 0.036036111414432526
[11040] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06356140000002486, loss: 0.15215559303760529
[11050] time live:93, cumulated reward: 9.299999999999983, exploring rate: 0.06352840000002488, loss: 0.012576567940413952
[11060] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.0634954000000249, loss: 0.13491928577423096
[11070] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.06346240000002493, loss: 0.2839414179325104
[11080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06342940000002495, loss: 0.022487742826342583
[11090] time live:61,

 99%|█████████▉| 181/182 [00:02<00:00, 75.89it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11100.webm 






[11110] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.06333040000002502, loss: 0.20389172434806824
[11120] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.06329740000002504, loss: 0.07769733667373657
[11130] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.06326440000002506, loss: 0.00974661111831665
[11140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06323140000002508, loss: 0.050933562219142914
[11150] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0631984000000251, loss: 0.2305264174938202
[11160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06316540000002513, loss: 0.22697480022907257
[11170] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.06313240000002515, loss: 0.0411645732820034
[11180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06309940000002517, loss: 0.013235338032245636
[11190] time live:61, cumu

 99%|█████████▉| 114/115 [00:02<00:00, 33.45it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11200.webm 






[11210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06300040000002524, loss: 0.07645784318447113
[11220] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.06296740000002526, loss: 0.005718151107430458
[11230] time live:178, cumulated reward: 20.800000000000026, exploring rate: 0.06293440000002529, loss: 0.0069607943296432495
[11240] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.06290140000002531, loss: 0.012793116271495819
[11250] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.06286840000002533, loss: 0.234380304813385
[11260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06283540000002535, loss: 0.040083229541778564
[11270] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06280240000002538, loss: 0.0377175472676754
[11280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0627694000000254, loss: 0.2685522735118866
[11290] time live:1

 99%|█████████▊| 72/73 [00:00<00:00, 84.81it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11300.webm 






[11310] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.06267040000002547, loss: 0.047131869941949844
[11320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06263740000002549, loss: 0.05661032721400261
[11330] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.06260440000002551, loss: 0.2850814759731293
[11340] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.06257140000002553, loss: 0.006905358284711838
[11350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06253840000002556, loss: 0.2620314359664917
[11360] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.06250540000002558, loss: 0.053860027343034744
[11370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0624724000000256, loss: 0.005198623985052109
[11380] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.062439400000025624, loss: 0.28224071860313416
[11390] time live:182

 99%|█████████▊| 70/71 [00:00<00:00, 74.99it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11400.webm 

[11410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06234040000002569, loss: 0.004565460607409477
[11420] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.062307400000025714, loss: 0.019361529499292374
[11430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06227440000002574, loss: 0.01893649436533451
[11440] time live:109, cumulated reward: 11.899999999999977, exploring rate: 0.06224140000002576, loss: 0.04828197509050369
[11450] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06220840000002578, loss: 0.004136587027460337
[11460] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.062175400000025804, loss: 0.3456076979637146
[11470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06214240000002583, loss: 0.006273207720369101
[11480] time live:37, cumulated reward: 2.7000000000000024, exploring r

 99%|█████████▊| 75/76 [00:00<00:00, 82.68it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11500.webm 






[11510] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.06201040000002592, loss: 0.12560436129570007
[11520] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06197740000002594, loss: 0.10085135698318481
[11530] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.06194440000002596, loss: 0.014043642207980156
[11540] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.061911400000025985, loss: 0.01597493514418602
[11550] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06187840000002601, loss: 0.007481776177883148
[11560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06184540000002603, loss: 0.2191932499408722
[11570] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06181240000002605, loss: 0.08487844467163086
[11580] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.061779400000026075, loss: 0.3147476315498352
[11590] time live:61

 99%|█████████▉| 175/176 [00:02<00:00, 78.97it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11600.webm 






[11610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06168040000002614, loss: 0.048835791647434235
[11620] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.061647400000026165, loss: 0.13868027925491333
[11630] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.06161440000002619, loss: 0.31126096844673157
[11640] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.06158140000002621, loss: 0.07586116343736649
[11650] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.06154840000002623, loss: 0.05843662470579147
[11660] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.061515400000026255, loss: 0.02781655639410019
[11670] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06148240000002628, loss: 0.0169412512332201
[11680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0614494000000263, loss: 0.012845532037317753
[11690] time live

 99%|█████████▉| 105/106 [00:01<00:00, 59.60it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11700.webm 






[11710] time live:141, cumulated reward: 16.099999999999973, exploring rate: 0.06135040000002637, loss: 0.04022369906306267
[11720] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.06131740000002639, loss: 0.016087323427200317
[11730] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.06128440000002641, loss: 0.028562987223267555
[11740] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.061251400000026435, loss: 0.25958314538002014
[11750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06121840000002646, loss: 0.019312918186187744
[11760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06118540000002648, loss: 0.012315044179558754
[11770] time live:34, cumulated reward: 2.400000000000002, exploring rate: 0.0611524000000265, loss: 0.02084389142692089
[11780] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.061119400000026525, loss: 0.35505375266075134
[11790] time 

100%|██████████| 63/63 [00:00<00:00, 66.84it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11800.webm 






[11810] time live:148, cumulated reward: 16.799999999999983, exploring rate: 0.06102040000002659, loss: 0.016804363578557968
[11820] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.060987400000026615, loss: 0.01318991370499134
[11830] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06095440000002664, loss: 0.01839243248105049
[11840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06092140000002666, loss: 0.04990169405937195
[11850] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.06088840000002668, loss: 0.013364600948989391
[11860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.060855400000026705, loss: 0.014707003720104694
[11870] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.06082240000002673, loss: 0.017722943797707558
[11880] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06078940000002675, loss: 0.3337833285331726
[11890] time li

100%|██████████| 63/63 [00:00<00:00, 67.20it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-11900.webm 






[11910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06069040000002682, loss: 0.11970646679401398
[11920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06065740000002684, loss: 0.8701561689376831
[11930] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.06062440000002686, loss: 0.3348608911037445
[11940] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.060591400000026885, loss: 0.21349890530109406
[11950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06055840000002691, loss: 0.22767265141010284
[11960] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.06052540000002693, loss: 0.006536726374179125
[11970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06049240000002695, loss: 0.008504138328135014
[11980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.060459400000026975, loss: 0.35747066140174866
[11990] time live:61, 

 99%|█████████▉| 98/99 [00:01<00:00, 59.82it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12000.webm 






[12010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06036040000002704, loss: 0.013868320733308792
[12020] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.060327400000027065, loss: 0.6429187655448914
[12030] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.06029440000002709, loss: 0.11307111382484436
[12040] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.06026140000002711, loss: 0.00956651009619236
[12050] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.06022840000002713, loss: 0.09907298535108566
[12060] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.060195400000027155, loss: 0.010266945697367191
[12070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06016240000002718, loss: 0.024864356964826584
[12080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0601294000000272, loss: 0.010182611644268036
[12090] time live:67,

100%|██████████| 63/63 [00:01<00:00, 49.81it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12100.webm 






[12110] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.06003040000002727, loss: 0.045330628752708435
[12120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05999740000002729, loss: 0.012109371833503246
[12130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05996440000002731, loss: 0.34261372685432434
[12140] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.059931400000027335, loss: 0.09268773347139359
[12150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05989840000002736, loss: 0.05831232666969299
[12160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05986540000002738, loss: 0.3313804268836975
[12170] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.0598324000000274, loss: 0.008257783018052578
[12180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.059799400000027425, loss: 0.3178558647632599
[12190] time live:97, 

 98%|█████████▊| 65/66 [00:00<00:00, 68.31it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12200.webm 






[12210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05970040000002749, loss: 0.022783061489462852
[12220] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.059667400000027515, loss: 0.012393884360790253
[12230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05963440000002754, loss: 0.14161166548728943
[12240] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.05960140000002756, loss: 0.012030552141368389
[12250] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05956840000002758, loss: 0.10999637842178345
[12260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.059535400000027605, loss: 0.047147881239652634
[12270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05950240000002763, loss: 0.3359125256538391
[12280] time live:131, cumulated reward: 14.09999999999997, exploring rate: 0.05946940000002765, loss: 0.05956414341926575
[12290] time liv

 98%|█████████▊| 63/64 [00:00<00:00, 80.01it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12300.webm 






[12310] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.05937040000002772, loss: 0.02859603986144066
[12320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05933740000002774, loss: 0.015150942839682102
[12330] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.05930440000002776, loss: 0.12191559374332428
[12340] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.059271400000027785, loss: 0.017901774495840073
[12350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05923840000002781, loss: 0.04988136142492294
[12360] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.05920540000002783, loss: 0.09883643686771393
[12370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05917240000002785, loss: 0.12720969319343567
[12380] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.059139400000027875, loss: 0.0137013616040349
[12390] time live:61,

100%|██████████| 78/78 [00:01<00:00, 63.80it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12400.webm 






[12410] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05904040000002794, loss: 0.023731321096420288
[12420] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.059007400000027965, loss: 0.011626092717051506
[12430] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.05897440000002799, loss: 0.11134055256843567
[12440] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.05894140000002801, loss: 0.07175739854574203
[12450] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.05890840000002803, loss: 0.022056657820940018
[12460] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.058875400000028055, loss: 0.138688325881958
[12470] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.05884240000002808, loss: 0.09407813102006912
[12480] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0588094000000281, loss: 0.026013296097517014
[12490] time live:1

100%|██████████| 63/63 [00:00<00:00, 74.19it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12500.webm 

[12510] time live:49, cumulated reward: 3.8999999999999986, exploring rate: 0.05871040000002817, loss: 0.02426687814295292
[12520] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.05867740000002819, loss: 0.01852475106716156
[12530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05864440000002821, loss: 0.00893589947372675
[12540] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.058611400000028235, loss: 0.013636475428938866
[12550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05857840000002826, loss: 0.029025837779045105
[12560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05854540000002828, loss: 0.21516025066375732
[12570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0585124000000283, loss: 0.0732593685388565
[12580] time live:145, cumulated reward: 16.49999999999998, exploring rate:

100%|██████████| 63/63 [00:01<00:00, 58.59it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12600.webm 






[12610] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.05838040000002839, loss: 0.012531985528767109
[12620] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.058347400000028415, loss: 0.01657932624220848
[12630] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.05831440000002844, loss: 0.020223740488290787
[12640] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.05828140000002846, loss: 0.12480692565441132
[12650] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.05824840000002848, loss: 0.04738863557577133
[12660] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.058215400000028505, loss: 0.039213016629219055
[12670] time live:187, cumulated reward: 21.70000000000004, exploring rate: 0.05818240000002853, loss: 0.0437081903219223
[12680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05814940000002855, loss: 0.006808496080338955
[12690] time live:

100%|██████████| 63/63 [00:00<00:00, 66.73it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12700.webm 






[12710] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.05805040000002862, loss: 0.035225123167037964
[12720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05801740000002864, loss: 0.017775628715753555
[12730] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.05798440000002866, loss: 0.07366307824850082
[12740] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.057951400000028686, loss: 0.13550616800785065
[12750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05791840000002871, loss: 0.29515576362609863
[12760] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05788540000002873, loss: 0.054381292313337326
[12770] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.05785240000002875, loss: 0.0471830815076828
[12780] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.057819400000028776, loss: 0.05096224695444107
[12790] time liv

 99%|█████████▉| 134/135 [00:02<00:00, 65.75it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12800.webm 






[12810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05772040000002884, loss: 0.010775876231491566
[12820] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.057687400000028866, loss: 0.11255577951669693
[12830] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05765440000002889, loss: 0.22045810520648956
[12840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05762140000002891, loss: 0.06729665398597717
[12850] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05758840000002893, loss: 0.07818300276994705
[12860] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.057555400000028956, loss: 0.028842847794294357
[12870] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.05752240000002898, loss: 0.12396564334630966
[12880] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.057489400000029, loss: 0.11886297166347504
[12890] time live:61,

100%|██████████| 63/63 [00:01<00:00, 50.08it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-12900.webm 






[12910] time live:40, cumulated reward: 3.0000000000000018, exploring rate: 0.05739040000002907, loss: 0.059131477028131485
[12920] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05735740000002909, loss: 0.028445925563573837
[12930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05732440000002911, loss: 0.02194824442267418
[12940] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.057291400000029136, loss: 0.11696968227624893
[12950] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.05725840000002916, loss: 0.02698367089033127
[12960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05722540000002918, loss: 0.0501863919198513
[12970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0571924000000292, loss: 0.050328705459833145
[12980] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.057159400000029226, loss: 0.029866978526115417
[12990] time li

 98%|█████████▊| 56/57 [00:00<00:00, 70.71it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13000.webm 






[13010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05706040000002929, loss: 0.07987970858812332
[13020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.057027400000029316, loss: 0.17086495459079742
[13030] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05699440000002934, loss: 0.06216833367943764
[13040] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.05696140000002936, loss: 0.2298763543367386
[13050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05692840000002938, loss: 0.014354497194290161
[13060] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.056895400000029406, loss: 0.01891280896961689
[13070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05686240000002943, loss: 0.09037735313177109
[13080] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.05682940000002945, loss: 0.06898731738328934
[13090] time live:75,

 99%|█████████▊| 75/76 [00:01<00:00, 45.43it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13100.webm 






[13110] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.05673040000002952, loss: 0.007902261801064014
[13120] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.05669740000002954, loss: 0.10313665866851807
[13130] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.05666440000002956, loss: 0.0340619720518589
[13140] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.056631400000029586, loss: 0.10053510218858719
[13150] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05659840000002961, loss: 0.018030909821391106
[13160] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.05656540000002963, loss: 0.05992969498038292
[13170] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.05653240000002965, loss: 0.1334795355796814
[13180] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.056499400000029676, loss: 0.016422901302576065
[13190] time

 99%|█████████▉| 111/112 [00:01<00:00, 69.95it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13200.webm 






[13210] time live:287, cumulated reward: 33.70000000000018, exploring rate: 0.05640040000002974, loss: 0.2044680118560791
[13220] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.056367400000029766, loss: 0.015904629603028297
[13230] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.05633440000002979, loss: 0.014846152625977993
[13240] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05630140000002981, loss: 0.020426984876394272
[13250] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05626840000002983, loss: 0.06044613942503929
[13260] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.056235400000029856, loss: 0.03233560174703598
[13270] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05620240000002988, loss: 0.08183752000331879
[13280] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.0561694000000299, loss: 0.07893048226833344
[13290] time live:

 99%|█████████▉| 133/134 [00:01<00:00, 77.06it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13300.webm 






[13310] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.05607040000002997, loss: 0.022231504321098328
[13320] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.05603740000002999, loss: 0.027308238670229912
[13330] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.056004400000030014, loss: 0.02673419564962387
[13340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.055971400000030036, loss: 0.015448929741978645
[13350] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.05593840000003006, loss: 0.07733909785747528
[13360] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.05590540000003008, loss: 0.058646153658628464
[13370] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.055872400000030104, loss: 0.02348565310239792
[13380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.055839400000030126, loss: 0.03461746126413345
[13390] time l

 99%|█████████▊| 66/67 [00:01<00:00, 56.70it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13400.webm 

[13410] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.055740400000030194, loss: 0.013841540552675724
[13420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.055707400000030216, loss: 0.17288845777511597
[13430] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05567440000003024, loss: 0.10595441609621048
[13440] time live:52, cumulated reward: 4.1999999999999975, exploring rate: 0.05564140000003026, loss: 0.08564021438360214
[13450] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.055608400000030284, loss: 0.16345074772834778
[13460] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.055575400000030306, loss: 0.3038601875305176
[13470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05554240000003033, loss: 0.28488537669181824
[13480] time live:61, cumulated reward: 5.099999999999994, exploring r

 99%|█████████▉| 186/187 [00:03<00:00, 45.23it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13500.webm 

[13510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05541040000003042, loss: 0.12737210094928741
[13520] time live:59, cumulated reward: 4.899999999999995, exploring rate: 0.05537740000003044, loss: 0.03196059912443161
[13530] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.055344400000030464, loss: 0.016281642019748688
[13540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.055311400000030486, loss: 0.008933976292610168
[13550] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.05527840000003051, loss: 0.05436757579445839
[13560] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05524540000003053, loss: 0.023429419845342636
[13570] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.055212400000030554, loss: 0.04274042695760727
[13580] time live:98, cumulated reward: 9.799999999999981, exploring ra

 99%|█████████▉| 98/99 [00:01<00:00, 74.63it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13600.webm 

[13610] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.055080400000030644, loss: 0.04669967666268349
[13620] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.055047400000030666, loss: 0.06896843761205673
[13630] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05501440000003069, loss: 0.10440400242805481
[13640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05498140000003071, loss: 0.09299518913030624
[13650] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.054948400000030734, loss: 0.09121129661798477
[13660] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.054915400000030756, loss: 0.06205808371305466
[13670] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.05488240000003078, loss: 0.02360033243894577
[13680] time live:138, cumulated reward: 15.799999999999969, exploring

 99%|█████████▉| 98/99 [00:01<00:00, 79.51it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13700.webm 






[13710] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.05475040000003087, loss: 0.42912545800209045
[13720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05471740000003089, loss: 0.016883093863725662
[13730] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.054684400000030914, loss: 0.06012922525405884
[13740] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.054651400000030936, loss: 0.06509922444820404
[13750] time live:95, cumulated reward: 9.499999999999982, exploring rate: 0.05461840000003096, loss: 0.06918889284133911
[13760] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.05458540000003098, loss: 0.015178613364696503
[13770] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.054552400000031004, loss: 0.020159300416707993
[13780] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.054519400000031026, loss: 0.013891875743865967
[13790] tim

 99%|█████████▊| 66/67 [00:00<00:00, 73.52it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13800.webm 






[13810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.054420400000031094, loss: 0.07384663820266724
[13820] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.054387400000031116, loss: 0.015807559713721275
[13830] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.05435440000003114, loss: 0.015240923501551151
[13840] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.05432140000003116, loss: 0.3424137830734253
[13850] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.054288400000031184, loss: 0.02306327223777771
[13860] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.054255400000031206, loss: 0.10137224197387695
[13870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05422240000003123, loss: 0.2701631188392639
[13880] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.05418940000003125, loss: 0.17559665441513062
[13890] time l

100%|██████████| 63/63 [00:01<00:00, 61.47it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-13900.webm 






[13910] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05409040000003132, loss: 0.07172784954309464
[13920] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05405740000003134, loss: 0.022483527660369873
[13930] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.054024400000031364, loss: 0.046659212559461594
[13940] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.053991400000031387, loss: 0.021403979510068893
[13950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05395840000003141, loss: 0.3836170434951782
[13960] time live:147, cumulated reward: 16.69999999999998, exploring rate: 0.05392540000003143, loss: 0.08483503758907318
[13970] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.053892400000031454, loss: 0.8952526450157166
[13980] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.05385940000003148, loss: 0.0063305748626589775
[13990] time liv

100%|██████████| 63/63 [00:00<00:00, 78.32it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14000.webm 






[14010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.053760400000031544, loss: 0.11627034097909927
[14020] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05372740000003157, loss: 0.13918828964233398
[14030] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.05369440000003159, loss: 0.04526975378394127
[14040] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.05366140000003161, loss: 0.020054519176483154
[14050] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.053628400000031634, loss: 0.011237077414989471
[14060] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.05359540000003166, loss: 0.019710080698132515
[14070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05356240000003168, loss: 0.23855504393577576
[14080] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0535294000000317, loss: 0.012809873558580875
[14090] time 

 99%|█████████▉| 98/99 [00:01<00:00, 77.75it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14100.webm 






[14110] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.05343040000003177, loss: 0.04710133373737335
[14120] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.05339740000003179, loss: 0.062316469848155975
[14130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.053364400000031814, loss: 0.1510917693376541
[14140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05333140000003184, loss: 0.022713467478752136
[14150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05329840000003186, loss: 0.030306801199913025
[14160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05326540000003188, loss: 0.5346270799636841
[14170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.053232400000031904, loss: 0.11075445264577866
[14180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05319940000003193, loss: 0.049637649208307266
[14190] time live:9

 99%|█████████▊| 75/76 [00:01<00:00, 57.21it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14200.webm 






[14210] time live:139, cumulated reward: 15.89999999999997, exploring rate: 0.053100400000031994, loss: 0.026297030970454216
[14220] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.05306740000003202, loss: 0.07055994868278503
[14230] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.05303440000003204, loss: 0.06437866389751434
[14240] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.05300140000003206, loss: 0.07698224484920502
[14250] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.052968400000032084, loss: 0.05168867111206055
[14260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05293540000003211, loss: 0.08867809921503067
[14270] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.05290240000003213, loss: 0.005331763997673988
[14280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05286940000003215, loss: 0.07752056419849396
[14290] time live

100%|██████████| 63/63 [00:01<00:00, 57.59it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14300.webm 






[14310] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.05277040000003222, loss: 0.00977030023932457
[14320] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05273740000003224, loss: 0.017100133001804352
[14330] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.052704400000032264, loss: 0.05550813302397728
[14340] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.05267140000003229, loss: 0.24078837037086487
[14350] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.05263840000003231, loss: 0.4118278920650482
[14360] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05260540000003233, loss: 0.14834114909172058
[14370] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.052572400000032354, loss: 0.5297595858573914
[14380] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.05253940000003238, loss: 0.01190012414008379
[14390] time live:16

100%|██████████| 63/63 [00:00<00:00, 80.50it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14400.webm 






[14410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.052440400000032444, loss: 0.12445389479398727
[14420] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05240740000003247, loss: 0.04115369915962219
[14430] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.05237440000003249, loss: 0.04765079915523529
[14440] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.05234140000003251, loss: 0.3522917628288269
[14450] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.052308400000032534, loss: 0.15082113444805145
[14460] time live:186, cumulated reward: 21.600000000000037, exploring rate: 0.05227540000003256, loss: 0.04231369495391846
[14470] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.05224240000003258, loss: 0.016270916908979416
[14480] time live:145, cumulated reward: 16.49999999999998, exploring rate: 0.0522094000000326, loss: 0.08502836525440216
[14490] time live:

 99%|█████████▊| 71/72 [00:00<00:00, 79.22it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14500.webm 






[14510] time live:148, cumulated reward: 16.799999999999983, exploring rate: 0.05211040000003267, loss: 0.1291007697582245
[14520] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05207740000003269, loss: 0.12831737101078033
[14530] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.052044400000032714, loss: 0.023711856454610825
[14540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05201140000003274, loss: 0.13619814813137054
[14550] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.05197840000003276, loss: 0.04639917612075806
[14560] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05194540000003278, loss: 0.03170674666762352
[14570] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.051912400000032805, loss: 0.06899827718734741
[14580] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05187940000003283, loss: 0.24935905635356903
[14590] time live:

100%|██████████| 63/63 [00:00<00:00, 77.11it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14600.webm 






[14610] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.051780400000032895, loss: 0.043472375720739365
[14620] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.05174740000003292, loss: 0.02009880170226097
[14630] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05171440000003294, loss: 0.3975852131843567
[14640] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.05168140000003296, loss: 0.24341115355491638
[14650] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.051648400000032985, loss: 0.21797429025173187
[14660] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.05161540000003301, loss: 0.028294367715716362
[14670] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05158240000003303, loss: 0.054348818957805634
[14680] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.05154940000003305, loss: 0.16890673339366913
[14690] time live

 99%|█████████▊| 78/79 [00:01<00:00, 44.22it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14700.webm 






[14710] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.05145040000003312, loss: 0.2143867015838623
[14720] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.05141740000003314, loss: 0.00894526019692421
[14730] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.051384400000033165, loss: 0.0973486453294754
[14740] time live:177, cumulated reward: 19.700000000000024, exploring rate: 0.05135140000003319, loss: 0.0714096650481224
[14750] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.05131840000003321, loss: 0.24183735251426697
[14760] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.05128540000003323, loss: 0.05649988725781441
[14770] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.051252400000033255, loss: 0.15661828219890594
[14780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05121940000003328, loss: 0.17203004658222198
[14790] time live

100%|██████████| 63/63 [00:00<00:00, 76.25it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14800.webm 






[14810] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.051120400000033345, loss: 0.1340353935956955
[14820] time live:146, cumulated reward: 16.59999999999998, exploring rate: 0.05108740000003337, loss: 0.04200034588575363
[14830] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.05105440000003339, loss: 0.03153117373585701
[14840] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.05102140000003341, loss: 0.018239038065075874
[14850] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.050988400000033435, loss: 0.04627717658877373
[14860] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.05095540000003346, loss: 0.007583911530673504
[14870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05092240000003348, loss: 0.4617130756378174
[14880] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0508894000000335, loss: 0.1156463623046875
[14890] time live:66

 99%|█████████▉| 111/112 [00:01<00:00, 79.76it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-14900.webm 






[14910] time live:186, cumulated reward: 21.600000000000037, exploring rate: 0.05079040000003357, loss: 0.01622641831636429
[14920] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.05075740000003359, loss: 0.05262652784585953
[14930] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.050724400000033615, loss: 0.2059282660484314
[14940] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.05069140000003364, loss: 0.013268375769257545
[14950] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.05065840000003366, loss: 0.08377589285373688
[14960] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.05062540000003368, loss: 0.13442131876945496
[14970] time live:182, cumulated reward: 21.20000000000003, exploring rate: 0.050592400000033705, loss: 0.017365766689181328
[14980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05055940000003373, loss: 0.0216154083609581
[14990] time 

 99%|█████████▊| 78/79 [00:02<00:00, 26.19it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15000.webm 






[15010] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.050460400000033795, loss: 0.02372846007347107
[15020] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.05042740000003382, loss: 0.1910792887210846
[15030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05039440000003384, loss: 0.022136788815259933
[15040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05036140000003386, loss: 0.26468196511268616
[15050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.050328400000033885, loss: 0.026300711557269096
[15060] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05029540000003391, loss: 0.11345741152763367
[15070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.05026240000003393, loss: 0.04790115728974342
[15080] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.05022940000003395, loss: 0.029232772067189217
[15090] time live:1

 99%|█████████▊| 75/76 [00:01<00:00, 64.84it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15100.webm 






[15110] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.05013040000003402, loss: 0.041723717004060745
[15120] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.05009740000003404, loss: 0.3628109395503998
[15130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.050064400000034065, loss: 0.04859878867864609
[15140] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.05003140000003409, loss: 0.5017802715301514
[15150] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04999840000003411, loss: 0.20361405611038208
[15160] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.04996540000003413, loss: 0.8282683491706848
[15170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.049932400000034155, loss: 0.11509997397661209
[15180] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.04989940000003418, loss: 0.018457740545272827
[15190] time live

100%|██████████| 108/108 [00:01<00:00, 79.03it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15200.webm 






[15210] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.049800400000034245, loss: 0.05578761547803879
[15220] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.04976740000003427, loss: 0.0961529091000557
[15230] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04973440000003429, loss: 0.05427480489015579
[15240] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.04970140000003431, loss: 0.2322966605424881
[15250] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.049668400000034335, loss: 0.031055625528097153
[15260] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.04963540000003436, loss: 0.6133841872215271
[15270] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04960240000003438, loss: 0.2867763340473175
[15280] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.0495694000000344, loss: 0.0919792503118515
[15290] time live:37

 97%|█████████▋| 38/39 [00:00<00:00, 76.05it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15300.webm 






[15310] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.04947040000003447, loss: 0.1576174944639206
[15320] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.04943740000003449, loss: 0.12151864171028137
[15330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.049404400000034515, loss: 0.039174824953079224
[15340] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04937140000003454, loss: 0.19627279043197632
[15350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04933840000003456, loss: 0.07660911977291107
[15360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04930540000003458, loss: 0.05098528787493706
[15370] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.049272400000034605, loss: 0.15093298256397247
[15380] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.04923940000003463, loss: 0.238700270652771
[15390] time live:62, 

 99%|█████████▉| 98/99 [00:01<00:00, 72.31it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15400.webm 






[15410] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.049140400000034695, loss: 0.06185733526945114
[15420] time live:217, cumulated reward: 25.70000000000008, exploring rate: 0.04910740000003472, loss: 0.05739593505859375
[15430] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.04907440000003474, loss: 0.13539673388004303
[15440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04904140000003476, loss: 0.039246153086423874
[15450] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.049008400000034785, loss: 0.07748468965291977
[15460] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.04897540000003481, loss: 0.04587094113230705
[15470] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04894240000003483, loss: 0.04719364643096924
[15480] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04890940000003485, loss: 0.0057775406166911125
[15490] time li

 97%|█████████▋| 33/34 [00:00<00:00, 65.80it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15500.webm 






[15510] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.04881040000003492, loss: 0.21253079175949097
[15520] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04877740000003494, loss: 0.34846365451812744
[15530] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.048744400000034965, loss: 0.028154464438557625
[15540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04871140000003499, loss: 0.0813618078827858
[15550] time live:178, cumulated reward: 20.800000000000026, exploring rate: 0.04867840000003501, loss: 0.06524088978767395
[15560] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.04864540000003503, loss: 0.03122582659125328
[15570] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.048612400000035055, loss: 0.17293594777584076
[15580] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04857940000003508, loss: 0.12332259863615036
[15590] time live

100%|██████████| 63/63 [00:00<00:00, 76.05it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15600.webm 






[15610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.048480400000035145, loss: 0.09349729120731354
[15620] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.04844740000003517, loss: 0.06952711939811707
[15630] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04841440000003519, loss: 0.09273189306259155
[15640] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.04838140000003521, loss: 0.13910990953445435
[15650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.048348400000035235, loss: 0.0772019624710083
[15660] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.04831540000003526, loss: 0.22223691642284393
[15670] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.04828240000003528, loss: 0.07521601766347885
[15680] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.0482494000000353, loss: 0.14034554362297058
[15690] time live:61, cu

 99%|█████████▉| 98/99 [00:01<00:00, 68.34it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15700.webm 






[15710] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04815040000003537, loss: 0.026604607701301575
[15720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04811740000003539, loss: 0.02138209342956543
[15730] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.048084400000035415, loss: 0.14080999791622162
[15740] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.04805140000003544, loss: 0.02485094591975212
[15750] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.04801840000003546, loss: 0.011126060038805008
[15760] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.04798540000003548, loss: 0.10443594306707382
[15770] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.047952400000035506, loss: 0.1540958285331726
[15780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04791940000003553, loss: 0.01549098826944828
[15790] time live:61, c

 98%|█████████▊| 54/55 [00:01<00:00, 46.50it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15800.webm 






[15810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.047820400000035596, loss: 0.13889533281326294
[15820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04778740000003562, loss: 0.16533097624778748
[15830] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04775440000003564, loss: 0.02370377629995346
[15840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04772140000003566, loss: 0.07223990559577942
[15850] time live:90, cumulated reward: 8.999999999999984, exploring rate: 0.047688400000035686, loss: 0.07780593633651733
[15860] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04765540000003571, loss: 0.5228362083435059
[15870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04762240000003573, loss: 0.029130876064300537
[15880] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04758940000003575, loss: 0.05385032296180725
[15890] time live:71, 

 99%|█████████▊| 72/73 [00:00<00:00, 83.81it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-15900.webm 






[15910] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04749040000003582, loss: 0.09557679295539856
[15920] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04745740000003584, loss: 0.2502526044845581
[15930] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.047424400000035866, loss: 0.07638058811426163
[15940] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04739140000003589, loss: 0.06939370930194855
[15950] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04735840000003591, loss: 0.06898140907287598
[15960] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.04732540000003593, loss: 0.11146935075521469
[15970] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.047292400000035956, loss: 0.19296391308307648
[15980] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.04725940000003598, loss: 0.10540498048067093
[15990] time live:6

 99%|█████████▊| 68/69 [00:00<00:00, 80.09it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16000.webm 

[16010] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.047160400000036046, loss: 0.018125586211681366
[16020] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.04712740000003607, loss: 0.26635053753852844
[16030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04709440000003609, loss: 0.07739321142435074
[16040] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.04706140000003611, loss: 0.03255195915699005
[16050] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.047028400000036136, loss: 0.07503017038106918
[16060] time live:185, cumulated reward: 21.500000000000036, exploring rate: 0.04699540000003616, loss: 0.08130927383899689
[16070] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.04696240000003618, loss: 0.07913914322853088
[16080] time live:133, cumulated reward: 14.299999999999969, exploring r

 99%|█████████▉| 134/135 [00:02<00:00, 64.05it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16100.webm 






[16110] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.04683040000003627, loss: 0.4011158347129822
[16120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04679740000003629, loss: 0.029614735394716263
[16130] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.046764400000036316, loss: 0.06389247626066208
[16140] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.04673140000003634, loss: 0.197646364569664
[16150] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04669840000003636, loss: 0.47366106510162354
[16160] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04666540000003638, loss: 0.2741142511367798
[16170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.046632400000036406, loss: 0.04813917353749275
[16180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04659940000003643, loss: 0.045430250465869904
[16190] time live:

100%|██████████| 63/63 [00:00<00:00, 67.18it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16200.webm 






[16210] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.046500400000036496, loss: 0.17803460359573364
[16220] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04646740000003652, loss: 0.3679473102092743
[16230] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04643440000003654, loss: 0.011807851493358612
[16240] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.04640140000003656, loss: 0.018651675432920456
[16250] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.046368400000036586, loss: 0.06041005253791809
[16260] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.04633540000003661, loss: 0.1105341762304306
[16270] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.04630240000003663, loss: 0.3524612486362457
[16280] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.04626940000003665, loss: 0.04058634862303734
[16290] time live:

100%|██████████| 63/63 [00:00<00:00, 67.72it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16300.webm 






[16310] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04617040000003672, loss: 0.06118523329496384
[16320] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.04613740000003674, loss: 0.14313289523124695
[16330] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.046104400000036766, loss: 0.015898728743195534
[16340] time live:109, cumulated reward: 11.899999999999977, exploring rate: 0.04607140000003679, loss: 0.28938722610473633
[16350] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.04603840000003681, loss: 0.09494359791278839
[16360] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.046005400000036833, loss: 0.056163035333156586
[16370] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.045972400000036856, loss: 0.04273557662963867
[16380] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.04593940000003688, loss: 0.6482678055763245
[16390] time

 99%|█████████▉| 98/99 [00:01<00:00, 62.14it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16400.webm 






[16410] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.045840400000036946, loss: 0.01497605536133051
[16420] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.04580740000003697, loss: 0.05397343263030052
[16430] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.04577440000003699, loss: 0.3547162115573883
[16440] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.045741400000037014, loss: 0.1401444673538208
[16450] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.045708400000037036, loss: 0.16822920739650726
[16460] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.04567540000003706, loss: 0.08610522747039795
[16470] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04564240000003708, loss: 0.026711996644735336
[16480] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.045609400000037104, loss: 0.021562524139881134
[16490] time live:97

 99%|█████████▉| 134/135 [00:01<00:00, 63.25it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16500.webm 






[16510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04551040000003717, loss: 0.24519503116607666
[16520] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.045477400000037194, loss: 0.01597084105014801
[16530] time live:225, cumulated reward: 26.500000000000092, exploring rate: 0.045444400000037216, loss: 0.02981802634894848
[16540] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.04541140000003724, loss: 0.043781258165836334
[16550] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04537840000003726, loss: 0.3238956034183502
[16560] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.045345400000037284, loss: 0.054870884865522385
[16570] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.045312400000037306, loss: 0.02010347694158554
[16580] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04527940000003733, loss: 0.06890328973531723
[16590] time l

100%|██████████| 63/63 [00:00<00:00, 72.04it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16600.webm 






[16610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.045180400000037396, loss: 0.0168407391756773
[16620] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.04514740000003742, loss: 0.02931216172873974
[16630] time live:146, cumulated reward: 16.59999999999998, exploring rate: 0.04511440000003744, loss: 0.02938033826649189
[16640] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.045081400000037464, loss: 0.14034134149551392
[16650] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.045048400000037486, loss: 0.09007061272859573
[16660] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.04501540000003751, loss: 0.023869192227721214
[16670] time live:190, cumulated reward: 22.000000000000043, exploring rate: 0.04498240000003753, loss: 0.3355347216129303
[16680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.044949400000037554, loss: 0.08523471653461456
[16690] time 

100%|██████████| 63/63 [00:00<00:00, 65.09it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16700.webm 






[16710] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.04485040000003762, loss: 0.15425121784210205
[16720] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.044817400000037644, loss: 0.15444116294384003
[16730] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.044784400000037666, loss: 0.14751824736595154
[16740] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04475140000003769, loss: 0.40090927481651306
[16750] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.04471840000003771, loss: 0.0752103179693222
[16760] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.044685400000037734, loss: 0.026706229895353317
[16770] time live:139, cumulated reward: 15.89999999999997, exploring rate: 0.044652400000037756, loss: 0.016204219311475754
[16780] time live:87, cumulated reward: 8.699999999999985, exploring rate: 0.04461940000003778, loss: 0.26391124725341797
[16790] tim

 99%|█████████▉| 146/147 [00:02<00:00, 58.90it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16800.webm 






[16810] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.044520400000037846, loss: 0.034411780536174774
[16820] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.04448740000003787, loss: 0.04199628159403801
[16830] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.04445440000003789, loss: 0.04389405623078346
[16840] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.044421400000037914, loss: 0.03257102891802788
[16850] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.044388400000037936, loss: 0.09049469977617264
[16860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04435540000003796, loss: 0.008023213595151901
[16870] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04432240000003798, loss: 0.1097099632024765
[16880] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.044289400000038004, loss: 0.05891483277082443
[16890] time live:

 99%|█████████▉| 98/99 [00:01<00:00, 79.16it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-16900.webm 






[16910] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.04419040000003807, loss: 0.05911196023225784
[16920] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.044157400000038094, loss: 0.020753424614667892
[16930] time live:156, cumulated reward: 17.599999999999994, exploring rate: 0.044124400000038116, loss: 0.019464442506432533
[16940] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.04409140000003814, loss: 0.3235190510749817
[16950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04405840000003816, loss: 0.01759951189160347
[16960] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.044025400000038184, loss: 0.11315034329891205
[16970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.043992400000038206, loss: 0.10777446627616882
[16980] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04395940000003823, loss: 0.06651736795902252
[16990] time l

 99%|█████████▉| 98/99 [00:01<00:00, 82.62it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17000.webm 






[17010] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.0438604000000383, loss: 0.6639773845672607
[17020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04382740000003832, loss: 0.7074682116508484
[17030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04379440000003834, loss: 0.2685267925262451
[17040] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.043761400000038364, loss: 0.009798957034945488
[17050] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04372840000003839, loss: 0.03595045208930969
[17060] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.04369540000003841, loss: 0.11412537097930908
[17070] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04366240000003843, loss: 0.02543487586081028
[17080] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.043629400000038454, loss: 0.019779566675424576
[17090] time live:73, 

 99%|█████████▊| 69/70 [00:00<00:00, 76.71it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17100.webm 






[17110] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.04353040000003852, loss: 0.24986965954303741
[17120] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.043497400000038544, loss: 0.31416624784469604
[17130] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.04346440000003857, loss: 0.06371841579675674
[17140] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.04343140000003859, loss: 0.3311406075954437
[17150] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.04339840000003861, loss: 0.2798137068748474
[17160] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.043365400000038634, loss: 0.10162052512168884
[17170] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.04333240000003866, loss: 0.02070477232336998
[17180] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.04329940000003868, loss: 0.0635119080543518
[17190] time live:1

 99%|█████████▊| 76/77 [00:00<00:00, 82.46it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17200.webm 

[17210] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.04320040000003875, loss: 0.19437584280967712
[17220] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.04316740000003877, loss: 0.21915198862552643
[17230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04313440000003879, loss: 0.09738481044769287
[17240] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.043101400000038814, loss: 0.25011560320854187
[17250] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04306840000003884, loss: 0.03576745092868805
[17260] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.04303540000003886, loss: 0.24037334322929382
[17270] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04300240000003888, loss: 0.028344087302684784
[17280] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 

 99%|█████████▉| 102/103 [00:01<00:00, 80.73it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17300.webm 






[17310] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.04287040000003897, loss: 0.11079631000757217
[17320] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.042837400000038994, loss: 0.16885815560817719
[17330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04280440000003902, loss: 0.04132143408060074
[17340] time live:158, cumulated reward: 17.799999999999997, exploring rate: 0.04277140000003904, loss: 0.02754068933427334
[17350] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.04273840000003906, loss: 0.36039063334465027
[17360] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.042705400000039084, loss: 0.7207173705101013
[17370] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.04267240000003911, loss: 0.11377319693565369
[17380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04263940000003913, loss: 0.021718353033065796
[17390] time live:

 98%|█████████▊| 63/64 [00:01<00:00, 57.58it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17400.webm 






[17410] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0425404000000392, loss: 0.24557921290397644
[17420] time live:175, cumulated reward: 19.50000000000002, exploring rate: 0.04250740000003922, loss: 0.036340177059173584
[17430] time live:190, cumulated reward: 22.000000000000043, exploring rate: 0.04247440000003924, loss: 0.039927877485752106
[17440] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.042441400000039264, loss: 0.03942030295729637
[17450] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.04240840000003929, loss: 0.019863059744238853
[17460] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04237540000003931, loss: 0.021749582141637802
[17470] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.04234240000003933, loss: 0.05295272916555405
[17480] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.042309400000039354, loss: 0.015528260730206966
[17490] ti

 99%|█████████▊| 66/67 [00:00<00:00, 76.06it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17500.webm 

[17510] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.04221040000003942, loss: 0.08754722028970718
[17520] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.042177400000039444, loss: 0.02705978788435459
[17530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04214440000003947, loss: 0.02066703513264656
[17540] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.04211140000003949, loss: 0.018894508481025696
[17550] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.04207840000003951, loss: 0.020480075851082802
[17560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.042045400000039534, loss: 0.07003024965524673
[17570] time live:206, cumulated reward: 23.600000000000065, exploring rate: 0.04201240000003956, loss: 0.1620270311832428
[17580] time live:64, cumulated reward: 5.399999999999993, exploring rate: 

 99%|█████████▉| 104/105 [00:01<00:00, 79.76it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17600.webm 

[17610] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.04188040000003965, loss: 0.01328662596642971
[17620] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.04184740000003967, loss: 0.19477775692939758
[17630] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.04181440000003969, loss: 0.34051230549812317
[17640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.041781400000039715, loss: 0.14275474846363068
[17650] time live:138, cumulated reward: 15.799999999999969, exploring rate: 0.04174840000003974, loss: 0.025528917089104652
[17660] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04171540000003976, loss: 0.017187822610139847
[17670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04168240000003978, loss: 0.013034142553806305
[17680] time live:140, cumulated reward: 15.999999999999972, exploring ra

 99%|█████████▉| 98/99 [00:01<00:00, 84.88it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17700.webm 

[17710] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04155040000003987, loss: 0.602331817150116
[17720] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.041517400000039895, loss: 0.03198827803134918
[17730] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04148440000003992, loss: 0.01843458041548729
[17740] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.04145140000003994, loss: 0.09029118716716766
[17750] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04141840000003996, loss: 0.005849648732692003
[17760] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.041385400000039985, loss: 0.16794158518314362
[17770] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04135240000004001, loss: 0.01592830941081047
[17780] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.

 99%|█████████▊| 73/74 [00:00<00:00, 77.60it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17800.webm 






[17810] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0412204000000401, loss: 0.20934732258319855
[17820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04118740000004012, loss: 0.038654476404190063
[17830] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.04115440000004014, loss: 0.025340627878904343
[17840] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.041121400000040165, loss: 0.03984012454748154
[17850] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04108840000004019, loss: 0.01278592273592949
[17860] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.04105540000004021, loss: 0.3495444655418396
[17870] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.04102240000004023, loss: 0.15693283081054688
[17880] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.040989400000040255, loss: 0.024232972413301468
[17890] time live:

 99%|█████████▉| 98/99 [00:01<00:00, 82.57it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-17900.webm 

[17910] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04089040000004032, loss: 0.06855268031358719
[17920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.040857400000040345, loss: 0.08217736333608627
[17930] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.04082440000004037, loss: 0.027407143265008926
[17940] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04079140000004039, loss: 0.11319884657859802
[17950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04075840000004041, loss: 0.027640895918011665
[17960] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.040725400000040435, loss: 0.013166813179850578
[17970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04069240000004046, loss: 0.019504694268107414
[17980] time live:73, cumulated reward: 7.29999999999999, exploring rate:

100%|██████████| 215/215 [00:02<00:00, 81.12it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18000.webm 






[18010] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04056040000004055, loss: 0.021615948528051376
[18020] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04052740000004057, loss: 0.09111012518405914
[18030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04049440000004059, loss: 0.016809340566396713
[18040] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.040461400000040615, loss: 0.10132946074008942
[18050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04042840000004064, loss: 0.05648469924926758
[18060] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.04039540000004066, loss: 0.02900588884949684
[18070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04036240000004068, loss: 0.027139754965901375
[18080] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.040329400000040705, loss: 0.019701238721609116
[18090] time l

 99%|█████████▉| 98/99 [00:01<00:00, 82.16it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18100.webm 






[18110] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.04023040000004077, loss: 0.0506543293595314
[18120] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.040197400000040795, loss: 0.14484964311122894
[18130] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.04016440000004082, loss: 0.05715739727020264
[18140] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.04013140000004084, loss: 0.04332792013883591
[18150] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.04009840000004086, loss: 0.027145657688379288
[18160] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.040065400000040885, loss: 0.4956280291080475
[18170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.04003240000004091, loss: 0.014662286266684532
[18180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03999940000004093, loss: 0.059068307280540466
[18190] time live

 99%|█████████▉| 134/135 [00:01<00:00, 77.02it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18200.webm 






[18210] time live:139, cumulated reward: 15.89999999999997, exploring rate: 0.039900400000041, loss: 0.08864764869213104
[18220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03986740000004102, loss: 0.17385147511959076
[18230] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03983440000004104, loss: 0.08164263516664505
[18240] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.039801400000041065, loss: 0.031265340745449066
[18250] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.03976840000004109, loss: 0.0712592825293541
[18260] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.03973540000004111, loss: 0.14559513330459595
[18270] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03970240000004113, loss: 0.15196551382541656
[18280] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.039669400000041155, loss: 0.19922128319740295
[18290] time live:1

 99%|█████████▉| 103/104 [00:01<00:00, 85.74it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18300.webm 

[18310] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03957040000004122, loss: 0.04664377123117447
[18320] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.039537400000041245, loss: 0.04086395353078842
[18330] time live:87, cumulated reward: 8.699999999999985, exploring rate: 0.03950440000004127, loss: 0.33617937564849854
[18340] time live:185, cumulated reward: 21.500000000000036, exploring rate: 0.03947140000004129, loss: 0.15330784022808075
[18350] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03943840000004131, loss: 0.04005906358361244
[18360] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.039405400000041335, loss: 0.0416000634431839
[18370] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.03937240000004136, loss: 0.026348553597927094
[18380] time live:66, cumulated reward: 6.5999999999999925, exploring rat

100%|██████████| 63/63 [00:00<00:00, 73.48it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18400.webm 






[18410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03924040000004145, loss: 0.5184196829795837
[18420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03920740000004147, loss: 0.6703019738197327
[18430] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.03917440000004149, loss: 0.05803962051868439
[18440] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.039141400000041515, loss: 0.21267947554588318
[18450] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.03910840000004154, loss: 0.027680162340402603
[18460] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03907540000004156, loss: 0.12899024784564972
[18470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03904240000004158, loss: 0.01977788656949997
[18480] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.039009400000041605, loss: 0.054559048265218735
[18490] time live:10

 99%|█████████▉| 149/150 [00:01<00:00, 81.44it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18500.webm 






[18510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03891040000004167, loss: 0.21310462057590485
[18520] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.038877400000041695, loss: 0.12033309042453766
[18530] time live:57, cumulated reward: 4.699999999999996, exploring rate: 0.03884440000004172, loss: 0.029514038935303688
[18540] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.03881140000004174, loss: 0.02713785320520401
[18550] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03877840000004176, loss: 0.19517424702644348
[18560] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.038745400000041785, loss: 0.06402253359556198
[18570] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.03871240000004181, loss: 0.06217719614505768
[18580] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.03867940000004183, loss: 0.05346706509590149
[18590] time live:

 99%|█████████▉| 143/144 [00:01<00:00, 80.25it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18600.webm 






[18610] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.0385804000000419, loss: 0.11718982458114624
[18620] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.03854740000004192, loss: 0.02542153373360634
[18630] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03851440000004194, loss: 0.01854851469397545
[18640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.038481400000041965, loss: 0.04470231756567955
[18650] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03844840000004199, loss: 0.1660587042570114
[18660] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03841540000004201, loss: 0.024076154455542564
[18670] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03838240000004203, loss: 0.22273778915405273
[18680] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.038349400000042055, loss: 0.01751713827252388
[18690] time live:6

100%|██████████| 63/63 [00:00<00:00, 78.84it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18700.webm 






[18710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03825040000004212, loss: 0.07402817159891129
[18720] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.038217400000042145, loss: 0.032224856317043304
[18730] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03818440000004217, loss: 0.029299693182110786
[18740] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03815140000004219, loss: 0.30988726019859314
[18750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03811840000004221, loss: 0.014067288488149643
[18760] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.038085400000042235, loss: 0.13243664801120758
[18770] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.03805240000004226, loss: 0.3468761146068573
[18780] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.03801940000004228, loss: 0.01802339404821396
[18790] time live:97,

 99%|█████████▊| 66/67 [00:00<00:00, 73.43it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18800.webm 






[18810] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03792040000004235, loss: 0.020023148506879807
[18820] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03788740000004237, loss: 0.015185270458459854
[18830] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.03785440000004239, loss: 0.17031510174274445
[18840] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.037821400000042416, loss: 0.09652349352836609
[18850] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.03778840000004244, loss: 0.035718027502298355
[18860] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.03775540000004246, loss: 0.0863005518913269
[18870] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03772240000004248, loss: 0.051176074892282486
[18880] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.037689400000042506, loss: 0.13682374358177185
[18890] time

 99%|█████████▉| 106/107 [00:01<00:00, 85.09it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-18900.webm 






[18910] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.03759040000004257, loss: 0.03494773060083389
[18920] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.037557400000042596, loss: 0.07761023193597794
[18930] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.03752440000004262, loss: 0.035472121089696884
[18940] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03749140000004264, loss: 0.8610893487930298
[18950] time live:134, cumulated reward: 14.399999999999968, exploring rate: 0.03745840000004266, loss: 0.08504866063594818
[18960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.037425400000042686, loss: 0.029691211879253387
[18970] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.03739240000004271, loss: 0.27276960015296936
[18980] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.03735940000004273, loss: 0.015394672751426697
[18990] time live:6

100%|██████████| 63/63 [00:00<00:00, 73.26it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19000.webm 






[19010] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.0372604000000428, loss: 0.01701977103948593
[19020] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03722740000004282, loss: 0.07655353099107742
[19030] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03719440000004284, loss: 0.1753421276807785
[19040] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.037161400000042866, loss: 0.19811801612377167
[19050] time live:217, cumulated reward: 25.70000000000008, exploring rate: 0.03712840000004289, loss: 0.02677483670413494
[19060] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.03709540000004291, loss: 0.07438881695270538
[19070] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03706240000004293, loss: 0.04096484184265137
[19080] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.037029400000042956, loss: 0.26790493726730347
[19090] time live:1

 99%|█████████▊| 68/69 [00:00<00:00, 74.04it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19100.webm 






[19110] time live:89, cumulated reward: 8.899999999999984, exploring rate: 0.03693040000004302, loss: 0.20867852866649628
[19120] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.036897400000043046, loss: 0.01631578989326954
[19130] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.03686440000004307, loss: 0.09392145276069641
[19140] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03683140000004309, loss: 0.029578013345599174
[19150] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03679840000004311, loss: 0.02955922670662403
[19160] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.036765400000043136, loss: 0.014058172702789307
[19170] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.03673240000004316, loss: 0.27050548791885376
[19180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03669940000004318, loss: 0.19728845357894897
[19190] time l

 99%|█████████▉| 106/107 [00:01<00:00, 86.15it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19200.webm 






[19210] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03660040000004325, loss: 0.033548012375831604
[19220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03656740000004327, loss: 0.05007576197385788
[19230] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03653440000004329, loss: 0.08543214946985245
[19240] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.036501400000043316, loss: 0.02455579861998558
[19250] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.03646840000004334, loss: 0.09486870467662811
[19260] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.03643540000004336, loss: 0.07882845401763916
[19270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03640240000004338, loss: 0.03294662758708
[19280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.036369400000043406, loss: 0.026331763714551926
[19290] time live:63, c

100%|██████████| 108/108 [00:01<00:00, 77.45it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19300.webm 






[19310] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.03627040000004347, loss: 0.028393391519784927
[19320] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.036237400000043496, loss: 0.07634049654006958
[19330] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.03620440000004352, loss: 0.20700225234031677
[19340] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03617140000004354, loss: 0.28135600686073303
[19350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03613840000004356, loss: 0.13389840722084045
[19360] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.036105400000043586, loss: 0.12499283254146576
[19370] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.03607240000004361, loss: 0.13667653501033783
[19380] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.03603940000004363, loss: 0.09209669381380081
[19390] time live:133

100%|██████████| 63/63 [00:00<00:00, 70.31it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19400.webm 






[19410] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.0359404000000437, loss: 0.3840758502483368
[19420] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.03590740000004372, loss: 0.011845419183373451
[19430] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.035874400000043744, loss: 0.11687199771404266
[19440] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.035841400000043766, loss: 0.02174609713256359
[19450] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03580840000004379, loss: 0.015227408148348331
[19460] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03577540000004381, loss: 0.0497630275785923
[19470] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.035742400000043834, loss: 0.029047377407550812
[19480] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.035709400000043856, loss: 0.10716922581195831
[19490] time live

 99%|█████████▊| 78/79 [00:00<00:00, 79.23it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19500.webm 

[19510] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.035610400000043924, loss: 0.10639457404613495
[19520] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.035577400000043946, loss: 0.03151891753077507
[19530] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.03554440000004397, loss: 0.17862939834594727
[19540] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.03551140000004399, loss: 0.04209592938423157
[19550] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.035478400000044014, loss: 0.02764129638671875
[19560] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.035445400000044036, loss: 0.014831390231847763
[19570] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.03541240000004406, loss: 0.25879809260368347
[19580] time live:134, cumulated reward: 14.399999999999968, exploring ra

 99%|█████████▉| 134/135 [00:01<00:00, 78.39it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19600.webm 






[19610] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03528040000004415, loss: 0.0632859468460083
[19620] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.03524740000004417, loss: 0.04749266803264618
[19630] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.035214400000044194, loss: 0.564102828502655
[19640] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.035181400000044216, loss: 0.05393974483013153
[19650] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.03514840000004424, loss: 0.18795762956142426
[19660] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.03511540000004426, loss: 0.04936753213405609
[19670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.035082400000044284, loss: 0.4228895604610443
[19680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.035049400000044306, loss: 0.02136199362576008
[19690] time live:61, cu

 99%|█████████▊| 71/72 [00:01<00:00, 63.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19700.webm 






[19710] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.034950400000044374, loss: 0.10155966132879257
[19720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.034917400000044396, loss: 0.02607019990682602
[19730] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03488440000004442, loss: 0.29762840270996094
[19740] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.03485140000004444, loss: 0.03257933631539345
[19750] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.034818400000044464, loss: 0.027290303260087967
[19760] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.034785400000044486, loss: 0.016790566965937614
[19770] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03475240000004451, loss: 0.02291150763630867
[19780] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03471940000004453, loss: 0.1490413397550583
[19790] time live:6

 99%|█████████▉| 99/100 [00:01<00:00, 55.29it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19800.webm 






[19810] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.0346204000000446, loss: 0.05337660387158394
[19820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03458740000004462, loss: 0.10661065578460693
[19830] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.034554400000044644, loss: 0.24478019773960114
[19840] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.034521400000044666, loss: 0.015668300911784172
[19850] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03448840000004469, loss: 0.17784345149993896
[19860] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.03445540000004471, loss: 0.023161236196756363
[19870] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.034422400000044734, loss: 0.25001347064971924
[19880] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.034389400000044756, loss: 0.11590219289064407
[19890] time live

 99%|█████████▉| 98/99 [00:01<00:00, 71.75it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-19900.webm 






[19910] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.034290400000044824, loss: 0.3381696343421936
[19920] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.034257400000044846, loss: 0.021635189652442932
[19930] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.03422440000004487, loss: 0.0416090302169323
[19940] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.03419140000004489, loss: 0.06008128076791763
[19950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.034158400000044914, loss: 0.3545496463775635
[19960] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.034125400000044936, loss: 0.20231258869171143
[19970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03409240000004496, loss: 0.036026354879140854
[19980] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.03405940000004498, loss: 0.032022103667259216
[19990] time live:7

 98%|█████████▊| 65/66 [00:00<00:00, 81.71it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20000.webm 






[20010] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.03396040000004505, loss: 0.05125615745782852
[20020] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03392740000004507, loss: 0.12418776005506516
[20030] time live:181, cumulated reward: 21.10000000000003, exploring rate: 0.033894400000045094, loss: 0.07237187027931213
[20040] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.033861400000045117, loss: 0.027717165648937225
[20050] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03382840000004514, loss: 0.031969379633665085
[20060] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.03379540000004516, loss: 0.17305225133895874
[20070] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.033762400000045184, loss: 0.017265180125832558
[20080] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03372940000004521, loss: 0.3177732825279236
[20090] time live

 99%|█████████▊| 75/76 [00:01<00:00, 72.51it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20100.webm 






[20110] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.033630400000045274, loss: 0.024154191836714745
[20120] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.0335974000000453, loss: 0.11062591522932053
[20130] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.03356440000004532, loss: 0.0391414538025856
[20140] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.03353140000004534, loss: 0.01515671331435442
[20150] time live:148, cumulated reward: 16.799999999999983, exploring rate: 0.033498400000045364, loss: 0.11240103840827942
[20160] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.03346540000004539, loss: 0.015519784763455391
[20170] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03343240000004541, loss: 0.035892605781555176
[20180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03339940000004543, loss: 0.03131229430437088
[20190] time li

 99%|█████████▉| 99/100 [00:01<00:00, 83.59it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20200.webm 






[20210] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.0333004000000455, loss: 0.03170320764183998
[20220] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.03326740000004552, loss: 0.02604161761701107
[20230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.033234400000045544, loss: 0.5643476843833923
[20240] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03320140000004557, loss: 0.015020252205431461
[20250] time live:184, cumulated reward: 21.400000000000034, exploring rate: 0.03316840000004559, loss: 0.07772472500801086
[20260] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.03313540000004561, loss: 0.050253577530384064
[20270] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.033102400000045634, loss: 0.040516115725040436
[20280] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.03306940000004566, loss: 0.07754949480295181
[20290] time liv

 99%|█████████▉| 182/183 [00:02<00:00, 80.94it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20300.webm 






[20310] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.032970400000045724, loss: 0.054604850709438324
[20320] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.03293740000004575, loss: 0.021355213597416878
[20330] time live:175, cumulated reward: 19.50000000000002, exploring rate: 0.03290440000004577, loss: 0.07427766174077988
[20340] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.03287140000004579, loss: 0.04497447609901428
[20350] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.032838400000045814, loss: 0.08336130529642105
[20360] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.03280540000004584, loss: 0.1341044008731842
[20370] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.03277240000004586, loss: 0.28077957034111023
[20380] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03273940000004588, loss: 0.027161557227373123
[20390] time live:61,

 99%|█████████▉| 175/176 [00:02<00:00, 77.76it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20400.webm 






[20410] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.03264040000004595, loss: 0.02491992712020874
[20420] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03260740000004597, loss: 0.2183150053024292
[20430] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.032574400000045994, loss: 0.0913228914141655
[20440] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.03254140000004602, loss: 0.07819899916648865
[20450] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.03250840000004604, loss: 0.19407731294631958
[20460] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.03247540000004606, loss: 0.08972331881523132
[20470] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.032442400000046084, loss: 0.021106209605932236
[20480] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.03240940000004611, loss: 0.013879738748073578
[20490] time

100%|██████████| 247/247 [00:03<00:00, 76.72it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20500.webm 






[20510] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.032310400000046174, loss: 0.21594053506851196
[20520] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.0322774000000462, loss: 0.05726777762174606
[20530] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03224440000004622, loss: 0.1729208528995514
[20540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03221140000004624, loss: 0.2604941129684448
[20550] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.032178400000046264, loss: 0.026872649788856506
[20560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03214540000004629, loss: 0.16661201417446136
[20570] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.03211240000004631, loss: 0.24030998349189758
[20580] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.03207940000004633, loss: 0.01830662228167057
[20590] time live:97

 99%|█████████▉| 103/104 [00:01<00:00, 74.38it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20600.webm 






[20610] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.0319804000000464, loss: 0.021517159417271614
[20620] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03194740000004642, loss: 0.019685957580804825
[20630] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.031914400000046445, loss: 0.24921779334545135
[20640] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.03188140000004647, loss: 0.05227769911289215
[20650] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03184840000004649, loss: 0.054847583174705505
[20660] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.03181540000004651, loss: 0.05689787492156029
[20670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.031782400000046535, loss: 0.18802346289157867
[20680] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03174940000004656, loss: 0.02183859422802925
[20690] time live:

100%|██████████| 63/63 [00:00<00:00, 67.71it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20700.webm 






[20710] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.031650400000046625, loss: 0.0163884237408638
[20720] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.03161740000004665, loss: 0.017421437427401543
[20730] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.03158440000004667, loss: 0.397960364818573
[20740] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.03155140000004669, loss: 0.12797561287879944
[20750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.031518400000046715, loss: 0.2993367612361908
[20760] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03148540000004674, loss: 0.022764408960938454
[20770] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.03145240000004676, loss: 0.5341922640800476
[20780] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03141940000004678, loss: 0.4418696165084839
[20790] time live:133,

 99%|█████████▊| 69/70 [00:01<00:00, 66.88it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20800.webm 






[20810] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03132040000004685, loss: 0.5219129920005798
[20820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.03128740000004687, loss: 0.18176762759685516
[20830] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.031254400000046895, loss: 0.08562711626291275
[20840] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.031221400000046886, loss: 0.01843264326453209
[20850] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.031188400000046874, loss: 0.4866279065608978
[20860] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03115540000004686, loss: 0.07558896392583847
[20870] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.03112240000004685, loss: 0.025107339024543762
[20880] time live:148, cumulated reward: 16.799999999999983, exploring rate: 0.031089400000046837, loss: 0.031186524778604507
[20890] time l

 99%|█████████▉| 98/99 [00:01<00:00, 77.90it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-20900.webm 






[20910] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.0309904000000468, loss: 0.014755534939467907
[20920] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.03095740000004679, loss: 0.04238072782754898
[20930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.030924400000046776, loss: 0.14537887275218964
[20940] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.030891400000046764, loss: 0.04241817817091942
[20950] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.030858400000046752, loss: 0.03750128298997879
[20960] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.03082540000004674, loss: 0.04581008851528168
[20970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.030792400000046728, loss: 0.025565430521965027
[20980] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.030759400000046715, loss: 0.16912630200386047
[20990] time liv

100%|█████████▉| 211/212 [00:03<00:00, 69.64it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21000.webm 






[21010] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.03066040000004668, loss: 0.1573241651058197
[21020] time live:181, cumulated reward: 21.10000000000003, exploring rate: 0.030627400000046667, loss: 0.02293935790657997
[21030] time live:224, cumulated reward: 26.40000000000009, exploring rate: 0.030594400000046654, loss: 0.008137822151184082
[21040] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.030561400000046642, loss: 0.037075575441122055
[21050] time live:109, cumulated reward: 11.899999999999977, exploring rate: 0.03052840000004663, loss: 0.49202466011047363
[21060] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.030495400000046618, loss: 0.09748663008213043
[21070] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.030462400000046606, loss: 0.25721150636672974
[21080] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.030429400000046593, loss: 0.09549200534820557
[21090] ti

 99%|█████████▉| 134/135 [00:01<00:00, 72.78it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21100.webm 






[21110] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.030330400000046557, loss: 0.04672028124332428
[21120] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.030297400000046545, loss: 0.04118366539478302
[21130] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.030264400000046533, loss: 0.08544619381427765
[21140] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.03023140000004652, loss: 0.020257892087101936
[21150] time live:326, cumulated reward: 38.600000000000236, exploring rate: 0.030198400000046508, loss: 0.3822682499885559
[21160] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.030165400000046496, loss: 0.02578621357679367
[21170] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.030132400000046484, loss: 0.024543490260839462
[21180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.03009940000004647, loss: 0.23939403891563416
[21190] tim

 99%|█████████▊| 72/73 [00:01<00:00, 68.99it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21200.webm 






[21210] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.030000400000046435, loss: 0.0789945125579834
[21220] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.029967400000046423, loss: 0.07772783190011978
[21230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02993440000004641, loss: 0.029720325022935867
[21240] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.0299014000000464, loss: 0.5043761134147644
[21250] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.029868400000046386, loss: 0.1642766147851944
[21260] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.029835400000046374, loss: 0.048799484968185425
[21270] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.029802400000046362, loss: 0.15507714450359344
[21280] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.02976940000004635, loss: 0.05827543884515762
[21290] time live:14

 99%|█████████▉| 134/135 [00:01<00:00, 70.19it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21300.webm 






[21310] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.029670400000046313, loss: 0.06898211687803268
[21320] time live:138, cumulated reward: 15.799999999999969, exploring rate: 0.0296374000000463, loss: 0.07150710374116898
[21330] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.02960440000004629, loss: 0.25923410058021545
[21340] time live:142, cumulated reward: 16.199999999999974, exploring rate: 0.029571400000046277, loss: 0.04676719754934311
[21350] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.029538400000046264, loss: 0.0317455418407917
[21360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.029505400000046252, loss: 0.16904374957084656
[21370] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.02947240000004624, loss: 0.036643240600824356
[21380] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.029439400000046228, loss: 0.018697036430239677
[21390] time 

 99%|█████████▉| 101/102 [00:01<00:00, 72.38it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21400.webm 






[21410] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02934040000004619, loss: 0.08166996389627457
[21420] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.02930740000004618, loss: 0.062445588409900665
[21430] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.029274400000046167, loss: 0.049521517008543015
[21440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.029241400000046155, loss: 0.07805518805980682
[21450] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.029208400000046143, loss: 0.13791610300540924
[21460] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.02917540000004613, loss: 0.027538537979125977
[21470] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.029142400000046118, loss: 0.06625865399837494
[21480] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.029109400000046106, loss: 0.21745622158050537
[21490] time liv

 99%|█████████▉| 98/99 [00:01<00:00, 72.78it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21500.webm 






[21510] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.02901040000004607, loss: 0.07865479588508606
[21520] time live:89, cumulated reward: 8.899999999999984, exploring rate: 0.028977400000046057, loss: 0.03771787881851196
[21530] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.028944400000046045, loss: 0.07510949671268463
[21540] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.028911400000046033, loss: 0.03527975082397461
[21550] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.02887840000004602, loss: 0.4973820149898529
[21560] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.02884540000004601, loss: 0.286557137966156
[21570] time live:141, cumulated reward: 16.099999999999973, exploring rate: 0.028812400000045996, loss: 0.08201771229505539
[21580] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.028779400000045984, loss: 0.01994161680340767
[21590] time live:6

 99%|█████████▉| 129/130 [00:01<00:00, 73.55it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21600.webm 






[21610] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.028680400000045948, loss: 0.04279942065477371
[21620] time live:176, cumulated reward: 19.600000000000023, exploring rate: 0.028647400000045935, loss: 0.38306334614753723
[21630] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.028614400000045923, loss: 0.021460941061377525
[21640] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.02858140000004591, loss: 0.04332765191793442
[21650] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.0285484000000459, loss: 0.23890788853168488
[21660] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.028515400000045887, loss: 0.3529650866985321
[21670] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.028482400000045874, loss: 0.03618580847978592
[21680] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.028449400000045862, loss: 0.053240858018398285
[21690] time li

 99%|█████████▉| 98/99 [00:01<00:00, 67.40it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21700.webm 






[21710] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.028350400000045826, loss: 0.08434417098760605
[21720] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.028317400000045814, loss: 0.045857369899749756
[21730] time live:142, cumulated reward: 16.199999999999974, exploring rate: 0.0282844000000458, loss: 0.6130228042602539
[21740] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.02825140000004579, loss: 0.03887980058789253
[21750] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.028218400000045777, loss: 0.01857423596084118
[21760] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.028185400000045765, loss: 0.054247915744781494
[21770] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.028152400000045753, loss: 0.07349485158920288
[21780] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.02811940000004574, loss: 0.08934050053358078
[21790] ti

 99%|█████████▉| 87/88 [00:01<00:00, 68.74it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21800.webm 






[21810] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.028020400000045704, loss: 0.0387372225522995
[21820] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02798740000004569, loss: 0.018000442534685135
[21830] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.02795440000004568, loss: 0.026295805349946022
[21840] time live:365, cumulated reward: 44.50000000000029, exploring rate: 0.027921400000045667, loss: 0.11419324576854706
[21850] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.027888400000045655, loss: 0.04268823191523552
[21860] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.027855400000045643, loss: 0.02312256395816803
[21870] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02782240000004563, loss: 0.10211840271949768
[21880] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.02778940000004562, loss: 0.12761461734771729
[21890] time 

100%|██████████| 63/63 [00:01<00:00, 59.96it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-21900.webm 






[21910] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.027690400000045582, loss: 0.18290069699287415
[21920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02765740000004557, loss: 0.08717077970504761
[21930] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.027624400000045558, loss: 0.17006923258304596
[21940] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.027591400000045545, loss: 0.050568047910928726
[21950] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.027558400000045533, loss: 0.02235446125268936
[21960] time live:147, cumulated reward: 16.69999999999998, exploring rate: 0.02752540000004552, loss: 0.026199588552117348
[21970] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.02749240000004551, loss: 0.05728386342525482
[21980] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.027459400000045497, loss: 0.14697395265102386
[21990] 

100%|██████████| 78/78 [00:01<00:00, 63.22it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22000.webm 






[22010] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02736040000004546, loss: 0.06039223447442055
[22020] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.027327400000045448, loss: 0.210248664021492
[22030] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.027294400000045436, loss: 0.39312368631362915
[22040] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.027261400000045424, loss: 0.0730503648519516
[22050] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02722840000004541, loss: 0.053680870682001114
[22060] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0271954000000454, loss: 0.016076236963272095
[22070] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.027162400000045387, loss: 0.4846477806568146
[22080] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.027129400000045375, loss: 0.02186259999871254
[22090] time live:97, 

 99%|█████████▉| 135/136 [00:01<00:00, 74.25it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22100.webm 






[22110] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.02703040000004534, loss: 0.11277376860380173
[22120] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.026997400000045326, loss: 0.030779512599110603
[22130] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.026964400000045314, loss: 0.23694637417793274
[22140] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.026931400000045302, loss: 0.08647105097770691
[22150] time live:175, cumulated reward: 19.50000000000002, exploring rate: 0.02689840000004529, loss: 0.07358498871326447
[22160] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.026865400000045277, loss: 0.1259087324142456
[22170] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.026832400000045265, loss: 0.14836695790290833
[22180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.026799400000045253, loss: 0.05187101662158966
[22190] time liv

 99%|█████████▊| 78/79 [00:01<00:00, 68.87it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22200.webm 






[22210] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.026700400000045216, loss: 0.3748842477798462
[22220] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.026667400000045204, loss: 0.029014870524406433
[22230] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.026634400000045192, loss: 0.09505265951156616
[22240] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.02660140000004518, loss: 0.09929216653108597
[22250] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.026568400000045168, loss: 0.014212757349014282
[22260] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.026535400000045156, loss: 0.22489827871322632
[22270] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.026502400000045143, loss: 0.06030638888478279
[22280] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.02646940000004513, loss: 0.22236564755439758
[22290] time live:6

 99%|█████████▉| 98/99 [00:01<00:00, 70.72it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22300.webm 






[22310] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.026370400000045095, loss: 0.025487184524536133
[22320] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.026337400000045082, loss: 0.2701836824417114
[22330] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.02630440000004507, loss: 0.1999395191669464
[22340] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.026271400000045058, loss: 0.06019787862896919
[22350] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.026238400000045046, loss: 0.025260072201490402
[22360] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.026205400000045034, loss: 0.04431440681219101
[22370] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.02617240000004502, loss: 0.29379817843437195
[22380] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.02613940000004501, loss: 0.036419376730918884
[22390] time live

100%|█████████▉| 222/223 [00:03<00:00, 68.45it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22400.webm 






[22410] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.026040400000044973, loss: 0.0225527286529541
[22420] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.02600740000004496, loss: 0.3463929295539856
[22430] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02597440000004495, loss: 0.1657579392194748
[22440] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.025941400000044936, loss: 0.013564344495534897
[22450] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.025908400000044924, loss: 0.015039762482047081
[22460] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.025875400000044912, loss: 0.08641216158866882
[22470] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.0258424000000449, loss: 0.07282596081495285
[22480] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.025809400000044887, loss: 1.1148494482040405
[22490] time live

 99%|█████████▉| 114/115 [00:01<00:00, 63.30it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22500.webm 






[22510] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.02571040000004485, loss: 0.8082506656646729
[22520] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.02567740000004484, loss: 0.06602035462856293
[22530] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.025644400000044826, loss: 0.5124951601028442
[22540] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.025611400000044814, loss: 0.0711374580860138
[22550] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.025578400000044802, loss: 0.10103681683540344
[22560] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.02554540000004479, loss: 0.20361262559890747
[22570] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.025512400000044778, loss: 0.08320872485637665
[22580] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.025479400000044766, loss: 0.2044239342212677
[22590] time live:

100%|█████████▉| 213/214 [00:03<00:00, 67.70it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22600.webm 






[22610] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.02538040000004473, loss: 0.036299459636211395
[22620] time live:184, cumulated reward: 21.400000000000034, exploring rate: 0.025347400000044717, loss: 0.07121343910694122
[22630] time live:213, cumulated reward: 24.300000000000075, exploring rate: 0.025314400000044705, loss: 0.06753095984458923
[22640] time live:176, cumulated reward: 19.600000000000023, exploring rate: 0.025281400000044692, loss: 0.7350453734397888
[22650] time live:144, cumulated reward: 16.399999999999977, exploring rate: 0.02524840000004468, loss: 0.06652461737394333
[22660] time live:513, cumulated reward: 62.3000000000005, exploring rate: 0.025215400000044668, loss: 0.30614978075027466
[22670] time live:190, cumulated reward: 22.000000000000043, exploring rate: 0.025182400000044656, loss: 0.0658942386507988
[22680] time live:262, cumulated reward: 31.200000000000145, exploring rate: 0.025149400000044644, loss: 0.2381189614534378
[22690

 99%|█████████▊| 69/70 [00:01<00:00, 63.13it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22700.webm 






[22710] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.025050400000044607, loss: 0.09832786023616791
[22720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.025017400000044595, loss: 0.10216820985078812
[22730] time live:136, cumulated reward: 14.599999999999968, exploring rate: 0.024984400000044583, loss: 0.29482218623161316
[22740] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.02495140000004457, loss: 0.043472059071063995
[22750] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02491840000004456, loss: 0.24107098579406738
[22760] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.024885400000044546, loss: 0.07419021427631378
[22770] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.024852400000044534, loss: 0.04556434974074364
[22780] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.024819400000044522, loss: 0.9953639507293701
[22790] time

 99%|█████████▊| 78/79 [00:01<00:00, 71.77it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22800.webm 






[22810] time live:145, cumulated reward: 16.49999999999998, exploring rate: 0.024720400000044485, loss: 0.04776925966143608
[22820] time live:323, cumulated reward: 38.30000000000023, exploring rate: 0.024687400000044473, loss: 0.05392557382583618
[22830] time live:289, cumulated reward: 33.90000000000018, exploring rate: 0.02465440000004446, loss: 0.2291877567768097
[22840] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.02462140000004445, loss: 0.2593192756175995
[22850] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.024588400000044437, loss: 0.020806938409805298
[22860] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.024555400000044424, loss: 1.420636773109436
[22870] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.024522400000044412, loss: 0.06737716495990753
[22880] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.0244894000000444, loss: 0.0412566103041172
[22890] time live:9

 99%|█████████▉| 142/143 [00:02<00:00, 70.47it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-22900.webm 






[22910] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.024390400000044363, loss: 0.05947323516011238
[22920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02435740000004435, loss: 0.025487437844276428
[22930] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.02432440000004434, loss: 0.04865449294447899
[22940] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.024291400000044327, loss: 0.03249334543943405
[22950] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.024258400000044315, loss: 0.031896211206912994
[22960] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.024225400000044302, loss: 0.13881975412368774
[22970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02419240000004429, loss: 1.0144282579421997
[22980] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.024159400000044278, loss: 0.015093741938471794
[22990] time 

 95%|█████████▌| 19/20 [00:00<00:00, 65.72it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23000.webm 






[23010] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.02406040000004424, loss: 0.5312071442604065
[23020] time live:34, cumulated reward: 2.400000000000002, exploring rate: 0.02402740000004423, loss: 0.4828850030899048
[23030] time live:55, cumulated reward: 4.4999999999999964, exploring rate: 0.023994400000044217, loss: 0.10837890952825546
[23040] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.023961400000044205, loss: 0.15386033058166504
[23050] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.023928400000044193, loss: 0.029217440634965897
[23060] time live:37, cumulated reward: 2.7000000000000024, exploring rate: 0.02389540000004418, loss: 0.07838264107704163
[23070] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.02386240000004417, loss: 0.05372222512960434
[23080] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.023829400000044156, loss: 0.8050503730773926
[23090] time live

100%|██████████| 63/63 [00:01<00:00, 59.36it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23100.webm 






[23110] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.02373040000004412, loss: 0.04764973744750023
[23120] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.023697400000044107, loss: 0.058428067713975906
[23130] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.023664400000044095, loss: 0.3977229595184326
[23140] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.023631400000044083, loss: 0.12995292246341705
[23150] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.02359840000004407, loss: 0.7133447527885437
[23160] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02356540000004406, loss: 0.026693925261497498
[23170] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.023532400000044047, loss: 0.06466592848300934
[23180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.023499400000044034, loss: 0.1279914528131485
[23190] time live:62

 98%|█████████▊| 63/64 [00:01<00:00, 58.09it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23200.webm 






[23210] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.023400400000043998, loss: 0.16338606178760529
[23220] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.023367400000043986, loss: 0.06814590096473694
[23230] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.023334400000043973, loss: 0.2500762343406677
[23240] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02330140000004396, loss: 0.19487449526786804
[23250] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02326840000004395, loss: 0.2923237681388855
[23260] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.023235400000043937, loss: 1.081160545349121
[23270] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.023202400000043925, loss: 0.06570959836244583
[23280] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.023169400000043913, loss: 0.069155752658844
[23290] time live:67, c

100%|██████████| 63/63 [00:01<00:00, 61.35it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23300.webm 






[23310] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.023070400000043876, loss: 0.027455369010567665
[23320] time live:190, cumulated reward: 22.000000000000043, exploring rate: 0.023037400000043864, loss: 1.20531165599823
[23330] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.02300440000004385, loss: 0.035808634012937546
[23340] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02297140000004384, loss: 0.05963625758886337
[23350] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.022938400000043827, loss: 0.16010616719722748
[23360] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.022905400000043815, loss: 0.0935574546456337
[23370] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.022872400000043803, loss: 0.09262768179178238
[23380] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.02283940000004379, loss: 0.02174828201532364
[23390] time live:6

 99%|█████████▉| 98/99 [00:01<00:00, 67.05it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23400.webm 

[23410] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.022740400000043754, loss: 0.11279191821813583
[23420] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.022707400000043742, loss: 0.9321459531784058
[23430] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.02267440000004373, loss: 0.7396483421325684
[23440] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.022641400000043718, loss: 0.14624805748462677
[23450] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.022608400000043705, loss: 0.07150471210479736
[23460] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.022575400000043693, loss: 0.1675240397453308
[23470] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.02254240000004368, loss: 0.10052777826786041
[23480] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.022

 99%|█████████▊| 76/77 [00:01<00:00, 70.69it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23500.webm 






[23510] time live:145, cumulated reward: 16.49999999999998, exploring rate: 0.022410400000043632, loss: 0.10227963328361511
[23520] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02237740000004362, loss: 0.019906634464859962
[23530] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.022344400000043608, loss: 0.08062716573476791
[23540] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.022311400000043596, loss: 0.08308333158493042
[23550] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.022278400000043583, loss: 0.11137361824512482
[23560] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02224540000004357, loss: 0.08013436198234558
[23570] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.02221240000004356, loss: 0.45754435658454895
[23580] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.022179400000043547, loss: 0.5709435343742371
[23590] time liv

 99%|█████████▉| 106/107 [00:01<00:00, 62.42it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23600.webm 

[23610] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.02208040000004351, loss: 0.1381174921989441
[23620] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.022047400000043498, loss: 0.3151164650917053
[23630] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.022014400000043486, loss: 0.03398222103714943
[23640] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.021981400000043474, loss: 0.5104393362998962
[23650] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.02194840000004346, loss: 0.09864810109138489
[23660] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.02191540000004345, loss: 0.06148335337638855
[23670] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.021882400000043437, loss: 0.11998524516820908
[23680] time live:217, cumulated reward: 25.70000000000008, exploring r

 99%|█████████▉| 99/100 [00:01<00:00, 70.14it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23700.webm 






[23710] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.02175040000004339, loss: 0.03192504495382309
[23720] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.021717400000043376, loss: 0.19440655410289764
[23730] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.021684400000043364, loss: 0.5006601214408875
[23740] time live:333, cumulated reward: 40.300000000000246, exploring rate: 0.021651400000043352, loss: 0.08218954503536224
[23750] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.02161840000004334, loss: 0.4111669361591339
[23760] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.021585400000043328, loss: 0.1568816602230072
[23770] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.021552400000043315, loss: 0.43522918224334717
[23780] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.021519400000043303, loss: 0.02982303500175476
[23790] time live

100%|██████████| 140/140 [00:02<00:00, 66.57it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23800.webm 






[23810] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.021420400000043267, loss: 0.04611818864941597
[23820] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.021387400000043254, loss: 0.08863463252782822
[23830] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.021354400000043242, loss: 0.032006170600652695
[23840] time live:142, cumulated reward: 16.199999999999974, exploring rate: 0.02132140000004323, loss: 0.14357303082942963
[23850] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.021288400000043218, loss: 0.14813297986984253
[23860] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.021255400000043206, loss: 0.11942881345748901
[23870] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.021222400000043194, loss: 0.19540666043758392
[23880] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.02118940000004318, loss: 0.0539712980389595
[23890] time liv

 99%|█████████▉| 134/135 [00:02<00:00, 65.38it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-23900.webm 






[23910] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.021090400000043145, loss: 0.08505382388830185
[23920] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.021057400000043133, loss: 0.05579674616456032
[23930] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.02102440000004312, loss: 0.08820702880620956
[23940] time live:136, cumulated reward: 14.599999999999968, exploring rate: 0.020991400000043108, loss: 0.622287929058075
[23950] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.020958400000043096, loss: 0.05585971102118492
[23960] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.020925400000043084, loss: 0.2554780840873718
[23970] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.02089240000004307, loss: 0.0575418621301651
[23980] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.02085940000004306, loss: 0.08073712885379791
[23990] time li

 99%|█████████▊| 68/69 [00:01<00:00, 66.74it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24000.webm 






[24010] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.020760400000043023, loss: 0.09824708849191666
[24020] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.02072740000004301, loss: 0.06237825006246567
[24030] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.020694400000043, loss: 0.0387488454580307
[24040] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.020661400000042986, loss: 0.0675322562456131
[24050] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.020628400000042974, loss: 0.0155471321195364
[24060] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.020595400000042962, loss: 0.45220398902893066
[24070] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02056240000004295, loss: 0.1397409439086914
[24080] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.020529400000042938, loss: 0.3683476746082306
[24090] time live:61, cum

 99%|█████████▊| 74/75 [00:01<00:00, 41.99it/s]

[MoviePy] Done.





[MoviePy] >>>> Video ready: movie/DQN-24100.webm 

[24110] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0204304000000429, loss: 0.043974537402391434
[24120] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.02039740000004289, loss: 0.147951140999794
[24130] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.020364400000042877, loss: 0.7607238292694092
[24140] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.020331400000042864, loss: 0.7837323546409607
[24150] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.020298400000042852, loss: 0.657168984413147
[24160] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.02026540000004284, loss: 0.05005862936377525
[24170] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.020232400000042828, loss: 0.049991630017757416
[24180] time live:149, cumulated reward: 16.899999999999984, exploring rate: 0.020199400000042

100%|██████████| 78/78 [00:01<00:00, 49.97it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24200.webm 






[24210] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.02010040000004278, loss: 0.03838146850466728
[24220] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.020067400000042767, loss: 0.05809551849961281
[24230] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.020034400000042755, loss: 0.055415019392967224
[24240] time live:216, cumulated reward: 25.60000000000008, exploring rate: 0.020001400000042743, loss: 0.10640782862901688
[24250] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.01996840000004273, loss: 0.018124360591173172
[24260] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.019935400000042718, loss: 0.5842031836509705
[24270] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.019902400000042706, loss: 0.07694830000400543
[24280] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.019869400000042694, loss: 0.07214733213186264
[24290] time li

 99%|█████████▉| 98/99 [00:01<00:00, 76.46it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24300.webm 






[24310] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.019770400000042657, loss: 0.049769870936870575
[24320] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.019737400000042645, loss: 1.0867977142333984
[24330] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.019704400000042633, loss: 0.0951833501458168
[24340] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.01967140000004262, loss: 0.17375703155994415
[24350] time live:251, cumulated reward: 30.10000000000013, exploring rate: 0.01963840000004261, loss: 0.05696748197078705
[24360] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.019605400000042596, loss: 0.10040487349033356
[24370] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.019572400000042584, loss: 0.15352800488471985
[24380] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.019539400000042572, loss: 0.06627807766199112
[24390] time 

100%|██████████| 63/63 [00:00<00:00, 76.71it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24400.webm 






[24410] time live:452, cumulated reward: 55.200000000000415, exploring rate: 0.019440400000042535, loss: 0.06126604229211807
[24420] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.019407400000042523, loss: 0.6351129412651062
[24430] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.01937440000004251, loss: 0.16459441184997559
[24440] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.0193414000000425, loss: 0.08501199632883072
[24450] time live:176, cumulated reward: 19.600000000000023, exploring rate: 0.019308400000042487, loss: 0.06426696479320526
[24460] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.019275400000042475, loss: 0.08681940287351608
[24470] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.019242400000042462, loss: 0.05313184857368469
[24480] time live:220, cumulated reward: 26.000000000000085, exploring rate: 0.01920940000004245, loss: 0.13078346848487854
[24490] ti

100%|█████████▉| 330/331 [00:04<00:00, 75.62it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24500.webm 






[24510] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.019110400000042414, loss: 0.05953517183661461
[24520] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.0190774000000424, loss: 0.20005826652050018
[24530] time live:216, cumulated reward: 25.60000000000008, exploring rate: 0.01904440000004239, loss: 0.04497365653514862
[24540] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.019011400000042377, loss: 0.020092416554689407
[24550] time live:298, cumulated reward: 35.800000000000196, exploring rate: 0.018978400000042365, loss: 0.12343445420265198
[24560] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.018945400000042353, loss: 0.08358895778656006
[24570] time live:186, cumulated reward: 21.600000000000037, exploring rate: 0.01891240000004234, loss: 0.2786133587360382
[24580] time live:216, cumulated reward: 25.60000000000008, exploring rate: 0.01887940000004233, loss: 0.04836293309926987
[24590] time l

 99%|█████████▉| 175/176 [00:02<00:00, 71.55it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24600.webm 






[24610] time live:134, cumulated reward: 14.399999999999968, exploring rate: 0.018780400000042292, loss: 0.03775811195373535
[24620] time live:185, cumulated reward: 21.500000000000036, exploring rate: 0.01874740000004228, loss: 0.030644580721855164
[24630] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.018714400000042267, loss: 0.12945377826690674
[24640] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.018681400000042255, loss: 0.02641795389354229
[24650] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.018648400000042243, loss: 0.3270857036113739
[24660] time live:149, cumulated reward: 16.899999999999984, exploring rate: 0.01861540000004223, loss: 0.037175193428993225
[24670] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.01858240000004222, loss: 0.10344378650188446
[24680] time live:18, cumulated reward: 0.8000000000000007, exploring rate: 0.018549400000042206, loss: 0.6103975176811218
[24690] tim

 98%|█████████▊| 44/45 [00:00<00:00, 73.91it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24700.webm 






[24710] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.01845040000004217, loss: 0.164498433470726
[24720] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.018417400000042158, loss: 0.11004507541656494
[24730] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.018384400000042146, loss: 0.08265972137451172
[24740] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.018351400000042133, loss: 0.893718957901001
[24750] time live:526, cumulated reward: 64.60000000000045, exploring rate: 0.01831840000004212, loss: 0.0342906229197979
[24760] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.01828540000004211, loss: 0.3222365975379944
[24770] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.018252400000042097, loss: 0.19945800304412842
[24780] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.018219400000042085, loss: 0.05029241740703583
[24790] time live:97

 99%|█████████▊| 74/75 [00:00<00:00, 81.58it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24800.webm 






[24810] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.018120400000042048, loss: 0.12568587064743042
[24820] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.018087400000042036, loss: 0.07855241000652313
[24830] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.018054400000042024, loss: 0.044889338314533234
[24840] time live:182, cumulated reward: 21.20000000000003, exploring rate: 0.01802140000004201, loss: 0.08895937353372574
[24850] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.017988400000042, loss: 0.21719686686992645
[24860] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.017955400000041987, loss: 0.07142071425914764
[24870] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.017922400000041975, loss: 0.7736981511116028
[24880] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.017889400000041963, loss: 0.04456561803817749
[24890] time liv

 99%|█████████▉| 98/99 [00:01<00:00, 76.78it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-24900.webm 






[24910] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.017790400000041926, loss: 0.06771788746118546
[24920] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.017757400000041914, loss: 0.24700981378555298
[24930] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.017724400000041902, loss: 0.14003115892410278
[24940] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.01769140000004189, loss: 0.6858068108558655
[24950] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.017658400000041877, loss: 0.1627282053232193
[24960] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.017625400000041865, loss: 0.06176064908504486
[24970] time live:176, cumulated reward: 19.600000000000023, exploring rate: 0.017592400000041853, loss: 1.5313396453857422
[24980] time live:211, cumulated reward: 24.100000000000072, exploring rate: 0.01755940000004184, loss: 0.04912476986646652
[24990] time 

100%|█████████▉| 212/213 [00:02<00:00, 79.98it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25000.webm 






[25010] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.017460400000041804, loss: 0.10557732731103897
[25020] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.017427400000041792, loss: 0.15295913815498352
[25030] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.01739440000004178, loss: 0.11487708240747452
[25040] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.017361400000041768, loss: 0.06754960119724274
[25050] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.017328400000041756, loss: 0.08854909241199493
[25060] time live:226, cumulated reward: 26.600000000000094, exploring rate: 0.017295400000041743, loss: 0.08161744475364685
[25070] time live:335, cumulated reward: 40.50000000000025, exploring rate: 0.01726240000004173, loss: 0.05276549234986305
[25080] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.01722940000004172, loss: 0.07572029531002045
[25090] ti

 99%|█████████▉| 98/99 [00:01<00:00, 77.63it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25100.webm 






[25110] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.017130400000041682, loss: 0.028982145711779594
[25120] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.01709740000004167, loss: 0.07600567489862442
[25130] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.017064400000041658, loss: 0.05157266557216644
[25140] time live:142, cumulated reward: 16.199999999999974, exploring rate: 0.017031400000041646, loss: 0.08435234427452087
[25150] time live:219, cumulated reward: 25.900000000000084, exploring rate: 0.016998400000041634, loss: 0.035191427916288376
[25160] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.01696540000004162, loss: 0.8775489330291748
[25170] time live:188, cumulated reward: 21.80000000000004, exploring rate: 0.01693240000004161, loss: 0.29696765542030334
[25180] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.016899400000041597, loss: 0.10026474297046661
[25190] ti

 99%|█████████▉| 98/99 [00:01<00:00, 88.57it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25200.webm 






[25210] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.01680040000004156, loss: 0.21436476707458496
[25220] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.01676740000004155, loss: 0.09495832026004791
[25230] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.016734400000041536, loss: 0.8798776865005493
[25240] time live:187, cumulated reward: 21.70000000000004, exploring rate: 0.016701400000041524, loss: 0.13402673602104187
[25250] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.016668400000041512, loss: 0.04786235839128494
[25260] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.0166354000000415, loss: 0.06638703495264053
[25270] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.016602400000041487, loss: 0.08186353743076324
[25280] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.016569400000041475, loss: 0.07882144302129745
[25290] time l

 95%|█████████▌| 20/21 [00:00<00:00, 62.37it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25300.webm 






[25310] time live:51, cumulated reward: 4.099999999999998, exploring rate: 0.01647040000004144, loss: 0.11762779206037521
[25320] time live:25, cumulated reward: 1.5000000000000013, exploring rate: 0.016437400000041427, loss: 0.38688045740127563
[25330] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.016404400000041414, loss: 0.1830434799194336
[25340] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.016371400000041402, loss: 0.045778702944517136
[25350] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.01633840000004139, loss: 0.8310160636901855
[25360] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.016305400000041378, loss: 0.40623876452445984
[25370] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.016272400000041366, loss: 0.035154979676008224
[25380] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.016239400000041353, loss: 0.07617484778165817
[25390] time l

100%|██████████| 140/140 [00:01<00:00, 71.34it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25400.webm 






[25410] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.016140400000041317, loss: 0.09380616992712021
[25420] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.016107400000041305, loss: 1.3965920209884644
[25430] time live:219, cumulated reward: 25.900000000000084, exploring rate: 0.016074400000041292, loss: 0.03482506424188614
[25440] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.01604140000004128, loss: 0.07145116478204727
[25450] time live:220, cumulated reward: 26.000000000000085, exploring rate: 0.016008400000041268, loss: 0.06333283334970474
[25460] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.015975400000041256, loss: 0.2738139033317566
[25470] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.015942400000041244, loss: 0.1722978949546814
[25480] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.01590940000004123, loss: 0.2922286093235016
[25490] time live

100%|██████████| 63/63 [00:00<00:00, 81.39it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25500.webm 






[25510] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.015810400000041195, loss: 0.4758508503437042
[25520] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.015777400000041183, loss: 0.05519610643386841
[25530] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.01574440000004117, loss: 0.0755833238363266
[25540] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.01571140000004116, loss: 0.05316207557916641
[25550] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.015678400000041146, loss: 0.6964471936225891
[25560] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.015645400000041134, loss: 0.0969482958316803
[25570] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.015612400000041129, loss: 0.08292980492115021
[25580] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.015579400000041134, loss: 0.08055591583251953
[25590] time live:69,

 99%|█████████▊| 72/73 [00:00<00:00, 86.62it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25600.webm 






[25610] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.01548040000004115, loss: 0.12489373981952667
[25620] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.015447400000041155, loss: 0.4372122287750244
[25630] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.01541440000004116, loss: 0.09448255598545074
[25640] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.015381400000041165, loss: 0.06952491402626038
[25650] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.01534840000004117, loss: 0.08716218918561935
[25660] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.015315400000041175, loss: 0.08255841583013535
[25670] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.01528240000004118, loss: 0.13152146339416504
[25680] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.015249400000041186, loss: 0.21266356110572815
[25690] time live:6

100%|██████████| 63/63 [00:00<00:00, 72.10it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25700.webm 






[25710] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.015150400000041201, loss: 0.2025603950023651
[25720] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.015117400000041206, loss: 0.1616053432226181
[25730] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.015084400000041211, loss: 0.04083468019962311
[25740] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.015051400000041217, loss: 1.2715213298797607
[25750] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.015018400000041222, loss: 0.04321642592549324
[25760] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.014985400000041227, loss: 0.07847988605499268
[25770] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.014952400000041232, loss: 0.13749298453330994
[25780] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.014919400000041237, loss: 0.3788672685623169
[25790] time live

 99%|█████████▉| 176/177 [00:02<00:00, 80.13it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25800.webm 






[25810] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.014820400000041253, loss: 0.11378750205039978
[25820] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.014787400000041258, loss: 0.036779094487428665
[25830] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.014754400000041263, loss: 0.088299959897995
[25840] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.014721400000041268, loss: 0.05028171092271805
[25850] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.014688400000041273, loss: 0.06409827619791031
[25860] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.014655400000041278, loss: 0.24585750699043274
[25870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.014622400000041284, loss: 0.42762014269828796
[25880] time live:182, cumulated reward: 21.20000000000003, exploring rate: 0.014589400000041289, loss: 0.15368424355983734
[25890] time liv

 99%|█████████▉| 134/135 [00:01<00:00, 84.02it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-25900.webm 






[25910] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.014490400000041304, loss: 0.1114930585026741
[25920] time live:287, cumulated reward: 33.70000000000018, exploring rate: 0.01445740000004131, loss: 0.2370380312204361
[25930] time live:400, cumulated reward: 48.00000000000034, exploring rate: 0.014424400000041315, loss: 0.14003853499889374
[25940] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.01439140000004132, loss: 0.10905170440673828
[25950] time live:169, cumulated reward: 18.900000000000013, exploring rate: 0.014358400000041325, loss: 0.06411910057067871
[25960] time live:300, cumulated reward: 36.0000000000002, exploring rate: 0.01432540000004133, loss: 0.1858239471912384
[25970] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.014292400000041335, loss: 0.1307254284620285
[25980] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.01425940000004134, loss: 0.09089602530002594
[25990] time live:

100%|█████████▉| 219/220 [00:02<00:00, 76.91it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26000.webm 






[26010] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.014160400000041356, loss: 0.02654152736067772
[26020] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.014127400000041361, loss: 0.10247098654508591
[26030] time live:336, cumulated reward: 40.60000000000025, exploring rate: 0.014094400000041366, loss: 0.08553489297628403
[26040] time live:148, cumulated reward: 16.799999999999983, exploring rate: 0.014061400000041371, loss: 0.07577778398990631
[26050] time live:295, cumulated reward: 35.50000000000019, exploring rate: 0.014028400000041377, loss: 0.17867647111415863
[26060] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.013995400000041382, loss: 0.12071336805820465
[26070] time live:252, cumulated reward: 30.20000000000013, exploring rate: 0.013962400000041387, loss: 0.14091114699840546
[26080] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.013929400000041392, loss: 0.038903944194316864
[26090

 99%|█████████▉| 114/115 [00:01<00:00, 78.13it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26100.webm 






[26110] time live:287, cumulated reward: 33.70000000000018, exploring rate: 0.013830400000041407, loss: 0.05027637630701065
[26120] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.013797400000041413, loss: 0.08496242761611938
[26130] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.013764400000041418, loss: 0.28749626874923706
[26140] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.013731400000041423, loss: 0.09394537657499313
[26150] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.013698400000041428, loss: 0.3411175012588501
[26160] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.013665400000041433, loss: 0.10146480798721313
[26170] time live:190, cumulated reward: 22.000000000000043, exploring rate: 0.013632400000041438, loss: 0.14356482028961182
[26180] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.013599400000041444, loss: 0.04715265706181526
[26190] t

 99%|█████████▉| 100/101 [00:01<00:00, 88.76it/s]

[MoviePy] Done.





[MoviePy] >>>> Video ready: movie/DQN-26200.webm 

[26210] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.013500400000041459, loss: 0.17806056141853333
[26220] time live:142, cumulated reward: 16.199999999999974, exploring rate: 0.013467400000041464, loss: 1.1302218437194824
[26230] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.01343440000004147, loss: 0.03196115046739578
[26240] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.013401400000041475, loss: 0.12872925400733948
[26250] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.01336840000004148, loss: 0.16993209719657898
[26260] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.013335400000041485, loss: 0.06444568932056427
[26270] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.01330240000004149, loss: 0.08464260399341583
[26280] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.01326940

 99%|█████████▉| 191/192 [00:02<00:00, 83.03it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26300.webm 






[26310] time live:146, cumulated reward: 16.59999999999998, exploring rate: 0.01317040000004151, loss: 0.2490939348936081
[26320] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.013137400000041516, loss: 0.0650242492556572
[26330] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.013104400000041521, loss: 0.31578102707862854
[26340] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.013071400000041526, loss: 0.08061982691287994
[26350] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.013038400000041531, loss: 0.031042184680700302
[26360] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.013005400000041537, loss: 0.9825108647346497
[26370] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.012972400000041542, loss: 0.22024144232273102
[26380] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.012939400000041547, loss: 0.05223158374428749
[26390] time live:73,

 99%|█████████▊| 76/77 [00:00<00:00, 89.25it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26400.webm 






[26410] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.012840400000041562, loss: 0.9451656341552734
[26420] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.012807400000041567, loss: 0.03499048203229904
[26430] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.012774400000041573, loss: 0.07194972783327103
[26440] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.012741400000041578, loss: 0.09536875784397125
[26450] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.012708400000041583, loss: 1.8986600637435913
[26460] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.012675400000041588, loss: 0.05876707658171654
[26470] time live:186, cumulated reward: 21.600000000000037, exploring rate: 0.012642400000041593, loss: 0.08167226612567902
[26480] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.012609400000041598, loss: 0.058442097157239914
[26490] time

100%|█████████▉| 262/263 [00:03<00:00, 76.40it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26500.webm 






[26510] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.012510400000041614, loss: 0.028356444090604782
[26520] time live:323, cumulated reward: 38.30000000000023, exploring rate: 0.012477400000041619, loss: 0.04059407860040665
[26530] time live:373, cumulated reward: 45.3000000000003, exploring rate: 0.012444400000041624, loss: 0.09291824698448181
[26540] time live:109, cumulated reward: 11.899999999999977, exploring rate: 0.01241140000004163, loss: 0.11559303104877472
[26550] time live:256, cumulated reward: 30.600000000000136, exploring rate: 0.012378400000041635, loss: 0.243399977684021
[26560] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.01234540000004164, loss: 0.07285382598638535
[26570] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.012312400000041645, loss: 0.09573300927877426
[26580] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.01227940000004165, loss: 0.18025794625282288
[26590] time

 99%|█████████▉| 104/105 [00:01<00:00, 86.11it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26600.webm 






[26610] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.012180400000041666, loss: 0.597711980342865
[26620] time live:103, cumulated reward: 11.29999999999998, exploring rate: 0.01214740000004167, loss: 0.5733923316001892
[26630] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.012114400000041676, loss: 0.08552888035774231
[26640] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.012081400000041681, loss: 0.3999493420124054
[26650] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.012048400000041686, loss: 0.12274745851755142
[26660] time live:323, cumulated reward: 38.30000000000023, exploring rate: 0.012015400000041691, loss: 0.07285376638174057
[26670] time live:223, cumulated reward: 26.30000000000009, exploring rate: 0.011982400000041697, loss: 0.07912655174732208
[26680] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.011949400000041702, loss: 0.04771844670176506
[26690] time liv

 99%|█████████▉| 183/184 [00:02<00:00, 76.87it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26700.webm 






[26710] time live:141, cumulated reward: 16.099999999999973, exploring rate: 0.011850400000041717, loss: 0.07879933714866638
[26720] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.011817400000041722, loss: 0.09044007956981659
[26730] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.011784400000041727, loss: 0.21015065908432007
[26740] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.011751400000041733, loss: 0.0351814441382885
[26750] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.011718400000041738, loss: 0.1067512035369873
[26760] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.011685400000041743, loss: 1.2197259664535522
[26770] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.011652400000041748, loss: 0.045328378677368164
[26780] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.011619400000041753, loss: 0.16361713409423828
[26790] time l

 99%|█████████▉| 98/99 [00:01<00:00, 83.63it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26800.webm 

[26810] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.011520400000041769, loss: 0.1750488430261612
[26820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.011487400000041774, loss: 0.12468180805444717
[26830] time live:109, cumulated reward: 11.899999999999977, exploring rate: 0.011454400000041779, loss: 0.07774219661951065
[26840] time live:138, cumulated reward: 15.799999999999969, exploring rate: 0.011421400000041784, loss: 0.14740388095378876
[26850] time live:226, cumulated reward: 26.600000000000094, exploring rate: 0.01138840000004179, loss: 0.42648953199386597
[26860] time live:302, cumulated reward: 36.2000000000002, exploring rate: 0.011355400000041795, loss: 0.26879245042800903
[26870] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.0113224000000418, loss: 0.06691907346248627
[26880] time live:446, cumulated reward: 54.600000000000406, explor

 99%|█████████▊| 78/79 [00:00<00:00, 79.83it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-26900.webm 






[26910] time live:138, cumulated reward: 15.799999999999969, exploring rate: 0.01119040000004182, loss: 0.06195740029215813
[26920] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.011157400000041826, loss: 0.13480454683303833
[26930] time live:101, cumulated reward: 11.09999999999998, exploring rate: 0.01112440000004183, loss: 1.1876248121261597
[26940] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.011091400000041836, loss: 0.9302538633346558
[26950] time live:187, cumulated reward: 21.70000000000004, exploring rate: 0.011058400000041841, loss: 0.15215815603733063
[26960] time live:487, cumulated reward: 59.700000000000465, exploring rate: 0.011025400000041846, loss: 0.8966779708862305
[26970] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.010992400000041851, loss: 1.0634419918060303
[26980] time live:190, cumulated reward: 22.000000000000043, exploring rate: 0.010959400000041856, loss: 0.07317966967821121
[26990] time

 99%|█████████▉| 110/111 [00:01<00:00, 78.96it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27000.webm 






[27010] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.010860400000041872, loss: 0.30882787704467773
[27020] time live:436, cumulated reward: 52.60000000000039, exploring rate: 0.010827400000041877, loss: 0.41325104236602783
[27030] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.010794400000041882, loss: 0.15687717497348785
[27040] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.010761400000041887, loss: 0.06347575038671494
[27050] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.010728400000041893, loss: 0.11094346642494202
[27060] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.010695400000041898, loss: 0.13076679408550262
[27070] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.010662400000041903, loss: 0.1131986677646637
[27080] time live:221, cumulated reward: 26.100000000000087, exploring rate: 0.010629400000041908, loss: 0.03315098211169243
[27090] tim

 99%|█████████▉| 134/135 [00:01<00:00, 81.93it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27100.webm 






[27110] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.010530400000041924, loss: 0.08798535168170929
[27120] time live:68, cumulated reward: 6.799999999999992, exploring rate: 0.010497400000041929, loss: 0.07812920957803726
[27130] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.010464400000041934, loss: 0.06301626563072205
[27140] time live:174, cumulated reward: 19.40000000000002, exploring rate: 0.010431400000041939, loss: 0.07976942509412766
[27150] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.010398400000041944, loss: 0.04873877763748169
[27160] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.01036540000004195, loss: 0.0837431475520134
[27170] time live:437, cumulated reward: 52.700000000000394, exploring rate: 0.010332400000041955, loss: 0.2272251546382904
[27180] time live:225, cumulated reward: 26.500000000000092, exploring rate: 0.01029940000004196, loss: 0.0687720775604248
[27190] time li

 99%|█████████▉| 190/191 [00:02<00:00, 80.04it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27200.webm 






[27210] time live:295, cumulated reward: 35.50000000000019, exploring rate: 0.010200400000041975, loss: 0.041727833449840546
[27220] time live:136, cumulated reward: 14.599999999999968, exploring rate: 0.01016740000004198, loss: 0.08471138775348663
[27230] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.010134400000041986, loss: 0.1544475257396698
[27240] time live:181, cumulated reward: 21.10000000000003, exploring rate: 0.01010140000004199, loss: 0.39542192220687866
[27250] time live:412, cumulated reward: 50.20000000000036, exploring rate: 0.010068400000041996, loss: 0.12268136441707611
[27260] time live:250, cumulated reward: 30.000000000000128, exploring rate: 0.010035400000042001, loss: 0.32013723254203796
[27270] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.010002400000042006, loss: 0.20374304056167603
[27280] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.009969400000042011, loss: 0.08187556266784668
[27290] t

 99%|█████████▉| 191/192 [00:02<00:00, 82.53it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27300.webm 

[27310] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.009870400000042027, loss: 0.6094530820846558
[27320] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.009837400000042032, loss: 0.04273313656449318
[27330] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.009804400000042037, loss: 0.10057127475738525
[27340] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.009771400000042042, loss: 0.11667725443840027
[27350] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.009738400000042047, loss: 0.08783756196498871
[27360] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.009705400000042053, loss: 0.1824214607477188
[27370] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.009672400000042058, loss: 0.22982966899871826
[27380] time live:513, cumulated reward: 62.3000000000005, exploring r

 99%|█████████▊| 74/75 [00:00<00:00, 92.25it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27400.webm 






[27410] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.009540400000042078, loss: 0.07428203523159027
[27420] time live:369, cumulated reward: 44.9000000000003, exploring rate: 0.009507400000042084, loss: 0.055132657289505005
[27430] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.009474400000042089, loss: 0.14234361052513123
[27440] time live:210, cumulated reward: 24.00000000000007, exploring rate: 0.009441400000042094, loss: 0.12224306166172028
[27450] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.009408400000042099, loss: 0.8592524528503418
[27460] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.009375400000042104, loss: 0.0962846651673317
[27470] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.00934240000004211, loss: 0.08419230580329895
[27480] time live:324, cumulated reward: 38.40000000000023, exploring rate: 0.009309400000042115, loss: 0.07640431821346283
[27490] 

 99%|█████████▉| 105/106 [00:01<00:00, 83.83it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27500.webm 






[27510] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.00921040000004213, loss: 1.0720771551132202
[27520] time live:99, cumulated reward: 9.89999999999998, exploring rate: 0.009177400000042135, loss: 0.1323067992925644
[27530] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.00914440000004214, loss: 0.09205807000398636
[27540] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.009111400000042146, loss: 0.03284230828285217
[27550] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.00907840000004215, loss: 0.087761789560318
[27560] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.009045400000042156, loss: 0.04217129945755005
[27570] time live:180, cumulated reward: 21.00000000000003, exploring rate: 0.009012400000042161, loss: 0.07612814009189606
[27580] time live:110, cumulated reward: 11.999999999999977, exploring rate: 0.008979400000042166, loss: 0.5050698518753052
[27590] time liv

 99%|█████████▉| 180/181 [00:02<00:00, 85.97it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27600.webm 






[27610] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.008880400000042182, loss: 0.022344116121530533
[27620] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.008847400000042187, loss: 0.08270398527383804
[27630] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.008814400000042192, loss: 0.0798838809132576
[27640] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.008781400000042197, loss: 0.7467403411865234
[27650] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.008748400000042202, loss: 0.42931604385375977
[27660] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.008715400000042207, loss: 0.12055133283138275
[27670] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.008682400000042213, loss: 0.263322114944458
[27680] time live:218, cumulated reward: 25.800000000000082, exploring rate: 0.008649400000042218, loss: 0.0349898487329483
[27690] t

 99%|█████████▉| 98/99 [00:01<00:00, 79.51it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27700.webm 






[27710] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.008550400000042233, loss: 0.025403913110494614
[27720] time live:149, cumulated reward: 16.899999999999984, exploring rate: 0.008517400000042238, loss: 0.10531268268823624
[27730] time live:148, cumulated reward: 16.799999999999983, exploring rate: 0.008484400000042244, loss: 0.17634853720664978
[27740] time live:292, cumulated reward: 35.20000000000019, exploring rate: 0.008451400000042249, loss: 0.03162494674324989
[27750] time live:323, cumulated reward: 38.30000000000023, exploring rate: 0.008418400000042254, loss: 0.6903986930847168
[27760] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.008385400000042259, loss: 0.09666390717029572
[27770] time live:339, cumulated reward: 40.900000000000254, exploring rate: 0.008352400000042264, loss: 0.14447228610515594
[27780] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.00831940000004227, loss: 0.08958255499601364
[27790]

100%|█████████▉| 452/453 [00:06<00:00, 70.21it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27800.webm 






[27810] time live:137, cumulated reward: 15.699999999999969, exploring rate: 0.008220400000042285, loss: 0.09972996264696121
[27820] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.00818740000004229, loss: 0.04036973416805267
[27830] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.008154400000042295, loss: 2.2731919288635254
[27840] time live:226, cumulated reward: 26.600000000000094, exploring rate: 0.0081214000000423, loss: 0.1416381299495697
[27850] time live:400, cumulated reward: 48.00000000000034, exploring rate: 0.008088400000042305, loss: 0.06492535769939423
[27860] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.00805540000004231, loss: 0.05086655914783478
[27870] time live:215, cumulated reward: 25.500000000000078, exploring rate: 0.008022400000042316, loss: 0.1002940833568573
[27880] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.007989400000042321, loss: 0.12748342752456665
[27890] ti

 99%|█████████▉| 146/147 [00:01<00:00, 74.18it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-27900.webm 

[27910] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.007890400000042336, loss: 0.07824704051017761
[27920] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.007857400000042342, loss: 0.13592824339866638
[27930] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.007824400000042347, loss: 0.0720682218670845
[27940] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.007791400000042346, loss: 0.7635033130645752
[27950] time live:69, cumulated reward: 6.8999999999999915, exploring rate: 0.007758400000042342, loss: 0.10387933999300003
[27960] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.007725400000042339, loss: 0.047946229577064514
[27970] time live:175, cumulated reward: 19.50000000000002, exploring rate: 0.007692400000042335, loss: 0.950861394405365
[27980] time live:77, cumulated reward: 7.699999999999989, exploring 

 99%|█████████▉| 142/143 [00:01<00:00, 76.54it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28000.webm 






[28010] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.007560400000042321, loss: 0.2942816913127899
[28020] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.007527400000042318, loss: 0.03123628720641136
[28030] time live:190, cumulated reward: 22.000000000000043, exploring rate: 0.007494400000042314, loss: 1.5937691926956177
[28040] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.007461400000042311, loss: 0.38898593187332153
[28050] time live:289, cumulated reward: 33.90000000000018, exploring rate: 0.007428400000042307, loss: 0.7257840633392334
[28060] time live:253, cumulated reward: 30.300000000000132, exploring rate: 0.007395400000042304, loss: 0.26473045349121094
[28070] time live:109, cumulated reward: 11.899999999999977, exploring rate: 0.0073624000000423, loss: 0.05807977914810181
[28080] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.007329400000042297, loss: 0.06529209762811661
[28090] 

 99%|█████████▉| 110/111 [00:01<00:00, 80.33it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28100.webm 






[28110] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.007230400000042286, loss: 0.5060139894485474
[28120] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.007197400000042283, loss: 0.08770754933357239
[28130] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.007164400000042279, loss: 0.5058832168579102
[28140] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.007131400000042276, loss: 0.0882149338722229
[28150] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.007098400000042272, loss: 0.13407506048679352
[28160] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.007065400000042269, loss: 0.10738460719585419
[28170] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.007032400000042265, loss: 0.10893061757087708
[28180] time live:213, cumulated reward: 24.300000000000075, exploring rate: 0.006999400000042262, loss: 0.05996222048997879
[28190] time live:

 99%|█████████▉| 175/176 [00:02<00:00, 75.47it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28200.webm 






[28210] time live:108, cumulated reward: 11.799999999999978, exploring rate: 0.006900400000042251, loss: 0.542133629322052
[28220] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.0068674000000422475, loss: 0.07495445758104324
[28230] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.006834400000042244, loss: 0.11354187875986099
[28240] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.0068014000000422405, loss: 0.3549565374851227
[28250] time live:140, cumulated reward: 15.999999999999972, exploring rate: 0.006768400000042237, loss: 0.09060709923505783
[28260] time live:109, cumulated reward: 11.899999999999977, exploring rate: 0.0067354000000422335, loss: 0.2735987603664398
[28270] time live:143, cumulated reward: 16.299999999999976, exploring rate: 0.00670240000004223, loss: 0.05252333730459213
[28280] time live:225, cumulated reward: 26.500000000000092, exploring rate: 0.0066694000000422264, loss: 0.0504111684858799
[28290

 99%|█████████▉| 98/99 [00:01<00:00, 89.09it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28300.webm 






[28310] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.006570400000042216, loss: 0.06150634586811066
[28320] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.006537400000042212, loss: 1.0121991634368896
[28330] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.006504400000042209, loss: 0.4938809871673584
[28340] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.006471400000042205, loss: 0.12916867434978485
[28350] time live:225, cumulated reward: 26.500000000000092, exploring rate: 0.006438400000042202, loss: 0.40805912017822266
[28360] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.006405400000042198, loss: 0.12561535835266113
[28370] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.006372400000042195, loss: 0.08343349397182465
[28380] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.006339400000042191, loss: 0.07512222975492477
[28390] tim

 99%|█████████▉| 142/143 [00:01<00:00, 80.55it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28400.webm 






[28410] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.006240400000042181, loss: 0.09884027391672134
[28420] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.006207400000042177, loss: 0.09448010474443436
[28430] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.006174400000042174, loss: 0.052337344735860825
[28440] time live:112, cumulated reward: 12.199999999999976, exploring rate: 0.00614140000004217, loss: 0.09673473984003067
[28450] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.006108400000042167, loss: 0.11088603734970093
[28460] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.006075400000042163, loss: 0.5180167555809021
[28470] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.00604240000004216, loss: 0.16217824816703796
[28480] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.006009400000042156, loss: 0.12158659845590591
[28490] time li

 99%|█████████▊| 75/76 [00:00<00:00, 78.23it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28500.webm 






[28510] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.005910400000042146, loss: 0.0998472273349762
[28520] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.005877400000042142, loss: 0.05564868822693825
[28530] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.005844400000042139, loss: 0.44044557213783264
[28540] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.005811400000042135, loss: 0.19063669443130493
[28550] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.005778400000042132, loss: 1.0263954401016235
[28560] time live:85, cumulated reward: 8.499999999999986, exploring rate: 0.005745400000042128, loss: 0.24482111632823944
[28570] time live:111, cumulated reward: 12.099999999999977, exploring rate: 0.005712400000042125, loss: 0.7533326745033264
[28580] time live:148, cumulated reward: 16.799999999999983, exploring rate: 0.005679400000042121, loss: 0.0846700370311737
[28590] time li

100%|██████████| 63/63 [00:00<00:00, 69.74it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28600.webm 






[28610] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.0055804000000421105, loss: 0.20327001810073853
[28620] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.005547400000042107, loss: 0.062168970704078674
[28630] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.0055144000000421035, loss: 0.16296815872192383
[28640] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.0054814000000421, loss: 0.049836061894893646
[28650] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.0054484000000420965, loss: 0.4432825446128845
[28660] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.005415400000042093, loss: 0.13112398982048035
[28670] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.0053824000000420895, loss: 0.09049432724714279
[28680] time live:188, cumulated reward: 21.80000000000004, exploring rate: 0.005349400000042086, loss: 0.8919522762298584
[28690]

100%|█████████▉| 221/222 [00:03<00:00, 69.68it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28700.webm 






[28710] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.005250400000042075, loss: 0.2079588621854782
[28720] time live:252, cumulated reward: 30.20000000000013, exploring rate: 0.005217400000042072, loss: 0.0974588617682457
[28730] time live:261, cumulated reward: 31.100000000000144, exploring rate: 0.005184400000042068, loss: 0.08320765197277069
[28740] time live:415, cumulated reward: 50.50000000000036, exploring rate: 0.005151400000042065, loss: 0.24249860644340515
[28750] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.005118400000042061, loss: 0.747001051902771
[28760] time live:359, cumulated reward: 42.90000000000028, exploring rate: 0.005085400000042058, loss: 0.13472875952720642
[28770] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.005052400000042054, loss: 0.164027139544487
[28780] time live:663, cumulated reward: 81.29999999999967, exploring rate: 0.005019400000042051, loss: 0.1476563811302185
[28790] time 

100%|█████████▉| 339/340 [00:04<00:00, 68.08it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28800.webm 






[28810] time live:472, cumulated reward: 57.20000000000044, exploring rate: 0.00492040000004204, loss: 0.14634136855602264
[28820] time live:323, cumulated reward: 38.30000000000023, exploring rate: 0.004887400000042037, loss: 0.3510703146457672
[28830] time live:373, cumulated reward: 45.3000000000003, exploring rate: 0.004854400000042033, loss: 0.10591866075992584
[28840] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.00482140000004203, loss: 0.05701232701539993
[28850] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.004788400000042026, loss: 0.06662361323833466
[28860] time live:400, cumulated reward: 48.00000000000034, exploring rate: 0.004755400000042023, loss: 0.12373296916484833
[28870] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.004722400000042019, loss: 0.18776337802410126
[28880] time live:250, cumulated reward: 30.000000000000128, exploring rate: 0.004689400000042016, loss: 0.2386622130870819
[28890] time li

100%|█████████▉| 211/212 [00:02<00:00, 80.78it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-28900.webm 






[28910] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.004590400000042005, loss: 0.04784322530031204
[28920] time live:404, cumulated reward: 49.40000000000035, exploring rate: 0.004557400000042002, loss: 0.10675427317619324
[28930] time live:323, cumulated reward: 38.30000000000023, exploring rate: 0.004524400000041998, loss: 0.05158275365829468
[28940] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.004491400000041995, loss: 0.06697209179401398
[28950] time live:100, cumulated reward: 9.99999999999998, exploring rate: 0.004458400000041991, loss: 0.03446955233812332
[28960] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.004425400000041988, loss: 0.08389978110790253
[28970] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.004392400000041984, loss: 1.2593287229537964
[28980] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.0043594000000419806, loss: 0.23456399142742157
[28990] t

 99%|█████████▉| 103/104 [00:01<00:00, 83.50it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29000.webm 






[29010] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.00426040000004197, loss: 0.27306056022644043
[29020] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0042274000000419665, loss: 0.42763879895210266
[29030] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.004194400000041963, loss: 0.08965151011943817
[29040] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.0041614000000419595, loss: 1.3330110311508179
[29050] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.004128400000041956, loss: 0.07320985198020935
[29060] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.0040954000000419525, loss: 0.12067906558513641
[29070] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.004062400000041949, loss: 0.07918200641870499
[29080] time live:105, cumulated reward: 11.499999999999979, exploring rate: 0.0040294000000419454, loss: 0.09222231805324554
[29090]

100%|██████████| 78/78 [00:01<00:00, 74.94it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29100.webm 

[29110] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.003930400000041935, loss: 0.12538287043571472
[29120] time live:246, cumulated reward: 28.600000000000122, exploring rate: 0.0038974000000419327, loss: 0.10945743322372437
[29130] time live:183, cumulated reward: 21.300000000000033, exploring rate: 0.0038644000000419335, loss: 0.1442919224500656
[29140] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.0038314000000419343, loss: 0.0845179334282875
[29150] time live:104, cumulated reward: 11.399999999999979, exploring rate: 0.003798400000041935, loss: 0.21265381574630737
[29160] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.003765400000041936, loss: 0.057677555829286575
[29170] time live:144, cumulated reward: 16.399999999999977, exploring rate: 0.003732400000041937, loss: 0.057108037173748016
[29180] time live:138, cumulated reward: 15.799999999999969,

 99%|█████████▊| 78/79 [00:00<00:00, 82.01it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29200.webm 






[29210] time live:149, cumulated reward: 16.899999999999984, exploring rate: 0.00360040000004194, loss: 0.09517164528369904
[29220] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.003567400000041941, loss: 0.16862928867340088
[29230] time live:216, cumulated reward: 25.60000000000008, exploring rate: 0.0035344000000419418, loss: 0.12674497067928314
[29240] time live:370, cumulated reward: 45.0000000000003, exploring rate: 0.0035014000000419426, loss: 1.091436743736267
[29250] time live:1430, cumulated reward: 178.9999999999953, exploring rate: 0.0034684000000419434, loss: 0.06789011508226395
[29260] time live:66, cumulated reward: 6.5999999999999925, exploring rate: 0.0034354000000419442, loss: 0.07386821508407593
[29270] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.003402400000041945, loss: 1.373997688293457
[29280] time live:447, cumulated reward: 54.70000000000041, exploring rate: 0.003369400000041946, loss: 0.12034818530082703
[29290] ti

100%|█████████▉| 211/212 [00:02<00:00, 76.37it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29300.webm 






[29310] time live:177, cumulated reward: 19.700000000000024, exploring rate: 0.0032704000000419483, loss: 0.1181374341249466
[29320] time live:519, cumulated reward: 63.90000000000049, exploring rate: 0.003237400000041949, loss: 0.09605176001787186
[29330] time live:302, cumulated reward: 36.2000000000002, exploring rate: 0.00320440000004195, loss: 0.14061063528060913
[29340] time live:138, cumulated reward: 15.799999999999969, exploring rate: 0.003171400000041951, loss: 0.03541747108101845
[29350] time live:77, cumulated reward: 7.699999999999989, exploring rate: 0.0031384000000419516, loss: 0.08894321322441101
[29360] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.0031054000000419525, loss: 0.12969675660133362
[29370] time live:102, cumulated reward: 11.19999999999998, exploring rate: 0.0030724000000419533, loss: 0.06936240196228027
[29380] time live:145, cumulated reward: 16.49999999999998, exploring rate: 0.003039400000041954, loss: 0.25054776668548584
[29390]

 99%|█████████▉| 176/177 [00:02<00:00, 82.57it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29400.webm 






[29410] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.0029404000000419566, loss: 0.04727110639214516
[29420] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.0029074000000419574, loss: 0.06852743029594421
[29430] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0028744000000419582, loss: 0.06171685457229614
[29440] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.002841400000041959, loss: 0.06728433817625046
[29450] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.00280840000004196, loss: 0.08014911413192749
[29460] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.0027754000000419607, loss: 0.042643893510103226
[29470] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.0027424000000419615, loss: 0.06522262841463089
[29480] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.0027094000000419624, loss: 0.7137942314147949
[29490] tim

 98%|█████████▊| 63/64 [00:00<00:00, 79.23it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29500.webm 






[29510] time live:75, cumulated reward: 7.499999999999989, exploring rate: 0.002610400000041965, loss: 0.1692277193069458
[29520] time live:63, cumulated reward: 5.299999999999994, exploring rate: 0.0025774000000419657, loss: 0.10825082659721375
[29530] time live:62, cumulated reward: 5.199999999999994, exploring rate: 0.0025444000000419665, loss: 0.22579993307590485
[29540] time live:65, cumulated reward: 6.499999999999993, exploring rate: 0.0025114000000419673, loss: 0.05099796876311302
[29550] time live:70, cumulated reward: 6.999999999999991, exploring rate: 0.002478400000041968, loss: 0.28528472781181335
[29560] time live:74, cumulated reward: 7.39999999999999, exploring rate: 0.002445400000041969, loss: 0.08764950931072235
[29570] time live:67, cumulated reward: 6.699999999999992, exploring rate: 0.0024124000000419698, loss: 0.10588052868843079
[29580] time live:73, cumulated reward: 7.29999999999999, exploring rate: 0.0023794000000419706, loss: 0.08848542720079422
[29590] time l

 99%|█████████▊| 76/77 [00:00<00:00, 78.54it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29600.webm 






[29610] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.002280400000041973, loss: 0.10447164624929428
[29620] time live:147, cumulated reward: 16.69999999999998, exploring rate: 0.002247400000041974, loss: 0.23252329230308533
[29630] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.0022144000000419747, loss: 0.08560201525688171
[29640] time live:106, cumulated reward: 11.599999999999978, exploring rate: 0.0021814000000419755, loss: 0.06298718601465225
[29650] time live:133, cumulated reward: 14.299999999999969, exploring rate: 0.0021484000000419764, loss: 2.019324779510498
[29660] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.002115400000041977, loss: 0.3025801181793213
[29670] time live:287, cumulated reward: 33.70000000000018, exploring rate: 0.002082400000041978, loss: 0.0728539526462555
[29680] time live:76, cumulated reward: 7.599999999999989, exploring rate: 0.002049400000041979, loss: 0.057645708322525024
[29690] t

100%|█████████▉| 934/935 [00:12<00:00, 73.95it/s]


[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29700.webm 

[29710] time live:107, cumulated reward: 11.699999999999978, exploring rate: 0.0019504000000419813, loss: 0.09289731085300446
[29720] time live:181, cumulated reward: 21.10000000000003, exploring rate: 0.0019174000000419821, loss: 0.6417953372001648
[29730] time live:189, cumulated reward: 21.90000000000004, exploring rate: 0.001884400000041983, loss: 0.0854884535074234
[29740] time live:217, cumulated reward: 25.70000000000008, exploring rate: 0.0018514000000419838, loss: 0.20231974124908447
[29750] time live:219, cumulated reward: 25.900000000000084, exploring rate: 0.0018184000000419846, loss: 0.13065993785858154
[29760] time live:113, cumulated reward: 12.299999999999976, exploring rate: 0.0017854000000419854, loss: 0.0798882246017456
[29770] time live:135, cumulated reward: 14.499999999999968, exploring rate: 0.0017524000000419863, loss: 0.25880759954452515
[29780] time live:246, cumulated reward: 28.60000000000012

100%|█████████▉| 858/859 [00:11<00:00, 77.18it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29800.webm 






[29810] time live:98, cumulated reward: 9.799999999999981, exploring rate: 0.0016204000000419896, loss: 0.1646340787410736
[29820] time live:214, cumulated reward: 25.400000000000077, exploring rate: 0.0015874000000419904, loss: 0.13880395889282227
[29830] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0015544000000419912, loss: 0.06872701644897461
[29840] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.001521400000041992, loss: 0.2044902890920639
[29850] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0014884000000419929, loss: 0.054266370832920074
[29860] time live:71, cumulated reward: 7.099999999999991, exploring rate: 0.0014554000000419937, loss: 0.14483095705509186
[29870] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0014224000000419945, loss: 0.04384816437959671
[29880] time live:72, cumulated reward: 7.19999999999999, exploring rate: 0.0013894000000419953, loss: 0.06841292977333069
[29890] t

 99%|█████████▉| 104/105 [00:01<00:00, 84.07it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-29900.webm 






[29910] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.0012904000000419978, loss: 0.1923304945230484
[29920] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0012574000000419986, loss: 0.12453856319189072
[29930] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0012244000000419994, loss: 0.09767059981822968
[29940] time live:64, cumulated reward: 5.399999999999993, exploring rate: 0.0011914000000420003, loss: 0.17575593292713165
[29950] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.001158400000042001, loss: 0.05575591325759888
[29960] time live:97, cumulated reward: 9.699999999999982, exploring rate: 0.001125400000042002, loss: 0.03229641169309616
[29970] time live:61, cumulated reward: 5.099999999999994, exploring rate: 0.0010924000000420027, loss: 0.039504025131464005
[29980] time live:217, cumulated reward: 25.70000000000008, exploring rate: 0.0010594000000420036, loss: 0.033990100026130676
[29990] 

 99%|█████████▊| 76/77 [00:00<00:00, 78.69it/s]

[MoviePy] Done.
[MoviePy] >>>> Video ready: movie/DQN-30000.webm 






In [17]:
# Import only the name this cell needs instead of `import *`, so the notebook
# namespace is not flooded with moviepy's re-exports.
from moviepy.editor import VideoFileClip

# Replay one of the evaluation videos written during training
# (see the "[MoviePy] >>>> Video ready: movie/DQN-29700.webm" line in the log above).
clip = VideoFileClip("movie/DQN-29700.webm")

# Embed the clip in the notebook output; autoplay/loop are passed through to
# ipython_display as keyword options (1 == enabled).
display(clip.ipython_display(fps=60, autoplay=1, loop=1))

100%|██████████| 935/935 [00:02<00:00, 466.90it/s]


### Discuss the result

Training the DQN is faster than the method from the previous lab (e.g. SARSA), and the result after training is also better.
Using the game state to represent the game is better than using the screen, since it costs less memory to store.
