### This notebook is a copy of `train.py`

In [1]:
from rl.callbacks import ModelIntervalCheckpoint, FileLogger
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, Concatenate
from gym import spaces
import numpy as np
from PIL import Image
from configparser import ConfigParser
import os
from os.path import join, pardir, exists
from pathlib import Path

from gym_airsim.airsim_car_env import AirSimCarEnv

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Configure the TensorFlow session used by Keras before the model is built.
# Bound to `tf_config` rather than `config`, because this cell later binds
# `config` to a ConfigParser instance; one name should not hold two unrelated objects.
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
set_session(tf.Session(config=tf_config))

class AirSimCarProcessor(Processor):
    """Processor hooks that shrink observations, scale state batches and clip rewards.

    Reads the module-level ``INPUT_SHAPE`` constant at call time, so that constant
    must be defined before the first observation is processed.
    """

    def process_observation(self, observation):
        """Resize an image observation to ``INPUT_SHAPE`` and convert it to grayscale.

        Args:
            observation: array with ``ndim == 3``, laid out as (height, width, channel).

        Returns:
            A 2-D ``uint8`` array whose ``shape`` equals ``INPUT_SHAPE``.

        Raises:
            AssertionError: if the input is not 3-D or the result has the wrong shape.
        """
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        # PIL's resize() takes (width, height) while numpy shapes are (rows, cols),
        # i.e. (height, width). Swap the axes so a non-square INPUT_SHAPE still
        # satisfies the shape assertion below (no change for a square shape).
        target_rows, target_cols = INPUT_SHAPE
        img = img.resize((target_cols, target_rows)).convert('L')  # resize and convert to grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        return processed_observation.astype('uint8')  # saves storage in experience memory

    def process_state_batch(self, batch):
        """Return ``batch`` cast to ``float32`` and divided by 255.

        Args:
            batch: array of stored observations (anything exposing ``astype``).

        Returns:
            A ``float32`` array with the same shape as ``batch``.
        """
        # We could perform this processing step in `process_observation`. In this case, however,
        # we would need to store a `float32` array instead, which is 4x more memory intensive than
        # an `uint8` array. This matters if we store 1M observations.
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """Clip ``reward`` to the closed interval [-1, 1]."""
        return np.clip(reward, -1., 1.)

# Load run settings. ConfigParser.read() silently skips files it cannot open and
# returns the list of files it did parse, so check that list explicitly; otherwise
# a missing file only shows up as a confusing KeyError on the 'car_agent' section.
config = ConfigParser()
if not config.read('config.ini'):
    raise FileNotFoundError(
        "Could not read 'config.ini' (looked relative to the current working directory)")
num_actions = int(config['car_agent']['actions'])  # size of the discrete action space

WINDOW_LENGTH = 4       # number of consecutive observations stacked into one network input
INPUT_SHAPE = (84, 84)  # shape every observation is resized to by AirSimCarProcessor

env = AirSimCarEnv()
np.random.seed(123)

# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
model = Sequential()
# Move the window axis last: (window, rows, cols) -> (rows, cols, window),
# so the stacked observations act as the channels of the convolutions.
model.add(Permute((2, 3, 1), input_shape=input_shape))
model.add(Convolution2D(32, (8, 8), strides=(4, 4)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (4, 4), strides=(2, 2)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3), strides=(1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
# One linear output per discrete action.
model.add(Dense(num_actions))
model.add(Activation('linear'))
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the cell output.
model.summary()

# Create the output directories. exist_ok=True replaces the separate
# exists() check: it avoids the check-then-create race and keeps the cell
# safe to re-run when the directories are already there.
log_dir = 'logs'
os.makedirs(log_dir, exist_ok=True)
tensorboard_dir = Path("logs/keras-rl_dqn_AirsimCar_tensorboard/")
tensorboard_dir.mkdir(parents=True, exist_ok=True)

# Experience replay buffer; each sampled state is a stack of WINDOW_LENGTH observations.
memory = SequentialMemory(
    limit=1000000,
    window_length=WINDOW_LENGTH,
)

# Epsilon-greedy exploration whose `eps` attribute is annealed between
# value_max and value_min over nb_steps; value_test is presumably the
# epsilon applied in test mode (confirm against keras-rl).
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=1000000,
)

# Observation / batch / reward pre-processing defined earlier in this cell.
processor = AirSimCarProcessor()

# Wire the network, replay memory, policy and processor into the DQN agent.
dqn = DQNAgent(
    model=model,
    nb_actions=num_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=50000,
    gamma=.99,
    target_model_update=10000,
    train_interval=4,
    delta_clip=1.,
)
dqn.compile(Adam(lr=.0001), metrics=['mae'])

def build_callbacks(env_name, log_dir='logs', checkpoint_interval=25000, log_interval=100):
    """Build the checkpointing and logging callbacks for a training run.

    Args:
        env_name: label embedded in the checkpoint and log file names.
        log_dir: directory receiving both files; created if it does not exist.
        checkpoint_interval: ``interval`` passed to ``ModelIntervalCheckpoint``.
        log_interval: ``interval`` passed to ``FileLogger``.

    Returns:
        A list ``[ModelIntervalCheckpoint, FileLogger]`` writing into ``log_dir``.
    """
    # exist_ok=True avoids the exists()/makedirs() race and is safe on repeated calls.
    os.makedirs(log_dir, exist_ok=True)

    # The literal "{step}" placeholder is intentionally left unformatted here;
    # it is expected to be filled in by the checkpoint callback when it saves.
    checkpoint_weights_filename = join(log_dir, 'dqn_' + env_name + '_weights_{step}.h5f')
    log_filename = join(log_dir, 'dqn_{}_log.json'.format(env_name))
    return [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=checkpoint_interval),
        FileLogger(log_filename, interval=log_interval),
    ]

# Assemble the checkpoint/log callbacks for this run, then start training.
callbacks = build_callbacks('AirSimCarRL')

dqn.fit(
    env,
    callbacks=callbacks,
    nb_steps=1000000,
    verbose=2,
    visualize=False,
)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Connected!
Client Ver:1 (Min Req: 1), Server Ver:1 (Min Req: 1)

['WayPoint0', 'WayPoint1', 'WayPoint2', 'WayPoint3', 'WayPoint4', 'WayPoint5', 'WayPoint6', 'WayPoint7', 'WayPoint8', 'WayPoint9']



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_1 (Permute)          (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 32)        8224      
_________________________________________________________________
activation_1 (Activation)    (None, 20, 20, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
activation_2 (Activation)    (None, 9, 9, 64)          0         
_____________________________________________________

   3412/1000000: episode: 17, duration: 7.610s, episode steps: 179, steps per second: 24, episode reward: 64.400, mean reward: 0.360 [-1.000, 0.500], mean action: 2.883 [0.000, 6.000], mean observation: 172.089 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   3617/1000000: episode: 18, duration: 9.439s, episode steps: 205, steps per second: 22, episode reward: 130.800, mean reward: 0.638 [-1.000, 1.000], mean action: 3.034 [0.000, 6.000], mean observation: 171.188 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   3812/1000000: episode: 19, duration: 8.486s, episode steps: 195, steps per second: 23, episode reward: 78.800, mean reward: 0.404 [-1.000, 0.500], mean action: 2.969 [0.000, 6.000], mean observation: 172.394 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   3970/1000000: episode: 20, duration: 6.668s, episode steps: 158, steps per second: 24, episode reward: 57.500, mean reward: 0.364

   8997/1000000: episode: 46, duration: 7.159s, episode steps: 171, steps per second: 24, episode reward: 56.000, mean reward: 0.327 [-1.000, 0.500], mean action: 2.953 [0.000, 6.000], mean observation: 172.070 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   9198/1000000: episode: 47, duration: 8.885s, episode steps: 201, steps per second: 23, episode reward: 77.800, mean reward: 0.387 [-1.000, 0.500], mean action: 2.945 [0.000, 6.000], mean observation: 171.758 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   9423/1000000: episode: 48, duration: 10.598s, episode steps: 225, steps per second: 21, episode reward: 131.300, mean reward: 0.584 [-1.000, 1.000], mean action: 2.987 [0.000, 6.000], mean observation: 172.342 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   9625/1000000: episode: 49, duration: 9.295s, episode steps: 202, steps per second: 22, episode reward: 113.300, mean reward: 0.5

  14823/1000000: episode: 75, duration: 9.301s, episode steps: 203, steps per second: 22, episode reward: 97.300, mean reward: 0.479 [-1.000, 1.000], mean action: 3.123 [0.000, 6.000], mean observation: 172.623 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  15037/1000000: episode: 76, duration: 9.967s, episode steps: 214, steps per second: 21, episode reward: 162.300, mean reward: 0.758 [-1.000, 1.000], mean action: 3.121 [0.000, 6.000], mean observation: 172.435 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  15215/1000000: episode: 77, duration: 7.508s, episode steps: 178, steps per second: 24, episode reward: 57.100, mean reward: 0.321 [-1.000, 0.500], mean action: 3.051 [0.000, 6.000], mean observation: 173.005 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  15412/1000000: episode: 78, duration: 8.666s, episode steps: 197, steps per second: 23, episode reward: 79.800, mean reward: 0.405

  20795/1000000: episode: 104, duration: 8.893s, episode steps: 198, steps per second: 22, episode reward: 90.100, mean reward: 0.455 [-1.000, 1.000], mean action: 3.071 [0.000, 6.000], mean observation: 171.814 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  20945/1000000: episode: 105, duration: 6.223s, episode steps: 150, steps per second: 24, episode reward: 52.700, mean reward: 0.351 [-1.000, 0.500], mean action: 2.780 [0.000, 6.000], mean observation: 172.923 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  21153/1000000: episode: 106, duration: 9.503s, episode steps: 208, steps per second: 22, episode reward: 94.900, mean reward: 0.456 [-1.000, 0.500], mean action: 3.130 [0.000, 6.000], mean observation: 172.451 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  21353/1000000: episode: 107, duration: 8.900s, episode steps: 200, steps per second: 22, episode reward: 123.000, mean reward: 0

  26590/1000000: episode: 133, duration: 8.985s, episode steps: 202, steps per second: 22, episode reward: 131.200, mean reward: 0.650 [-1.000, 1.000], mean action: 2.832 [0.000, 6.000], mean observation: 171.385 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  26796/1000000: episode: 134, duration: 9.634s, episode steps: 206, steps per second: 21, episode reward: 121.400, mean reward: 0.589 [-1.000, 1.000], mean action: 2.985 [0.000, 6.000], mean observation: 172.355 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  26994/1000000: episode: 135, duration: 8.673s, episode steps: 198, steps per second: 23, episode reward: 79.100, mean reward: 0.399 [-1.000, 0.500], mean action: 2.859 [0.000, 6.000], mean observation: 172.918 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  27188/1000000: episode: 136, duration: 8.494s, episode steps: 194, steps per second: 23, episode reward: 80.400, mean reward: 

  32342/1000000: episode: 162, duration: 9.796s, episode steps: 207, steps per second: 21, episode reward: 129.300, mean reward: 0.625 [-1.000, 1.000], mean action: 3.048 [0.000, 6.000], mean observation: 172.058 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  32581/1000000: episode: 163, duration: 11.484s, episode steps: 239, steps per second: 21, episode reward: 158.000, mean reward: 0.661 [-1.000, 1.000], mean action: 3.130 [0.000, 6.000], mean observation: 172.948 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  32802/1000000: episode: 164, duration: 10.402s, episode steps: 221, steps per second: 21, episode reward: 126.700, mean reward: 0.573 [-1.000, 1.000], mean action: 3.104 [0.000, 6.000], mean observation: 172.078 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  32978/1000000: episode: 165, duration: 7.532s, episode steps: 176, steps per second: 23, episode reward: 63.700, mean rewar

  38227/1000000: episode: 191, duration: 7.750s, episode steps: 178, steps per second: 23, episode reward: 73.900, mean reward: 0.415 [-1.000, 0.500], mean action: 2.809 [0.000, 6.000], mean observation: 171.294 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  38440/1000000: episode: 192, duration: 9.950s, episode steps: 213, steps per second: 21, episode reward: 125.900, mean reward: 0.591 [-1.000, 1.000], mean action: 3.108 [0.000, 6.000], mean observation: 172.254 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  38631/1000000: episode: 193, duration: 8.354s, episode steps: 191, steps per second: 23, episode reward: 79.600, mean reward: 0.417 [-1.000, 0.500], mean action: 2.869 [0.000, 6.000], mean observation: 171.634 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  38832/1000000: episode: 194, duration: 8.974s, episode steps: 201, steps per second: 22, episode reward: 86.200, mean reward: 0

  44311/1000000: episode: 220, duration: 9.947s, episode steps: 211, steps per second: 21, episode reward: 132.200, mean reward: 0.627 [-1.000, 1.000], mean action: 3.142 [0.000, 6.000], mean observation: 172.486 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  44497/1000000: episode: 221, duration: 8.072s, episode steps: 186, steps per second: 23, episode reward: 73.500, mean reward: 0.395 [-1.000, 0.500], mean action: 3.151 [0.000, 6.000], mean observation: 172.267 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  44735/1000000: episode: 222, duration: 11.467s, episode steps: 238, steps per second: 21, episode reward: 129.400, mean reward: 0.544 [-1.000, 1.000], mean action: 3.130 [0.000, 6.000], mean observation: 173.220 [0.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
  44931/1000000: episode: 223, duration: 8.539s, episode steps: 196, steps per second: 23, episode reward: 76.100, mean reward:

  50187/1000000: episode: 249, duration: 8.671s, episode steps: 178, steps per second: 21, episode reward: 109.600, mean reward: 0.616 [-1.000, 1.000], mean action: 2.978 [0.000, 6.000], mean observation: 171.063 [0.000, 255.000], loss: 0.053052, mean_absolute_error: 0.430716, mean_q: 0.541612, mean_eps: 0.954912
  50367/1000000: episode: 250, duration: 8.857s, episode steps: 180, steps per second: 20, episode reward: 106.100, mean reward: 0.589 [-1.000, 1.000], mean action: 2.883 [0.000, 6.000], mean observation: 170.926 [0.000, 255.000], loss: 0.047521, mean_absolute_error: 0.470444, mean_q: 0.604931, mean_eps: 0.954752
  50549/1000000: episode: 251, duration: 8.937s, episode steps: 182, steps per second: 20, episode reward: 71.100, mean reward: 0.391 [-1.000, 0.500], mean action: 3.055 [0.000, 6.000], mean observation: 172.067 [0.000, 255.000], loss: 0.036351, mean_absolute_error: 0.501065, mean_q: 0.621936, mean_eps: 0.954588
  50743/1000000: episode: 252, duration: 9.930s, episode

  55180/1000000: episode: 276, duration: 6.463s, episode steps: 139, steps per second: 22, episode reward: 47.600, mean reward: 0.342 [-1.000, 0.500], mean action: 2.827 [0.000, 6.000], mean observation: 173.149 [0.000, 255.000], loss: 0.012897, mean_absolute_error: 0.401153, mean_q: 0.497328, mean_eps: 0.950401
  55366/1000000: episode: 277, duration: 9.390s, episode steps: 186, steps per second: 20, episode reward: 114.000, mean reward: 0.613 [-1.000, 1.000], mean action: 3.048 [0.000, 6.000], mean observation: 172.173 [0.000, 255.000], loss: 0.013157, mean_absolute_error: 0.444740, mean_q: 0.541902, mean_eps: 0.950255
  55553/1000000: episode: 278, duration: 9.407s, episode steps: 187, steps per second: 20, episode reward: 91.200, mean reward: 0.488 [-1.000, 1.000], mean action: 3.134 [0.000, 6.000], mean observation: 172.608 [0.000, 255.000], loss: 0.014624, mean_absolute_error: 0.422332, mean_q: 0.517100, mean_eps: 0.950086
  55697/1000000: episode: 279, duration: 6.832s, episode 

  59840/1000000: episode: 303, duration: 6.875s, episode steps: 146, steps per second: 21, episode reward: 54.300, mean reward: 0.372 [-1.000, 0.500], mean action: 2.836 [0.000, 6.000], mean observation: 172.402 [0.000, 255.000], loss: 0.011869, mean_absolute_error: 0.472285, mean_q: 0.565391, mean_eps: 0.946211
  59993/1000000: episode: 304, duration: 7.164s, episode steps: 153, steps per second: 21, episode reward: 56.600, mean reward: 0.370 [-1.000, 0.500], mean action: 2.837 [0.000, 6.000], mean observation: 172.431 [0.000, 255.000], loss: 0.011979, mean_absolute_error: 0.413712, mean_q: 0.502175, mean_eps: 0.946076
  60170/1000000: episode: 305, duration: 8.487s, episode steps: 177, steps per second: 21, episode reward: 70.200, mean reward: 0.397 [-1.000, 0.500], mean action: 3.260 [0.000, 6.000], mean observation: 172.115 [0.000, 255.000], loss: 0.029608, mean_absolute_error: 0.763789, mean_q: 0.923380, mean_eps: 0.945926
  60354/1000000: episode: 306, duration: 9.140s, episode s

  64760/1000000: episode: 330, duration: 8.422s, episode steps: 175, steps per second: 21, episode reward: 71.600, mean reward: 0.409 [-1.000, 0.500], mean action: 2.971 [0.000, 6.000], mean observation: 171.175 [0.000, 255.000], loss: 0.007578, mean_absolute_error: 0.796013, mean_q: 0.944509, mean_eps: 0.941795
  64955/1000000: episode: 331, duration: 9.866s, episode steps: 195, steps per second: 20, episode reward: 86.400, mean reward: 0.443 [-1.000, 0.500], mean action: 3.031 [0.000, 6.000], mean observation: 171.717 [0.000, 255.000], loss: 0.009161, mean_absolute_error: 0.803589, mean_q: 0.953397, mean_eps: 0.941630
  65113/1000000: episode: 332, duration: 7.674s, episode steps: 158, steps per second: 21, episode reward: 67.500, mean reward: 0.427 [-1.000, 0.500], mean action: 2.854 [0.000, 6.000], mean observation: 171.294 [0.000, 255.000], loss: 0.007572, mean_absolute_error: 0.793325, mean_q: 0.942444, mean_eps: 0.941469
  65317/1000000: episode: 333, duration: 10.597s, episode 

  69539/1000000: episode: 357, duration: 7.630s, episode steps: 162, steps per second: 21, episode reward: 61.100, mean reward: 0.377 [-1.000, 0.500], mean action: 2.957 [0.000, 6.000], mean observation: 172.557 [0.000, 255.000], loss: 0.008700, mean_absolute_error: 0.798102, mean_q: 0.938639, mean_eps: 0.937488
  69709/1000000: episode: 358, duration: 8.148s, episode steps: 170, steps per second: 21, episode reward: 69.100, mean reward: 0.406 [-1.000, 0.500], mean action: 2.894 [0.000, 6.000], mean observation: 172.472 [0.000, 255.000], loss: 0.008134, mean_absolute_error: 0.798954, mean_q: 0.940240, mean_eps: 0.937338
  69870/1000000: episode: 359, duration: 7.733s, episode steps: 161, steps per second: 21, episode reward: 67.000, mean reward: 0.416 [-1.000, 0.500], mean action: 2.950 [0.000, 6.000], mean observation: 172.338 [0.000, 255.000], loss: 0.007202, mean_absolute_error: 0.798824, mean_q: 0.944866, mean_eps: 0.937189
  70024/1000000: episode: 360, duration: 7.118s, episode s

  74251/1000000: episode: 384, duration: 9.133s, episode steps: 185, steps per second: 20, episode reward: 79.400, mean reward: 0.429 [-1.000, 0.500], mean action: 3.038 [0.000, 6.000], mean observation: 172.357 [0.000, 255.000], loss: 0.007123, mean_absolute_error: 1.221651, mean_q: 1.444597, mean_eps: 0.933258
  74484/1000000: episode: 385, duration: 12.381s, episode steps: 233, steps per second: 19, episode reward: 128.300, mean reward: 0.551 [-1.000, 1.000], mean action: 3.305 [0.000, 6.000], mean observation: 173.253 [0.000, 255.000], loss: 0.009106, mean_absolute_error: 1.206012, mean_q: 1.422696, mean_eps: 0.933071
  74697/1000000: episode: 386, duration: 11.255s, episode steps: 213, steps per second: 19, episode reward: 112.900, mean reward: 0.530 [-1.000, 1.000], mean action: 3.066 [0.000, 6.000], mean observation: 172.967 [0.000, 255.000], loss: 0.006338, mean_absolute_error: 1.242727, mean_q: 1.471922, mean_eps: 0.932869
  74873/1000000: episode: 387, duration: 8.714s, episo

  78976/1000000: episode: 411, duration: 8.692s, episode steps: 177, steps per second: 20, episode reward: 81.100, mean reward: 0.458 [-1.000, 1.000], mean action: 2.881 [0.000, 6.000], mean observation: 172.187 [0.000, 255.000], loss: 0.006284, mean_absolute_error: 1.232560, mean_q: 1.441518, mean_eps: 0.929003
  79160/1000000: episode: 412, duration: 9.122s, episode steps: 184, steps per second: 20, episode reward: 123.200, mean reward: 0.670 [-1.000, 1.000], mean action: 3.103 [0.000, 6.000], mean observation: 171.433 [0.000, 255.000], loss: 0.007419, mean_absolute_error: 1.243412, mean_q: 1.466179, mean_eps: 0.928841
  79339/1000000: episode: 413, duration: 8.768s, episode steps: 179, steps per second: 20, episode reward: 87.500, mean reward: 0.489 [-1.000, 1.000], mean action: 2.983 [0.000, 6.000], mean observation: 172.003 [0.000, 255.000], loss: 0.007241, mean_absolute_error: 1.239928, mean_q: 1.459364, mean_eps: 0.928677
  79520/1000000: episode: 414, duration: 8.881s, episode 

  84035/1000000: episode: 438, duration: 9.827s, episode steps: 193, steps per second: 20, episode reward: 88.000, mean reward: 0.456 [-1.000, 1.000], mean action: 3.228 [0.000, 6.000], mean observation: 172.655 [0.000, 255.000], loss: 0.007052, mean_absolute_error: 1.728395, mean_q: 2.042767, mean_eps: 0.924456
  84214/1000000: episode: 439, duration: 8.748s, episode steps: 179, steps per second: 20, episode reward: 104.700, mean reward: 0.585 [-1.000, 1.000], mean action: 3.006 [0.000, 6.000], mean observation: 171.670 [0.000, 255.000], loss: 0.007483, mean_absolute_error: 1.682470, mean_q: 1.976937, mean_eps: 0.924288
  84402/1000000: episode: 440, duration: 9.339s, episode steps: 188, steps per second: 20, episode reward: 86.100, mean reward: 0.458 [-1.000, 0.500], mean action: 2.963 [0.000, 6.000], mean observation: 172.730 [0.000, 255.000], loss: 0.006900, mean_absolute_error: 1.673661, mean_q: 1.968769, mean_eps: 0.924123
  84577/1000000: episode: 441, duration: 8.505s, episode 

  88979/1000000: episode: 465, duration: 8.969s, episode steps: 183, steps per second: 20, episode reward: 114.200, mean reward: 0.624 [-1.000, 1.000], mean action: 2.945 [0.000, 6.000], mean observation: 171.032 [0.000, 255.000], loss: 0.006966, mean_absolute_error: 1.720139, mean_q: 2.013854, mean_eps: 0.920003
  89168/1000000: episode: 466, duration: 9.647s, episode steps: 189, steps per second: 20, episode reward: 105.800, mean reward: 0.560 [-1.000, 1.000], mean action: 3.026 [0.000, 6.000], mean observation: 171.861 [0.000, 255.000], loss: 0.007458, mean_absolute_error: 1.735037, mean_q: 2.029332, mean_eps: 0.919835
  89357/1000000: episode: 467, duration: 9.728s, episode steps: 189, steps per second: 19, episode reward: 133.000, mean reward: 0.704 [-1.000, 1.000], mean action: 2.889 [0.000, 6.000], mean observation: 171.770 [0.000, 255.000], loss: 0.006220, mean_absolute_error: 1.698651, mean_q: 1.996965, mean_eps: 0.919664
  89512/1000000: episode: 468, duration: 7.220s, episod

  93739/1000000: episode: 492, duration: 10.503s, episode steps: 203, steps per second: 19, episode reward: 140.200, mean reward: 0.691 [-1.000, 1.000], mean action: 3.118 [0.000, 6.000], mean observation: 172.150 [0.000, 255.000], loss: 0.007113, mean_absolute_error: 2.130026, mean_q: 2.493943, mean_eps: 0.915728
  93922/1000000: episode: 493, duration: 9.014s, episode steps: 183, steps per second: 20, episode reward: 115.500, mean reward: 0.631 [-1.000, 1.000], mean action: 3.027 [0.000, 6.000], mean observation: 170.633 [0.000, 255.000], loss: 0.007568, mean_absolute_error: 2.142674, mean_q: 2.512955, mean_eps: 0.915553
  94113/1000000: episode: 494, duration: 9.647s, episode steps: 191, steps per second: 20, episode reward: 125.200, mean reward: 0.655 [-1.000, 1.000], mean action: 2.979 [0.000, 6.000], mean observation: 171.062 [0.000, 255.000], loss: 0.006244, mean_absolute_error: 2.141389, mean_q: 2.510195, mean_eps: 0.915384
  94305/1000000: episode: 495, duration: 9.704s, episo

  98544/1000000: episode: 519, duration: 10.702s, episode steps: 206, steps per second: 19, episode reward: 126.100, mean reward: 0.612 [-1.000, 1.000], mean action: 3.117 [0.000, 6.000], mean observation: 172.968 [0.000, 255.000], loss: 0.006319, mean_absolute_error: 2.110561, mean_q: 2.471629, mean_eps: 0.911404
  98740/1000000: episode: 520, duration: 10.088s, episode steps: 196, steps per second: 19, episode reward: 124.200, mean reward: 0.634 [-1.000, 1.000], mean action: 3.138 [0.000, 6.000], mean observation: 172.831 [0.000, 255.000], loss: 0.007165, mean_absolute_error: 2.136107, mean_q: 2.494799, mean_eps: 0.911224
  98917/1000000: episode: 521, duration: 8.758s, episode steps: 177, steps per second: 20, episode reward: 81.900, mean reward: 0.463 [-1.000, 1.000], mean action: 2.887 [0.000, 6.000], mean observation: 172.529 [0.000, 255.000], loss: 0.008413, mean_absolute_error: 2.086001, mean_q: 2.448578, mean_eps: 0.911055
  99104/1000000: episode: 522, duration: 9.328s, episo

 103566/1000000: episode: 546, duration: 10.495s, episode steps: 208, steps per second: 20, episode reward: 92.500, mean reward: 0.445 [-1.000, 0.500], mean action: 3.144 [0.000, 6.000], mean observation: 173.282 [0.000, 255.000], loss: 0.006590, mean_absolute_error: 2.524748, mean_q: 2.948025, mean_eps: 0.906884
 103706/1000000: episode: 547, duration: 6.743s, episode steps: 140, steps per second: 21, episode reward: 56.100, mean reward: 0.401 [-1.000, 0.500], mean action: 2.579 [0.000, 6.000], mean observation: 171.572 [0.000, 255.000], loss: 0.006991, mean_absolute_error: 2.514210, mean_q: 2.931952, mean_eps: 0.906728
 103900/1000000: episode: 548, duration: 9.465s, episode steps: 194, steps per second: 20, episode reward: 61.500, mean reward: 0.317 [-1.000, 0.500], mean action: 3.175 [0.000, 6.000], mean observation: 172.482 [0.000, 255.000], loss: 0.006414, mean_absolute_error: 2.516458, mean_q: 2.942553, mean_eps: 0.906578
 104094/1000000: episode: 549, duration: 9.915s, episode 

 108353/1000000: episode: 573, duration: 7.961s, episode steps: 166, steps per second: 21, episode reward: 64.300, mean reward: 0.387 [-1.000, 0.500], mean action: 2.964 [0.000, 6.000], mean observation: 172.410 [0.000, 255.000], loss: 0.006991, mean_absolute_error: 2.506383, mean_q: 2.930909, mean_eps: 0.902557
 108507/1000000: episode: 574, duration: 7.227s, episode steps: 154, steps per second: 21, episode reward: 54.700, mean reward: 0.355 [-1.000, 0.500], mean action: 2.857 [0.000, 6.000], mean observation: 172.915 [0.000, 255.000], loss: 0.007126, mean_absolute_error: 2.534550, mean_q: 2.965179, mean_eps: 0.902413
 108705/1000000: episode: 575, duration: 9.996s, episode steps: 198, steps per second: 20, episode reward: 95.400, mean reward: 0.482 [-1.000, 1.000], mean action: 2.939 [0.000, 6.000], mean observation: 173.369 [0.000, 255.000], loss: 0.005936, mean_absolute_error: 2.510952, mean_q: 2.935452, mean_eps: 0.902255
 108886/1000000: episode: 576, duration: 8.708s, episode s

 113389/1000000: episode: 600, duration: 9.277s, episode steps: 186, steps per second: 20, episode reward: 125.900, mean reward: 0.677 [-1.000, 1.000], mean action: 2.839 [0.000, 6.000], mean observation: 171.621 [0.000, 255.000], loss: 0.008366, mean_absolute_error: 2.962303, mean_q: 3.460709, mean_eps: 0.898034
 113553/1000000: episode: 601, duration: 7.862s, episode steps: 164, steps per second: 21, episode reward: 88.700, mean reward: 0.541 [-1.000, 1.000], mean action: 2.896 [0.000, 6.000], mean observation: 171.412 [0.000, 255.000], loss: 0.008113, mean_absolute_error: 2.958673, mean_q: 3.456264, mean_eps: 0.897875
 113749/1000000: episode: 602, duration: 9.849s, episode steps: 196, steps per second: 20, episode reward: 80.300, mean reward: 0.410 [-1.000, 1.000], mean action: 3.082 [0.000, 6.000], mean observation: 172.400 [0.000, 255.000], loss: 0.007852, mean_absolute_error: 2.985331, mean_q: 3.483767, mean_eps: 0.897713
 113912/1000000: episode: 603, duration: 7.809s, episode 

 118221/1000000: episode: 627, duration: 8.509s, episode steps: 158, steps per second: 19, episode reward: 96.500, mean reward: 0.611 [-1.000, 1.000], mean action: 3.285 [0.000, 6.000], mean observation: 172.506 [0.000, 254.000], loss: 0.007376, mean_absolute_error: 3.029819, mean_q: 3.533249, mean_eps: 0.893672
 118459/1000000: episode: 628, duration: 12.711s, episode steps: 238, steps per second: 19, episode reward: 124.000, mean reward: 0.521 [-1.000, 1.000], mean action: 3.210 [0.000, 6.000], mean observation: 172.859 [0.000, 255.000], loss: 0.007945, mean_absolute_error: 3.015914, mean_q: 3.527231, mean_eps: 0.893494
 118610/1000000: episode: 629, duration: 7.206s, episode steps: 151, steps per second: 21, episode reward: 62.400, mean reward: 0.413 [-1.000, 0.500], mean action: 2.980 [0.000, 6.000], mean observation: 171.502 [0.000, 255.000], loss: 0.009007, mean_absolute_error: 2.997728, mean_q: 3.506181, mean_eps: 0.893319
 118749/1000000: episode: 630, duration: 6.429s, episode

 123310/1000000: episode: 654, duration: 9.526s, episode steps: 188, steps per second: 20, episode reward: 110.100, mean reward: 0.586 [-1.000, 1.000], mean action: 2.968 [0.000, 6.000], mean observation: 172.398 [0.000, 255.000], loss: 0.009528, mean_absolute_error: 3.382135, mean_q: 3.948117, mean_eps: 0.889106
 123471/1000000: episode: 655, duration: 7.787s, episode steps: 161, steps per second: 21, episode reward: 71.300, mean reward: 0.443 [-1.000, 1.000], mean action: 2.888 [0.000, 6.000], mean observation: 171.909 [0.000, 255.000], loss: 0.008777, mean_absolute_error: 3.375233, mean_q: 3.950461, mean_eps: 0.888949
 123657/1000000: episode: 656, duration: 9.200s, episode steps: 186, steps per second: 20, episode reward: 122.000, mean reward: 0.656 [-1.000, 1.000], mean action: 2.903 [0.000, 6.000], mean observation: 171.386 [0.000, 255.000], loss: 0.008596, mean_absolute_error: 3.312487, mean_q: 3.868138, mean_eps: 0.888792
 123824/1000000: episode: 657, duration: 7.945s, episode

 128217/1000000: episode: 681, duration: 10.876s, episode steps: 210, steps per second: 19, episode reward: 135.800, mean reward: 0.647 [-1.000, 1.000], mean action: 3.295 [0.000, 6.000], mean observation: 173.329 [0.000, 255.000], loss: 0.007663, mean_absolute_error: 3.379856, mean_q: 3.952049, mean_eps: 0.884699
 128402/1000000: episode: 682, duration: 9.235s, episode steps: 185, steps per second: 20, episode reward: 116.500, mean reward: 0.630 [-1.000, 1.000], mean action: 2.870 [0.000, 6.000], mean observation: 171.832 [0.000, 255.000], loss: 0.010072, mean_absolute_error: 3.413000, mean_q: 3.985066, mean_eps: 0.884521
 128605/1000000: episode: 683, duration: 10.472s, episode steps: 203, steps per second: 19, episode reward: 138.900, mean reward: 0.684 [-1.000, 1.000], mean action: 3.113 [0.000, 6.000], mean observation: 173.044 [0.000, 255.000], loss: 0.007892, mean_absolute_error: 3.426325, mean_q: 3.999415, mean_eps: 0.884346
 128787/1000000: episode: 684, duration: 8.987s, epis

 133073/1000000: episode: 708, duration: 12.114s, episode steps: 232, steps per second: 19, episode reward: 145.300, mean reward: 0.626 [-1.000, 1.000], mean action: 3.216 [0.000, 6.000], mean observation: 172.809 [0.000, 255.000], loss: 0.011563, mean_absolute_error: 3.768274, mean_q: 4.397249, mean_eps: 0.880338
 133303/1000000: episode: 709, duration: 12.077s, episode steps: 230, steps per second: 19, episode reward: 112.400, mean reward: 0.489 [-1.000, 1.000], mean action: 3.343 [0.000, 6.000], mean observation: 173.265 [0.000, 255.000], loss: 0.010437, mean_absolute_error: 3.780898, mean_q: 4.418583, mean_eps: 0.880131
 133486/1000000: episode: 710, duration: 8.720s, episode steps: 183, steps per second: 21, episode reward: 54.400, mean reward: 0.297 [-1.000, 0.500], mean action: 3.142 [0.000, 6.000], mean observation: 173.159 [0.000, 255.000], loss: 0.009800, mean_absolute_error: 3.790777, mean_q: 4.432035, mean_eps: 0.879945
 133711/1000000: episode: 711, duration: 11.774s, epis

 139387/1000000: episode: 735, duration: 9.147s, episode steps: 252, steps per second: 28, episode reward: 117.400, mean reward: 0.466 [-1.000, 1.000], mean action: 2.861 [0.000, 6.000], mean observation: 172.118 [0.000, 255.000], loss: 0.008330, mean_absolute_error: 3.716746, mean_q: 4.329319, mean_eps: 0.874666
 139626/1000000: episode: 736, duration: 8.289s, episode steps: 239, steps per second: 29, episode reward: 103.900, mean reward: 0.435 [-1.000, 1.000], mean action: 2.921 [0.000, 6.000], mean observation: 172.294 [0.000, 255.000], loss: 0.008854, mean_absolute_error: 3.824608, mean_q: 4.481196, mean_eps: 0.874445
 139872/1000000: episode: 737, duration: 8.660s, episode steps: 246, steps per second: 28, episode reward: 98.300, mean reward: 0.400 [-1.000, 0.500], mean action: 2.862 [0.000, 6.000], mean observation: 172.438 [0.000, 255.000], loss: 0.010414, mean_absolute_error: 3.725337, mean_q: 4.348368, mean_eps: 0.874227
 140119/1000000: episode: 738, duration: 8.741s, episode

 145962/1000000: episode: 762, duration: 10.659s, episode steps: 281, steps per second: 26, episode reward: 187.200, mean reward: 0.666 [-1.000, 1.000], mean action: 3.036 [0.000, 6.000], mean observation: 172.984 [0.000, 255.000], loss: 0.013301, mean_absolute_error: 4.261538, mean_q: 4.984646, mean_eps: 0.868760
 146258/1000000: episode: 763, duration: 11.436s, episode steps: 296, steps per second: 26, episode reward: 167.200, mean reward: 0.565 [-1.000, 1.000], mean action: 3.155 [0.000, 6.000], mean observation: 172.684 [0.000, 255.000], loss: 0.012987, mean_absolute_error: 4.331999, mean_q: 5.080120, mean_eps: 0.868501
 146506/1000000: episode: 764, duration: 8.984s, episode steps: 248, steps per second: 28, episode reward: 128.100, mean reward: 0.517 [-1.000, 1.000], mean action: 3.254 [0.000, 6.000], mean observation: 171.415 [0.000, 255.000], loss: 0.013034, mean_absolute_error: 4.278508, mean_q: 5.006201, mean_eps: 0.868256
 146727/1000000: episode: 765, duration: 7.611s, epis

 152606/1000000: episode: 789, duration: 11.193s, episode steps: 294, steps per second: 26, episode reward: 186.900, mean reward: 0.636 [-1.000, 1.000], mean action: 3.160 [0.000, 6.000], mean observation: 172.390 [0.000, 255.000], loss: 0.016376, mean_absolute_error: 4.631080, mean_q: 5.416221, mean_eps: 0.862788
 152853/1000000: episode: 790, duration: 8.759s, episode steps: 247, steps per second: 28, episode reward: 100.400, mean reward: 0.406 [-1.000, 0.500], mean action: 2.781 [0.000, 6.000], mean observation: 171.559 [0.000, 255.000], loss: 0.016230, mean_absolute_error: 4.548873, mean_q: 5.321043, mean_eps: 0.862543
 153149/1000000: episode: 791, duration: 11.408s, episode steps: 296, steps per second: 26, episode reward: 189.900, mean reward: 0.642 [-1.000, 1.000], mean action: 3.355 [0.000, 6.000], mean observation: 172.178 [0.000, 255.000], loss: 0.014049, mean_absolute_error: 4.585315, mean_q: 5.348862, mean_eps: 0.862298
 153460/1000000: episode: 792, duration: 12.011s, epi

 158608/1000000: episode: 816, duration: 9.382s, episode steps: 189, steps per second: 20, episode reward: 89.200, mean reward: 0.472 [-1.000, 1.000], mean action: 3.122 [0.000, 6.000], mean observation: 172.380 [0.000, 255.000], loss: 0.013602, mean_absolute_error: 4.593372, mean_q: 5.360634, mean_eps: 0.857339
 158746/1000000: episode: 817, duration: 6.297s, episode steps: 138, steps per second: 22, episode reward: 49.900, mean reward: 0.362 [-1.000, 0.500], mean action: 2.703 [0.000, 6.000], mean observation: 173.263 [0.000, 255.000], loss: 0.015621, mean_absolute_error: 4.548647, mean_q: 5.316423, mean_eps: 0.857192
 158961/1000000: episode: 818, duration: 10.955s, episode steps: 215, steps per second: 20, episode reward: 149.300, mean reward: 0.694 [-1.000, 1.000], mean action: 3.307 [0.000, 6.000], mean observation: 172.727 [0.000, 255.000], loss: 0.012986, mean_absolute_error: 4.623700, mean_q: 5.397226, mean_eps: 0.857031
 159159/1000000: episode: 819, duration: 9.798s, episode

 163652/1000000: episode: 842, duration: 9.007s, episode steps: 186, steps per second: 21, episode reward: 88.100, mean reward: 0.474 [-1.000, 1.000], mean action: 3.022 [0.000, 6.000], mean observation: 171.526 [0.000, 255.000], loss: 0.017190, mean_absolute_error: 4.883891, mean_q: 5.702420, mean_eps: 0.852798
 163846/1000000: episode: 843, duration: 9.623s, episode steps: 194, steps per second: 20, episode reward: 110.800, mean reward: 0.571 [-1.000, 1.000], mean action: 3.082 [0.000, 6.000], mean observation: 171.958 [0.000, 255.000], loss: 0.016281, mean_absolute_error: 4.976023, mean_q: 5.801059, mean_eps: 0.852627
 164033/1000000: episode: 844, duration: 9.163s, episode steps: 187, steps per second: 20, episode reward: 127.900, mean reward: 0.684 [-1.000, 1.000], mean action: 2.995 [0.000, 6.000], mean observation: 171.319 [0.000, 255.000], loss: 0.018544, mean_absolute_error: 5.039393, mean_q: 5.883303, mean_eps: 0.852454
 164223/1000000: episode: 845, duration: 9.305s, episode

 168557/1000000: episode: 869, duration: 9.873s, episode steps: 199, steps per second: 20, episode reward: 124.800, mean reward: 0.627 [-1.000, 1.000], mean action: 2.834 [0.000, 6.000], mean observation: 172.095 [0.000, 255.000], loss: 0.016573, mean_absolute_error: 5.043135, mean_q: 5.900397, mean_eps: 0.848388
 168744/1000000: episode: 870, duration: 8.980s, episode steps: 187, steps per second: 21, episode reward: 81.800, mean reward: 0.437 [-1.000, 1.000], mean action: 2.866 [0.000, 6.000], mean observation: 172.265 [0.000, 255.000], loss: 0.017342, mean_absolute_error: 4.991583, mean_q: 5.828718, mean_eps: 0.848215
 168947/1000000: episode: 871, duration: 10.151s, episode steps: 203, steps per second: 20, episode reward: 124.200, mean reward: 0.612 [-1.000, 1.000], mean action: 3.059 [0.000, 6.000], mean observation: 172.592 [0.000, 255.000], loss: 0.015832, mean_absolute_error: 4.884495, mean_q: 5.695843, mean_eps: 0.848040
 169158/1000000: episode: 872, duration: 10.635s, episo

 173576/1000000: episode: 895, duration: 9.150s, episode steps: 188, steps per second: 21, episode reward: 106.000, mean reward: 0.564 [-1.000, 1.000], mean action: 2.782 [0.000, 6.000], mean observation: 171.850 [0.000, 255.000], loss: 0.020442, mean_absolute_error: 5.362123, mean_q: 6.264270, mean_eps: 0.843868
 173788/1000000: episode: 896, duration: 10.777s, episode steps: 212, steps per second: 20, episode reward: 155.300, mean reward: 0.733 [-1.000, 1.000], mean action: 3.241 [0.000, 6.000], mean observation: 172.618 [0.000, 255.000], loss: 0.018896, mean_absolute_error: 5.464889, mean_q: 6.374068, mean_eps: 0.843688
 173943/1000000: episode: 897, duration: 7.140s, episode steps: 155, steps per second: 22, episode reward: 51.200, mean reward: 0.330 [-1.000, 0.500], mean action: 2.942 [0.000, 6.000], mean observation: 172.580 [0.000, 255.000], loss: 0.021511, mean_absolute_error: 5.404087, mean_q: 6.302993, mean_eps: 0.843522
 174059/1000000: episode: 898, duration: 5.334s, episod

 178603/1000000: episode: 922, duration: 11.995s, episode steps: 236, steps per second: 20, episode reward: 154.200, mean reward: 0.653 [-1.000, 1.000], mean action: 3.195 [0.000, 6.000], mean observation: 172.864 [0.000, 255.000], loss: 0.021372, mean_absolute_error: 5.484055, mean_q: 6.401137, mean_eps: 0.839364
 178773/1000000: episode: 923, duration: 7.859s, episode steps: 170, steps per second: 22, episode reward: 59.900, mean reward: 0.352 [-1.000, 0.500], mean action: 2.912 [0.000, 6.000], mean observation: 171.783 [0.000, 255.000], loss: 0.018255, mean_absolute_error: 5.420255, mean_q: 6.332917, mean_eps: 0.839181
 178950/1000000: episode: 924, duration: 8.177s, episode steps: 177, steps per second: 22, episode reward: 57.000, mean reward: 0.322 [-1.000, 0.500], mean action: 3.040 [0.000, 6.000], mean observation: 172.120 [0.000, 255.000], loss: 0.021292, mean_absolute_error: 5.416970, mean_q: 6.327953, mean_eps: 0.839024
 179138/1000000: episode: 925, duration: 9.263s, episode

 183746/1000000: episode: 949, duration: 10.059s, episode steps: 211, steps per second: 21, episode reward: 58.800, mean reward: 0.279 [-1.000, 0.500], mean action: 3.100 [0.000, 6.000], mean observation: 173.317 [0.000, 255.000], loss: 0.023678, mean_absolute_error: 5.810646, mean_q: 6.776971, mean_eps: 0.834724
 183906/1000000: episode: 950, duration: 7.326s, episode steps: 160, steps per second: 22, episode reward: 50.500, mean reward: 0.316 [-1.000, 0.500], mean action: 2.950 [0.000, 6.000], mean observation: 172.816 [0.000, 255.000], loss: 0.024918, mean_absolute_error: 5.889768, mean_q: 6.879252, mean_eps: 0.834557
 184113/1000000: episode: 951, duration: 10.427s, episode steps: 207, steps per second: 20, episode reward: 96.500, mean reward: 0.466 [-1.000, 1.000], mean action: 3.135 [0.000, 6.000], mean observation: 173.394 [0.000, 255.000], loss: 0.024778, mean_absolute_error: 5.792099, mean_q: 6.755435, mean_eps: 0.834391
 184313/1000000: episode: 952, duration: 10.212s, episod

 188794/1000000: episode: 976, duration: 9.973s, episode steps: 200, steps per second: 20, episode reward: 128.300, mean reward: 0.642 [-1.000, 1.000], mean action: 3.040 [0.000, 6.000], mean observation: 172.118 [0.000, 255.000], loss: 0.024468, mean_absolute_error: 5.922635, mean_q: 6.920541, mean_eps: 0.830175
 188955/1000000: episode: 977, duration: 7.503s, episode steps: 161, steps per second: 21, episode reward: 62.200, mean reward: 0.386 [-1.000, 0.500], mean action: 2.553 [0.000, 6.000], mean observation: 171.984 [0.000, 255.000], loss: 0.020629, mean_absolute_error: 5.693055, mean_q: 6.639927, mean_eps: 0.830013
 189082/1000000: episode: 978, duration: 5.817s, episode steps: 127, steps per second: 22, episode reward: 46.000, mean reward: 0.362 [-1.000, 0.500], mean action: 2.654 [0.000, 6.000], mean observation: 172.438 [0.000, 255.000], loss: 0.022638, mean_absolute_error: 5.954780, mean_q: 6.952013, mean_eps: 0.829884
 189254/1000000: episode: 979, duration: 7.900s, episode 

 194585/1000000: episode: 1003, duration: 126.684s, episode steps: 379, steps per second: 3, episode reward: 229.200, mean reward: 0.605 [-1.000, 1.000], mean action: 3.161 [0.000, 6.000], mean observation: 177.800 [0.000, 255.000], loss: 0.028208, mean_absolute_error: 6.250282, mean_q: 7.296674, mean_eps: 0.825044
 194925/1000000: episode: 1004, duration: 113.657s, episode steps: 340, steps per second: 3, episode reward: 208.600, mean reward: 0.614 [-1.000, 1.000], mean action: 2.979 [0.000, 6.000], mean observation: 176.732 [0.000, 255.000], loss: 0.027826, mean_absolute_error: 6.062461, mean_q: 7.077433, mean_eps: 0.824720
 195225/1000000: episode: 1005, duration: 100.324s, episode steps: 300, steps per second: 3, episode reward: 117.700, mean reward: 0.392 [-1.000, 0.500], mean action: 3.013 [0.000, 6.000], mean observation: 176.398 [0.000, 255.000], loss: 0.028236, mean_absolute_error: 6.132969, mean_q: 7.159198, mean_eps: 0.824432
 195482/1000000: episode: 1006, duration: 85.988s

 203134/1000000: episode: 1029, duration: 109.971s, episode steps: 329, steps per second: 3, episode reward: 224.100, mean reward: 0.681 [-1.000, 1.000], mean action: 3.109 [0.000, 6.000], mean observation: 175.420 [0.000, 255.000], loss: 0.032658, mean_absolute_error: 6.365159, mean_q: 7.426039, mean_eps: 0.817327
 203535/1000000: episode: 1030, duration: 133.993s, episode steps: 401, steps per second: 3, episode reward: 249.400, mean reward: 0.622 [-1.000, 1.000], mean action: 3.110 [0.000, 6.000], mean observation: 176.798 [0.000, 255.000], loss: 0.029650, mean_absolute_error: 6.563951, mean_q: 7.673005, mean_eps: 0.816999
 203857/1000000: episode: 1031, duration: 85.540s, episode steps: 322, steps per second: 4, episode reward: 243.600, mean reward: 0.757 [-1.000, 1.000], mean action: 3.183 [0.000, 6.000], mean observation: 175.233 [0.000, 255.000], loss: 0.031387, mean_absolute_error: 6.542468, mean_q: 7.646063, mean_eps: 0.816674
 204074/1000000: episode: 1032, duration: 36.943s,

 211823/1000000: episode: 1055, duration: 137.303s, episode steps: 411, steps per second: 3, episode reward: 295.300, mean reward: 0.718 [-1.000, 1.000], mean action: 3.316 [0.000, 6.000], mean observation: 176.462 [0.000, 255.000], loss: 0.039320, mean_absolute_error: 6.914956, mean_q: 8.082225, mean_eps: 0.809546
 212137/1000000: episode: 1056, duration: 105.009s, episode steps: 314, steps per second: 3, episode reward: 124.300, mean reward: 0.396 [-1.000, 0.500], mean action: 3.064 [0.000, 6.000], mean observation: 176.699 [0.000, 255.000], loss: 0.038543, mean_absolute_error: 7.028896, mean_q: 8.211713, mean_eps: 0.809218
 212480/1000000: episode: 1057, duration: 114.662s, episode steps: 343, steps per second: 3, episode reward: 225.400, mean reward: 0.657 [-1.000, 1.000], mean action: 3.192 [0.000, 6.000], mean observation: 176.776 [0.000, 255.000], loss: 0.037722, mean_absolute_error: 6.924859, mean_q: 8.092810, mean_eps: 0.808923
 212816/1000000: episode: 1058, duration: 112.318

 220968/1000000: episode: 1081, duration: 153.013s, episode steps: 458, steps per second: 3, episode reward: 300.700, mean reward: 0.657 [-1.000, 1.000], mean action: 3.362 [0.000, 6.000], mean observation: 177.728 [0.000, 255.000], loss: 0.038669, mean_absolute_error: 7.323812, mean_q: 8.563115, mean_eps: 0.801336
 221298/1000000: episode: 1082, duration: 110.297s, episode steps: 330, steps per second: 3, episode reward: 246.100, mean reward: 0.746 [-1.000, 1.000], mean action: 2.948 [0.000, 6.000], mean observation: 176.477 [0.000, 255.000], loss: 0.036355, mean_absolute_error: 7.240591, mean_q: 8.447516, mean_eps: 0.800981
 221678/1000000: episode: 1083, duration: 127.012s, episode steps: 380, steps per second: 3, episode reward: 271.900, mean reward: 0.716 [-1.000, 1.000], mean action: 3.029 [0.000, 6.000], mean observation: 177.064 [0.000, 255.000], loss: 0.036264, mean_absolute_error: 7.226420, mean_q: 8.445686, mean_eps: 0.800661
 222080/1000000: episode: 1084, duration: 134.322

 228245/1000000: episode: 1107, duration: 10.501s, episode steps: 206, steps per second: 20, episode reward: 145.100, mean reward: 0.704 [-1.000, 1.000], mean action: 3.131 [0.000, 6.000], mean observation: 173.174 [0.000, 255.000], loss: 0.034908, mean_absolute_error: 7.287315, mean_q: 8.512066, mean_eps: 0.794672
 228386/1000000: episode: 1108, duration: 6.485s, episode steps: 141, steps per second: 22, episode reward: 52.200, mean reward: 0.370 [-1.000, 0.500], mean action: 2.766 [0.000, 6.000], mean observation: 173.242 [0.000, 255.000], loss: 0.039046, mean_absolute_error: 7.400787, mean_q: 8.654543, mean_eps: 0.794516
 228603/1000000: episode: 1109, duration: 11.040s, episode steps: 217, steps per second: 20, episode reward: 134.700, mean reward: 0.621 [-1.000, 1.000], mean action: 3.258 [0.000, 6.000], mean observation: 172.801 [0.000, 255.000], loss: 0.036026, mean_absolute_error: 7.342002, mean_q: 8.595475, mean_eps: 0.794355
 228786/1000000: episode: 1110, duration: 8.867s, e

 233191/1000000: episode: 1133, duration: 10.498s, episode steps: 208, steps per second: 20, episode reward: 128.200, mean reward: 0.616 [-1.000, 1.000], mean action: 3.019 [0.000, 6.000], mean observation: 172.722 [0.000, 255.000], loss: 0.037813, mean_absolute_error: 7.678301, mean_q: 8.977395, mean_eps: 0.790223
 233439/1000000: episode: 1134, duration: 13.016s, episode steps: 248, steps per second: 19, episode reward: 115.200, mean reward: 0.465 [-1.000, 1.000], mean action: 3.190 [0.000, 6.000], mean observation: 173.967 [0.000, 255.000], loss: 0.042242, mean_absolute_error: 7.564549, mean_q: 8.837642, mean_eps: 0.790017
 233624/1000000: episode: 1135, duration: 8.944s, episode steps: 185, steps per second: 21, episode reward: 79.800, mean reward: 0.431 [-1.000, 0.500], mean action: 3.092 [0.000, 6.000], mean observation: 172.524 [0.000, 255.000], loss: 0.047529, mean_absolute_error: 7.757212, mean_q: 9.074282, mean_eps: 0.789823
 233822/1000000: episode: 1136, duration: 9.799s, e

 238308/1000000: episode: 1159, duration: 6.753s, episode steps: 146, steps per second: 22, episode reward: 56.300, mean reward: 0.386 [-1.000, 0.500], mean action: 2.678 [0.000, 6.000], mean observation: 171.625 [0.000, 255.000], loss: 0.037847, mean_absolute_error: 7.585787, mean_q: 8.874404, mean_eps: 0.785589
 238528/1000000: episode: 1160, duration: 11.363s, episode steps: 220, steps per second: 19, episode reward: 108.800, mean reward: 0.495 [-1.000, 1.000], mean action: 3.350 [0.000, 6.000], mean observation: 172.387 [0.000, 255.000], loss: 0.041786, mean_absolute_error: 7.878334, mean_q: 9.206760, mean_eps: 0.785426
 238712/1000000: episode: 1161, duration: 8.898s, episode steps: 184, steps per second: 21, episode reward: 126.700, mean reward: 0.689 [-1.000, 1.000], mean action: 3.016 [0.000, 6.000], mean observation: 170.307 [0.000, 255.000], loss: 0.040341, mean_absolute_error: 7.534919, mean_q: 8.820659, mean_eps: 0.785244
 238918/1000000: episode: 1162, duration: 10.412s, e

 243452/1000000: episode: 1185, duration: 9.685s, episode steps: 194, steps per second: 20, episode reward: 88.300, mean reward: 0.455 [-1.000, 0.500], mean action: 3.108 [0.000, 6.000], mean observation: 172.396 [0.000, 255.000], loss: 0.045625, mean_absolute_error: 8.098928, mean_q: 9.475380, mean_eps: 0.780981
 243667/1000000: episode: 1186, duration: 10.992s, episode steps: 215, steps per second: 20, episode reward: 154.900, mean reward: 0.720 [-1.000, 1.000], mean action: 3.372 [0.000, 6.000], mean observation: 172.138 [0.000, 255.000], loss: 0.046038, mean_absolute_error: 8.202675, mean_q: 9.585950, mean_eps: 0.780798
 243888/1000000: episode: 1187, duration: 11.370s, episode steps: 221, steps per second: 19, episode reward: 151.800, mean reward: 0.687 [-1.000, 1.000], mean action: 3.439 [0.000, 6.000], mean observation: 172.325 [0.000, 255.000], loss: 0.047840, mean_absolute_error: 8.075981, mean_q: 9.453526, mean_eps: 0.780602
 244059/1000000: episode: 1188, duration: 7.988s, e

 248574/1000000: episode: 1211, duration: 11.147s, episode steps: 216, steps per second: 19, episode reward: 104.500, mean reward: 0.484 [-1.000, 1.000], mean action: 3.125 [0.000, 6.000], mean observation: 173.496 [0.000, 255.000], loss: 0.046931, mean_absolute_error: 8.287865, mean_q: 9.704166, mean_eps: 0.776381
 248740/1000000: episode: 1212, duration: 7.996s, episode steps: 166, steps per second: 21, episode reward: 108.500, mean reward: 0.654 [-1.000, 1.000], mean action: 2.687 [0.000, 6.000], mean observation: 171.354 [0.000, 255.000], loss: 0.054548, mean_absolute_error: 8.255920, mean_q: 9.668928, mean_eps: 0.776210
 248884/1000000: episode: 1213, duration: 7.663s, episode steps: 144, steps per second: 19, episode reward: 76.900, mean reward: 0.534 [-1.000, 1.000], mean action: 3.576 [0.000, 6.000], mean observation: 174.069 [0.000, 255.000], loss: 0.047638, mean_absolute_error: 8.053735, mean_q: 9.438268, mean_eps: 0.776071
 249050/1000000: episode: 1214, duration: 7.841s, ep

 253327/1000000: episode: 1237, duration: 9.001s, episode steps: 187, steps per second: 21, episode reward: 71.200, mean reward: 0.381 [-1.000, 0.500], mean action: 3.102 [0.000, 6.000], mean observation: 172.491 [0.000, 255.000], loss: 0.058893, mean_absolute_error: 8.456083, mean_q: 9.897354, mean_eps: 0.772091
 253540/1000000: episode: 1238, duration: 10.637s, episode steps: 213, steps per second: 20, episode reward: 121.300, mean reward: 0.569 [-1.000, 1.000], mean action: 3.061 [0.000, 6.000], mean observation: 173.327 [0.000, 255.000], loss: 0.060002, mean_absolute_error: 8.587063, mean_q: 10.033068, mean_eps: 0.771911
 253744/1000000: episode: 1239, duration: 10.434s, episode steps: 204, steps per second: 20, episode reward: 140.600, mean reward: 0.689 [-1.000, 1.000], mean action: 2.975 [0.000, 6.000], mean observation: 172.584 [0.000, 255.000], loss: 0.053826, mean_absolute_error: 8.353848, mean_q: 9.759170, mean_eps: 0.771724
 253952/1000000: episode: 1240, duration: 10.587s,

 258242/1000000: episode: 1263, duration: 8.776s, episode steps: 182, steps per second: 21, episode reward: 77.100, mean reward: 0.424 [-1.000, 1.000], mean action: 3.027 [0.000, 6.000], mean observation: 172.246 [0.000, 255.000], loss: 0.049060, mean_absolute_error: 8.496892, mean_q: 9.930955, mean_eps: 0.767665
 258490/1000000: episode: 1264, duration: 12.811s, episode steps: 248, steps per second: 19, episode reward: 159.800, mean reward: 0.644 [-1.000, 1.000], mean action: 3.383 [0.000, 6.000], mean observation: 173.897 [0.000, 255.000], loss: 0.051111, mean_absolute_error: 8.506774, mean_q: 9.960020, mean_eps: 0.767471
 258710/1000000: episode: 1265, duration: 11.242s, episode steps: 220, steps per second: 20, episode reward: 169.800, mean reward: 0.772 [-1.000, 1.000], mean action: 3.259 [0.000, 6.000], mean observation: 172.965 [0.000, 255.000], loss: 0.048719, mean_absolute_error: 8.337488, mean_q: 9.758266, mean_eps: 0.767260
 258929/1000000: episode: 1266, duration: 11.257s, 

 263610/1000000: episode: 1289, duration: 12.103s, episode steps: 237, steps per second: 20, episode reward: 124.500, mean reward: 0.525 [-1.000, 1.000], mean action: 3.342 [0.000, 6.000], mean observation: 172.819 [0.000, 255.000], loss: 0.052705, mean_absolute_error: 9.094199, mean_q: 10.646226, mean_eps: 0.762857
 263818/1000000: episode: 1290, duration: 10.352s, episode steps: 208, steps per second: 20, episode reward: 123.900, mean reward: 0.596 [-1.000, 1.000], mean action: 3.101 [0.000, 6.000], mean observation: 172.963 [0.000, 255.000], loss: 0.065782, mean_absolute_error: 9.204482, mean_q: 10.759085, mean_eps: 0.762657
 264039/1000000: episode: 1291, duration: 10.780s, episode steps: 221, steps per second: 21, episode reward: 94.100, mean reward: 0.426 [-1.000, 1.000], mean action: 3.163 [0.000, 6.000], mean observation: 173.138 [0.000, 255.000], loss: 0.058926, mean_absolute_error: 9.231323, mean_q: 10.791308, mean_eps: 0.762465
 264218/1000000: episode: 1292, duration: 8.569

 268762/1000000: episode: 1315, duration: 11.139s, episode steps: 220, steps per second: 20, episode reward: 135.500, mean reward: 0.616 [-1.000, 1.000], mean action: 3.245 [0.000, 6.000], mean observation: 172.395 [0.000, 255.000], loss: 0.053725, mean_absolute_error: 9.052836, mean_q: 10.603282, mean_eps: 0.758213
 268958/1000000: episode: 1316, duration: 9.675s, episode steps: 196, steps per second: 20, episode reward: 120.300, mean reward: 0.614 [-1.000, 1.000], mean action: 2.903 [0.000, 6.000], mean observation: 172.098 [0.000, 255.000], loss: 0.054051, mean_absolute_error: 9.096954, mean_q: 10.638370, mean_eps: 0.758026
 269196/1000000: episode: 1317, duration: 12.365s, episode steps: 238, steps per second: 19, episode reward: 174.800, mean reward: 0.734 [-1.000, 1.000], mean action: 3.269 [0.000, 6.000], mean observation: 172.447 [0.000, 255.000], loss: 0.056754, mean_absolute_error: 9.094985, mean_q: 10.635015, mean_eps: 0.757832
 269418/1000000: episode: 1318, duration: 11.16

 273892/1000000: episode: 1341, duration: 10.624s, episode steps: 210, steps per second: 20, episode reward: 128.600, mean reward: 0.612 [-1.000, 1.000], mean action: 2.933 [0.000, 6.000], mean observation: 173.350 [0.000, 255.000], loss: 0.067664, mean_absolute_error: 9.457007, mean_q: 11.083093, mean_eps: 0.753593
 274076/1000000: episode: 1342, duration: 8.688s, episode steps: 184, steps per second: 21, episode reward: 73.300, mean reward: 0.398 [-1.000, 0.500], mean action: 2.848 [0.000, 6.000], mean observation: 172.996 [0.000, 255.000], loss: 0.063772, mean_absolute_error: 9.529010, mean_q: 11.160856, mean_eps: 0.753416
 274293/1000000: episode: 1343, duration: 10.680s, episode steps: 217, steps per second: 20, episode reward: 86.200, mean reward: 0.397 [-1.000, 0.500], mean action: 3.300 [0.000, 6.000], mean observation: 173.457 [0.000, 255.000], loss: 0.088065, mean_absolute_error: 9.489939, mean_q: 11.115613, mean_eps: 0.753234
 274498/1000000: episode: 1344, duration: 10.392s

 279000/1000000: episode: 1367, duration: 8.690s, episode steps: 180, steps per second: 21, episode reward: 114.300, mean reward: 0.635 [-1.000, 1.000], mean action: 2.744 [0.000, 6.000], mean observation: 170.969 [0.000, 255.000], loss: 0.061558, mean_absolute_error: 9.538702, mean_q: 11.175678, mean_eps: 0.748983
 279238/1000000: episode: 1368, duration: 12.062s, episode steps: 238, steps per second: 20, episode reward: 141.300, mean reward: 0.594 [-1.000, 1.000], mean action: 3.244 [0.000, 6.000], mean observation: 173.319 [0.000, 255.000], loss: 0.063547, mean_absolute_error: 9.531640, mean_q: 11.158512, mean_eps: 0.748794
 279365/1000000: episode: 1369, duration: 6.782s, episode steps: 127, steps per second: 19, episode reward: 71.800, mean reward: 0.565 [-1.000, 1.000], mean action: 3.346 [0.000, 6.000], mean observation: 172.786 [0.000, 255.000], loss: 0.067298, mean_absolute_error: 9.318385, mean_q: 10.897055, mean_eps: 0.748628
 279605/1000000: episode: 1370, duration: 12.133s

 284204/1000000: episode: 1393, duration: 9.255s, episode steps: 188, steps per second: 20, episode reward: 121.400, mean reward: 0.646 [-1.000, 1.000], mean action: 3.011 [0.000, 6.000], mean observation: 172.038 [0.000, 255.000], loss: 0.064417, mean_absolute_error: 9.813090, mean_q: 11.478049, mean_eps: 0.744303
 284402/1000000: episode: 1394, duration: 9.915s, episode steps: 198, steps per second: 20, episode reward: 140.600, mean reward: 0.710 [-1.000, 1.000], mean action: 2.975 [0.000, 6.000], mean observation: 172.261 [0.000, 255.000], loss: 0.071476, mean_absolute_error: 9.996743, mean_q: 11.706665, mean_eps: 0.744128
 284601/1000000: episode: 1395, duration: 9.948s, episode steps: 199, steps per second: 20, episode reward: 149.800, mean reward: 0.753 [-1.000, 1.000], mean action: 3.181 [0.000, 6.000], mean observation: 172.613 [0.000, 255.000], loss: 0.073655, mean_absolute_error: 9.779546, mean_q: 11.445553, mean_eps: 0.743948
 284815/1000000: episode: 1396, duration: 10.768s

 289300/1000000: episode: 1419, duration: 6.916s, episode steps: 149, steps per second: 22, episode reward: 54.200, mean reward: 0.364 [-1.000, 0.500], mean action: 2.812 [0.000, 6.000], mean observation: 172.404 [0.000, 255.000], loss: 0.071804, mean_absolute_error: 10.011015, mean_q: 11.714461, mean_eps: 0.739698
 289468/1000000: episode: 1420, duration: 7.869s, episode steps: 168, steps per second: 21, episode reward: 56.900, mean reward: 0.339 [-1.000, 0.500], mean action: 2.917 [0.000, 6.000], mean observation: 172.182 [0.000, 255.000], loss: 0.065356, mean_absolute_error: 9.879684, mean_q: 11.547935, mean_eps: 0.739556
 289681/1000000: episode: 1421, duration: 10.808s, episode steps: 213, steps per second: 20, episode reward: 149.000, mean reward: 0.700 [-1.000, 1.000], mean action: 3.371 [0.000, 6.000], mean observation: 172.220 [0.000, 255.000], loss: 0.066501, mean_absolute_error: 9.823545, mean_q: 11.487565, mean_eps: 0.739383
 289865/1000000: episode: 1422, duration: 8.942s,

 294364/1000000: episode: 1445, duration: 9.156s, episode steps: 188, steps per second: 21, episode reward: 129.800, mean reward: 0.690 [-1.000, 1.000], mean action: 2.984 [0.000, 6.000], mean observation: 170.506 [0.000, 255.000], loss: 0.080080, mean_absolute_error: 10.308260, mean_q: 12.064312, mean_eps: 0.735159
 294550/1000000: episode: 1446, duration: 9.000s, episode steps: 186, steps per second: 21, episode reward: 129.600, mean reward: 0.697 [-1.000, 1.000], mean action: 2.914 [0.000, 6.000], mean observation: 170.655 [0.000, 255.000], loss: 0.076492, mean_absolute_error: 9.930111, mean_q: 11.618166, mean_eps: 0.734990
 294706/1000000: episode: 1447, duration: 7.316s, episode steps: 156, steps per second: 21, episode reward: 62.500, mean reward: 0.401 [-1.000, 0.500], mean action: 2.833 [0.000, 6.000], mean observation: 171.246 [0.000, 255.000], loss: 0.075581, mean_absolute_error: 10.230501, mean_q: 11.981598, mean_eps: 0.734835
 294903/1000000: episode: 1448, duration: 9.724s

 299178/1000000: episode: 1471, duration: 10.683s, episode steps: 212, steps per second: 20, episode reward: 137.600, mean reward: 0.649 [-1.000, 1.000], mean action: 3.184 [0.000, 6.000], mean observation: 172.631 [0.000, 255.000], loss: 0.083123, mean_absolute_error: 10.214826, mean_q: 11.958391, mean_eps: 0.730835
 299306/1000000: episode: 1472, duration: 6.789s, episode steps: 128, steps per second: 19, episode reward: 67.100, mean reward: 0.524 [-1.000, 1.000], mean action: 3.656 [0.000, 6.000], mean observation: 173.461 [0.000, 255.000], loss: 0.065471, mean_absolute_error: 10.430578, mean_q: 12.231787, mean_eps: 0.730682
 299495/1000000: episode: 1473, duration: 9.205s, episode steps: 189, steps per second: 21, episode reward: 79.800, mean reward: 0.422 [-1.000, 0.500], mean action: 3.042 [0.000, 6.000], mean observation: 172.761 [0.000, 255.000], loss: 0.074784, mean_absolute_error: 10.209995, mean_q: 11.937049, mean_eps: 0.730540
 299712/1000000: episode: 1474, duration: 11.06

 303959/1000000: episode: 1497, duration: 7.179s, episode steps: 153, steps per second: 21, episode reward: 55.400, mean reward: 0.362 [-1.000, 0.500], mean action: 2.843 [0.000, 6.000], mean observation: 172.331 [0.000, 255.000], loss: 0.077798, mean_absolute_error: 10.553634, mean_q: 12.335579, mean_eps: 0.726506
 304235/1000000: episode: 1498, duration: 14.792s, episode steps: 276, steps per second: 19, episode reward: 206.700, mean reward: 0.749 [-1.000, 1.000], mean action: 3.446 [0.000, 6.000], mean observation: 173.393 [0.000, 255.000], loss: 0.077145, mean_absolute_error: 10.587212, mean_q: 12.396337, mean_eps: 0.726314
 304428/1000000: episode: 1499, duration: 9.603s, episode steps: 193, steps per second: 20, episode reward: 137.700, mean reward: 0.713 [-1.000, 1.000], mean action: 3.021 [0.000, 6.000], mean observation: 171.492 [0.000, 255.000], loss: 0.082277, mean_absolute_error: 10.588195, mean_q: 12.386360, mean_eps: 0.726103
 304572/1000000: episode: 1500, duration: 6.58

 309166/1000000: episode: 1523, duration: 10.503s, episode steps: 209, steps per second: 20, episode reward: 138.500, mean reward: 0.663 [-1.000, 1.000], mean action: 3.249 [0.000, 6.000], mean observation: 172.809 [0.000, 255.000], loss: 0.089015, mean_absolute_error: 10.926779, mean_q: 12.795064, mean_eps: 0.721844
 309399/1000000: episode: 1524, duration: 11.966s, episode steps: 233, steps per second: 19, episode reward: 154.600, mean reward: 0.664 [-1.000, 1.000], mean action: 3.464 [0.000, 6.000], mean observation: 173.395 [0.000, 255.000], loss: 0.077745, mean_absolute_error: 10.702351, mean_q: 12.531638, mean_eps: 0.721646
 309525/1000000: episode: 1525, duration: 6.794s, episode steps: 126, steps per second: 19, episode reward: 71.900, mean reward: 0.571 [-1.000, 1.000], mean action: 3.659 [0.000, 6.000], mean observation: 173.526 [0.000, 255.000], loss: 0.089487, mean_absolute_error: 10.795689, mean_q: 12.648926, mean_eps: 0.721484
 309734/1000000: episode: 1526, duration: 10.

 314330/1000000: episode: 1549, duration: 10.014s, episode steps: 200, steps per second: 20, episode reward: 139.000, mean reward: 0.695 [-1.000, 1.000], mean action: 3.215 [0.000, 6.000], mean observation: 171.854 [0.000, 255.000], loss: 0.075913, mean_absolute_error: 10.749482, mean_q: 12.587936, mean_eps: 0.717193
 314516/1000000: episode: 1550, duration: 9.000s, episode steps: 186, steps per second: 21, episode reward: 126.200, mean reward: 0.678 [-1.000, 1.000], mean action: 3.043 [0.000, 6.000], mean observation: 171.202 [0.000, 255.000], loss: 0.087255, mean_absolute_error: 11.036563, mean_q: 12.947387, mean_eps: 0.717020
 314723/1000000: episode: 1551, duration: 10.539s, episode steps: 207, steps per second: 20, episode reward: 140.700, mean reward: 0.680 [-1.000, 1.000], mean action: 3.242 [0.000, 6.000], mean observation: 172.743 [0.000, 255.000], loss: 0.093842, mean_absolute_error: 10.787832, mean_q: 12.626924, mean_eps: 0.716844
 314937/1000000: episode: 1552, duration: 10

 319550/1000000: episode: 1575, duration: 9.065s, episode steps: 188, steps per second: 21, episode reward: 129.100, mean reward: 0.687 [-1.000, 1.000], mean action: 2.920 [0.000, 6.000], mean observation: 170.390 [0.000, 255.000], loss: 0.081876, mean_absolute_error: 10.914917, mean_q: 12.766411, mean_eps: 0.712490
 319714/1000000: episode: 1576, duration: 7.836s, episode steps: 164, steps per second: 21, episode reward: 96.500, mean reward: 0.588 [-1.000, 1.000], mean action: 2.835 [0.000, 6.000], mean observation: 170.473 [0.000, 255.000], loss: 0.088669, mean_absolute_error: 11.149327, mean_q: 13.040489, mean_eps: 0.712331
 319967/1000000: episode: 1577, duration: 13.204s, episode steps: 253, steps per second: 19, episode reward: 164.500, mean reward: 0.650 [-1.000, 1.000], mean action: 3.510 [0.000, 6.000], mean observation: 172.705 [0.000, 255.000], loss: 0.083632, mean_absolute_error: 10.797744, mean_q: 12.631850, mean_eps: 0.712144
 320158/1000000: episode: 1578, duration: 9.25

 324940/1000000: episode: 1601, duration: 10.856s, episode steps: 213, steps per second: 20, episode reward: 150.100, mean reward: 0.705 [-1.000, 1.000], mean action: 3.141 [0.000, 6.000], mean observation: 172.284 [0.000, 255.000], loss: 0.085586, mean_absolute_error: 11.313297, mean_q: 13.234368, mean_eps: 0.707651
 325150/1000000: episode: 1602, duration: 10.467s, episode steps: 210, steps per second: 20, episode reward: 91.500, mean reward: 0.436 [-1.000, 0.500], mean action: 3.024 [0.000, 6.000], mean observation: 173.649 [0.000, 255.000], loss: 0.096436, mean_absolute_error: 11.037053, mean_q: 12.900695, mean_eps: 0.707460
 325367/1000000: episode: 1603, duration: 11.130s, episode steps: 217, steps per second: 19, episode reward: 135.800, mean reward: 0.626 [-1.000, 1.000], mean action: 3.083 [0.000, 6.000], mean observation: 172.660 [0.000, 255.000], loss: 0.089935, mean_absolute_error: 11.392438, mean_q: 13.325254, mean_eps: 0.707268
 325576/1000000: episode: 1604, duration: 10

 330797/1000000: episode: 1627, duration: 9.521s, episode steps: 259, steps per second: 27, episode reward: 178.400, mean reward: 0.689 [-1.000, 1.000], mean action: 2.965 [0.000, 6.000], mean observation: 171.635 [0.000, 255.000], loss: 0.097856, mean_absolute_error: 11.770517, mean_q: 13.787640, mean_eps: 0.702399
 331048/1000000: episode: 1628, duration: 8.997s, episode steps: 251, steps per second: 28, episode reward: 158.200, mean reward: 0.630 [-1.000, 1.000], mean action: 2.952 [0.000, 6.000], mean observation: 171.311 [0.000, 255.000], loss: 0.091826, mean_absolute_error: 11.897503, mean_q: 13.915233, mean_eps: 0.702170
 331310/1000000: episode: 1629, duration: 9.665s, episode steps: 262, steps per second: 27, episode reward: 161.900, mean reward: 0.618 [-1.000, 1.000], mean action: 2.832 [0.000, 6.000], mean observation: 171.617 [0.000, 255.000], loss: 0.095679, mean_absolute_error: 11.712070, mean_q: 13.694529, mean_eps: 0.701940
 331561/1000000: episode: 1630, duration: 9.16

 337779/1000000: episode: 1653, duration: 12.627s, episode steps: 323, steps per second: 26, episode reward: 245.000, mean reward: 0.759 [-1.000, 1.000], mean action: 3.427 [0.000, 6.000], mean observation: 172.823 [0.000, 255.000], loss: 0.098771, mean_absolute_error: 11.545746, mean_q: 13.513094, mean_eps: 0.696146
 338056/1000000: episode: 1654, duration: 10.508s, episode steps: 277, steps per second: 26, episode reward: 168.000, mean reward: 0.606 [-1.000, 1.000], mean action: 3.011 [0.000, 6.000], mean observation: 172.216 [0.000, 255.000], loss: 0.103703, mean_absolute_error: 11.690157, mean_q: 13.676145, mean_eps: 0.695876
 338318/1000000: episode: 1655, duration: 9.952s, episode steps: 262, steps per second: 26, episode reward: 189.800, mean reward: 0.724 [-1.000, 1.000], mean action: 3.149 [0.000, 6.000], mean observation: 171.894 [0.000, 255.000], loss: 0.106777, mean_absolute_error: 11.883198, mean_q: 13.928059, mean_eps: 0.695633
 338601/1000000: episode: 1656, duration: 10

 344790/1000000: episode: 1679, duration: 7.116s, episode steps: 210, steps per second: 30, episode reward: 72.700, mean reward: 0.346 [-1.000, 0.500], mean action: 3.148 [0.000, 6.000], mean observation: 172.375 [0.000, 255.000], loss: 0.107493, mean_absolute_error: 12.067001, mean_q: 14.121205, mean_eps: 0.689784
 345048/1000000: episode: 1680, duration: 9.427s, episode steps: 258, steps per second: 27, episode reward: 183.200, mean reward: 0.710 [-1.000, 1.000], mean action: 3.364 [0.000, 6.000], mean observation: 171.313 [0.000, 255.000], loss: 0.103590, mean_absolute_error: 12.243336, mean_q: 14.322006, mean_eps: 0.689574
 345330/1000000: episode: 1681, duration: 10.625s, episode steps: 282, steps per second: 27, episode reward: 174.500, mean reward: 0.619 [-1.000, 1.000], mean action: 3.280 [0.000, 6.000], mean observation: 172.526 [0.000, 255.000], loss: 0.107392, mean_absolute_error: 11.983824, mean_q: 14.002538, mean_eps: 0.689331
 345592/1000000: episode: 1682, duration: 9.88

 351432/1000000: episode: 1705, duration: 11.623s, episode steps: 226, steps per second: 19, episode reward: 147.300, mean reward: 0.652 [-1.000, 1.000], mean action: 3.319 [0.000, 6.000], mean observation: 171.972 [0.000, 255.000], loss: 0.113336, mean_absolute_error: 12.876592, mean_q: 15.071047, mean_eps: 0.683814
 351670/1000000: episode: 1706, duration: 12.226s, episode steps: 238, steps per second: 19, episode reward: 155.100, mean reward: 0.652 [-1.000, 1.000], mean action: 3.151 [0.000, 6.000], mean observation: 172.217 [0.000, 255.000], loss: 0.117336, mean_absolute_error: 13.000509, mean_q: 15.218068, mean_eps: 0.683605
 351886/1000000: episode: 1707, duration: 11.039s, episode steps: 216, steps per second: 20, episode reward: 102.800, mean reward: 0.476 [-1.000, 1.000], mean action: 2.931 [0.000, 6.000], mean observation: 172.740 [0.000, 255.000], loss: 0.109714, mean_absolute_error: 12.825227, mean_q: 15.017727, mean_eps: 0.683400
 352050/1000000: episode: 1708, duration: 7

 356740/1000000: episode: 1731, duration: 11.355s, episode steps: 223, steps per second: 20, episode reward: 138.800, mean reward: 0.622 [-1.000, 1.000], mean action: 3.251 [0.000, 6.000], mean observation: 172.438 [0.000, 255.000], loss: 0.119445, mean_absolute_error: 12.992076, mean_q: 15.203007, mean_eps: 0.679035
 356974/1000000: episode: 1732, duration: 12.038s, episode steps: 234, steps per second: 19, episode reward: 173.000, mean reward: 0.739 [-1.000, 1.000], mean action: 3.248 [0.000, 6.000], mean observation: 172.616 [0.000, 255.000], loss: 0.112048, mean_absolute_error: 13.158929, mean_q: 15.401728, mean_eps: 0.678830
 357160/1000000: episode: 1733, duration: 9.027s, episode steps: 186, steps per second: 21, episode reward: 127.700, mean reward: 0.687 [-1.000, 1.000], mean action: 3.091 [0.000, 6.000], mean observation: 171.251 [0.000, 255.000], loss: 0.116869, mean_absolute_error: 12.957890, mean_q: 15.160261, mean_eps: 0.678641
 357420/1000000: episode: 1734, duration: 13

 362492/1000000: episode: 1757, duration: 12.137s, episode steps: 236, steps per second: 19, episode reward: 172.000, mean reward: 0.729 [-1.000, 1.000], mean action: 3.356 [0.000, 6.000], mean observation: 173.196 [0.000, 255.000], loss: 0.143980, mean_absolute_error: 13.136781, mean_q: 15.345758, mean_eps: 0.673865
 362730/1000000: episode: 1758, duration: 12.105s, episode steps: 238, steps per second: 20, episode reward: 151.800, mean reward: 0.638 [-1.000, 1.000], mean action: 3.361 [0.000, 6.000], mean observation: 173.700 [0.000, 255.000], loss: 0.118537, mean_absolute_error: 13.464312, mean_q: 15.749774, mean_eps: 0.673651
 362922/1000000: episode: 1759, duration: 9.335s, episode steps: 192, steps per second: 21, episode reward: 122.400, mean reward: 0.637 [-1.000, 1.000], mean action: 2.880 [0.000, 6.000], mean observation: 171.017 [0.000, 255.000], loss: 0.117159, mean_absolute_error: 13.179760, mean_q: 15.430393, mean_eps: 0.673457
 363116/1000000: episode: 1760, duration: 9.

 367740/1000000: episode: 1783, duration: 10.926s, episode steps: 214, steps per second: 20, episode reward: 148.000, mean reward: 0.692 [-1.000, 1.000], mean action: 3.290 [0.000, 6.000], mean observation: 172.016 [0.000, 255.000], loss: 0.115227, mean_absolute_error: 13.015772, mean_q: 15.220364, mean_eps: 0.669131
 367966/1000000: episode: 1784, duration: 11.449s, episode steps: 226, steps per second: 20, episode reward: 163.400, mean reward: 0.723 [-1.000, 1.000], mean action: 3.292 [0.000, 6.000], mean observation: 172.157 [0.000, 255.000], loss: 0.128164, mean_absolute_error: 13.541697, mean_q: 15.844616, mean_eps: 0.668933
 368185/1000000: episode: 1785, duration: 11.192s, episode steps: 219, steps per second: 20, episode reward: 165.400, mean reward: 0.755 [-1.000, 1.000], mean action: 3.411 [0.000, 6.000], mean observation: 172.596 [0.000, 255.000], loss: 0.135670, mean_absolute_error: 13.568930, mean_q: 15.886510, mean_eps: 0.668732
 368410/1000000: episode: 1786, duration: 1

 373706/1000000: episode: 1809, duration: 13.982s, episode steps: 354, steps per second: 25, episode reward: 192.900, mean reward: 0.545 [-1.000, 1.000], mean action: 3.311 [0.000, 6.000], mean observation: 173.920 [0.000, 255.000], loss: 0.122148, mean_absolute_error: 13.673887, mean_q: 15.992837, mean_eps: 0.663825
 373882/1000000: episode: 1810, duration: 6.200s, episode steps: 176, steps per second: 28, episode reward: 66.500, mean reward: 0.378 [-1.000, 0.500], mean action: 2.341 [0.000, 6.000], mean observation: 172.929 [0.000, 255.000], loss: 0.142491, mean_absolute_error: 13.761496, mean_q: 16.094295, mean_eps: 0.663585
 374107/1000000: episode: 1811, duration: 8.275s, episode steps: 225, steps per second: 27, episode reward: 154.100, mean reward: 0.685 [-1.000, 1.000], mean action: 2.778 [0.000, 6.000], mean observation: 171.103 [0.000, 255.000], loss: 0.136125, mean_absolute_error: 13.859450, mean_q: 16.199643, mean_eps: 0.663405
 374355/1000000: episode: 1812, duration: 9.14

 380692/1000000: episode: 1835, duration: 10.715s, episode steps: 275, steps per second: 26, episode reward: 198.800, mean reward: 0.723 [-1.000, 1.000], mean action: 3.291 [0.000, 6.000], mean observation: 171.759 [0.000, 255.000], loss: 0.136760, mean_absolute_error: 14.008351, mean_q: 16.386815, mean_eps: 0.657501
 380980/1000000: episode: 1836, duration: 11.035s, episode steps: 288, steps per second: 26, episode reward: 179.300, mean reward: 0.623 [-1.000, 1.000], mean action: 2.910 [0.000, 6.000], mean observation: 172.570 [0.000, 255.000], loss: 0.146272, mean_absolute_error: 14.102384, mean_q: 16.519731, mean_eps: 0.657249
 381261/1000000: episode: 1837, duration: 10.930s, episode steps: 281, steps per second: 26, episode reward: 192.900, mean reward: 0.686 [-1.000, 1.000], mean action: 3.167 [0.000, 6.000], mean observation: 171.594 [0.000, 255.000], loss: 0.146463, mean_absolute_error: 13.701052, mean_q: 16.045700, mean_eps: 0.656992
 381546/1000000: episode: 1838, duration: 1

 388101/1000000: episode: 1861, duration: 10.502s, episode steps: 279, steps per second: 27, episode reward: 201.000, mean reward: 0.720 [-1.000, 1.000], mean action: 3.290 [0.000, 6.000], mean observation: 172.295 [0.000, 255.000], loss: 0.132565, mean_absolute_error: 14.065085, mean_q: 16.475701, mean_eps: 0.650834
 388385/1000000: episode: 1862, duration: 10.807s, episode steps: 284, steps per second: 26, episode reward: 211.400, mean reward: 0.744 [-1.000, 1.000], mean action: 3.246 [0.000, 6.000], mean observation: 172.540 [0.000, 255.000], loss: 0.149236, mean_absolute_error: 14.155450, mean_q: 16.571490, mean_eps: 0.650580
 388671/1000000: episode: 1863, duration: 10.824s, episode steps: 286, steps per second: 26, episode reward: 214.200, mean reward: 0.749 [-1.000, 1.000], mean action: 3.213 [0.000, 6.000], mean observation: 172.453 [0.000, 255.000], loss: 0.135909, mean_absolute_error: 14.256532, mean_q: 16.688022, mean_eps: 0.650325
 388954/1000000: episode: 1864, duration: 1

 395460/1000000: episode: 1887, duration: 12.046s, episode steps: 310, steps per second: 26, episode reward: 245.000, mean reward: 0.790 [-1.000, 1.000], mean action: 3.387 [0.000, 6.000], mean observation: 172.907 [0.000, 255.000], loss: 0.145596, mean_absolute_error: 14.287825, mean_q: 16.720145, mean_eps: 0.644226
 395717/1000000: episode: 1888, duration: 9.559s, episode steps: 257, steps per second: 27, episode reward: 172.600, mean reward: 0.672 [-1.000, 1.000], mean action: 3.136 [0.000, 6.000], mean observation: 171.841 [0.000, 255.000], loss: 0.138028, mean_absolute_error: 14.517028, mean_q: 16.975939, mean_eps: 0.643971
 395993/1000000: episode: 1889, duration: 10.509s, episode steps: 276, steps per second: 26, episode reward: 199.600, mean reward: 0.723 [-1.000, 1.000], mean action: 3.116 [0.000, 6.000], mean observation: 172.739 [0.000, 255.000], loss: 0.143963, mean_absolute_error: 14.388484, mean_q: 16.826830, mean_eps: 0.643730
 396277/1000000: episode: 1890, duration: 10

 402738/1000000: episode: 1913, duration: 10.691s, episode steps: 281, steps per second: 26, episode reward: 188.600, mean reward: 0.671 [-1.000, 1.000], mean action: 3.331 [0.000, 6.000], mean observation: 172.744 [0.000, 255.000], loss: 0.151963, mean_absolute_error: 14.988960, mean_q: 17.521345, mean_eps: 0.637662
 403050/1000000: episode: 1914, duration: 12.022s, episode steps: 312, steps per second: 26, episode reward: 248.200, mean reward: 0.796 [-1.000, 1.000], mean action: 3.314 [0.000, 6.000], mean observation: 173.054 [0.000, 255.000], loss: 0.141313, mean_absolute_error: 14.800293, mean_q: 17.319688, mean_eps: 0.637395
 403412/1000000: episode: 1915, duration: 14.597s, episode steps: 362, steps per second: 25, episode reward: 262.800, mean reward: 0.726 [-1.000, 1.000], mean action: 3.367 [0.000, 6.000], mean observation: 173.343 [0.000, 255.000], loss: 0.169264, mean_absolute_error: 14.844296, mean_q: 17.363560, mean_eps: 0.637093
 403686/1000000: episode: 1916, duration: 1

 409739/1000000: episode: 1939, duration: 15.170s, episode steps: 280, steps per second: 18, episode reward: 186.500, mean reward: 0.666 [-1.000, 1.000], mean action: 3.450 [0.000, 6.000], mean observation: 173.858 [0.000, 255.000], loss: 0.146563, mean_absolute_error: 14.999586, mean_q: 17.550259, mean_eps: 0.631362
 409938/1000000: episode: 1940, duration: 10.427s, episode steps: 199, steps per second: 19, episode reward: 108.400, mean reward: 0.545 [-1.000, 1.000], mean action: 3.035 [0.000, 6.000], mean observation: 173.799 [0.000, 255.000], loss: 0.153794, mean_absolute_error: 14.917350, mean_q: 17.438784, mean_eps: 0.631146
 410136/1000000: episode: 1941, duration: 10.052s, episode steps: 198, steps per second: 20, episode reward: 152.200, mean reward: 0.769 [-1.000, 1.000], mean action: 3.157 [0.000, 6.000], mean observation: 172.579 [0.000, 255.000], loss: 0.175089, mean_absolute_error: 15.330116, mean_q: 17.935448, mean_eps: 0.630968
 410367/1000000: episode: 1942, duration: 1

 415168/1000000: episode: 1965, duration: 10.525s, episode steps: 207, steps per second: 20, episode reward: 149.500, mean reward: 0.722 [-1.000, 1.000], mean action: 3.101 [0.000, 6.000], mean observation: 172.219 [0.000, 255.000], loss: 0.158820, mean_absolute_error: 15.186578, mean_q: 17.794589, mean_eps: 0.626442
 415327/1000000: episode: 1966, duration: 7.522s, episode steps: 159, steps per second: 21, episode reward: 67.600, mean reward: 0.425 [-1.000, 0.500], mean action: 2.748 [0.000, 6.000], mean observation: 171.190 [0.000, 255.000], loss: 0.171149, mean_absolute_error: 15.346822, mean_q: 17.981755, mean_eps: 0.626279
 415563/1000000: episode: 1967, duration: 12.221s, episode steps: 236, steps per second: 19, episode reward: 178.300, mean reward: 0.756 [-1.000, 1.000], mean action: 3.203 [0.000, 6.000], mean observation: 172.127 [0.000, 255.000], loss: 0.161187, mean_absolute_error: 15.307726, mean_q: 17.913458, mean_eps: 0.626100
 415774/1000000: episode: 1968, duration: 10.

 420606/1000000: episode: 1991, duration: 11.234s, episode steps: 218, steps per second: 19, episode reward: 162.600, mean reward: 0.746 [-1.000, 1.000], mean action: 3.257 [0.000, 6.000], mean observation: 172.757 [0.000, 255.000], loss: 0.163748, mean_absolute_error: 15.833152, mean_q: 18.514124, mean_eps: 0.621554
 420826/1000000: episode: 1992, duration: 11.218s, episode steps: 220, steps per second: 20, episode reward: 164.600, mean reward: 0.748 [-1.000, 1.000], mean action: 3.305 [0.000, 6.000], mean observation: 172.892 [0.000, 255.000], loss: 0.158864, mean_absolute_error: 15.635932, mean_q: 18.308386, mean_eps: 0.621356
 421033/1000000: episode: 1993, duration: 10.463s, episode steps: 207, steps per second: 20, episode reward: 145.100, mean reward: 0.701 [-1.000, 1.000], mean action: 3.251 [0.000, 6.000], mean observation: 172.695 [0.000, 255.000], loss: 0.175403, mean_absolute_error: 15.222850, mean_q: 17.811555, mean_eps: 0.621163
 421257/1000000: episode: 1994, duration: 1

 426090/1000000: episode: 2017, duration: 11.970s, episode steps: 232, steps per second: 19, episode reward: 161.600, mean reward: 0.697 [-1.000, 1.000], mean action: 3.448 [0.000, 6.000], mean observation: 173.095 [0.000, 255.000], loss: 0.182556, mean_absolute_error: 15.711280, mean_q: 18.381594, mean_eps: 0.616623
 426304/1000000: episode: 2018, duration: 10.961s, episode steps: 214, steps per second: 20, episode reward: 153.600, mean reward: 0.718 [-1.000, 1.000], mean action: 3.201 [0.000, 6.000], mean observation: 172.409 [0.000, 255.000], loss: 0.161926, mean_absolute_error: 15.632809, mean_q: 18.301360, mean_eps: 0.616424
 426511/1000000: episode: 2019, duration: 10.503s, episode steps: 207, steps per second: 20, episode reward: 146.700, mean reward: 0.709 [-1.000, 1.000], mean action: 3.237 [0.000, 6.000], mean observation: 172.341 [0.000, 255.000], loss: 0.163086, mean_absolute_error: 15.271976, mean_q: 17.863549, mean_eps: 0.616235
 426703/1000000: episode: 2020, duration: 9

 431577/1000000: episode: 2043, duration: 11.260s, episode steps: 220, steps per second: 20, episode reward: 163.000, mean reward: 0.741 [-1.000, 1.000], mean action: 3.468 [0.000, 6.000], mean observation: 172.903 [0.000, 255.000], loss: 0.184293, mean_absolute_error: 15.992888, mean_q: 18.707493, mean_eps: 0.611679
 431798/1000000: episode: 2044, duration: 11.414s, episode steps: 221, steps per second: 19, episode reward: 158.700, mean reward: 0.718 [-1.000, 1.000], mean action: 3.249 [0.000, 6.000], mean observation: 172.765 [0.000, 255.000], loss: 0.188831, mean_absolute_error: 16.017722, mean_q: 18.742502, mean_eps: 0.611481
 432015/1000000: episode: 2045, duration: 11.077s, episode steps: 217, steps per second: 20, episode reward: 155.300, mean reward: 0.716 [-1.000, 1.000], mean action: 3.041 [0.000, 6.000], mean observation: 172.714 [0.000, 255.000], loss: 0.173480, mean_absolute_error: 15.948645, mean_q: 18.665903, mean_eps: 0.611285
 432250/1000000: episode: 2046, duration: 1

 437262/1000000: episode: 2069, duration: 10.597s, episode steps: 208, steps per second: 20, episode reward: 154.800, mean reward: 0.744 [-1.000, 1.000], mean action: 3.154 [0.000, 6.000], mean observation: 172.878 [0.000, 255.000], loss: 0.197839, mean_absolute_error: 15.735160, mean_q: 18.399479, mean_eps: 0.606558
 437449/1000000: episode: 2070, duration: 9.064s, episode steps: 187, steps per second: 21, episode reward: 109.500, mean reward: 0.586 [-1.000, 1.000], mean action: 3.193 [0.000, 6.000], mean observation: 171.418 [0.000, 255.000], loss: 0.193282, mean_absolute_error: 16.146934, mean_q: 18.878962, mean_eps: 0.606380
 437717/1000000: episode: 2071, duration: 14.044s, episode steps: 268, steps per second: 19, episode reward: 160.800, mean reward: 0.600 [-1.000, 1.000], mean action: 3.459 [0.000, 6.000], mean observation: 174.086 [0.000, 255.000], loss: 0.187398, mean_absolute_error: 16.355083, mean_q: 19.160877, mean_eps: 0.606174
 437995/1000000: episode: 2072, duration: 15

 442949/1000000: episode: 2095, duration: 9.270s, episode steps: 188, steps per second: 20, episode reward: 136.900, mean reward: 0.728 [-1.000, 1.000], mean action: 2.840 [0.000, 6.000], mean observation: 170.568 [0.000, 255.000], loss: 0.193554, mean_absolute_error: 16.441044, mean_q: 19.266026, mean_eps: 0.601430
 443182/1000000: episode: 2096, duration: 12.038s, episode steps: 233, steps per second: 19, episode reward: 174.600, mean reward: 0.749 [-1.000, 1.000], mean action: 3.425 [0.000, 6.000], mean observation: 172.086 [0.000, 255.000], loss: 0.187139, mean_absolute_error: 16.434233, mean_q: 19.264565, mean_eps: 0.601241
 443346/1000000: episode: 2097, duration: 7.625s, episode steps: 164, steps per second: 22, episode reward: 53.700, mean reward: 0.327 [-1.000, 0.500], mean action: 2.628 [0.000, 6.000], mean observation: 171.774 [0.000, 255.000], loss: 0.191460, mean_absolute_error: 16.643967, mean_q: 19.502865, mean_eps: 0.601062
 443539/1000000: episode: 2098, duration: 9.43

 448628/1000000: episode: 2121, duration: 10.851s, episode steps: 213, steps per second: 20, episode reward: 150.000, mean reward: 0.704 [-1.000, 1.000], mean action: 3.277 [0.000, 6.000], mean observation: 172.758 [0.000, 255.000], loss: 0.196084, mean_absolute_error: 16.334406, mean_q: 19.131890, mean_eps: 0.596332
 448914/1000000: episode: 2122, duration: 15.374s, episode steps: 286, steps per second: 19, episode reward: 190.800, mean reward: 0.667 [-1.000, 1.000], mean action: 3.476 [0.000, 6.000], mean observation: 173.053 [0.000, 255.000], loss: 0.196519, mean_absolute_error: 16.611569, mean_q: 19.482922, mean_eps: 0.596107
 449193/1000000: episode: 2123, duration: 15.163s, episode steps: 279, steps per second: 18, episode reward: 183.500, mean reward: 0.658 [-1.000, 1.000], mean action: 3.824 [0.000, 6.000], mean observation: 173.449 [0.000, 255.000], loss: 0.189378, mean_absolute_error: 16.509791, mean_q: 19.332707, mean_eps: 0.595851
 449434/1000000: episode: 2124, duration: 1

 454530/1000000: episode: 2147, duration: 9.746s, episode steps: 195, steps per second: 20, episode reward: 147.100, mean reward: 0.754 [-1.000, 1.000], mean action: 3.051 [0.000, 6.000], mean observation: 171.537 [0.000, 255.000], loss: 0.189071, mean_absolute_error: 16.950411, mean_q: 19.835503, mean_eps: 0.591011
 454793/1000000: episode: 2148, duration: 13.781s, episode steps: 263, steps per second: 19, episode reward: 167.900, mean reward: 0.638 [-1.000, 1.000], mean action: 3.357 [0.000, 6.000], mean observation: 172.823 [0.000, 255.000], loss: 0.187234, mean_absolute_error: 16.868924, mean_q: 19.747171, mean_eps: 0.590804
 455013/1000000: episode: 2149, duration: 11.241s, episode steps: 220, steps per second: 20, episode reward: 163.500, mean reward: 0.743 [-1.000, 1.000], mean action: 3.182 [0.000, 6.000], mean observation: 172.186 [0.000, 255.000], loss: 0.175729, mean_absolute_error: 17.080811, mean_q: 20.002463, mean_eps: 0.590586
 455227/1000000: episode: 2150, duration: 10

 460128/1000000: episode: 2173, duration: 12.048s, episode steps: 233, steps per second: 19, episode reward: 174.000, mean reward: 0.747 [-1.000, 1.000], mean action: 3.322 [0.000, 6.000], mean observation: 173.251 [0.000, 255.000], loss: 0.212716, mean_absolute_error: 17.095806, mean_q: 20.034962, mean_eps: 0.585991
 460360/1000000: episode: 2174, duration: 12.006s, episode steps: 232, steps per second: 19, episode reward: 172.900, mean reward: 0.745 [-1.000, 1.000], mean action: 3.272 [0.000, 6.000], mean observation: 172.979 [0.000, 255.000], loss: 0.193700, mean_absolute_error: 16.887977, mean_q: 19.777196, mean_eps: 0.585782
 460581/1000000: episode: 2175, duration: 11.246s, episode steps: 221, steps per second: 20, episode reward: 140.700, mean reward: 0.637 [-1.000, 1.000], mean action: 3.240 [0.000, 6.000], mean observation: 173.403 [0.000, 255.000], loss: 0.191859, mean_absolute_error: 16.911195, mean_q: 19.802902, mean_eps: 0.585577
 460778/1000000: episode: 2176, duration: 9

 466097/1000000: episode: 2199, duration: 10.714s, episode steps: 211, steps per second: 20, episode reward: 156.900, mean reward: 0.744 [-1.000, 1.000], mean action: 3.161 [0.000, 6.000], mean observation: 172.274 [0.000, 255.000], loss: 0.194985, mean_absolute_error: 16.986283, mean_q: 19.906523, mean_eps: 0.580607
 466330/1000000: episode: 2200, duration: 12.053s, episode steps: 233, steps per second: 19, episode reward: 184.000, mean reward: 0.790 [-1.000, 1.000], mean action: 3.343 [0.000, 6.000], mean observation: 172.357 [0.000, 255.000], loss: 0.189268, mean_absolute_error: 17.141495, mean_q: 20.091225, mean_eps: 0.580407
 466612/1000000: episode: 2201, duration: 15.524s, episode steps: 282, steps per second: 18, episode reward: 241.200, mean reward: 0.855 [-1.000, 1.000], mean action: 3.496 [0.000, 6.000], mean observation: 172.946 [0.000, 255.000], loss: 0.176339, mean_absolute_error: 17.181701, mean_q: 20.128153, mean_eps: 0.580177
 466818/1000000: episode: 2202, duration: 1

 471683/1000000: episode: 2225, duration: 14.573s, episode steps: 272, steps per second: 19, episode reward: 217.800, mean reward: 0.801 [-1.000, 1.000], mean action: 3.566 [0.000, 6.000], mean observation: 172.748 [0.000, 255.000], loss: 0.205614, mean_absolute_error: 17.336941, mean_q: 20.314282, mean_eps: 0.575609
 471959/1000000: episode: 2226, duration: 14.937s, episode steps: 276, steps per second: 18, episode reward: 197.400, mean reward: 0.715 [-1.000, 1.000], mean action: 3.562 [0.000, 6.000], mean observation: 173.092 [0.000, 255.000], loss: 0.216113, mean_absolute_error: 17.119719, mean_q: 20.049108, mean_eps: 0.575362
 472236/1000000: episode: 2227, duration: 15.198s, episode steps: 277, steps per second: 18, episode reward: 233.500, mean reward: 0.843 [-1.000, 1.000], mean action: 3.451 [0.000, 6.000], mean observation: 172.945 [0.000, 255.000], loss: 0.211447, mean_absolute_error: 17.544413, mean_q: 20.561811, mean_eps: 0.575114
 472466/1000000: episode: 2228, duration: 1

 477765/1000000: episode: 2251, duration: 15.856s, episode steps: 286, steps per second: 18, episode reward: 229.300, mean reward: 0.802 [-1.000, 1.000], mean action: 3.521 [0.000, 6.000], mean observation: 172.994 [0.000, 255.000], loss: 0.216750, mean_absolute_error: 17.734686, mean_q: 20.759390, mean_eps: 0.570140
 477998/1000000: episode: 2252, duration: 12.049s, episode steps: 233, steps per second: 19, episode reward: 172.700, mean reward: 0.741 [-1.000, 1.000], mean action: 3.433 [0.000, 6.000], mean observation: 172.936 [0.000, 255.000], loss: 0.209238, mean_absolute_error: 17.540058, mean_q: 20.549586, mean_eps: 0.569906
 478096/1000000: episode: 2253, duration: 4.486s, episode steps: 98, steps per second: 22, episode reward: 35.900, mean reward: 0.366 [-1.000, 0.500], mean action: 1.673 [0.000, 6.000], mean observation: 172.685 [0.000, 255.000], loss: 0.200497, mean_absolute_error: 17.642341, mean_q: 20.662625, mean_eps: 0.569759
 478333/1000000: episode: 2254, duration: 12.3

 483654/1000000: episode: 2277, duration: 11.572s, episode steps: 225, steps per second: 19, episode reward: 176.100, mean reward: 0.783 [-1.000, 1.000], mean action: 3.338 [0.000, 6.000], mean observation: 172.780 [0.000, 255.000], loss: 0.212264, mean_absolute_error: 17.708129, mean_q: 20.719062, mean_eps: 0.564812
 483866/1000000: episode: 2278, duration: 10.759s, episode steps: 212, steps per second: 20, episode reward: 148.600, mean reward: 0.701 [-1.000, 1.000], mean action: 3.514 [0.000, 6.000], mean observation: 172.657 [0.000, 255.000], loss: 0.228847, mean_absolute_error: 17.414614, mean_q: 20.393611, mean_eps: 0.564616
 484098/1000000: episode: 2279, duration: 12.013s, episode steps: 232, steps per second: 19, episode reward: 179.500, mean reward: 0.774 [-1.000, 1.000], mean action: 3.517 [0.000, 6.000], mean observation: 173.181 [0.000, 255.000], loss: 0.218739, mean_absolute_error: 17.657840, mean_q: 20.663504, mean_eps: 0.564416
 484295/1000000: episode: 2280, duration: 1

 489986/1000000: episode: 2303, duration: 16.045s, episode steps: 288, steps per second: 18, episode reward: 244.300, mean reward: 0.848 [-1.000, 1.000], mean action: 3.427 [0.000, 6.000], mean observation: 172.882 [0.000, 255.000], loss: 0.203053, mean_absolute_error: 17.665107, mean_q: 20.706085, mean_eps: 0.559142
 490243/1000000: episode: 2304, duration: 13.480s, episode steps: 257, steps per second: 19, episode reward: 181.000, mean reward: 0.704 [-1.000, 1.000], mean action: 3.409 [0.000, 6.000], mean observation: 173.678 [0.000, 255.000], loss: 0.233698, mean_absolute_error: 18.600785, mean_q: 21.778304, mean_eps: 0.558897
 490507/1000000: episode: 2305, duration: 14.125s, episode steps: 264, steps per second: 19, episode reward: 172.900, mean reward: 0.655 [-1.000, 1.000], mean action: 3.443 [0.000, 6.000], mean observation: 173.322 [0.000, 255.000], loss: 0.234186, mean_absolute_error: 18.431992, mean_q: 21.585642, mean_eps: 0.558663
 490698/1000000: episode: 2306, duration: 9

 496225/1000000: episode: 2329, duration: 8.573s, episode steps: 178, steps per second: 21, episode reward: 120.200, mean reward: 0.675 [-1.000, 1.000], mean action: 2.607 [0.000, 6.000], mean observation: 170.461 [0.000, 255.000], loss: 0.231989, mean_absolute_error: 18.288091, mean_q: 21.393710, mean_eps: 0.553478
 496459/1000000: episode: 2330, duration: 12.143s, episode steps: 234, steps per second: 19, episode reward: 169.100, mean reward: 0.723 [-1.000, 1.000], mean action: 3.449 [0.000, 6.000], mean observation: 172.501 [0.000, 255.000], loss: 0.208869, mean_absolute_error: 18.132817, mean_q: 21.254029, mean_eps: 0.553292
 496747/1000000: episode: 2331, duration: 15.838s, episode steps: 288, steps per second: 18, episode reward: 253.300, mean reward: 0.880 [-1.000, 1.000], mean action: 3.715 [0.000, 6.000], mean observation: 172.329 [0.000, 255.000], loss: 0.210570, mean_absolute_error: 17.833214, mean_q: 20.877588, mean_eps: 0.553058
 496968/1000000: episode: 2332, duration: 11

 502325/1000000: episode: 2355, duration: 10.761s, episode steps: 212, steps per second: 20, episode reward: 157.800, mean reward: 0.744 [-1.000, 1.000], mean action: 3.274 [0.000, 6.000], mean observation: 172.881 [0.000, 255.000], loss: 0.231765, mean_absolute_error: 18.935045, mean_q: 22.184406, mean_eps: 0.548002
 502532/1000000: episode: 2356, duration: 10.467s, episode steps: 207, steps per second: 20, episode reward: 146.500, mean reward: 0.708 [-1.000, 1.000], mean action: 3.058 [0.000, 6.000], mean observation: 173.251 [0.000, 255.000], loss: 0.238229, mean_absolute_error: 18.763133, mean_q: 21.956693, mean_eps: 0.547815
 502813/1000000: episode: 2357, duration: 15.305s, episode steps: 281, steps per second: 18, episode reward: 235.700, mean reward: 0.839 [-1.000, 1.000], mean action: 3.370 [0.000, 6.000], mean observation: 173.247 [0.000, 255.000], loss: 0.242710, mean_absolute_error: 18.551951, mean_q: 21.738444, mean_eps: 0.547595
 503087/1000000: episode: 2358, duration: 1

 508209/1000000: episode: 2381, duration: 12.168s, episode steps: 235, steps per second: 19, episode reward: 188.600, mean reward: 0.803 [-1.000, 1.000], mean action: 3.281 [0.000, 6.000], mean observation: 172.794 [0.000, 255.000], loss: 0.226165, mean_absolute_error: 18.909103, mean_q: 22.145129, mean_eps: 0.542717
 508421/1000000: episode: 2382, duration: 10.800s, episode steps: 212, steps per second: 20, episode reward: 153.000, mean reward: 0.722 [-1.000, 1.000], mean action: 3.264 [0.000, 6.000], mean observation: 172.406 [0.000, 255.000], loss: 0.240468, mean_absolute_error: 18.996110, mean_q: 22.240016, mean_eps: 0.542516
 508650/1000000: episode: 2383, duration: 11.818s, episode steps: 229, steps per second: 19, episode reward: 177.500, mean reward: 0.775 [-1.000, 1.000], mean action: 3.188 [0.000, 6.000], mean observation: 172.737 [0.000, 255.000], loss: 0.241497, mean_absolute_error: 18.888751, mean_q: 22.079370, mean_eps: 0.542318
 508845/1000000: episode: 2384, duration: 9

 514045/1000000: episode: 2407, duration: 12.072s, episode steps: 231, steps per second: 19, episode reward: 178.300, mean reward: 0.772 [-1.000, 1.000], mean action: 3.390 [0.000, 6.000], mean observation: 173.066 [0.000, 255.000], loss: 0.249342, mean_absolute_error: 19.510678, mean_q: 22.845729, mean_eps: 0.537463
 514242/1000000: episode: 2408, duration: 9.954s, episode steps: 197, steps per second: 20, episode reward: 147.800, mean reward: 0.750 [-1.000, 1.000], mean action: 3.208 [0.000, 6.000], mean observation: 172.146 [0.000, 255.000], loss: 0.244972, mean_absolute_error: 19.187587, mean_q: 22.469881, mean_eps: 0.537270
 514477/1000000: episode: 2409, duration: 12.084s, episode steps: 235, steps per second: 19, episode reward: 182.500, mean reward: 0.777 [-1.000, 1.000], mean action: 3.409 [0.000, 6.000], mean observation: 172.688 [0.000, 255.000], loss: 0.243549, mean_absolute_error: 19.015161, mean_q: 22.261540, mean_eps: 0.537076
 514683/1000000: episode: 2410, duration: 10

 519846/1000000: episode: 2433, duration: 12.866s, episode steps: 244, steps per second: 19, episode reward: 186.200, mean reward: 0.763 [-1.000, 1.000], mean action: 3.340 [0.000, 6.000], mean observation: 172.661 [0.000, 255.000], loss: 0.246881, mean_absolute_error: 18.950477, mean_q: 22.183021, mean_eps: 0.532248
 520034/1000000: episode: 2434, duration: 9.155s, episode steps: 188, steps per second: 21, episode reward: 129.100, mean reward: 0.687 [-1.000, 1.000], mean action: 3.080 [0.000, 6.000], mean observation: 170.754 [0.000, 255.000], loss: 0.257851, mean_absolute_error: 18.709521, mean_q: 21.900263, mean_eps: 0.532054
 520316/1000000: episode: 2435, duration: 15.281s, episode steps: 282, steps per second: 18, episode reward: 216.500, mean reward: 0.768 [-1.000, 1.000], mean action: 3.528 [0.000, 6.000], mean observation: 172.737 [0.000, 255.000], loss: 0.256615, mean_absolute_error: 19.159965, mean_q: 22.398064, mean_eps: 0.531843
 520510/1000000: episode: 2436, duration: 9.

 525728/1000000: episode: 2459, duration: 15.478s, episode steps: 282, steps per second: 18, episode reward: 229.200, mean reward: 0.813 [-1.000, 1.000], mean action: 3.465 [0.000, 6.000], mean observation: 172.976 [0.000, 255.000], loss: 0.249095, mean_absolute_error: 19.155596, mean_q: 22.420799, mean_eps: 0.526973
 526009/1000000: episode: 2460, duration: 15.548s, episode steps: 281, steps per second: 18, episode reward: 224.700, mean reward: 0.800 [-1.000, 1.000], mean action: 3.495 [0.000, 6.000], mean observation: 172.840 [0.000, 255.000], loss: 0.236882, mean_absolute_error: 19.022956, mean_q: 22.238213, mean_eps: 0.526719
 526296/1000000: episode: 2461, duration: 15.999s, episode steps: 287, steps per second: 18, episode reward: 248.400, mean reward: 0.866 [-1.000, 1.000], mean action: 3.467 [0.000, 6.000], mean observation: 172.592 [0.000, 255.000], loss: 0.255912, mean_absolute_error: 19.311030, mean_q: 22.625330, mean_eps: 0.526463
 526577/1000000: episode: 2462, duration: 1

 532154/1000000: episode: 2485, duration: 11.142s, episode steps: 218, steps per second: 20, episode reward: 158.500, mean reward: 0.727 [-1.000, 1.000], mean action: 3.390 [0.000, 6.000], mean observation: 171.872 [0.000, 255.000], loss: 0.289713, mean_absolute_error: 19.839115, mean_q: 23.222546, mean_eps: 0.521160
 532435/1000000: episode: 2486, duration: 15.564s, episode steps: 281, steps per second: 18, episode reward: 219.300, mean reward: 0.780 [-1.000, 1.000], mean action: 3.463 [0.000, 6.000], mean observation: 172.945 [0.000, 255.000], loss: 0.275200, mean_absolute_error: 19.968930, mean_q: 23.405298, mean_eps: 0.520935
 532674/1000000: episode: 2487, duration: 12.459s, episode steps: 239, steps per second: 19, episode reward: 167.700, mean reward: 0.702 [-1.000, 1.000], mean action: 3.276 [0.000, 6.000], mean observation: 172.855 [0.000, 255.000], loss: 0.273349, mean_absolute_error: 19.824383, mean_q: 23.194602, mean_eps: 0.520701
 532932/1000000: episode: 2488, duration: 1

 538426/1000000: episode: 2511, duration: 11.732s, episode steps: 226, steps per second: 19, episode reward: 179.500, mean reward: 0.794 [-1.000, 1.000], mean action: 3.296 [0.000, 6.000], mean observation: 173.276 [0.000, 255.000], loss: 0.272872, mean_absolute_error: 19.940907, mean_q: 23.358860, mean_eps: 0.515519
 538710/1000000: episode: 2512, duration: 15.684s, episode steps: 284, steps per second: 18, episode reward: 199.900, mean reward: 0.704 [-1.000, 1.000], mean action: 3.380 [0.000, 6.000], mean observation: 173.000 [0.000, 255.000], loss: 0.278603, mean_absolute_error: 19.944166, mean_q: 23.336455, mean_eps: 0.515289
 538952/1000000: episode: 2513, duration: 12.723s, episode steps: 242, steps per second: 19, episode reward: 108.000, mean reward: 0.446 [-1.000, 1.000], mean action: 3.260 [0.000, 6.000], mean observation: 174.304 [0.000, 255.000], loss: 0.283425, mean_absolute_error: 20.381627, mean_q: 23.875347, mean_eps: 0.515053
 539232/1000000: episode: 2514, duration: 1

 545176/1000000: episode: 2537, duration: 11.389s, episode steps: 223, steps per second: 20, episode reward: 165.200, mean reward: 0.741 [-1.000, 1.000], mean action: 3.309 [0.000, 6.000], mean observation: 171.589 [0.000, 255.000], loss: 0.281155, mean_absolute_error: 20.298850, mean_q: 23.756710, mean_eps: 0.509442
 545390/1000000: episode: 2538, duration: 10.931s, episode steps: 214, steps per second: 20, episode reward: 150.800, mean reward: 0.705 [-1.000, 1.000], mean action: 3.313 [0.000, 6.000], mean observation: 171.884 [0.000, 255.000], loss: 0.283042, mean_absolute_error: 20.648125, mean_q: 24.162210, mean_eps: 0.509246
 545686/1000000: episode: 2539, duration: 16.562s, episode steps: 296, steps per second: 18, episode reward: 226.400, mean reward: 0.765 [-1.000, 1.000], mean action: 3.517 [0.000, 6.000], mean observation: 172.443 [0.000, 255.000], loss: 0.280300, mean_absolute_error: 20.556618, mean_q: 24.058141, mean_eps: 0.509016
 545884/1000000: episode: 2540, duration: 9

 551625/1000000: episode: 2563, duration: 15.990s, episode steps: 288, steps per second: 18, episode reward: 239.400, mean reward: 0.831 [-1.000, 1.000], mean action: 3.660 [0.000, 6.000], mean observation: 172.655 [0.000, 255.000], loss: 0.297853, mean_absolute_error: 20.930159, mean_q: 24.473389, mean_eps: 0.503666
 551860/1000000: episode: 2564, duration: 12.368s, episode steps: 235, steps per second: 19, episode reward: 175.200, mean reward: 0.746 [-1.000, 1.000], mean action: 3.434 [0.000, 6.000], mean observation: 172.962 [0.000, 255.000], loss: 0.260453, mean_absolute_error: 20.442297, mean_q: 23.918430, mean_eps: 0.503432
 552142/1000000: episode: 2565, duration: 15.604s, episode steps: 282, steps per second: 18, episode reward: 234.300, mean reward: 0.831 [-1.000, 1.000], mean action: 3.567 [0.000, 6.000], mean observation: 172.732 [0.000, 255.000], loss: 0.285072, mean_absolute_error: 20.569517, mean_q: 24.074549, mean_eps: 0.503200
 552422/1000000: episode: 2566, duration: 1

 558327/1000000: episode: 2589, duration: 10.165s, episode steps: 199, steps per second: 20, episode reward: 148.700, mean reward: 0.747 [-1.000, 1.000], mean action: 2.910 [0.000, 6.000], mean observation: 171.829 [0.000, 255.000], loss: 0.263728, mean_absolute_error: 20.604204, mean_q: 24.105658, mean_eps: 0.497597
 558615/1000000: episode: 2590, duration: 16.055s, episode steps: 288, steps per second: 18, episode reward: 234.600, mean reward: 0.815 [-1.000, 1.000], mean action: 3.472 [0.000, 6.000], mean observation: 172.453 [0.000, 255.000], loss: 0.279911, mean_absolute_error: 20.990838, mean_q: 24.565100, mean_eps: 0.497377
 558907/1000000: episode: 2591, duration: 16.310s, episode steps: 292, steps per second: 18, episode reward: 226.300, mean reward: 0.775 [-1.000, 1.000], mean action: 3.654 [0.000, 6.000], mean observation: 172.552 [0.000, 255.000], loss: 0.284383, mean_absolute_error: 20.476507, mean_q: 23.966607, mean_eps: 0.497116
 559166/1000000: episode: 2592, duration: 1

 565119/1000000: episode: 2615, duration: 9.984s, episode steps: 197, steps per second: 20, episode reward: 148.700, mean reward: 0.755 [-1.000, 1.000], mean action: 3.508 [0.000, 6.000], mean observation: 172.200 [0.000, 255.000], loss: 0.285727, mean_absolute_error: 21.066007, mean_q: 24.658887, mean_eps: 0.491482
 565366/1000000: episode: 2616, duration: 13.077s, episode steps: 247, steps per second: 19, episode reward: 163.800, mean reward: 0.663 [-1.000, 1.000], mean action: 3.312 [0.000, 6.000], mean observation: 173.255 [0.000, 255.000], loss: 0.279505, mean_absolute_error: 20.971778, mean_q: 24.529256, mean_eps: 0.491282
 565641/1000000: episode: 2617, duration: 14.830s, episode steps: 275, steps per second: 19, episode reward: 216.000, mean reward: 0.785 [-1.000, 1.000], mean action: 3.535 [0.000, 6.000], mean observation: 173.466 [0.000, 255.000], loss: 0.265623, mean_absolute_error: 20.828581, mean_q: 24.371806, mean_eps: 0.491046
 565900/1000000: episode: 2618, duration: 13

 571594/1000000: episode: 2641, duration: 12.144s, episode steps: 234, steps per second: 19, episode reward: 162.800, mean reward: 0.696 [-1.000, 1.000], mean action: 3.346 [0.000, 6.000], mean observation: 172.583 [0.000, 255.000], loss: 0.291099, mean_absolute_error: 21.667798, mean_q: 25.354559, mean_eps: 0.485672
 571808/1000000: episode: 2642, duration: 10.996s, episode steps: 214, steps per second: 19, episode reward: 157.900, mean reward: 0.738 [-1.000, 1.000], mean action: 3.458 [0.000, 6.000], mean observation: 172.032 [0.000, 255.000], loss: 0.291581, mean_absolute_error: 22.039884, mean_q: 25.792338, mean_eps: 0.485470
 572004/1000000: episode: 2643, duration: 9.707s, episode steps: 196, steps per second: 20, episode reward: 146.200, mean reward: 0.746 [-1.000, 1.000], mean action: 3.245 [0.000, 6.000], mean observation: 170.899 [0.000, 255.000], loss: 0.290845, mean_absolute_error: 22.058801, mean_q: 25.850958, mean_eps: 0.485286
 572206/1000000: episode: 2644, duration: 9.

 577878/1000000: episode: 2667, duration: 15.653s, episode steps: 285, steps per second: 18, episode reward: 236.300, mean reward: 0.829 [-1.000, 1.000], mean action: 3.663 [0.000, 6.000], mean observation: 172.833 [0.000, 255.000], loss: 0.283535, mean_absolute_error: 21.210263, mean_q: 24.821402, mean_eps: 0.480038
 578172/1000000: episode: 2668, duration: 16.374s, episode steps: 294, steps per second: 18, episode reward: 244.600, mean reward: 0.832 [-1.000, 1.000], mean action: 3.449 [0.000, 6.000], mean observation: 172.748 [0.000, 255.000], loss: 0.297729, mean_absolute_error: 21.195162, mean_q: 24.821915, mean_eps: 0.479778
 578471/1000000: episode: 2669, duration: 16.794s, episode steps: 299, steps per second: 18, episode reward: 240.200, mean reward: 0.803 [-1.000, 1.000], mean action: 3.766 [0.000, 6.000], mean observation: 173.297 [0.000, 255.000], loss: 0.295184, mean_absolute_error: 21.600882, mean_q: 25.310451, mean_eps: 0.479512
 578728/1000000: episode: 2670, duration: 1

 584917/1000000: episode: 2693, duration: 17.528s, episode steps: 307, steps per second: 18, episode reward: 265.000, mean reward: 0.863 [-1.000, 1.000], mean action: 3.674 [0.000, 6.000], mean observation: 172.137 [0.000, 255.000], loss: 0.309298, mean_absolute_error: 22.174044, mean_q: 25.967603, mean_eps: 0.473712
 585127/1000000: episode: 2694, duration: 10.763s, episode steps: 210, steps per second: 20, episode reward: 152.600, mean reward: 0.727 [-1.000, 1.000], mean action: 3.295 [0.000, 6.000], mean observation: 172.628 [0.000, 255.000], loss: 0.284055, mean_absolute_error: 21.094707, mean_q: 24.727169, mean_eps: 0.473480
 585424/1000000: episode: 2695, duration: 16.673s, episode steps: 297, steps per second: 18, episode reward: 253.900, mean reward: 0.855 [-1.000, 1.000], mean action: 3.758 [0.000, 6.000], mean observation: 172.978 [0.000, 255.000], loss: 0.320474, mean_absolute_error: 22.289012, mean_q: 26.132675, mean_eps: 0.473253
 585702/1000000: episode: 2696, duration: 1

 591745/1000000: episode: 2719, duration: 14.053s, episode steps: 267, steps per second: 19, episode reward: 194.000, mean reward: 0.727 [-1.000, 1.000], mean action: 3.517 [0.000, 6.000], mean observation: 173.033 [0.000, 255.000], loss: 0.287288, mean_absolute_error: 21.957766, mean_q: 25.753277, mean_eps: 0.467549
 592055/1000000: episode: 2720, duration: 17.671s, episode steps: 310, steps per second: 18, episode reward: 250.300, mean reward: 0.807 [-1.000, 1.000], mean action: 3.761 [0.000, 6.000], mean observation: 172.212 [0.000, 255.000], loss: 0.282149, mean_absolute_error: 22.233934, mean_q: 26.044006, mean_eps: 0.467290
 592337/1000000: episode: 2721, duration: 15.462s, episode steps: 282, steps per second: 18, episode reward: 218.600, mean reward: 0.775 [-1.000, 1.000], mean action: 3.511 [0.000, 6.000], mean observation: 173.519 [0.000, 255.000], loss: 0.286836, mean_absolute_error: 22.405027, mean_q: 26.244632, mean_eps: 0.467024
 592639/1000000: episode: 2722, duration: 1

 598518/1000000: episode: 2745, duration: 9.412s, episode steps: 192, steps per second: 20, episode reward: 138.200, mean reward: 0.720 [-1.000, 1.000], mean action: 3.146 [0.000, 6.000], mean observation: 170.728 [0.000, 255.000], loss: 0.269352, mean_absolute_error: 22.419921, mean_q: 26.250371, mean_eps: 0.461420
 598801/1000000: episode: 2746, duration: 15.462s, episode steps: 283, steps per second: 18, episode reward: 237.700, mean reward: 0.840 [-1.000, 1.000], mean action: 3.435 [0.000, 6.000], mean observation: 172.289 [0.000, 255.000], loss: 0.280684, mean_absolute_error: 22.467370, mean_q: 26.300633, mean_eps: 0.461206
 599100/1000000: episode: 2747, duration: 16.617s, episode steps: 299, steps per second: 18, episode reward: 189.100, mean reward: 0.632 [-1.000, 1.000], mean action: 3.712 [0.000, 6.000], mean observation: 172.164 [0.000, 255.000], loss: 0.279199, mean_absolute_error: 22.165790, mean_q: 25.962474, mean_eps: 0.460945
 599287/1000000: episode: 2748, duration: 9.

 605377/1000000: episode: 2771, duration: 15.853s, episode steps: 289, steps per second: 18, episode reward: 214.300, mean reward: 0.742 [-1.000, 1.000], mean action: 3.720 [0.000, 6.000], mean observation: 172.682 [0.000, 255.000], loss: 0.291324, mean_absolute_error: 22.582287, mean_q: 26.419321, mean_eps: 0.455291
 605681/1000000: episode: 2772, duration: 17.024s, episode steps: 304, steps per second: 18, episode reward: 198.300, mean reward: 0.652 [-1.000, 1.000], mean action: 3.747 [0.000, 6.000], mean observation: 173.197 [0.000, 255.000], loss: 0.302737, mean_absolute_error: 22.790162, mean_q: 26.682527, mean_eps: 0.455023
 605962/1000000: episode: 2773, duration: 15.225s, episode steps: 281, steps per second: 18, episode reward: 224.600, mean reward: 0.799 [-1.000, 1.000], mean action: 3.459 [0.000, 6.000], mean observation: 173.767 [0.000, 255.000], loss: 0.303445, mean_absolute_error: 23.022437, mean_q: 26.976927, mean_eps: 0.454760
 606245/1000000: episode: 2774, duration: 1

 612451/1000000: episode: 2797, duration: 12.593s, episode steps: 241, steps per second: 19, episode reward: 170.500, mean reward: 0.707 [-1.000, 1.000], mean action: 3.519 [0.000, 6.000], mean observation: 173.353 [0.000, 255.000], loss: 0.291266, mean_absolute_error: 22.749135, mean_q: 26.634948, mean_eps: 0.448903
 612732/1000000: episode: 2798, duration: 15.420s, episode steps: 281, steps per second: 18, episode reward: 222.100, mean reward: 0.790 [-1.000, 1.000], mean action: 3.680 [0.000, 6.000], mean observation: 173.284 [0.000, 255.000], loss: 0.290401, mean_absolute_error: 22.876635, mean_q: 26.788349, mean_eps: 0.448669
 612948/1000000: episode: 2799, duration: 11.050s, episode steps: 216, steps per second: 20, episode reward: 147.300, mean reward: 0.682 [-1.000, 1.000], mean action: 3.139 [0.000, 6.000], mean observation: 172.790 [0.000, 255.000], loss: 0.305129, mean_absolute_error: 22.736105, mean_q: 26.627760, mean_eps: 0.448446
 613205/1000000: episode: 2800, duration: 1

 619408/1000000: episode: 2823, duration: 10.749s, episode steps: 212, steps per second: 20, episode reward: 151.700, mean reward: 0.716 [-1.000, 1.000], mean action: 3.217 [0.000, 6.000], mean observation: 172.168 [0.000, 255.000], loss: 0.259778, mean_absolute_error: 23.048631, mean_q: 26.969802, mean_eps: 0.442630
 619590/1000000: episode: 2824, duration: 8.795s, episode steps: 182, steps per second: 21, episode reward: 131.300, mean reward: 0.721 [-1.000, 1.000], mean action: 2.967 [0.000, 6.000], mean observation: 171.438 [0.000, 255.000], loss: 0.298294, mean_absolute_error: 22.921094, mean_q: 26.821605, mean_eps: 0.442452
 619948/1000000: episode: 2825, duration: 21.036s, episode steps: 358, steps per second: 17, episode reward: 265.800, mean reward: 0.742 [-1.000, 1.000], mean action: 3.760 [0.000, 6.000], mean observation: 166.498 [0.000, 255.000], loss: 0.297364, mean_absolute_error: 22.645660, mean_q: 26.499522, mean_eps: 0.442209
 620239/1000000: episode: 2826, duration: 16

 626901/1000000: episode: 2849, duration: 16.968s, episode steps: 304, steps per second: 18, episode reward: 204.600, mean reward: 0.673 [-1.000, 1.000], mean action: 3.691 [0.000, 6.000], mean observation: 172.614 [0.000, 255.000], loss: 0.290235, mean_absolute_error: 22.951073, mean_q: 26.858765, mean_eps: 0.435925
 627137/1000000: episode: 2850, duration: 12.390s, episode steps: 236, steps per second: 19, episode reward: 189.500, mean reward: 0.803 [-1.000, 1.000], mean action: 3.305 [0.000, 6.000], mean observation: 172.823 [0.000, 255.000], loss: 0.288735, mean_absolute_error: 23.299079, mean_q: 27.288591, mean_eps: 0.435682
 627418/1000000: episode: 2851, duration: 15.490s, episode steps: 281, steps per second: 18, episode reward: 227.300, mean reward: 0.809 [-1.000, 1.000], mean action: 3.584 [0.000, 6.000], mean observation: 173.027 [0.000, 255.000], loss: 0.310686, mean_absolute_error: 22.986392, mean_q: 26.920872, mean_eps: 0.435450
 627721/1000000: episode: 2852, duration: 1

 634056/1000000: episode: 2875, duration: 12.116s, episode steps: 231, steps per second: 19, episode reward: 137.500, mean reward: 0.595 [-1.000, 1.000], mean action: 3.494 [0.000, 6.000], mean observation: 172.830 [0.000, 255.000], loss: 0.323683, mean_absolute_error: 23.900298, mean_q: 27.987988, mean_eps: 0.429454
 634362/1000000: episode: 2876, duration: 17.262s, episode steps: 306, steps per second: 18, episode reward: 195.300, mean reward: 0.638 [-1.000, 1.000], mean action: 3.686 [0.000, 6.000], mean observation: 172.418 [0.000, 255.000], loss: 0.334709, mean_absolute_error: 24.070061, mean_q: 28.201068, mean_eps: 0.429213
 634630/1000000: episode: 2877, duration: 14.455s, episode steps: 268, steps per second: 19, episode reward: 205.200, mean reward: 0.766 [-1.000, 1.000], mean action: 3.369 [0.000, 6.000], mean observation: 173.441 [0.000, 255.000], loss: 0.349787, mean_absolute_error: 23.976434, mean_q: 28.069461, mean_eps: 0.428954
 634862/1000000: episode: 2878, duration: 1

 641401/1000000: episode: 2901, duration: 15.766s, episode steps: 283, steps per second: 18, episode reward: 231.400, mean reward: 0.818 [-1.000, 1.000], mean action: 3.473 [0.000, 6.000], mean observation: 173.217 [0.000, 255.000], loss: 0.351354, mean_absolute_error: 24.186405, mean_q: 28.310595, mean_eps: 0.422866
 641708/1000000: episode: 2902, duration: 17.255s, episode steps: 307, steps per second: 18, episode reward: 265.000, mean reward: 0.863 [-1.000, 1.000], mean action: 3.720 [0.000, 6.000], mean observation: 172.555 [0.000, 255.000], loss: 0.370272, mean_absolute_error: 24.101325, mean_q: 28.206886, mean_eps: 0.422601
 642007/1000000: episode: 2903, duration: 16.885s, episode steps: 299, steps per second: 18, episode reward: 245.800, mean reward: 0.822 [-1.000, 1.000], mean action: 3.763 [0.000, 6.000], mean observation: 172.984 [0.000, 255.000], loss: 0.366780, mean_absolute_error: 24.201072, mean_q: 28.342854, mean_eps: 0.422330
 642288/1000000: episode: 2904, duration: 1

 648414/1000000: episode: 2927, duration: 12.711s, episode steps: 244, steps per second: 19, episode reward: 177.100, mean reward: 0.726 [-1.000, 1.000], mean action: 3.389 [0.000, 6.000], mean observation: 172.847 [0.000, 255.000], loss: 0.380396, mean_absolute_error: 23.881693, mean_q: 27.969975, mean_eps: 0.416537
 648705/1000000: episode: 2928, duration: 15.967s, episode steps: 291, steps per second: 18, episode reward: 228.600, mean reward: 0.786 [-1.000, 1.000], mean action: 3.711 [0.000, 6.000], mean observation: 172.072 [0.000, 255.000], loss: 0.359238, mean_absolute_error: 24.435425, mean_q: 28.620206, mean_eps: 0.416296
 648918/1000000: episode: 2929, duration: 10.908s, episode steps: 213, steps per second: 20, episode reward: 155.600, mean reward: 0.731 [-1.000, 1.000], mean action: 3.390 [0.000, 6.000], mean observation: 171.813 [0.000, 255.000], loss: 0.396055, mean_absolute_error: 24.085467, mean_q: 28.193827, mean_eps: 0.416069
 649197/1000000: episode: 2930, duration: 1

 655915/1000000: episode: 2953, duration: 11.049s, episode steps: 215, steps per second: 19, episode reward: 160.300, mean reward: 0.746 [-1.000, 1.000], mean action: 3.205 [0.000, 6.000], mean observation: 172.656 [0.000, 255.000], loss: 0.343222, mean_absolute_error: 24.365173, mean_q: 28.521856, mean_eps: 0.409775
 656202/1000000: episode: 2954, duration: 15.803s, episode steps: 287, steps per second: 18, episode reward: 200.400, mean reward: 0.698 [-1.000, 1.000], mean action: 3.571 [0.000, 6.000], mean observation: 172.833 [0.000, 255.000], loss: 0.375579, mean_absolute_error: 24.686144, mean_q: 28.955771, mean_eps: 0.409548
 656513/1000000: episode: 2955, duration: 17.747s, episode steps: 311, steps per second: 18, episode reward: 262.400, mean reward: 0.844 [-1.000, 1.000], mean action: 3.688 [0.000, 6.000], mean observation: 172.411 [0.000, 255.000], loss: 0.368965, mean_absolute_error: 24.387929, mean_q: 28.545968, mean_eps: 0.409278
 656862/1000000: episode: 2956, duration: 2

 663712/1000000: episode: 2979, duration: 15.311s, episode steps: 279, steps per second: 18, episode reward: 224.100, mean reward: 0.803 [-1.000, 1.000], mean action: 3.552 [0.000, 6.000], mean observation: 173.538 [0.000, 255.000], loss: 0.387860, mean_absolute_error: 25.598914, mean_q: 29.981851, mean_eps: 0.402785
 664026/1000000: episode: 2980, duration: 17.991s, episode steps: 314, steps per second: 17, episode reward: 230.300, mean reward: 0.733 [-1.000, 1.000], mean action: 3.745 [0.000, 6.000], mean observation: 172.147 [0.000, 255.000], loss: 0.352371, mean_absolute_error: 25.175775, mean_q: 29.484125, mean_eps: 0.402519
 664337/1000000: episode: 2981, duration: 17.600s, episode steps: 311, steps per second: 18, episode reward: 223.300, mean reward: 0.718 [-1.000, 1.000], mean action: 3.820 [0.000, 6.000], mean observation: 172.376 [0.000, 255.000], loss: 0.366658, mean_absolute_error: 25.648801, mean_q: 30.065958, mean_eps: 0.402236
 664601/1000000: episode: 2982, duration: 1

 671487/1000000: episode: 3005, duration: 17.004s, episode steps: 300, steps per second: 18, episode reward: 247.600, mean reward: 0.825 [-1.000, 1.000], mean action: 3.613 [0.000, 6.000], mean observation: 172.229 [0.000, 255.000], loss: 0.367125, mean_absolute_error: 25.796226, mean_q: 30.226054, mean_eps: 0.395798
 671767/1000000: episode: 3006, duration: 15.495s, episode steps: 280, steps per second: 18, episode reward: 223.200, mean reward: 0.797 [-1.000, 1.000], mean action: 3.571 [0.000, 6.000], mean observation: 172.639 [0.000, 255.000], loss: 0.360041, mean_absolute_error: 25.575019, mean_q: 29.960466, mean_eps: 0.395537
 672047/1000000: episode: 3007, duration: 15.147s, episode steps: 280, steps per second: 18, episode reward: 216.300, mean reward: 0.773 [-1.000, 1.000], mean action: 3.529 [0.000, 6.000], mean observation: 172.946 [0.000, 255.000], loss: 0.381683, mean_absolute_error: 26.008425, mean_q: 30.429708, mean_eps: 0.395285
 672237/1000000: episode: 3008, duration: 9

 679346/1000000: episode: 3031, duration: 20.002s, episode steps: 339, steps per second: 17, episode reward: 218.300, mean reward: 0.644 [-1.000, 1.000], mean action: 3.676 [0.000, 6.000], mean observation: 166.839 [0.000, 255.000], loss: 0.377587, mean_absolute_error: 26.109824, mean_q: 30.593966, mean_eps: 0.388742
 679650/1000000: episode: 3032, duration: 17.202s, episode steps: 304, steps per second: 18, episode reward: 213.000, mean reward: 0.701 [-1.000, 1.000], mean action: 3.681 [0.000, 6.000], mean observation: 172.752 [0.000, 255.000], loss: 0.362318, mean_absolute_error: 24.907180, mean_q: 29.149040, mean_eps: 0.388452
 679996/1000000: episode: 3033, duration: 20.294s, episode steps: 346, steps per second: 17, episode reward: 256.900, mean reward: 0.742 [-1.000, 1.000], mean action: 3.893 [0.000, 6.000], mean observation: 166.655 [0.000, 255.000], loss: 0.331556, mean_absolute_error: 25.271420, mean_q: 29.580218, mean_eps: 0.388160
 680264/1000000: episode: 3034, duration: 1

 687591/1000000: episode: 3057, duration: 20.314s, episode steps: 350, steps per second: 17, episode reward: 270.800, mean reward: 0.774 [-1.000, 1.000], mean action: 3.840 [0.000, 6.000], mean observation: 166.180 [0.000, 255.000], loss: 0.389834, mean_absolute_error: 26.212936, mean_q: 30.707624, mean_eps: 0.381326
 687940/1000000: episode: 3058, duration: 20.589s, episode steps: 349, steps per second: 17, episode reward: 264.600, mean reward: 0.758 [-1.000, 1.000], mean action: 3.831 [0.000, 6.000], mean observation: 166.178 [0.000, 255.000], loss: 0.373650, mean_absolute_error: 25.921622, mean_q: 30.347351, mean_eps: 0.381012
 688202/1000000: episode: 3059, duration: 13.927s, episode steps: 262, steps per second: 19, episode reward: 181.300, mean reward: 0.692 [-1.000, 1.000], mean action: 3.385 [0.000, 6.000], mean observation: 173.809 [0.000, 255.000], loss: 0.356472, mean_absolute_error: 26.043696, mean_q: 30.456354, mean_eps: 0.380737
 688541/1000000: episode: 3060, duration: 1

 695702/1000000: episode: 3083, duration: 15.073s, episode steps: 277, steps per second: 18, episode reward: 213.700, mean reward: 0.771 [-1.000, 1.000], mean action: 3.433 [0.000, 6.000], mean observation: 173.073 [0.000, 255.000], loss: 0.368710, mean_absolute_error: 26.052964, mean_q: 30.506370, mean_eps: 0.373992
 696045/1000000: episode: 3084, duration: 20.092s, episode steps: 343, steps per second: 17, episode reward: 287.300, mean reward: 0.838 [-1.000, 1.000], mean action: 3.834 [0.000, 6.000], mean observation: 165.632 [0.000, 255.000], loss: 0.370782, mean_absolute_error: 26.101444, mean_q: 30.551021, mean_eps: 0.373713
 696306/1000000: episode: 3085, duration: 13.777s, episode steps: 261, steps per second: 19, episode reward: 200.100, mean reward: 0.767 [-1.000, 1.000], mean action: 3.605 [0.000, 6.000], mean observation: 172.835 [0.000, 255.000], loss: 0.405576, mean_absolute_error: 26.393428, mean_q: 30.884399, mean_eps: 0.373442
 696490/1000000: episode: 3086, duration: 8

 703625/1000000: episode: 3109, duration: 20.162s, episode steps: 343, steps per second: 17, episode reward: 292.900, mean reward: 0.854 [-1.000, 1.000], mean action: 3.641 [0.000, 6.000], mean observation: 165.665 [0.000, 255.000], loss: 0.400667, mean_absolute_error: 27.196424, mean_q: 31.826584, mean_eps: 0.366891
 703919/1000000: episode: 3110, duration: 16.473s, episode steps: 294, steps per second: 18, episode reward: 246.900, mean reward: 0.840 [-1.000, 1.000], mean action: 3.514 [0.000, 6.000], mean observation: 173.143 [0.000, 255.000], loss: 0.387791, mean_absolute_error: 27.052227, mean_q: 31.657720, mean_eps: 0.366605
 704265/1000000: episode: 3111, duration: 20.373s, episode steps: 346, steps per second: 17, episode reward: 262.900, mean reward: 0.760 [-1.000, 1.000], mean action: 3.780 [0.000, 6.000], mean observation: 165.783 [0.000, 255.000], loss: 0.409715, mean_absolute_error: 27.022363, mean_q: 31.639147, mean_eps: 0.366317
 704607/1000000: episode: 3112, duration: 2

 712089/1000000: episode: 3135, duration: 16.913s, episode steps: 300, steps per second: 18, episode reward: 246.500, mean reward: 0.822 [-1.000, 1.000], mean action: 3.643 [0.000, 6.000], mean observation: 172.863 [0.000, 255.000], loss: 0.385985, mean_absolute_error: 27.314563, mean_q: 31.984110, mean_eps: 0.359254
 712386/1000000: episode: 3136, duration: 16.679s, episode steps: 297, steps per second: 18, episode reward: 231.400, mean reward: 0.779 [-1.000, 1.000], mean action: 3.636 [0.000, 6.000], mean observation: 173.120 [0.000, 255.000], loss: 0.395244, mean_absolute_error: 27.201538, mean_q: 31.847166, mean_eps: 0.358986
 712665/1000000: episode: 3137, duration: 15.387s, episode steps: 279, steps per second: 18, episode reward: 221.200, mean reward: 0.793 [-1.000, 1.000], mean action: 3.520 [0.000, 6.000], mean observation: 173.413 [0.000, 255.000], loss: 0.394852, mean_absolute_error: 27.065019, mean_q: 31.706649, mean_eps: 0.358727
 713019/1000000: episode: 3138, duration: 2

 722161/1000000: episode: 3161, duration: 16.188s, episode steps: 389, steps per second: 24, episode reward: 292.900, mean reward: 0.753 [-1.000, 1.000], mean action: 3.684 [0.000, 6.000], mean observation: 172.324 [0.000, 255.000], loss: 0.374537, mean_absolute_error: 27.187706, mean_q: 31.808648, mean_eps: 0.350231
 722634/1000000: episode: 3162, duration: 20.728s, episode steps: 473, steps per second: 23, episode reward: 334.600, mean reward: 0.707 [-1.000, 1.000], mean action: 3.748 [0.000, 6.000], mean observation: 164.604 [0.000, 255.000], loss: 0.374196, mean_absolute_error: 27.027097, mean_q: 31.652778, mean_eps: 0.349842
 723007/1000000: episode: 3163, duration: 15.204s, episode steps: 373, steps per second: 25, episode reward: 236.700, mean reward: 0.635 [-1.000, 1.000], mean action: 3.550 [0.000, 6.000], mean observation: 173.181 [0.000, 255.000], loss: 0.398006, mean_absolute_error: 27.179011, mean_q: 31.833346, mean_eps: 0.349462
 723334/1000000: episode: 3164, duration: 1

 733240/1000000: episode: 3187, duration: 17.461s, episode steps: 408, steps per second: 23, episode reward: 340.100, mean reward: 0.834 [-1.000, 1.000], mean action: 3.775 [0.000, 6.000], mean observation: 172.295 [0.000, 255.000], loss: 0.387845, mean_absolute_error: 27.474212, mean_q: 32.173022, mean_eps: 0.340269
 733691/1000000: episode: 3188, duration: 20.697s, episode steps: 451, steps per second: 22, episode reward: 353.000, mean reward: 0.783 [-1.000, 1.000], mean action: 3.754 [0.000, 6.000], mean observation: 165.280 [0.000, 255.000], loss: 0.369504, mean_absolute_error: 27.589996, mean_q: 32.314099, mean_eps: 0.339882
 734158/1000000: episode: 3189, duration: 20.438s, episode steps: 467, steps per second: 23, episode reward: 399.000, mean reward: 0.854 [0.500, 1.000], mean action: 3.717 [0.000, 6.000], mean observation: 165.987 [0.000, 255.000], loss: 0.372250, mean_absolute_error: 27.648046, mean_q: 32.373201, mean_eps: 0.339468
 734561/1000000: episode: 3190, duration: 16

 744356/1000000: episode: 3213, duration: 20.450s, episode steps: 466, steps per second: 23, episode reward: 371.000, mean reward: 0.796 [0.500, 1.000], mean action: 3.655 [0.000, 6.000], mean observation: 165.919 [0.000, 255.000], loss: 0.362913, mean_absolute_error: 27.676424, mean_q: 32.398898, mean_eps: 0.330290
 744827/1000000: episode: 3214, duration: 20.469s, episode steps: 471, steps per second: 23, episode reward: 373.700, mean reward: 0.793 [0.100, 1.000], mean action: 3.652 [0.000, 6.000], mean observation: 165.835 [0.000, 255.000], loss: 0.371047, mean_absolute_error: 27.868144, mean_q: 32.628261, mean_eps: 0.329869
 745301/1000000: episode: 3215, duration: 20.472s, episode steps: 474, steps per second: 23, episode reward: 418.500, mean reward: 0.883 [0.500, 1.000], mean action: 3.540 [0.000, 6.000], mean observation: 165.315 [0.000, 255.000], loss: 0.375165, mean_absolute_error: 27.962522, mean_q: 32.768117, mean_eps: 0.329442
 745781/1000000: episode: 3216, duration: 21.1

 756058/1000000: episode: 3239, duration: 20.414s, episode steps: 466, steps per second: 23, episode reward: 412.500, mean reward: 0.885 [0.500, 1.000], mean action: 3.401 [0.000, 6.000], mean observation: 164.896 [0.000, 255.000], loss: 0.376772, mean_absolute_error: 28.218794, mean_q: 33.038893, mean_eps: 0.319758
 756530/1000000: episode: 3240, duration: 20.412s, episode steps: 472, steps per second: 23, episode reward: 405.500, mean reward: 0.859 [0.500, 1.000], mean action: 3.576 [0.000, 6.000], mean observation: 165.093 [0.000, 255.000], loss: 0.392486, mean_absolute_error: 28.352395, mean_q: 33.223162, mean_eps: 0.319335
 756906/1000000: episode: 3241, duration: 15.309s, episode steps: 376, steps per second: 25, episode reward: 303.900, mean reward: 0.808 [-1.000, 1.000], mean action: 3.644 [0.000, 6.000], mean observation: 172.997 [0.000, 255.000], loss: 0.413840, mean_absolute_error: 27.886355, mean_q: 32.664304, mean_eps: 0.318954
 757384/1000000: episode: 3242, duration: 20.

 767978/1000000: episode: 3265, duration: 20.558s, episode steps: 465, steps per second: 23, episode reward: 386.500, mean reward: 0.831 [0.500, 1.000], mean action: 3.677 [0.000, 6.000], mean observation: 165.817 [0.000, 255.000], loss: 0.383902, mean_absolute_error: 28.630064, mean_q: 33.508945, mean_eps: 0.309029
 768445/1000000: episode: 3266, duration: 20.413s, episode steps: 467, steps per second: 23, episode reward: 375.500, mean reward: 0.804 [0.500, 1.000], mean action: 3.600 [0.000, 6.000], mean observation: 165.148 [0.000, 255.000], loss: 0.402125, mean_absolute_error: 28.265387, mean_q: 33.100690, mean_eps: 0.308609
 768912/1000000: episode: 3267, duration: 20.473s, episode steps: 467, steps per second: 23, episode reward: 424.000, mean reward: 0.908 [0.500, 1.000], mean action: 3.610 [0.000, 6.000], mean observation: 165.687 [0.000, 255.000], loss: 0.383436, mean_absolute_error: 28.756914, mean_q: 33.674943, mean_eps: 0.308190
 769382/1000000: episode: 3268, duration: 20.5

 779958/1000000: episode: 3291, duration: 20.404s, episode steps: 468, steps per second: 23, episode reward: 376.000, mean reward: 0.803 [0.500, 1.000], mean action: 3.404 [0.000, 6.000], mean observation: 165.944 [0.000, 255.000], loss: 0.426707, mean_absolute_error: 29.100784, mean_q: 34.061735, mean_eps: 0.298248
 780432/1000000: episode: 3292, duration: 20.514s, episode steps: 474, steps per second: 23, episode reward: 378.500, mean reward: 0.799 [0.500, 1.000], mean action: 3.399 [0.000, 6.000], mean observation: 165.508 [0.000, 255.000], loss: 0.396091, mean_absolute_error: 29.213315, mean_q: 34.230978, mean_eps: 0.297825
 780902/1000000: episode: 3293, duration: 20.493s, episode steps: 470, steps per second: 23, episode reward: 421.000, mean reward: 0.896 [0.500, 1.000], mean action: 3.411 [0.000, 6.000], mean observation: 165.556 [0.000, 255.000], loss: 0.394577, mean_absolute_error: 29.170374, mean_q: 34.152006, mean_eps: 0.297401
 781366/1000000: episode: 3294, duration: 20.4

 791790/1000000: episode: 3317, duration: 20.484s, episode steps: 472, steps per second: 23, episode reward: 406.500, mean reward: 0.861 [0.500, 1.000], mean action: 3.441 [0.000, 6.000], mean observation: 165.714 [0.000, 255.000], loss: 0.373558, mean_absolute_error: 29.899232, mean_q: 35.017374, mean_eps: 0.287601
 792254/1000000: episode: 3318, duration: 20.463s, episode steps: 464, steps per second: 23, episode reward: 415.000, mean reward: 0.894 [0.500, 1.000], mean action: 3.403 [0.000, 6.000], mean observation: 165.520 [0.000, 255.000], loss: 0.399880, mean_absolute_error: 29.649220, mean_q: 34.721098, mean_eps: 0.287180
 792711/1000000: episode: 3319, duration: 20.562s, episode steps: 457, steps per second: 22, episode reward: 408.000, mean reward: 0.893 [0.500, 1.000], mean action: 3.567 [0.000, 6.000], mean observation: 165.401 [0.000, 255.000], loss: 0.409804, mean_absolute_error: 29.709831, mean_q: 34.819092, mean_eps: 0.286766
 793180/1000000: episode: 3320, duration: 20.4

<keras.callbacks.History at 0x20dbf537f48>

### continue training 

In [2]:
# DISABLED CELL: every statement below is commented out, so running this cell
# is a no-op. It is kept as a recipe for resuming training from a saved
# checkpoint. If re-enabled, the code would:
#   1. define build_callbacks_cont(env_name), returning a ModelIntervalCheckpoint
#      (interval=25000, file name 'dqn_<env_name>_weights_500000_add_{step}.h5f')
#      plus a FileLogger (interval=100, file name 'dqn_<env_name>_log.json'),
#      both placed under log_dir;
#   2. construct a new DQNAgent and compile it with Adam(lr=.0001) and the
#      'mae' metric;
#   3. load 'dqn_AirSimCarRL_weights_500000.h5f' from log_dir;
#   4. call dqn.fit(env, nb_steps=2000000, ...) with those callbacks.
# The cell itself does not define log_dir, model, policy, memory, processor,
# num_actions or env; they must already exist in the kernel.
# NOTE(review): the FileLogger file name carries no "cont"/step suffix; if the
# initial training run logs to the same name, that log would presumably be
# overwritten -- confirm before re-enabling.
# NOTE(review): policy and memory are passed in as existing objects, so their
# state after a kernel restart would differ from the state at step 500000 --
# verify this is acceptable for a resumed run.

# def build_callbacks_cont(env_name):
#     checkpoint_weights_filename = join(log_dir, 'dqn_' + env_name + '_weights_500000_add_{step}.h5f')
#     log_filename = join(log_dir,'dqn_{}_log.json'.format(env_name))
#     callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000)]
#     callbacks += [FileLogger(log_filename, interval=100)]
#     return callbacks

# callbacks_cont = build_callbacks_cont('AirSimCarRL')

# dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
#                processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
#                train_interval=4, delta_clip=1.)
# dqn.compile(Adam(lr=.0001), metrics=['mae'])

# weights_filename = join(log_dir,'dqn_{}_weights_{}.h5f'.format('AirSimCarRL', 500000))
# dqn.load_weights(weights_filename)

# dqn.fit(env, nb_steps=2000000,
#         visualize=False,
#         verbose=2,
#         callbacks=callbacks_cont)

### testing

In [None]:
# DISABLED CELL: every statement below is commented out, so running this cell
# is a no-op. If re-enabled, the code would:
#   1. set log_dir to 'logs' and create that directory when it does not exist;
#   2. load 'dqn_AirSimCarRL_weights_1000000.h5f' from log_dir into dqn;
#   3. run dqn.test(env, nb_episodes=10, visualize=True).
# The cell itself does not define dqn or env; both must already exist in the
# kernel (built and compiled by an earlier cell) before this can run.
# NOTE(review): the weights file name assumes a checkpoint saved at step
# 1000000 -- confirm that file is present in log_dir before re-enabling.

# log_dir = 'logs'
# if not exists(log_dir):
#     os.makedirs(log_dir)
    
# weights_filename = join(log_dir,'dqn_{}_weights_{}.h5f'.format('AirSimCarRL', 1000000))
# dqn.load_weights(weights_filename)
# dqn.test(env, nb_episodes=10, visualize=True)