### This notebook is a copy of `train.py`

In [1]:
from rl.callbacks import ModelIntervalCheckpoint, FileLogger
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, Concatenate
from gym import spaces
import numpy as np
from PIL import Image
from configparser import ConfigParser
import os
from os.path import join, pardir, exists

from gym_airsim.airsim_car_env import AirSimCarEnv

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Configure the TensorFlow session that Keras will use.
# The object is named `tf_config` (not `config`) so it is not confused with the
# ConfigParser instance that is bound to `config` further down in this cell.
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
set_session(tf.Session(config=tf_config))

class AirSimCarProcessor(Processor):
    """Pre-processing hooks applied to observations, state batches and rewards.

    Observations are converted to grayscale `uint8` images of shape
    `INPUT_SHAPE`, state batches are scaled to `float32` in [0, 1], and
    rewards are clipped to [-1, 1].
    """

    def process_observation(self, observation):
        """Resize a raw image observation to `INPUT_SHAPE` and convert it to grayscale.

        Args:
            observation: 3-D array laid out as (height, width, channel).

        Returns:
            2-D `uint8` array whose shape equals `INPUT_SHAPE`.

        Raises:
            AssertionError: if the input is not 3-D or the result does not
                have shape `INPUT_SHAPE`.
        """
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        # PIL's resize() takes (width, height), whereas INPUT_SHAPE is checked below
        # against a NumPy (height, width) shape. Reverse it so that non-square
        # shapes also satisfy the assertion (no change for the square 84x84 case).
        img = img.resize(INPUT_SHAPE[::-1]).convert('L')  # resize and convert to grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        return processed_observation.astype('uint8')  # saves storage in experience memory

    def process_state_batch(self, batch):
        """Scale a batch of stored `uint8` states to `float32` values in [0, 1].

        Args:
            batch: array of states as stored in the experience memory.

        Returns:
            The batch cast to `float32` and divided by 255.
        """
        # We could perform this processing step in `process_observation`. In this case, however,
        # we would need to store a `float32` array instead, which is 4x more memory intensive than
        # an `uint8` array. This matters if we store 1M observations.
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """Clip the reward to the range [-1, 1]."""
        return np.clip(reward, -1., 1.)

# Read the number of discrete actions from the [car_agent] section of config.ini.
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed. Fail here with a clear message instead of a later,
# confusing KeyError on the missing 'car_agent' section.
if not config.read('config.ini'):
    raise FileNotFoundError("Could not read 'config.ini' from the current working directory")
num_actions = int(config['car_agent']['actions'])

WINDOW_LENGTH = 4       # number of consecutive observations stacked into one state
INPUT_SHAPE = (84, 84)  # shape of a single processed observation

env = AirSimCarEnv()
np.random.seed(123)

# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
model = Sequential()
# Move the stacked-observation axis last: (window, 84, 84) -> (84, 84, window).
model.add(Permute((2, 3, 1), input_shape=input_shape))
model.add(Convolution2D(32, (8, 8), strides=(4, 4)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (4, 4), strides=(2, 2)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3), strides=(1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
# One linear output per discrete action.
model.add(Dense(num_actions))
model.add(Activation('linear'))
# summary() prints the table itself and returns None, so wrapping it in print()
# would emit a stray "None" line after the table.
model.summary()

# Directory for checkpoints and training logs. exist_ok=True makes the call a no-op
# when the directory is already there, without a separate (racy) exists() check.
log_dir = 'logs'
os.makedirs(log_dir, exist_ok=True)

# Observation/reward pre-processing and the replay buffer used by the agent.
processor = AirSimCarProcessor()
# NOTE(review): the replay limit (50000) equals nb_steps_warmup below - confirm this is intended.
memory = SequentialMemory(limit=50000, window_length=WINDOW_LENGTH)

# Epsilon-greedy exploration wrapped in a linear annealing schedule for `eps`
# (value_max=1.0, value_min=0.1 over nb_steps=1000000; value_test=0.05).
eps_greedy = EpsGreedyQPolicy()
policy = LinearAnnealedPolicy(
    eps_greedy,
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=1000000,
)

# Assemble the DQN agent from the model, policy, memory and processor, then compile it.
dqn = DQNAgent(
    model=model,
    nb_actions=num_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=50000,
    gamma=.99,
    target_model_update=10000,
    train_interval=4,
    delta_clip=1.,
)
optimizer = Adam(lr=.0001)
dqn.compile(optimizer, metrics=['mae'])

def build_callbacks(env_name, log_dir='logs', checkpoint_interval=25000, log_interval=100):
    """Create the training callbacks: periodic weight checkpoints and a JSON file logger.

    Args:
        env_name: Label embedded in the checkpoint and log file names.
        log_dir: Directory that receives the files; created if it does not exist.
        checkpoint_interval: `interval` passed to `ModelIntervalCheckpoint`.
        log_interval: `interval` passed to `FileLogger`.

    Returns:
        A list `[ModelIntervalCheckpoint, FileLogger]`.
    """
    # exist_ok=True avoids the check-then-create race of `if not exists(...)`.
    os.makedirs(log_dir, exist_ok=True)

    # '{step}' is left unformatted here; it stays a placeholder in the name handed
    # to ModelIntervalCheckpoint.
    checkpoint_weights_filename = join(log_dir, 'dqn_' + env_name + '_weights_{step}.h5f')
    log_filename = join(log_dir, 'dqn_{}_log.json'.format(env_name))
    return [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=checkpoint_interval),
        FileLogger(log_filename, interval=log_interval),
    ]

callbacks = build_callbacks('AirSimCarRL')

dqn.fit(env, nb_steps=2000000,
        visualize=False,
        verbose=2,
        callbacks=callbacks)

# The checkpoint callback only writes weights every 25000 steps, so save once more
# when fit() returns to keep the most recent state of the network.
dqn.save_weights(join(log_dir, 'dqn_{}_weights.h5f'.format('AirSimCarRL')), overwrite=True)

# Optional continuation run: resume training from the 500000-step checkpoint (uncomment to use).
# def build_callbacks_cont(env_name):
#     checkpoint_weights_filename = join(log_dir, 'dqn_' + env_name + '_weights_500000_add_{step}.h5f')
#     log_filename = join(log_dir,'dqn_{}_log.json'.format(env_name))
#     callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000)]
#     callbacks += [FileLogger(log_filename, interval=100)]
#     return callbacks

# callbacks_cont = build_callbacks_cont('AirSimCarRL')

# dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy, memory=memory,
#                processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
#                train_interval=4, delta_clip=1.)
# dqn.compile(Adam(lr=.0001), metrics=['mae'])

# weights_filename = join(log_dir,'dqn_{}_weights_{}.h5f'.format('AirSimCarRL', 500000))
# dqn.load_weights(weights_filename)

# dqn.fit(env, nb_steps=2000000,
#         visualize=False,
#         verbose=2,
#         callbacks=callbacks_cont)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Connected!
Client Ver:1 (Min Req: 1), Server Ver:1 (Min Req: 1)

['WayPoint0', 'WayPoint1', 'WayPoint2', 'WayPoint3', 'WayPoint4', 'WayPoint5', 'WayPoint6', 'WayPoint7', 'WayPoint8', 'WayPoint9']



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_1 (Permute)          (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 32)        8224      
_________________________________________________________________
activation_1 (Activation)    (None, 20, 20, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
activation_2 (Activation)    (None, 9, 9, 64)          0         
_____________________________________________________

    5311/2000000: episode: 17, duration: 7.562s, episode steps: 273, steps per second: 36, episode reward: 113.400, mean reward: 0.415 [-1.000, 0.500], mean action: 2.945 [0.000, 6.000], mean observation: 172.384 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    5627/2000000: episode: 18, duration: 9.100s, episode steps: 316, steps per second: 35, episode reward: 179.200, mean reward: 0.567 [-1.000, 1.000], mean action: 3.035 [0.000, 6.000], mean observation: 171.963 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    5910/2000000: episode: 19, duration: 7.723s, episode steps: 283, steps per second: 37, episode reward: 105.200, mean reward: 0.372 [-1.000, 0.500], mean action: 2.979 [0.000, 6.000], mean observation: 172.880 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    6220/2000000: episode: 20, duration: 8.901s, episode steps: 310, steps per second: 35, episode reward: 183.700, mean rew

   14241/2000000: episode: 46, duration: 9.432s, episode steps: 316, steps per second: 34, episode reward: 157.000, mean reward: 0.497 [-1.000, 1.000], mean action: 3.051 [0.000, 6.000], mean observation: 172.107 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   14552/2000000: episode: 47, duration: 8.628s, episode steps: 311, steps per second: 36, episode reward: 116.000, mean reward: 0.373 [-1.000, 0.500], mean action: 3.000 [0.000, 6.000], mean observation: 172.861 [22.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   14870/2000000: episode: 48, duration: 9.564s, episode steps: 318, steps per second: 33, episode reward: 174.900, mean reward: 0.550 [-1.000, 1.000], mean action: 3.019 [0.000, 6.000], mean observation: 172.196 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   15179/2000000: episode: 49, duration: 8.869s, episode steps: 309, steps per second: 35, episode reward: 143.800, mean rew

   23493/2000000: episode: 75, duration: 8.305s, episode steps: 296, steps per second: 36, episode reward: 121.300, mean reward: 0.410 [-1.000, 0.500], mean action: 2.889 [0.000, 6.000], mean observation: 171.197 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   23801/2000000: episode: 76, duration: 8.788s, episode steps: 308, steps per second: 35, episode reward: 131.000, mean reward: 0.425 [-1.000, 1.000], mean action: 3.042 [0.000, 6.000], mean observation: 171.047 [22.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   24090/2000000: episode: 77, duration: 8.028s, episode steps: 289, steps per second: 36, episode reward: 120.600, mean reward: 0.417 [-1.000, 0.500], mean action: 2.837 [0.000, 6.000], mean observation: 170.951 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   24400/2000000: episode: 78, duration: 8.927s, episode steps: 310, steps per second: 35, episode reward: 138.600, mean rew

   32580/2000000: episode: 104, duration: 9.506s, episode steps: 318, steps per second: 33, episode reward: 152.100, mean reward: 0.478 [-1.000, 1.000], mean action: 3.107 [0.000, 6.000], mean observation: 172.152 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   32894/2000000: episode: 105, duration: 9.109s, episode steps: 314, steps per second: 34, episode reward: 147.400, mean reward: 0.469 [-1.000, 1.000], mean action: 3.099 [0.000, 6.000], mean observation: 172.204 [25.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   33219/2000000: episode: 106, duration: 9.776s, episode steps: 325, steps per second: 33, episode reward: 201.800, mean reward: 0.621 [-1.000, 1.000], mean action: 2.969 [0.000, 6.000], mean observation: 172.455 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   33540/2000000: episode: 107, duration: 9.527s, episode steps: 321, steps per second: 34, episode reward: 203.200, mean

   41529/2000000: episode: 132, duration: 8.890s, episode steps: 310, steps per second: 35, episode reward: 200.800, mean reward: 0.648 [-1.000, 1.000], mean action: 2.852 [0.000, 6.000], mean observation: 171.476 [25.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   41875/2000000: episode: 133, duration: 10.531s, episode steps: 346, steps per second: 33, episode reward: 245.100, mean reward: 0.708 [-1.000, 1.000], mean action: 3.179 [0.000, 6.000], mean observation: 172.954 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   42185/2000000: episode: 134, duration: 8.773s, episode steps: 310, steps per second: 35, episode reward: 123.100, mean reward: 0.397 [-1.000, 0.500], mean action: 2.942 [0.000, 6.000], mean observation: 172.085 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   42575/2000000: episode: 135, duration: 12.197s, episode steps: 390, steps per second: 32, episode reward: 213.100, me

   50290/2000000: episode: 160, duration: 6.784s, episode steps: 200, steps per second: 29, episode reward: 72.900, mean reward: 0.365 [-1.000, 0.500], mean action: 2.965 [0.000, 6.000], mean observation: 172.522 [25.000, 255.000], loss: 0.042876, mean_absolute_error: 0.396783, mean_q: 0.488627, mean_eps: 0.954829
   50558/2000000: episode: 161, duration: 10.069s, episode steps: 268, steps per second: 27, episode reward: 195.700, mean reward: 0.730 [-1.000, 1.000], mean action: 2.929 [0.000, 6.000], mean observation: 172.395 [25.000, 255.000], loss: 0.042390, mean_absolute_error: 0.478190, mean_q: 0.582479, mean_eps: 0.954618
   50805/2000000: episode: 162, duration: 8.634s, episode steps: 247, steps per second: 29, episode reward: 98.400, mean reward: 0.398 [-1.000, 0.500], mean action: 3.085 [0.000, 6.000], mean observation: 172.443 [21.000, 255.000], loss: 0.030791, mean_absolute_error: 0.491256, mean_q: 0.629130, mean_eps: 0.954386
   51050/2000000: episode: 163, duration: 8.668s, 

   56669/2000000: episode: 186, duration: 7.015s, episode steps: 207, steps per second: 30, episode reward: 73.200, mean reward: 0.354 [-1.000, 0.500], mean action: 2.787 [0.000, 6.000], mean observation: 172.448 [24.000, 255.000], loss: 0.013547, mean_absolute_error: 0.470631, mean_q: 0.575692, mean_eps: 0.949091
   56932/2000000: episode: 187, duration: 9.569s, episode steps: 263, steps per second: 27, episode reward: 177.000, mean reward: 0.673 [-1.000, 1.000], mean action: 2.970 [0.000, 6.000], mean observation: 171.558 [23.000, 255.000], loss: 0.012108, mean_absolute_error: 0.465357, mean_q: 0.558915, mean_eps: 0.948880
   57144/2000000: episode: 188, duration: 7.329s, episode steps: 212, steps per second: 29, episode reward: 93.800, mean reward: 0.442 [-1.000, 1.000], mean action: 2.981 [0.000, 6.000], mean observation: 172.000 [24.000, 255.000], loss: 0.009981, mean_absolute_error: 0.461910, mean_q: 0.564613, mean_eps: 0.948668
   57406/2000000: episode: 189, duration: 9.839s, e

   62998/2000000: episode: 212, duration: 10.530s, episode steps: 279, steps per second: 26, episode reward: 166.300, mean reward: 0.596 [-1.000, 1.000], mean action: 2.993 [0.000, 6.000], mean observation: 173.064 [24.000, 255.000], loss: 0.008741, mean_absolute_error: 0.867178, mean_q: 1.030330, mean_eps: 0.943428
   63233/2000000: episode: 213, duration: 8.194s, episode steps: 235, steps per second: 29, episode reward: 94.400, mean reward: 0.402 [-1.000, 0.500], mean action: 2.817 [0.000, 6.000], mean observation: 171.801 [24.000, 255.000], loss: 0.007327, mean_absolute_error: 0.878190, mean_q: 1.051248, mean_eps: 0.943196
   63516/2000000: episode: 214, duration: 10.767s, episode steps: 283, steps per second: 26, episode reward: 176.400, mean reward: 0.623 [-1.000, 1.000], mean action: 3.177 [0.000, 6.000], mean observation: 173.017 [24.000, 255.000], loss: 0.007093, mean_absolute_error: 0.888906, mean_q: 1.059207, mean_eps: 0.942963
   63766/2000000: episode: 215, duration: 8.971s

   69426/2000000: episode: 238, duration: 6.936s, episode steps: 202, steps per second: 29, episode reward: 80.700, mean reward: 0.400 [-1.000, 0.500], mean action: 2.807 [0.000, 6.000], mean observation: 171.453 [19.000, 255.000], loss: 0.007497, mean_absolute_error: 0.880717, mean_q: 1.047804, mean_eps: 0.937608
   69662/2000000: episode: 239, duration: 8.260s, episode steps: 236, steps per second: 29, episode reward: 94.900, mean reward: 0.402 [-1.000, 0.500], mean action: 3.004 [0.000, 6.000], mean observation: 171.568 [24.000, 255.000], loss: 0.007532, mean_absolute_error: 0.882792, mean_q: 1.049103, mean_eps: 0.937410
   69880/2000000: episode: 240, duration: 7.594s, episode steps: 218, steps per second: 29, episode reward: 91.900, mean reward: 0.422 [-1.000, 0.500], mean action: 2.807 [0.000, 6.000], mean observation: 171.256 [23.000, 255.000], loss: 0.007498, mean_absolute_error: 0.876816, mean_q: 1.042563, mean_eps: 0.937207
   70086/2000000: episode: 241, duration: 7.096s, ep

   75781/2000000: episode: 264, duration: 7.126s, episode steps: 209, steps per second: 29, episode reward: 84.200, mean reward: 0.403 [-1.000, 0.500], mean action: 2.689 [0.000, 6.000], mean observation: 172.588 [23.000, 255.000], loss: 0.007282, mean_absolute_error: 1.358859, mean_q: 1.614150, mean_eps: 0.931892
   76058/2000000: episode: 265, duration: 10.502s, episode steps: 277, steps per second: 26, episode reward: 192.900, mean reward: 0.696 [-1.000, 1.000], mean action: 2.924 [0.000, 6.000], mean observation: 173.121 [24.000, 255.000], loss: 0.008122, mean_absolute_error: 1.370500, mean_q: 1.624534, mean_eps: 0.931672
   76287/2000000: episode: 266, duration: 7.823s, episode steps: 229, steps per second: 29, episode reward: 77.400, mean reward: 0.338 [-1.000, 0.500], mean action: 2.943 [0.000, 6.000], mean observation: 172.730 [24.000, 255.000], loss: 0.007851, mean_absolute_error: 1.379968, mean_q: 1.640491, mean_eps: 0.931445
   76538/2000000: episode: 267, duration: 9.070s, 

   82342/2000000: episode: 290, duration: 8.753s, episode steps: 249, steps per second: 28, episode reward: 95.800, mean reward: 0.385 [-1.000, 0.500], mean action: 2.924 [0.000, 6.000], mean observation: 172.312 [25.000, 255.000], loss: 0.007941, mean_absolute_error: 1.788301, mean_q: 2.111458, mean_eps: 0.926004
   82627/2000000: episode: 291, duration: 10.883s, episode steps: 285, steps per second: 26, episode reward: 197.400, mean reward: 0.693 [-1.000, 1.000], mean action: 3.182 [0.000, 6.000], mean observation: 172.117 [24.000, 255.000], loss: 0.008380, mean_absolute_error: 1.778099, mean_q: 2.098153, mean_eps: 0.925764
   82882/2000000: episode: 292, duration: 9.368s, episode steps: 255, steps per second: 27, episode reward: 170.800, mean reward: 0.670 [-1.000, 1.000], mean action: 3.110 [0.000, 6.000], mean observation: 171.196 [24.000, 255.000], loss: 0.008305, mean_absolute_error: 1.763196, mean_q: 2.086595, mean_eps: 0.925521
   83108/2000000: episode: 293, duration: 7.934s,

   88890/2000000: episode: 316, duration: 9.518s, episode steps: 254, steps per second: 27, episode reward: 123.000, mean reward: 0.484 [-1.000, 1.000], mean action: 3.063 [0.000, 6.000], mean observation: 172.517 [24.000, 255.000], loss: 0.006465, mean_absolute_error: 1.749101, mean_q: 2.059884, mean_eps: 0.920114
   89139/2000000: episode: 317, duration: 8.888s, episode steps: 249, steps per second: 28, episode reward: 143.400, mean reward: 0.576 [-1.000, 1.000], mean action: 2.908 [0.000, 6.000], mean observation: 171.728 [24.000, 255.000], loss: 0.006606, mean_absolute_error: 1.795779, mean_q: 2.119639, mean_eps: 0.919887
   89390/2000000: episode: 318, duration: 9.114s, episode steps: 251, steps per second: 28, episode reward: 129.900, mean reward: 0.518 [-1.000, 1.000], mean action: 3.000 [0.000, 6.000], mean observation: 172.347 [24.000, 255.000], loss: 0.006291, mean_absolute_error: 1.754709, mean_q: 2.066702, mean_eps: 0.919662
   89636/2000000: episode: 319, duration: 8.795s,

   95318/2000000: episode: 342, duration: 9.198s, episode steps: 252, steps per second: 27, episode reward: 114.300, mean reward: 0.454 [-1.000, 1.000], mean action: 3.091 [0.000, 6.000], mean observation: 171.902 [25.000, 255.000], loss: 0.007002, mean_absolute_error: 2.188545, mean_q: 2.585343, mean_eps: 0.914327
   95519/2000000: episode: 343, duration: 6.750s, episode steps: 201, steps per second: 30, episode reward: 69.400, mean reward: 0.345 [-1.000, 0.500], mean action: 2.900 [0.000, 6.000], mean observation: 172.458 [24.000, 255.000], loss: 0.006942, mean_absolute_error: 2.154993, mean_q: 2.536192, mean_eps: 0.914124
   95753/2000000: episode: 344, duration: 8.146s, episode steps: 234, steps per second: 29, episode reward: 93.500, mean reward: 0.400 [-1.000, 0.500], mean action: 2.936 [0.000, 6.000], mean observation: 172.232 [24.000, 255.000], loss: 0.007160, mean_absolute_error: 2.133841, mean_q: 2.514335, mean_eps: 0.913928
   96035/2000000: episode: 345, duration: 10.744s, 

  101808/2000000: episode: 368, duration: 7.918s, episode steps: 226, steps per second: 29, episode reward: 91.100, mean reward: 0.403 [-1.000, 0.500], mean action: 2.903 [0.000, 6.000], mean observation: 171.536 [24.000, 255.000], loss: 0.009143, mean_absolute_error: 2.572245, mean_q: 3.024036, mean_eps: 0.908475
  102023/2000000: episode: 369, duration: 7.201s, episode steps: 215, steps per second: 30, episode reward: 68.800, mean reward: 0.320 [-1.000, 0.500], mean action: 3.033 [0.000, 6.000], mean observation: 172.038 [24.000, 255.000], loss: 0.008134, mean_absolute_error: 2.598777, mean_q: 3.048682, mean_eps: 0.908277
  102277/2000000: episode: 370, duration: 9.212s, episode steps: 254, steps per second: 28, episode reward: 105.100, mean reward: 0.414 [-1.000, 0.500], mean action: 3.146 [0.000, 6.000], mean observation: 172.043 [24.000, 255.000], loss: 0.007596, mean_absolute_error: 2.549120, mean_q: 2.997563, mean_eps: 0.908065
  102527/2000000: episode: 371, duration: 8.897s, e

  108143/2000000: episode: 394, duration: 8.765s, episode steps: 245, steps per second: 28, episode reward: 101.400, mean reward: 0.414 [-1.000, 0.500], mean action: 3.016 [0.000, 6.000], mean observation: 171.551 [23.000, 255.000], loss: 0.007066, mean_absolute_error: 2.573818, mean_q: 3.020204, mean_eps: 0.902782
  108398/2000000: episode: 395, duration: 9.358s, episode steps: 255, steps per second: 27, episode reward: 125.300, mean reward: 0.491 [-1.000, 1.000], mean action: 3.094 [0.000, 6.000], mean observation: 172.076 [24.000, 255.000], loss: 0.006428, mean_absolute_error: 2.561363, mean_q: 3.002400, mean_eps: 0.902557
  108649/2000000: episode: 396, duration: 9.035s, episode steps: 251, steps per second: 28, episode reward: 132.000, mean reward: 0.526 [-1.000, 1.000], mean action: 3.020 [0.000, 6.000], mean observation: 171.297 [23.000, 255.000], loss: 0.006850, mean_absolute_error: 2.540250, mean_q: 2.978912, mean_eps: 0.902328
  108898/2000000: episode: 397, duration: 8.644s,

  114565/2000000: episode: 420, duration: 7.030s, episode steps: 204, steps per second: 29, episode reward: 84.900, mean reward: 0.416 [-1.000, 0.500], mean action: 2.755 [0.000, 6.000], mean observation: 172.335 [25.000, 255.000], loss: 0.007730, mean_absolute_error: 2.979076, mean_q: 3.499260, mean_eps: 0.896982
  114794/2000000: episode: 421, duration: 7.933s, episode steps: 229, steps per second: 29, episode reward: 89.000, mean reward: 0.389 [-1.000, 0.500], mean action: 2.956 [0.000, 6.000], mean observation: 172.507 [24.000, 255.000], loss: 0.006704, mean_absolute_error: 2.948874, mean_q: 3.457564, mean_eps: 0.896788
  115090/2000000: episode: 422, duration: 11.386s, episode steps: 296, steps per second: 26, episode reward: 221.400, mean reward: 0.748 [-1.000, 1.000], mean action: 3.034 [0.000, 6.000], mean observation: 172.926 [24.000, 255.000], loss: 0.007379, mean_absolute_error: 2.966253, mean_q: 3.478952, mean_eps: 0.896552
  115308/2000000: episode: 423, duration: 7.549s, 

  121039/2000000: episode: 446, duration: 8.057s, episode steps: 229, steps per second: 28, episode reward: 95.400, mean reward: 0.417 [-1.000, 0.500], mean action: 2.808 [0.000, 6.000], mean observation: 171.208 [25.000, 255.000], loss: 0.007942, mean_absolute_error: 3.426123, mean_q: 4.012323, mean_eps: 0.891168
  121287/2000000: episode: 447, duration: 9.018s, episode steps: 248, steps per second: 28, episode reward: 120.200, mean reward: 0.485 [-1.000, 1.000], mean action: 2.919 [0.000, 6.000], mean observation: 171.458 [24.000, 255.000], loss: 0.009256, mean_absolute_error: 3.394635, mean_q: 3.978244, mean_eps: 0.890954
  121536/2000000: episode: 448, duration: 8.928s, episode steps: 249, steps per second: 28, episode reward: 104.300, mean reward: 0.419 [-1.000, 1.000], mean action: 3.145 [0.000, 6.000], mean observation: 171.440 [24.000, 255.000], loss: 0.008144, mean_absolute_error: 3.386309, mean_q: 3.967311, mean_eps: 0.890731
  121824/2000000: episode: 449, duration: 11.019s,

  127364/2000000: episode: 472, duration: 8.690s, episode steps: 248, steps per second: 29, episode reward: 76.500, mean reward: 0.308 [-1.000, 0.500], mean action: 2.899 [0.000, 6.000], mean observation: 173.031 [23.000, 255.000], loss: 0.008237, mean_absolute_error: 3.283738, mean_q: 3.843803, mean_eps: 0.885486
  127615/2000000: episode: 473, duration: 9.039s, episode steps: 251, steps per second: 28, episode reward: 160.900, mean reward: 0.641 [-1.000, 1.000], mean action: 2.876 [0.000, 6.000], mean observation: 171.606 [24.000, 255.000], loss: 0.006606, mean_absolute_error: 3.326457, mean_q: 3.904470, mean_eps: 0.885261
  127881/2000000: episode: 474, duration: 10.088s, episode steps: 266, steps per second: 26, episode reward: 157.000, mean reward: 0.590 [-1.000, 1.000], mean action: 2.996 [0.000, 6.000], mean observation: 173.253 [24.000, 255.000], loss: 0.006845, mean_absolute_error: 3.330324, mean_q: 3.895534, mean_eps: 0.885027
  128113/2000000: episode: 475, duration: 8.185s,

  133548/2000000: episode: 498, duration: 7.582s, episode steps: 191, steps per second: 25, episode reward: 108.500, mean reward: 0.568 [-1.000, 1.000], mean action: 3.215 [0.000, 6.000], mean observation: 172.843 [24.000, 255.000], loss: 0.009873, mean_absolute_error: 3.727258, mean_q: 4.364660, mean_eps: 0.879893
  133834/2000000: episode: 499, duration: 10.956s, episode steps: 286, steps per second: 26, episode reward: 196.900, mean reward: 0.688 [-1.000, 1.000], mean action: 3.115 [0.000, 6.000], mean observation: 172.565 [25.000, 255.000], loss: 0.007985, mean_absolute_error: 3.735006, mean_q: 4.371575, mean_eps: 0.879679
  134080/2000000: episode: 500, duration: 8.732s, episode steps: 246, steps per second: 28, episode reward: 102.800, mean reward: 0.418 [-1.000, 1.000], mean action: 2.898 [0.000, 6.000], mean observation: 172.205 [24.000, 255.000], loss: 0.008017, mean_absolute_error: 3.735133, mean_q: 4.377145, mean_eps: 0.879440
  134320/2000000: episode: 501, duration: 8.436s

  139859/2000000: episode: 524, duration: 7.238s, episode steps: 217, steps per second: 30, episode reward: 71.400, mean reward: 0.329 [-1.000, 0.500], mean action: 2.977 [0.000, 6.000], mean observation: 171.628 [23.000, 255.000], loss: 0.007476, mean_absolute_error: 3.676206, mean_q: 4.314828, mean_eps: 0.874225
  140082/2000000: episode: 525, duration: 7.785s, episode steps: 223, steps per second: 29, episode reward: 92.800, mean reward: 0.416 [-1.000, 0.500], mean action: 2.798 [0.000, 6.000], mean observation: 171.146 [24.000, 255.000], loss: 0.022033, mean_absolute_error: 3.862968, mean_q: 4.528318, mean_eps: 0.874027
  140332/2000000: episode: 526, duration: 8.788s, episode steps: 250, steps per second: 28, episode reward: 93.100, mean reward: 0.372 [-1.000, 0.500], mean action: 3.100 [0.000, 6.000], mean observation: 171.789 [23.000, 255.000], loss: 0.014172, mean_absolute_error: 4.132523, mean_q: 4.852550, mean_eps: 0.873815
  140572/2000000: episode: 527, duration: 8.323s, ep

  146170/2000000: episode: 550, duration: 8.808s, episode steps: 247, steps per second: 28, episode reward: 106.000, mean reward: 0.429 [-1.000, 0.500], mean action: 2.984 [0.000, 6.000], mean observation: 172.004 [24.000, 255.000], loss: 0.008558, mean_absolute_error: 4.139194, mean_q: 4.848891, mean_eps: 0.868559
  146420/2000000: episode: 551, duration: 8.964s, episode steps: 250, steps per second: 28, episode reward: 129.300, mean reward: 0.517 [-1.000, 1.000], mean action: 3.092 [0.000, 6.000], mean observation: 171.272 [23.000, 255.000], loss: 0.008365, mean_absolute_error: 4.211241, mean_q: 4.934277, mean_eps: 0.868335
  146673/2000000: episode: 552, duration: 9.123s, episode steps: 253, steps per second: 28, episode reward: 133.600, mean reward: 0.528 [-1.000, 1.000], mean action: 3.000 [0.000, 6.000], mean observation: 171.409 [24.000, 255.000], loss: 0.007875, mean_absolute_error: 4.099833, mean_q: 4.793957, mean_eps: 0.868109
  146914/2000000: episode: 553, duration: 8.514s,

  152678/2000000: episode: 576, duration: 9.947s, episode steps: 265, steps per second: 27, episode reward: 180.900, mean reward: 0.683 [-1.000, 1.000], mean action: 3.140 [0.000, 6.000], mean observation: 172.392 [21.000, 255.000], loss: 0.010144, mean_absolute_error: 4.564602, mean_q: 5.337723, mean_eps: 0.862709
  152928/2000000: episode: 577, duration: 9.101s, episode steps: 250, steps per second: 27, episode reward: 122.600, mean reward: 0.490 [-1.000, 1.000], mean action: 2.888 [0.000, 6.000], mean observation: 172.421 [24.000, 255.000], loss: 0.010873, mean_absolute_error: 4.710052, mean_q: 5.506902, mean_eps: 0.862478
  153195/2000000: episode: 578, duration: 10.093s, episode steps: 267, steps per second: 26, episode reward: 187.000, mean reward: 0.700 [-1.000, 1.000], mean action: 3.131 [0.000, 6.000], mean observation: 172.777 [24.000, 255.000], loss: 0.009941, mean_absolute_error: 4.668082, mean_q: 5.468372, mean_eps: 0.862246
  153480/2000000: episode: 579, duration: 10.955

  159476/2000000: episode: 602, duration: 8.693s, episode steps: 246, steps per second: 28, episode reward: 99.900, mean reward: 0.406 [-1.000, 0.500], mean action: 2.886 [0.000, 6.000], mean observation: 171.663 [23.000, 255.000], loss: 0.008390, mean_absolute_error: 4.623584, mean_q: 5.411893, mean_eps: 0.856583
  159790/2000000: episode: 603, duration: 12.298s, episode steps: 314, steps per second: 26, episode reward: 148.200, mean reward: 0.472 [-1.000, 1.000], mean action: 3.280 [0.000, 6.000], mean observation: 173.350 [21.000, 255.000], loss: 0.009234, mean_absolute_error: 4.646150, mean_q: 5.424285, mean_eps: 0.856331
  160074/2000000: episode: 604, duration: 10.644s, episode steps: 284, steps per second: 27, episode reward: 170.700, mean reward: 0.601 [-1.000, 1.000], mean action: 3.342 [0.000, 6.000], mean observation: 171.706 [23.000, 255.000], loss: 0.022390, mean_absolute_error: 4.692882, mean_q: 5.482770, mean_eps: 0.856061
  160350/2000000: episode: 605, duration: 10.370

  165846/2000000: episode: 628, duration: 10.032s, episode steps: 268, steps per second: 27, episode reward: 172.100, mean reward: 0.642 [-1.000, 1.000], mean action: 2.989 [0.000, 6.000], mean observation: 173.335 [23.000, 255.000], loss: 0.011363, mean_absolute_error: 5.001504, mean_q: 5.847563, mean_eps: 0.850859
  166126/2000000: episode: 629, duration: 10.614s, episode steps: 280, steps per second: 26, episode reward: 164.200, mean reward: 0.586 [-1.000, 1.000], mean action: 3.004 [0.000, 6.000], mean observation: 173.035 [24.000, 255.000], loss: 0.011853, mean_absolute_error: 5.009230, mean_q: 5.852277, mean_eps: 0.850613
  166380/2000000: episode: 630, duration: 9.343s, episode steps: 254, steps per second: 27, episode reward: 186.500, mean reward: 0.734 [-1.000, 1.000], mean action: 3.043 [0.000, 6.000], mean observation: 172.072 [24.000, 255.000], loss: 0.010325, mean_absolute_error: 5.120680, mean_q: 5.991822, mean_eps: 0.850373
  166690/2000000: episode: 631, duration: 12.09

  172354/2000000: episode: 654, duration: 10.626s, episode steps: 280, steps per second: 26, episode reward: 201.100, mean reward: 0.718 [-1.000, 1.000], mean action: 3.121 [0.000, 6.000], mean observation: 173.114 [24.000, 255.000], loss: 0.012773, mean_absolute_error: 5.382006, mean_q: 6.293419, mean_eps: 0.845007
  172609/2000000: episode: 655, duration: 9.379s, episode steps: 255, steps per second: 27, episode reward: 136.200, mean reward: 0.534 [-1.000, 1.000], mean action: 2.969 [0.000, 6.000], mean observation: 171.778 [24.000, 255.000], loss: 0.012036, mean_absolute_error: 5.346435, mean_q: 6.253328, mean_eps: 0.844766
  172863/2000000: episode: 656, duration: 8.936s, episode steps: 254, steps per second: 28, episode reward: 74.300, mean reward: 0.293 [-1.000, 0.500], mean action: 2.992 [0.000, 6.000], mean observation: 172.764 [25.000, 255.000], loss: 0.012391, mean_absolute_error: 5.369228, mean_q: 6.269170, mean_eps: 0.844538
  173114/2000000: episode: 657, duration: 9.086s,

  178947/2000000: episode: 680, duration: 8.416s, episode steps: 242, steps per second: 29, episode reward: 99.500, mean reward: 0.411 [-1.000, 0.500], mean action: 2.781 [0.000, 6.000], mean observation: 171.831 [25.000, 255.000], loss: 0.013439, mean_absolute_error: 5.428080, mean_q: 6.348907, mean_eps: 0.839057
  179205/2000000: episode: 681, duration: 9.452s, episode steps: 258, steps per second: 27, episode reward: 167.900, mean reward: 0.651 [-1.000, 1.000], mean action: 3.167 [0.000, 6.000], mean observation: 170.890 [24.000, 255.000], loss: 0.013990, mean_absolute_error: 5.322096, mean_q: 6.216038, mean_eps: 0.838832
  179406/2000000: episode: 682, duration: 6.904s, episode steps: 201, steps per second: 29, episode reward: 80.200, mean reward: 0.399 [-1.000, 0.500], mean action: 2.905 [0.000, 6.000], mean observation: 171.302 [24.000, 255.000], loss: 0.014796, mean_absolute_error: 5.405493, mean_q: 6.319936, mean_eps: 0.838625
  179658/2000000: episode: 683, duration: 8.974s, e

  185381/2000000: episode: 706, duration: 10.644s, episode steps: 280, steps per second: 26, episode reward: 170.600, mean reward: 0.609 [-1.000, 1.000], mean action: 3.143 [0.000, 6.000], mean observation: 172.931 [21.000, 255.000], loss: 0.015987, mean_absolute_error: 5.860912, mean_q: 6.852692, mean_eps: 0.833282
  185593/2000000: episode: 707, duration: 8.320s, episode steps: 212, steps per second: 25, episode reward: 108.200, mean reward: 0.510 [-1.000, 1.000], mean action: 3.340 [0.000, 6.000], mean observation: 172.928 [24.000, 255.000], loss: 0.013292, mean_absolute_error: 5.775025, mean_q: 6.748895, mean_eps: 0.833061
  185788/2000000: episode: 708, duration: 6.668s, episode steps: 195, steps per second: 29, episode reward: 70.800, mean reward: 0.363 [-1.000, 0.500], mean action: 2.779 [0.000, 6.000], mean observation: 172.831 [24.000, 255.000], loss: 0.019953, mean_absolute_error: 5.843306, mean_q: 6.821886, mean_eps: 0.832879
  186007/2000000: episode: 709, duration: 7.563s,

  191628/2000000: episode: 732, duration: 10.611s, episode steps: 283, steps per second: 27, episode reward: 149.100, mean reward: 0.527 [-1.000, 1.000], mean action: 3.064 [0.000, 6.000], mean observation: 172.630 [24.000, 255.000], loss: 0.021512, mean_absolute_error: 6.210774, mean_q: 7.269970, mean_eps: 0.827663
  191882/2000000: episode: 733, duration: 9.313s, episode steps: 254, steps per second: 27, episode reward: 125.600, mean reward: 0.494 [-1.000, 1.000], mean action: 2.953 [0.000, 6.000], mean observation: 171.937 [24.000, 255.000], loss: 0.019449, mean_absolute_error: 6.339170, mean_q: 7.424843, mean_eps: 0.827421
  192123/2000000: episode: 734, duration: 8.413s, episode steps: 241, steps per second: 29, episode reward: 98.600, mean reward: 0.409 [-1.000, 0.500], mean action: 2.913 [0.000, 6.000], mean observation: 172.485 [25.000, 255.000], loss: 0.020144, mean_absolute_error: 6.316121, mean_q: 7.383019, mean_eps: 0.827198
  192386/2000000: episode: 735, duration: 9.896s,

  198181/2000000: episode: 758, duration: 9.642s, episode steps: 250, steps per second: 26, episode reward: 121.300, mean reward: 0.485 [-1.000, 1.000], mean action: 3.116 [0.000, 6.000], mean observation: 172.149 [24.000, 255.000], loss: 0.017998, mean_absolute_error: 6.260537, mean_q: 7.310789, mean_eps: 0.821750
  198379/2000000: episode: 759, duration: 6.765s, episode steps: 198, steps per second: 29, episode reward: 73.900, mean reward: 0.373 [-1.000, 0.500], mean action: 2.768 [0.000, 6.000], mean observation: 172.609 [24.000, 255.000], loss: 0.017243, mean_absolute_error: 6.400150, mean_q: 7.495709, mean_eps: 0.821548
  198571/2000000: episode: 760, duration: 6.455s, episode steps: 192, steps per second: 30, episode reward: 61.700, mean reward: 0.321 [-1.000, 0.500], mean action: 2.760 [0.000, 6.000], mean observation: 173.312 [24.000, 255.000], loss: 0.020352, mean_absolute_error: 6.293842, mean_q: 7.354716, mean_eps: 0.821373
  198818/2000000: episode: 761, duration: 9.601s, e

  204601/2000000: episode: 784, duration: 9.882s, episode steps: 255, steps per second: 26, episode reward: 167.900, mean reward: 0.658 [-1.000, 1.000], mean action: 3.173 [0.000, 6.000], mean observation: 172.075 [24.000, 255.000], loss: 0.024184, mean_absolute_error: 6.823140, mean_q: 7.975423, mean_eps: 0.815973
  204849/2000000: episode: 785, duration: 9.381s, episode steps: 248, steps per second: 26, episode reward: 148.700, mean reward: 0.600 [-1.000, 1.000], mean action: 2.956 [0.000, 6.000], mean observation: 171.856 [23.000, 255.000], loss: 0.021854, mean_absolute_error: 6.785598, mean_q: 7.925518, mean_eps: 0.815747
  205072/2000000: episode: 786, duration: 7.925s, episode steps: 223, steps per second: 28, episode reward: 94.400, mean reward: 0.423 [-1.000, 0.500], mean action: 3.063 [0.000, 6.000], mean observation: 172.217 [23.000, 255.000], loss: 0.026897, mean_absolute_error: 6.866502, mean_q: 8.025454, mean_eps: 0.815536
  205346/2000000: episode: 787, duration: 10.632s,

  211178/2000000: episode: 810, duration: 11.217s, episode steps: 290, steps per second: 26, episode reward: 203.900, mean reward: 0.703 [-1.000, 1.000], mean action: 3.169 [0.000, 6.000], mean observation: 172.274 [24.000, 255.000], loss: 0.025420, mean_absolute_error: 7.152176, mean_q: 8.368780, mean_eps: 0.810071
  211428/2000000: episode: 811, duration: 9.075s, episode steps: 250, steps per second: 28, episode reward: 157.700, mean reward: 0.631 [-1.000, 1.000], mean action: 2.976 [0.000, 6.000], mean observation: 170.851 [24.000, 255.000], loss: 0.025782, mean_absolute_error: 7.034053, mean_q: 8.212449, mean_eps: 0.809828
  211678/2000000: episode: 812, duration: 9.123s, episode steps: 250, steps per second: 27, episode reward: 165.900, mean reward: 0.664 [-1.000, 1.000], mean action: 3.144 [0.000, 6.000], mean observation: 170.921 [24.000, 255.000], loss: 0.028965, mean_absolute_error: 7.208268, mean_q: 8.415497, mean_eps: 0.809603
  211933/2000000: episode: 813, duration: 9.336s

  217973/2000000: episode: 836, duration: 74.547s, episode steps: 263, steps per second: 4, episode reward: 190.300, mean reward: 0.724 [-1.000, 1.000], mean action: 2.833 [0.000, 6.000], mean observation: 174.825 [24.000, 255.000], loss: 0.024540, mean_absolute_error: 7.155634, mean_q: 8.359709, mean_eps: 0.803942
  218279/2000000: episode: 837, duration: 98.260s, episode steps: 306, steps per second: 3, episode reward: 142.400, mean reward: 0.465 [-1.000, 1.000], mean action: 2.843 [0.000, 6.000], mean observation: 176.334 [25.000, 255.000], loss: 0.026593, mean_absolute_error: 7.365689, mean_q: 8.601116, mean_eps: 0.803687
  218495/2000000: episode: 838, duration: 8.782s, episode steps: 216, steps per second: 25, episode reward: 84.100, mean reward: 0.389 [-1.000, 0.500], mean action: 3.037 [0.000, 6.000], mean observation: 172.515 [24.000, 255.000], loss: 0.020894, mean_absolute_error: 6.983053, mean_q: 8.168962, mean_eps: 0.803453
  218794/2000000: episode: 839, duration: 11.850s,

  224574/2000000: episode: 862, duration: 9.109s, episode steps: 252, steps per second: 28, episode reward: 125.900, mean reward: 0.500 [-1.000, 1.000], mean action: 3.020 [0.000, 6.000], mean observation: 172.258 [24.000, 255.000], loss: 0.027604, mean_absolute_error: 7.525532, mean_q: 8.811425, mean_eps: 0.797997
  224831/2000000: episode: 863, duration: 9.453s, episode steps: 257, steps per second: 27, episode reward: 186.900, mean reward: 0.727 [-1.000, 1.000], mean action: 2.844 [0.000, 6.000], mean observation: 171.694 [23.000, 255.000], loss: 0.033764, mean_absolute_error: 7.566266, mean_q: 8.851133, mean_eps: 0.797768
  225084/2000000: episode: 864, duration: 9.728s, episode steps: 253, steps per second: 26, episode reward: 151.300, mean reward: 0.598 [-1.000, 1.000], mean action: 3.071 [0.000, 6.000], mean observation: 172.258 [24.000, 255.000], loss: 0.029457, mean_absolute_error: 7.614922, mean_q: 8.889498, mean_eps: 0.797540
  225367/2000000: episode: 865, duration: 10.865s

  231054/2000000: episode: 888, duration: 8.703s, episode steps: 244, steps per second: 28, episode reward: 137.500, mean reward: 0.564 [-1.000, 1.000], mean action: 2.955 [0.000, 6.000], mean observation: 171.203 [24.000, 255.000], loss: 0.033920, mean_absolute_error: 8.046097, mean_q: 9.405686, mean_eps: 0.792161
  231347/2000000: episode: 889, duration: 10.972s, episode steps: 293, steps per second: 27, episode reward: 126.200, mean reward: 0.431 [-1.000, 0.500], mean action: 3.116 [0.000, 6.000], mean observation: 172.496 [24.000, 255.000], loss: 0.039496, mean_absolute_error: 8.041105, mean_q: 9.399513, mean_eps: 0.791920
  231594/2000000: episode: 890, duration: 8.832s, episode steps: 247, steps per second: 28, episode reward: 130.200, mean reward: 0.527 [-1.000, 1.000], mean action: 2.935 [0.000, 6.000], mean observation: 171.086 [23.000, 255.000], loss: 0.035896, mean_absolute_error: 7.918229, mean_q: 9.254613, mean_eps: 0.791677
  231857/2000000: episode: 891, duration: 9.867s

  237806/2000000: episode: 914, duration: 11.076s, episode steps: 288, steps per second: 26, episode reward: 196.800, mean reward: 0.683 [-1.000, 1.000], mean action: 3.212 [0.000, 6.000], mean observation: 171.835 [24.000, 255.000], loss: 0.032333, mean_absolute_error: 7.998887, mean_q: 9.348995, mean_eps: 0.786104
  238070/2000000: episode: 915, duration: 9.830s, episode steps: 264, steps per second: 27, episode reward: 158.300, mean reward: 0.600 [-1.000, 1.000], mean action: 3.133 [0.000, 6.000], mean observation: 172.007 [24.000, 255.000], loss: 0.035233, mean_absolute_error: 8.109464, mean_q: 9.487122, mean_eps: 0.785856
  238355/2000000: episode: 916, duration: 10.796s, episode steps: 285, steps per second: 26, episode reward: 200.900, mean reward: 0.705 [-1.000, 1.000], mean action: 3.182 [0.000, 6.000], mean observation: 172.139 [24.000, 255.000], loss: 0.030592, mean_absolute_error: 8.037193, mean_q: 9.393259, mean_eps: 0.785609
  238619/2000000: episode: 917, duration: 9.702

  244558/2000000: episode: 940, duration: 7.948s, episode steps: 219, steps per second: 28, episode reward: 95.600, mean reward: 0.437 [-1.000, 1.000], mean action: 2.826 [0.000, 6.000], mean observation: 171.889 [24.000, 255.000], loss: 0.037195, mean_absolute_error: 8.517441, mean_q: 9.951127, mean_eps: 0.779997
  244850/2000000: episode: 941, duration: 11.074s, episode steps: 292, steps per second: 26, episode reward: 182.300, mean reward: 0.624 [-1.000, 1.000], mean action: 3.192 [0.000, 6.000], mean observation: 173.497 [22.000, 255.000], loss: 0.035014, mean_absolute_error: 8.790177, mean_q: 10.256199, mean_eps: 0.779766
  245100/2000000: episode: 942, duration: 9.562s, episode steps: 250, steps per second: 26, episode reward: 173.200, mean reward: 0.693 [-1.000, 1.000], mean action: 2.976 [0.000, 6.000], mean observation: 172.377 [24.000, 255.000], loss: 0.038454, mean_absolute_error: 8.861103, mean_q: 10.345149, mean_eps: 0.779523
  245358/2000000: episode: 943, duration: 9.755

  251095/2000000: episode: 966, duration: 8.989s, episode steps: 249, steps per second: 28, episode reward: 163.400, mean reward: 0.656 [-1.000, 1.000], mean action: 3.237 [0.000, 6.000], mean observation: 170.758 [25.000, 255.000], loss: 0.044603, mean_absolute_error: 9.095455, mean_q: 10.639079, mean_eps: 0.774127
  251356/2000000: episode: 967, duration: 9.896s, episode steps: 261, steps per second: 26, episode reward: 173.100, mean reward: 0.663 [-1.000, 1.000], mean action: 3.184 [0.000, 6.000], mean observation: 171.490 [24.000, 255.000], loss: 0.040206, mean_absolute_error: 9.231750, mean_q: 10.795252, mean_eps: 0.773898
  251591/2000000: episode: 968, duration: 8.049s, episode steps: 235, steps per second: 29, episode reward: 72.400, mean reward: 0.308 [-1.000, 0.500], mean action: 2.860 [0.000, 6.000], mean observation: 172.444 [23.000, 255.000], loss: 0.045531, mean_absolute_error: 9.245988, mean_q: 10.801260, mean_eps: 0.773675
  251877/2000000: episode: 969, duration: 10.80

  257612/2000000: episode: 992, duration: 8.669s, episode steps: 240, steps per second: 28, episode reward: 152.000, mean reward: 0.633 [-1.000, 1.000], mean action: 2.913 [0.000, 6.000], mean observation: 171.720 [24.000, 255.000], loss: 0.037425, mean_absolute_error: 8.988063, mean_q: 10.497750, mean_eps: 0.768259
  257894/2000000: episode: 993, duration: 10.806s, episode steps: 282, steps per second: 26, episode reward: 192.300, mean reward: 0.682 [-1.000, 1.000], mean action: 3.252 [0.000, 6.000], mean observation: 172.772 [24.000, 255.000], loss: 0.039240, mean_absolute_error: 9.079837, mean_q: 10.594860, mean_eps: 0.768023
  258148/2000000: episode: 994, duration: 9.570s, episode steps: 254, steps per second: 27, episode reward: 120.000, mean reward: 0.472 [-1.000, 1.000], mean action: 3.012 [0.000, 6.000], mean observation: 172.549 [24.000, 255.000], loss: 0.037669, mean_absolute_error: 9.219564, mean_q: 10.764686, mean_eps: 0.767782
  258429/2000000: episode: 995, duration: 10.

  264339/2000000: episode: 1018, duration: 11.211s, episode steps: 290, steps per second: 26, episode reward: 175.900, mean reward: 0.607 [-1.000, 1.000], mean action: 3.048 [0.000, 6.000], mean observation: 173.449 [24.000, 255.000], loss: 0.042496, mean_absolute_error: 9.484577, mean_q: 11.070338, mean_eps: 0.762225
  264576/2000000: episode: 1019, duration: 8.408s, episode steps: 237, steps per second: 28, episode reward: 99.800, mean reward: 0.421 [-1.000, 0.500], mean action: 2.945 [0.000, 6.000], mean observation: 171.965 [24.000, 255.000], loss: 0.042131, mean_absolute_error: 9.499712, mean_q: 11.091222, mean_eps: 0.761990
  264802/2000000: episode: 1020, duration: 10.026s, episode steps: 226, steps per second: 23, episode reward: 145.500, mean reward: 0.644 [-1.000, 1.000], mean action: 3.111 [0.000, 6.000], mean observation: 172.379 [24.000, 255.000], loss: 0.038584, mean_absolute_error: 9.363643, mean_q: 10.935456, mean_eps: 0.761781
  264962/2000000: episode: 1021, duration:

  270584/2000000: episode: 1044, duration: 5.100s, episode steps: 145, steps per second: 28, episode reward: 54.200, mean reward: 0.374 [-1.000, 0.500], mean action: 2.269 [0.000, 6.000], mean observation: 171.501 [24.000, 255.000], loss: 0.050698, mean_absolute_error: 9.725435, mean_q: 11.372896, mean_eps: 0.756541
  270837/2000000: episode: 1045, duration: 9.307s, episode steps: 253, steps per second: 27, episode reward: 151.200, mean reward: 0.598 [-1.000, 1.000], mean action: 3.245 [0.000, 6.000], mean observation: 170.263 [22.000, 255.000], loss: 0.048165, mean_absolute_error: 9.894369, mean_q: 11.559208, mean_eps: 0.756361
  271134/2000000: episode: 1046, duration: 11.367s, episode steps: 297, steps per second: 26, episode reward: 234.100, mean reward: 0.788 [-1.000, 1.000], mean action: 3.236 [0.000, 6.000], mean observation: 171.662 [24.000, 255.000], loss: 0.043878, mean_absolute_error: 9.693371, mean_q: 11.314323, mean_eps: 0.756113
  271405/2000000: episode: 1047, duration: 

  277153/2000000: episode: 1070, duration: 10.745s, episode steps: 283, steps per second: 26, episode reward: 181.600, mean reward: 0.642 [-1.000, 1.000], mean action: 3.085 [0.000, 6.000], mean observation: 173.442 [24.000, 255.000], loss: 0.050457, mean_absolute_error: 9.988284, mean_q: 11.670779, mean_eps: 0.750689
  277396/2000000: episode: 1071, duration: 8.771s, episode steps: 243, steps per second: 28, episode reward: 137.800, mean reward: 0.567 [-1.000, 1.000], mean action: 2.975 [0.000, 6.000], mean observation: 171.859 [25.000, 255.000], loss: 0.049682, mean_absolute_error: 10.034572, mean_q: 11.704389, mean_eps: 0.750453
  277563/2000000: episode: 1072, duration: 7.006s, episode steps: 167, steps per second: 24, episode reward: 88.800, mean reward: 0.532 [-1.000, 1.000], mean action: 3.263 [0.000, 6.000], mean observation: 173.349 [22.000, 255.000], loss: 0.047821, mean_absolute_error: 10.133456, mean_q: 11.822540, mean_eps: 0.750270
  277778/2000000: episode: 1073, duration

  283554/2000000: episode: 1096, duration: 8.211s, episode steps: 230, steps per second: 28, episode reward: 99.700, mean reward: 0.433 [-1.000, 1.000], mean action: 2.891 [0.000, 6.000], mean observation: 171.698 [21.000, 255.000], loss: 0.046853, mean_absolute_error: 10.429493, mean_q: 12.183919, mean_eps: 0.744906
  283707/2000000: episode: 1097, duration: 6.240s, episode steps: 153, steps per second: 25, episode reward: 93.200, mean reward: 0.609 [-1.000, 1.000], mean action: 3.562 [0.000, 6.000], mean observation: 173.462 [23.000, 255.000], loss: 0.057224, mean_absolute_error: 10.176443, mean_q: 11.880965, mean_eps: 0.744733
  283863/2000000: episode: 1098, duration: 6.393s, episode steps: 156, steps per second: 24, episode reward: 89.100, mean reward: 0.571 [-1.000, 1.000], mean action: 3.551 [0.000, 6.000], mean observation: 173.644 [25.000, 255.000], loss: 0.046994, mean_absolute_error: 10.511247, mean_q: 12.274071, mean_eps: 0.744594
  284069/2000000: episode: 1099, duration: 

  289733/2000000: episode: 1122, duration: 10.938s, episode steps: 287, steps per second: 26, episode reward: 189.900, mean reward: 0.662 [-1.000, 1.000], mean action: 3.206 [0.000, 6.000], mean observation: 172.276 [23.000, 255.000], loss: 0.048460, mean_absolute_error: 10.746642, mean_q: 12.550038, mean_eps: 0.739369
  289938/2000000: episode: 1123, duration: 8.090s, episode steps: 205, steps per second: 25, episode reward: 100.900, mean reward: 0.492 [-1.000, 1.000], mean action: 3.020 [0.000, 6.000], mean observation: 173.230 [24.000, 255.000], loss: 0.052254, mean_absolute_error: 10.660540, mean_q: 12.458622, mean_eps: 0.739148
  290195/2000000: episode: 1124, duration: 9.557s, episode steps: 257, steps per second: 27, episode reward: 197.400, mean reward: 0.768 [-1.000, 1.000], mean action: 2.930 [0.000, 6.000], mean observation: 171.717 [20.000, 255.000], loss: 0.065937, mean_absolute_error: 10.934090, mean_q: 12.791857, mean_eps: 0.738941
  290482/2000000: episode: 1125, durati

  296409/2000000: episode: 1148, duration: 8.715s, episode steps: 245, steps per second: 28, episode reward: 146.400, mean reward: 0.598 [-1.000, 1.000], mean action: 3.049 [0.000, 6.000], mean observation: 171.997 [24.000, 255.000], loss: 0.053808, mean_absolute_error: 10.731055, mean_q: 12.520490, mean_eps: 0.733343
  296673/2000000: episode: 1149, duration: 9.929s, episode steps: 264, steps per second: 27, episode reward: 193.700, mean reward: 0.734 [-1.000, 1.000], mean action: 3.015 [0.000, 6.000], mean observation: 172.464 [24.000, 255.000], loss: 0.054497, mean_absolute_error: 10.956935, mean_q: 12.805348, mean_eps: 0.733112
  296966/2000000: episode: 1150, duration: 11.322s, episode steps: 293, steps per second: 26, episode reward: 207.000, mean reward: 0.706 [-1.000, 1.000], mean action: 3.068 [0.000, 6.000], mean observation: 172.831 [23.000, 255.000], loss: 0.050473, mean_absolute_error: 10.866408, mean_q: 12.677456, mean_eps: 0.732862
  297222/2000000: episode: 1151, durati

  302933/2000000: episode: 1174, duration: 8.746s, episode steps: 243, steps per second: 28, episode reward: 163.100, mean reward: 0.671 [-1.000, 1.000], mean action: 2.860 [0.000, 6.000], mean observation: 170.818 [23.000, 255.000], loss: 0.059813, mean_absolute_error: 11.389171, mean_q: 13.300202, mean_eps: 0.727469
  303127/2000000: episode: 1175, duration: 6.489s, episode steps: 194, steps per second: 30, episode reward: 69.100, mean reward: 0.356 [-1.000, 0.500], mean action: 2.613 [0.000, 6.000], mean observation: 172.295 [24.000, 255.000], loss: 0.067062, mean_absolute_error: 11.259387, mean_q: 13.149539, mean_eps: 0.727273
  303413/2000000: episode: 1176, duration: 10.967s, episode steps: 286, steps per second: 26, episode reward: 195.200, mean reward: 0.683 [-1.000, 1.000], mean action: 2.885 [0.000, 6.000], mean observation: 172.804 [24.000, 255.000], loss: 0.063512, mean_absolute_error: 11.450841, mean_q: 13.380966, mean_eps: 0.727057
  303693/2000000: episode: 1177, duratio

  309298/2000000: episode: 1200, duration: 9.906s, episode steps: 262, steps per second: 26, episode reward: 171.100, mean reward: 0.653 [-1.000, 1.000], mean action: 3.122 [0.000, 6.000], mean observation: 171.173 [24.000, 255.000], loss: 0.047315, mean_absolute_error: 11.113039, mean_q: 12.986655, mean_eps: 0.721751
  309577/2000000: episode: 1201, duration: 10.821s, episode steps: 279, steps per second: 26, episode reward: 203.400, mean reward: 0.729 [-1.000, 1.000], mean action: 3.326 [0.000, 6.000], mean observation: 172.082 [24.000, 255.000], loss: 0.049497, mean_absolute_error: 11.164608, mean_q: 13.036603, mean_eps: 0.721506
  309869/2000000: episode: 1202, duration: 11.424s, episode steps: 292, steps per second: 26, episode reward: 199.000, mean reward: 0.682 [-1.000, 1.000], mean action: 3.178 [0.000, 6.000], mean observation: 172.007 [24.000, 255.000], loss: 0.056583, mean_absolute_error: 11.257724, mean_q: 13.161900, mean_eps: 0.721248
  310071/2000000: episode: 1203, durat

  315802/2000000: episode: 1226, duration: 9.555s, episode steps: 255, steps per second: 27, episode reward: 165.600, mean reward: 0.649 [-1.000, 1.000], mean action: 2.992 [0.000, 6.000], mean observation: 172.154 [24.000, 255.000], loss: 0.064717, mean_absolute_error: 11.612863, mean_q: 13.567744, mean_eps: 0.715893
  316076/2000000: episode: 1227, duration: 10.868s, episode steps: 274, steps per second: 25, episode reward: 203.400, mean reward: 0.742 [-1.000, 1.000], mean action: 3.263 [0.000, 6.000], mean observation: 172.722 [23.000, 255.000], loss: 0.057033, mean_absolute_error: 11.728970, mean_q: 13.700046, mean_eps: 0.715656
  316279/2000000: episode: 1228, duration: 8.385s, episode steps: 203, steps per second: 24, episode reward: 103.000, mean reward: 0.507 [-1.000, 1.000], mean action: 3.365 [0.000, 6.000], mean observation: 174.133 [23.000, 255.000], loss: 0.065088, mean_absolute_error: 11.435215, mean_q: 13.350845, mean_eps: 0.715442
  316541/2000000: episode: 1229, durati

  322349/2000000: episode: 1252, duration: 10.995s, episode steps: 285, steps per second: 26, episode reward: 171.300, mean reward: 0.601 [-1.000, 1.000], mean action: 3.074 [0.000, 6.000], mean observation: 172.795 [21.000, 255.000], loss: 0.067575, mean_absolute_error: 11.877608, mean_q: 13.873940, mean_eps: 0.710015
  322556/2000000: episode: 1253, duration: 8.445s, episode steps: 207, steps per second: 25, episode reward: 114.100, mean reward: 0.551 [-1.000, 1.000], mean action: 3.295 [0.000, 6.000], mean observation: 172.611 [23.000, 255.000], loss: 0.064991, mean_absolute_error: 12.307302, mean_q: 14.395272, mean_eps: 0.709793
  322719/2000000: episode: 1254, duration: 6.801s, episode steps: 163, steps per second: 24, episode reward: 91.100, mean reward: 0.559 [-1.000, 1.000], mean action: 3.350 [0.000, 6.000], mean observation: 172.604 [25.000, 255.000], loss: 0.061089, mean_absolute_error: 12.076576, mean_q: 14.117510, mean_eps: 0.709628
  322978/2000000: episode: 1255, duratio

  328986/2000000: episode: 1278, duration: 8.662s, episode steps: 244, steps per second: 28, episode reward: 161.100, mean reward: 0.660 [-1.000, 1.000], mean action: 3.078 [0.000, 6.000], mean observation: 171.987 [25.000, 255.000], loss: 0.057141, mean_absolute_error: 12.154183, mean_q: 14.207436, mean_eps: 0.704022
  329275/2000000: episode: 1279, duration: 10.811s, episode steps: 289, steps per second: 27, episode reward: 141.100, mean reward: 0.488 [-1.000, 1.000], mean action: 3.138 [0.000, 6.000], mean observation: 174.133 [23.000, 255.000], loss: 0.070147, mean_absolute_error: 12.132373, mean_q: 14.175129, mean_eps: 0.703783
  329551/2000000: episode: 1280, duration: 10.362s, episode steps: 276, steps per second: 27, episode reward: 155.100, mean reward: 0.562 [-1.000, 1.000], mean action: 3.196 [0.000, 6.000], mean observation: 172.716 [24.000, 255.000], loss: 0.065184, mean_absolute_error: 12.256013, mean_q: 14.328533, mean_eps: 0.703529
  329824/2000000: episode: 1281, durat

  335492/2000000: episode: 1304, duration: 9.994s, episode steps: 264, steps per second: 26, episode reward: 165.700, mean reward: 0.628 [-1.000, 1.000], mean action: 3.076 [0.000, 6.000], mean observation: 171.771 [24.000, 255.000], loss: 0.065194, mean_absolute_error: 12.672291, mean_q: 14.818490, mean_eps: 0.698178
  335737/2000000: episode: 1305, duration: 9.004s, episode steps: 245, steps per second: 27, episode reward: 163.600, mean reward: 0.668 [-1.000, 1.000], mean action: 2.890 [0.000, 6.000], mean observation: 171.522 [24.000, 255.000], loss: 0.070084, mean_absolute_error: 12.803909, mean_q: 14.962217, mean_eps: 0.697947
  336022/2000000: episode: 1306, duration: 11.332s, episode steps: 285, steps per second: 25, episode reward: 202.100, mean reward: 0.709 [-1.000, 1.000], mean action: 3.340 [0.000, 6.000], mean observation: 172.259 [23.000, 255.000], loss: 0.071311, mean_absolute_error: 12.915824, mean_q: 15.100575, mean_eps: 0.697708
  336274/2000000: episode: 1307, durati

  342230/2000000: episode: 1330, duration: 8.694s, episode steps: 244, steps per second: 28, episode reward: 156.300, mean reward: 0.641 [-1.000, 1.000], mean action: 3.029 [0.000, 6.000], mean observation: 170.191 [24.000, 255.000], loss: 0.074018, mean_absolute_error: 13.320217, mean_q: 15.589294, mean_eps: 0.692103
  342517/2000000: episode: 1331, duration: 11.079s, episode steps: 287, steps per second: 26, episode reward: 198.700, mean reward: 0.692 [-1.000, 1.000], mean action: 3.136 [0.000, 6.000], mean observation: 171.751 [23.000, 255.000], loss: 0.077453, mean_absolute_error: 13.234556, mean_q: 15.473395, mean_eps: 0.691863
  342774/2000000: episode: 1332, duration: 9.455s, episode steps: 257, steps per second: 27, episode reward: 176.500, mean reward: 0.687 [-1.000, 1.000], mean action: 3.089 [0.000, 6.000], mean observation: 170.575 [23.000, 255.000], loss: 0.074501, mean_absolute_error: 13.125982, mean_q: 15.351402, mean_eps: 0.691619
  343027/2000000: episode: 1333, durati

  349042/2000000: episode: 1356, duration: 8.227s, episode steps: 232, steps per second: 28, episode reward: 143.300, mean reward: 0.618 [-1.000, 1.000], mean action: 2.845 [0.000, 6.000], mean observation: 171.333 [21.000, 255.000], loss: 0.068869, mean_absolute_error: 13.296667, mean_q: 15.529101, mean_eps: 0.685967
  349290/2000000: episode: 1357, duration: 8.940s, episode steps: 248, steps per second: 28, episode reward: 158.600, mean reward: 0.640 [-1.000, 1.000], mean action: 2.952 [0.000, 6.000], mean observation: 171.323 [24.000, 255.000], loss: 0.065800, mean_absolute_error: 13.384962, mean_q: 15.641534, mean_eps: 0.685751
  349442/2000000: episode: 1358, duration: 6.136s, episode steps: 152, steps per second: 25, episode reward: 90.200, mean reward: 0.593 [-1.000, 1.000], mean action: 3.368 [0.000, 6.000], mean observation: 173.595 [23.000, 255.000], loss: 0.065905, mean_absolute_error: 13.396400, mean_q: 15.676000, mean_eps: 0.685571
  349718/2000000: episode: 1359, duration

  355834/2000000: episode: 1382, duration: 10.691s, episode steps: 278, steps per second: 26, episode reward: 174.800, mean reward: 0.629 [-1.000, 1.000], mean action: 3.259 [0.000, 6.000], mean observation: 172.599 [24.000, 255.000], loss: 0.067440, mean_absolute_error: 13.713413, mean_q: 16.032291, mean_eps: 0.679875
  356095/2000000: episode: 1383, duration: 9.688s, episode steps: 261, steps per second: 27, episode reward: 161.700, mean reward: 0.620 [-1.000, 1.000], mean action: 3.019 [0.000, 6.000], mean observation: 171.253 [23.000, 255.000], loss: 0.076737, mean_absolute_error: 14.128073, mean_q: 16.526009, mean_eps: 0.679632
  356363/2000000: episode: 1384, duration: 9.993s, episode steps: 268, steps per second: 27, episode reward: 173.400, mean reward: 0.647 [-1.000, 1.000], mean action: 3.034 [0.000, 6.000], mean observation: 171.417 [23.000, 255.000], loss: 0.077828, mean_absolute_error: 13.834375, mean_q: 16.165419, mean_eps: 0.679395
  356682/2000000: episode: 1385, durati

  362877/2000000: episode: 1408, duration: 11.399s, episode steps: 301, steps per second: 26, episode reward: 195.100, mean reward: 0.648 [-1.000, 1.000], mean action: 3.286 [0.000, 6.000], mean observation: 172.462 [23.000, 255.000], loss: 0.086626, mean_absolute_error: 14.443632, mean_q: 16.878475, mean_eps: 0.673547
  363139/2000000: episode: 1409, duration: 9.547s, episode steps: 262, steps per second: 27, episode reward: 186.300, mean reward: 0.711 [-1.000, 1.000], mean action: 2.939 [0.000, 6.000], mean observation: 171.866 [24.000, 255.000], loss: 0.074677, mean_absolute_error: 14.567273, mean_q: 17.025263, mean_eps: 0.673293
  363388/2000000: episode: 1410, duration: 8.842s, episode steps: 249, steps per second: 28, episode reward: 165.000, mean reward: 0.663 [-1.000, 1.000], mean action: 3.201 [0.000, 6.000], mean observation: 171.784 [24.000, 255.000], loss: 0.077162, mean_absolute_error: 14.699193, mean_q: 17.190666, mean_eps: 0.673064
  363643/2000000: episode: 1411, durati

  369539/2000000: episode: 1434, duration: 10.398s, episode steps: 269, steps per second: 26, episode reward: 179.700, mean reward: 0.668 [-1.000, 1.000], mean action: 3.245 [0.000, 6.000], mean observation: 172.633 [24.000, 255.000], loss: 0.088534, mean_absolute_error: 14.184629, mean_q: 16.591668, mean_eps: 0.667536
  369793/2000000: episode: 1435, duration: 9.420s, episode steps: 254, steps per second: 27, episode reward: 170.400, mean reward: 0.671 [-1.000, 1.000], mean action: 3.039 [0.000, 6.000], mean observation: 171.511 [24.000, 255.000], loss: 0.072506, mean_absolute_error: 14.493665, mean_q: 16.938495, mean_eps: 0.667301
  370072/2000000: episode: 1436, duration: 10.482s, episode steps: 279, steps per second: 27, episode reward: 178.100, mean reward: 0.638 [-1.000, 1.000], mean action: 3.222 [0.000, 6.000], mean observation: 172.570 [23.000, 255.000], loss: 0.095002, mean_absolute_error: 14.597496, mean_q: 17.050564, mean_eps: 0.667061
  370352/2000000: episode: 1437, durat

  376314/2000000: episode: 1460, duration: 9.208s, episode steps: 253, steps per second: 27, episode reward: 177.500, mean reward: 0.702 [-1.000, 1.000], mean action: 3.091 [0.000, 6.000], mean observation: 170.563 [24.000, 255.000], loss: 0.088533, mean_absolute_error: 15.068477, mean_q: 17.626068, mean_eps: 0.661431
  376573/2000000: episode: 1461, duration: 9.626s, episode steps: 259, steps per second: 27, episode reward: 173.300, mean reward: 0.669 [-1.000, 1.000], mean action: 2.876 [0.000, 6.000], mean observation: 170.912 [24.000, 255.000], loss: 0.096704, mean_absolute_error: 15.263386, mean_q: 17.831227, mean_eps: 0.661200
  376825/2000000: episode: 1462, duration: 9.566s, episode steps: 252, steps per second: 26, episode reward: 194.000, mean reward: 0.770 [-1.000, 1.000], mean action: 3.028 [0.000, 6.000], mean observation: 171.171 [23.000, 255.000], loss: 0.082060, mean_absolute_error: 14.838197, mean_q: 17.347214, mean_eps: 0.660970
  377101/2000000: episode: 1463, duratio

  383190/2000000: episode: 1486, duration: 10.790s, episode steps: 278, steps per second: 26, episode reward: 188.700, mean reward: 0.679 [-1.000, 1.000], mean action: 3.090 [0.000, 6.000], mean observation: 172.329 [23.000, 255.000], loss: 0.090896, mean_absolute_error: 15.984031, mean_q: 18.686567, mean_eps: 0.655255
  383450/2000000: episode: 1487, duration: 9.803s, episode steps: 260, steps per second: 27, episode reward: 182.300, mean reward: 0.701 [-1.000, 1.000], mean action: 3.185 [0.000, 6.000], mean observation: 171.804 [23.000, 255.000], loss: 0.089730, mean_absolute_error: 15.726683, mean_q: 18.388676, mean_eps: 0.655012
  383714/2000000: episode: 1488, duration: 9.972s, episode steps: 264, steps per second: 26, episode reward: 172.900, mean reward: 0.655 [-1.000, 1.000], mean action: 3.314 [0.000, 6.000], mean observation: 172.066 [24.000, 255.000], loss: 0.105061, mean_absolute_error: 15.835408, mean_q: 18.518272, mean_eps: 0.654776
  383967/2000000: episode: 1489, durati

  390038/2000000: episode: 1512, duration: 9.513s, episode steps: 261, steps per second: 27, episode reward: 165.000, mean reward: 0.632 [-1.000, 1.000], mean action: 2.958 [0.000, 6.000], mean observation: 171.450 [24.000, 255.000], loss: 0.101373, mean_absolute_error: 15.741736, mean_q: 18.422964, mean_eps: 0.649083
  390346/2000000: episode: 1513, duration: 12.008s, episode steps: 308, steps per second: 26, episode reward: 249.200, mean reward: 0.809 [-1.000, 1.000], mean action: 3.334 [0.000, 6.000], mean observation: 172.883 [24.000, 255.000], loss: 0.117983, mean_absolute_error: 16.372751, mean_q: 19.141191, mean_eps: 0.648827
  390628/2000000: episode: 1514, duration: 10.833s, episode steps: 282, steps per second: 26, episode reward: 166.300, mean reward: 0.590 [-1.000, 1.000], mean action: 3.064 [0.000, 6.000], mean observation: 172.982 [24.000, 255.000], loss: 0.097252, mean_absolute_error: 16.452798, mean_q: 19.250854, mean_eps: 0.648563
  390886/2000000: episode: 1515, durat

  396835/2000000: episode: 1538, duration: 9.564s, episode steps: 253, steps per second: 26, episode reward: 167.200, mean reward: 0.661 [-1.000, 1.000], mean action: 3.024 [0.000, 6.000], mean observation: 171.694 [24.000, 255.000], loss: 0.103443, mean_absolute_error: 16.596821, mean_q: 19.416951, mean_eps: 0.642963
  397134/2000000: episode: 1539, duration: 11.624s, episode steps: 299, steps per second: 26, episode reward: 232.300, mean reward: 0.777 [-1.000, 1.000], mean action: 3.194 [0.000, 6.000], mean observation: 172.141 [24.000, 255.000], loss: 0.097842, mean_absolute_error: 16.673976, mean_q: 19.481131, mean_eps: 0.642714
  397375/2000000: episode: 1540, duration: 8.659s, episode steps: 241, steps per second: 28, episode reward: 143.800, mean reward: 0.597 [-1.000, 1.000], mean action: 2.913 [0.000, 6.000], mean observation: 171.731 [24.000, 255.000], loss: 0.100957, mean_absolute_error: 15.926844, mean_q: 18.601014, mean_eps: 0.642471
  397649/2000000: episode: 1541, durati

  404086/2000000: episode: 1564, duration: 11.563s, episode steps: 299, steps per second: 26, episode reward: 203.500, mean reward: 0.681 [-1.000, 1.000], mean action: 3.224 [0.000, 6.000], mean observation: 173.041 [24.000, 255.000], loss: 0.103023, mean_absolute_error: 16.724104, mean_q: 19.545879, mean_eps: 0.636458
  404349/2000000: episode: 1565, duration: 9.696s, episode steps: 263, steps per second: 27, episode reward: 189.800, mean reward: 0.722 [-1.000, 1.000], mean action: 3.255 [0.000, 6.000], mean observation: 171.402 [23.000, 255.000], loss: 0.109811, mean_absolute_error: 17.172710, mean_q: 20.071536, mean_eps: 0.636204
  404645/2000000: episode: 1566, duration: 11.414s, episode steps: 296, steps per second: 26, episode reward: 212.000, mean reward: 0.716 [-1.000, 1.000], mean action: 3.389 [0.000, 6.000], mean observation: 172.582 [24.000, 255.000], loss: 0.115480, mean_absolute_error: 17.083949, mean_q: 19.999436, mean_eps: 0.635952
  404898/2000000: episode: 1567, durat

  410904/2000000: episode: 1590, duration: 11.398s, episode steps: 293, steps per second: 26, episode reward: 203.200, mean reward: 0.694 [-1.000, 1.000], mean action: 3.263 [0.000, 6.000], mean observation: 171.764 [22.000, 255.000], loss: 0.111620, mean_absolute_error: 17.359456, mean_q: 20.301918, mean_eps: 0.630320
  411184/2000000: episode: 1591, duration: 11.342s, episode steps: 280, steps per second: 25, episode reward: 199.700, mean reward: 0.713 [-1.000, 1.000], mean action: 3.289 [0.000, 6.000], mean observation: 171.760 [23.000, 255.000], loss: 0.111558, mean_absolute_error: 17.236747, mean_q: 20.168987, mean_eps: 0.630062
  411424/2000000: episode: 1592, duration: 9.181s, episode steps: 240, steps per second: 26, episode reward: 171.300, mean reward: 0.714 [-1.000, 1.000], mean action: 2.833 [0.000, 6.000], mean observation: 170.811 [23.000, 255.000], loss: 0.101993, mean_absolute_error: 17.493721, mean_q: 20.460890, mean_eps: 0.629828
  411678/2000000: episode: 1593, durat

  417938/2000000: episode: 1616, duration: 11.757s, episode steps: 302, steps per second: 26, episode reward: 237.800, mean reward: 0.787 [-1.000, 1.000], mean action: 3.248 [0.000, 6.000], mean observation: 172.073 [24.000, 255.000], loss: 0.116505, mean_absolute_error: 18.003585, mean_q: 21.044128, mean_eps: 0.623993
  418170/2000000: episode: 1617, duration: 8.488s, episode steps: 232, steps per second: 27, episode reward: 158.000, mean reward: 0.681 [-1.000, 1.000], mean action: 2.845 [0.000, 6.000], mean observation: 170.840 [24.000, 255.000], loss: 0.116700, mean_absolute_error: 17.967748, mean_q: 21.017797, mean_eps: 0.623751
  418471/2000000: episode: 1618, duration: 11.769s, episode steps: 301, steps per second: 26, episode reward: 197.200, mean reward: 0.655 [-1.000, 1.000], mean action: 3.110 [0.000, 6.000], mean observation: 171.957 [24.000, 255.000], loss: 0.118034, mean_absolute_error: 17.863358, mean_q: 20.885848, mean_eps: 0.623512
  418704/2000000: episode: 1619, durat

  425002/2000000: episode: 1642, duration: 9.262s, episode steps: 223, steps per second: 24, episode reward: 94.800, mean reward: 0.425 [-1.000, 0.500], mean action: 3.063 [0.000, 6.000], mean observation: 172.127 [24.000, 255.000], loss: 0.125735, mean_absolute_error: 18.636487, mean_q: 21.816477, mean_eps: 0.617599
  425234/2000000: episode: 1643, duration: 9.366s, episode steps: 232, steps per second: 25, episode reward: 162.500, mean reward: 0.700 [-1.000, 1.000], mean action: 2.996 [0.000, 6.000], mean observation: 171.376 [24.000, 255.000], loss: 0.123209, mean_absolute_error: 18.440371, mean_q: 21.564194, mean_eps: 0.617394
  425523/2000000: episode: 1644, duration: 11.155s, episode steps: 289, steps per second: 26, episode reward: 218.100, mean reward: 0.755 [-1.000, 1.000], mean action: 3.211 [0.000, 6.000], mean observation: 172.326 [24.000, 255.000], loss: 0.125512, mean_absolute_error: 18.604384, mean_q: 21.761018, mean_eps: 0.617160
  425804/2000000: episode: 1645, duratio

  431688/2000000: episode: 1668, duration: 9.870s, episode steps: 265, steps per second: 27, episode reward: 156.400, mean reward: 0.590 [-1.000, 1.000], mean action: 2.985 [0.000, 6.000], mean observation: 171.664 [24.000, 255.000], loss: 0.133533, mean_absolute_error: 18.752467, mean_q: 21.921063, mean_eps: 0.611601
  432014/2000000: episode: 1669, duration: 12.811s, episode steps: 326, steps per second: 25, episode reward: 253.000, mean reward: 0.776 [-1.000, 1.000], mean action: 3.402 [0.000, 6.000], mean observation: 172.650 [24.000, 255.000], loss: 0.128110, mean_absolute_error: 18.884783, mean_q: 22.104991, mean_eps: 0.611335
  432324/2000000: episode: 1670, duration: 12.292s, episode steps: 310, steps per second: 25, episode reward: 220.000, mean reward: 0.710 [-1.000, 1.000], mean action: 3.400 [0.000, 6.000], mean observation: 173.008 [23.000, 255.000], loss: 0.121986, mean_absolute_error: 18.505101, mean_q: 21.632958, mean_eps: 0.611049
  432698/2000000: episode: 1671, durat

  439266/2000000: episode: 1694, duration: 11.319s, episode steps: 292, steps per second: 26, episode reward: 222.400, mean reward: 0.762 [-1.000, 1.000], mean action: 3.171 [0.000, 6.000], mean observation: 172.407 [24.000, 255.000], loss: 0.116683, mean_absolute_error: 18.897861, mean_q: 22.111638, mean_eps: 0.604792
  439582/2000000: episode: 1695, duration: 12.440s, episode steps: 316, steps per second: 25, episode reward: 202.600, mean reward: 0.641 [-1.000, 1.000], mean action: 3.472 [0.000, 6.000], mean observation: 172.883 [25.000, 255.000], loss: 0.132176, mean_absolute_error: 19.036280, mean_q: 22.267655, mean_eps: 0.604518
  439839/2000000: episode: 1696, duration: 9.775s, episode steps: 257, steps per second: 26, episode reward: 189.800, mean reward: 0.739 [-1.000, 1.000], mean action: 3.152 [0.000, 6.000], mean observation: 171.921 [24.000, 255.000], loss: 0.136115, mean_absolute_error: 18.826697, mean_q: 22.009813, mean_eps: 0.604261
  440122/2000000: episode: 1697, durat

  446686/2000000: episode: 1720, duration: 12.048s, episode steps: 312, steps per second: 26, episode reward: 213.500, mean reward: 0.684 [-1.000, 1.000], mean action: 3.401 [0.000, 6.000], mean observation: 171.973 [23.000, 255.000], loss: 0.124268, mean_absolute_error: 19.451618, mean_q: 22.763265, mean_eps: 0.598123
  446954/2000000: episode: 1721, duration: 11.548s, episode steps: 268, steps per second: 23, episode reward: 205.100, mean reward: 0.765 [-1.000, 1.000], mean action: 3.340 [0.000, 6.000], mean observation: 172.060 [22.000, 255.000], loss: 0.123600, mean_absolute_error: 19.504689, mean_q: 22.824280, mean_eps: 0.597862
  447189/2000000: episode: 1722, duration: 12.080s, episode steps: 235, steps per second: 19, episode reward: 161.900, mean reward: 0.689 [-1.000, 1.000], mean action: 3.315 [0.000, 6.000], mean observation: 172.317 [24.000, 255.000], loss: 0.130290, mean_absolute_error: 19.795017, mean_q: 23.153254, mean_eps: 0.597635
  447430/2000000: episode: 1723, dura

  456227/2000000: episode: 1746, duration: 97.848s, episode steps: 351, steps per second: 4, episode reward: 263.200, mean reward: 0.750 [-1.000, 1.000], mean action: 3.293 [0.000, 6.000], mean observation: 176.218 [26.000, 255.000], loss: 0.132581, mean_absolute_error: 19.648007, mean_q: 22.998098, mean_eps: 0.589555
  456472/2000000: episode: 1747, duration: 12.653s, episode steps: 245, steps per second: 19, episode reward: 167.500, mean reward: 0.684 [-1.000, 1.000], mean action: 3.298 [0.000, 6.000], mean observation: 172.750 [23.000, 255.000], loss: 0.146385, mean_absolute_error: 20.068329, mean_q: 23.490290, mean_eps: 0.589287
  456766/2000000: episode: 1748, duration: 11.989s, episode steps: 294, steps per second: 25, episode reward: 203.100, mean reward: 0.691 [-1.000, 1.000], mean action: 3.330 [0.000, 6.000], mean observation: 173.079 [23.000, 255.000], loss: 0.135312, mean_absolute_error: 19.925840, mean_q: 23.314920, mean_eps: 0.589044
  457137/2000000: episode: 1749, durat

  463926/2000000: episode: 1772, duration: 12.052s, episode steps: 309, steps per second: 26, episode reward: 235.300, mean reward: 0.761 [-1.000, 1.000], mean action: 3.366 [0.000, 6.000], mean observation: 172.126 [22.000, 255.000], loss: 0.139998, mean_absolute_error: 20.213958, mean_q: 23.684139, mean_eps: 0.582605
  464214/2000000: episode: 1773, duration: 11.215s, episode steps: 288, steps per second: 26, episode reward: 219.500, mean reward: 0.762 [-1.000, 1.000], mean action: 3.156 [0.000, 6.000], mean observation: 171.537 [23.000, 255.000], loss: 0.166095, mean_absolute_error: 20.469718, mean_q: 23.973708, mean_eps: 0.582337
  464534/2000000: episode: 1774, duration: 12.420s, episode steps: 320, steps per second: 26, episode reward: 211.900, mean reward: 0.662 [-1.000, 1.000], mean action: 3.684 [0.000, 6.000], mean observation: 171.820 [22.000, 255.000], loss: 0.146874, mean_absolute_error: 20.520172, mean_q: 24.056249, mean_eps: 0.582063
  464914/2000000: episode: 1775, dura

  471398/2000000: episode: 1798, duration: 12.485s, episode steps: 315, steps per second: 25, episode reward: 240.100, mean reward: 0.762 [-1.000, 1.000], mean action: 3.435 [0.000, 6.000], mean observation: 173.330 [24.000, 255.000], loss: 0.144200, mean_absolute_error: 20.945034, mean_q: 24.543809, mean_eps: 0.575884
  471679/2000000: episode: 1799, duration: 10.975s, episode steps: 281, steps per second: 26, episode reward: 207.500, mean reward: 0.738 [-1.000, 1.000], mean action: 3.363 [0.000, 6.000], mean observation: 172.685 [25.000, 255.000], loss: 0.152619, mean_absolute_error: 20.838496, mean_q: 24.404787, mean_eps: 0.575616
  472019/2000000: episode: 1800, duration: 13.569s, episode steps: 340, steps per second: 25, episode reward: 269.200, mean reward: 0.792 [-1.000, 1.000], mean action: 3.482 [0.000, 6.000], mean observation: 173.466 [24.000, 255.000], loss: 0.159778, mean_absolute_error: 20.597979, mean_q: 24.116739, mean_eps: 0.575337
  472346/2000000: episode: 1801, dura

  479618/2000000: episode: 1824, duration: 15.764s, episode steps: 379, steps per second: 24, episode reward: 260.200, mean reward: 0.687 [-1.000, 1.000], mean action: 3.694 [0.000, 6.000], mean observation: 172.411 [25.000, 255.000], loss: 0.154517, mean_absolute_error: 21.269757, mean_q: 24.912760, mean_eps: 0.568515
  479897/2000000: episode: 1825, duration: 10.724s, episode steps: 279, steps per second: 26, episode reward: 202.000, mean reward: 0.724 [-1.000, 1.000], mean action: 3.165 [0.000, 6.000], mean observation: 172.271 [23.000, 255.000], loss: 0.129979, mean_absolute_error: 20.767447, mean_q: 24.299482, mean_eps: 0.568218
  480273/2000000: episode: 1826, duration: 15.438s, episode steps: 376, steps per second: 24, episode reward: 288.000, mean reward: 0.766 [-1.000, 1.000], mean action: 3.633 [0.000, 6.000], mean observation: 172.605 [22.000, 255.000], loss: 0.158351, mean_absolute_error: 21.188679, mean_q: 24.797679, mean_eps: 0.567923
  480593/2000000: episode: 1827, dura

  487455/2000000: episode: 1850, duration: 9.163s, episode steps: 209, steps per second: 23, episode reward: 146.800, mean reward: 0.702 [-1.000, 1.000], mean action: 2.708 [0.000, 6.000], mean observation: 171.000 [23.000, 255.000], loss: 0.145438, mean_absolute_error: 21.729524, mean_q: 25.431444, mean_eps: 0.561385
  487792/2000000: episode: 1851, duration: 13.818s, episode steps: 337, steps per second: 24, episode reward: 236.900, mean reward: 0.703 [-1.000, 1.000], mean action: 3.460 [0.000, 6.000], mean observation: 173.230 [24.000, 255.000], loss: 0.143346, mean_absolute_error: 21.432083, mean_q: 25.070681, mean_eps: 0.561140
  488158/2000000: episode: 1852, duration: 15.252s, episode steps: 366, steps per second: 24, episode reward: 282.300, mean reward: 0.771 [-1.000, 1.000], mean action: 3.568 [0.000, 6.000], mean observation: 173.132 [23.000, 255.000], loss: 0.148885, mean_absolute_error: 21.508580, mean_q: 25.188795, mean_eps: 0.560823
  488463/2000000: episode: 1853, durat

  495615/2000000: episode: 1876, duration: 12.038s, episode steps: 302, steps per second: 25, episode reward: 235.700, mean reward: 0.780 [-1.000, 1.000], mean action: 3.351 [0.000, 6.000], mean observation: 172.378 [24.000, 255.000], loss: 0.150622, mean_absolute_error: 22.261349, mean_q: 26.070130, mean_eps: 0.554082
  496002/2000000: episode: 1877, duration: 16.475s, episode steps: 387, steps per second: 23, episode reward: 328.300, mean reward: 0.848 [-1.000, 1.000], mean action: 3.574 [0.000, 6.000], mean observation: 172.325 [21.000, 255.000], loss: 0.149520, mean_absolute_error: 22.115822, mean_q: 25.906423, mean_eps: 0.553773
  496349/2000000: episode: 1878, duration: 13.883s, episode steps: 347, steps per second: 25, episode reward: 245.000, mean reward: 0.706 [-1.000, 1.000], mean action: 3.622 [0.000, 6.000], mean observation: 172.560 [23.000, 255.000], loss: 0.169130, mean_absolute_error: 22.104878, mean_q: 25.876423, mean_eps: 0.553442
  496631/2000000: episode: 1879, dura

  503843/2000000: episode: 1902, duration: 11.289s, episode steps: 223, steps per second: 20, episode reward: 157.700, mean reward: 0.707 [-1.000, 1.000], mean action: 3.305 [0.000, 6.000], mean observation: 172.615 [23.000, 255.000], loss: 0.165978, mean_absolute_error: 22.867457, mean_q: 26.763476, mean_eps: 0.546643
  504130/2000000: episode: 1903, duration: 12.473s, episode steps: 287, steps per second: 23, episode reward: 183.900, mean reward: 0.641 [-1.000, 1.000], mean action: 3.324 [0.000, 6.000], mean observation: 173.065 [24.000, 255.000], loss: 0.155015, mean_absolute_error: 22.775655, mean_q: 26.638702, mean_eps: 0.546413
  504409/2000000: episode: 1904, duration: 11.442s, episode steps: 279, steps per second: 24, episode reward: 217.500, mean reward: 0.780 [-1.000, 1.000], mean action: 3.459 [0.000, 6.000], mean observation: 172.964 [24.000, 255.000], loss: 0.178438, mean_absolute_error: 22.647910, mean_q: 26.478789, mean_eps: 0.546157
  504672/2000000: episode: 1905, dura

  511331/2000000: episode: 1928, duration: 11.876s, episode steps: 301, steps per second: 25, episode reward: 216.300, mean reward: 0.719 [-1.000, 1.000], mean action: 3.322 [0.000, 6.000], mean observation: 172.332 [24.000, 255.000], loss: 0.162745, mean_absolute_error: 23.120135, mean_q: 27.052011, mean_eps: 0.539938
  511641/2000000: episode: 1929, duration: 12.236s, episode steps: 310, steps per second: 25, episode reward: 239.800, mean reward: 0.774 [-1.000, 1.000], mean action: 3.306 [0.000, 6.000], mean observation: 172.028 [24.000, 255.000], loss: 0.171927, mean_absolute_error: 23.293203, mean_q: 27.239999, mean_eps: 0.539663
  511920/2000000: episode: 1930, duration: 11.508s, episode steps: 279, steps per second: 24, episode reward: 201.900, mean reward: 0.724 [-1.000, 1.000], mean action: 3.348 [0.000, 6.000], mean observation: 171.918 [23.000, 255.000], loss: 0.170141, mean_absolute_error: 23.483911, mean_q: 27.466822, mean_eps: 0.539398
  512214/2000000: episode: 1931, dura

  519413/2000000: episode: 1954, duration: 7.741s, episode steps: 219, steps per second: 28, episode reward: 141.100, mean reward: 0.644 [-1.000, 1.000], mean action: 3.014 [0.000, 6.000], mean observation: 171.404 [24.000, 255.000], loss: 0.183140, mean_absolute_error: 23.583730, mean_q: 27.561474, mean_eps: 0.532626
  519794/2000000: episode: 1955, duration: 15.600s, episode steps: 381, steps per second: 24, episode reward: 304.300, mean reward: 0.799 [-1.000, 1.000], mean action: 3.625 [0.000, 6.000], mean observation: 173.272 [25.000, 255.000], loss: 0.160878, mean_absolute_error: 23.602006, mean_q: 27.609829, mean_eps: 0.532356
  520106/2000000: episode: 1956, duration: 12.118s, episode steps: 312, steps per second: 26, episode reward: 241.300, mean reward: 0.773 [-1.000, 1.000], mean action: 3.186 [0.000, 6.000], mean observation: 172.915 [24.000, 255.000], loss: 0.180802, mean_absolute_error: 23.526130, mean_q: 27.515229, mean_eps: 0.532045
  520400/2000000: episode: 1957, durat

  527532/2000000: episode: 1980, duration: 11.802s, episode steps: 277, steps per second: 23, episode reward: 211.000, mean reward: 0.762 [-1.000, 1.000], mean action: 3.401 [0.000, 6.000], mean observation: 172.109 [22.000, 255.000], loss: 0.175556, mean_absolute_error: 24.027090, mean_q: 28.106469, mean_eps: 0.525347
  527919/2000000: episode: 1981, duration: 17.245s, episode steps: 387, steps per second: 22, episode reward: 312.100, mean reward: 0.806 [-1.000, 1.000], mean action: 3.643 [0.000, 6.000], mean observation: 172.347 [25.000, 255.000], loss: 0.168777, mean_absolute_error: 23.907190, mean_q: 27.984414, mean_eps: 0.525048
  528155/2000000: episode: 1982, duration: 8.509s, episode steps: 236, steps per second: 28, episode reward: 141.900, mean reward: 0.601 [-1.000, 1.000], mean action: 3.034 [0.000, 6.000], mean observation: 170.794 [24.000, 255.000], loss: 0.167504, mean_absolute_error: 24.150037, mean_q: 28.244829, mean_eps: 0.524768
  528458/2000000: episode: 1983, durat

  535546/2000000: episode: 2006, duration: 16.469s, episode steps: 391, steps per second: 24, episode reward: 333.400, mean reward: 0.853 [-1.000, 1.000], mean action: 3.655 [0.000, 6.000], mean observation: 172.123 [24.000, 255.000], loss: 0.163326, mean_absolute_error: 24.198841, mean_q: 28.293961, mean_eps: 0.518185
  535846/2000000: episode: 2007, duration: 12.255s, episode steps: 300, steps per second: 24, episode reward: 219.600, mean reward: 0.732 [-1.000, 1.000], mean action: 3.503 [0.000, 6.000], mean observation: 172.978 [21.000, 255.000], loss: 0.173121, mean_absolute_error: 23.863873, mean_q: 27.907749, mean_eps: 0.517874
  536147/2000000: episode: 2008, duration: 12.021s, episode steps: 301, steps per second: 25, episode reward: 241.300, mean reward: 0.802 [-1.000, 1.000], mean action: 3.262 [0.000, 6.000], mean observation: 172.943 [24.000, 255.000], loss: 0.176940, mean_absolute_error: 23.841990, mean_q: 27.886645, mean_eps: 0.517604
  536416/2000000: episode: 2009, dura

  544434/2000000: episode: 2032, duration: 12.162s, episode steps: 312, steps per second: 26, episode reward: 234.200, mean reward: 0.751 [-1.000, 1.000], mean action: 3.295 [0.000, 6.000], mean observation: 172.391 [24.000, 255.000], loss: 0.204996, mean_absolute_error: 24.386674, mean_q: 28.525269, mean_eps: 0.510150
  544742/2000000: episode: 2033, duration: 12.303s, episode steps: 308, steps per second: 25, episode reward: 234.100, mean reward: 0.760 [-1.000, 1.000], mean action: 3.633 [0.000, 6.000], mean observation: 172.413 [22.000, 255.000], loss: 0.188507, mean_absolute_error: 24.270959, mean_q: 28.370979, mean_eps: 0.509871
  545050/2000000: episode: 2034, duration: 12.086s, episode steps: 308, steps per second: 25, episode reward: 200.200, mean reward: 0.650 [-1.000, 1.000], mean action: 3.451 [0.000, 6.000], mean observation: 172.917 [24.000, 255.000], loss: 0.188384, mean_absolute_error: 24.366266, mean_q: 28.500149, mean_eps: 0.509594
  545433/2000000: episode: 2035, dura

  554472/2000000: episode: 2058, duration: 12.478s, episode steps: 314, steps per second: 25, episode reward: 239.000, mean reward: 0.761 [-1.000, 1.000], mean action: 3.449 [0.000, 6.000], mean observation: 173.375 [23.000, 255.000], loss: 0.205778, mean_absolute_error: 24.987064, mean_q: 29.236667, mean_eps: 0.501117
  554748/2000000: episode: 2059, duration: 10.586s, episode steps: 276, steps per second: 26, episode reward: 209.100, mean reward: 0.758 [-1.000, 1.000], mean action: 3.065 [0.000, 6.000], mean observation: 173.212 [23.000, 255.000], loss: 0.201546, mean_absolute_error: 25.255070, mean_q: 29.569712, mean_eps: 0.500853
  555022/2000000: episode: 2060, duration: 10.555s, episode steps: 274, steps per second: 26, episode reward: 207.900, mean reward: 0.759 [-1.000, 1.000], mean action: 3.263 [0.000, 6.000], mean observation: 173.165 [22.000, 255.000], loss: 0.196528, mean_absolute_error: 24.899052, mean_q: 29.117805, mean_eps: 0.500604
  555414/2000000: episode: 2061, dura

  566796/2000000: episode: 2084, duration: 20.611s, episode steps: 351, steps per second: 17, episode reward: 249.300, mean reward: 0.710 [-1.000, 1.000], mean action: 3.632 [0.000, 6.000], mean observation: 165.399 [0.000, 255.000], loss: 0.206852, mean_absolute_error: 25.213487, mean_q: 29.519556, mean_eps: 0.490042
  567018/2000000: episode: 2085, duration: 11.404s, episode steps: 222, steps per second: 19, episode reward: 168.100, mean reward: 0.757 [-1.000, 1.000], mean action: 3.275 [0.000, 6.000], mean observation: 172.458 [25.000, 255.000], loss: 0.200367, mean_absolute_error: 25.088130, mean_q: 29.396491, mean_eps: 0.489785
  567314/2000000: episode: 2086, duration: 16.403s, episode steps: 296, steps per second: 18, episode reward: 231.100, mean reward: 0.781 [-1.000, 1.000], mean action: 3.679 [0.000, 6.000], mean observation: 172.126 [24.000, 255.000], loss: 0.197355, mean_absolute_error: 25.225091, mean_q: 29.558551, mean_eps: 0.489551
  567550/2000000: episode: 2087, durat

  573917/2000000: episode: 2110, duration: 17.267s, episode steps: 305, steps per second: 18, episode reward: 239.900, mean reward: 0.787 [-1.000, 1.000], mean action: 3.685 [0.000, 6.000], mean observation: 172.046 [23.000, 255.000], loss: 0.219694, mean_absolute_error: 25.595864, mean_q: 29.985569, mean_eps: 0.483612
  574220/2000000: episode: 2111, duration: 17.018s, episode steps: 303, steps per second: 18, episode reward: 258.100, mean reward: 0.852 [-1.000, 1.000], mean action: 3.710 [0.000, 6.000], mean observation: 172.115 [23.000, 255.000], loss: 0.231261, mean_absolute_error: 25.526256, mean_q: 29.883440, mean_eps: 0.483339
  574512/2000000: episode: 2112, duration: 16.260s, episode steps: 292, steps per second: 18, episode reward: 213.600, mean reward: 0.732 [-1.000, 1.000], mean action: 3.548 [0.000, 6.000], mean observation: 172.655 [25.000, 255.000], loss: 0.225260, mean_absolute_error: 25.615494, mean_q: 30.018010, mean_eps: 0.483072
  574785/2000000: episode: 2113, dura

  583057/2000000: episode: 2136, duration: 20.516s, episode steps: 403, steps per second: 20, episode reward: 292.200, mean reward: 0.725 [-1.000, 1.000], mean action: 3.613 [0.000, 6.000], mean observation: 165.959 [0.000, 255.000], loss: 0.229169, mean_absolute_error: 26.182498, mean_q: 30.650892, mean_eps: 0.475430
  583360/2000000: episode: 2137, duration: 17.134s, episode steps: 303, steps per second: 18, episode reward: 253.100, mean reward: 0.835 [-1.000, 1.000], mean action: 3.614 [0.000, 6.000], mean observation: 172.293 [24.000, 255.000], loss: 0.245284, mean_absolute_error: 26.489707, mean_q: 31.012703, mean_eps: 0.475113
  583714/2000000: episode: 2138, duration: 20.908s, episode steps: 354, steps per second: 17, episode reward: 237.900, mean reward: 0.672 [-1.000, 1.000], mean action: 3.579 [0.000, 6.000], mean observation: 165.444 [0.000, 255.000], loss: 0.236309, mean_absolute_error: 26.148643, mean_q: 30.626097, mean_eps: 0.474818
  583986/2000000: episode: 2139, durati

  591455/2000000: episode: 2162, duration: 16.874s, episode steps: 405, steps per second: 24, episode reward: 301.400, mean reward: 0.744 [-1.000, 1.000], mean action: 3.553 [0.000, 6.000], mean observation: 172.764 [23.000, 255.000], loss: 0.251437, mean_absolute_error: 27.056325, mean_q: 31.700661, mean_eps: 0.467873
  591847/2000000: episode: 2163, duration: 16.009s, episode steps: 392, steps per second: 24, episode reward: 294.000, mean reward: 0.750 [-1.000, 1.000], mean action: 3.679 [0.000, 6.000], mean observation: 172.584 [25.000, 255.000], loss: 0.244663, mean_absolute_error: 26.912375, mean_q: 31.519845, mean_eps: 0.467515
  592137/2000000: episode: 2164, duration: 11.258s, episode steps: 290, steps per second: 26, episode reward: 209.000, mean reward: 0.721 [-1.000, 1.000], mean action: 3.300 [0.000, 6.000], mean observation: 172.452 [24.000, 255.000], loss: 0.247529, mean_absolute_error: 27.014924, mean_q: 31.639094, mean_eps: 0.467207
  592439/2000000: episode: 2165, dura

  599840/2000000: episode: 2188, duration: 20.581s, episode steps: 354, steps per second: 17, episode reward: 256.800, mean reward: 0.725 [0.100, 1.000], mean action: 3.746 [0.000, 6.000], mean observation: 166.733 [0.000, 255.000], loss: 0.250110, mean_absolute_error: 26.944393, mean_q: 31.547009, mean_eps: 0.460304
  600121/2000000: episode: 2189, duration: 15.299s, episode steps: 281, steps per second: 18, episode reward: 215.500, mean reward: 0.767 [-1.000, 1.000], mean action: 3.480 [0.000, 6.000], mean observation: 172.330 [24.000, 255.000], loss: 0.264645, mean_absolute_error: 26.814208, mean_q: 31.379505, mean_eps: 0.460018
  600479/2000000: episode: 2190, duration: 20.904s, episode steps: 358, steps per second: 17, episode reward: 251.500, mean reward: 0.703 [-1.000, 1.000], mean action: 3.774 [0.000, 6.000], mean observation: 164.738 [0.000, 255.000], loss: 0.282412, mean_absolute_error: 26.982302, mean_q: 31.573982, mean_eps: 0.459730
  600688/2000000: episode: 2191, duratio

  606711/2000000: episode: 2214, duration: 16.917s, episode steps: 303, steps per second: 18, episode reward: 250.400, mean reward: 0.826 [-1.000, 1.000], mean action: 3.726 [0.000, 6.000], mean observation: 172.743 [24.000, 255.000], loss: 0.267282, mean_absolute_error: 27.249730, mean_q: 31.930114, mean_eps: 0.454098
  606907/2000000: episode: 2215, duration: 9.713s, episode steps: 196, steps per second: 20, episode reward: 136.100, mean reward: 0.694 [-1.000, 1.000], mean action: 3.036 [0.000, 6.000], mean observation: 171.621 [25.000, 255.000], loss: 0.254402, mean_absolute_error: 27.011645, mean_q: 31.607292, mean_eps: 0.453873
  607135/2000000: episode: 2216, duration: 11.720s, episode steps: 228, steps per second: 19, episode reward: 157.600, mean reward: 0.691 [-1.000, 1.000], mean action: 3.325 [0.000, 6.000], mean observation: 172.705 [24.000, 255.000], loss: 0.270220, mean_absolute_error: 27.245537, mean_q: 31.907761, mean_eps: 0.453682
  607435/2000000: episode: 2217, durat

  613350/2000000: episode: 2240, duration: 15.965s, episode steps: 290, steps per second: 18, episode reward: 228.400, mean reward: 0.788 [-1.000, 1.000], mean action: 3.648 [0.000, 6.000], mean observation: 172.983 [24.000, 255.000], loss: 0.291870, mean_absolute_error: 27.621274, mean_q: 32.311063, mean_eps: 0.448116
  613582/2000000: episode: 2241, duration: 11.772s, episode steps: 232, steps per second: 20, episode reward: 163.600, mean reward: 0.705 [-1.000, 1.000], mean action: 3.108 [0.000, 6.000], mean observation: 172.523 [23.000, 255.000], loss: 0.314887, mean_absolute_error: 27.761960, mean_q: 32.454868, mean_eps: 0.447881
  613875/2000000: episode: 2242, duration: 16.189s, episode steps: 293, steps per second: 18, episode reward: 256.600, mean reward: 0.876 [-1.000, 1.000], mean action: 3.549 [0.000, 6.000], mean observation: 172.480 [25.000, 255.000], loss: 0.277774, mean_absolute_error: 27.859253, mean_q: 32.577386, mean_eps: 0.447645
  614077/2000000: episode: 2243, dura

  619795/2000000: episode: 2266, duration: 15.924s, episode steps: 287, steps per second: 18, episode reward: 236.100, mean reward: 0.823 [-1.000, 1.000], mean action: 3.627 [0.000, 6.000], mean observation: 172.854 [23.000, 255.000], loss: 0.291668, mean_absolute_error: 27.997126, mean_q: 32.756683, mean_eps: 0.442315
  620017/2000000: episode: 2267, duration: 11.487s, episode steps: 222, steps per second: 19, episode reward: 141.400, mean reward: 0.637 [-1.000, 1.000], mean action: 3.131 [0.000, 6.000], mean observation: 172.501 [25.000, 255.000], loss: 0.321042, mean_absolute_error: 27.617107, mean_q: 32.280242, mean_eps: 0.442085
  620244/2000000: episode: 2268, duration: 11.671s, episode steps: 227, steps per second: 19, episode reward: 156.000, mean reward: 0.687 [-1.000, 1.000], mean action: 3.278 [0.000, 6.000], mean observation: 172.014 [23.000, 255.000], loss: 0.294577, mean_absolute_error: 28.280255, mean_q: 33.066075, mean_eps: 0.441883
  620482/2000000: episode: 2269, dura

  626613/2000000: episode: 2292, duration: 15.307s, episode steps: 283, steps per second: 18, episode reward: 224.700, mean reward: 0.794 [-1.000, 1.000], mean action: 3.675 [0.000, 6.000], mean observation: 172.649 [24.000, 255.000], loss: 0.297879, mean_absolute_error: 27.668825, mean_q: 32.362195, mean_eps: 0.436175
  626810/2000000: episode: 2293, duration: 9.935s, episode steps: 197, steps per second: 20, episode reward: 131.900, mean reward: 0.670 [-1.000, 1.000], mean action: 2.970 [0.000, 6.000], mean observation: 171.797 [24.000, 255.000], loss: 0.275873, mean_absolute_error: 27.820669, mean_q: 32.542995, mean_eps: 0.435959
  627044/2000000: episode: 2294, duration: 12.102s, episode steps: 234, steps per second: 19, episode reward: 158.000, mean reward: 0.675 [-1.000, 1.000], mean action: 3.350 [0.000, 6.000], mean observation: 172.550 [24.000, 255.000], loss: 0.297384, mean_absolute_error: 27.697559, mean_q: 32.362040, mean_eps: 0.435767
  627254/2000000: episode: 2295, durat

  632672/2000000: episode: 2318, duration: 15.554s, episode steps: 283, steps per second: 18, episode reward: 238.100, mean reward: 0.841 [-1.000, 1.000], mean action: 3.565 [0.000, 6.000], mean observation: 173.227 [24.000, 255.000], loss: 0.343811, mean_absolute_error: 27.733598, mean_q: 32.414805, mean_eps: 0.430723
  632974/2000000: episode: 2319, duration: 16.900s, episode steps: 302, steps per second: 18, episode reward: 178.000, mean reward: 0.589 [-1.000, 1.000], mean action: 3.666 [0.000, 6.000], mean observation: 173.067 [24.000, 255.000], loss: 0.337115, mean_absolute_error: 27.682172, mean_q: 32.374213, mean_eps: 0.430460
  633273/2000000: episode: 2320, duration: 16.904s, episode steps: 299, steps per second: 18, episode reward: 209.700, mean reward: 0.701 [-1.000, 1.000], mean action: 3.736 [0.000, 6.000], mean observation: 172.571 [24.000, 255.000], loss: 0.365605, mean_absolute_error: 27.628172, mean_q: 32.343125, mean_eps: 0.430188
  633630/2000000: episode: 2321, dura

  640230/2000000: episode: 2344, duration: 15.737s, episode steps: 284, steps per second: 18, episode reward: 239.900, mean reward: 0.845 [-1.000, 1.000], mean action: 3.750 [0.000, 6.000], mean observation: 173.139 [23.000, 255.000], loss: 0.359768, mean_absolute_error: 28.015380, mean_q: 32.803399, mean_eps: 0.423921
  640527/2000000: episode: 2345, duration: 16.422s, episode steps: 297, steps per second: 18, episode reward: 243.200, mean reward: 0.819 [-1.000, 1.000], mean action: 3.771 [0.000, 6.000], mean observation: 172.645 [23.000, 255.000], loss: 0.370856, mean_absolute_error: 28.173281, mean_q: 32.964673, mean_eps: 0.423660
  640829/2000000: episode: 2346, duration: 17.097s, episode steps: 302, steps per second: 18, episode reward: 236.900, mean reward: 0.784 [-1.000, 1.000], mean action: 3.599 [0.000, 6.000], mean observation: 172.723 [24.000, 255.000], loss: 0.351020, mean_absolute_error: 28.091619, mean_q: 32.866574, mean_eps: 0.423390
  641144/2000000: episode: 2347, dura

  649426/2000000: episode: 2370, duration: 16.934s, episode steps: 304, steps per second: 18, episode reward: 199.000, mean reward: 0.655 [-1.000, 1.000], mean action: 3.717 [0.000, 6.000], mean observation: 172.036 [24.000, 255.000], loss: 0.337760, mean_absolute_error: 27.027800, mean_q: 31.645339, mean_eps: 0.415653
  649654/2000000: episode: 2371, duration: 11.740s, episode steps: 228, steps per second: 19, episode reward: 127.800, mean reward: 0.561 [-1.000, 1.000], mean action: 3.272 [0.000, 6.000], mean observation: 171.751 [23.000, 255.000], loss: 0.335112, mean_absolute_error: 26.733471, mean_q: 31.278610, mean_eps: 0.415414
  650001/2000000: episode: 2372, duration: 20.290s, episode steps: 347, steps per second: 17, episode reward: 236.500, mean reward: 0.682 [-1.000, 1.000], mean action: 3.718 [0.000, 6.000], mean observation: 165.553 [7.000, 255.000], loss: 0.345870, mean_absolute_error: 27.256056, mean_q: 31.918389, mean_eps: 0.415155
  650300/2000000: episode: 2373, durat

  657623/2000000: episode: 2396, duration: 16.706s, episode steps: 300, steps per second: 18, episode reward: 231.900, mean reward: 0.773 [-1.000, 1.000], mean action: 3.707 [0.000, 6.000], mean observation: 172.829 [23.000, 255.000], loss: 0.364267, mean_absolute_error: 27.998162, mean_q: 32.790096, mean_eps: 0.408275
  657974/2000000: episode: 2397, duration: 20.422s, episode steps: 351, steps per second: 17, episode reward: 286.500, mean reward: 0.816 [-1.000, 1.000], mean action: 3.735 [0.000, 6.000], mean observation: 165.535 [0.000, 255.000], loss: 0.351677, mean_absolute_error: 28.054934, mean_q: 32.885462, mean_eps: 0.407982
  658333/2000000: episode: 2398, duration: 20.947s, episode steps: 359, steps per second: 17, episode reward: 270.700, mean reward: 0.754 [0.100, 1.000], mean action: 3.685 [0.000, 6.000], mean observation: 164.888 [0.000, 255.000], loss: 0.376171, mean_absolute_error: 27.630894, mean_q: 32.382684, mean_eps: 0.407661
  658634/2000000: episode: 2399, duratio

  665964/2000000: episode: 2422, duration: 17.233s, episode steps: 305, steps per second: 18, episode reward: 234.700, mean reward: 0.770 [-1.000, 1.000], mean action: 3.590 [0.000, 6.000], mean observation: 172.653 [24.000, 255.000], loss: 0.395278, mean_absolute_error: 28.421818, mean_q: 33.291420, mean_eps: 0.400771
  666251/2000000: episode: 2423, duration: 15.718s, episode steps: 287, steps per second: 18, episode reward: 236.300, mean reward: 0.823 [-1.000, 1.000], mean action: 3.557 [0.000, 6.000], mean observation: 173.012 [24.000, 255.000], loss: 0.399844, mean_absolute_error: 28.225705, mean_q: 33.105022, mean_eps: 0.400505
  666547/2000000: episode: 2424, duration: 16.414s, episode steps: 296, steps per second: 18, episode reward: 248.400, mean reward: 0.839 [-1.000, 1.000], mean action: 3.770 [0.000, 6.000], mean observation: 172.864 [23.000, 255.000], loss: 0.388895, mean_absolute_error: 28.522623, mean_q: 33.417811, mean_eps: 0.400242
  666840/2000000: episode: 2425, dura

  674136/2000000: episode: 2448, duration: 16.270s, episode steps: 295, steps per second: 18, episode reward: 219.000, mean reward: 0.742 [-1.000, 1.000], mean action: 3.725 [0.000, 6.000], mean observation: 171.987 [23.000, 255.000], loss: 0.382340, mean_absolute_error: 28.415926, mean_q: 33.301495, mean_eps: 0.393411
  674498/2000000: episode: 2449, duration: 21.091s, episode steps: 362, steps per second: 17, episode reward: 270.500, mean reward: 0.747 [-1.000, 1.000], mean action: 3.627 [0.000, 6.000], mean observation: 164.734 [0.000, 255.000], loss: 0.435179, mean_absolute_error: 28.898588, mean_q: 33.855133, mean_eps: 0.393116
  674784/2000000: episode: 2450, duration: 15.607s, episode steps: 286, steps per second: 18, episode reward: 240.200, mean reward: 0.840 [-1.000, 1.000], mean action: 3.430 [0.000, 6.000], mean observation: 172.349 [24.000, 255.000], loss: 0.410150, mean_absolute_error: 28.635364, mean_q: 33.497987, mean_eps: 0.392824
  675081/2000000: episode: 2451, durat

  681837/2000000: episode: 2474, duration: 7.743s, episode steps: 163, steps per second: 21, episode reward: 111.000, mean reward: 0.681 [-1.000, 1.000], mean action: 2.816 [0.000, 6.000], mean observation: 171.539 [23.000, 255.000], loss: 0.406093, mean_absolute_error: 28.761659, mean_q: 33.700862, mean_eps: 0.386420
  682070/2000000: episode: 2475, duration: 12.077s, episode steps: 233, steps per second: 19, episode reward: 190.800, mean reward: 0.819 [-1.000, 1.000], mean action: 3.056 [0.000, 6.000], mean observation: 173.143 [24.000, 255.000], loss: 0.374994, mean_absolute_error: 28.990510, mean_q: 33.918479, mean_eps: 0.386241
  682365/2000000: episode: 2476, duration: 16.194s, episode steps: 295, steps per second: 18, episode reward: 225.500, mean reward: 0.764 [-1.000, 1.000], mean action: 3.485 [0.000, 6.000], mean observation: 172.605 [24.000, 255.000], loss: 0.415812, mean_absolute_error: 29.452780, mean_q: 34.487635, mean_eps: 0.386004
  682723/2000000: episode: 2477, durat

  689370/2000000: episode: 2500, duration: 13.019s, episode steps: 247, steps per second: 19, episode reward: 158.200, mean reward: 0.640 [-1.000, 1.000], mean action: 3.656 [0.000, 6.000], mean observation: 173.752 [24.000, 255.000], loss: 0.392638, mean_absolute_error: 29.053292, mean_q: 34.032386, mean_eps: 0.379679
  689725/2000000: episode: 2501, duration: 20.643s, episode steps: 355, steps per second: 17, episode reward: 291.500, mean reward: 0.821 [0.500, 1.000], mean action: 3.628 [0.000, 6.000], mean observation: 166.140 [0.000, 255.000], loss: 0.356868, mean_absolute_error: 29.794266, mean_q: 34.918164, mean_eps: 0.379407
  690071/2000000: episode: 2502, duration: 20.191s, episode steps: 346, steps per second: 17, episode reward: 196.600, mean reward: 0.568 [-1.000, 1.000], mean action: 3.590 [0.000, 6.000], mean observation: 165.917 [0.000, 255.000], loss: 0.409699, mean_absolute_error: 29.351040, mean_q: 34.396454, mean_eps: 0.379092
  690410/2000000: episode: 2503, duratio

  697531/2000000: episode: 2526, duration: 17.058s, episode steps: 306, steps per second: 18, episode reward: 245.700, mean reward: 0.803 [-1.000, 1.000], mean action: 3.837 [0.000, 6.000], mean observation: 171.787 [23.000, 255.000], loss: 0.396909, mean_absolute_error: 29.981127, mean_q: 35.087862, mean_eps: 0.372360
  697826/2000000: episode: 2527, duration: 16.364s, episode steps: 295, steps per second: 18, episode reward: 233.900, mean reward: 0.793 [-1.000, 1.000], mean action: 3.681 [0.000, 6.000], mean observation: 171.943 [24.000, 255.000], loss: 0.392083, mean_absolute_error: 29.696897, mean_q: 34.762717, mean_eps: 0.372090
  698128/2000000: episode: 2528, duration: 16.997s, episode steps: 302, steps per second: 18, episode reward: 254.400, mean reward: 0.842 [-1.000, 1.000], mean action: 3.732 [0.000, 6.000], mean observation: 172.098 [23.000, 255.000], loss: 0.350728, mean_absolute_error: 30.195207, mean_q: 35.340772, mean_eps: 0.371822
  698428/2000000: episode: 2529, dura

  705955/2000000: episode: 2552, duration: 15.524s, episode steps: 280, steps per second: 18, episode reward: 237.700, mean reward: 0.849 [-1.000, 1.000], mean action: 3.536 [0.000, 6.000], mean observation: 172.886 [25.000, 255.000], loss: 0.423091, mean_absolute_error: 31.085293, mean_q: 36.405924, mean_eps: 0.364767
  706246/2000000: episode: 2553, duration: 16.055s, episode steps: 291, steps per second: 18, episode reward: 231.500, mean reward: 0.796 [-1.000, 1.000], mean action: 3.567 [0.000, 6.000], mean observation: 172.559 [24.000, 255.000], loss: 0.381699, mean_absolute_error: 31.536683, mean_q: 36.903392, mean_eps: 0.364510
  706601/2000000: episode: 2554, duration: 20.810s, episode steps: 355, steps per second: 17, episode reward: 296.300, mean reward: 0.835 [0.100, 1.000], mean action: 3.592 [0.000, 6.000], mean observation: 165.179 [0.000, 255.000], loss: 0.384737, mean_absolute_error: 31.094702, mean_q: 36.380914, mean_eps: 0.364218
  706951/2000000: episode: 2555, durati

  714355/2000000: episode: 2578, duration: 15.446s, episode steps: 285, steps per second: 18, episode reward: 208.300, mean reward: 0.731 [-1.000, 1.000], mean action: 3.646 [0.000, 6.000], mean observation: 173.153 [23.000, 255.000], loss: 0.387786, mean_absolute_error: 31.695372, mean_q: 37.111159, mean_eps: 0.357209
  714714/2000000: episode: 2579, duration: 20.872s, episode steps: 359, steps per second: 17, episode reward: 275.200, mean reward: 0.767 [-1.000, 1.000], mean action: 3.593 [0.000, 6.000], mean observation: 164.403 [0.000, 255.000], loss: 0.375040, mean_absolute_error: 31.850569, mean_q: 37.275502, mean_eps: 0.356919
  715006/2000000: episode: 2580, duration: 16.190s, episode steps: 292, steps per second: 18, episode reward: 241.300, mean reward: 0.826 [-1.000, 1.000], mean action: 3.729 [0.000, 6.000], mean observation: 172.791 [23.000, 255.000], loss: 0.404166, mean_absolute_error: 31.453085, mean_q: 36.821897, mean_eps: 0.356626
  715287/2000000: episode: 2581, durat

  722624/2000000: episode: 2604, duration: 24.488s, episode steps: 419, steps per second: 17, episode reward: 292.800, mean reward: 0.699 [-1.000, 1.000], mean action: 3.315 [0.000, 6.000], mean observation: 162.412 [0.000, 255.000], loss: 0.399122, mean_absolute_error: 31.757537, mean_q: 37.149848, mean_eps: 0.349827
  722981/2000000: episode: 2605, duration: 20.561s, episode steps: 357, steps per second: 17, episode reward: 247.100, mean reward: 0.692 [-1.000, 1.000], mean action: 3.854 [0.000, 6.000], mean observation: 164.353 [0.000, 255.000], loss: 0.394880, mean_absolute_error: 31.502807, mean_q: 36.881082, mean_eps: 0.349478
  723269/2000000: episode: 2606, duration: 15.924s, episode steps: 288, steps per second: 18, episode reward: 248.400, mean reward: 0.863 [-1.000, 1.000], mean action: 3.688 [0.000, 6.000], mean observation: 172.584 [21.000, 255.000], loss: 0.394538, mean_absolute_error: 31.900425, mean_q: 37.339049, mean_eps: 0.349187
  723553/2000000: episode: 2607, durati

  730857/2000000: episode: 2630, duration: 16.014s, episode steps: 290, steps per second: 18, episode reward: 222.900, mean reward: 0.769 [-1.000, 1.000], mean action: 3.559 [0.000, 6.000], mean observation: 172.833 [24.000, 255.000], loss: 0.388714, mean_absolute_error: 32.545375, mean_q: 38.079664, mean_eps: 0.342359
  731154/2000000: episode: 2631, duration: 16.690s, episode steps: 297, steps per second: 18, episode reward: 222.900, mean reward: 0.751 [-1.000, 1.000], mean action: 3.640 [0.000, 6.000], mean observation: 172.852 [24.000, 255.000], loss: 0.398677, mean_absolute_error: 32.476865, mean_q: 38.015463, mean_eps: 0.342095
  731513/2000000: episode: 2632, duration: 20.738s, episode steps: 359, steps per second: 17, episode reward: 270.700, mean reward: 0.754 [0.100, 1.000], mean action: 3.727 [0.000, 6.000], mean observation: 166.039 [0.000, 255.000], loss: 0.396170, mean_absolute_error: 32.311382, mean_q: 37.821392, mean_eps: 0.341799
  731793/2000000: episode: 2633, durati

  739338/2000000: episode: 2656, duration: 16.155s, episode steps: 290, steps per second: 18, episode reward: 244.200, mean reward: 0.842 [-1.000, 1.000], mean action: 3.693 [0.000, 6.000], mean observation: 172.765 [23.000, 255.000], loss: 0.355777, mean_absolute_error: 32.463226, mean_q: 38.001713, mean_eps: 0.334727
  739690/2000000: episode: 2657, duration: 20.437s, episode steps: 352, steps per second: 17, episode reward: 288.100, mean reward: 0.818 [0.100, 1.000], mean action: 3.577 [0.000, 6.000], mean observation: 166.388 [0.000, 255.000], loss: 0.359583, mean_absolute_error: 32.441406, mean_q: 37.979812, mean_eps: 0.334437
  739994/2000000: episode: 2658, duration: 17.079s, episode steps: 304, steps per second: 18, episode reward: 237.100, mean reward: 0.780 [-1.000, 1.000], mean action: 3.891 [0.000, 6.000], mean observation: 172.628 [23.000, 255.000], loss: 0.366120, mean_absolute_error: 32.429692, mean_q: 37.997227, mean_eps: 0.334142
  740348/2000000: episode: 2659, durati

  747417/2000000: episode: 2682, duration: 20.062s, episode steps: 345, steps per second: 17, episode reward: 283.600, mean reward: 0.822 [-1.000, 1.000], mean action: 3.841 [0.000, 6.000], mean observation: 165.545 [18.000, 255.000], loss: 0.361459, mean_absolute_error: 32.731312, mean_q: 38.323992, mean_eps: 0.327480
  747704/2000000: episode: 2683, duration: 15.726s, episode steps: 287, steps per second: 18, episode reward: 231.700, mean reward: 0.807 [-1.000, 1.000], mean action: 3.422 [0.000, 6.000], mean observation: 172.723 [24.000, 255.000], loss: 0.339794, mean_absolute_error: 33.213513, mean_q: 38.847273, mean_eps: 0.327196
  748046/2000000: episode: 2684, duration: 19.937s, episode steps: 342, steps per second: 17, episode reward: 238.700, mean reward: 0.698 [-1.000, 1.000], mean action: 3.678 [0.000, 6.000], mean observation: 165.819 [14.000, 255.000], loss: 0.339411, mean_absolute_error: 33.122628, mean_q: 38.747013, mean_eps: 0.326913
  748399/2000000: episode: 2685, dura

  756764/2000000: episode: 2708, duration: 20.157s, episode steps: 346, steps per second: 17, episode reward: 250.500, mean reward: 0.724 [-1.000, 1.000], mean action: 3.728 [0.000, 6.000], mean observation: 165.851 [10.000, 255.000], loss: 0.374949, mean_absolute_error: 33.040796, mean_q: 38.653295, mean_eps: 0.319069
  757115/2000000: episode: 2709, duration: 20.529s, episode steps: 351, steps per second: 17, episode reward: 308.000, mean reward: 0.877 [0.500, 1.000], mean action: 3.587 [0.000, 6.000], mean observation: 165.699 [0.000, 255.000], loss: 0.351499, mean_absolute_error: 32.916182, mean_q: 38.540342, mean_eps: 0.318756
  757466/2000000: episode: 2710, duration: 20.497s, episode steps: 351, steps per second: 17, episode reward: 313.500, mean reward: 0.893 [0.500, 1.000], mean action: 3.530 [0.000, 6.000], mean observation: 166.068 [0.000, 255.000], loss: 0.359435, mean_absolute_error: 33.051482, mean_q: 38.680674, mean_eps: 0.318439
  757817/2000000: episode: 2711, duration

  765702/2000000: episode: 2734, duration: 20.482s, episode steps: 354, steps per second: 17, episode reward: 269.500, mean reward: 0.761 [0.500, 1.000], mean action: 3.576 [0.000, 6.000], mean observation: 166.761 [14.000, 255.000], loss: 0.339152, mean_absolute_error: 33.451551, mean_q: 39.164745, mean_eps: 0.311028
  766057/2000000: episode: 2735, duration: 20.638s, episode steps: 355, steps per second: 17, episode reward: 297.500, mean reward: 0.838 [0.500, 1.000], mean action: 3.513 [0.000, 6.000], mean observation: 165.680 [0.000, 255.000], loss: 0.356861, mean_absolute_error: 33.477586, mean_q: 39.224729, mean_eps: 0.310708
  766408/2000000: episode: 2736, duration: 20.351s, episode steps: 351, steps per second: 17, episode reward: 298.500, mean reward: 0.850 [0.500, 1.000], mean action: 3.581 [0.000, 6.000], mean observation: 166.508 [0.000, 255.000], loss: 0.350931, mean_absolute_error: 33.761437, mean_q: 39.554903, mean_eps: 0.310391
  766767/2000000: episode: 2737, duration:

  774210/2000000: episode: 2760, duration: 19.997s, episode steps: 344, steps per second: 17, episode reward: 282.700, mean reward: 0.822 [-1.000, 1.000], mean action: 3.715 [0.000, 6.000], mean observation: 166.115 [7.000, 255.000], loss: 0.364890, mean_absolute_error: 33.899478, mean_q: 39.701196, mean_eps: 0.303366
  774569/2000000: episode: 2761, duration: 20.694s, episode steps: 359, steps per second: 17, episode reward: 253.200, mean reward: 0.705 [0.100, 1.000], mean action: 3.607 [0.000, 6.000], mean observation: 166.601 [3.000, 255.000], loss: 0.319066, mean_absolute_error: 34.069025, mean_q: 39.874618, mean_eps: 0.303049
  774923/2000000: episode: 2762, duration: 20.429s, episode steps: 354, steps per second: 17, episode reward: 289.800, mean reward: 0.819 [0.100, 1.000], mean action: 3.664 [0.000, 6.000], mean observation: 166.450 [1.000, 255.000], loss: 0.321420, mean_absolute_error: 34.244872, mean_q: 40.082772, mean_eps: 0.302729
  775277/2000000: episode: 2763, duration:

  783281/2000000: episode: 2786, duration: 20.414s, episode steps: 354, steps per second: 17, episode reward: 314.000, mean reward: 0.887 [0.500, 1.000], mean action: 3.602 [0.000, 6.000], mean observation: 166.697 [0.000, 255.000], loss: 0.295080, mean_absolute_error: 34.407079, mean_q: 40.279484, mean_eps: 0.295206
  783585/2000000: episode: 2787, duration: 17.248s, episode steps: 304, steps per second: 18, episode reward: 255.300, mean reward: 0.840 [-1.000, 1.000], mean action: 3.806 [0.000, 6.000], mean observation: 172.660 [23.000, 255.000], loss: 0.296291, mean_absolute_error: 34.469472, mean_q: 40.369290, mean_eps: 0.294909
  783941/2000000: episode: 2788, duration: 20.548s, episode steps: 356, steps per second: 17, episode reward: 275.100, mean reward: 0.773 [0.100, 1.000], mean action: 3.576 [0.000, 6.000], mean observation: 167.072 [13.000, 255.000], loss: 0.294783, mean_absolute_error: 34.604783, mean_q: 40.522259, mean_eps: 0.294612
  784290/2000000: episode: 2789, duratio

  792305/2000000: episode: 2812, duration: 20.532s, episode steps: 354, steps per second: 17, episode reward: 313.500, mean reward: 0.886 [0.500, 1.000], mean action: 3.517 [0.000, 6.000], mean observation: 165.686 [13.000, 255.000], loss: 0.293907, mean_absolute_error: 35.545514, mean_q: 41.608722, mean_eps: 0.287085
  792584/2000000: episode: 2813, duration: 15.008s, episode steps: 279, steps per second: 19, episode reward: 204.100, mean reward: 0.732 [-1.000, 1.000], mean action: 3.498 [0.000, 6.000], mean observation: 172.688 [23.000, 255.000], loss: 0.315865, mean_absolute_error: 35.116819, mean_q: 41.109431, mean_eps: 0.286800
  792941/2000000: episode: 2814, duration: 20.605s, episode steps: 357, steps per second: 17, episode reward: 287.000, mean reward: 0.804 [0.500, 1.000], mean action: 3.599 [0.000, 6.000], mean observation: 165.245 [0.000, 255.000], loss: 0.336078, mean_absolute_error: 35.586796, mean_q: 41.651415, mean_eps: 0.286514
  793300/2000000: episode: 2815, duratio

  801199/2000000: episode: 2838, duration: 20.593s, episode steps: 357, steps per second: 17, episode reward: 313.500, mean reward: 0.878 [0.500, 1.000], mean action: 3.633 [0.000, 6.000], mean observation: 165.586 [6.000, 255.000], loss: 0.267370, mean_absolute_error: 36.125149, mean_q: 42.278158, mean_eps: 0.279082
  801494/2000000: episode: 2839, duration: 16.701s, episode steps: 295, steps per second: 18, episode reward: 218.500, mean reward: 0.741 [-1.000, 1.000], mean action: 3.705 [0.000, 6.000], mean observation: 173.120 [25.000, 255.000], loss: 0.291644, mean_absolute_error: 35.771316, mean_q: 41.902854, mean_eps: 0.278789
  801846/2000000: episode: 2840, duration: 20.508s, episode steps: 352, steps per second: 17, episode reward: 228.900, mean reward: 0.650 [0.100, 1.000], mean action: 3.619 [0.000, 6.000], mean observation: 167.223 [0.000, 255.000], loss: 0.284107, mean_absolute_error: 35.737342, mean_q: 41.838859, mean_eps: 0.278497
  802196/2000000: episode: 2841, duration

  810302/2000000: episode: 2864, duration: 20.524s, episode steps: 352, steps per second: 17, episode reward: 307.000, mean reward: 0.872 [0.500, 1.000], mean action: 3.602 [0.000, 6.000], mean observation: 165.448 [0.000, 255.000], loss: 0.265890, mean_absolute_error: 36.598688, mean_q: 42.850726, mean_eps: 0.270887
  810658/2000000: episode: 2865, duration: 20.545s, episode steps: 356, steps per second: 17, episode reward: 267.200, mean reward: 0.751 [0.100, 1.000], mean action: 3.691 [0.000, 6.000], mean observation: 167.036 [6.000, 255.000], loss: 0.260869, mean_absolute_error: 36.837672, mean_q: 43.113931, mean_eps: 0.270568
  811014/2000000: episode: 2866, duration: 20.631s, episode steps: 356, steps per second: 17, episode reward: 304.500, mean reward: 0.855 [0.100, 1.000], mean action: 3.587 [0.000, 6.000], mean observation: 165.204 [0.000, 255.000], loss: 0.266445, mean_absolute_error: 36.269488, mean_q: 42.476274, mean_eps: 0.270248
  811371/2000000: episode: 2867, duration: 

  819435/2000000: episode: 2890, duration: 20.490s, episode steps: 353, steps per second: 17, episode reward: 325.000, mean reward: 0.921 [0.500, 1.000], mean action: 3.513 [0.000, 6.000], mean observation: 165.476 [0.000, 255.000], loss: 0.261408, mean_absolute_error: 36.816944, mean_q: 43.080727, mean_eps: 0.262668
  819790/2000000: episode: 2891, duration: 20.546s, episode steps: 355, steps per second: 17, episode reward: 309.500, mean reward: 0.872 [0.500, 1.000], mean action: 3.575 [0.000, 6.000], mean observation: 165.424 [0.000, 255.000], loss: 0.244278, mean_absolute_error: 36.554442, mean_q: 42.783324, mean_eps: 0.262349
  820143/2000000: episode: 2892, duration: 20.453s, episode steps: 353, steps per second: 17, episode reward: 325.000, mean reward: 0.921 [0.500, 1.000], mean action: 3.428 [0.000, 6.000], mean observation: 166.199 [7.000, 255.000], loss: 0.258488, mean_absolute_error: 36.777620, mean_q: 43.043319, mean_eps: 0.262031
  820498/2000000: episode: 2893, duration: 

  828528/2000000: episode: 2916, duration: 21.248s, episode steps: 365, steps per second: 17, episode reward: 274.900, mean reward: 0.753 [-1.000, 1.000], mean action: 3.597 [0.000, 6.000], mean observation: 164.428 [0.000, 255.000], loss: 0.222285, mean_absolute_error: 37.543277, mean_q: 43.938154, mean_eps: 0.254490
  828822/2000000: episode: 2917, duration: 16.511s, episode steps: 294, steps per second: 18, episode reward: 252.600, mean reward: 0.859 [-1.000, 1.000], mean action: 3.585 [0.000, 6.000], mean observation: 172.735 [23.000, 255.000], loss: 0.258931, mean_absolute_error: 37.084000, mean_q: 43.378464, mean_eps: 0.254193
  829177/2000000: episode: 2918, duration: 20.588s, episode steps: 355, steps per second: 17, episode reward: 284.000, mean reward: 0.800 [0.500, 1.000], mean action: 3.665 [0.000, 6.000], mean observation: 166.329 [0.000, 255.000], loss: 0.234419, mean_absolute_error: 37.026277, mean_q: 43.333217, mean_eps: 0.253900
  829460/2000000: episode: 2919, duratio

  837018/2000000: episode: 2942, duration: 20.467s, episode steps: 351, steps per second: 17, episode reward: 276.500, mean reward: 0.788 [0.100, 1.000], mean action: 3.701 [0.000, 6.000], mean observation: 167.084 [0.000, 255.000], loss: 0.225690, mean_absolute_error: 37.551640, mean_q: 43.936218, mean_eps: 0.246842
  837298/2000000: episode: 2943, duration: 15.357s, episode steps: 280, steps per second: 18, episode reward: 241.600, mean reward: 0.863 [-1.000, 1.000], mean action: 3.657 [0.000, 6.000], mean observation: 172.967 [25.000, 255.000], loss: 0.255201, mean_absolute_error: 37.882387, mean_q: 44.314984, mean_eps: 0.246558
  837581/2000000: episode: 2944, duration: 15.733s, episode steps: 283, steps per second: 18, episode reward: 243.500, mean reward: 0.860 [-1.000, 1.000], mean action: 3.604 [0.000, 6.000], mean observation: 172.549 [20.000, 255.000], loss: 0.245611, mean_absolute_error: 37.222305, mean_q: 43.556193, mean_eps: 0.246304
  837931/2000000: episode: 2945, durati

  845820/2000000: episode: 2968, duration: 20.396s, episode steps: 347, steps per second: 17, episode reward: 310.500, mean reward: 0.895 [0.500, 1.000], mean action: 3.452 [0.000, 6.000], mean observation: 165.635 [0.000, 255.000], loss: 0.250568, mean_absolute_error: 37.965332, mean_q: 44.479183, mean_eps: 0.238919
  846172/2000000: episode: 2969, duration: 20.546s, episode steps: 352, steps per second: 17, episode reward: 311.000, mean reward: 0.884 [0.500, 1.000], mean action: 3.628 [0.000, 6.000], mean observation: 165.039 [0.000, 255.000], loss: 0.285233, mean_absolute_error: 37.434004, mean_q: 43.852898, mean_eps: 0.238605
  846528/2000000: episode: 2970, duration: 20.673s, episode steps: 356, steps per second: 17, episode reward: 296.200, mean reward: 0.832 [0.100, 1.000], mean action: 3.750 [0.000, 6.000], mean observation: 165.198 [0.000, 255.000], loss: 0.244818, mean_absolute_error: 37.642684, mean_q: 44.072533, mean_eps: 0.238287
  846881/2000000: episode: 2971, duration: 

  854846/2000000: episode: 2994, duration: 20.460s, episode steps: 352, steps per second: 17, episode reward: 314.000, mean reward: 0.892 [0.500, 1.000], mean action: 3.460 [0.000, 6.000], mean observation: 166.406 [1.000, 255.000], loss: 0.245282, mean_absolute_error: 38.290943, mean_q: 44.800861, mean_eps: 0.230797
  855118/2000000: episode: 2995, duration: 14.633s, episode steps: 272, steps per second: 19, episode reward: 214.400, mean reward: 0.788 [-1.000, 1.000], mean action: 3.629 [0.000, 6.000], mean observation: 173.754 [25.000, 255.000], loss: 0.236857, mean_absolute_error: 38.196090, mean_q: 44.688373, mean_eps: 0.230516
  855474/2000000: episode: 2996, duration: 20.765s, episode steps: 356, steps per second: 17, episode reward: 275.000, mean reward: 0.772 [0.500, 1.000], mean action: 3.705 [0.000, 6.000], mean observation: 165.431 [0.000, 255.000], loss: 0.235522, mean_absolute_error: 38.210823, mean_q: 44.701007, mean_eps: 0.230234
  855824/2000000: episode: 2997, duration

  863779/2000000: episode: 3020, duration: 20.611s, episode steps: 351, steps per second: 17, episode reward: 308.000, mean reward: 0.877 [0.500, 1.000], mean action: 3.547 [0.000, 6.000], mean observation: 165.513 [0.000, 255.000], loss: 0.247118, mean_absolute_error: 38.550245, mean_q: 45.138555, mean_eps: 0.222758
  864131/2000000: episode: 3021, duration: 20.704s, episode steps: 352, steps per second: 17, episode reward: 268.100, mean reward: 0.762 [0.100, 1.000], mean action: 3.432 [0.000, 6.000], mean observation: 165.514 [0.000, 255.000], loss: 0.245777, mean_absolute_error: 38.639824, mean_q: 45.213933, mean_eps: 0.222441
  864482/2000000: episode: 3022, duration: 20.546s, episode steps: 351, steps per second: 17, episode reward: 311.000, mean reward: 0.886 [0.500, 1.000], mean action: 3.427 [0.000, 6.000], mean observation: 165.339 [0.000, 255.000], loss: 0.276802, mean_absolute_error: 38.681555, mean_q: 45.270365, mean_eps: 0.222125
  864678/2000000: episode: 3023, duration: 

  873891/2000000: episode: 3046, duration: 20.541s, episode steps: 353, steps per second: 17, episode reward: 327.500, mean reward: 0.928 [0.500, 1.000], mean action: 3.550 [0.000, 6.000], mean observation: 165.935 [19.000, 255.000], loss: 0.214457, mean_absolute_error: 38.205205, mean_q: 44.720751, mean_eps: 0.213657
  874241/2000000: episode: 3047, duration: 20.413s, episode steps: 350, steps per second: 17, episode reward: 327.500, mean reward: 0.936 [0.500, 1.000], mean action: 3.637 [0.000, 6.000], mean observation: 165.413 [0.000, 255.000], loss: 0.237965, mean_absolute_error: 38.027808, mean_q: 44.508303, mean_eps: 0.213341
  874594/2000000: episode: 3048, duration: 20.533s, episode steps: 353, steps per second: 17, episode reward: 314.500, mean reward: 0.891 [0.500, 1.000], mean action: 3.609 [0.000, 6.000], mean observation: 165.189 [0.000, 255.000], loss: 0.233641, mean_absolute_error: 38.365928, mean_q: 44.886367, mean_eps: 0.213024
  874885/2000000: episode: 3049, duration:

  882850/2000000: episode: 3072, duration: 10.892s, episode steps: 214, steps per second: 20, episode reward: 162.100, mean reward: 0.757 [-1.000, 1.000], mean action: 2.939 [0.000, 6.000], mean observation: 172.124 [22.000, 255.000], loss: 0.256831, mean_absolute_error: 38.676426, mean_q: 45.272534, mean_eps: 0.205532
  883199/2000000: episode: 3073, duration: 20.424s, episode steps: 349, steps per second: 17, episode reward: 292.000, mean reward: 0.837 [0.500, 1.000], mean action: 3.613 [0.000, 6.000], mean observation: 166.135 [3.000, 255.000], loss: 0.231940, mean_absolute_error: 38.608845, mean_q: 45.196931, mean_eps: 0.205278
  883555/2000000: episode: 3074, duration: 20.732s, episode steps: 356, steps per second: 17, episode reward: 264.600, mean reward: 0.743 [0.100, 1.000], mean action: 3.508 [0.000, 6.000], mean observation: 167.728 [23.000, 255.000], loss: 0.223505, mean_absolute_error: 38.685017, mean_q: 45.301715, mean_eps: 0.204962
  883910/2000000: episode: 3075, duratio

  891978/2000000: episode: 3098, duration: 20.532s, episode steps: 353, steps per second: 17, episode reward: 321.500, mean reward: 0.911 [0.500, 1.000], mean action: 3.589 [0.000, 6.000], mean observation: 166.158 [18.000, 255.000], loss: 0.203550, mean_absolute_error: 39.803480, mean_q: 46.589099, mean_eps: 0.197378
  892328/2000000: episode: 3099, duration: 20.437s, episode steps: 350, steps per second: 17, episode reward: 319.500, mean reward: 0.913 [0.500, 1.000], mean action: 3.660 [0.000, 6.000], mean observation: 165.838 [1.000, 255.000], loss: 0.225486, mean_absolute_error: 39.574156, mean_q: 46.335679, mean_eps: 0.197063
  892678/2000000: episode: 3100, duration: 20.425s, episode steps: 350, steps per second: 17, episode reward: 321.500, mean reward: 0.919 [0.500, 1.000], mean action: 3.600 [0.000, 6.000], mean observation: 166.459 [17.000, 255.000], loss: 0.202454, mean_absolute_error: 39.819426, mean_q: 46.614702, mean_eps: 0.196748
  893030/2000000: episode: 3101, duration

  900978/2000000: episode: 3124, duration: 20.487s, episode steps: 350, steps per second: 17, episode reward: 327.000, mean reward: 0.934 [0.500, 1.000], mean action: 3.591 [0.000, 6.000], mean observation: 167.048 [25.000, 255.000], loss: 0.202752, mean_absolute_error: 40.797218, mean_q: 47.756953, mean_eps: 0.189278
  901329/2000000: episode: 3125, duration: 20.452s, episode steps: 351, steps per second: 17, episode reward: 313.000, mean reward: 0.892 [0.500, 1.000], mean action: 3.652 [0.000, 6.000], mean observation: 166.262 [0.000, 255.000], loss: 0.232710, mean_absolute_error: 40.215284, mean_q: 47.070352, mean_eps: 0.188961
  901682/2000000: episode: 3126, duration: 20.617s, episode steps: 353, steps per second: 17, episode reward: 326.500, mean reward: 0.925 [0.500, 1.000], mean action: 3.450 [0.000, 6.000], mean observation: 166.493 [21.000, 255.000], loss: 0.189812, mean_absolute_error: 40.331808, mean_q: 47.206876, mean_eps: 0.188645
  902032/2000000: episode: 3127, duration

  910027/2000000: episode: 3150, duration: 20.585s, episode steps: 355, steps per second: 17, episode reward: 330.000, mean reward: 0.930 [0.500, 1.000], mean action: 3.606 [0.000, 6.000], mean observation: 166.571 [23.000, 255.000], loss: 0.192239, mean_absolute_error: 40.687069, mean_q: 47.614819, mean_eps: 0.181137
  910384/2000000: episode: 3151, duration: 20.667s, episode steps: 357, steps per second: 17, episode reward: 315.500, mean reward: 0.884 [0.500, 1.000], mean action: 3.451 [0.000, 6.000], mean observation: 166.266 [25.000, 255.000], loss: 0.242676, mean_absolute_error: 40.916323, mean_q: 47.892025, mean_eps: 0.180816
  910738/2000000: episode: 3152, duration: 20.617s, episode steps: 354, steps per second: 17, episode reward: 310.500, mean reward: 0.877 [0.500, 1.000], mean action: 3.545 [0.000, 6.000], mean observation: 166.258 [25.000, 255.000], loss: 0.216471, mean_absolute_error: 41.168841, mean_q: 48.176425, mean_eps: 0.180496
  911093/2000000: episode: 3153, duratio

  919226/2000000: episode: 3176, duration: 21.354s, episode steps: 361, steps per second: 17, episode reward: 300.700, mean reward: 0.833 [-1.000, 1.000], mean action: 3.626 [0.000, 6.000], mean observation: 163.342 [0.000, 255.000], loss: 0.209545, mean_absolute_error: 41.562041, mean_q: 48.633391, mean_eps: 0.172859
  919578/2000000: episode: 3177, duration: 20.531s, episode steps: 352, steps per second: 17, episode reward: 324.000, mean reward: 0.920 [0.500, 1.000], mean action: 3.449 [0.000, 6.000], mean observation: 166.601 [25.000, 255.000], loss: 0.207388, mean_absolute_error: 41.757093, mean_q: 48.869708, mean_eps: 0.172538
  919932/2000000: episode: 3178, duration: 20.612s, episode steps: 354, steps per second: 17, episode reward: 320.000, mean reward: 0.904 [0.500, 1.000], mean action: 3.624 [0.000, 6.000], mean observation: 166.464 [20.000, 255.000], loss: 0.217811, mean_absolute_error: 41.674449, mean_q: 48.762900, mean_eps: 0.172221
  920284/2000000: episode: 3179, duratio

  928380/2000000: episode: 3202, duration: 20.478s, episode steps: 350, steps per second: 17, episode reward: 310.500, mean reward: 0.887 [0.500, 1.000], mean action: 3.711 [0.000, 6.000], mean observation: 166.217 [1.000, 255.000], loss: 0.226220, mean_absolute_error: 41.978048, mean_q: 49.116046, mean_eps: 0.164616
  928734/2000000: episode: 3203, duration: 20.684s, episode steps: 354, steps per second: 17, episode reward: 318.800, mean reward: 0.901 [0.100, 1.000], mean action: 3.565 [0.000, 6.000], mean observation: 167.253 [23.000, 255.000], loss: 0.200774, mean_absolute_error: 42.046065, mean_q: 49.202158, mean_eps: 0.164300
  929087/2000000: episode: 3204, duration: 20.507s, episode steps: 353, steps per second: 17, episode reward: 306.500, mean reward: 0.868 [0.500, 1.000], mean action: 3.592 [0.000, 6.000], mean observation: 166.746 [14.000, 255.000], loss: 0.216069, mean_absolute_error: 42.024460, mean_q: 49.169443, mean_eps: 0.163981
  929439/2000000: episode: 3205, duration

  937537/2000000: episode: 3228, duration: 20.396s, episode steps: 352, steps per second: 17, episode reward: 310.000, mean reward: 0.881 [0.500, 1.000], mean action: 3.568 [0.000, 6.000], mean observation: 166.161 [0.000, 255.000], loss: 0.209510, mean_absolute_error: 42.455018, mean_q: 49.667092, mean_eps: 0.156374
  937890/2000000: episode: 3229, duration: 20.449s, episode steps: 353, steps per second: 17, episode reward: 320.500, mean reward: 0.908 [0.500, 1.000], mean action: 3.705 [0.000, 6.000], mean observation: 165.986 [0.000, 255.000], loss: 0.215097, mean_absolute_error: 42.674292, mean_q: 49.935661, mean_eps: 0.156057
  938243/2000000: episode: 3230, duration: 20.605s, episode steps: 353, steps per second: 17, episode reward: 294.000, mean reward: 0.833 [0.500, 1.000], mean action: 3.567 [0.000, 6.000], mean observation: 165.281 [0.000, 255.000], loss: 0.192993, mean_absolute_error: 42.562495, mean_q: 49.824374, mean_eps: 0.155741
  938600/2000000: episode: 3231, duration: 

  946711/2000000: episode: 3254, duration: 20.429s, episode steps: 349, steps per second: 17, episode reward: 321.500, mean reward: 0.921 [0.500, 1.000], mean action: 3.413 [0.000, 6.000], mean observation: 166.979 [23.000, 255.000], loss: 0.187067, mean_absolute_error: 42.659793, mean_q: 49.943908, mean_eps: 0.148118
  947061/2000000: episode: 3255, duration: 20.516s, episode steps: 350, steps per second: 17, episode reward: 316.500, mean reward: 0.904 [0.500, 1.000], mean action: 3.406 [0.000, 6.000], mean observation: 166.839 [24.000, 255.000], loss: 0.210401, mean_absolute_error: 42.963073, mean_q: 50.272022, mean_eps: 0.147803
  947414/2000000: episode: 3256, duration: 20.516s, episode steps: 353, steps per second: 17, episode reward: 323.000, mean reward: 0.915 [0.500, 1.000], mean action: 3.445 [0.000, 6.000], mean observation: 166.914 [22.000, 255.000], loss: 0.196789, mean_absolute_error: 43.040704, mean_q: 50.373034, mean_eps: 0.147486
  947766/2000000: episode: 3257, duratio

  955891/2000000: episode: 3280, duration: 20.616s, episode steps: 356, steps per second: 17, episode reward: 313.500, mean reward: 0.881 [0.500, 1.000], mean action: 3.494 [0.000, 6.000], mean observation: 166.206 [24.000, 255.000], loss: 0.184367, mean_absolute_error: 43.258235, mean_q: 50.640577, mean_eps: 0.139859
  956230/2000000: episode: 3281, duration: 19.906s, episode steps: 339, steps per second: 17, episode reward: 263.800, mean reward: 0.778 [-1.000, 1.000], mean action: 3.761 [0.000, 6.000], mean observation: 166.342 [0.000, 255.000], loss: 0.179357, mean_absolute_error: 43.185809, mean_q: 50.521057, mean_eps: 0.139546
  956583/2000000: episode: 3282, duration: 20.544s, episode steps: 353, steps per second: 17, episode reward: 282.800, mean reward: 0.801 [0.100, 1.000], mean action: 3.637 [0.000, 6.000], mean observation: 167.104 [24.000, 255.000], loss: 0.188515, mean_absolute_error: 42.747814, mean_q: 50.043114, mean_eps: 0.139235
  956941/2000000: episode: 3283, duratio

  967728/2000000: episode: 3306, duration: 20.466s, episode steps: 477, steps per second: 23, episode reward: 439.000, mean reward: 0.920 [0.500, 1.000], mean action: 3.361 [0.000, 6.000], mean observation: 166.074 [18.000, 255.000], loss: 0.171252, mean_absolute_error: 43.089849, mean_q: 50.416749, mean_eps: 0.129261
  968202/2000000: episode: 3307, duration: 20.480s, episode steps: 474, steps per second: 23, episode reward: 425.000, mean reward: 0.897 [0.500, 1.000], mean action: 3.477 [0.000, 6.000], mean observation: 166.785 [24.000, 255.000], loss: 0.162591, mean_absolute_error: 43.342194, mean_q: 50.707299, mean_eps: 0.128832
  968681/2000000: episode: 3308, duration: 20.573s, episode steps: 479, steps per second: 23, episode reward: 401.400, mean reward: 0.838 [0.100, 1.000], mean action: 3.622 [0.000, 6.000], mean observation: 167.103 [24.000, 255.000], loss: 0.173191, mean_absolute_error: 43.013055, mean_q: 50.353631, mean_eps: 0.128402
  969153/2000000: episode: 3309, duratio

  980133/2000000: episode: 3332, duration: 20.424s, episode steps: 478, steps per second: 23, episode reward: 430.500, mean reward: 0.901 [0.500, 1.000], mean action: 3.600 [0.000, 6.000], mean observation: 166.847 [24.000, 255.000], loss: 0.157701, mean_absolute_error: 44.064288, mean_q: 51.587357, mean_eps: 0.118095
  980614/2000000: episode: 3333, duration: 20.470s, episode steps: 481, steps per second: 23, episode reward: 396.500, mean reward: 0.824 [0.500, 1.000], mean action: 3.347 [0.000, 6.000], mean observation: 167.208 [25.000, 255.000], loss: 0.148320, mean_absolute_error: 43.857651, mean_q: 51.341583, mean_eps: 0.117663
  981097/2000000: episode: 3334, duration: 20.606s, episode steps: 483, steps per second: 23, episode reward: 447.700, mean reward: 0.927 [0.100, 1.000], mean action: 3.327 [0.000, 6.000], mean observation: 167.062 [24.000, 255.000], loss: 0.146760, mean_absolute_error: 43.969156, mean_q: 51.458430, mean_eps: 0.117230
  981573/2000000: episode: 3335, duratio

  992612/2000000: episode: 3358, duration: 20.503s, episode steps: 482, steps per second: 24, episode reward: 446.000, mean reward: 0.925 [0.500, 1.000], mean action: 3.438 [0.000, 6.000], mean observation: 166.331 [25.000, 255.000], loss: 0.147905, mean_absolute_error: 43.705773, mean_q: 51.114933, mean_eps: 0.106867
  993091/2000000: episode: 3359, duration: 20.427s, episode steps: 479, steps per second: 23, episode reward: 410.500, mean reward: 0.857 [0.500, 1.000], mean action: 3.641 [0.000, 6.000], mean observation: 166.162 [16.000, 255.000], loss: 0.125815, mean_absolute_error: 44.161824, mean_q: 51.653993, mean_eps: 0.106435
  993574/2000000: episode: 3360, duration: 20.574s, episode steps: 483, steps per second: 23, episode reward: 443.000, mean reward: 0.917 [0.500, 1.000], mean action: 3.493 [0.000, 6.000], mean observation: 166.001 [24.000, 255.000], loss: 0.136093, mean_absolute_error: 43.894111, mean_q: 51.351697, mean_eps: 0.106001
  994047/2000000: episode: 3361, duratio

 1005135/2000000: episode: 3384, duration: 20.565s, episode steps: 478, steps per second: 23, episode reward: 377.000, mean reward: 0.789 [0.500, 1.000], mean action: 3.490 [0.000, 6.000], mean observation: 165.977 [0.000, 255.000], loss: 0.143646, mean_absolute_error: 43.877070, mean_q: 51.361808, mean_eps: 0.100000
 1005323/2000000: episode: 3385, duration: 7.583s, episode steps: 188, steps per second: 25, episode reward: 108.800, mean reward: 0.579 [-1.000, 1.000], mean action: 3.181 [0.000, 6.000], mean observation: 172.989 [23.000, 255.000], loss: 0.116543, mean_absolute_error: 44.091028, mean_q: 51.564943, mean_eps: 0.100000
 1005806/2000000: episode: 3386, duration: 20.558s, episode steps: 483, steps per second: 23, episode reward: 376.500, mean reward: 0.780 [0.500, 1.000], mean action: 3.518 [0.000, 6.000], mean observation: 165.616 [0.000, 255.000], loss: 0.125298, mean_absolute_error: 44.471466, mean_q: 52.015066, mean_eps: 0.100000
 1006288/2000000: episode: 3387, duration:

 1016490/2000000: episode: 3410, duration: 20.399s, episode steps: 475, steps per second: 23, episode reward: 396.800, mean reward: 0.835 [-1.000, 1.000], mean action: 3.815 [0.000, 6.000], mean observation: 164.772 [0.000, 255.000], loss: 0.135402, mean_absolute_error: 44.096816, mean_q: 51.586624, mean_eps: 0.100000
 1016978/2000000: episode: 3411, duration: 20.635s, episode steps: 488, steps per second: 24, episode reward: 347.500, mean reward: 0.712 [0.500, 1.000], mean action: 3.424 [0.000, 6.000], mean observation: 165.786 [25.000, 255.000], loss: 0.132312, mean_absolute_error: 44.342124, mean_q: 51.873038, mean_eps: 0.100000
 1017462/2000000: episode: 3412, duration: 20.603s, episode steps: 484, steps per second: 23, episode reward: 428.000, mean reward: 0.884 [0.500, 1.000], mean action: 3.564 [0.000, 6.000], mean observation: 166.632 [24.000, 255.000], loss: 0.135682, mean_absolute_error: 44.291265, mean_q: 51.807589, mean_eps: 0.100000
 1017946/2000000: episode: 3413, duratio

 1028906/2000000: episode: 3436, duration: 20.588s, episode steps: 488, steps per second: 24, episode reward: 426.500, mean reward: 0.874 [0.500, 1.000], mean action: 3.459 [0.000, 6.000], mean observation: 166.202 [24.000, 255.000], loss: 0.137907, mean_absolute_error: 44.715321, mean_q: 52.325503, mean_eps: 0.100000
 1029380/2000000: episode: 3437, duration: 20.507s, episode steps: 474, steps per second: 23, episode reward: 429.000, mean reward: 0.905 [0.500, 1.000], mean action: 3.565 [0.000, 6.000], mean observation: 166.510 [25.000, 255.000], loss: 0.116890, mean_absolute_error: 45.013143, mean_q: 52.658590, mean_eps: 0.100000
 1029862/2000000: episode: 3438, duration: 20.583s, episode steps: 482, steps per second: 23, episode reward: 425.500, mean reward: 0.883 [0.500, 1.000], mean action: 3.415 [0.000, 6.000], mean observation: 165.813 [20.000, 255.000], loss: 0.130887, mean_absolute_error: 45.030446, mean_q: 52.683123, mean_eps: 0.100000
 1030343/2000000: episode: 3439, duratio

 1041464/2000000: episode: 3462, duration: 20.678s, episode steps: 486, steps per second: 24, episode reward: 433.500, mean reward: 0.892 [0.500, 1.000], mean action: 3.391 [0.000, 6.000], mean observation: 167.118 [25.000, 255.000], loss: 0.126870, mean_absolute_error: 45.232261, mean_q: 52.935271, mean_eps: 0.100000
 1041950/2000000: episode: 3463, duration: 20.594s, episode steps: 486, steps per second: 24, episode reward: 417.000, mean reward: 0.858 [0.500, 1.000], mean action: 3.564 [0.000, 6.000], mean observation: 166.756 [23.000, 255.000], loss: 0.138133, mean_absolute_error: 44.945380, mean_q: 52.603531, mean_eps: 0.100000
 1042432/2000000: episode: 3464, duration: 20.612s, episode steps: 482, steps per second: 23, episode reward: 430.500, mean reward: 0.893 [0.500, 1.000], mean action: 3.527 [0.000, 6.000], mean observation: 166.448 [24.000, 255.000], loss: 0.124889, mean_absolute_error: 45.451788, mean_q: 53.187132, mean_eps: 0.100000
 1042910/2000000: episode: 3465, duratio

 1053994/2000000: episode: 3488, duration: 20.569s, episode steps: 484, steps per second: 24, episode reward: 422.000, mean reward: 0.872 [0.500, 1.000], mean action: 3.607 [0.000, 6.000], mean observation: 166.102 [25.000, 255.000], loss: 0.121933, mean_absolute_error: 45.700130, mean_q: 53.458144, mean_eps: 0.100000
 1054477/2000000: episode: 3489, duration: 20.624s, episode steps: 483, steps per second: 23, episode reward: 415.500, mean reward: 0.860 [0.500, 1.000], mean action: 3.511 [0.000, 6.000], mean observation: 166.066 [24.000, 255.000], loss: 0.117648, mean_absolute_error: 45.555476, mean_q: 53.293471, mean_eps: 0.100000
 1054955/2000000: episode: 3490, duration: 20.486s, episode steps: 478, steps per second: 23, episode reward: 415.500, mean reward: 0.869 [0.500, 1.000], mean action: 3.709 [0.000, 6.000], mean observation: 166.058 [23.000, 255.000], loss: 0.122478, mean_absolute_error: 45.749048, mean_q: 53.532611, mean_eps: 0.100000
 1055436/2000000: episode: 3491, duratio

 1066530/2000000: episode: 3514, duration: 20.597s, episode steps: 486, steps per second: 24, episode reward: 405.500, mean reward: 0.834 [0.500, 1.000], mean action: 3.508 [0.000, 6.000], mean observation: 166.162 [23.000, 255.000], loss: 0.110787, mean_absolute_error: 45.604175, mean_q: 53.379972, mean_eps: 0.100000
 1067014/2000000: episode: 3515, duration: 20.554s, episode steps: 484, steps per second: 24, episode reward: 427.000, mean reward: 0.882 [0.500, 1.000], mean action: 3.310 [0.000, 6.000], mean observation: 165.886 [24.000, 255.000], loss: 0.107185, mean_absolute_error: 45.779228, mean_q: 53.547153, mean_eps: 0.100000
 1067499/2000000: episode: 3516, duration: 20.585s, episode steps: 485, steps per second: 24, episode reward: 399.000, mean reward: 0.823 [0.500, 1.000], mean action: 3.649 [0.000, 6.000], mean observation: 166.211 [21.000, 255.000], loss: 0.114793, mean_absolute_error: 45.454573, mean_q: 53.173642, mean_eps: 0.100000
 1067987/2000000: episode: 3517, duratio

 1079075/2000000: episode: 3540, duration: 20.556s, episode steps: 478, steps per second: 23, episode reward: 416.000, mean reward: 0.870 [0.500, 1.000], mean action: 3.525 [0.000, 6.000], mean observation: 166.900 [24.000, 255.000], loss: 0.105660, mean_absolute_error: 45.824431, mean_q: 53.613603, mean_eps: 0.100000
 1079560/2000000: episode: 3541, duration: 20.565s, episode steps: 485, steps per second: 24, episode reward: 420.500, mean reward: 0.867 [0.500, 1.000], mean action: 3.423 [0.000, 6.000], mean observation: 165.823 [16.000, 255.000], loss: 0.107969, mean_absolute_error: 45.609090, mean_q: 53.342831, mean_eps: 0.100000
 1080043/2000000: episode: 3542, duration: 20.576s, episode steps: 483, steps per second: 23, episode reward: 406.500, mean reward: 0.842 [0.500, 1.000], mean action: 3.542 [0.000, 6.000], mean observation: 166.704 [25.000, 255.000], loss: 0.108760, mean_absolute_error: 45.913737, mean_q: 53.697687, mean_eps: 0.100000
 1080528/2000000: episode: 3543, duratio

 1091595/2000000: episode: 3566, duration: 20.505s, episode steps: 477, steps per second: 23, episode reward: 429.000, mean reward: 0.899 [0.500, 1.000], mean action: 3.518 [0.000, 6.000], mean observation: 166.710 [24.000, 255.000], loss: 0.107898, mean_absolute_error: 46.767037, mean_q: 54.706495, mean_eps: 0.100000
 1092075/2000000: episode: 3567, duration: 20.484s, episode steps: 480, steps per second: 23, episode reward: 443.000, mean reward: 0.923 [0.500, 1.000], mean action: 3.667 [0.000, 6.000], mean observation: 166.595 [24.000, 255.000], loss: 0.115265, mean_absolute_error: 46.554101, mean_q: 54.461199, mean_eps: 0.100000
 1092546/2000000: episode: 3568, duration: 20.456s, episode steps: 471, steps per second: 23, episode reward: 431.500, mean reward: 0.916 [0.500, 1.000], mean action: 3.350 [0.000, 6.000], mean observation: 166.388 [23.000, 255.000], loss: 0.114758, mean_absolute_error: 46.948811, mean_q: 54.922087, mean_eps: 0.100000
 1093020/2000000: episode: 3569, duratio

 1104094/2000000: episode: 3592, duration: 20.542s, episode steps: 484, steps per second: 24, episode reward: 424.000, mean reward: 0.876 [0.500, 1.000], mean action: 3.707 [0.000, 6.000], mean observation: 166.050 [0.000, 255.000], loss: 0.097491, mean_absolute_error: 46.802145, mean_q: 54.746756, mean_eps: 0.100000
 1104577/2000000: episode: 3593, duration: 20.612s, episode steps: 483, steps per second: 23, episode reward: 401.500, mean reward: 0.831 [0.500, 1.000], mean action: 3.602 [0.000, 6.000], mean observation: 166.497 [15.000, 255.000], loss: 0.095309, mean_absolute_error: 46.957113, mean_q: 54.934527, mean_eps: 0.100000
 1105058/2000000: episode: 3594, duration: 20.532s, episode steps: 481, steps per second: 23, episode reward: 431.000, mean reward: 0.896 [0.500, 1.000], mean action: 3.557 [0.000, 6.000], mean observation: 166.640 [18.000, 255.000], loss: 0.108451, mean_absolute_error: 46.912143, mean_q: 54.874108, mean_eps: 0.100000
 1105547/2000000: episode: 3595, duration

 1116600/2000000: episode: 3618, duration: 20.514s, episode steps: 478, steps per second: 23, episode reward: 433.000, mean reward: 0.906 [0.500, 1.000], mean action: 3.492 [0.000, 6.000], mean observation: 166.402 [23.000, 255.000], loss: 0.094618, mean_absolute_error: 47.189883, mean_q: 55.194924, mean_eps: 0.100000
 1117086/2000000: episode: 3619, duration: 20.651s, episode steps: 486, steps per second: 24, episode reward: 421.500, mean reward: 0.867 [0.500, 1.000], mean action: 3.391 [0.000, 6.000], mean observation: 166.265 [23.000, 255.000], loss: 0.101145, mean_absolute_error: 47.542672, mean_q: 55.599469, mean_eps: 0.100000
 1117573/2000000: episode: 3620, duration: 20.594s, episode steps: 487, steps per second: 24, episode reward: 414.500, mean reward: 0.851 [0.500, 1.000], mean action: 3.579 [0.000, 6.000], mean observation: 165.850 [19.000, 255.000], loss: 0.097084, mean_absolute_error: 47.270484, mean_q: 55.297947, mean_eps: 0.100000
 1118054/2000000: episode: 3621, duratio

 1129137/2000000: episode: 3644, duration: 20.512s, episode steps: 481, steps per second: 23, episode reward: 426.500, mean reward: 0.887 [0.500, 1.000], mean action: 3.524 [0.000, 6.000], mean observation: 166.851 [23.000, 255.000], loss: 0.093310, mean_absolute_error: 47.442724, mean_q: 55.470480, mean_eps: 0.100000
 1129625/2000000: episode: 3645, duration: 20.494s, episode steps: 488, steps per second: 24, episode reward: 442.000, mean reward: 0.906 [0.500, 1.000], mean action: 3.586 [0.000, 6.000], mean observation: 166.623 [24.000, 255.000], loss: 0.087652, mean_absolute_error: 47.396258, mean_q: 55.423254, mean_eps: 0.100000
 1130106/2000000: episode: 3646, duration: 20.470s, episode steps: 481, steps per second: 23, episode reward: 446.500, mean reward: 0.928 [0.500, 1.000], mean action: 3.308 [0.000, 6.000], mean observation: 166.643 [23.000, 255.000], loss: 0.100740, mean_absolute_error: 47.537967, mean_q: 55.587538, mean_eps: 0.100000
 1130586/2000000: episode: 3647, duratio

 1141692/2000000: episode: 3670, duration: 20.436s, episode steps: 483, steps per second: 24, episode reward: 435.000, mean reward: 0.901 [0.500, 1.000], mean action: 3.602 [0.000, 6.000], mean observation: 166.453 [16.000, 255.000], loss: 0.087050, mean_absolute_error: 47.924088, mean_q: 56.044313, mean_eps: 0.100000
 1142173/2000000: episode: 3671, duration: 20.531s, episode steps: 481, steps per second: 23, episode reward: 434.000, mean reward: 0.902 [0.500, 1.000], mean action: 3.430 [0.000, 6.000], mean observation: 166.946 [23.000, 255.000], loss: 0.089016, mean_absolute_error: 47.884500, mean_q: 55.994354, mean_eps: 0.100000
 1142656/2000000: episode: 3672, duration: 20.557s, episode steps: 483, steps per second: 23, episode reward: 423.000, mean reward: 0.876 [0.500, 1.000], mean action: 3.638 [0.000, 6.000], mean observation: 166.040 [0.000, 255.000], loss: 0.090247, mean_absolute_error: 47.939628, mean_q: 56.066166, mean_eps: 0.100000
 1143142/2000000: episode: 3673, duration

 1153538/2000000: episode: 3696, duration: 20.597s, episode steps: 356, steps per second: 17, episode reward: 311.500, mean reward: 0.875 [0.500, 1.000], mean action: 3.610 [0.000, 6.000], mean observation: 165.574 [0.000, 255.000], loss: 0.083589, mean_absolute_error: 48.483235, mean_q: 56.682546, mean_eps: 0.100000
 1153895/2000000: episode: 3697, duration: 20.519s, episode steps: 357, steps per second: 17, episode reward: 317.500, mean reward: 0.889 [0.500, 1.000], mean action: 3.557 [0.000, 6.000], mean observation: 165.661 [0.000, 255.000], loss: 0.085907, mean_absolute_error: 48.426018, mean_q: 56.624393, mean_eps: 0.100000
 1154256/2000000: episode: 3698, duration: 20.659s, episode steps: 361, steps per second: 17, episode reward: 289.500, mean reward: 0.802 [0.500, 1.000], mean action: 3.665 [0.000, 6.000], mean observation: 166.239 [21.000, 255.000], loss: 0.084388, mean_absolute_error: 48.304914, mean_q: 56.494206, mean_eps: 0.100000
 1154617/2000000: episode: 3699, duration:

 1162863/2000000: episode: 3722, duration: 20.715s, episode steps: 361, steps per second: 17, episode reward: 298.500, mean reward: 0.827 [0.500, 1.000], mean action: 3.684 [0.000, 6.000], mean observation: 165.236 [0.000, 255.000], loss: 0.092767, mean_absolute_error: 48.290709, mean_q: 56.453685, mean_eps: 0.100000
 1163221/2000000: episode: 3723, duration: 20.527s, episode steps: 358, steps per second: 17, episode reward: 321.000, mean reward: 0.897 [0.500, 1.000], mean action: 3.665 [0.000, 6.000], mean observation: 166.419 [18.000, 255.000], loss: 0.092752, mean_absolute_error: 48.734144, mean_q: 56.980737, mean_eps: 0.100000
 1163658/2000000: episode: 3724, duration: 66.270s, episode steps: 437, steps per second: 7, episode reward: 391.500, mean reward: 0.896 [0.500, 1.000], mean action: 3.613 [0.000, 6.000], mean observation: 169.112 [23.000, 255.000], loss: 0.086463, mean_absolute_error: 48.449270, mean_q: 56.653073, mean_eps: 0.100000
 1164007/2000000: episode: 3725, duration:

 1174644/2000000: episode: 3748, duration: 20.506s, episode steps: 475, steps per second: 23, episode reward: 412.000, mean reward: 0.867 [0.500, 1.000], mean action: 3.655 [0.000, 6.000], mean observation: 165.617 [13.000, 255.000], loss: 0.097591, mean_absolute_error: 47.922613, mean_q: 56.038464, mean_eps: 0.100000
 1175126/2000000: episode: 3749, duration: 20.531s, episode steps: 482, steps per second: 23, episode reward: 428.500, mean reward: 0.889 [0.500, 1.000], mean action: 3.494 [0.000, 6.000], mean observation: 165.945 [22.000, 255.000], loss: 0.094601, mean_absolute_error: 47.900329, mean_q: 56.030909, mean_eps: 0.100000
 1175611/2000000: episode: 3750, duration: 20.660s, episode steps: 485, steps per second: 23, episode reward: 374.500, mean reward: 0.772 [0.500, 1.000], mean action: 3.680 [0.000, 6.000], mean observation: 164.584 [0.000, 255.000], loss: 0.101959, mean_absolute_error: 48.193440, mean_q: 56.361103, mean_eps: 0.100000
 1176097/2000000: episode: 3751, duration

 1187112/2000000: episode: 3774, duration: 20.494s, episode steps: 482, steps per second: 24, episode reward: 416.500, mean reward: 0.864 [0.500, 1.000], mean action: 3.672 [0.000, 6.000], mean observation: 165.804 [0.000, 255.000], loss: 0.098498, mean_absolute_error: 47.976039, mean_q: 56.124663, mean_eps: 0.100000
 1187596/2000000: episode: 3775, duration: 20.521s, episode steps: 484, steps per second: 24, episode reward: 441.500, mean reward: 0.912 [0.500, 1.000], mean action: 3.550 [0.000, 6.000], mean observation: 166.476 [23.000, 255.000], loss: 0.098725, mean_absolute_error: 48.378857, mean_q: 56.574655, mean_eps: 0.100000
 1188074/2000000: episode: 3776, duration: 20.437s, episode steps: 478, steps per second: 23, episode reward: 436.000, mean reward: 0.912 [0.500, 1.000], mean action: 3.565 [0.000, 6.000], mean observation: 165.788 [0.000, 255.000], loss: 0.091903, mean_absolute_error: 48.132168, mean_q: 56.288097, mean_eps: 0.100000
 1188554/2000000: episode: 3777, duration:

 1199619/2000000: episode: 3800, duration: 20.554s, episode steps: 487, steps per second: 24, episode reward: 427.000, mean reward: 0.877 [0.500, 1.000], mean action: 3.630 [0.000, 6.000], mean observation: 165.997 [13.000, 255.000], loss: 0.096026, mean_absolute_error: 48.391476, mean_q: 56.601017, mean_eps: 0.100000
 1200096/2000000: episode: 3801, duration: 20.632s, episode steps: 477, steps per second: 23, episode reward: 399.100, mean reward: 0.837 [0.100, 1.000], mean action: 3.488 [0.000, 6.000], mean observation: 166.132 [0.000, 255.000], loss: 0.097793, mean_absolute_error: 48.599756, mean_q: 56.839899, mean_eps: 0.100000
 1200581/2000000: episode: 3802, duration: 20.528s, episode steps: 485, steps per second: 24, episode reward: 421.000, mean reward: 0.868 [0.500, 1.000], mean action: 3.530 [0.000, 6.000], mean observation: 165.885 [0.000, 255.000], loss: 0.103424, mean_absolute_error: 48.459849, mean_q: 56.677234, mean_eps: 0.100000
 1201060/2000000: episode: 3803, duration:

 1212135/2000000: episode: 3826, duration: 20.419s, episode steps: 478, steps per second: 23, episode reward: 431.000, mean reward: 0.902 [0.500, 1.000], mean action: 3.695 [0.000, 6.000], mean observation: 166.271 [18.000, 255.000], loss: 0.094268, mean_absolute_error: 49.108858, mean_q: 57.416199, mean_eps: 0.100000
 1212617/2000000: episode: 3827, duration: 20.732s, episode steps: 482, steps per second: 23, episode reward: 375.500, mean reward: 0.779 [0.500, 1.000], mean action: 3.672 [0.000, 6.000], mean observation: 166.024 [13.000, 255.000], loss: 0.099638, mean_absolute_error: 48.744956, mean_q: 57.027723, mean_eps: 0.100000
 1213102/2000000: episode: 3828, duration: 20.654s, episode steps: 485, steps per second: 23, episode reward: 412.500, mean reward: 0.851 [0.500, 1.000], mean action: 3.470 [0.000, 6.000], mean observation: 164.756 [0.000, 255.000], loss: 0.095976, mean_absolute_error: 48.494731, mean_q: 56.707605, mean_eps: 0.100000
 1213586/2000000: episode: 3829, duration

 1224597/2000000: episode: 3852, duration: 20.420s, episode steps: 478, steps per second: 23, episode reward: 444.500, mean reward: 0.930 [0.500, 1.000], mean action: 3.496 [0.000, 6.000], mean observation: 166.624 [16.000, 255.000], loss: 0.076661, mean_absolute_error: 49.069178, mean_q: 57.360392, mean_eps: 0.100000
 1225080/2000000: episode: 3853, duration: 20.640s, episode steps: 483, steps per second: 23, episode reward: 425.500, mean reward: 0.881 [0.500, 1.000], mean action: 3.497 [0.000, 6.000], mean observation: 166.252 [8.000, 255.000], loss: 0.086434, mean_absolute_error: 49.065264, mean_q: 57.383764, mean_eps: 0.100000
 1225561/2000000: episode: 3854, duration: 20.434s, episode steps: 481, steps per second: 24, episode reward: 446.000, mean reward: 0.927 [0.500, 1.000], mean action: 3.432 [0.000, 6.000], mean observation: 167.220 [23.000, 255.000], loss: 0.082954, mean_absolute_error: 49.209082, mean_q: 57.521686, mean_eps: 0.100000
 1226038/2000000: episode: 3855, duration

 1236695/2000000: episode: 3878, duration: 20.563s, episode steps: 478, steps per second: 23, episode reward: 422.500, mean reward: 0.884 [0.500, 1.000], mean action: 3.665 [0.000, 6.000], mean observation: 165.985 [11.000, 255.000], loss: 0.079589, mean_absolute_error: 49.260144, mean_q: 57.610128, mean_eps: 0.100000
 1237164/2000000: episode: 3879, duration: 20.444s, episode steps: 469, steps per second: 23, episode reward: 442.500, mean reward: 0.943 [0.500, 1.000], mean action: 3.748 [0.000, 6.000], mean observation: 165.933 [12.000, 255.000], loss: 0.083174, mean_absolute_error: 49.442629, mean_q: 57.817173, mean_eps: 0.100000
 1237632/2000000: episode: 3880, duration: 20.532s, episode steps: 468, steps per second: 23, episode reward: 438.500, mean reward: 0.937 [0.500, 1.000], mean action: 3.417 [0.000, 6.000], mean observation: 166.099 [23.000, 255.000], loss: 0.075001, mean_absolute_error: 49.275084, mean_q: 57.613578, mean_eps: 0.100000
 1238099/2000000: episode: 3881, duratio

 1249009/2000000: episode: 3904, duration: 20.414s, episode steps: 477, steps per second: 23, episode reward: 448.000, mean reward: 0.939 [0.500, 1.000], mean action: 3.537 [0.000, 6.000], mean observation: 166.368 [16.000, 255.000], loss: 0.077135, mean_absolute_error: 49.518320, mean_q: 57.913032, mean_eps: 0.100000
 1249487/2000000: episode: 3905, duration: 20.453s, episode steps: 478, steps per second: 23, episode reward: 442.000, mean reward: 0.925 [0.500, 1.000], mean action: 3.515 [0.000, 6.000], mean observation: 165.868 [0.000, 255.000], loss: 0.083405, mean_absolute_error: 49.717776, mean_q: 58.136844, mean_eps: 0.100000
 1249966/2000000: episode: 3906, duration: 20.530s, episode steps: 479, steps per second: 23, episode reward: 434.500, mean reward: 0.907 [0.500, 1.000], mean action: 3.597 [0.000, 6.000], mean observation: 165.864 [0.000, 255.000], loss: 0.089585, mean_absolute_error: 49.774234, mean_q: 58.193882, mean_eps: 0.100000
 1250439/2000000: episode: 3907, duration:

 1261370/2000000: episode: 3930, duration: 20.568s, episode steps: 481, steps per second: 23, episode reward: 395.500, mean reward: 0.822 [0.500, 1.000], mean action: 3.613 [0.000, 6.000], mean observation: 165.857 [2.000, 255.000], loss: 0.087200, mean_absolute_error: 50.035563, mean_q: 58.515601, mean_eps: 0.100000
 1261842/2000000: episode: 3931, duration: 20.458s, episode steps: 472, steps per second: 23, episode reward: 444.000, mean reward: 0.941 [0.500, 1.000], mean action: 3.489 [0.000, 6.000], mean observation: 166.581 [25.000, 255.000], loss: 0.074870, mean_absolute_error: 50.168520, mean_q: 58.651467, mean_eps: 0.100000
 1262316/2000000: episode: 3932, duration: 20.362s, episode steps: 474, steps per second: 23, episode reward: 443.500, mean reward: 0.936 [0.500, 1.000], mean action: 3.407 [0.000, 6.000], mean observation: 166.276 [8.000, 255.000], loss: 0.066805, mean_absolute_error: 50.302571, mean_q: 58.803198, mean_eps: 0.100000
 1262790/2000000: episode: 3933, duration:

 1273639/2000000: episode: 3956, duration: 20.506s, episode steps: 460, steps per second: 22, episode reward: 398.000, mean reward: 0.865 [0.500, 1.000], mean action: 3.770 [0.000, 6.000], mean observation: 165.457 [14.000, 255.000], loss: 0.075574, mean_absolute_error: 50.395834, mean_q: 58.928369, mean_eps: 0.100000
 1274103/2000000: episode: 3957, duration: 20.578s, episode steps: 464, steps per second: 23, episode reward: 422.500, mean reward: 0.911 [0.500, 1.000], mean action: 3.459 [0.000, 6.000], mean observation: 165.864 [21.000, 255.000], loss: 0.068494, mean_absolute_error: 50.242230, mean_q: 58.733312, mean_eps: 0.100000
 1274582/2000000: episode: 3958, duration: 20.463s, episode steps: 479, steps per second: 23, episode reward: 422.500, mean reward: 0.882 [0.500, 1.000], mean action: 3.505 [0.000, 6.000], mean observation: 166.431 [18.000, 255.000], loss: 0.065844, mean_absolute_error: 50.460813, mean_q: 58.989966, mean_eps: 0.100000
 1275063/2000000: episode: 3959, duratio

 1285790/2000000: episode: 3982, duration: 20.513s, episode steps: 476, steps per second: 23, episode reward: 423.500, mean reward: 0.890 [0.500, 1.000], mean action: 3.576 [0.000, 6.000], mean observation: 166.297 [13.000, 255.000], loss: 0.073858, mean_absolute_error: 50.552353, mean_q: 59.100603, mean_eps: 0.100000
 1286270/2000000: episode: 3983, duration: 20.514s, episode steps: 480, steps per second: 23, episode reward: 423.500, mean reward: 0.882 [0.500, 1.000], mean action: 3.413 [0.000, 6.000], mean observation: 166.101 [17.000, 255.000], loss: 0.074359, mean_absolute_error: 50.338623, mean_q: 58.851758, mean_eps: 0.100000
 1286743/2000000: episode: 3984, duration: 20.529s, episode steps: 473, steps per second: 23, episode reward: 413.000, mean reward: 0.873 [0.500, 1.000], mean action: 3.643 [0.000, 6.000], mean observation: 165.846 [0.000, 255.000], loss: 0.074604, mean_absolute_error: 50.840702, mean_q: 59.428419, mean_eps: 0.100000
 1287220/2000000: episode: 3985, duration

 1298128/2000000: episode: 4008, duration: 20.632s, episode steps: 474, steps per second: 23, episode reward: 421.500, mean reward: 0.889 [0.500, 1.000], mean action: 3.435 [0.000, 6.000], mean observation: 166.166 [16.000, 255.000], loss: 0.080359, mean_absolute_error: 50.615182, mean_q: 59.193546, mean_eps: 0.100000
 1298608/2000000: episode: 4009, duration: 20.561s, episode steps: 480, steps per second: 23, episode reward: 371.100, mean reward: 0.773 [0.100, 1.000], mean action: 3.621 [0.000, 6.000], mean observation: 165.823 [0.000, 255.000], loss: 0.078078, mean_absolute_error: 50.689906, mean_q: 59.281361, mean_eps: 0.100000
 1299079/2000000: episode: 4010, duration: 20.539s, episode steps: 471, steps per second: 23, episode reward: 413.000, mean reward: 0.877 [0.500, 1.000], mean action: 3.711 [0.000, 6.000], mean observation: 165.853 [13.000, 255.000], loss: 0.083956, mean_absolute_error: 50.621022, mean_q: 59.180341, mean_eps: 0.100000
 1299550/2000000: episode: 4011, duration

 1310417/2000000: episode: 4034, duration: 20.524s, episode steps: 472, steps per second: 23, episode reward: 439.000, mean reward: 0.930 [0.500, 1.000], mean action: 3.487 [0.000, 6.000], mean observation: 166.046 [17.000, 255.000], loss: 0.085601, mean_absolute_error: 51.528969, mean_q: 60.241542, mean_eps: 0.100000
 1310894/2000000: episode: 4035, duration: 20.443s, episode steps: 477, steps per second: 23, episode reward: 440.000, mean reward: 0.922 [0.500, 1.000], mean action: 3.730 [0.000, 6.000], mean observation: 166.481 [18.000, 255.000], loss: 0.085827, mean_absolute_error: 51.337396, mean_q: 60.014286, mean_eps: 0.100000
 1311362/2000000: episode: 4036, duration: 20.464s, episode steps: 468, steps per second: 23, episode reward: 435.500, mean reward: 0.931 [0.500, 1.000], mean action: 3.564 [0.000, 6.000], mean observation: 166.052 [13.000, 255.000], loss: 0.088307, mean_absolute_error: 51.156361, mean_q: 59.811875, mean_eps: 0.100000
 1311832/2000000: episode: 4037, duratio

<keras.callbacks.History at 0x2cb956a7cc8>

### continue training 

In [2]:
# Disabled cell: when uncommented, this builds the path of the weights file for
# step 500000 under `log_dir`, loads it into `dqn`, and runs 10 visualized test
# episodes on `env`.
# NOTE(review): the section heading says "continue training", but the code below
# calls `dqn.test`, not a training call — confirm the intent before re-enabling.
# weights_filename = join(log_dir,'dqn_{}_weights_{}.h5f'.format('AirSimCarRL', 500000))
# dqn.load_weights(weights_filename)
# dqn.test(env, nb_episodes=10, visualize=True)

### testing

In [None]:
# Evaluate a saved checkpoint: load the weights file written at
# TEST_CHECKPOINT_STEP training steps and run the agent for a few episodes.
TEST_CHECKPOINT_STEP = 1000000  # step number embedded in the weights filename
TEST_EPISODES = 10              # number of episodes passed to dqn.test

log_dir = 'logs'
# exist_ok=True replaces the `if not exists(...)` check-then-create pattern,
# which can fail if the directory appears between the check and the creation.
os.makedirs(log_dir, exist_ok=True)

weights_filename = join(
    log_dir,
    'dqn_{}_weights_{}.h5f'.format('AirSimCarRL', TEST_CHECKPOINT_STEP),
)
dqn.load_weights(weights_filename)
dqn.test(env, nb_episodes=TEST_EPISODES, visualize=True)