### This notebook is a copy of train.py

In [None]:
from rl.callbacks import ModelIntervalCheckpoint, FileLogger
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute, Concatenate
from gym import spaces
import numpy as np
from PIL import Image
from configparser import ConfigParser
import os
from os.path import join, pardir, exists

from gym_airsim.airsim_car_env import AirSimCarEnv

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Hand Keras a TensorFlow session whose GPU options let the memory used on
# the GPU grow dynamically.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
set_session(tf.Session(config=config))

class AirSimCarProcessor(Processor):
    """Processor hooks that adapt observations, state batches and rewards.

    Observations are shrunk to grayscale frames of shape ``INPUT_SHAPE`` and
    kept as ``uint8``; state batches are converted to ``float32`` and divided
    by 255 right before use; rewards are clipped to ``[-1, 1]``.
    """

    def process_observation(self, observation):
        """Resize one frame to ``INPUT_SHAPE`` and convert it to grayscale.

        ``observation`` must be a 3-D array laid out as
        (height, width, channel). Returns a ``uint8`` array whose shape
        equals ``INPUT_SHAPE``.
        """
        assert observation.ndim == 3  # (height, width, channel)
        img = Image.fromarray(observation)
        # INPUT_SHAPE is compared against a NumPy shape, i.e. (height, width),
        # whereas PIL's resize() expects (width, height). Swap explicitly so
        # a non-square INPUT_SHAPE still yields an array of the right shape.
        target_height, target_width = INPUT_SHAPE
        img = img.resize((target_width, target_height)).convert('L')  # grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        return processed_observation.astype('uint8')  # saves storage in experience memory

    def process_state_batch(self, batch):
        """Return ``batch`` as ``float32`` divided by 255."""
        # We could perform this processing step in `process_observation`. In this case, however,
        # we would need to store a `float32` array instead, which is 4x more memory intensive than
        # an `uint8` array. This matters if we store 1M observations.
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """Clip ``reward`` to the interval ``[-1, 1]``."""
        return np.clip(reward, -1., 1.)

# Read the number of discrete actions from the [car_agent] section of config.ini.
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did parse, so fail loudly here instead of with an opaque
# KeyError on the section lookup below.
if not config.read('config.ini'):
    raise FileNotFoundError("could not read 'config.ini' from the working directory")
num_actions = int(config['car_agent']['actions'])

# WINDOW_LENGTH is the leading dimension of the network input and the replay
# memory's window length; INPUT_SHAPE is the shape of one processed frame.
WINDOW_LENGTH = 4
INPUT_SHAPE = (84, 84)

env = AirSimCarEnv()
np.random.seed(123)

# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
# The network input is (WINDOW_LENGTH, height, width); the Permute layer moves
# the window axis last so the convolutions see it as the channel axis.
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
model = Sequential()
model.add(Permute((2, 3, 1), input_shape=input_shape))
model.add(Convolution2D(32, (8, 8), strides=(4, 4)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (4, 4), strides=(2, 2)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3), strides=(1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
# One linear output per discrete action.
model.add(Dense(num_actions))
model.add(Activation('linear'))
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()


def build_callbacks(env_name):
    """Create the callbacks used during training.

    Ensures the ``logs`` directory exists, then returns a list holding:

    * a ``ModelIntervalCheckpoint`` (interval=25000) given the path template
      ``logs/dqn_<env_name>_weights_{step}.h5f`` -- the ``{step}`` placeholder
      is passed through unformatted for the callback to handle;
    * a ``FileLogger`` (interval=100) given ``logs/dqn_<env_name>_log.json``.

    ``env_name`` is a string used only to build the two file names.
    """
    log_dir = 'logs'
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the directory in between.
    os.makedirs(log_dir, exist_ok=True)

    checkpoint_weights_filename = join(log_dir, 'dqn_' + env_name + '_weights_{step}.h5f')
    log_filename = join(log_dir, 'dqn_{}_log.json'.format(env_name))
    return [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000),
        FileLogger(log_filename, interval=100),
    ]

# Building blocks handed to the agent: the observation/reward processor,
# the replay memory, and the exploration policy whose `eps` attribute is
# annealed from value_max to value_min over nb_steps.
processor = AirSimCarProcessor()
memory = SequentialMemory(window_length=WINDOW_LENGTH, limit=50000)
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=1000000,
)

# Assemble and compile the DQN agent around the model built above.
dqn = DQNAgent(
    model=model,
    nb_actions=num_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=50000,
    gamma=.99,
    target_model_update=10000,
    train_interval=4,
    delta_clip=1.,
)
dqn.compile(Adam(lr=.0001), metrics=['mae'])

# Checkpoint and logging callbacks for this run.
callbacks = build_callbacks('AirSimCarRL')

# Train against the AirSim environment.
dqn.fit(
    env,
    nb_steps=2000000,
    visualize=False,
    verbose=2,
    callbacks=callbacks,
)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Connected!
Client Ver:1 (Min Req: 1), Server Ver:1 (Min Req: 1)

['WayPoint0', 'WayPoint1', 'WayPoint2', 'WayPoint3', 'WayPoint4', 'WayPoint5', 'WayPoint6', 'WayPoint7', 'WayPoint8', 'WayPoint9']



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_1 (Permute)          (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 32)        8224      
_________________________________________________________________
activation_1 (Activation)    (None, 20, 20, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
activation_2 (Activation)    (None, 9, 9, 64)          0         
_____________________________________________________

    3383/2000000: episode: 17, duration: 9.572s, episode steps: 190, steps per second: 20, episode reward: 142.600, mean reward: 0.751 [-1.000, 1.000], mean action: 2.968 [0.000, 6.000], mean observation: 172.108 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    3565/2000000: episode: 18, duration: 8.958s, episode steps: 182, steps per second: 20, episode reward: 76.300, mean reward: 0.419 [-1.000, 0.500], mean action: 2.951 [0.000, 6.000], mean observation: 173.011 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    3748/2000000: episode: 19, duration: 8.958s, episode steps: 183, steps per second: 20, episode reward: 116.300, mean reward: 0.636 [-1.000, 1.000], mean action: 3.131 [0.000, 6.000], mean observation: 171.915 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    3925/2000000: episode: 20, duration: 8.607s, episode steps: 177, steps per second: 21, episode reward: 80.400, mean rewar

    8833/2000000: episode: 46, duration: 7.686s, episode steps: 180, steps per second: 23, episode reward: 73.300, mean reward: 0.407 [-1.000, 0.500], mean action: 2.833 [0.000, 6.000], mean observation: 172.125 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    9013/2000000: episode: 47, duration: 7.582s, episode steps: 180, steps per second: 24, episode reward: 55.700, mean reward: 0.309 [-1.000, 0.500], mean action: 2.883 [0.000, 6.000], mean observation: 172.735 [21.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    9218/2000000: episode: 48, duration: 9.074s, episode steps: 205, steps per second: 23, episode reward: 78.600, mean reward: 0.383 [-1.000, 0.500], mean action: 2.971 [0.000, 6.000], mean observation: 172.466 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
    9443/2000000: episode: 49, duration: 10.508s, episode steps: 225, steps per second: 21, episode reward: 137.600, mean rewar

   14798/2000000: episode: 75, duration: 9.678s, episode steps: 210, steps per second: 22, episode reward: 113.400, mean reward: 0.540 [-1.000, 1.000], mean action: 3.043 [0.000, 6.000], mean observation: 171.495 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   15028/2000000: episode: 76, duration: 10.852s, episode steps: 230, steps per second: 21, episode reward: 143.700, mean reward: 0.625 [-1.000, 1.000], mean action: 3.122 [0.000, 6.000], mean observation: 172.358 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   15198/2000000: episode: 77, duration: 7.170s, episode steps: 170, steps per second: 24, episode reward: 58.300, mean reward: 0.343 [-1.000, 0.500], mean action: 3.076 [0.000, 6.000], mean observation: 171.806 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   15393/2000000: episode: 78, duration: 8.357s, episode steps: 195, steps per second: 23, episode reward: 76.800, mean rewa

   20844/2000000: episode: 104, duration: 7.683s, episode steps: 180, steps per second: 23, episode reward: 75.300, mean reward: 0.418 [-1.000, 0.500], mean action: 2.944 [0.000, 6.000], mean observation: 172.096 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   21051/2000000: episode: 105, duration: 9.094s, episode steps: 207, steps per second: 23, episode reward: 92.000, mean reward: 0.444 [-1.000, 0.500], mean action: 2.894 [0.000, 6.000], mean observation: 172.848 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   21278/2000000: episode: 106, duration: 10.525s, episode steps: 227, steps per second: 22, episode reward: 170.900, mean reward: 0.753 [-1.000, 1.000], mean action: 3.048 [0.000, 6.000], mean observation: 173.538 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   21480/2000000: episode: 107, duration: 8.678s, episode steps: 202, steps per second: 23, episode reward: 77.900, mean r

   26611/2000000: episode: 132, duration: 6.796s, episode steps: 165, steps per second: 24, episode reward: 57.000, mean reward: 0.345 [-1.000, 0.500], mean action: 2.836 [0.000, 6.000], mean observation: 172.686 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   26812/2000000: episode: 133, duration: 9.603s, episode steps: 201, steps per second: 21, episode reward: 122.400, mean reward: 0.609 [-1.000, 1.000], mean action: 2.990 [0.000, 6.000], mean observation: 172.008 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   27010/2000000: episode: 134, duration: 9.048s, episode steps: 198, steps per second: 22, episode reward: 112.000, mean reward: 0.566 [-1.000, 1.000], mean action: 2.929 [0.000, 6.000], mean observation: 171.319 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   27184/2000000: episode: 135, duration: 7.749s, episode steps: 174, steps per second: 22, episode reward: 71.500, mean r

   32314/2000000: episode: 161, duration: 9.654s, episode steps: 211, steps per second: 22, episode reward: 117.400, mean reward: 0.556 [-1.000, 1.000], mean action: 3.052 [0.000, 6.000], mean observation: 172.373 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   32554/2000000: episode: 162, duration: 11.241s, episode steps: 240, steps per second: 21, episode reward: 173.400, mean reward: 0.722 [-1.000, 1.000], mean action: 3.075 [0.000, 6.000], mean observation: 172.816 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   32764/2000000: episode: 163, duration: 9.404s, episode steps: 210, steps per second: 22, episode reward: 96.700, mean reward: 0.460 [-1.000, 0.500], mean action: 3.019 [0.000, 6.000], mean observation: 172.997 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   32953/2000000: episode: 164, duration: 8.097s, episode steps: 189, steps per second: 23, episode reward: 78.200, mean 

   38239/2000000: episode: 190, duration: 7.835s, episode steps: 183, steps per second: 23, episode reward: 76.400, mean reward: 0.417 [-1.000, 0.500], mean action: 2.863 [0.000, 6.000], mean observation: 171.720 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   38453/2000000: episode: 191, duration: 9.723s, episode steps: 214, steps per second: 22, episode reward: 124.400, mean reward: 0.581 [-1.000, 1.000], mean action: 3.056 [0.000, 6.000], mean observation: 172.619 [23.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   38653/2000000: episode: 192, duration: 8.648s, episode steps: 200, steps per second: 23, episode reward: 87.700, mean reward: 0.438 [-1.000, 1.000], mean action: 2.900 [0.000, 6.000], mean observation: 172.369 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   38864/2000000: episode: 193, duration: 9.705s, episode steps: 211, steps per second: 22, episode reward: 144.900, mean r

   44114/2000000: episode: 218, duration: 9.128s, episode steps: 203, steps per second: 22, episode reward: 117.200, mean reward: 0.577 [-1.000, 1.000], mean action: 3.103 [0.000, 6.000], mean observation: 170.876 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   44339/2000000: episode: 219, duration: 10.498s, episode steps: 225, steps per second: 21, episode reward: 160.400, mean reward: 0.713 [-1.000, 1.000], mean action: 3.164 [0.000, 6.000], mean observation: 172.074 [21.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   44521/2000000: episode: 220, duration: 7.707s, episode steps: 182, steps per second: 24, episode reward: 59.100, mean reward: 0.325 [-1.000, 0.500], mean action: 3.132 [0.000, 6.000], mean observation: 171.685 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   44750/2000000: episode: 221, duration: 10.686s, episode steps: 229, steps per second: 21, episode reward: 148.600, mea

   49874/2000000: episode: 246, duration: 9.184s, episode steps: 204, steps per second: 22, episode reward: 104.100, mean reward: 0.510 [-1.000, 1.000], mean action: 2.877 [0.000, 6.000], mean observation: 171.890 [24.000, 255.000], loss: --, mean_absolute_error: --, mean_q: --, mean_eps: --
   50023/2000000: episode: 247, duration: 8.106s, episode steps: 149, steps per second: 18, episode reward: 75.700, mean reward: 0.508 [-1.000, 1.000], mean action: 3.007 [0.000, 6.000], mean observation: 172.351 [24.000, 255.000], loss: 0.150327, mean_absolute_error: 0.181348, mean_q: 0.211592, mean_eps: 0.954989
   50210/2000000: episode: 248, duration: 9.323s, episode steps: 187, steps per second: 20, episode reward: 130.000, mean reward: 0.695 [-1.000, 1.000], mean action: 2.909 [0.000, 6.000], mean observation: 171.113 [23.000, 255.000], loss: 0.046410, mean_absolute_error: 0.510706, mean_q: 0.656141, mean_eps: 0.954896
   50399/2000000: episode: 249, duration: 9.451s, episode steps: 189, step

   54613/2000000: episode: 272, duration: 9.401s, episode steps: 188, steps per second: 20, episode reward: 86.100, mean reward: 0.458 [-1.000, 0.500], mean action: 2.989 [0.000, 6.000], mean observation: 172.783 [24.000, 255.000], loss: 0.014972, mean_absolute_error: 0.460921, mean_q: 0.571793, mean_eps: 0.950932
   54794/2000000: episode: 273, duration: 8.927s, episode steps: 181, steps per second: 20, episode reward: 79.700, mean reward: 0.440 [-1.000, 1.000], mean action: 2.989 [0.000, 6.000], mean observation: 172.170 [24.000, 255.000], loss: 0.015722, mean_absolute_error: 0.445454, mean_q: 0.536109, mean_eps: 0.950766
   54977/2000000: episode: 274, duration: 8.919s, episode steps: 183, steps per second: 21, episode reward: 55.600, mean reward: 0.304 [-1.000, 0.500], mean action: 2.962 [0.000, 6.000], mean observation: 173.008 [24.000, 255.000], loss: 0.014787, mean_absolute_error: 0.488343, mean_q: 0.590598, mean_eps: 0.950603
   55140/2000000: episode: 275, duration: 7.933s, ep

   59360/2000000: episode: 298, duration: 9.237s, episode steps: 188, steps per second: 20, episode reward: 74.900, mean reward: 0.398 [-1.000, 0.500], mean action: 3.043 [0.000, 6.000], mean observation: 171.805 [23.000, 255.000], loss: 0.010908, mean_absolute_error: 0.493544, mean_q: 0.589539, mean_eps: 0.946662
   59498/2000000: episode: 299, duration: 6.583s, episode steps: 138, steps per second: 21, episode reward: 51.900, mean reward: 0.376 [-1.000, 0.500], mean action: 2.696 [0.000, 6.000], mean observation: 172.357 [23.000, 255.000], loss: 0.010719, mean_absolute_error: 0.445182, mean_q: 0.536936, mean_eps: 0.946515
   59691/2000000: episode: 300, duration: 9.655s, episode steps: 193, steps per second: 20, episode reward: 137.300, mean reward: 0.711 [-1.000, 1.000], mean action: 2.922 [0.000, 6.000], mean observation: 171.572 [24.000, 255.000], loss: 0.012662, mean_absolute_error: 0.454572, mean_q: 0.545243, mean_eps: 0.946365
   59834/2000000: episode: 301, duration: 6.791s, e

   64055/2000000: episode: 324, duration: 6.403s, episode steps: 137, steps per second: 21, episode reward: 51.000, mean reward: 0.372 [-1.000, 0.500], mean action: 2.752 [0.000, 6.000], mean observation: 172.856 [24.000, 255.000], loss: 0.008219, mean_absolute_error: 0.900894, mean_q: 1.069656, mean_eps: 0.942413
   64235/2000000: episode: 325, duration: 8.798s, episode steps: 180, steps per second: 20, episode reward: 110.900, mean reward: 0.616 [-1.000, 1.000], mean action: 2.950 [0.000, 6.000], mean observation: 171.906 [24.000, 255.000], loss: 0.008486, mean_absolute_error: 0.908802, mean_q: 1.081210, mean_eps: 0.942270
   64415/2000000: episode: 326, duration: 8.933s, episode steps: 180, steps per second: 20, episode reward: 90.700, mean reward: 0.504 [-1.000, 1.000], mean action: 2.883 [0.000, 6.000], mean observation: 172.283 [24.000, 255.000], loss: 0.008894, mean_absolute_error: 0.893136, mean_q: 1.060574, mean_eps: 0.942108
   64604/2000000: episode: 327, duration: 9.474s, e

   68494/2000000: episode: 350, duration: 6.666s, episode steps: 143, steps per second: 21, episode reward: 45.600, mean reward: 0.319 [-1.000, 0.500], mean action: 2.671 [0.000, 6.000], mean observation: 172.908 [23.000, 255.000], loss: 0.007048, mean_absolute_error: 0.899343, mean_q: 1.070269, mean_eps: 0.938420
   68630/2000000: episode: 351, duration: 6.486s, episode steps: 136, steps per second: 21, episode reward: 53.300, mean reward: 0.392 [-1.000, 0.500], mean action: 2.691 [0.000, 6.000], mean observation: 172.170 [23.000, 255.000], loss: 0.008127, mean_absolute_error: 0.904683, mean_q: 1.067586, mean_eps: 0.938294
   68821/2000000: episode: 352, duration: 9.652s, episode steps: 191, steps per second: 20, episode reward: 128.100, mean reward: 0.671 [-1.000, 1.000], mean action: 3.000 [0.000, 6.000], mean observation: 171.741 [24.000, 255.000], loss: 0.008356, mean_absolute_error: 0.895456, mean_q: 1.055705, mean_eps: 0.938147
   69001/2000000: episode: 353, duration: 8.839s, e

   73300/2000000: episode: 376, duration: 8.052s, episode steps: 166, steps per second: 21, episode reward: 78.800, mean reward: 0.475 [-1.000, 1.000], mean action: 2.759 [0.000, 6.000], mean observation: 170.652 [24.000, 255.000], loss: 0.006405, mean_absolute_error: 1.349511, mean_q: 1.589148, mean_eps: 0.934106
   73450/2000000: episode: 377, duration: 7.101s, episode steps: 150, steps per second: 21, episode reward: 52.700, mean reward: 0.351 [-1.000, 0.500], mean action: 2.860 [0.000, 6.000], mean observation: 171.589 [23.000, 255.000], loss: 0.007306, mean_absolute_error: 1.374422, mean_q: 1.612553, mean_eps: 0.933963
   73600/2000000: episode: 378, duration: 7.146s, episode steps: 150, steps per second: 21, episode reward: 58.300, mean reward: 0.389 [-1.000, 0.500], mean action: 3.007 [0.000, 6.000], mean observation: 171.319 [24.000, 255.000], loss: 0.007673, mean_absolute_error: 1.308086, mean_q: 1.536717, mean_eps: 0.933828
   73753/2000000: episode: 379, duration: 7.270s, ep

   77659/2000000: episode: 402, duration: 9.033s, episode steps: 182, steps per second: 20, episode reward: 109.900, mean reward: 0.604 [-1.000, 1.000], mean action: 2.890 [0.000, 6.000], mean observation: 171.473 [24.000, 255.000], loss: 0.007918, mean_absolute_error: 1.356930, mean_q: 1.597871, mean_eps: 0.930189
   77838/2000000: episode: 403, duration: 8.755s, episode steps: 179, steps per second: 20, episode reward: 108.300, mean reward: 0.605 [-1.000, 1.000], mean action: 2.944 [0.000, 6.000], mean observation: 171.697 [24.000, 255.000], loss: 0.006653, mean_absolute_error: 1.363113, mean_q: 1.604275, mean_eps: 0.930027
   78013/2000000: episode: 404, duration: 8.379s, episode steps: 175, steps per second: 21, episode reward: 63.600, mean reward: 0.363 [-1.000, 0.500], mean action: 3.006 [0.000, 6.000], mean observation: 172.922 [23.000, 255.000], loss: 0.007678, mean_absolute_error: 1.342396, mean_q: 1.578634, mean_eps: 0.929867
   78237/2000000: episode: 405, duration: 11.650s,

   82528/2000000: episode: 428, duration: 11.136s, episode steps: 218, steps per second: 20, episode reward: 144.100, mean reward: 0.661 [-1.000, 1.000], mean action: 3.133 [0.000, 6.000], mean observation: 173.445 [24.000, 255.000], loss: 0.007092, mean_absolute_error: 1.750352, mean_q: 2.059774, mean_eps: 0.925824
   82744/2000000: episode: 429, duration: 11.252s, episode steps: 216, steps per second: 19, episode reward: 108.300, mean reward: 0.501 [-1.000, 1.000], mean action: 3.287 [0.000, 6.000], mean observation: 173.188 [23.000, 255.000], loss: 0.007635, mean_absolute_error: 1.834032, mean_q: 2.150734, mean_eps: 0.925629
   82927/2000000: episode: 430, duration: 9.065s, episode steps: 183, steps per second: 20, episode reward: 117.000, mean reward: 0.639 [-1.000, 1.000], mean action: 2.902 [0.000, 6.000], mean observation: 171.661 [23.000, 255.000], loss: 0.007473, mean_absolute_error: 1.810379, mean_q: 2.131416, mean_eps: 0.925449
   83086/2000000: episode: 431, duration: 7.615

   87387/2000000: episode: 454, duration: 11.166s, episode steps: 218, steps per second: 20, episode reward: 119.000, mean reward: 0.546 [-1.000, 1.000], mean action: 3.073 [0.000, 6.000], mean observation: 172.999 [23.000, 255.000], loss: 0.006898, mean_absolute_error: 1.802990, mean_q: 2.115849, mean_eps: 0.921450
   87567/2000000: episode: 455, duration: 8.644s, episode steps: 180, steps per second: 21, episode reward: 83.300, mean reward: 0.463 [-1.000, 1.000], mean action: 2.878 [0.000, 6.000], mean observation: 172.317 [23.000, 255.000], loss: 0.007928, mean_absolute_error: 1.770084, mean_q: 2.079035, mean_eps: 0.921272
   87731/2000000: episode: 456, duration: 7.840s, episode steps: 164, steps per second: 21, episode reward: 68.100, mean reward: 0.415 [-1.000, 0.500], mean action: 2.866 [0.000, 6.000], mean observation: 171.774 [25.000, 255.000], loss: 0.006034, mean_absolute_error: 1.764674, mean_q: 2.072710, mean_eps: 0.921117
   87887/2000000: episode: 457, duration: 7.447s, 

   92166/2000000: episode: 480, duration: 7.173s, episode steps: 150, steps per second: 21, episode reward: 60.700, mean reward: 0.405 [-1.000, 0.500], mean action: 2.873 [0.000, 6.000], mean observation: 172.159 [24.000, 255.000], loss: 0.006687, mean_absolute_error: 2.202597, mean_q: 2.576636, mean_eps: 0.917119
   92310/2000000: episode: 481, duration: 6.860s, episode steps: 144, steps per second: 21, episode reward: 58.100, mean reward: 0.403 [-1.000, 0.500], mean action: 2.604 [0.000, 6.000], mean observation: 172.694 [24.000, 255.000], loss: 0.006645, mean_absolute_error: 2.183942, mean_q: 2.554612, mean_eps: 0.916986
   92509/2000000: episode: 482, duration: 9.975s, episode steps: 199, steps per second: 20, episode reward: 125.100, mean reward: 0.629 [-1.000, 1.000], mean action: 3.211 [0.000, 6.000], mean observation: 172.439 [24.000, 255.000], loss: 0.006074, mean_absolute_error: 2.157617, mean_q: 2.524440, mean_eps: 0.916831
   92664/2000000: episode: 483, duration: 7.355s, e

   96808/2000000: episode: 506, duration: 7.772s, episode steps: 161, steps per second: 21, episode reward: 87.800, mean reward: 0.545 [-1.000, 1.000], mean action: 2.689 [0.000, 6.000], mean observation: 171.214 [24.000, 255.000], loss: 0.006911, mean_absolute_error: 2.215865, mean_q: 2.587366, mean_eps: 0.912947
   96959/2000000: episode: 507, duration: 7.096s, episode steps: 151, steps per second: 21, episode reward: 56.800, mean reward: 0.376 [-1.000, 0.500], mean action: 3.013 [0.000, 6.000], mean observation: 171.599 [24.000, 255.000], loss: 0.007396, mean_absolute_error: 2.152564, mean_q: 2.507237, mean_eps: 0.912806
   97099/2000000: episode: 508, duration: 6.483s, episode steps: 140, steps per second: 22, episode reward: 48.900, mean reward: 0.349 [-1.000, 0.500], mean action: 2.729 [0.000, 6.000], mean observation: 172.445 [25.000, 255.000], loss: 0.009010, mean_absolute_error: 2.210826, mean_q: 2.581924, mean_eps: 0.912675
   97230/2000000: episode: 509, duration: 6.197s, ep

  101522/2000000: episode: 532, duration: 8.777s, episode steps: 176, steps per second: 20, episode reward: 78.600, mean reward: 0.447 [-1.000, 1.000], mean action: 2.824 [0.000, 6.000], mean observation: 171.097 [23.000, 255.000], loss: 0.008582, mean_absolute_error: 2.593964, mean_q: 3.033739, mean_eps: 0.908709
  101738/2000000: episode: 533, duration: 11.106s, episode steps: 216, steps per second: 19, episode reward: 156.600, mean reward: 0.725 [-1.000, 1.000], mean action: 3.000 [0.000, 6.000], mean observation: 171.945 [24.000, 255.000], loss: 0.007682, mean_absolute_error: 2.530787, mean_q: 2.962162, mean_eps: 0.908533
  101948/2000000: episode: 534, duration: 10.622s, episode steps: 210, steps per second: 20, episode reward: 123.200, mean reward: 0.587 [-1.000, 1.000], mean action: 3.014 [0.000, 6.000], mean observation: 172.158 [24.000, 255.000], loss: 0.007324, mean_absolute_error: 2.509413, mean_q: 2.929239, mean_eps: 0.908342
  102154/2000000: episode: 535, duration: 10.368

  106274/2000000: episode: 558, duration: 9.333s, episode steps: 187, steps per second: 20, episode reward: 102.000, mean reward: 0.545 [-1.000, 1.000], mean action: 2.984 [0.000, 6.000], mean observation: 172.104 [24.000, 255.000], loss: 0.006993, mean_absolute_error: 2.616875, mean_q: 3.059485, mean_eps: 0.904438
  106480/2000000: episode: 559, duration: 10.490s, episode steps: 206, steps per second: 20, episode reward: 145.200, mean reward: 0.705 [-1.000, 1.000], mean action: 3.150 [0.000, 6.000], mean observation: 172.968 [23.000, 255.000], loss: 0.006776, mean_absolute_error: 2.551682, mean_q: 2.972273, mean_eps: 0.904262
  106671/2000000: episode: 560, duration: 9.575s, episode steps: 191, steps per second: 20, episode reward: 127.800, mean reward: 0.669 [-1.000, 1.000], mean action: 2.895 [0.000, 6.000], mean observation: 171.670 [24.000, 255.000], loss: 0.006802, mean_absolute_error: 2.592561, mean_q: 3.027784, mean_eps: 0.904083
  106830/2000000: episode: 561, duration: 7.642s

  111154/2000000: episode: 584, duration: 8.684s, episode steps: 178, steps per second: 20, episode reward: 115.400, mean reward: 0.648 [-1.000, 1.000], mean action: 2.899 [0.000, 6.000], mean observation: 171.323 [23.000, 255.000], loss: 0.008341, mean_absolute_error: 2.987540, mean_q: 3.484544, mean_eps: 0.900042
  111313/2000000: episode: 585, duration: 7.582s, episode steps: 159, steps per second: 21, episode reward: 64.400, mean reward: 0.405 [-1.000, 0.500], mean action: 2.868 [0.000, 6.000], mean observation: 171.826 [23.000, 255.000], loss: 0.008683, mean_absolute_error: 2.985031, mean_q: 3.493663, mean_eps: 0.899889
  111498/2000000: episode: 586, duration: 9.004s, episode steps: 185, steps per second: 21, episode reward: 71.800, mean reward: 0.388 [-1.000, 0.500], mean action: 3.054 [0.000, 6.000], mean observation: 172.128 [24.000, 255.000], loss: 0.009958, mean_absolute_error: 3.066476, mean_q: 3.572796, mean_eps: 0.899735
  111720/2000000: episode: 587, duration: 11.399s, 

  116027/2000000: episode: 610, duration: 11.297s, episode steps: 220, steps per second: 19, episode reward: 110.600, mean reward: 0.503 [-1.000, 1.000], mean action: 3.005 [0.000, 6.000], mean observation: 172.977 [24.000, 255.000], loss: 0.008868, mean_absolute_error: 3.052273, mean_q: 3.564263, mean_eps: 0.895676
  116222/2000000: episode: 611, duration: 9.761s, episode steps: 195, steps per second: 20, episode reward: 126.100, mean reward: 0.647 [-1.000, 1.000], mean action: 3.026 [0.000, 6.000], mean observation: 172.445 [23.000, 255.000], loss: 0.007755, mean_absolute_error: 3.070752, mean_q: 3.590712, mean_eps: 0.895488
  116386/2000000: episode: 612, duration: 7.848s, episode steps: 164, steps per second: 21, episode reward: 67.700, mean reward: 0.413 [-1.000, 0.500], mean action: 2.915 [0.000, 6.000], mean observation: 172.225 [24.000, 255.000], loss: 0.008358, mean_absolute_error: 2.994324, mean_q: 3.494152, mean_eps: 0.895326
  116574/2000000: episode: 613, duration: 9.445s,

  120755/2000000: episode: 636, duration: 11.427s, episode steps: 222, steps per second: 19, episode reward: 118.200, mean reward: 0.532 [-1.000, 1.000], mean action: 3.041 [0.000, 6.000], mean observation: 172.980 [25.000, 255.000], loss: 0.010725, mean_absolute_error: 3.416398, mean_q: 3.983134, mean_eps: 0.891420
  120942/2000000: episode: 637, duration: 9.258s, episode steps: 187, steps per second: 20, episode reward: 108.000, mean reward: 0.578 [-1.000, 1.000], mean action: 2.968 [0.000, 6.000], mean observation: 171.826 [24.000, 255.000], loss: 0.011916, mean_absolute_error: 3.479252, mean_q: 4.063767, mean_eps: 0.891237
  121115/2000000: episode: 638, duration: 8.320s, episode steps: 173, steps per second: 21, episode reward: 75.000, mean reward: 0.434 [-1.000, 1.000], mean action: 3.006 [0.000, 6.000], mean observation: 171.744 [23.000, 255.000], loss: 0.009526, mean_absolute_error: 3.409912, mean_q: 3.980652, mean_eps: 0.891075
  121321/2000000: episode: 639, duration: 10.495s

  125703/2000000: episode: 662, duration: 10.927s, episode steps: 214, steps per second: 20, episode reward: 144.300, mean reward: 0.674 [-1.000, 1.000], mean action: 3.173 [0.000, 6.000], mean observation: 172.332 [24.000, 255.000], loss: 0.010715, mean_absolute_error: 3.455494, mean_q: 4.017558, mean_eps: 0.886964
  125894/2000000: episode: 663, duration: 9.635s, episode steps: 191, steps per second: 20, episode reward: 106.200, mean reward: 0.556 [-1.000, 1.000], mean action: 3.031 [0.000, 6.000], mean observation: 171.764 [22.000, 255.000], loss: 0.009522, mean_absolute_error: 3.496448, mean_q: 4.075122, mean_eps: 0.886782
  126076/2000000: episode: 664, duration: 8.990s, episode steps: 182, steps per second: 20, episode reward: 103.000, mean reward: 0.566 [-1.000, 1.000], mean action: 3.264 [0.000, 6.000], mean observation: 171.133 [23.000, 255.000], loss: 0.010797, mean_absolute_error: 3.435136, mean_q: 3.999583, mean_eps: 0.886614
  126256/2000000: episode: 665, duration: 8.846s

  130370/2000000: episode: 688, duration: 7.495s, episode steps: 157, steps per second: 21, episode reward: 80.000, mean reward: 0.510 [-1.000, 1.000], mean action: 2.847 [0.000, 6.000], mean observation: 171.217 [23.000, 255.000], loss: 0.013626, mean_absolute_error: 3.723365, mean_q: 4.339189, mean_eps: 0.882737
  130536/2000000: episode: 689, duration: 7.973s, episode steps: 166, steps per second: 21, episode reward: 70.300, mean reward: 0.423 [-1.000, 0.500], mean action: 2.705 [0.000, 6.000], mean observation: 171.263 [24.000, 255.000], loss: 0.012601, mean_absolute_error: 3.815317, mean_q: 4.447087, mean_eps: 0.882593
  130702/2000000: episode: 690, duration: 7.922s, episode steps: 166, steps per second: 21, episode reward: 67.100, mean reward: 0.404 [-1.000, 0.500], mean action: 2.922 [0.000, 6.000], mean observation: 172.166 [25.000, 255.000], loss: 0.011635, mean_absolute_error: 3.806749, mean_q: 4.437352, mean_eps: 0.882444
  130872/2000000: episode: 691, duration: 8.169s, ep

  135162/2000000: episode: 714, duration: 10.960s, episode steps: 211, steps per second: 19, episode reward: 132.600, mean reward: 0.628 [-1.000, 1.000], mean action: 3.204 [0.000, 6.000], mean observation: 173.037 [24.000, 255.000], loss: 0.012328, mean_absolute_error: 3.752980, mean_q: 4.382629, mean_eps: 0.878450
  135328/2000000: episode: 715, duration: 7.935s, episode steps: 166, steps per second: 21, episode reward: 66.700, mean reward: 0.402 [-1.000, 0.500], mean action: 2.940 [0.000, 6.000], mean observation: 172.357 [24.000, 255.000], loss: 0.011575, mean_absolute_error: 3.791648, mean_q: 4.411172, mean_eps: 0.878280
  135502/2000000: episode: 716, duration: 8.190s, episode steps: 174, steps per second: 21, episode reward: 56.300, mean reward: 0.324 [-1.000, 0.500], mean action: 2.649 [0.000, 6.000], mean observation: 173.012 [23.000, 255.000], loss: 0.010388, mean_absolute_error: 3.836309, mean_q: 4.482739, mean_eps: 0.878127
  135639/2000000: episode: 717, duration: 6.451s, 

  139727/2000000: episode: 740, duration: 7.986s, episode steps: 167, steps per second: 21, episode reward: 70.000, mean reward: 0.419 [-1.000, 0.500], mean action: 2.892 [0.000, 6.000], mean observation: 171.324 [23.000, 255.000], loss: 0.009416, mean_absolute_error: 3.674945, mean_q: 4.276043, mean_eps: 0.874322
  139916/2000000: episode: 741, duration: 9.284s, episode steps: 189, steps per second: 20, episode reward: 110.000, mean reward: 0.582 [-1.000, 1.000], mean action: 2.931 [0.000, 6.000], mean observation: 171.347 [24.000, 255.000], loss: 0.008847, mean_absolute_error: 3.663839, mean_q: 4.255414, mean_eps: 0.874162
  140096/2000000: episode: 742, duration: 8.692s, episode steps: 180, steps per second: 21, episode reward: 72.100, mean reward: 0.401 [-1.000, 0.500], mean action: 2.922 [0.000, 6.000], mean observation: 172.200 [24.000, 255.000], loss: 0.036197, mean_absolute_error: 3.989647, mean_q: 4.635869, mean_eps: 0.873996
  140250/2000000: episode: 743, duration: 7.233s, e

  144644/2000000: episode: 766, duration: 9.395s, episode steps: 189, steps per second: 20, episode reward: 135.300, mean reward: 0.716 [-1.000, 1.000], mean action: 2.984 [0.000, 6.000], mean observation: 171.947 [23.000, 255.000], loss: 0.012417, mean_absolute_error: 4.132474, mean_q: 4.827106, mean_eps: 0.869907
  144857/2000000: episode: 767, duration: 10.798s, episode steps: 213, steps per second: 20, episode reward: 155.700, mean reward: 0.731 [-1.000, 1.000], mean action: 3.056 [0.000, 6.000], mean observation: 173.661 [24.000, 255.000], loss: 0.011571, mean_absolute_error: 4.254891, mean_q: 4.965096, mean_eps: 0.869725
  145049/2000000: episode: 768, duration: 9.573s, episode steps: 192, steps per second: 20, episode reward: 118.500, mean reward: 0.617 [-1.000, 1.000], mean action: 2.969 [0.000, 6.000], mean observation: 172.688 [20.000, 255.000], loss: 0.013645, mean_absolute_error: 4.097077, mean_q: 4.768700, mean_eps: 0.869541
  145242/2000000: episode: 769, duration: 9.637s

  149546/2000000: episode: 792, duration: 9.902s, episode steps: 197, steps per second: 20, episode reward: 125.100, mean reward: 0.635 [-1.000, 1.000], mean action: 3.041 [0.000, 6.000], mean observation: 172.454 [24.000, 255.000], loss: 0.011011, mean_absolute_error: 4.268613, mean_q: 4.977381, mean_eps: 0.865497
  149730/2000000: episode: 793, duration: 9.119s, episode steps: 184, steps per second: 20, episode reward: 104.200, mean reward: 0.566 [-1.000, 1.000], mean action: 2.897 [0.000, 6.000], mean observation: 171.852 [23.000, 255.000], loss: 0.012374, mean_absolute_error: 4.192930, mean_q: 4.893481, mean_eps: 0.865326
  149855/2000000: episode: 794, duration: 5.864s, episode steps: 125, steps per second: 21, episode reward: 49.000, mean reward: 0.392 [-1.000, 0.500], mean action: 2.576 [0.000, 6.000], mean observation: 172.396 [23.000, 255.000], loss: 0.011615, mean_absolute_error: 4.168934, mean_q: 4.857097, mean_eps: 0.865187
  150061/2000000: episode: 795, duration: 10.442s,

  154402/2000000: episode: 818, duration: 11.265s, episode steps: 215, steps per second: 19, episode reward: 157.200, mean reward: 0.731 [-1.000, 1.000], mean action: 3.172 [0.000, 6.000], mean observation: 171.377 [24.000, 255.000], loss: 0.013502, mean_absolute_error: 4.504372, mean_q: 5.254290, mean_eps: 0.861135
  154558/2000000: episode: 819, duration: 7.618s, episode steps: 156, steps per second: 20, episode reward: 66.900, mean reward: 0.429 [-1.000, 0.500], mean action: 2.654 [0.000, 6.000], mean observation: 171.140 [23.000, 255.000], loss: 0.013057, mean_absolute_error: 4.657640, mean_q: 5.430872, mean_eps: 0.860968
  154760/2000000: episode: 820, duration: 10.446s, episode steps: 202, steps per second: 19, episode reward: 128.300, mean reward: 0.635 [-1.000, 1.000], mean action: 3.277 [0.000, 6.000], mean observation: 172.010 [24.000, 255.000], loss: 0.014533, mean_absolute_error: 4.531280, mean_q: 5.294454, mean_eps: 0.860808
  154915/2000000: episode: 821, duration: 7.506s

  159253/2000000: episode: 844, duration: 7.986s, episode steps: 163, steps per second: 20, episode reward: 66.400, mean reward: 0.407 [-1.000, 0.500], mean action: 2.914 [0.000, 6.000], mean observation: 172.116 [23.000, 255.000], loss: 0.012632, mean_absolute_error: 4.557160, mean_q: 5.302867, mean_eps: 0.856745
  159480/2000000: episode: 845, duration: 12.047s, episode steps: 227, steps per second: 19, episode reward: 169.900, mean reward: 0.748 [-1.000, 1.000], mean action: 3.344 [0.000, 6.000], mean observation: 172.256 [24.000, 255.000], loss: 0.016404, mean_absolute_error: 4.617571, mean_q: 5.390272, mean_eps: 0.856571
  159707/2000000: episode: 846, duration: 11.943s, episode steps: 227, steps per second: 19, episode reward: 105.800, mean reward: 0.466 [-1.000, 1.000], mean action: 3.194 [0.000, 6.000], mean observation: 173.114 [23.000, 255.000], loss: 0.015226, mean_absolute_error: 4.560160, mean_q: 5.324047, mean_eps: 0.856367
  159844/2000000: episode: 847, duration: 7.397s

  164193/2000000: episode: 870, duration: 9.059s, episode steps: 181, steps per second: 20, episode reward: 105.400, mean reward: 0.582 [-1.000, 1.000], mean action: 2.878 [0.000, 6.000], mean observation: 171.779 [23.000, 255.000], loss: 0.020572, mean_absolute_error: 5.061935, mean_q: 5.918398, mean_eps: 0.852308
  164406/2000000: episode: 871, duration: 11.230s, episode steps: 213, steps per second: 19, episode reward: 127.300, mean reward: 0.598 [-1.000, 1.000], mean action: 3.258 [0.000, 6.000], mean observation: 173.072 [24.000, 255.000], loss: 0.017997, mean_absolute_error: 5.076589, mean_q: 5.950205, mean_eps: 0.852130
  164604/2000000: episode: 872, duration: 10.200s, episode steps: 198, steps per second: 19, episode reward: 96.100, mean reward: 0.485 [-1.000, 1.000], mean action: 2.848 [0.000, 6.000], mean observation: 172.380 [24.000, 255.000], loss: 0.018140, mean_absolute_error: 5.070793, mean_q: 5.929813, mean_eps: 0.851946
  164783/2000000: episode: 873, duration: 8.981s

  169003/2000000: episode: 896, duration: 6.416s, episode steps: 137, steps per second: 21, episode reward: 43.800, mean reward: 0.320 [-1.000, 0.500], mean action: 2.628 [0.000, 6.000], mean observation: 173.293 [24.000, 255.000], loss: 0.021046, mean_absolute_error: 5.173168, mean_q: 6.039774, mean_eps: 0.847959
  169152/2000000: episode: 897, duration: 7.273s, episode steps: 149, steps per second: 20, episode reward: 61.400, mean reward: 0.412 [-1.000, 0.500], mean action: 2.933 [0.000, 6.000], mean observation: 172.379 [24.000, 255.000], loss: 0.020754, mean_absolute_error: 4.981658, mean_q: 5.811157, mean_eps: 0.847832
  169338/2000000: episode: 898, duration: 9.490s, episode steps: 186, steps per second: 20, episode reward: 123.100, mean reward: 0.662 [-1.000, 1.000], mean action: 3.188 [0.000, 6.000], mean observation: 171.861 [22.000, 255.000], loss: 0.018803, mean_absolute_error: 4.935635, mean_q: 5.768061, mean_eps: 0.847680
  169531/2000000: episode: 899, duration: 9.904s, e

  173697/2000000: episode: 922, duration: 9.487s, episode steps: 187, steps per second: 20, episode reward: 106.600, mean reward: 0.570 [-1.000, 1.000], mean action: 3.011 [0.000, 6.000], mean observation: 172.235 [23.000, 255.000], loss: 0.020869, mean_absolute_error: 5.449778, mean_q: 6.354863, mean_eps: 0.843756
  173885/2000000: episode: 923, duration: 9.650s, episode steps: 188, steps per second: 19, episode reward: 129.100, mean reward: 0.687 [-1.000, 1.000], mean action: 3.080 [0.000, 6.000], mean observation: 171.758 [23.000, 255.000], loss: 0.021248, mean_absolute_error: 5.548793, mean_q: 6.491547, mean_eps: 0.843587
  174040/2000000: episode: 924, duration: 7.608s, episode steps: 155, steps per second: 20, episode reward: 76.400, mean reward: 0.493 [-1.000, 1.000], mean action: 3.006 [0.000, 6.000], mean observation: 171.522 [22.000, 255.000], loss: 0.022779, mean_absolute_error: 5.397482, mean_q: 6.301738, mean_eps: 0.843434
  174209/2000000: episode: 925, duration: 8.238s, 

  178322/2000000: episode: 948, duration: 8.012s, episode steps: 164, steps per second: 20, episode reward: 67.300, mean reward: 0.410 [-1.000, 0.500], mean action: 2.823 [0.000, 6.000], mean observation: 171.478 [24.000, 255.000], loss: 0.022692, mean_absolute_error: 5.289641, mean_q: 6.173274, mean_eps: 0.839584
  178502/2000000: episode: 949, duration: 8.879s, episode steps: 180, steps per second: 20, episode reward: 75.500, mean reward: 0.419 [-1.000, 1.000], mean action: 3.094 [0.000, 6.000], mean observation: 171.805 [23.000, 255.000], loss: 0.021916, mean_absolute_error: 5.389863, mean_q: 6.287182, mean_eps: 0.839429
  178654/2000000: episode: 950, duration: 7.358s, episode steps: 152, steps per second: 21, episode reward: 61.300, mean reward: 0.403 [-1.000, 0.500], mean action: 2.757 [0.000, 6.000], mean observation: 171.768 [23.000, 255.000], loss: 0.020160, mean_absolute_error: 5.349994, mean_q: 6.252641, mean_eps: 0.839280
  178831/2000000: episode: 951, duration: 8.667s, ep

  182905/2000000: episode: 974, duration: 7.728s, episode steps: 158, steps per second: 20, episode reward: 65.500, mean reward: 0.415 [-1.000, 0.500], mean action: 2.987 [0.000, 6.000], mean observation: 171.066 [24.000, 255.000], loss: 0.025503, mean_absolute_error: 5.821872, mean_q: 6.790817, mean_eps: 0.835457
  183086/2000000: episode: 975, duration: 9.057s, episode steps: 181, steps per second: 20, episode reward: 121.400, mean reward: 0.671 [-1.000, 1.000], mean action: 3.055 [0.000, 6.000], mean observation: 170.754 [24.000, 255.000], loss: 0.027451, mean_absolute_error: 5.793579, mean_q: 6.765702, mean_eps: 0.835304
  183231/2000000: episode: 976, duration: 7.037s, episode steps: 145, steps per second: 21, episode reward: 54.600, mean reward: 0.377 [-1.000, 0.500], mean action: 2.779 [0.000, 6.000], mean observation: 171.888 [23.000, 255.000], loss: 0.035717, mean_absolute_error: 5.860497, mean_q: 6.847341, mean_eps: 0.835158
  183379/2000000: episode: 977, duration: 7.081s, e

  187364/2000000: episode: 1000, duration: 8.793s, episode steps: 177, steps per second: 20, episode reward: 96.500, mean reward: 0.545 [-1.000, 1.000], mean action: 2.893 [0.000, 6.000], mean observation: 171.831 [23.000, 255.000], loss: 0.024269, mean_absolute_error: 5.810051, mean_q: 6.791269, mean_eps: 0.831453
  187540/2000000: episode: 1001, duration: 8.764s, episode steps: 176, steps per second: 20, episode reward: 112.800, mean reward: 0.641 [-1.000, 1.000], mean action: 2.864 [0.000, 6.000], mean observation: 171.585 [24.000, 255.000], loss: 0.022028, mean_absolute_error: 5.936697, mean_q: 6.935905, mean_eps: 0.831295
  187751/2000000: episode: 1002, duration: 10.980s, episode steps: 211, steps per second: 19, episode reward: 148.100, mean reward: 0.702 [-1.000, 1.000], mean action: 3.204 [0.000, 6.000], mean observation: 173.415 [24.000, 255.000], loss: 0.022299, mean_absolute_error: 5.744198, mean_q: 6.715208, mean_eps: 0.831120
  187968/2000000: episode: 1003, duration: 11.

  192119/2000000: episode: 1026, duration: 11.920s, episode steps: 229, steps per second: 19, episode reward: 130.200, mean reward: 0.569 [-1.000, 1.000], mean action: 3.231 [0.000, 6.000], mean observation: 173.438 [24.000, 255.000], loss: 0.026656, mean_absolute_error: 6.355163, mean_q: 7.422024, mean_eps: 0.827196
  192301/2000000: episode: 1027, duration: 9.149s, episode steps: 182, steps per second: 20, episode reward: 126.700, mean reward: 0.696 [-1.000, 1.000], mean action: 2.907 [0.000, 6.000], mean observation: 171.214 [24.000, 255.000], loss: 0.029747, mean_absolute_error: 6.488971, mean_q: 7.590756, mean_eps: 0.827011
  192498/2000000: episode: 1028, duration: 10.042s, episode steps: 197, steps per second: 20, episode reward: 108.500, mean reward: 0.551 [-1.000, 1.000], mean action: 3.147 [0.000, 6.000], mean observation: 172.140 [24.000, 255.000], loss: 0.027907, mean_absolute_error: 6.249208, mean_q: 7.306063, mean_eps: 0.826840
  192626/2000000: episode: 1029, duration: 6

  196970/2000000: episode: 1052, duration: 9.903s, episode steps: 194, steps per second: 20, episode reward: 109.200, mean reward: 0.563 [-1.000, 1.000], mean action: 2.928 [0.000, 6.000], mean observation: 172.249 [25.000, 255.000], loss: 0.029295, mean_absolute_error: 6.281114, mean_q: 7.343425, mean_eps: 0.822815
  197126/2000000: episode: 1053, duration: 8.421s, episode steps: 156, steps per second: 19, episode reward: 77.000, mean reward: 0.494 [-1.000, 1.000], mean action: 3.090 [0.000, 6.000], mean observation: 174.441 [23.000, 255.000], loss: 0.027701, mean_absolute_error: 6.310555, mean_q: 7.369837, mean_eps: 0.822657
  197257/2000000: episode: 1054, duration: 7.115s, episode steps: 131, steps per second: 18, episode reward: 73.100, mean reward: 0.558 [-1.000, 1.000], mean action: 3.389 [0.000, 6.000], mean observation: 172.979 [24.000, 255.000], loss: 0.026834, mean_absolute_error: 6.326372, mean_q: 7.389417, mean_eps: 0.822527
  197405/2000000: episode: 1055, duration: 7.961

  201538/2000000: episode: 1078, duration: 10.505s, episode steps: 203, steps per second: 19, episode reward: 118.700, mean reward: 0.585 [-1.000, 1.000], mean action: 3.005 [0.000, 6.000], mean observation: 173.075 [24.000, 255.000], loss: 0.031379, mean_absolute_error: 6.722366, mean_q: 7.854523, mean_eps: 0.818708
  201694/2000000: episode: 1079, duration: 7.588s, episode steps: 156, steps per second: 21, episode reward: 58.500, mean reward: 0.375 [-1.000, 0.500], mean action: 2.923 [0.000, 6.000], mean observation: 172.040 [23.000, 255.000], loss: 0.037458, mean_absolute_error: 6.840010, mean_q: 8.003948, mean_eps: 0.818546
  201886/2000000: episode: 1080, duration: 9.865s, episode steps: 192, steps per second: 19, episode reward: 121.600, mean reward: 0.633 [-1.000, 1.000], mean action: 2.943 [0.000, 6.000], mean observation: 171.988 [23.000, 255.000], loss: 0.036938, mean_absolute_error: 6.868239, mean_q: 8.028159, mean_eps: 0.818389
  202109/2000000: episode: 1081, duration: 11.

  206439/2000000: episode: 1104, duration: 9.401s, episode steps: 185, steps per second: 20, episode reward: 127.400, mean reward: 0.689 [-1.000, 1.000], mean action: 3.141 [0.000, 6.000], mean observation: 171.458 [23.000, 255.000], loss: 0.033419, mean_absolute_error: 6.762541, mean_q: 7.900109, mean_eps: 0.814289
  206635/2000000: episode: 1105, duration: 10.057s, episode steps: 196, steps per second: 19, episode reward: 119.600, mean reward: 0.610 [-1.000, 1.000], mean action: 3.046 [0.000, 6.000], mean observation: 172.523 [23.000, 255.000], loss: 0.036882, mean_absolute_error: 6.751001, mean_q: 7.883942, mean_eps: 0.814118
  206830/2000000: episode: 1106, duration: 9.933s, episode steps: 195, steps per second: 20, episode reward: 119.800, mean reward: 0.614 [-1.000, 1.000], mean action: 3.041 [0.000, 6.000], mean observation: 171.147 [23.000, 255.000], loss: 0.033691, mean_absolute_error: 6.654193, mean_q: 7.775126, mean_eps: 0.813941
  207013/2000000: episode: 1107, duration: 9.

  211248/2000000: episode: 1130, duration: 6.854s, episode steps: 143, steps per second: 21, episode reward: 53.200, mean reward: 0.372 [-1.000, 0.500], mean action: 2.895 [0.000, 6.000], mean observation: 172.414 [24.000, 255.000], loss: 0.032632, mean_absolute_error: 6.844618, mean_q: 7.989669, mean_eps: 0.809942
  211452/2000000: episode: 1131, duration: 10.534s, episode steps: 204, steps per second: 19, episode reward: 127.000, mean reward: 0.623 [-1.000, 1.000], mean action: 3.020 [0.000, 6.000], mean observation: 172.890 [23.000, 255.000], loss: 0.041472, mean_absolute_error: 7.052898, mean_q: 8.235692, mean_eps: 0.809787
  211612/2000000: episode: 1132, duration: 7.885s, episode steps: 160, steps per second: 20, episode reward: 65.300, mean reward: 0.408 [-1.000, 0.500], mean action: 2.769 [0.000, 6.000], mean observation: 172.104 [25.000, 255.000], loss: 0.034801, mean_absolute_error: 7.272625, mean_q: 8.500445, mean_eps: 0.809623
  211814/2000000: episode: 1133, duration: 10.5

  216121/2000000: episode: 1156, duration: 10.503s, episode steps: 203, steps per second: 19, episode reward: 138.700, mean reward: 0.683 [-1.000, 1.000], mean action: 3.015 [0.000, 6.000], mean observation: 173.144 [24.000, 255.000], loss: 0.035750, mean_absolute_error: 6.836564, mean_q: 7.989757, mean_eps: 0.805582
  216342/2000000: episode: 1157, duration: 11.632s, episode steps: 221, steps per second: 19, episode reward: 109.500, mean reward: 0.495 [-1.000, 1.000], mean action: 3.217 [0.000, 6.000], mean observation: 173.221 [24.000, 255.000], loss: 0.032175, mean_absolute_error: 6.874139, mean_q: 8.030782, mean_eps: 0.805391
  216535/2000000: episode: 1158, duration: 9.887s, episode steps: 193, steps per second: 20, episode reward: 122.700, mean reward: 0.636 [-1.000, 1.000], mean action: 3.052 [0.000, 6.000], mean observation: 172.513 [24.000, 255.000], loss: 0.035588, mean_absolute_error: 6.990523, mean_q: 8.167908, mean_eps: 0.805206
  216726/2000000: episode: 1159, duration: 9

  220946/2000000: episode: 1182, duration: 7.009s, episode steps: 148, steps per second: 21, episode reward: 59.300, mean reward: 0.401 [-1.000, 0.500], mean action: 2.791 [0.000, 6.000], mean observation: 171.600 [24.000, 255.000], loss: 0.042012, mean_absolute_error: 7.550496, mean_q: 8.814235, mean_eps: 0.801215
  221116/2000000: episode: 1183, duration: 8.191s, episode steps: 170, steps per second: 21, episode reward: 84.100, mean reward: 0.495 [-1.000, 1.000], mean action: 2.918 [0.000, 6.000], mean observation: 171.009 [23.000, 255.000], loss: 0.040642, mean_absolute_error: 7.437483, mean_q: 8.668906, mean_eps: 0.801073
  221331/2000000: episode: 1184, duration: 10.915s, episode steps: 215, steps per second: 20, episode reward: 114.400, mean reward: 0.532 [-1.000, 1.000], mean action: 2.967 [0.000, 6.000], mean observation: 172.704 [24.000, 255.000], loss: 0.045458, mean_absolute_error: 7.420510, mean_q: 8.662327, mean_eps: 0.800900
  221540/2000000: episode: 1185, duration: 10.7

  225380/2000000: episode: 1208, duration: 8.049s, episode steps: 152, steps per second: 19, episode reward: 80.100, mean reward: 0.527 [-1.000, 1.000], mean action: 3.362 [0.000, 6.000], mean observation: 174.177 [23.000, 255.000], loss: 0.040430, mean_absolute_error: 7.504049, mean_q: 8.755747, mean_eps: 0.797228
  225539/2000000: episode: 1209, duration: 7.437s, episode steps: 159, steps per second: 21, episode reward: 48.000, mean reward: 0.302 [-1.000, 0.500], mean action: 3.019 [0.000, 6.000], mean observation: 173.102 [24.000, 255.000], loss: 0.039494, mean_absolute_error: 7.223494, mean_q: 8.419102, mean_eps: 0.797088
  225752/2000000: episode: 1210, duration: 10.850s, episode steps: 213, steps per second: 20, episode reward: 141.000, mean reward: 0.662 [-1.000, 1.000], mean action: 3.113 [0.000, 6.000], mean observation: 172.897 [24.000, 255.000], loss: 0.040204, mean_absolute_error: 7.314839, mean_q: 8.521498, mean_eps: 0.796920
  225940/2000000: episode: 1211, duration: 9.19

  230376/2000000: episode: 1234, duration: 10.494s, episode steps: 209, steps per second: 20, episode reward: 116.700, mean reward: 0.558 [-1.000, 1.000], mean action: 3.225 [0.000, 6.000], mean observation: 172.911 [24.000, 255.000], loss: 0.042853, mean_absolute_error: 7.903509, mean_q: 9.238590, mean_eps: 0.792757
  230529/2000000: episode: 1235, duration: 7.197s, episode steps: 153, steps per second: 21, episode reward: 49.400, mean reward: 0.323 [-1.000, 0.500], mean action: 2.758 [0.000, 6.000], mean observation: 172.725 [23.000, 255.000], loss: 0.038743, mean_absolute_error: 7.720376, mean_q: 9.008449, mean_eps: 0.792593
  230724/2000000: episode: 1236, duration: 9.822s, episode steps: 195, steps per second: 20, episode reward: 101.300, mean reward: 0.519 [-1.000, 1.000], mean action: 2.954 [0.000, 6.000], mean observation: 172.554 [24.000, 255.000], loss: 0.049801, mean_absolute_error: 7.826747, mean_q: 9.148859, mean_eps: 0.792437
  230925/2000000: episode: 1237, duration: 10.

  235160/2000000: episode: 1260, duration: 9.460s, episode steps: 191, steps per second: 20, episode reward: 120.000, mean reward: 0.628 [-1.000, 1.000], mean action: 3.000 [0.000, 6.000], mean observation: 171.080 [23.000, 255.000], loss: 0.039344, mean_absolute_error: 7.863026, mean_q: 9.187043, mean_eps: 0.788442
  235293/2000000: episode: 1261, duration: 7.161s, episode steps: 133, steps per second: 19, episode reward: 71.300, mean reward: 0.536 [-1.000, 1.000], mean action: 3.451 [0.000, 6.000], mean observation: 172.599 [24.000, 255.000], loss: 0.039944, mean_absolute_error: 7.870234, mean_q: 9.202430, mean_eps: 0.788297
  235487/2000000: episode: 1262, duration: 9.690s, episode steps: 194, steps per second: 20, episode reward: 118.100, mean reward: 0.609 [-1.000, 1.000], mean action: 3.067 [0.000, 6.000], mean observation: 170.871 [24.000, 255.000], loss: 0.043788, mean_absolute_error: 7.972942, mean_q: 9.326177, mean_eps: 0.788149
  235700/2000000: episode: 1263, duration: 10.9

  241314/2000000: episode: 1286, duration: 9.404s, episode steps: 229, steps per second: 24, episode reward: 108.000, mean reward: 0.472 [-1.000, 1.000], mean action: 2.987 [0.000, 6.000], mean observation: 172.361 [23.000, 255.000], loss: 0.049006, mean_absolute_error: 8.191283, mean_q: 9.562855, mean_eps: 0.782920
  241482/2000000: episode: 1287, duration: 6.809s, episode steps: 168, steps per second: 25, episode reward: 66.500, mean reward: 0.396 [-1.000, 0.500], mean action: 2.827 [0.000, 6.000], mean observation: 172.520 [24.000, 255.000], loss: 0.048976, mean_absolute_error: 8.119226, mean_q: 9.477423, mean_eps: 0.782742
  241727/2000000: episode: 1288, duration: 10.517s, episode steps: 245, steps per second: 23, episode reward: 171.200, mean reward: 0.699 [-1.000, 1.000], mean action: 2.980 [0.000, 6.000], mean observation: 172.701 [24.000, 255.000], loss: 0.047237, mean_absolute_error: 8.025545, mean_q: 9.372753, mean_eps: 0.782556
  242005/2000000: episode: 1289, duration: 10.

  247569/2000000: episode: 1312, duration: 6.781s, episode steps: 182, steps per second: 27, episode reward: 75.500, mean reward: 0.415 [-1.000, 0.500], mean action: 2.962 [0.000, 6.000], mean observation: 172.185 [24.000, 255.000], loss: 0.048771, mean_absolute_error: 8.011139, mean_q: 9.364354, mean_eps: 0.777270
  247838/2000000: episode: 1313, duration: 10.884s, episode steps: 269, steps per second: 25, episode reward: 172.900, mean reward: 0.643 [-1.000, 1.000], mean action: 3.156 [0.000, 6.000], mean observation: 173.058 [23.000, 255.000], loss: 0.043529, mean_absolute_error: 8.105471, mean_q: 9.459284, mean_eps: 0.777066
  248130/2000000: episode: 1314, duration: 11.831s, episode steps: 292, steps per second: 25, episode reward: 200.900, mean reward: 0.688 [-1.000, 1.000], mean action: 3.257 [0.000, 6.000], mean observation: 173.197 [23.000, 255.000], loss: 0.043968, mean_absolute_error: 8.165351, mean_q: 9.535113, mean_eps: 0.776814
  248390/2000000: episode: 1315, duration: 10

  253957/2000000: episode: 1338, duration: 11.376s, episode steps: 277, steps per second: 24, episode reward: 134.000, mean reward: 0.484 [-1.000, 1.000], mean action: 3.202 [0.000, 6.000], mean observation: 172.687 [23.000, 255.000], loss: 0.047065, mean_absolute_error: 8.483204, mean_q: 9.895828, mean_eps: 0.771564
  254200/2000000: episode: 1339, duration: 10.755s, episode steps: 243, steps per second: 23, episode reward: 119.200, mean reward: 0.491 [-1.000, 1.000], mean action: 3.313 [0.000, 6.000], mean observation: 172.515 [24.000, 255.000], loss: 0.048999, mean_absolute_error: 8.587531, mean_q: 10.025798, mean_eps: 0.771330
  254422/2000000: episode: 1340, duration: 9.316s, episode steps: 222, steps per second: 24, episode reward: 130.000, mean reward: 0.586 [-1.000, 1.000], mean action: 3.216 [0.000, 6.000], mean observation: 171.133 [24.000, 255.000], loss: 0.040181, mean_absolute_error: 8.473922, mean_q: 9.883853, mean_eps: 0.771121
  254649/2000000: episode: 1341, duration: 

  259852/2000000: episode: 1364, duration: 7.803s, episode steps: 201, steps per second: 26, episode reward: 73.000, mean reward: 0.363 [-1.000, 0.500], mean action: 2.980 [0.000, 6.000], mean observation: 171.655 [23.000, 255.000], loss: 0.053805, mean_absolute_error: 8.717806, mean_q: 10.177930, mean_eps: 0.766225
  260134/2000000: episode: 1365, duration: 12.097s, episode steps: 282, steps per second: 23, episode reward: 186.000, mean reward: 0.660 [-1.000, 1.000], mean action: 3.358 [0.000, 6.000], mean observation: 172.624 [24.000, 255.000], loss: 0.061823, mean_absolute_error: 8.753381, mean_q: 10.215203, mean_eps: 0.766007
  260403/2000000: episode: 1366, duration: 11.174s, episode steps: 269, steps per second: 24, episode reward: 175.700, mean reward: 0.653 [-1.000, 1.000], mean action: 3.156 [0.000, 6.000], mean observation: 172.333 [23.000, 255.000], loss: 0.048843, mean_absolute_error: 8.925636, mean_q: 10.424483, mean_eps: 0.765759
  260663/2000000: episode: 1367, duration:

  266040/2000000: episode: 1390, duration: 8.192s, episode steps: 207, steps per second: 25, episode reward: 111.500, mean reward: 0.539 [-1.000, 1.000], mean action: 2.792 [0.000, 6.000], mean observation: 171.852 [24.000, 255.000], loss: 0.050897, mean_absolute_error: 9.150181, mean_q: 10.680137, mean_eps: 0.760658
  266270/2000000: episode: 1391, duration: 9.747s, episode steps: 230, steps per second: 24, episode reward: 169.900, mean reward: 0.739 [-1.000, 1.000], mean action: 3.026 [0.000, 6.000], mean observation: 172.082 [24.000, 255.000], loss: 0.046930, mean_absolute_error: 9.122043, mean_q: 10.643674, mean_eps: 0.760461
  266526/2000000: episode: 1392, duration: 10.528s, episode steps: 256, steps per second: 24, episode reward: 174.300, mean reward: 0.681 [-1.000, 1.000], mean action: 3.012 [0.000, 6.000], mean observation: 172.938 [24.000, 255.000], loss: 0.045353, mean_absolute_error: 9.016875, mean_q: 10.524752, mean_eps: 0.760242
  266756/2000000: episode: 1393, duration:

  272330/2000000: episode: 1416, duration: 9.292s, episode steps: 231, steps per second: 25, episode reward: 150.200, mean reward: 0.650 [-1.000, 1.000], mean action: 3.035 [0.000, 6.000], mean observation: 170.771 [24.000, 255.000], loss: 0.045493, mean_absolute_error: 9.567300, mean_q: 11.180370, mean_eps: 0.755007
  272587/2000000: episode: 1417, duration: 11.100s, episode steps: 257, steps per second: 23, episode reward: 163.500, mean reward: 0.636 [-1.000, 1.000], mean action: 3.195 [0.000, 6.000], mean observation: 172.732 [24.000, 255.000], loss: 0.048720, mean_absolute_error: 9.443271, mean_q: 11.027702, mean_eps: 0.754788
  272810/2000000: episode: 1418, duration: 9.416s, episode steps: 223, steps per second: 24, episode reward: 152.400, mean reward: 0.683 [-1.000, 1.000], mean action: 3.036 [0.000, 6.000], mean observation: 171.298 [23.000, 255.000], loss: 0.050465, mean_absolute_error: 9.609512, mean_q: 11.210923, mean_eps: 0.754572
  273089/2000000: episode: 1419, duration:

  278405/2000000: episode: 1442, duration: 11.649s, episode steps: 290, steps per second: 25, episode reward: 149.600, mean reward: 0.516 [-1.000, 1.000], mean action: 3.317 [0.000, 6.000], mean observation: 173.444 [24.000, 255.000], loss: 0.047524, mean_absolute_error: 9.632207, mean_q: 11.248237, mean_eps: 0.749566
  278640/2000000: episode: 1443, duration: 9.384s, episode steps: 235, steps per second: 25, episode reward: 163.500, mean reward: 0.696 [-1.000, 1.000], mean action: 3.077 [0.000, 6.000], mean observation: 171.690 [24.000, 255.000], loss: 0.051694, mean_absolute_error: 9.864854, mean_q: 11.515477, mean_eps: 0.749330
  278891/2000000: episode: 1444, duration: 10.028s, episode steps: 251, steps per second: 25, episode reward: 154.300, mean reward: 0.615 [-1.000, 1.000], mean action: 2.980 [0.000, 6.000], mean observation: 172.253 [24.000, 255.000], loss: 0.055295, mean_absolute_error: 9.790292, mean_q: 11.430258, mean_eps: 0.749112
  279153/2000000: episode: 1445, duration

  284874/2000000: episode: 1468, duration: 9.403s, episode steps: 248, steps per second: 26, episode reward: 103.700, mean reward: 0.418 [-1.000, 0.500], mean action: 2.956 [0.000, 6.000], mean observation: 172.802 [24.000, 255.000], loss: 0.049505, mean_absolute_error: 10.613106, mean_q: 12.386958, mean_eps: 0.743725
  285137/2000000: episode: 1469, duration: 10.386s, episode steps: 263, steps per second: 25, episode reward: 177.200, mean reward: 0.674 [-1.000, 1.000], mean action: 3.251 [0.000, 6.000], mean observation: 172.605 [24.000, 255.000], loss: 0.044881, mean_absolute_error: 10.236789, mean_q: 11.945847, mean_eps: 0.743495
  285396/2000000: episode: 1470, duration: 9.992s, episode steps: 259, steps per second: 26, episode reward: 148.500, mean reward: 0.573 [-1.000, 1.000], mean action: 3.286 [0.000, 6.000], mean observation: 172.368 [24.000, 255.000], loss: 0.050077, mean_absolute_error: 10.151369, mean_q: 11.856105, mean_eps: 0.743261
  285666/2000000: episode: 1471, durati

  291453/2000000: episode: 1494, duration: 9.866s, episode steps: 252, steps per second: 26, episode reward: 146.900, mean reward: 0.583 [-1.000, 1.000], mean action: 2.790 [0.000, 6.000], mean observation: 171.008 [24.000, 255.000], loss: 0.053724, mean_absolute_error: 10.761686, mean_q: 12.586861, mean_eps: 0.737805
  291702/2000000: episode: 1495, duration: 9.651s, episode steps: 249, steps per second: 26, episode reward: 158.400, mean reward: 0.636 [-1.000, 1.000], mean action: 3.004 [0.000, 6.000], mean observation: 170.780 [23.000, 255.000], loss: 0.049286, mean_absolute_error: 10.653147, mean_q: 12.435465, mean_eps: 0.737580
  291969/2000000: episode: 1496, duration: 10.508s, episode steps: 267, steps per second: 25, episode reward: 117.200, mean reward: 0.439 [-1.000, 0.500], mean action: 3.277 [0.000, 6.000], mean observation: 172.779 [24.000, 255.000], loss: 0.059521, mean_absolute_error: 10.768606, mean_q: 12.578316, mean_eps: 0.737348
  292238/2000000: episode: 1497, durati

  297876/2000000: episode: 1520, duration: 9.686s, episode steps: 238, steps per second: 25, episode reward: 168.100, mean reward: 0.706 [-1.000, 1.000], mean action: 3.160 [0.000, 6.000], mean observation: 171.743 [24.000, 255.000], loss: 0.048342, mean_absolute_error: 10.574033, mean_q: 12.344408, mean_eps: 0.732020
  298109/2000000: episode: 1521, duration: 9.328s, episode steps: 233, steps per second: 25, episode reward: 176.100, mean reward: 0.756 [-1.000, 1.000], mean action: 2.987 [0.000, 6.000], mean observation: 171.875 [24.000, 255.000], loss: 0.055163, mean_absolute_error: 10.877304, mean_q: 12.717941, mean_eps: 0.731807
  298326/2000000: episode: 1522, duration: 7.948s, episode steps: 217, steps per second: 27, episode reward: 89.800, mean reward: 0.414 [-1.000, 0.500], mean action: 2.903 [0.000, 6.000], mean observation: 172.226 [23.000, 255.000], loss: 0.053259, mean_absolute_error: 10.980014, mean_q: 12.811782, mean_eps: 0.731604
  298559/2000000: episode: 1523, duration

  304325/2000000: episode: 1546, duration: 8.803s, episode steps: 234, steps per second: 27, episode reward: 145.900, mean reward: 0.624 [-1.000, 1.000], mean action: 3.218 [0.000, 6.000], mean observation: 171.702 [23.000, 255.000], loss: 0.071124, mean_absolute_error: 11.523122, mean_q: 13.484852, mean_eps: 0.726213
  304576/2000000: episode: 1547, duration: 9.897s, episode steps: 251, steps per second: 25, episode reward: 180.000, mean reward: 0.717 [-1.000, 1.000], mean action: 3.295 [0.000, 6.000], mean observation: 172.020 [24.000, 255.000], loss: 0.061618, mean_absolute_error: 11.338889, mean_q: 13.251302, mean_eps: 0.725995
  304812/2000000: episode: 1548, duration: 9.477s, episode steps: 236, steps per second: 25, episode reward: 174.400, mean reward: 0.739 [-1.000, 1.000], mean action: 3.123 [0.000, 6.000], mean observation: 171.412 [23.000, 255.000], loss: 0.055700, mean_absolute_error: 11.109517, mean_q: 12.976601, mean_eps: 0.725777
  305092/2000000: episode: 1549, duratio

### testing

In [None]:
# Evaluation cell: load saved DQN weights from the log directory and run the
# agent on `env` for a fixed number of test episodes.
log_dir = 'logs'
# exist_ok=True makes this a no-op when the directory already exists and avoids
# the check-then-create race of a separate exists() test.
os.makedirs(log_dir, exist_ok=True)

# NOTE(review): presumably this is the weights file written by the training
# cell -- confirm the name matches what training saves.
weights_filename = join(log_dir, 'dqn_{}_weights.h5f'.format('AirSimCarRL'))
dqn.load_weights(weights_filename)
# Run 10 test episodes with visualization turned on.
dqn.test(env, nb_episodes=10, visualize=True)