<a href="https://colab.research.google.com/github/gavintobin/atlas-machine_learning/blob/main/reinforcement_learning/deep_q_learning/DQN_Agent_Atari_Breakout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import tensorflow as tf
# NOTE: the install cell below pins a specific tensorflow version; restart the session
# after running it so that the update takes effect.

In [2]:
!pip install keras-rl2 --quiet
!pip install gym[atari] --quiet
!pip install tensorflow==2.11 --quiet
#!pip install keras-dtensor
!pip install gym[accept-rom-license]



In [3]:
# Mount Google Drive at /content/drive. Later cells use paths under this mount point
# (the ROM folder and the folder passed to RecordVideo).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!python -m atari_py.import_roms /content/drive/MyDrive/dqn/roms/

/usr/bin/python3: Error while finding module specification for 'atari_py.import_roms' (ModuleNotFoundError: No module named 'atari_py')


In [5]:
import gym
from gym.envs.registration import register

In [6]:
# Register the Breakout environment under the id 'Breakout-v4'.
# The cell output shows gym's "Overriding environment" warning for this call.
register(
    id='Breakout-v4',
    entry_point='gym.envs.atari:AtariEnv',
    max_episode_steps=10000,
    nondeterministic=False,
    # Keyword arguments forwarded to the environment entry point.
    kwargs=dict(game='breakout', obs_type='image', frameskip=1),
)

  and should_run_async(code)
  logger.warn(f"Overriding environment {spec.id}")


In [8]:
from __future__ import division
import argparse

from PIL import Image
import numpy as np
import gym
from gym.wrappers import RecordVideo
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from tensorflow.keras.optimizers.legacy import Adam
import tensorflow.keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint


# Number of consecutive frames stacked into one network input.
WINDOW_LENGTH = 4
# Size every observation is resized to before it is stored; compared against the
# numpy shape of the processed frame.
INPUT_SHAPE = (84, 84)


class AtariProcessor(Processor):
    """Preprocessing hooks for observations, state batches and rewards.

    Observations are resized to ``INPUT_SHAPE`` and converted to grayscale ``uint8``
    frames, state batches are converted to ``float32`` and divided by 255, and
    rewards are clipped to the range [-1, 1].
    """

    def process_observation(self, observation):
        """Resize a raw frame to ``INPUT_SHAPE`` and convert it to grayscale.

        Args:
            observation: array with exactly three dimensions (height, width, channel).

        Returns:
            ``uint8`` array whose shape equals ``INPUT_SHAPE``.

        Raises:
            AssertionError: if the observation is not 3-dimensional or the processed
                frame does not have shape ``INPUT_SHAPE``.
        """
        assert observation.ndim == 3  # (height, width, channel)
        # INPUT_SHAPE is checked against the numpy shape below, i.e. (height, width),
        # while PIL's resize expects (width, height). Swap explicitly so the
        # assertion also holds for non-square shapes.
        height, width = INPUT_SHAPE
        img = Image.fromarray(observation)
        img = img.resize((width, height)).convert('L')  # resize and convert to grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        return processed_observation.astype('uint8')  # saves storage in experience memory

    def process_state_batch(self, batch):
        """Convert a batch of stored observations to ``float32`` and divide by 255.

        For ``uint8`` input this yields values in [0, 1].
        """
        # We could perform this processing step in `process_observation`. In this case, however,
        # we would need to store a `float32` array instead, which is 4x more memory intensive than
        # an `uint8` array. This matters if we store 1M observations.
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """Clip the reward to the range [-1, 1]."""
        return np.clip(reward, -1., 1.)

# Command-line parsing is disabled in the notebook; the run settings are assigned
# directly below instead.
# parser = argparse.ArgumentParser()
# parser.add_argument('--mode', choices=['train', 'test'], default='train')
# parser.add_argument('--env-name', type=str, default='BreakoutDeterministic-v4')
# parser.add_argument('--weights', type=str, default=None)
# args = parser.parse_args()
env_name = 'Breakout-v4'
# `mode` and `weights` are not referenced again in this cell.
mode = 'train'
weights = None

# Get the environment and extract the number of actions.
env = gym.make(env_name)
# Seed numpy's global RNG and the environment with the same fixed value.
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
# Wrap the environment in RecordVideo with the Drive folder as its target. The trigger
# returns True for every episode index it is given.
env = RecordVideo(env, '/content/drive/MyDrive/dqn', episode_trigger=lambda episode:True)
# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
# The network input is a stack of WINDOW_LENGTH frames: (window, height, width).
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
model = Sequential([
    # Move the window axis to the end: (window, height, width) -> (height, width, window).
    Permute((2, 3, 1), input_shape=input_shape),

    # Three convolutional layers, each followed by a ReLU.
    Convolution2D(32, (8, 8), strides=(4, 4)),
    Activation('relu'),
    Convolution2D(64, (4, 4), strides=(2, 2)),
    Activation('relu'),
    Convolution2D(64, (3, 3), strides=(1, 1)),
    Activation('relu'),

    # Fully connected head with one linear output per action.
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])
# summary() prints the table itself and returns None, so wrapping it in print()
# only added a stray "None" line to the output.
model.summary()

# Configure and compile the agent. Any built-in tensorflow.keras optimizer (and metric)
# can be used here.
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
processor = AtariProcessor()

# Exploration policy: eps-greedy, i.e. with probability eps a random action is taken.
# eps is annealed linearly from 1.0 down to 0.1 over 1M steps, so the agent explores a lot
# at first (high eps) and increasingly relies on what it has learned (low eps). During
# testing a fixed eps of 0.05 is used so the agent still takes some random actions and
# cannot get stuck.
# NOTE(review): the fit call later in this cell uses nb_steps=100000, so eps only decays
# to about 0.91 during training — confirm this is intended.
policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=1.,
    value_min=.1,
    value_test=.05,
    nb_steps=1000000,
)

# Balancing exploration against exploitation is hard and still an open research topic.
# Feel free to tune the parameters above or swap in another policy, for example
# Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)

dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    nb_steps_warmup=5000,
    gamma=.99,
    target_model_update=10000,
    train_interval=4,
    delta_clip=1.,
)
dqn.compile(Adam(learning_rate=.00025), metrics=['mae'])

# Train the agent on the wrapped environment; with verbose=2 the cell output shows
# one log line per episode.
dqn.fit(env, verbose=2, nb_steps=100000)

# Once training has finished, store the final weights under two file names:
# one derived from the environment name and a fixed 'policy.h5'.
dqn.save_weights(f'dqn_{env_name}_weights.h5f', overwrite=True)
dqn.save_weights('policy.h5', overwrite=True)

  and should_run_async(code)
  logger.warn(
  deprecation(
  deprecation(


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 permute_1 (Permute)         (None, 84, 84, 4)         0         
                                                                 
 conv2d_3 (Conv2D)           (None, 20, 20, 32)        8224      
                                                                 
 activation_5 (Activation)   (None, 20, 20, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 activation_6 (Activation)   (None, 9, 9, 64)          0         
                                                                 
 conv2d_5 (Conv2D)           (None, 7, 7, 64)          36928     
                                                                 
 activation_7 (Activation)   (None, 7, 7, 64)         

  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.warn(
  updates=self.state_updates,
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


   809/100000: episode: 1, duration: 5.003s, episode steps: 809, steps per second: 162, episode reward:  2.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.533 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  1305/100000: episode: 2, duration: 2.724s, episode steps: 496, steps per second: 182, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.512 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  2765/100000: episode: 3, duration: 9.533s, episode steps: 1460, steps per second: 153, episode reward:  5.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.547 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  3263/100000: episode: 4, duration: 2.793s, episode steps: 498, steps per second: 178, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.556 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  3974/100000: episode: 5, duration: 5.314s, episode steps: 711, steps per second: 134, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.446 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  4475/100000: episode: 6, duration: 4.921s, episode steps: 501, steps per second: 102, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.511 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  updates=self.state_updates,


  5153/100000: episode: 7, duration: 11.419s, episode steps: 678, steps per second:  59, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.577 [0.000, 3.000],  loss: 0.000525, mae: 0.009669, mean_q: 0.014798, mean_eps: 0.995430


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  5793/100000: episode: 8, duration: 34.221s, episode steps: 640, steps per second:  19, episode reward:  1.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.428 [0.000, 3.000],  loss: 0.000797, mae: 0.005597, mean_q: 0.011537, mean_eps: 0.995073


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  6630/100000: episode: 9, duration: 41.669s, episode steps: 837, steps per second:  20, episode reward:  2.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.478 [0.000, 3.000],  loss: 0.000829, mae: 0.005287, mean_q: 0.009898, mean_eps: 0.994409


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  7145/100000: episode: 10, duration: 27.173s, episode steps: 515, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.567 [0.000, 3.000],  loss: 0.001096, mae: 0.005900, mean_q: 0.010704, mean_eps: 0.993801


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  8142/100000: episode: 11, duration: 50.017s, episode steps: 997, steps per second:  20, episode reward:  3.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.537 [0.000, 3.000],  loss: 0.000758, mae: 0.005520, mean_q: 0.010309, mean_eps: 0.993120


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  8820/100000: episode: 12, duration: 36.710s, episode steps: 678, steps per second:  18, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.482 [0.000, 3.000],  loss: 0.000741, mae: 0.005489, mean_q: 0.009514, mean_eps: 0.992368


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  9747/100000: episode: 13, duration: 49.929s, episode steps: 927, steps per second:  19, episode reward:  3.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.480 [0.000, 3.000],  loss: 0.000743, mae: 0.005608, mean_q: 0.010083, mean_eps: 0.991646


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 10362/100000: episode: 14, duration: 31.940s, episode steps: 615, steps per second:  19, episode reward:  1.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.496 [0.000, 3.000],  loss: 0.001012, mae: 0.006084, mean_q: 0.009383, mean_eps: 0.990951


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 10855/100000: episode: 15, duration: 26.456s, episode steps: 493, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.424 [0.000, 3.000],  loss: 0.000895, mae: 0.005964, mean_q: 0.010515, mean_eps: 0.990453


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 11467/100000: episode: 16, duration: 31.723s, episode steps: 612, steps per second:  19, episode reward:  1.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.503 [0.000, 3.000],  loss: 0.001222, mae: 0.006916, mean_q: 0.011328, mean_eps: 0.989956


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 12219/100000: episode: 17, duration: 36.805s, episode steps: 752, steps per second:  20, episode reward:  2.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.513 [0.000, 3.000],  loss: 0.000586, mae: 0.005337, mean_q: 0.008624, mean_eps: 0.989342


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 12720/100000: episode: 18, duration: 26.203s, episode steps: 501, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.513 [0.000, 3.000],  loss: 0.000874, mae: 0.005778, mean_q: 0.010561, mean_eps: 0.988779


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 13213/100000: episode: 19, duration: 25.777s, episode steps: 493, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.450 [0.000, 3.000],  loss: 0.000880, mae: 0.005912, mean_q: 0.010499, mean_eps: 0.988331


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 13714/100000: episode: 20, duration: 23.700s, episode steps: 501, steps per second:  21, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.461 [0.000, 3.000],  loss: 0.000999, mae: 0.006170, mean_q: 0.011398, mean_eps: 0.987882


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 14224/100000: episode: 21, duration: 28.805s, episode steps: 510, steps per second:  18, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.592 [0.000, 3.000],  loss: 0.000979, mae: 0.006082, mean_q: 0.011001, mean_eps: 0.987429


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 15034/100000: episode: 22, duration: 41.932s, episode steps: 810, steps per second:  19, episode reward:  2.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.506 [0.000, 3.000],  loss: 0.000619, mae: 0.005463, mean_q: 0.009519, mean_eps: 0.986835


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 15536/100000: episode: 23, duration: 25.930s, episode steps: 502, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.480 [0.000, 3.000],  loss: 0.001250, mae: 0.006789, mean_q: 0.012018, mean_eps: 0.986244


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 16241/100000: episode: 24, duration: 36.594s, episode steps: 705, steps per second:  19, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.583 [0.000, 3.000],  loss: 0.000794, mae: 0.005500, mean_q: 0.009216, mean_eps: 0.985701


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 16733/100000: episode: 25, duration: 26.560s, episode steps: 492, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.506 [0.000, 3.000],  loss: 0.000504, mae: 0.005367, mean_q: 0.009583, mean_eps: 0.985161


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 17445/100000: episode: 26, duration: 36.032s, episode steps: 712, steps per second:  20, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.565 [0.000, 3.000],  loss: 0.000439, mae: 0.004618, mean_q: 0.007323, mean_eps: 0.984619


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 17938/100000: episode: 27, duration: 26.564s, episode steps: 493, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.485 [0.000, 3.000],  loss: 0.000591, mae: 0.005511, mean_q: 0.010770, mean_eps: 0.984077


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 18623/100000: episode: 28, duration: 37.394s, episode steps: 685, steps per second:  18, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.442 [0.000, 3.000],  loss: 0.000643, mae: 0.005262, mean_q: 0.008986, mean_eps: 0.983548


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 19133/100000: episode: 29, duration: 27.403s, episode steps: 510, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.529 [0.000, 3.000],  loss: 0.000488, mae: 0.004653, mean_q: 0.008034, mean_eps: 0.983010


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 19644/100000: episode: 30, duration: 25.688s, episode steps: 511, steps per second:  20, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.605 [0.000, 3.000],  loss: 0.000495, mae: 0.005224, mean_q: 0.009830, mean_eps: 0.982551


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 20137/100000: episode: 31, duration: 25.534s, episode steps: 493, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.493 [0.000, 3.000],  loss: 0.000632, mae: 0.005666, mean_q: 0.009273, mean_eps: 0.982099


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 20649/100000: episode: 32, duration: 27.410s, episode steps: 512, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.518 [0.000, 3.000],  loss: 0.000371, mae: 0.008809, mean_q: 0.013371, mean_eps: 0.981645


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 21348/100000: episode: 33, duration: 35.470s, episode steps: 699, steps per second:  20, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.486 [0.000, 3.000],  loss: 0.000901, mae: 0.009577, mean_q: 0.014682, mean_eps: 0.981102


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 22048/100000: episode: 34, duration: 37.993s, episode steps: 700, steps per second:  18, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.516 [0.000, 3.000],  loss: 0.000605, mae: 0.009132, mean_q: 0.014390, mean_eps: 0.980474


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 22846/100000: episode: 35, duration: 40.728s, episode steps: 798, steps per second:  20, episode reward:  2.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.541 [0.000, 3.000],  loss: 0.000392, mae: 0.008508, mean_q: 0.013303, mean_eps: 0.979799


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 23863/100000: episode: 36, duration: 54.386s, episode steps: 1017, steps per second:  19, episode reward:  3.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.434 [0.000, 3.000],  loss: 0.000721, mae: 0.009478, mean_q: 0.015305, mean_eps: 0.978981


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 24479/100000: episode: 37, duration: 31.073s, episode steps: 616, steps per second:  20, episode reward:  1.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.545 [0.000, 3.000],  loss: 0.000579, mae: 0.009106, mean_q: 0.014789, mean_eps: 0.978247


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 24992/100000: episode: 38, duration: 26.128s, episode steps: 513, steps per second:  20, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.565 [0.000, 3.000],  loss: 0.000459, mae: 0.009117, mean_q: 0.015602, mean_eps: 0.977739


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 26021/100000: episode: 39, duration: 51.582s, episode steps: 1029, steps per second:  20, episode reward:  3.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.518 [0.000, 3.000],  loss: 0.001088, mae: 0.010515, mean_q: 0.016968, mean_eps: 0.977045


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 26521/100000: episode: 40, duration: 26.091s, episode steps: 500, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.538 [0.000, 3.000],  loss: 0.000731, mae: 0.009240, mean_q: 0.014538, mean_eps: 0.976355


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 27134/100000: episode: 41, duration: 30.851s, episode steps: 613, steps per second:  20, episode reward:  1.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.437 [0.000, 3.000],  loss: 0.000882, mae: 0.010347, mean_q: 0.018692, mean_eps: 0.975855


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 28320/100000: episode: 42, duration: 62.023s, episode steps: 1186, steps per second:  19, episode reward:  4.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.493 [0.000, 3.000],  loss: 0.000345, mae: 0.009083, mean_q: 0.015315, mean_eps: 0.975047


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 29223/100000: episode: 43, duration: 45.438s, episode steps: 903, steps per second:  20, episode reward:  3.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.451 [0.000, 3.000],  loss: 0.000646, mae: 0.009483, mean_q: 0.015939, mean_eps: 0.974107


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 29710/100000: episode: 44, duration: 23.517s, episode steps: 487, steps per second:  21, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.435 [0.000, 3.000],  loss: 0.000190, mae: 0.008573, mean_q: 0.013491, mean_eps: 0.973481


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 31302/100000: episode: 45, duration: 80.908s, episode steps: 1592, steps per second:  20, episode reward:  7.000, mean reward:  0.004 [ 0.000,  1.000], mean action: 1.533 [0.000, 3.000],  loss: 0.000921, mae: 0.010326, mean_q: 0.015555, mean_eps: 0.972545


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 32502/100000: episode: 46, duration: 62.043s, episode steps: 1200, steps per second:  19, episode reward:  4.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.463 [0.000, 3.000],  loss: 0.000979, mae: 0.011179, mean_q: 0.018530, mean_eps: 0.971288


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 33882/100000: episode: 47, duration: 70.542s, episode steps: 1380, steps per second:  20, episode reward:  5.000, mean reward:  0.004 [ 0.000,  1.000], mean action: 1.470 [0.000, 3.000],  loss: 0.000875, mae: 0.011472, mean_q: 0.018587, mean_eps: 0.970127


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 34394/100000: episode: 48, duration: 26.905s, episode steps: 512, steps per second:  19, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.436 [0.000, 3.000],  loss: 0.000410, mae: 0.009309, mean_q: 0.014152, mean_eps: 0.969276


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 35090/100000: episode: 49, duration: 35.007s, episode steps: 696, steps per second:  20, episode reward:  1.000, mean reward:  0.001 [ 0.000,  1.000], mean action: 1.569 [0.000, 3.000],  loss: 0.000602, mae: 0.010224, mean_q: 0.015819, mean_eps: 0.968732


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 35971/100000: episode: 50, duration: 44.436s, episode steps: 881, steps per second:  20, episode reward:  2.000, mean reward:  0.002 [ 0.000,  1.000], mean action: 1.554 [0.000, 3.000],  loss: 0.000627, mae: 0.010871, mean_q: 0.017202, mean_eps: 0.968023


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 37040/100000: episode: 51, duration: 56.965s, episode steps: 1069, steps per second:  19, episode reward:  3.000, mean reward:  0.003 [ 0.000,  1.000], mean action: 1.497 [0.000, 3.000],  loss: 0.001112, mae: 0.011037, mean_q: 0.017038, mean_eps: 0.967146


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


done, took 1745.527 seconds


  and should_run_async(code)
