1. Rainbow and Ape-X Experiments
    1. We release a set of hyperparameters for CartPole-v1, Classic Control, and Atari
    2. We release code for Rainbow that can train X steps in Y minutes on a Mac M2 Chip
    3. We also release a version of Ape-X as described in the original paper, and an Ape-X with Rainbow
        1. Compare results of each 
        2. Compare Ape-X with different Rainbow components added or removed
    4. We compare the different DQN variants, as described in their papers, to Rainbow; the individual components to Rainbow; and Rainbow with individual components removed
    10. Compare Rainbow training speeds with different levels of numerical precision and datatypes
        1. Mixed precision using torch.amp 
        2. Lower matmul precision
            1. comparing medium, high, and highest 
            2. https://pytorch.org/docs/master/generated/torch.set_float32_matmul_precision.html?highlight=precision#torch.set_float32_matmul_precision
    11. Ape-X hyperparameter sweep and sensitivities
    12. Exploration methods for Rainbow Ape-X
        1. Just noisy nets (same for all actors)
        2. Noisy nets and varying epsilon 
        3. Adding a constant that changes variance of noisy nets for action selection
        4. AlphaStar Agents

## Rainbow on CartPole-v1
Hyperparameters are based on the hyperopt experiments (quantized trial 27), with some minor changes.

In [None]:
import gymnasium as gym
import sys

import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import CartPoleConfig

# CartPole-v1 environment, created with rgb_array rendering.
env = gym.make("CartPole-v1", render_mode="rgb_array")

# Hyperparameter overrides for this run; handed to RainbowConfig below.
config_dict = {
    # --- layer widths ---
    # NOTE(review): the key below is spelled "advatage" in every cell of this
    # notebook; presumably the config reads that exact spelling -- confirm
    # before renaming it.
    "dense_layers_widths": [128, 128],
    "value_hidden_layers_widths": [64, 64],
    "advatage_hidden_layers_widths": [64, 64],
    # --- optimizer / schedule ---
    "adam_epsilon": 0.00375,
    "learning_rate": 0.005,
    "training_steps": 10000,
    # --- per_* settings and replay buffer sizing ---
    "per_epsilon": 0.05,
    "per_alpha": 0.8,
    "per_beta": 0.45,
    "minibatch_size": 128,
    "replay_buffer_size": 10000,
    "min_replay_buffer_size": 1250,
    # --- remaining settings ---
    "transfer_interval": 10,
    "n_step": 9,
    "kernel_initializer": "glorot_uniform",
    "loss_function": KLDivergenceLoss(),  # could do categorical cross entropy
    "clipnorm": 2.0,
    "discount_factor": 0.99,
    "atom_size": 81,
    "replay_interval": 4,
}
game_config = CartPoleConfig()
config = RainbowConfig(config_dict, game_config)

# Use the first CUDA device when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

agent = RainbowAgent(env, config, name="Rainbow_CartPole-v1", device=device)

# Print every model parameter once, before train() is called.
for param in agent.model.parameters():
    print(param)
print("start")
agent.train()

## Rainbow on Classic Control
Hyperparameters come from the "Revisiting Rainbow" paper.

In [None]:
import gymnasium as gym
import sys
import torch

from utils import CategoricalCrossentropyLoss, KLDivergenceLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import ClassicControlConfig

# Hyperparameter overrides shared by all four Classic Control runs below.
config_dict = {
    "dense_layers_widths": [512, 512],
    "value_hidden_layers_widths": [],  #
    "advatage_hidden_layers_widths": [],  #
    "adam_epsilon": 3.125e-4,
    "learning_rate": 0.001,
    "training_steps": 30000,
    "per_epsilon": 1e-6,  #
    "per_alpha": 0.5,
    "per_beta": 0.5,  # For RIAYN should be no annealing # 0.4
    "minibatch_size": 128,
    "replay_buffer_size": 50000,
    "min_replay_buffer_size": 500,
    "transfer_interval": 100,
    "n_step": 3,
    "kernel_initializer": "orthogonal",  #
    "loss_function": KLDivergenceLoss(),  # KLDivergence()
    "clipnorm": 0.0,  # 2.0
    "discount_factor": 0.99,
    "atom_size": 51,
    "replay_interval": 2,
}
game_config = ClassicControlConfig()
config = RainbowConfig(config_dict, game_config)

# Use the first CUDA device when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# One pair of value bounds is used for every environment in this cell.
# NOTE(review): these are assigned after RainbowConfig was constructed from
# game_config -- confirm the config reads them lazily rather than copying
# them at construction time.
game_config.v_min = -200
game_config.v_max = 500

# (environment id, extra gym.make keyword arguments), trained in this order.
# The trailing comments record candidate per-environment bounds that are
# NOT applied.
classic_control_runs = (
    ("CartPole-v1", {}),  # candidate bounds: v_min = 0, v_max = 500
    ("Acrobot-v1", {}),  # candidate bounds: v_min = -500, v_max = 0
    ("LunarLander-v2", {}),  # candidate bounds: v_min = -200, v_max = 200
    ("MountainCar-v0", {"max_episode_steps": 600}),  # candidate bounds: v_min = -200, v_max = -100
)

for env_id, make_kwargs in classic_control_runs:
    env = gym.make(env_id, render_mode="rgb_array", **make_kwargs)
    agent = RainbowAgent(
        env, config, name=f"Rainbow_ClassicControl_{env_id}", device=device
    )
    agent.train()

## Rainbow on Atari
Takes around 18 GB of RAM, and many hours on a Mac M2 just to fill the replay buffer.

### Rainbow MsPacman

In [None]:
import numpy as np

# Scratch check: fill one row of an object-dtype buffer, then convert the
# whole buffer to int8, printing the array after each step.
buffer_shape = (3, 1, 2)
observation_buffer = np.zeros(buffer_shape, dtype=np.object_)
print(observation_buffer)

# Assigning a length-2 list to index 0 writes it into that (1, 2) slice.
observation_buffer[0] = [1, 1]
print(observation_buffer)

# Rebind the name to an int8 copy of the object array.
observation_buffer = observation_buffer.astype(np.int8)
print(observation_buffer)

In [None]:
import gymnasium as gym
import sys

from utils import CategoricalCrossentropyLoss, KLDivergenceLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import AtariConfig
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import numpy as np

# Hyperparameter overrides for the Atari run; passed to RainbowConfig together
# with an AtariConfig instance at the bottom of this block.
config_dict = {
    # Three conv layer specs given as 3-tuples -- presumably
    # (filters, kernel_size, stride); confirm against RainbowConfig.
    "conv_layers": [
        (32, 8, 4),
        (64, 4, 2),
        (64, 3, 1),
    ],
    "dense_layers_widths": [512],
    # NOTE(review): "advatage" is spelled this way in every cell of the
    # notebook; presumably the config expects that exact key -- confirm before
    # renaming it.
    "value_hidden_layers_widths": [],  #
    "advatage_hidden_layers_widths": [],  #
    "adam_epsilon": 1.5e-4,
    "learning_rate": 0.00025 / 4,  # evaluates to 6.25e-05
    # 50,000,000 steps x 4 = 200,000,000 frames (the factor of 4 is presumably
    # the frame skip -- confirm).
    "training_steps": 50000000,  # Agent saw 200,000,000 frames
    "per_epsilon": 1e-6,  #
    "per_alpha": 0.5,
    "per_beta": 0.4,
    "minibatch_size": 32,
    "replay_buffer_size": 1000000,
    "min_replay_buffer_size": 80000,
    "transfer_interval": 32000,
    "n_step": 3,
    "kernel_initializer": "orthogonal",  #
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,  #
    "discount_factor": 0.99,
    "atom_size": 51,
    "replay_interval": 4,
}
game_config = AtariConfig()
config = RainbowConfig(config_dict, game_config)


class ClipReward(gym.RewardWrapper):
    """Reward wrapper that clamps each reward into [min_reward, max_reward]."""

    def __init__(self, env, min_reward, max_reward):
        """Wrap ``env`` and remember the clipping bounds.

        The bounds are also stored as ``reward_range`` on the wrapper.
        """
        super().__init__(env)
        self.min_reward, self.max_reward = min_reward, max_reward
        self.reward_range = (self.min_reward, self.max_reward)

    def reward(self, reward):
        """Return ``reward`` limited to the configured bounds via ``np.clip``."""
        lower, upper = self.min_reward, self.max_reward
        return np.clip(reward, lower, upper)


# Build the MsPacman environment in stages: raw env -> AtariPreprocessing ->
# 4-frame FrameStack (with lz4 compression enabled).
# NOTE(review): the ClipReward wrapper defined in this cell is not applied to
# the environment, and no device= is passed to RainbowAgent here (unlike the
# CartPole / Classic Control cells) -- confirm both are intentional.
raw_env = gym.make(
    "MsPacmanNoFrameskip-v4", render_mode="rgb_array", max_episode_steps=108000
)
preprocessed_env = AtariPreprocessing(raw_env, terminal_on_life_loss=True)
env = FrameStack(preprocessed_env, 4, lz4_compress=True)

agent = RainbowAgent(env, config, name="Rainbow_Atari_MsPacmanNoFrameskip-v4")
# Override the agent's checkpoint interval before training starts.
agent.checkpoint_interval = 1000
agent.train()

## Rainbow with turn-based zero-sum 2-player perfect information deterministic games
Not working yet because of zero-sum rewards; it could be made to work by doing an NFSP sort of thing when storing experiences.

### Rainbow Tic Tac Toe

#### Framestacking

In [None]:
import gymnasium as gym
import sys

from packages.game_configs.game_configs.tictactoe_config import TicTacToeConfig
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import AtariConfig
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import numpy as np

# Hyperparameter overrides for the frame-stacked TicTacToe run.
# NOTE(review): every value here is identical to the Atari cell's config
# (conv stack, 1,000,000-entry replay buffer, 50,000,000 training steps) --
# presumably a placeholder; confirm the conv layers suit the TicTacToe-v0
# observation shape.
config_dict = {
    "conv_layers": [
        (32, 8, 4),
        (64, 4, 2),
        (64, 3, 1),
    ],
    "dense_layers_widths": [512],
    "value_hidden_layers_widths": [],  #
    "advatage_hidden_layers_widths": [],  #
    "adam_epsilon": 1.5e-4,
    "learning_rate": 0.00025 / 4,  # evaluates to 6.25e-05
    "training_steps": 50000000,  # value and "200,000,000 frames" remark carried over from the Atari config
    "per_epsilon": 1e-6,  #
    "per_alpha": 0.5,
    "per_beta": 0.4,
    "minibatch_size": 32,
    "replay_buffer_size": 1000000,
    "min_replay_buffer_size": 80000,
    "transfer_interval": 32000,
    "n_step": 3,
    "kernel_initializer": "orthogonal",  #
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,  #
    "discount_factor": 0.99,
    "atom_size": 51,
    "replay_interval": 4,
}
game_config = TicTacToeConfig()
config = RainbowConfig(config_dict, game_config)

# Create the environment and wrap it in FrameStack with a stack size of 4.
env = gym.make("TicTacToe-v0", render_mode="rgb_array")
env = FrameStack(env, 4)
# No device= is passed here, unlike the CartPole / Classic Control cells.
agent = RainbowAgent(env, config, name="Rainbow_TicTacToe-v0")
agent.train()

#### No framestacking

In [None]:
import gymnasium as gym
import sys

from packages.game_configs.game_configs.tictactoe_config import TicTacToeConfig
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import AtariConfig
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import numpy as np

# Hyperparameter overrides for the TicTacToe run without frame stacking.
# NOTE(review): every value here is identical to the Atari cell's config
# (conv stack, 1,000,000-entry replay buffer, 50,000,000 training steps) --
# presumably a placeholder; confirm the conv layers suit the TicTacToe-v0
# observation shape.
config_dict = {
    "conv_layers": [
        (32, 8, 4),
        (64, 4, 2),
        (64, 3, 1),
    ],
    "dense_layers_widths": [512],
    "value_hidden_layers_widths": [],  #
    "advatage_hidden_layers_widths": [],  #
    "adam_epsilon": 1.5e-4,
    "learning_rate": 0.00025 / 4,  # evaluates to 6.25e-05
    "training_steps": 50000000,  # value and "200,000,000 frames" remark carried over from the Atari config
    "per_epsilon": 1e-6,  #
    "per_alpha": 0.5,
    "per_beta": 0.4,
    "minibatch_size": 32,
    "replay_buffer_size": 1000000,
    "min_replay_buffer_size": 80000,
    "transfer_interval": 32000,
    "n_step": 3,
    "kernel_initializer": "orthogonal",  #
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,  #
    "discount_factor": 0.99,
    "atom_size": 51,
    "replay_interval": 4,
}
game_config = TicTacToeConfig()
config = RainbowConfig(config_dict, game_config)

# The environment is used directly (no FrameStack wrapper in this variant).
env = gym.make("TicTacToe-v0", render_mode="rgb_array")
# The run name is the same string as in the frame-stacked variant.
agent = RainbowAgent(env, config, name="Rainbow_TicTacToe-v0")
agent.train()

### Rainbow Connect 4

### Rainbow Chess

In [None]:
import gymnasium as gym
import sys

import torch
from utils import MSELoss, HuberLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import DQNConfig
from game_configs import CartPoleConfig

# CartPole-v1 environment, created with rgb_array rendering.
env = gym.make("CartPole-v1", render_mode="rgb_array")

# Hyperparameter overrides for the DQN baseline; handed to DQNConfig below.
config_dict = {
    # --- network / optimizer ---
    "dense_layers_widths": [128, 128],
    "adam_epsilon": 0.00375,
    "learning_rate": 0.005,
    "training_steps": 10000,
    # --- replay buffer sizing ---
    "minibatch_size": 128,
    "replay_buffer_size": 10000,
    "min_replay_buffer_size": 1250,
    "kernel_initializer": "glorot_uniform",
    "loss_function": MSELoss(),  # HuberLoss is imported in this cell as the alternative
    "clipnorm": 2.0,
    "replay_interval": 4,
    # --- eg_epsilon_* settings (presumably the epsilon-greedy schedule:
    #     start 1.00, final 0.1, linear decay ending at step 10000 -- confirm) ---
    "eg_epsilon": 1.00,
    "eg_epsilon_final": 0.1,
    "eg_epsilon_decay_type": "linear",
    "eg_epsilon_final_step": 10000,
}
game_config = CartPoleConfig()
config = DQNConfig(config_dict, game_config)

# Use the first CUDA device when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# The agent class is RainbowAgent, configured through a DQNConfig.
agent = RainbowAgent(env, config, name="DQN_CartPole-v1", device=device)

# Print every model parameter once, before train() is called.
for param in agent.model.parameters():
    print(param)
print("start")
agent.train()

Using default save_intermediate_weights     : False
Using         training_steps                : 10000
Using         adam_epsilon                  : 0.00375
Using default momentum                      : 0.9
Using         learning_rate                 : 0.005
Using         clipnorm                      : 2.0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using default weight_decay                  : 0.0
Using         loss_function                 : <utils.utils.HuberLoss object at 0x10459e680>
Using default activation                    : relu
Using         kernel_initializer            : glorot_uniform
Using         minibatch_size                : 128
Using         replay_buffer_size            : 10000
Using         min_replay_buffer_size        : 1250
Using default num_minibatches               : 1
Using default training_iterations           : 1
Using default print_interval                : 100
RainbowConfig
Using default residual_layers               :

  logger.warn(
  logger.warn(


float32
Parameter containing:
tensor([[ 0.0306,  0.0261,  0.1632,  0.1218],
        [-0.0777, -0.1849,  0.1889, -0.0825],
        [-0.0843, -0.0167,  0.0728,  0.0371],
        [-0.0856,  0.0005, -0.1334, -0.1339],
        [ 0.1362, -0.0566, -0.1585, -0.0592],
        [-0.2077,  0.0981, -0.1520,  0.0755],
        [-0.0436, -0.0544,  0.0347,  0.0970],
        [-0.0380,  0.1227,  0.1308,  0.2101],
        [ 0.1132,  0.0064, -0.0008,  0.0022],
        [ 0.0341, -0.0806, -0.1282,  0.0937],
        [ 0.0593,  0.0337, -0.0938, -0.0993],
        [-0.1780,  0.1024,  0.0181,  0.0709],
        [ 0.0891,  0.1091,  0.1489, -0.1015],
        [ 0.0545, -0.0985,  0.0577,  0.0200],
        [-0.0217,  0.1453, -0.0685,  0.0938],
        [ 0.0972, -0.0422, -0.0131,  0.1919],
        [-0.1841,  0.1795, -0.0664,  0.0013],
        [ 0.1071,  0.0480,  0.1835,  0.1601],
        [-0.1573,  0.0543,  0.1825,  0.0880],
        [-0.1920,  0.0543, -0.0528,  0.0349],
        [-0.1278, -0.1961, -0.0498,  0.1622],
    

                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_0/videos/DQN_CartPole-v1/0/DQN_CartPole-v1-episode-4.mp4
score:  13.0


  axs[row][col].set_xlim(1, len(values))
  axs[row][col].legend()
  axs[row][col].set_xlim(1, len(values))


{'score': [], 'loss': [{'loss': 0.5264618396759033, 'target_model_updated': False}], 'test_score': [{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}]}
{'score': 475.0, 'test_score': 475.0}
[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}]
Training step: 101/10000
Training step: 201/10000
Training step: 301/10000
score:  11.0
score:  9.0
score:  10.0
score:  9.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_333/videos/DQN_CartPole-v1/333/DQN_CartPole-v1-episode-9.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_333/videos/DQN_CartPole-v1/333/DQN_CartPole-v1-episode-9.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_333/videos/DQN_CartPole-v1/333/DQN_CartPole-v1-episode-9.mp4
score:  10.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


Training step: 401/10000
Training step: 501/10000
Training step: 601/10000
score:  9.0
score:  9.0
score:  9.0
score:  8.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_666/videos/DQN_CartPole-v1/666/DQN_CartPole-v1-episode-14.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_666/videos/DQN_CartPole-v1/666/DQN_CartPole-v1-episode-14.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_666/videos/DQN_CartPole-v1/666/DQN_CartPole-v1-episode-14.mp4
score:  9.0
{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}]
Training step: 701/10000
Training step: 801/10000
Training step: 901/10000
score:  9.0
score:  10.0
score:  8.0
score:  9.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_999/videos/DQN_CartPole-v1/999/DQN_CartPole-v1-episode-19.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_999/videos/DQN_CartPole-v1/999/DQN_CartPole-v1-episode-19.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_999/videos/DQN_CartPole-v1/999/DQN_CartPole-v1-episode-19.mp4
score:  10.0
{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': Tru

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}]
Training step: 1001/10000
Training step: 1101/10000
Training step: 1201/10000
Training step: 1301/10000
score:  10.0
score:  10.0
score:  10.0
score:  10.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_1332/videos/DQN_CartPole-v1/1332/DQN_CartPole-v1-episode-24.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_1332/videos/DQN_CartPole-v1/1332/DQN_CartPole-v1-episode-24.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_1332/videos/DQN_CartPole-v1/1332/DQN_CartPole-v1-episode-24.mp4
score:  10.0
{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': T

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}]
Training step: 1401/10000
Training step: 1501/10000
Training step: 1601/10000
score:  8.0
score:  10.0
score:  10.0
score:  10.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_1665/videos/DQN_CartPole-v1/1665/DQN_CartPole-v1-episode-29.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_1665/videos/DQN_CartPole-v1/1665/DQN_CartPole-v1-episode-29.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_1665/videos/DQN_CartPole-v1/1665/DQN_CartPole-v1-episode-29.mp4
score:  9.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}]
Training step: 1701/10000
Training step: 1801/10000
Training step: 1901/10000
score:  10.0
score:  9.0
score:  9.0
score:  9.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_1998/videos/DQN_CartPole-v1/1998/DQN_CartPole-v1-episode-34.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_1998/videos/DQN_CartPole-v1/1998/DQN_CartPole-v1-episode-34.mp4



                                                   

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_1998/videos/DQN_CartPole-v1/1998/DQN_CartPole-v1-episode-34.mp4
score:  10.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}]
Training step: 2001/10000
Training step: 2101/10000
Training step: 2201/10000
Training step: 2301/10000
score:  10.0
score:  9.0
score:  9.0
score:  9.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_2331/videos/DQN_CartPole-v1/2331/DQN_CartPole-v1-episode-39.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_2331/videos/DQN_CartPole-v1/2331/DQN_CartPole-v1-episode-39.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_2331/videos/DQN_CartPole-v1/2331/DQN_CartPole-v1-episode-39.mp4
score:  9.0
{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': Tr

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}]
Training step: 2401/10000
Training step: 2501/10000
Training step: 2601/10000
score:  10.0
score:  9.0
score:  9.0
score:  8.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_2664/videos/DQN_CartPole-v1/2664/DQN_CartPole-v1-episode-44.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_2664/videos/DQN_CartPole-v1/2664/DQN_CartPole-v1-episode-44.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_2664/videos/DQN_CartPole-v1/2664/DQN_CartPole-v1-episode-44.mp4
score:  10.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}]
Training step: 2701/10000
Training step: 2801/10000
Training step: 2901/10000
score:  10.0
score:  9.0
score:  8.0
score:  9.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_2997/videos/DQN_CartPole-v1/2997/DQN_CartPole-v1-episode-49.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_2997/videos/DQN_CartPole-v1/2997/DQN_CartPole-v1-episode-49.mp4



                                                   

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_2997/videos/DQN_CartPole-v1/2997/DQN_CartPole-v1-episode-49.mp4
score:  10.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}]
Training step: 3001/10000
Training step: 3101/10000
Training step: 3201/10000
Training step: 3301/10000
score:  9.0
score:  10.0
score:  10.0
score:  8.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_3330/videos/DQN_CartPole-v1/3330/DQN_CartPole-v1-episode-54.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_3330/videos/DQN_CartPole-v1/3330/DQN_CartPole-v1-episode-54.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_3330/videos/DQN_CartPole-v1/3330/DQN_CartPole-v1-episode-54.mp4
score:  9.0
{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': Tr

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}]
Training step: 3401/10000
Training step: 3501/10000
Training step: 3601/10000
score:  10.0
score:  9.0
score:  8.0
score:  9.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_3663/videos/DQN_CartPole-v1/3663/DQN_CartPole-v1-episode-59.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_3663/videos/DQN_CartPole-v1/3663/DQN_CartPole-v1-episode-59.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_3663/videos/DQN_CartPole-v1/3663/DQN_CartPole-v1-episode-59.mp4
score:  10.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}]
Training step: 3701/10000
Training step: 3801/10000
Training step: 3901/10000
score:  9.0
score:  10.0
score:  10.0
score:  9.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_3996/videos/DQN_CartPole-v1/3996/DQN_CartPole-v1-episode-64.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_3996/videos/DQN_CartPole-v1/3996/DQN_CartPole-v1-epi

                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_3996/videos/DQN_CartPole-v1/3996/DQN_CartPole-v1-episode-64.mp4
score:  10.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.6, 'max_score': 10.0, 'min_score': 9.0}]


  plt.savefig("{}/{}.png".format(dir, model_name))


Training step: 4001/10000
Training step: 4101/10000
Training step: 4201/10000
Training step: 4301/10000
score:  9.0
score:  8.0
score:  10.0
score:  11.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_4329/videos/DQN_CartPole-v1/4329/DQN_CartPole-v1-episode-69.mp4.
Moviepy - Writing video checkpoints/DQN_CartPole-v1/step_4329/videos/DQN_CartPole-v1/4329/DQN_CartPole-v1-episode-69.mp4



                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_4329/videos/DQN_CartPole-v1/4329/DQN_CartPole-v1-episode-69.mp4
score:  10.0
{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': T

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.6, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.6, 'max_score': 11.0, 'min_score': 8.0}]
Training step: 4401/10000
Training step: 4501/10000
Training step: 4601/10000
score:  8.0
score:  10.0
score:  10.0
score:  11.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_4662/videos/DQN_CartPole-v1/4662/DQN_CartPole-v1-episode-74.mp4.
Mo

                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_4662/videos/DQN_CartPole-v1/4662/DQN_CartPole-v1-episode-74.mp4
score:  9.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.6, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.6, 'max_score': 11.0, 'min_score': 8.0}, {'score': 9.6, 'max_score': 11.0, 'min_score': 8.0}]
Training step: 4701/10000
Training step: 4801/10000
Training step: 4901/10000
score:  10.0
score:  10.0
score:  10.0
score:  8.0
Moviepy - Building video checkpoints/DQN_CartPole-v1/step_4995/videos/DQ

                                                           

Moviepy - Done !
Moviepy - video ready checkpoints/DQN_CartPole-v1/step_4995/videos/DQN_CartPole-v1/4995/DQN_CartPole-v1-episode-79.mp4
score:  10.0




{'score': [{'score': 17.0, 'target_model_updated': True}, {'score': 30.0, 'target_model_updated': True}, {'score': 17.0, 'target_model_updated': True}, {'score': 48.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 27.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 19.0, 'target_model_updated': True}, {'score': 16.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 13.0, 'target_model_updated': True}, {'score': 42.0, 'target_model_updated': True}, {'score': 54.0, 'target_model_updated': True}, {'score': 36.0, 'target_model_updated': True}, {'score': 22.0, 'target_model_updated': True}, {'score': 25.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'score': 40.0, 'target_model_updated': True}, {'score': 15.0, 'target_model_updated': True}, {'

  axs[row][col].legend()


[{'score': 12.4, 'max_score': 15.0, 'min_score': 10.0}, {'score': 9.8, 'max_score': 11.0, 'min_score': 9.0}, {'score': 8.8, 'max_score': 9.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 10.0, 'max_score': 10.0, 'min_score': 10.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.4, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.2, 'max_score': 10.0, 'min_score': 8.0}, {'score': 9.6, 'max_score': 10.0, 'min_score': 9.0}, {'score': 9.6, 'max_score': 11.0, 'min_score': 8.0}, {'score': 9.6, 'max_score': 11.0, 'min_score': 8.0}, {'score': 9.6, 'max_score': 10.0, 'min_score': 8.0}]
