In [1]:
import torch
import random
import numpy as np

seed = 777


def seed_torch(seed):
    torch.manual_seed(seed)
    if torch.backends.cudnn.enabled:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True


np.random.seed(seed)
random.seed(seed)
seed_torch(seed)

In [2]:
import gymnasium as gym
import sys

import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import CartPoleConfig

env = gym.make("CartPole-v1", render_mode="rgb_array")

config_dict = {
    "dense_layer_widths": [128, 128],
    "value_hidden_layer_widths": [],
    "advatage_hidden_layer_widths": [],
    "adam_epsilon": 1e-8,
    "learning_rate": 0.001,
    "training_steps": 10000,
    "per_epsilon": 0.0001,
    "per_alpha": 0,
    "per_beta": 0,
    "minibatch_size": 32,
    "replay_buffer_size": 1000,
    "min_replay_buffer_size": 32,
    "transfer_interval": 200,
    "n_step": 1,
    "loss_function": HuberLoss(),  # could do categorical cross entropy
    "clipnorm": 0.0,
    "discount_factor": 0.99,
    "atom_size": 1,
    "replay_interval": 1,
    "dueling": False,
    "noisy_sigma": 0.0,
    "eg_epsilon": 1.0,
    "eg_epsilon_final": 0.0,
    "eg_epsilon_final_step": 2000,
    "eg_epsilon_decay_type": "linear",
}

config_dict = {
    "training_steps": 10000,
    "per_epsilon": 0.0001,
    "loss_function": KLDivergenceLoss(),  # could do categorical cross entropy
    "discount_factor": 0.99,
}

game_config = CartPoleConfig()
config = RainbowConfig(config_dict, game_config)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
agent = RainbowAgent(env, config, name="Rainbow_CartPole-v1", device=device)
agent.checkpoint_interval = 200

for param in agent.model.parameters():
    print(param)

print("start")
agent.train()

Using default save_intermediate_weights     : False
Using         training_steps                : 10000
Using default adam_epsilon                  : 1e-06
Using default momentum                      : 0.9
Using default learning_rate                 : 0.001
Using default clipnorm                      : 0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using default weight_decay                  : 0.0
Using         loss_function                 : <utils.utils.KLDivergenceLoss object at 0x31cb9b280>
Using default activation                    : relu
Using         kernel_initializer            : None
Using default minibatch_size                : 64
Using default replay_buffer_size            : 5000
Using default min_replay_buffer_size        : 64
Using default num_minibatches               : 1
Using default training_iterations           : 1
Using default print_interval                : 100
RainbowConfig
Using default residual_layers               : []
Using d

  logger.warn(
  logger.warn(


float32
Parameter containing:
tensor([[-0.4181, -0.0089, -0.0967, -0.1141],
        [ 0.3813,  0.3811,  0.2242,  0.0033],
        [ 0.3249, -0.2366, -0.1888,  0.0948],
        [-0.3908,  0.1213,  0.2350,  0.4898],
        [ 0.4165, -0.1754,  0.3715,  0.3875],
        [ 0.4036, -0.2796, -0.1004, -0.2671],
        [ 0.4411,  0.4180,  0.3952, -0.0659],
        [-0.3477, -0.1846, -0.1229,  0.1166],
        [ 0.0355,  0.4970, -0.1223,  0.4541],
        [ 0.3282,  0.1459,  0.0544, -0.1297],
        [-0.4304, -0.1130,  0.2293,  0.4067],
        [-0.4941, -0.4749, -0.0504, -0.2852],
        [-0.1068,  0.0132,  0.4990, -0.3497],
        [-0.1813,  0.4039,  0.2984,  0.3534],
        [-0.4378,  0.1526,  0.4995,  0.0429],
        [ 0.1951, -0.4023, -0.1846, -0.3288],
        [ 0.2349,  0.4384,  0.1960, -0.4188],
        [-0.1492,  0.0885, -0.0926, -0.2365],
        [-0.3311, -0.4584, -0.1535,  0.4688],
        [-0.4053, -0.2498, -0.0500, -0.2375],
        [ 0.2434, -0.4209,  0.1780,  0.3752],
    

  axs[row][col].set_xlim(1, len(values))
  axs[row][col].legend()
  axs[row][col].set_xlim(1, len(values))


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_0/videos/Rainbow_CartPole-v1/0/Rainbow_CartPole-v1-episode-4.mp4
score:  10.0
Training step: 101/10000
Training step: 201/10000
score:  9.0
score:  11.0
score:  9.0
score:  10.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_200/videos/Rainbow_CartPole-v1/200/Rainbow_CartPole-v1-episode-9.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_200/videos/Rainbow_CartPole-v1/200/Rainbow_CartPole-v1-episode-9.mp4



                                                   

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_200/videos/Rainbow_CartPole-v1/200/Rainbow_CartPole-v1-episode-9.mp4
score:  9.0


  axs[row][col].legend()


Training step: 301/10000
Training step: 401/10000
score:  9.0
score:  9.0
score:  10.0
score:  10.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_400/videos/Rainbow_CartPole-v1/400/Rainbow_CartPole-v1-episode-14.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_400/videos/Rainbow_CartPole-v1/400/Rainbow_CartPole-v1-episode-14.mp4



                                                   

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_400/videos/Rainbow_CartPole-v1/400/Rainbow_CartPole-v1-episode-14.mp4
score:  10.0


  axs[row][col].legend()


Training step: 501/10000
Training step: 601/10000
score:  109.0
score:  113.0
score:  123.0
score:  106.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_600/videos/Rainbow_CartPole-v1/600/Rainbow_CartPole-v1-episode-19.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_600/videos/Rainbow_CartPole-v1/600/Rainbow_CartPole-v1-episode-19.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_600/videos/Rainbow_CartPole-v1/600/Rainbow_CartPole-v1-episode-19.mp4
score:  105.0
Training step: 701/10000
Training step: 801/10000
score:  64.0
score:  70.0
score:  75.0
score:  72.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_800/videos/Rainbow_CartPole-v1/800/Rainbow_CartPole-v1-episode-24.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_800/videos/Rainbow_CartPole-v1/800/Rainbow_CartPole-v1-episode-24.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_800/videos/Rainbow_CartPole-v1/800/Rainbow_CartPole-v1-episode-24.mp4
score:  62.0
Training step: 901/10000
Training step: 1001/10000
score:  82.0
score:  90.0
score:  66.0
score:  92.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_1000/videos/Rainbow_CartPole-v1/1000/Rainbow_CartPole-v1-episode-29.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_1000/videos/Rainbow_CartPole-v1/1000/Rainbow_CartPole-v1-episode-29.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_1000/videos/Rainbow_CartPole-v1/1000/Rainbow_CartPole-v1-episode-29.mp4
score:  72.0
Training step: 1101/10000
Training step: 1201/10000
score:  57.0
score:  73.0
score:  55.0
score:  98.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_1200/videos/Rainbow_CartPole-v1/1200/Rainbow_CartPole-v1-episode-34.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_1200/videos/Rainbow_CartPole-v1/1200/Rainbow_CartPole-v1-episode-34.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_1200/videos/Rainbow_CartPole-v1/1200/Rainbow_CartPole-v1-episode-34.mp4
score:  43.0
Training step: 1301/10000
Training step: 1401/10000
score:  69.0
score:  61.0
score:  57.0
score:  78.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_1400/videos/Rainbow_CartPole-v1/1400/Rainbow_CartPole-v1-episode-39.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_1400/videos/Rainbow_CartPole-v1/1400/Rainbow_CartPole-v1-episode-39.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_1400/videos/Rainbow_CartPole-v1/1400/Rainbow_CartPole-v1-episode-39.mp4
score:  72.0
Training step: 1501/10000
Training step: 1601/10000
score:  122.0
score:  80.0
score:  57.0
score:  60.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_1600/videos/Rainbow_CartPole-v1/1600/Rainbow_CartPole-v1-episode-44.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_1600/videos/Rainbow_CartPole-v1/1600/Rainbow_CartPole-v1-episode-44.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_1600/videos/Rainbow_CartPole-v1/1600/Rainbow_CartPole-v1-episode-44.mp4
score:  65.0
Training step: 1701/10000
Training step: 1801/10000
score:  57.0
score:  64.0
score:  49.0
score:  67.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_1800/videos/Rainbow_CartPole-v1/1800/Rainbow_CartPole-v1-episode-49.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_1800/videos/Rainbow_CartPole-v1/1800/Rainbow_CartPole-v1-episode-49.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_1800/videos/Rainbow_CartPole-v1/1800/Rainbow_CartPole-v1-episode-49.mp4
score:  45.0
Training step: 1901/10000
Training step: 2001/10000
score:  57.0
score:  41.0
score:  49.0
score:  55.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_2000/videos/Rainbow_CartPole-v1/2000/Rainbow_CartPole-v1-episode-54.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_2000/videos/Rainbow_CartPole-v1/2000/Rainbow_CartPole-v1-episode-54.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_2000/videos/Rainbow_CartPole-v1/2000/Rainbow_CartPole-v1-episode-54.mp4
score:  70.0
Training step: 2101/10000
Training step: 2201/10000
score:  46.0
score:  66.0
score:  58.0
score:  76.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_2200/videos/Rainbow_CartPole-v1/2200/Rainbow_CartPole-v1-episode-59.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_2200/videos/Rainbow_CartPole-v1/2200/Rainbow_CartPole-v1-episode-59.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_2200/videos/Rainbow_CartPole-v1/2200/Rainbow_CartPole-v1-episode-59.mp4
score:  66.0
Training step: 2301/10000
Training step: 2401/10000
score:  79.0
score:  66.0
score:  83.0
score:  42.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_2400/videos/Rainbow_CartPole-v1/2400/Rainbow_CartPole-v1-episode-64.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_2400/videos/Rainbow_CartPole-v1/2400/Rainbow_CartPole-v1-episode-64.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_2400/videos/Rainbow_CartPole-v1/2400/Rainbow_CartPole-v1-episode-64.mp4
score:  62.0
Training step: 2501/10000
Training step: 2601/10000
score:  84.0
score:  49.0
score:  51.0
score:  77.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_2600/videos/Rainbow_CartPole-v1/2600/Rainbow_CartPole-v1-episode-69.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_2600/videos/Rainbow_CartPole-v1/2600/Rainbow_CartPole-v1-episode-69.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_2600/videos/Rainbow_CartPole-v1/2600/Rainbow_CartPole-v1-episode-69.mp4
score:  73.0
Training step: 2701/10000
Training step: 2801/10000
score:  46.0
score:  50.0
score:  49.0
score:  57.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_2800/videos/Rainbow_CartPole-v1/2800/Rainbow_CartPole-v1-episode-74.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_2800/videos/Rainbow_CartPole-v1/2800/Rainbow_CartPole-v1-episode-74.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_2800/videos/Rainbow_CartPole-v1/2800/Rainbow_CartPole-v1-episode-74.mp4
score:  49.0
Training step: 2901/10000
Training step: 3001/10000
score:  85.0
score:  84.0
score:  89.0
score:  82.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_3000/videos/Rainbow_CartPole-v1/3000/Rainbow_CartPole-v1-episode-79.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_3000/videos/Rainbow_CartPole-v1/3000/Rainbow_CartPole-v1-episode-79.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_3000/videos/Rainbow_CartPole-v1/3000/Rainbow_CartPole-v1-episode-79.mp4
score:  91.0
Training step: 3101/10000
Training step: 3201/10000
score:  107.0
score:  115.0
score:  85.0
score:  52.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_3200/videos/Rainbow_CartPole-v1/3200/Rainbow_CartPole-v1-episode-84.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_3200/videos/Rainbow_CartPole-v1/3200/Rainbow_CartPole-v1-episode-84.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_3200/videos/Rainbow_CartPole-v1/3200/Rainbow_CartPole-v1-episode-84.mp4
score:  98.0
Training step: 3301/10000
Training step: 3401/10000
score:  63.0
score:  67.0
score:  74.0
score:  79.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_3400/videos/Rainbow_CartPole-v1/3400/Rainbow_CartPole-v1-episode-89.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_3400/videos/Rainbow_CartPole-v1/3400/Rainbow_CartPole-v1-episode-89.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_3400/videos/Rainbow_CartPole-v1/3400/Rainbow_CartPole-v1-episode-89.mp4
score:  62.0
Training step: 3501/10000
Training step: 3601/10000
score:  93.0
score:  88.0
score:  91.0
score:  89.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_3600/videos/Rainbow_CartPole-v1/3600/Rainbow_CartPole-v1-episode-94.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_3600/videos/Rainbow_CartPole-v1/3600/Rainbow_CartPole-v1-episode-94.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_3600/videos/Rainbow_CartPole-v1/3600/Rainbow_CartPole-v1-episode-94.mp4
score:  94.0
Training step: 3701/10000
Training step: 3801/10000
score:  86.0
score:  102.0
score:  92.0
score:  86.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_3800/videos/Rainbow_CartPole-v1/3800/Rainbow_CartPole-v1-episode-99.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_3800/videos/Rainbow_CartPole-v1/3800/Rainbow_CartPole-v1-episode-99.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_3800/videos/Rainbow_CartPole-v1/3800/Rainbow_CartPole-v1-episode-99.mp4
score:  112.0
Training step: 3901/10000
Training step: 4001/10000
score:  156.0
score:  154.0
score:  205.0
score:  156.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_4000/videos/Rainbow_CartPole-v1/4000/Rainbow_CartPole-v1-episode-104.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_4000/videos/Rainbow_CartPole-v1/4000/Rainbow_CartPole-v1-episode-104.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_4000/videos/Rainbow_CartPole-v1/4000/Rainbow_CartPole-v1-episode-104.mp4
score:  141.0


  axs[row][col].legend()


Training step: 4101/10000
Training step: 4201/10000
score:  147.0
score:  147.0
score:  119.0
score:  140.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_4200/videos/Rainbow_CartPole-v1/4200/Rainbow_CartPole-v1-episode-109.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_4200/videos/Rainbow_CartPole-v1/4200/Rainbow_CartPole-v1-episode-109.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_4200/videos/Rainbow_CartPole-v1/4200/Rainbow_CartPole-v1-episode-109.mp4
score:  119.0
Training step: 4301/10000
Training step: 4401/10000
score:  141.0
score:  134.0
score:  159.0
score:  130.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_4400/videos/Rainbow_CartPole-v1/4400/Rainbow_CartPole-v1-episode-114.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_4400/videos/Rainbow_CartPole-v1/4400/Rainbow_CartPole-v1-episode-114.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_4400/videos/Rainbow_CartPole-v1/4400/Rainbow_CartPole-v1-episode-114.mp4
score:  138.0


  axs[row][col].legend()


Training step: 4501/10000
Training step: 4601/10000
score:  122.0
score:  130.0
score:  152.0
score:  117.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_4600/videos/Rainbow_CartPole-v1/4600/Rainbow_CartPole-v1-episode-119.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_4600/videos/Rainbow_CartPole-v1/4600/Rainbow_CartPole-v1-episode-119.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_4600/videos/Rainbow_CartPole-v1/4600/Rainbow_CartPole-v1-episode-119.mp4
score:  129.0
Training step: 4701/10000
Training step: 4801/10000
score:  186.0
score:  153.0
score:  206.0
score:  150.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_4800/videos/Rainbow_CartPole-v1/4800/Rainbow_CartPole-v1-episode-124.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_4800/videos/Rainbow_CartPole-v1/4800/Rainbow_CartPole-v1-episode-124.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_4800/videos/Rainbow_CartPole-v1/4800/Rainbow_CartPole-v1-episode-124.mp4
score:  247.0
Training step: 4901/10000
Training step: 5001/10000
score:  171.0
score:  168.0
score:  171.0
score:  160.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_5000/videos/Rainbow_CartPole-v1/5000/Rainbow_CartPole-v1-episode-129.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_5000/videos/Rainbow_CartPole-v1/5000/Rainbow_CartPole-v1-episode-129.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_5000/videos/Rainbow_CartPole-v1/5000/Rainbow_CartPole-v1-episode-129.mp4
score:  201.0
Training step: 5101/10000
Training step: 5201/10000
score:  148.0
score:  170.0
score:  183.0
score:  166.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_5200/videos/Rainbow_CartPole-v1/5200/Rainbow_CartPole-v1-episode-134.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_5200/videos/Rainbow_CartPole-v1/5200/Rainbow_CartPole-v1-episode-134.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_5200/videos/Rainbow_CartPole-v1/5200/Rainbow_CartPole-v1-episode-134.mp4
score:  178.0
Training step: 5301/10000
Training step: 5401/10000
score:  160.0
score:  137.0
score:  153.0
score:  145.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_5400/videos/Rainbow_CartPole-v1/5400/Rainbow_CartPole-v1-episode-139.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_5400/videos/Rainbow_CartPole-v1/5400/Rainbow_CartPole-v1-episode-139.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_5400/videos/Rainbow_CartPole-v1/5400/Rainbow_CartPole-v1-episode-139.mp4
score:  156.0


  axs[row][col].legend()


Training step: 5501/10000
Training step: 5601/10000
score:  270.0
score:  237.0
score:  168.0
score:  185.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_5600/videos/Rainbow_CartPole-v1/5600/Rainbow_CartPole-v1-episode-144.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_5600/videos/Rainbow_CartPole-v1/5600/Rainbow_CartPole-v1-episode-144.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_5600/videos/Rainbow_CartPole-v1/5600/Rainbow_CartPole-v1-episode-144.mp4
score:  171.0
Training step: 5701/10000
Training step: 5801/10000
score:  169.0
score:  175.0
score:  199.0
score:  177.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_5800/videos/Rainbow_CartPole-v1/5800/Rainbow_CartPole-v1-episode-149.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_5800/videos/Rainbow_CartPole-v1/5800/Rainbow_CartPole-v1-episode-149.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_5800/videos/Rainbow_CartPole-v1/5800/Rainbow_CartPole-v1-episode-149.mp4
score:  153.0
Training step: 5901/10000
Training step: 6001/10000
score:  172.0
score:  183.0
score:  182.0
score:  187.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_6000/videos/Rainbow_CartPole-v1/6000/Rainbow_CartPole-v1-episode-154.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_6000/videos/Rainbow_CartPole-v1/6000/Rainbow_CartPole-v1-episode-154.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_6000/videos/Rainbow_CartPole-v1/6000/Rainbow_CartPole-v1-episode-154.mp4
score:  158.0


  axs[row][col].legend()


Training step: 6101/10000
Training step: 6201/10000
score:  109.0
score:  153.0
score:  186.0
score:  160.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_6200/videos/Rainbow_CartPole-v1/6200/Rainbow_CartPole-v1-episode-159.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_6200/videos/Rainbow_CartPole-v1/6200/Rainbow_CartPole-v1-episode-159.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_6200/videos/Rainbow_CartPole-v1/6200/Rainbow_CartPole-v1-episode-159.mp4
score:  133.0


  axs[row][col].legend()


Training step: 6301/10000
Training step: 6401/10000
score:  206.0
score:  206.0
score:  200.0
score:  213.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_6400/videos/Rainbow_CartPole-v1/6400/Rainbow_CartPole-v1-episode-164.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_6400/videos/Rainbow_CartPole-v1/6400/Rainbow_CartPole-v1-episode-164.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_6400/videos/Rainbow_CartPole-v1/6400/Rainbow_CartPole-v1-episode-164.mp4
score:  196.0
Training step: 6501/10000
Training step: 6601/10000
score:  226.0
score:  200.0
score:  198.0
score:  212.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_6600/videos/Rainbow_CartPole-v1/6600/Rainbow_CartPole-v1-episode-169.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_6600/videos/Rainbow_CartPole-v1/6600/Rainbow_CartPole-v1-episode-169.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_6600/videos/Rainbow_CartPole-v1/6600/Rainbow_CartPole-v1-episode-169.mp4
score:  192.0
Training step: 6701/10000
Training step: 6801/10000
score:  179.0
score:  139.0
score:  277.0
score:  207.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_6800/videos/Rainbow_CartPole-v1/6800/Rainbow_CartPole-v1-episode-174.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_6800/videos/Rainbow_CartPole-v1/6800/Rainbow_CartPole-v1-episode-174.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_6800/videos/Rainbow_CartPole-v1/6800/Rainbow_CartPole-v1-episode-174.mp4
score:  169.0
Training step: 6901/10000
Training step: 7001/10000
score:  212.0
score:  277.0
score:  189.0
score:  249.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_7000/videos/Rainbow_CartPole-v1/7000/Rainbow_CartPole-v1-episode-179.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_7000/videos/Rainbow_CartPole-v1/7000/Rainbow_CartPole-v1-episode-179.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_7000/videos/Rainbow_CartPole-v1/7000/Rainbow_CartPole-v1-episode-179.mp4
score:  210.0
Training step: 7101/10000
Training step: 7201/10000
score:  273.0
score:  273.0
score:  288.0
score:  300.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_7200/videos/Rainbow_CartPole-v1/7200/Rainbow_CartPole-v1-episode-184.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_7200/videos/Rainbow_CartPole-v1/7200/Rainbow_CartPole-v1-episode-184.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_7200/videos/Rainbow_CartPole-v1/7200/Rainbow_CartPole-v1-episode-184.mp4
score:  209.0
Training step: 7301/10000
Training step: 7401/10000
score:  257.0
score:  223.0
score:  285.0
score:  160.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_7400/videos/Rainbow_CartPole-v1/7400/Rainbow_CartPole-v1-episode-189.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_7400/videos/Rainbow_CartPole-v1/7400/Rainbow_CartPole-v1-episode-189.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_7400/videos/Rainbow_CartPole-v1/7400/Rainbow_CartPole-v1-episode-189.mp4
score:  246.0
Training step: 7501/10000
Training step: 7601/10000
score:  302.0
score:  237.0
score:  184.0
score:  140.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_7600/videos/Rainbow_CartPole-v1/7600/Rainbow_CartPole-v1-episode-194.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_7600/videos/Rainbow_CartPole-v1/7600/Rainbow_CartPole-v1-episode-194.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_7600/videos/Rainbow_CartPole-v1/7600/Rainbow_CartPole-v1-episode-194.mp4
score:  126.0


  axs[row][col].legend()


Training step: 7701/10000
Training step: 7801/10000
score:  500.0
score:  500.0
score:  500.0
score:  257.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_7800/videos/Rainbow_CartPole-v1/7800/Rainbow_CartPole-v1-episode-199.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_7800/videos/Rainbow_CartPole-v1/7800/Rainbow_CartPole-v1-episode-199.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_7800/videos/Rainbow_CartPole-v1/7800/Rainbow_CartPole-v1-episode-199.mp4
score:  160.0
Training step: 7901/10000
Training step: 8001/10000
score:  500.0
score:  197.0
score:  500.0
score:  500.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_8000/videos/Rainbow_CartPole-v1/8000/Rainbow_CartPole-v1-episode-204.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_8000/videos/Rainbow_CartPole-v1/8000/Rainbow_CartPole-v1-episode-204.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_8000/videos/Rainbow_CartPole-v1/8000/Rainbow_CartPole-v1-episode-204.mp4
score:  500.0
Training step: 8101/10000
Training step: 8201/10000
score:  500.0
score:  500.0
score:  155.0
score:  329.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_8200/videos/Rainbow_CartPole-v1/8200/Rainbow_CartPole-v1-episode-209.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_8200/videos/Rainbow_CartPole-v1/8200/Rainbow_CartPole-v1-episode-209.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_8200/videos/Rainbow_CartPole-v1/8200/Rainbow_CartPole-v1-episode-209.mp4
score:  115.0
Training step: 8301/10000
Training step: 8401/10000
score:  171.0
score:  178.0
score:  150.0
score:  154.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_8400/videos/Rainbow_CartPole-v1/8400/Rainbow_CartPole-v1-episode-214.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_8400/videos/Rainbow_CartPole-v1/8400/Rainbow_CartPole-v1-episode-214.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_8400/videos/Rainbow_CartPole-v1/8400/Rainbow_CartPole-v1-episode-214.mp4
score:  170.0
Training step: 8501/10000
Training step: 8601/10000
score:  371.0
score:  160.0
score:  500.0
score:  205.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_8600/videos/Rainbow_CartPole-v1/8600/Rainbow_CartPole-v1-episode-219.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_8600/videos/Rainbow_CartPole-v1/8600/Rainbow_CartPole-v1-episode-219.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_8600/videos/Rainbow_CartPole-v1/8600/Rainbow_CartPole-v1-episode-219.mp4
score:  161.0


  axs[row][col].legend()


Training step: 8701/10000
Training step: 8801/10000
score:  153.0
score:  346.0
score:  234.0
score:  190.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_8800/videos/Rainbow_CartPole-v1/8800/Rainbow_CartPole-v1-episode-224.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_8800/videos/Rainbow_CartPole-v1/8800/Rainbow_CartPole-v1-episode-224.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_8800/videos/Rainbow_CartPole-v1/8800/Rainbow_CartPole-v1-episode-224.mp4
score:  189.0
Training step: 8901/10000
Training step: 9001/10000
score:  173.0
score:  215.0
score:  175.0
score:  168.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_9000/videos/Rainbow_CartPole-v1/9000/Rainbow_CartPole-v1-episode-229.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_9000/videos/Rainbow_CartPole-v1/9000/Rainbow_CartPole-v1-episode-229.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_9000/videos/Rainbow_CartPole-v1/9000/Rainbow_CartPole-v1-episode-229.mp4
score:  151.0


  axs[row][col].legend()


Training step: 9101/10000
Training step: 9201/10000
score:  172.0
score:  181.0
score:  164.0
score:  160.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_9200/videos/Rainbow_CartPole-v1/9200/Rainbow_CartPole-v1-episode-234.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_9200/videos/Rainbow_CartPole-v1/9200/Rainbow_CartPole-v1-episode-234.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_9200/videos/Rainbow_CartPole-v1/9200/Rainbow_CartPole-v1-episode-234.mp4
score:  201.0
Training step: 9301/10000
Training step: 9401/10000
score:  168.0
score:  169.0
score:  186.0
score:  179.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_9400/videos/Rainbow_CartPole-v1/9400/Rainbow_CartPole-v1-episode-239.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_9400/videos/Rainbow_CartPole-v1/9400/Rainbow_CartPole-v1-episode-239.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_9400/videos/Rainbow_CartPole-v1/9400/Rainbow_CartPole-v1-episode-239.mp4
score:  190.0
Training step: 9501/10000
Training step: 9601/10000
score:  254.0
score:  269.0
score:  269.0
score:  274.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_9600/videos/Rainbow_CartPole-v1/9600/Rainbow_CartPole-v1-episode-244.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_9600/videos/Rainbow_CartPole-v1/9600/Rainbow_CartPole-v1-episode-244.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_9600/videos/Rainbow_CartPole-v1/9600/Rainbow_CartPole-v1-episode-244.mp4
score:  260.0
Training step: 9701/10000
Training step: 9801/10000
score:  207.0
score:  184.0
score:  212.0
score:  206.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_9800/videos/Rainbow_CartPole-v1/9800/Rainbow_CartPole-v1-episode-249.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_9800/videos/Rainbow_CartPole-v1/9800/Rainbow_CartPole-v1-episode-249.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_9800/videos/Rainbow_CartPole-v1/9800/Rainbow_CartPole-v1-episode-249.mp4
score:  215.0
Training step: 9901/10000
score:  169.0
score:  162.0
score:  170.0
score:  168.0
Moviepy - Building video checkpoints/Rainbow_CartPole-v1/step_10000/videos/Rainbow_CartPole-v1/10000/Rainbow_CartPole-v1-episode-254.mp4.
Moviepy - Writing video checkpoints/Rainbow_CartPole-v1/step_10000/videos/Rainbow_CartPole-v1/10000/Rainbow_CartPole-v1-episode-254.mp4



  axs[row][col].legend()


Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_CartPole-v1/step_10000/videos/Rainbow_CartPole-v1/10000/Rainbow_CartPole-v1-episode-254.mp4
score:  169.0


In [3]:
from rainbow_agent import RainbowAgent
import gymnasium as gym
import numpy as np
import tensorflow as tf
from hyperopt import hp

In [4]:
def create_search_space():
    search_space = {
        "activation": hp.choice(
            "activation",
            [
                "linear",
                "relu",
                # 'relu6',
                "sigmoid",
                "softplus",
                "soft_sign",
                "silu",
                "swish",
                "log_sigmoid",
                "hard_sigmoid",
                # 'hard_silu',
                # 'hard_swish',
                # 'hard_tanh',
                "elu",
                # 'celu',
                "selu",
                "gelu",
                # 'glu'
            ],
        ),
        "kernel_initializer": hp.choice(
            "kernel_initializer",
            [
                "he_uniform",
                "he_normal",
                "glorot_uniform",
                "glorot_normal",
                "lecun_uniform",
                "lecun_normal",
                "orthogonal",
                "variance_baseline",
                "variance_0.1",
                "variance_0.3",
                "variance_0.8",
                "variance_3",
                "variance_5",
                "variance_10",
            ],
        ),
        "optimizer": hp.choice(
            "optimizer", [tf.keras.optimizers.legacy.Adam]
        ),  # NO SGD OR RMSPROP FOR NOW SINCE IT IS FOR RAINBOW DQN
        "learning_rate": hp.choice(
            "learning_rate", [10, 5, 2, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
        ),  #
        "adam_epsilon": hp.choice(
            "adam_epsilon",
            [1, 0.5, 0.3125, 0.03125, 0.003125, 0.0003125, 0.00003125, 0.000003125],
        ),
        "clipnorm": hp.choice("clipnorm", [None]),
        # NORMALIZATION?
        "soft_update": hp.choice(
            "soft_update", [False]
        ),  # seems to always be false, we can try it with tru
        "ema_beta": hp.uniform("ema_beta", 0.95, 0.999),
        "transfer_interval": hp.choice(
            "transfer_interval", [10, 25, 50, 100, 200, 400, 800, 1600, 2000]
        ),
        "replay_interval": hp.choice(
            "replay_interval", [1, 2, 3, 4, 5, 8, 10, 12, 350]
        ),
        "minibatch_size": hp.choice(
            "minibatch_size", [2**i for i in range(0, 8)]
        ),  ###########
        "replay_buffer_size": hp.choice(
            "replay_buffer_size",
            [2000, 3000, 5000, 7500, 10000, 15000, 20000, 25000, 50000],
        ),  #############
        "min_replay_buffer_size": hp.choice(
            "min_replay_buffer_size",
            [0, 125, 250, 375, 500, 625, 750, 875, 1000, 1500, 2000],
        ),  # 125, 250, 375, 500, 625, 750, 875, 1000, 1500, 2000
        "n_step": hp.choice("n_step", [1, 2, 3, 4, 5, 8, 10]),
        "discount_factor": hp.choice(
            "discount_factor", [0.1, 0.5, 0.9, 0.99, 0.995, 0.999]
        ),
        "atom_size": hp.choice("atom_size", [11, 21, 31, 41, 51, 61, 71, 81]),  #
        "conv_layers": hp.choice(
            "conv_layers", [[], [(32, 8, 4), (64, 4, 2), (64, 3, 1)]]
        ),
        "conv_layers_noisy": hp.choice("conv_layers_noisy", [False]),
        "width": hp.choice("width", [32, 64, 128, 256, 512, 1024]),
        "dense_layers": hp.choice("dense_layers", [0, 1, 2, 3, 4]),
        "dense_layers_noisy": hp.choice(
            "dense_layers_noisy", [True]
        ),  # i think this is always true for rainbow
        # REWARD CLIPPING
        "noisy_sigma": hp.choice("noisy_sigma", [0.5]),  #
        "loss_function": hp.choice(
            "loss_function",
            [tf.keras.losses.CategoricalCrossentropy(), tf.keras.losses.KLDivergence()],
        ),
        "dueling": hp.choice("dueling", [True]),
        "advantage_hidden_layers": hp.choice(
            "advantage_hidden_layers", [0, 1, 2, 3, 4]
        ),  #
        "value_hidden_layers": hp.choice("value_hidden_layers", [0, 1, 2, 3, 4]),  #
        "training_steps": hp.choice("training_steps", [30000]),
        "per_epsilon": hp.choice(
            "per_epsilon", [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]
        ),
        "per_alpha": hp.choice("per_alpha", [0.05 * i for i in range(0, 21)]),
        "per_beta": hp.choice("per_beta", [0.05 * i for i in range(1, 21)]),
        # 'per_beta_increase': hp.uniform('per_beta_increase', 0, 0.015),
        # 'search_max_depth': 5,
        # 'search_max_time': 10,
        "training_iterations": hp.choice("training_iterations", [1, 2, 3, 4, 5]),
        "num_minibatches": hp.choice("num_minibatches", [1, 2, 3, 4, 5]),
    }
    initial_best_config = [
        {
            "activation": 1,
            "kernel_initializer": 6,
            "optimizer": 0,  # NO SGD OR RMSPROP FOR NOW SINCE IT IS FOR RAINBOW DQN
            "learning_rate": 5,  #
            "adam_epsilon": 5,
            "clipnorm": 0,
            # NORMALIZATION?
            "soft_update": 0,  # seems to always be false, we can try it with tru
            "ema_beta": 0.95,
            "transfer_interval": 3,
            "replay_interval": 1,
            "minibatch_size": 7,
            "replay_buffer_size": 8,
            "min_replay_buffer_size": 4,
            "n_step": 2,
            "discount_factor": 3,
            "atom_size": 4,  #
            "conv_layers": 0,
            "conv_layers_noisy": 0,
            "width": 2,
            "dense_layers": 2,
            "dense_layers_noisy": 0,  # i think this is always true for rainbow
            # REWARD CLIPPING
            "noisy_sigma": 0,  #
            "loss_function": 0,
            "dueling": 0,
            "advantage_hidden_layers": 1,  #
            "value_hidden_layers": 1,  #
            "training_steps": 0,
            "per_epsilon": 3,
            "per_alpha": 10,
            "per_beta": 7,
            # 'per_beta_increase': hp.uniform('per_beta_increase', 0, 0.015),
            # 'search_max_depth': 5,
            # 'search_max_time': 10,
            "training_iterations": 1,
            "num_minibatches": 1,
        }
    ]

    return search_space, initial_best_config

In [5]:
from hyperopt import space_eval

search_sapce, initial_best_config = create_search_space()
config = space_eval(search_sapce, initial_best_config[0])
print(config)



In [6]:
env = gym.make("CartPole-v1", render_mode="rgb_array")

In [7]:
from agent_configs import RainbowConfig
from game_configs import CartPoleConfig

config = RainbowConfig(config, CartPoleConfig())

Using default save_intermediate_weights     : False
Using         training_steps                : 30000
Using         adam_epsilon                  : 0.0003125
Using default momentum                      : 0.9
Using         learning_rate                 : 0.01
Using         clipnorm                      : None
Using default weight_decay                  : 0.0
Using         loss_function                 : <keras.src.losses.losses.CategoricalCrossentropy object at 0x36262a8f0>
Using         activation                    : relu
Using         kernel_initializer            : orthogonal
Using         minibatch_size                : 128
Using         replay_buffer_size            : 50000
Using         min_replay_buffer_size        : 500
Using         num_minibatches               : 2
Using         training_iterations           : 2
Using default print_interval                : 100
RainbowConfig
Using default residual_layers               : []
Using         conv_layers                   : ()
Us

In [None]:
# train
agent = RainbowAgent(env, config, "RainbowDQN-{}".format(env.unwrapped.spec.id))
agent.checkpoint_interval = 10
agent.train()

num_actions:  2
float32
Resuming training at step 1 / 30000
replay buffer size: 0
filling replay buffer: 0 / (500)
filling replay buffer: 0 / (500)
filling replay buffer: 0 / (500)
filling replay buffer: 5 / (500)
filling replay buffer: 10 / (500)
filling replay buffer: 15 / (500)
filling replay buffer: 20 / (500)
filling replay buffer: 25 / (500)
filling replay buffer: 30 / (500)
filling replay buffer: 35 / (500)
filling replay buffer: 40 / (500)
filling replay buffer: 45 / (500)
filling replay buffer: 50 / (500)
filling replay buffer: 55 / (500)
filling replay buffer: 60 / (500)
filling replay buffer: 65 / (500)
filling replay buffer: 70 / (500)
filling replay buffer: 75 / (500)
filling replay buffer: 80 / (500)
filling replay buffer: 85 / (500)
filling replay buffer: 90 / (500)
filling replay buffer: 95 / (500)
filling replay buffer: 100 / (500)
filling replay buffer: 105 / (500)
filling replay buffer: 110 / (500)
filling replay buffer: 115 / (500)
filling replay buffer: 120 / (500)

  logger.warn(
  logger.warn(


AssertionError: Only KLDivergenceLoss and CategoricalCrossentropyLoss are supported for atom_size > 1, recieved <keras.src.losses.losses.CategoricalCrossentropy object at 0x36262a8f0>

: 

In [None]:
agent = RainbowAgent(env, config, "RainbowDQN-{}".format(env.unwrapped.spec.id))
agent.load_from_checkpoint("./checkpoints/RainbowDQN-CartPole-v1", 100)
agent.checkpoint_interval = 10
# print(agent.stats)
# print(agent.config)
# print(agent.replay_buffer.sample())
# print(agent.replay_buffer.beta)
agent.train()

In [None]:
import gymnasium as gym
import gym_anytrading
import tensorflow as tf

env = gym.make("forex-v0")
# env = gym.make('stocks-v0')

In [None]:
from gym_anytrading.datasets import FOREX_EURUSD_1H_ASK, STOCKS_GOOGL

custom_env = gym.make(
    "forex-v0",
    df=FOREX_EURUSD_1H_ASK,
    window_size=10,
    frame_bound=(10, 300),
    unit_side="right",
)

# custom_env = gym.make(
#     'stocks-v0',
#     df=STOCKS_GOOGL,
#     window_size=10,
#     frame_bound=(10, 300)
# )

In [None]:
print("env information:")
print("> shape:", env.unwrapped.shape)
print("> df.shape:", env.unwrapped.df.shape)
print("> prices.shape:", env.unwrapped.prices.shape)
print("> signal_features.shape:", env.unwrapped.signal_features.shape)
print("> max_possible_profit:", env.unwrapped.max_possible_profit())

print()
print("custom_env information:")
print("> shape:", custom_env.unwrapped.shape)
print("> df.shape:", custom_env.unwrapped.df.shape)
print("> prices.shape:", custom_env.unwrapped.prices.shape)
print("> signal_features.shape:", custom_env.unwrapped.signal_features.shape)
print("> max_possible_profit:", custom_env.unwrapped.max_possible_profit())

In [None]:
observation, info = env.reset()
env.render()

env = custom_env
observation, info = env.reset()
env.render()

In [None]:
from rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import CartPoleConfig
import gymnasium as gym
import sys

import torch
from utils import CategoricalCrossentropyLoss, KLDivergenceLoss
from utils.utils import HuberLoss

config_dict = {
    "activation": "relu",
    "kernel_initializer": "orthogonal",
    "min_replay_buffer_size": 32,
    "loss_function": KLDivergenceLoss,
    "learning_rate": 0.000001,
}
config = RainbowConfig(config_dict, CartPoleConfig())
# train
agent = RainbowAgent(env, config, "RainbowDQN-{}".format(env.unwrapped.spec.id))
agent.train()

NameError: name 'tf' is not defined

In [None]:
import gymnasium as gym
import sys

from utils import CategoricalCrossentropyLoss, KLDivergenceLoss

sys.path.append("../..")
from dqn.rainbow.rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import AtariConfig
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import numpy as np

config_dict = {
    "conv_layers": [
        (32, 8, 4),
        (64, 4, 2),
        (64, 3, 1),
    ],
    "dense_layers_widths": [512],
    "value_hidden_layers_widths": [],  #
    "advatage_hidden_layers_widths": [],  #
    "adam_epsilon": 1.5e-4,
    "learning_rate": 0.00025 / 4,
    "training_steps": 500000,  # 50000000 Agent saw 200,000,000 frames
    "per_epsilon": 1e-6,  #
    "per_alpha": 0.5,
    "per_beta": 0.4,
    "minibatch_size": 32,
    "replay_buffer_size": 750000,  # 1000000
    "min_replay_buffer_size": 80000,
    "transfer_interval": 32000,
    "n_step": 3,
    "kernel_initializer": "orthogonal",  #
    "loss_function": KLDivergenceLoss(),
    "clipnorm": 0.0,  #
    "discount_factor": 0.99,
    "atom_size": 51,
    "replay_interval": 4,
}
game_config = AtariConfig()
config = RainbowConfig(config_dict, game_config)


class ClipReward(gym.RewardWrapper):
    def __init__(self, env, min_reward, max_reward):
        super().__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.reward_range = (min_reward, max_reward)

    def reward(self, reward):
        return np.clip(reward, self.min_reward, self.max_reward)


env = gym.make(
    "MsPacmanNoFrameskip-v4", render_mode="rgb_array", max_episode_steps=108000
)
env = AtariPreprocessing(
    env
)  # terminal_on_life_loss=True we will need to change the resetting to check if lives is 0 instead of just checking if done
env = FrameStack(env, 4)
agent = RainbowAgent(env, config, name="Rainbow_Atari_MsPacmanNoFrameskip-v4")
agent.checkpoint_interval = 100
agent.train()

Using default save_intermediate_weights     : False
Using         training_steps                : 500000
Using         adam_epsilon                  : 0.00015
Using default momentum                      : 0.9
Using         learning_rate                 : 6.25e-05
Using         clipnorm                      : 0.0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using default weight_decay                  : 0.0
Using         loss_function                 : <utils.utils.KLDivergenceLoss object at 0x107921f00>
Using default activation                    : relu
Using         kernel_initializer            : orthogonal
Using         minibatch_size                : 32
Using         replay_buffer_size            : 750000
Using         min_replay_buffer_size        : 80000
Using default num_minibatches               : 1
Using default training_iterations           : 1
RainbowConfig
Using default residual_layers               : []
Using         conv_layers             

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]
  logger.warn(


observation_dimensions:  (4, 84, 84)
num_actions:  9
uint8
filling replay buffer 0
filling replay buffer 1
filling replay buffer 2
filling replay buffer 3
filling replay buffer 4
filling replay buffer 5
filling replay buffer 6
filling replay buffer 7
filling replay buffer 8
filling replay buffer 9
filling replay buffer 10
filling replay buffer 11
filling replay buffer 12
filling replay buffer 13
filling replay buffer 14
filling replay buffer 15
filling replay buffer 16
filling replay buffer 17
filling replay buffer 18
filling replay buffer 19
filling replay buffer 20
filling replay buffer 21
filling replay buffer 22
filling replay buffer 23
filling replay buffer 24
filling replay buffer 25
filling replay buffer 26
filling replay buffer 27
filling replay buffer 28
filling replay buffer 29
filling replay buffer 30
filling replay buffer 31
filling replay buffer 32
filling replay buffer 33
filling replay buffer 34
filling replay buffer 35
filling replay buffer 36
filling replay buffer 37
f

  logger.warn(


score:  210.0
Moviepy - Building video checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_100/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/100/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-4.mp4.
Moviepy - Writing video checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_100/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/100/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-4.mp4



                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_100/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/100/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-4.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model_updated': False}, {'loss': 1.7300931215286255, 'target_model_updated': False}, {'loss': 2.247039794921875, 'target_model_updated': False}, {'loss': 2.144829511642456, 'target_model_updated': False

  axs[row][col].set_xlim(1, len(values))
  axs[row][col].legend()
  axs[row][col].legend()


training step 101
training step 102
training step 103
training step 104
training step 105
training step 106
training step 107
training step 108
training step 109
training step 110
training step 111
training step 112
training step 113
training step 114
training step 115
training step 116
training step 117
training step 118
training step 119
training step 120
training step 121
training step 122
training step 123
training step 124
training step 125
training step 126
training step 127
training step 128
training step 129
training step 130
training step 131
training step 132
training step 133
training step 134
training step 135
training step 136
training step 137
training step 138
training step 139
training step 140
training step 141
training step 142
training step 143
training step 144
training step 145
training step 146
training step 147
training step 148
training step 149
training step 150
training step 151
training step 152
training step 153
training step 154
training step 155
training s

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_200/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/200/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-9.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model_updated': False}, {'loss': 1.7300931215286255, 'target_model_updated': False}, {'loss': 2.247039794921875, 'target_model_updated': False}, {'loss': 2.144829511642456, 'target_model_updated': False

  axs[row][col].set_xlim(1, len(values))
  axs[row][col].legend()
  axs[row][col].legend()


training step 201
training step 202
training step 203
training step 204
training step 205
training step 206
training step 207
training step 208
training step 209
training step 210
training step 211
training step 212
training step 213
training step 214
training step 215
training step 216
training step 217
training step 218
training step 219
training step 220
training step 221
training step 222
training step 223
training step 224
training step 225
training step 226
training step 227
training step 228
training step 229
training step 230
training step 231
training step 232
training step 233
training step 234
training step 235
training step 236
training step 237
training step 238
training step 239
training step 240
training step 241
training step 242
training step 243
training step 244
training step 245
training step 246
training step 247
training step 248
training step 249
training step 250
training step 251
training step 252
training step 253
training step 254
training step 255
training s

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_300/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/300/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-14.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model_updated': False}, {'loss': 1.7300931215286255, 'target_model_updated': False}, {'loss': 2.247039794921875, 'target_model_updated': False}, {'loss'

  axs[row][col].legend()


training step 301
training step 302
training step 303
training step 304
training step 305
training step 306
training step 307
training step 308
training step 309
training step 310
training step 311
training step 312
training step 313
training step 314
training step 315
training step 316
training step 317
training step 318
training step 319
training step 320
training step 321
training step 322
training step 323
training step 324
training step 325
training step 326
training step 327
training step 328
training step 329
training step 330
training step 331
training step 332
training step 333
training step 334
training step 335
training step 336
training step 337
training step 338
training step 339
training step 340
training step 341
training step 342
training step 343
training step 344
training step 345
training step 346
training step 347
training step 348
training step 349
training step 350
training step 351
training step 352
training step 353
training step 354
training step 355
training s

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_400/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/400/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-19.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model_updated': False}, {'loss': 1.7300931215286255, 'target_model_updated': False}, {'loss': 2.2470397

  axs[row][col].legend()


training step 401
training step 402
training step 403
training step 404
training step 405
training step 406
training step 407
training step 408
training step 409
training step 410
training step 411
training step 412
training step 413
training step 414
training step 415
training step 416
training step 417
training step 418
training step 419
training step 420
training step 421
training step 422
training step 423
training step 424
training step 425
training step 426
training step 427
training step 428
training step 429
training step 430
training step 431
training step 432
training step 433
training step 434
training step 435
training step 436
training step 437
training step 438
training step 439
training step 440
training step 441
training step 442
training step 443
training step 444
training step 445
training step 446
training step 447
training step 448
training step 449
training step 450
training step 451
training step 452
training step 453
training step 454
training step 455
training s

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_500/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/500/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-24.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model_updated': False}, {'loss': 1.7300931215286255, '

  axs[row][col].legend()


training step 501
training step 502
training step 503
training step 504
training step 505
training step 506
training step 507
training step 508
training step 509
training step 510
training step 511
training step 512
training step 513
training step 514
training step 515
training step 516
training step 517
training step 518
training step 519
training step 520
training step 521
training step 522
training step 523
training step 524
training step 525
training step 526
training step 527
training step 528
training step 529
training step 530
training step 531
training step 532
training step 533
training step 534
training step 535
training step 536
training step 537
training step 538
training step 539
training step 540
training step 541
training step 542
training step 543
training step 544
training step 545
training step 546
training step 547
training step 548
training step 549
training step 550
training step 551
training step 552
training step 553
training step 554
training step 555
training s

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_600/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/600/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-29.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model_updated': False}, {'loss': 1.7300931215286255, '

  axs[row][col].legend()


training step 601
training step 602
training step 603
training step 604
training step 605
training step 606
training step 607
training step 608
training step 609
training step 610
training step 611
training step 612
training step 613
training step 614
training step 615
training step 616
training step 617
training step 618
training step 619
training step 620
training step 621
training step 622
training step 623
training step 624
training step 625
training step 626
training step 627
training step 628
training step 629
training step 630
training step 631
training step 632
training step 633
training step 634
training step 635
training step 636
training step 637
training step 638
training step 639
training step 640
training step 641
training step 642
training step 643
training step 644
training step 645
training step 646
training step 647
training step 648
training step 649
training step 650
training step 651
training step 652
training step 653
training step 654
training step 655
training s

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_700/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/700/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-34.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model

  axs[row][col].legend()


training step 701
training step 702
training step 703
training step 704
training step 705
training step 706
training step 707
training step 708
training step 709
training step 710
training step 711
training step 712
training step 713
training step 714
training step 715
training step 716
training step 717
training step 718
training step 719
training step 720
training step 721
training step 722
training step 723
training step 724
training step 725
training step 726
training step 727
training step 728
training step 729
training step 730
training step 731
training step 732
training step 733
training step 734
training step 735
training step 736
training step 737
training step 738
training step 739
training step 740
training step 741
training step 742
training step 743
training step 744
training step 745
training step 746
training step 747
training step 748
training step 749
training step 750
training step 751
training step 752
training step 753
training step 754
training step 755
training s

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_800/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/800/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-39.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated': False}, {'loss': 3.351067543029785, 'target_model

  axs[row][col].legend()


training step 801
training step 802
training step 803
training step 804
training step 805
training step 806
training step 807
training step 808
training step 809
training step 810
training step 811
training step 812
training step 813
training step 814
training step 815
training step 816
training step 817
training step 818
training step 819
training step 820
training step 821
training step 822
training step 823
training step 824
training step 825
training step 826
training step 827
training step 828
training step 829
training step 830
training step 831
training step 832
training step 833
training step 834
training step 835
training step 836
training step 837
training step 838
training step 839
training step 840
training step 841
training step 842
training step 843
training step 844
training step 845
training step 846
training step 847
training step 848
training step 849
training step 850
training step 851
training step 852
training step 853
training step 854
training step 855
training s

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_900/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/900/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-44.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 1000.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'loss': 3.9697070121765137, 'target_model_updated':

  axs[row][col].legend()


training step 901
training step 902
training step 903
training step 904
training step 905
training step 906
training step 907
training step 908
training step 909
training step 910
training step 911
training step 912
training step 913
training step 914
training step 915
training step 916
training step 917
training step 918
training step 919
training step 920
training step 921
training step 922
training step 923
training step 924
training step 925
training step 926
training step 927
training step 928
training step 929
training step 930
training step 931
training step 932
training step 933
training step 934
training step 935
training step 936
training step 937
training step 938
training step 939
training step 940
training step 941
training step 942
training step 943
training step 944
training step 945
training step 946
training step 947
training step 948
training step 949
training step 950
training step 951
training step 952
training step 953
training step 954
training step 955
training s

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_1000/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/1000/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-49.mp4
score:  60.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 1000.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {'

  axs[row][col].legend()


[{'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 210.0, 'max_score': 210.0, 'min_score': 210.0}, {'score': 60.0, 'max_score': 60.0, 'min_score': 60.0}]
training step 1001
training step 1002
training step 1003
training step 1004
training step 1005
training step 1006
training step 1007
training step 1008
training step 1009
training step 1010
training step 1011
training step 1012
training step 1013
training step 1014
training step 1015
training step 1016
training step 1017
training step 1018
training step 1019
training step 1020
training step 1021
training step 1022
trai

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_1100/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/1100/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-54.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 1000.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.7225661277770996, 'target_model_updated': False}, {

  axs[row][col].legend()


training step 1101
training step 1102
training step 1103
training step 1104
training step 1105
training step 1106
training step 1107
training step 1108
training step 1109
training step 1110
training step 1111
training step 1112
training step 1113
training step 1114
training step 1115
training step 1116
training step 1117
training step 1118
training step 1119
training step 1120
training step 1121
training step 1122
training step 1123
training step 1124
training step 1125
training step 1126
training step 1127
training step 1128
training step 1129
training step 1130
training step 1131
training step 1132
training step 1133
training step 1134
training step 1135
training step 1136
training step 1137
training step 1138
training step 1139
training step 1140
training step 1141
training step 1142
training step 1143
training step 1144
training step 1145
training step 1146
training step 1147
training step 1148
training step 1149
training step 1150
training step 1151
training step 1152
training ste

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_1200/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/1200/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-59.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 1000.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 250.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.144717693328857, 'target_model_updated': False}, {'loss': 2.72

  axs[row][col].legend()


training step 1201
training step 1202
training step 1203
training step 1204
training step 1205
training step 1206
training step 1207
training step 1208
training step 1209
training step 1210
training step 1211
training step 1212
training step 1213
training step 1214
training step 1215
training step 1216
training step 1217
training step 1218
training step 1219
training step 1220
training step 1221
training step 1222
training step 1223
training step 1224
training step 1225
training step 1226
training step 1227
training step 1228
training step 1229
training step 1230
training step 1231
training step 1232
training step 1233
training step 1234
training step 1235
training step 1236
training step 1237
training step 1238
training step 1239
training step 1240
training step 1241
training step 1242
training step 1243
training step 1244
training step 1245
training step 1246
training step 1247
training step 1248
training step 1249
training step 1250
training step 1251
training step 1252
training ste

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_1300/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/1300/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-64.mp4
score:  120.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 1000.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 250.0, 'target_model_updated': False}, {'score': 350.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target_model_updated': False}, {'loss': 4.1447176933288

  axs[row][col].legend()


training step 1301
training step 1302
training step 1303
training step 1304
training step 1305
training step 1306
training step 1307
training step 1308
training step 1309
training step 1310
training step 1311
training step 1312
training step 1313
training step 1314
training step 1315
training step 1316
training step 1317
training step 1318
training step 1319
training step 1320
training step 1321
training step 1322
training step 1323
training step 1324
training step 1325
training step 1326
training step 1327
training step 1328
training step 1329
training step 1330
training step 1331
training step 1332
training step 1333
training step 1334
training step 1335
training step 1336
training step 1337
training step 1338
training step 1339
training step 1340
training step 1341
training step 1342
training step 1343
training step 1344
training step 1345
training step 1346
training step 1347
training step 1348
training step 1349
training step 1350
training step 1351
training step 1352
training ste

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_1400/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/1400/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-69.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 1000.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 250.0, 'target_model_updated': False}, {'score': 350.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target

  axs[row][col].legend()


training step 1401
training step 1402
training step 1403
training step 1404
training step 1405
training step 1406
training step 1407
training step 1408
training step 1409
training step 1410
training step 1411
training step 1412
training step 1413
training step 1414
training step 1415
training step 1416
training step 1417
training step 1418
training step 1419
training step 1420
training step 1421
training step 1422
training step 1423
training step 1424
training step 1425
training step 1426
training step 1427
training step 1428
training step 1429
training step 1430
training step 1431
training step 1432
training step 1433
training step 1434
training step 1435
training step 1436
training step 1437
training step 1438
training step 1439
training step 1440
training step 1441
training step 1442
training step 1443
training step 1444
training step 1445
training step 1446
training step 1447
training step 1448
training step 1449
training step 1450
training step 1451
training step 1452
training ste

                                                                

Moviepy - Done !
Moviepy - video ready checkpoints/Rainbow_Atari_MsPacmanNoFrameskip-v4/step_1500/videos/Rainbow_Atari_MsPacmanNoFrameskip-v4/1500/Rainbow_Atari_MsPacmanNoFrameskip-v4-episode-74.mp4
score:  210.0
{'score': [{'score': 290.0, 'target_model_updated': True}, {'score': 340.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}, {'score': 450.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 1000.0, 'target_model_updated': False}, {'score': 610.0, 'target_model_updated': False}, {'score': 250.0, 'target_model_updated': False}, {'score': 350.0, 'target_model_updated': False}, {'score': 420.0, 'target_model_updated': False}], 'loss': [{'loss': 1.1116777658462524, 'target_model_updated': False}, {'loss': 3.9132351875305176, 'target_model_updated': True}, {'loss': 4.821791648864746, 'target_model_updated': False}, {'loss': 3.1255199909210205, 'target_model_updated': False}, {'loss': 4.30369758605957, 'target

  axs[row][col].legend()


training step 1501
training step 1502
training step 1503
training step 1504
training step 1505
training step 1506
training step 1507


KeyboardInterrupt: 

Using default save_intermediate_weights     : False
Using         training_steps                : 500000
Using         adam_epsilon                  : 0.00015
Using default momentum                      : 0.9
Using         learning_rate                 : 6.25e-05
Using         clipnorm                      : 0.0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using default weight_decay                  : 0.0
Using         loss_function                 : <__main__.KLDivergenceLoss object at 0x103844e80>
Using default activation                    : relu
Using         kernel_initializer            : orthogonal
Using         minibatch_size                : 32
Using         replay_buffer_size            : 750000
Using         min_replay_buffer_size        : 80000
Using default num_minibatches               : 1
Using default training_iterations           : 1
RainbowConfig
Using default residual_layers               : []
Using         conv_layers                

  logger.warn(


uint8
filling replay buffer 0
filling replay buffer 1
filling replay buffer 2
filling replay buffer 3
filling replay buffer 4
filling replay buffer 5
filling replay buffer 6
filling replay buffer 7
filling replay buffer 8
filling replay buffer 9
filling replay buffer 10
filling replay buffer 11
filling replay buffer 12
filling replay buffer 13
filling replay buffer 14
filling replay buffer 15
filling replay buffer 16
filling replay buffer 17
filling replay buffer 18
filling replay buffer 19
filling replay buffer 20
filling replay buffer 21
filling replay buffer 22
filling replay buffer 23
filling replay buffer 24
filling replay buffer 25
filling replay buffer 26
filling replay buffer 27
filling replay buffer 28
filling replay buffer 29
filling replay buffer 30
filling replay buffer 31
filling replay buffer 32
filling replay buffer 33
filling replay buffer 34
filling replay buffer 35
filling replay buffer 36
filling replay buffer 37
filling replay buffer 38
filling replay buffer 39
fill

KeyboardInterrupt: 