In [9]:
import gymnasium as gym
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import numpy as np
import sys
sys.path.append('../..')
from dqn.rainbow.rainbow_agent import RainbowAgent
from game_configs import AtariConfig, CartPoleConfig
from agent_configs import RainbowConfig
import random
import torch


In [10]:
class ClipReward(gym.RewardWrapper):
    def __init__(self, env, min_reward, max_reward):
        super().__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.reward_range = (min_reward, max_reward)

    def reward(self, reward):
        return np.clip(reward, self.min_reward, self.max_reward)

In [11]:
# env = ClipReward(AtariPreprocessing(gym.make("MsPacmanNoFrameskip-v4", render_mode="rgb_array"), terminal_on_life_loss=True), -1, 1) # as recommended by the original paper, should already include max pooling
# env = FrameStack(env, 4)
env = gym.make("CartPole-v1")

In [12]:
seed = 777

def seed_torch(seed):
    torch.manual_seed(seed)
    if torch.backends.cudnn.enabled:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

np.random.seed(seed)
random.seed(seed)
seed_torch(seed)


In [13]:
config_dict = {
  "dense_layers_widths": [128],
  "value_hidden_layers_widths": [128],
  "advatage_hidden_layers_widths": [128],
  "adam_epsilon": 1e-8,
  "learning_rate": 0.001,
  "training_steps": 10000,
  "per_epsilon": 1e-6,
  "per_alpha": 0.2,
  "per_beta": 0.6,
  "minibatch_size": 128,
  "transfer_interval": 100,
  "n_step": 3,
  "noisy_sigma": 0.5,
  "replay_interval": 1,
  "kernel_initializer": "orthogonal",
  "noisy_sigma": 0.0,
}

In [14]:
game_config = CartPoleConfig()
config = RainbowConfig(config_dict, game_config)

Using default save_intermediate_weights     : True
Using         training_steps                : 10000
Using         adam_epsilon                  : 1e-08
Using         learning_rate                 : 0.001
Using         clipnorm                      : None
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using         loss_function                 : None
Using default activation                    : relu
Using         kernel_initializer            : orthogonal
Using         minibatch_size                : 128
Using default replay_buffer_size            : 5000
Using default min_replay_buffer_size        : 128
Using default num_minibatches               : 1
Using default training_iterations           : 1
Using default conv_layers                   : []
Using         dense_layers_widths           : [128]
Using         value_hidden_layers_widths    : [128]
Using default advantage_hidden_layers_widths: []
Using         noisy_sigma                   : 0.0
Using 

In [15]:
agent = RainbowAgent(env, config, name="test")

for param in agent.model.parameters():
  print(param)

observation_dimensions:  (4,)
num_actions:  2
Initializing Rainbow Network
Old weight Parameter containing:
tensor([[-0.4181, -0.0089, -0.0967, -0.1141],
        [ 0.3813,  0.3811,  0.2242,  0.0033],
        [ 0.3249, -0.2366, -0.1888,  0.0948],
        [-0.3908,  0.1213,  0.2350,  0.4898],
        [ 0.4165, -0.1754,  0.3715,  0.3875],
        [ 0.4036, -0.2796, -0.1004, -0.2671],
        [ 0.4411,  0.4180,  0.3952, -0.0659],
        [-0.3477, -0.1846, -0.1229,  0.1166],
        [ 0.0355,  0.4970, -0.1223,  0.4541],
        [ 0.3282,  0.1459,  0.0544, -0.1297],
        [-0.4304, -0.1130,  0.2293,  0.4067],
        [-0.4941, -0.4749, -0.0504, -0.2852],
        [-0.1068,  0.0132,  0.4990, -0.3497],
        [-0.1813,  0.4039,  0.2984,  0.3534],
        [-0.4378,  0.1526,  0.4995,  0.0429],
        [ 0.1951, -0.4023, -0.1846, -0.3288],
        [ 0.2349,  0.4384,  0.1960, -0.4188],
        [-0.1492,  0.0885, -0.0926, -0.2365],
        [-0.3311, -0.4584, -0.1535,  0.4688],
        [-0.4053, 

In [8]:
print("start")
agent.train()

start
[{'score': 10.0, 'target_model_updated': True}, {'score': 11.0, 'target_model_updated': False}, {'score': 9.0, 'target_model_updated': False}, {'score': 9.0, 'target_model_updated': False}, {'score': 18.0, 'target_model_updated': False}, {'score': 24.0, 'target_model_updated': False}, {'score': 18.0, 'target_model_updated': False}, {'score': 14.0, 'target_model_updated': True}, {'score': 115.0, 'target_model_updated': True}, {'score': 33.0, 'target_model_updated': False}, {'score': 21.0, 'target_model_updated': False}, {'score': 24.0, 'target_model_updated': True}]
score:  27.0
score:  26.0
score:  21.0
score:  25.0
score:  25.0


  gym.logger.warn(
  axs[row][col].set_xlim(1, len(values))


[{'score': 10.0, 'target_model_updated': True}, {'score': 11.0, 'target_model_updated': False}, {'score': 9.0, 'target_model_updated': False}, {'score': 9.0, 'target_model_updated': False}, {'score': 18.0, 'target_model_updated': False}, {'score': 24.0, 'target_model_updated': False}, {'score': 18.0, 'target_model_updated': False}, {'score': 14.0, 'target_model_updated': True}, {'score': 115.0, 'target_model_updated': True}, {'score': 33.0, 'target_model_updated': False}, {'score': 21.0, 'target_model_updated': False}, {'score': 24.0, 'target_model_updated': True}, {'score': 29.0, 'target_model_updated': False}, {'score': 25.0, 'target_model_updated': False}, {'score': 19.0, 'target_model_updated': False}, {'score': 21.0, 'target_model_updated': False}, {'score': 22.0, 'target_model_updated': True}, {'score': 21.0, 'target_model_updated': False}, {'score': 28.0, 'target_model_updated': False}, {'score': 27.0, 'target_model_updated': False}, {'score': 29.0, 'target_model_updated': True}

KeyboardInterrupt: 

In [9]:
import custom_gym_envs
import gymnasium as gym
import random
env = gym.make('custom_gym_envs/MississippiMarbles-v0', render_mode="human")
state, info = env.reset()
for _ in range(1000):
    action = random.choice(info['legal_moves'])
    state, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        break

  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}")
  logger.warn(f"{pre} is not within the observation space.")
2024-05-28 02:31:05.944 Python[93752:1499736] +[CATransaction synchronize] called within transaction


Busted!
Busted!


2024-05-28 02:31:20.960 Python[93752:1499736] +[CATransaction synchronize] called within transaction


Busted!
Busted!


2024-05-28 02:31:26.966 Python[93752:1499736] +[CATransaction synchronize] called within transaction
2024-05-28 02:31:29.188 Python[93752:1499736] +[CATransaction synchronize] called within transaction
2024-05-28 02:31:29.971 Python[93752:1499736] +[CATransaction synchronize] called within transaction


Busted!


2024-05-28 02:31:34.969 Python[93752:1499736] +[CATransaction synchronize] called within transaction


Busted!
Player 0 Passed and Scored 750 Points
Passed!
Collected all dice!
Collected all dice!
Player 1 Passed and Scored 2850 Points
Passed!
Player 2 Passed and Scored 3050 Points
Passed!
Piggybacked off 3050 Points
Player 3 Passed and Scored 3250 Points
Passed!
Piggybacked off 3250 Points
Collected all dice!
Player 4 Passed and Scored 3450 Points
Passed!
Piggybacked off 3450 Points
Player 5 Passed and Scored 3650 Points
Passed!
Busted!
Busted!
Player 2 Passed and Scored 200 Points
Passed!
Busted!
Player 4 Passed and Scored 50 Points
Passed!
Player 5 Passed and Scored 300 Points
Passed!


KeyboardInterrupt: 

In [None]:
# import gym_envs
# import gymnasium as gym
# env = gym.make('gym_envs/TicTacToe-v0')

# state, info = env.reset()
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# env.render()
# state, reward, terminated, truncated, info = env.step(0)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# env.render()
# state, reward, terminated, truncated, info = env.step(4)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# env.render()
# state, reward, terminated, truncated, info = env.step(3)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# env.render()
# state, reward, terminated, truncated, info = env.step(6)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# env.render()
# state, reward, terminated, truncated, info = env.step(2)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# env.render()
# state, reward, terminated, truncated, info = env.step(1)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# env.render()
# state, reward, terminated, truncated, info = env.step(7)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(8)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(5)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# print("Truncated:", truncated)
# env.render()


# env.reset()
# state, reward, terminated, truncated, info = env.step(0)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(3)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(7)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(4)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(2)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(6)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# state, reward, terminated, truncated, info = env.step(1)
# print(state)
# print("Turn: ", state[2][0][0])
# print("Legal moves: ", info['legal_moves'])
# print("Terminated:", terminated)
# print("Truncated:", truncated)
# print("Reward:", reward)



In [None]:
import gymnasium as gym
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import numpy as np
import sys
sys.path.append('../..')
from dqn.rainbow.rainbow_agent import RainbowAgent
from game_configs import AtariConfig, CartPoleConfig
from agent_configs import RainbowConfig
import random
import torch


In [None]:
class ClipReward(gym.RewardWrapper):
    def __init__(self, env, min_reward, max_reward):
        super().__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.reward_range = (min_reward, max_reward)

    def reward(self, reward):
        return np.clip(reward, self.min_reward, self.max_reward)

In [None]:
# env = ClipReward(AtariPreprocessing(gym.make("MsPacmanNoFrameskip-v4", render_mode="rgb_array"), terminal_on_life_loss=True), -1, 1) # as recommended by the original paper, should already include max pooling
# env = FrameStack(env, 4)
env = gym.make("CartPole-v1", render_mode="rgb_array")

In [None]:
seed = 777

def seed_torch(seed):
    torch.manual_seed(seed)
    if torch.backends.cudnn.enabled:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

np.random.seed(seed)
random.seed(seed)
seed_torch(seed)


In [None]:
config_dict = {
  "dense_layers_widths": [128],
  "value_hidden_layers_widths": [128],
  "advatage_hidden_layers_widths": [128],
  "adam_epsilon": 1e-8,
  "learning_rate": 0.001,
  "training_steps": 10000,
  "per_epsilon": 1e-6,
  "per_alpha": 0.2,
  "per_beta": 0.6,
  "minibatch_size": 128,
  "transfer_interval": 100,
  "n_step": 3,
  "noisy_sigma": 0.5,
  "replay_interval": 1
}

In [None]:
game_config = CartPoleConfig()
config = RainbowConfig(config_dict, game_config)

Using default save_intermediate_weights     : True
Using         training_steps                : 10000
Using         adam_epsilon                  : 1e-08
Using         learning_rate                 : 0.001
Using         clipnorm                      : None
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using         loss_function                 : None
Using default activation                    : relu
Using         kernel_initializer            : None
Using         minibatch_size                : 128
Using default replay_buffer_size            : 5000
Using default min_replay_buffer_size        : 128
Using default num_minibatches               : 1
Using default training_iterations           : 1
Using default conv_layers                   : []
Using         dense_layers_widths           : [128]
Using         value_hidden_layers_widths    : [128]
Using default advantage_hidden_layers_widths: []
Using         noisy_sigma                   : 0.5
Using defaul

In [None]:
agent = RainbowAgent(env, config, name="test")

for param in agent.model.parameters():
  print(param)

observation_dimensions:  (4,)
num_actions:  2


  logger.warn(
  from .autonotebook import tqdm as notebook_tqdm


Parameter containing:
tensor([[-0.4181, -0.0089, -0.0967, -0.1141],
        [ 0.3813,  0.3811,  0.2242,  0.0033],
        [ 0.3249, -0.2366, -0.1888,  0.0948],
        [-0.3908,  0.1213,  0.2350,  0.4898],
        [ 0.4165, -0.1754,  0.3715,  0.3875],
        [ 0.4036, -0.2796, -0.1004, -0.2671],
        [ 0.4411,  0.4180,  0.3952, -0.0659],
        [-0.3477, -0.1846, -0.1229,  0.1166],
        [ 0.0355,  0.4970, -0.1223,  0.4541],
        [ 0.3282,  0.1459,  0.0544, -0.1297],
        [-0.4304, -0.1130,  0.2293,  0.4067],
        [-0.4941, -0.4749, -0.0504, -0.2852],
        [-0.1068,  0.0132,  0.4990, -0.3497],
        [-0.1813,  0.4039,  0.2984,  0.3534],
        [-0.4378,  0.1526,  0.4995,  0.0429],
        [ 0.1951, -0.4023, -0.1846, -0.3288],
        [ 0.2349,  0.4384,  0.1960, -0.4188],
        [-0.1492,  0.0885, -0.0926, -0.2365],
        [-0.3311, -0.4584, -0.1535,  0.4688],
        [-0.4053, -0.2498, -0.0500, -0.2375],
        [ 0.2434, -0.4209,  0.1780,  0.3752],
        [ 0.

In [None]:
print("start")
agent.train()

start
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}]
score:  33.0
score:  26.0
score:  41.0
score:  40.0
Moviepy - Building video checkpoints/test/step_333/videos/test/333/test-episode-4.mp4.
Moviepy - Writing video checkpoints/test/step_333/videos/test/333/test-episode-4.mp4



  axs[row][col].set_xlim(1, len(values))


Moviepy - Done !
Moviepy - video ready checkpoints/test/step_333/videos/test/333/test-episode-4.mp4
score:  38.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}]
score:  48.0
score:  54.0
score:  53.0
score:  54.0
Moviepy - Building video checkpoints/test/step_666/videos/test/666/test-episode-9.mp4.
Moviepy - Writing video checkpoints/test/step_666/videos/test/666/test-episode-9.mp4



                                                             

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_666/videos/test/666/test-episode-9.mp4
score:  43.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}]
score:  91.0
score:  437.0
score:  103.0
score:  112.0
Moviepy - Building video checkpoints/test/step_999/videos/test/999/test

                                                             

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_999/videos/test/999/test-episode-14.mp4
score:  91.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_model_

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_1332/videos/test/1332/test-episode-19.mp4
score:  292.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_1665/videos/test/1665/test-episode-24.mp4
score:  181.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_1998/videos/test/1998/test-episode-29.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_2331/videos/test/2331/test-episode-34.mp4
score:  378.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_2664/videos/test/2664/test-episode-39.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_2997/videos/test/2997/test-episode-44.mp4
score:  462.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_3330/videos/test/3330/test-episode-49.mp4
score:  353.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_3663/videos/test/3663/test-episode-54.mp4
score:  440.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_3996/videos/test/3996/test-episode-59.mp4
score:  360.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_4329/videos/test/4329/test-episode-64.mp4
score:  384.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_4662/videos/test/4662/test-episode-69.mp4
score:  406.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_4995/videos/test/4995/test-episode-74.mp4
score:  422.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_5328/videos/test/5328/test-episode-79.mp4
score:  373.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_5661/videos/test/5661/test-episode-84.mp4
score:  363.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_5994/videos/test/5994/test-episode-89.mp4
score:  431.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_6327/videos/test/6327/test-episode-94.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_6660/videos/test/6660/test-episode-99.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_6993/videos/test/6993/test-episode-104.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_7326/videos/test/7326/test-episode-109.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_7659/videos/test/7659/test-episode-114.mp4
score:  491.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_7992/videos/test/7992/test-episode-119.mp4
score:  479.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_8325/videos/test/8325/test-episode-124.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_8658/videos/test/8658/test-episode-129.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_8991/videos/test/8991/test-episode-134.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9324/videos/test/9324/test-episode-139.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9657/videos/test/9657/test-episode-144.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9990/videos/test/9990/test-episode-149.mp4
score:  447.0
score:  500.0
score:  500.0
score:  500.0
score:  500.0
Moviepy - Building video checkpoints/test/step_9999/videos/test/9999/test-episode-154.mp4.
Moviepy - Writing video checkpoints/test/step_9999/videos/test/9999/test-episode-154.mp4



                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9999/videos/test/9999/test-episode-154.mp4
score:  500.0


In [None]:
class NormalizeZeroToOne(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.observation_high = self.env.observation_space.high
        self.observation_low = self.env.observation_space.low

    def observation(self, obs):
        print(obs)
        print((obs - self.observation_low) / (self.observation_high - self.observation_low))
        return (obs - self.observation_low) / (self.observation_high - self.observation_low)

In [None]:
class ClipReward(gym.RewardWrapper):
    def __init__(self, env, min_reward, max_reward):
        super().__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.reward_range = (min_reward, max_reward)

    def reward(self, reward):
        return np.clip(reward, self.min_reward, self.max_reward)

In [None]:
# env = gym.wrappers.AtariPreprocessing(gym.make("ALE/MsPacman-v5", render_mode="rgb_array"), terminal_on_life_loss=True, scale_obs=True) # as seen online with frame stackign though
# env = gym.wrappers.AtariPreprocessing(gym.make("ALE/MsPacman-v5", render_mode="rgb_array"), terminal_on_life_loss=True, scale_obs=True) # as seen online
env = ClipReward(gym.wrappers.AtariPreprocessing(gym.make("MsPacmanNoFrameskip-v4", render_mode="rgb_array"), terminal_on_life_loss=True), -1, 1) # as recommended by the original paper, should already include max pooling
env = gym.wrappers.FrameStack(env, 4)

In [None]:
from rainbow_agent import RainbowAgent
import gymnasium as gym
import numpy as np
import tensorflow as tf
from hyperopt import hp



1 Physical GPUs, 1 Logical GPUs


2024-05-27 13:16:47.685106: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-05-27 13:16:47.685148: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-05-27 13:16:47.685156: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-05-27 13:16:47.685215: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-27 13:16:47.685245: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [None]:
def create_search_space():
    search_space = {
        "activation": hp.choice(
            "activation",
            [
                "linear",
                "relu",
                # 'relu6',
                "sigmoid",
                "softplus",
                "soft_sign",
                "silu",
                "swish",
                "log_sigmoid",
                "hard_sigmoid",
                # 'hard_silu',
                # 'hard_swish',
                # 'hard_tanh',
                "elu",
                # 'celu',
                "selu",
                "gelu",
                # 'glu'
            ],
        ),
        "kernel_initializer": hp.choice(
            "kernel_initializer",
            [
                "he_uniform",
                "he_normal",
                "glorot_uniform",
                "glorot_normal",
                "lecun_uniform",
                "lecun_normal",
                "orthogonal",
                "variance_baseline",
                "variance_0.1",
                "variance_0.3",
                "variance_0.8",
                "variance_3",
                "variance_5",
                "variance_10",
            ],
        ),
        "optimizer": hp.choice(
            "optimizer", [tf.keras.optimizers.legacy.Adam]
        ),  # NO SGD OR RMSPROP FOR NOW SINCE IT IS FOR RAINBOW DQN
        "learning_rate": hp.choice(
            "learning_rate", [10, 5, 2, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]
        ),  #
        "adam_epsilon": hp.choice(
            "adam_epsilon",
            [1, 0.5, 0.3125, 0.03125, 0.003125, 0.0003125, 0.00003125, 0.000003125],
        ),
        "clipnorm": hp.choice("clipnorm", [None]),
        # NORMALIZATION?
        "soft_update": hp.choice(
            "soft_update", [False]
        ),  # seems to always be false, we can try it with tru
        "ema_beta": hp.uniform("ema_beta", 0.95, 0.999),
        "transfer_interval": hp.choice(
            "transfer_interval", [10, 25, 50, 100, 200, 400, 800, 1600, 2000]
        ),
        "replay_interval": hp.choice("replay_interval", [1, 2, 3, 4, 5, 8, 10, 12, 350]),
        "minibatch_size": hp.choice(
            "minibatch_size", [2**i for i in range(0, 8)]
        ),  ###########
        "replay_buffer_size": hp.choice(
            "replay_buffer_size", [2000, 3000, 5000, 7500, 10000, 15000, 20000, 25000, 50000]
        ),  #############
        "min_replay_buffer_size": hp.choice(
            "min_replay_buffer_size", [0, 125, 250, 375, 500, 625, 750, 875, 1000, 1500, 2000]
        ),  # 125, 250, 375, 500, 625, 750, 875, 1000, 1500, 2000
        "n_step": hp.choice("n_step", [1, 2, 3, 4, 5, 8, 10]),
        "discount_factor": hp.choice(
            "discount_factor", [0.1, 0.5, 0.9, 0.99, 0.995, 0.999]
        ),
        "atom_size": hp.choice("atom_size", [11, 21, 31, 41, 51, 61, 71, 81]),  #
        "conv_layers": hp.choice("conv_layers", [[], [(32, 8, 4), (64, 4, 2), (64, 3, 1)]]),
        "conv_layers_noisy": hp.choice("conv_layers_noisy", [False]),
        "width": hp.choice("width", [32, 64, 128, 256, 512, 1024]),
        "dense_layers": hp.choice("dense_layers", [0, 1, 2, 3, 4]),
        "dense_layers_noisy": hp.choice(
            "dense_layers_noisy", [True]
        ),  # i think this is always true for rainbow
        # REWARD CLIPPING
        "noisy_sigma": hp.choice("noisy_sigma", [0.5]),  #
        "loss_function": hp.choice(
            "loss_function",
            [tf.keras.losses.CategoricalCrossentropy(), tf.keras.losses.KLDivergence()],
        ),
        "dueling": hp.choice("dueling", [True]),
        "advantage_hidden_layers": hp.choice(
            "advantage_hidden_layers", [0, 1, 2, 3, 4]
        ),  #
        "value_hidden_layers": hp.choice("value_hidden_layers", [0, 1, 2, 3, 4]),  #
        "training_steps": hp.choice("training_steps", [30000]),
        "per_epsilon": hp.choice(
            "per_epsilon", [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]
        ),
        "per_alpha": hp.choice("per_alpha", [0.05 * i for i in range(0, 21)]),
        "per_beta": hp.choice("per_beta", [0.05 * i for i in range(1, 21)]),
        # 'per_beta_increase': hp.uniform('per_beta_increase', 0, 0.015),
        # 'search_max_depth': 5,
        # 'search_max_time': 10,
        "training_iterations": hp.choice("training_iterations", [1, 2, 3, 4, 5]),
        "num_minibatches": hp.choice("num_minibatches", [1, 2, 3, 4, 5]),
    }
    initial_best_config = [
        {
            "activation": 1,
            "kernel_initializer": 6,
            "optimizer": 0,  # NO SGD OR RMSPROP FOR NOW SINCE IT IS FOR RAINBOW DQN
            "learning_rate": 5,  #
            "adam_epsilon": 5,
            "clipnorm": 0,
            # NORMALIZATION?
            "soft_update": 0,  # seems to always be false, we can try it with tru
            "ema_beta": 0.95,
            "transfer_interval": 3,
            "replay_interval": 1,
            "minibatch_size": 7,
            "replay_buffer_size": 8,  
            "min_replay_buffer_size": 4,
            "n_step": 2,
            "discount_factor": 3,
            "atom_size": 4,  #
            "conv_layers": 0,
            "conv_layers_noisy": 0,
            "width": 2,
            "dense_layers": 2,
            "dense_layers_noisy": 0,  # i think this is always true for rainbow
            # REWARD CLIPPING
            "noisy_sigma": 0,  #
            "loss_function": 0,
            "dueling": 0,
            "advantage_hidden_layers": 1,  #
            "value_hidden_layers": 1,  #
            "training_steps": 0,
            "per_epsilon": 3,
            "per_alpha": 10,
            "per_beta": 7,
            # 'per_beta_increase': hp.uniform('per_beta_increase', 0, 0.015),
            # 'search_max_depth': 5,
            # 'search_max_time': 10,
            "training_iterations": 1,
            "num_minibatches": 1,
        }
    ]

    return search_space, initial_best_config


In [None]:
from hyperopt import space_eval

search_sapce, initial_best_config = create_search_space()
config = space_eval(search_sapce, initial_best_config[0])
print(config)


{'activation': 'relu', 'adam_epsilon': 0.0003125, 'advantage_hidden_layers': 1, 'atom_size': 51, 'clipnorm': None, 'conv_layers': (), 'conv_layers_noisy': False, 'dense_layers': 2, 'dense_layers_noisy': True, 'discount_factor': 0.99, 'dueling': True, 'ema_beta': 0.95, 'kernel_initializer': 'orthogonal', 'learning_rate': 0.01, 'loss_function': <keras.src.losses.CategoricalCrossentropy object at 0x29c93d580>, 'min_replay_buffer_size': 500, 'minibatch_size': 128, 'n_step': 3, 'noisy_sigma': 0.5, 'num_minibatches': 2, 'optimizer': <class 'keras.src.optimizers.legacy.adam.Adam'>, 'per_alpha': 0.5, 'per_beta': 0.4, 'per_epsilon': 0.001, 'replay_buffer_size': 50000, 'replay_interval': 2, 'soft_update': False, 'training_iterations': 2, 'training_steps': 30000, 'transfer_interval': 100, 'value_hidden_layers': 1, 'width': 128}


In [None]:
env = gym.make('CartPole-v1', render_mode="rgb_array")

In [None]:
from agent_configs import RainbowConfig
from game_configs import CartPoleConfig
config = RainbowConfig(config, CartPoleConfig())

Using default save_intermediate_weights: True
Using adam_epsilon: 0.0003125
Using learning_rate: 0.01
Using clipnorm: None
Using optimizer: <class 'keras.src.optimizers.legacy.adam.Adam'>
Using loss_function: <keras.src.losses.CategoricalCrossentropy object at 0x29c93d580>
Using training_iterations: 2
Using num_minibatches: 2
Using minibatch_size: 128
Using replay_buffer_size: 50000
Using min_replay_buffer_size: 500
Using training_steps: 30000
Using activation: relu
Using kernel_initializer: orthogonal
Using width: 128
Using noisy_sigma: 0.5
Using conv_layers: ()
Using dense_layers: 2
Using default deuling: False
Using value_hidden_layers: 1
Using advantage_hidden_layers: 1
Using discount_factor: 0.99
Using soft_update: False
Using transfer_interval: 100
Using ema_beta: 0.95
Using replay_interval: 2
Using per_alpha: 0.5
Using per_beta: 0.4
Using per_epsilon: 0.001
Using n_step: 3
Using atom_size: 51


In [None]:
# train
agent = RainbowAgent(env, config, "RainbowDQN-{}".format(env.unwrapped.spec.id))
agent.checkpoint_interval = 10
agent.train()

observation_dimensions:  (4,)
num_actions:  2


  logger.warn(


Weights  [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
Elementwise Loss  tf.Tensor(
[ 4.2488556  4.4044695  4.1126256  4.2543073  4.296421   4.423803
  4.3096285  5.9415407  4.4245315  4.2336383  4.30497    5.440406
  5.8048677  4.199656   4.2123127  4.235448   4.1141553 13.40

KeyboardInterrupt: 

In [None]:
agent = RainbowAgent(env, config, "RainbowDQN-{}".format(env.unwrapped.spec.id))
agent.load_from_checkpoint("./checkpoints/RainbowDQN-CartPole-v1", 100)
agent.checkpoint_interval = 10
# print(agent.stats)
# print(agent.config)
# print(agent.replay_buffer.sample())
# print(agent.replay_buffer.beta)
agent.train()

observation_dimensions:  (4,)
num_actions:  2
{'config_dict': {'activation': 'relu', 'adam_epsilon': 0.0003125, 'advantage_hidden_layers': 1, 'atom_size': 51, 'clipnorm': None, 'conv_layers': (), 'conv_layers_noisy': False, 'dense_layers': 2, 'dense_layers_noisy': True, 'discount_factor': 0.99, 'dueling': True, 'ema_beta': 0.95, 'kernel_initializer': 'orthogonal', 'learning_rate': 0.01, 'loss_function': <keras.src.losses.CategoricalCrossentropy object at 0x2e3871790>, 'min_replay_buffer_size': 500, 'minibatch_size': 128, 'n_step': 3, 'noisy_sigma': 0.5, 'num_minibatches': 2, 'optimizer': <class 'keras.src.optimizers.legacy.adam.Adam'>, 'per_alpha': 0.5, 'per_beta': 0.4, 'per_epsilon': 0.001, 'replay_buffer_size': 50000, 'replay_interval': 2, 'soft_update': False, 'training_iterations': 2, 'training_steps': 30000, 'transfer_interval': 100, 'value_hidden_layers': 1, 'width': 128}, 'game': <game_configs.cartpole_config.CartPoleConfig object at 0x2e386fac0>}
Using default save_intermediate

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/RainbowDQN-CartPole-v1/step_100/videos/RainbowDQN-CartPole-v1/100/RainbowDQN-CartPole-v1-episode-4.mp4
score:  129.0
score:  273.0
score:  133.0
score:  161.0
score:  216.0
Moviepy - Building video checkpoints/RainbowDQN-CartPole-v1/step_110/videos/RainbowDQN-CartPole-v1/110/RainbowDQN-CartPole-v1-episode-9.mp4.
Moviepy - Writing video checkpoints/RainbowDQN-CartPole-v1/step_110/videos/RainbowDQN-CartPole-v1/110/RainbowDQN-CartPole-v1-episode-9.mp4



                                                              

Moviepy - Done !
Moviepy - video ready checkpoints/RainbowDQN-CartPole-v1/step_110/videos/RainbowDQN-CartPole-v1/110/RainbowDQN-CartPole-v1-episode-9.mp4
score:  122.0
score:  141.0
score:  117.0
score:  133.0
score:  144.0
Moviepy - Building video checkpoints/RainbowDQN-CartPole-v1/step_120/videos/RainbowDQN-CartPole-v1/120/RainbowDQN-CartPole-v1-episode-14.mp4.
Moviepy - Writing video checkpoints/RainbowDQN-CartPole-v1/step_120/videos/RainbowDQN-CartPole-v1/120/RainbowDQN-CartPole-v1-episode-14.mp4



                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/RainbowDQN-CartPole-v1/step_120/videos/RainbowDQN-CartPole-v1/120/RainbowDQN-CartPole-v1-episode-14.mp4
score:  137.0


KeyboardInterrupt: 

In [None]:
import gymnasium as gym
import gym_anytrading
import tensorflow as tf

env = gym.make('forex-v0')
# env = gym.make('stocks-v0')


In [None]:
from gym_anytrading.datasets import FOREX_EURUSD_1H_ASK, STOCKS_GOOGL

custom_env = gym.make(
    'forex-v0',
    df=FOREX_EURUSD_1H_ASK,
    window_size=10,
    frame_bound=(10, 300),
    unit_side='right'
)

# custom_env = gym.make(
#     'stocks-v0',
#     df=STOCKS_GOOGL,
#     window_size=10,
#     frame_bound=(10, 300)
# )


In [None]:
print("env information:")
print("> shape:", env.unwrapped.shape)
print("> df.shape:", env.unwrapped.df.shape)
print("> prices.shape:", env.unwrapped.prices.shape)
print("> signal_features.shape:", env.unwrapped.signal_features.shape)
print("> max_possible_profit:", env.unwrapped.max_possible_profit())

print()
print("custom_env information:")
print("> shape:", custom_env.unwrapped.shape)
print("> df.shape:", custom_env.unwrapped.df.shape)
print("> prices.shape:", custom_env.unwrapped.prices.shape)
print("> signal_features.shape:", custom_env.unwrapped.signal_features.shape)
print("> max_possible_profit:", custom_env.unwrapped.max_possible_profit())


In [None]:
observation, info = env.reset()
env.render()

env = custom_env
observation, info = env.reset()
env.render()

In [None]:
from rainbow_agent import RainbowAgent
from agent_configs import RainbowConfig
from game_configs import CartPoleConfig
config_dict = {
    "activation": "relu",
    "kernel_initializer": "orthogonal",
    "min_replay_buffer_size": 32,
    "loss_function": tf.keras.losses.KLDivergence(),
    "learning_rate": 0.000001,
}
config = RainbowConfig(config_dict, CartPoleConfig())
# train
agent = RainbowAgent(env, config, "RainbowDQN-{}".format(env.unwrapped.spec.id))
agent.train()