In [1]:
import gymnasium as gym
from gymnasium.wrappers import AtariPreprocessing, FrameStack
import numpy as np
import sys
sys.path.append('../..')
from dqn.rainbow.rainbow_agent import RainbowAgent
from game_configs import AtariConfig, CartPoleConfig
from agent_configs import RainbowConfig
import random
import torch


In [2]:
class ClipReward(gym.RewardWrapper):
    def __init__(self, env, min_reward, max_reward):
        super().__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        self.reward_range = (min_reward, max_reward)

    def reward(self, reward):
        return np.clip(reward, self.min_reward, self.max_reward)

In [3]:
# env = ClipReward(AtariPreprocessing(gym.make("MsPacmanNoFrameskip-v4", render_mode="rgb_array"), terminal_on_life_loss=True), -1, 1) # as recommended by the original paper, should already include max pooling
# env = FrameStack(env, 4)
env = gym.make("CartPole-v1", render_mode="rgb_array")

In [4]:
seed = 777

def seed_torch(seed):
    torch.manual_seed(seed)
    if torch.backends.cudnn.enabled:
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

np.random.seed(seed)
random.seed(seed)
seed_torch(seed)


In [5]:
config_dict = {
  "dense_layers_widths": [128],
  "value_hidden_layers_widths": [128],
  "advatage_hidden_layers_widths": [128],
  "adam_epsilon": 1e-8,
  "learning_rate": 0.001,
  "training_steps": 10000,
  "per_epsilon": 1e-6,
  "per_alpha": 0.2,
  "per_beta": 0.6,
  "minibatch_size": 128,
  "transfer_interval": 100,
  "n_step": 3,
  "noisy_sigma": 0.5,
  "replay_interval": 1
}

In [6]:
game_config = CartPoleConfig()
config = RainbowConfig(config_dict, game_config)

Using default save_intermediate_weights     : True
Using         training_steps                : 10000
Using         adam_epsilon                  : 1e-08
Using         learning_rate                 : 0.001
Using         clipnorm                      : None
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using         loss_function                 : None
Using default activation                    : relu
Using         kernel_initializer            : None
Using         minibatch_size                : 128
Using default replay_buffer_size            : 5000
Using default min_replay_buffer_size        : 128
Using default num_minibatches               : 1
Using default training_iterations           : 1
Using default conv_layers                   : []
Using         dense_layers_widths           : [128]
Using         value_hidden_layers_widths    : [128]
Using default advantage_hidden_layers_widths: []
Using         noisy_sigma                   : 0.5
Using defaul

In [7]:
agent = RainbowAgent(env, config, name="test")

for param in agent.model.parameters():
  print(param)

observation_dimensions:  (4,)
num_actions:  2


  logger.warn(
  from .autonotebook import tqdm as notebook_tqdm


Parameter containing:
tensor([[-0.4181, -0.0089, -0.0967, -0.1141],
        [ 0.3813,  0.3811,  0.2242,  0.0033],
        [ 0.3249, -0.2366, -0.1888,  0.0948],
        [-0.3908,  0.1213,  0.2350,  0.4898],
        [ 0.4165, -0.1754,  0.3715,  0.3875],
        [ 0.4036, -0.2796, -0.1004, -0.2671],
        [ 0.4411,  0.4180,  0.3952, -0.0659],
        [-0.3477, -0.1846, -0.1229,  0.1166],
        [ 0.0355,  0.4970, -0.1223,  0.4541],
        [ 0.3282,  0.1459,  0.0544, -0.1297],
        [-0.4304, -0.1130,  0.2293,  0.4067],
        [-0.4941, -0.4749, -0.0504, -0.2852],
        [-0.1068,  0.0132,  0.4990, -0.3497],
        [-0.1813,  0.4039,  0.2984,  0.3534],
        [-0.4378,  0.1526,  0.4995,  0.0429],
        [ 0.1951, -0.4023, -0.1846, -0.3288],
        [ 0.2349,  0.4384,  0.1960, -0.4188],
        [-0.1492,  0.0885, -0.0926, -0.2365],
        [-0.3311, -0.4584, -0.1535,  0.4688],
        [-0.4053, -0.2498, -0.0500, -0.2375],
        [ 0.2434, -0.4209,  0.1780,  0.3752],
        [ 0.

In [8]:
print("start")
agent.train()

start
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}]
score:  33.0
score:  26.0
score:  41.0
score:  40.0
Moviepy - Building video checkpoints/test/step_333/videos/test/333/test-episode-4.mp4.
Moviepy - Writing video checkpoints/test/step_333/videos/test/333/test-episode-4.mp4



  axs[row][col].set_xlim(1, len(values))


Moviepy - Done !
Moviepy - video ready checkpoints/test/step_333/videos/test/333/test-episode-4.mp4
score:  38.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}]
score:  48.0
score:  54.0
score:  53.0
score:  54.0
Moviepy - Building video checkpoints/test/step_666/videos/test/666/test-episode-9.mp4.
Moviepy - Writing video checkpoints/test/step_666/videos/test/666/test-episode-9.mp4



                                                             

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_666/videos/test/666/test-episode-9.mp4
score:  43.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}]
score:  91.0
score:  437.0
score:  103.0
score:  112.0
Moviepy - Building video checkpoints/test/step_999/videos/test/999/test

                                                             

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_999/videos/test/999/test-episode-14.mp4
score:  91.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_model_

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_1332/videos/test/1332/test-episode-19.mp4
score:  292.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_1665/videos/test/1665/test-episode-24.mp4
score:  181.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_1998/videos/test/1998/test-episode-29.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_2331/videos/test/2331/test-episode-34.mp4
score:  378.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_2664/videos/test/2664/test-episode-39.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_2997/videos/test/2997/test-episode-44.mp4
score:  462.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_3330/videos/test/3330/test-episode-49.mp4
score:  353.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_3663/videos/test/3663/test-episode-54.mp4
score:  440.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_3996/videos/test/3996/test-episode-59.mp4
score:  360.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_4329/videos/test/4329/test-episode-64.mp4
score:  384.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_4662/videos/test/4662/test-episode-69.mp4
score:  406.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_4995/videos/test/4995/test-episode-74.mp4
score:  422.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_5328/videos/test/5328/test-episode-79.mp4
score:  373.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_5661/videos/test/5661/test-episode-84.mp4
score:  363.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_5994/videos/test/5994/test-episode-89.mp4
score:  431.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_6327/videos/test/6327/test-episode-94.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_6660/videos/test/6660/test-episode-99.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mod

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_6993/videos/test/6993/test-episode-104.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_7326/videos/test/7326/test-episode-109.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_7659/videos/test/7659/test-episode-114.mp4
score:  491.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_7992/videos/test/7992/test-episode-119.mp4
score:  479.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_8325/videos/test/8325/test-episode-124.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_8658/videos/test/8658/test-episode-129.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_8991/videos/test/8991/test-episode-134.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9324/videos/test/9324/test-episode-139.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9657/videos/test/9657/test-episode-144.mp4
score:  500.0
[{'score': 9.0, 'target_model_updated': True}, {'score': 10.0, 'target_model_updated': False}, {'score': 11.0, 'target_model_updated': False}, {'score': 43.0, 'target_model_updated': False}, {'score': 42.0, 'target_model_updated': True}, {'score': 65.0, 'target_model_updated': False}, {'score': 112.0, 'target_model_updated': True}, {'score': 57.0, 'target_model_updated': True}, {'score': 84.0, 'target_model_updated': True}, {'score': 71.0, 'target_model_updated': True}, {'score': 46.0, 'target_model_updated': False}, {'score': 48.0, 'target_model_updated': False}, {'score': 37.0, 'target_model_updated': True}, {'score': 74.0, 'target_model_updated': True}, {'score': 199.0, 'target_model_updated': True}, {'score': 62.0, 'target_model_updated': False}, {'score': 98.0, 'target_model_updated': True}, {'score': 105.0, 'target_model_updated': True}, {'score': 53.0, 'target_mo

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9990/videos/test/9990/test-episode-149.mp4
score:  447.0
score:  500.0
score:  500.0
score:  500.0
score:  500.0
Moviepy - Building video checkpoints/test/step_9999/videos/test/9999/test-episode-154.mp4.
Moviepy - Writing video checkpoints/test/step_9999/videos/test/9999/test-episode-154.mp4



                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/test/step_9999/videos/test/9999/test-episode-154.mp4
score:  500.0
