In [2]:
import sys
import time
from typing import Any, Iterable

import gymnasium as gym
import numpy as np
import torch
from torch import optim

from src.datetime import get_current_timestamp
from src.model_db.tiny_model_db import TinyModelDB
from src.module_analysis import count_parameters
from src.moving_averages import ExponentialMovingAverage
from src.np_functions import softmax
from src.reinforcement_learning.algorithms.policy_mitosis.async_policy_mitosis import AsyncPolicyMitosis
from src.reinforcement_learning.algorithms.policy_mitosis.mitosis_policy_info import MitosisPolicyInfo
from src.reinforcement_learning.algorithms.policy_mitosis.policy_mitosis_base import PolicyWithEnvAndInfo, TrainInfo
from src.reinforcement_learning.algorithms.ppo.ppo import PPOLoggingConfig, PPO
from src.reinforcement_learning.core.callback import Callback
from src.reinforcement_learning.core.generalized_advantage_estimate import compute_gae_and_returns
from src.reinforcement_learning.core.normalization import NormalizationType
from src.reinforcement_learning.core.objectives import ObjectiveLoggingConfig
from src.reinforcement_learning.gym.parallelize_env import parallelize_env_async
from src.stopwatch import Stopwatch
from src.torch_device import get_torch_device
from src.torch_functions import antisymmetric_power

%load_ext autoreload
%autoreload 2

In [3]:


nr_carts = 6

def make_multi_agent_cart_pole_env(render_mode: str | None = None):
    from src.reinforcement_learning.gym.envs.multi_agent_cartpole3d import MultiAgentCartPole3D
    return MultiAgentCartPole3D(
        nr_carts=nr_carts,
        cart_size=0.25,
        force_magnitude=500,
        physics_steps_per_step=10,
        reset_position_radius=0.75,
        reset_randomize_position_angle_offset=True,
        reset_position_randomization_magnitude=0.1,
        reset_hinge_randomization_magnitude=0.05,
        slide_range=2,
        hinge_range=0.8,
        time_limit=60.0,
        step_reward_function=lambda time_, action, state, prev_state: 0.01 ,
        out_ouf_range_reward_function=lambda time_, action, state: 0.0,# -10 + time_ * 3,
        time_limit_reward_function=lambda time_, action, state: 10,
        render_mode=render_mode,
    )

In [None]:



def init_policy():
    import numpy as np
    import torch
    from torch import nn

    from src.networks.core.net import Net
    from src.networks.core.seq_net import SeqNet
    from src.reinforcement_learning.core.action_selectors.squashed_diag_gaussian_action_selector import \
        SquashedDiagGaussianActionSelector
    from src.reinforcement_learning.core.policies.actor_critic_policy import ActorCriticPolicy
    from src.networks.skip_nets.additive_skip_connection import AdditiveSkipConnection
    from src.weight_initialization import orthogonal_initialization
    from src.networks.multihead_self_attention import MultiheadSelfAttention
    
    in_size = 8
    action_size = 2
    
    actor_layers = 3
    actor_features = 48
    
    critic_layers = 2
    critic_features = 48

    actor_hidden_activation_function = nn.ELU
    critic_hidden_activation_function = nn.ELU
    
    actor_hidden_initialization = lambda module: orthogonal_initialization(module, gain=np.sqrt(2))
    critic_hidden_initialization = lambda module: orthogonal_initialization(module, gain=np.sqrt(2))

    class A2CNetwork(nn.Module):

        def __init__(self):
            super().__init__()

            self.actor_embedding = nn.Sequential(nn.Linear(in_size, actor_features), actor_hidden_activation_function())
            self.actor = SeqNet.from_layer_provider(
                layer_provider=lambda layer_nr, is_last_layer, in_features, out_features: nn.Sequential(
                    AdditiveSkipConnection(MultiheadSelfAttention(
                        embed_dim=in_features,
                        num_heads=4,
                        batch_first=True,
                    )),
                    nn.LayerNorm(in_features),
                    AdditiveSkipConnection(Net.sequential_net(
                        actor_hidden_initialization(nn.Linear(in_features, out_features)),
                        actor_hidden_activation_function(),
                        actor_hidden_initialization(nn.Linear(in_features, out_features)),
                        nn.Tanh() if is_last_layer else actor_hidden_activation_function(),
                    )),
                    nn.LayerNorm(in_features),
                ),
                num_layers=actor_layers,
                num_features=actor_features,
            )

            self.critic_embedding = nn.Sequential(nn.Linear(in_size, critic_features), critic_hidden_activation_function())
            self.critic = SeqNet.from_layer_provider(
                layer_provider=lambda layer_nr, is_last_layer, in_features, out_features: nn.Sequential(
                    AdditiveSkipConnection(MultiheadSelfAttention(
                        embed_dim=in_features,
                        num_heads=4,
                        batch_first=True,
                    )),
                    nn.LayerNorm(in_features),
                    AdditiveSkipConnection(Net.sequential_net(
                        critic_hidden_initialization(nn.Linear(in_features, out_features)),
                        critic_hidden_activation_function(),
                        critic_hidden_initialization(nn.Linear(in_features, out_features)),
                        critic_hidden_activation_function(),
                    )),
                    nn.LayerNorm(in_features),
                ),
                num_layers=critic_layers,
                num_features=critic_features,
            )
            self.critic_regressor = nn.Linear(critic_features, 1)

        def forward(self, x: torch.Tensor):
            *batch_shape, nr_actors, nr_features = x.shape
            x = torch.flatten(x, end_dim=-3)
            
            actor_out: torch.Tensor = self.actor(self.actor_embedding(x))
            critic_out: torch.Tensor = self.critic_regressor(self.critic(self.critic_embedding(x)).sum(dim=-2))
            
            actor_out = actor_out.unflatten(dim=0, sizes=batch_shape)
            critic_out = critic_out.unflatten(dim=0, sizes=batch_shape)
            
            return actor_out, critic_out
        
    return ActorCriticPolicy(A2CNetwork(), SquashedDiagGaussianActionSelector(
        latent_dim=actor_features,
        action_dim=action_size,
        std=0.15,
        std_learnable=False,
        action_net_initialization=lambda module: orthogonal_initialization(module, gain=0.01),
    ))

def wrap_env(env_):
    return env_

def train_func(policy_with_env_and_info: PolicyWithEnvAndInfo) -> TrainInfo:
    policy = policy_with_env_and_info['policy']
    env = policy_with_env_and_info['env']
    
    score = 0.0
    score_ema = ExponentialMovingAverage(0.45)
    rollout_stopwatch = Stopwatch()
    def on_rollout_done(rl: PPO, step: int, info: dict[str, Any], scheduler_values: dict[str, Any]):   
        
        if 'raw_rewards' in info['rollout']:
            raw_rewards = info['rollout']['raw_rewards']
            _, gamma_1_returns = compute_gae_and_returns(
                value_estimates=np.zeros_like(rl.buffer.rewards[:len(raw_rewards)]),
                rewards=raw_rewards,
                episode_starts=rl.buffer.episode_starts[:len(raw_rewards)],
                last_values=np.zeros_like(rl.buffer.rewards[0], dtype=float),
                last_dones=np.zeros_like(rl.buffer.episode_starts[0], dtype=bool),
                gamma=1.0,
                gae_lambda=1.0,
                normalize_rewards=None,
                normalize_advantages=None,
            )
        else:
            _, gamma_1_returns = rl.buffer.compute_gae_and_returns(
                last_values=torch.zeros_like(rl.buffer.value_estimates[0]),
                last_dones=np.zeros_like(rl.buffer.episode_starts[0], dtype=bool),
                gamma=1.0,
                gae_lambda=1.0,
                normalize_advantages=None,
                normalize_rewards=None,
            )
        
        episode_scores = gamma_1_returns[
            rl.buffer.episode_starts[:rl.buffer.pos]
        ]
        
        nonlocal score, score_ema
        score = episode_scores.mean()
        
        
        
        current_score_ema = score_ema.update(score)
        
        rollout_time = rollout_stopwatch.reset()
        
        resets: np.ndarray = rl.buffer.episode_starts.astype(int).sum(axis=0)
        resets_mean = resets.mean()
        resets_min = resets.min()
        print(f'{step:>6}: '
              f'{score = :9.3f}, '
              f'score_ema = {current_score_ema:9.3f}, '
              f'time = {rollout_time:5.2f}, '
              f'resets = {resets_mean:5.2f} >= {resets_min:5.2f}')
        sys.stdout.flush()
        
    optimizations_done = 0
    def on_optimization_done(rl: PPO, step: int, info: dict[str, Any], scheduler_values: dict[str, Any]):
        nonlocal optimizations_done
        optimizations_done += 1
    
    policy_info = policy_with_env_and_info['policy_info']
    policy_info_str = ('('
          f'policy_id = {policy_info["policy_id"]}, '
          f'parent_id = {policy_info["parent_policy_id"]}, '
          f'num_parameters = {count_parameters(policy)}, '
          f'previous_steps = {policy_info["steps_trained"]}, '
          f'previous_score = {policy_info["score"]:9.3f}'
          ')')
    
    print(f'Starting PPO with policy {policy_info_str:s} for {steps_per_iteration:_} steps')
    mitosis_iteration_stopwatch = Stopwatch()
    PPO(
        env=env,
        policy=policy.to(device),
        policy_optimizer=lambda pol: optim.AdamW(pol.parameters(), lr=1e-5),
        buffer_size=5000,
        gamma=0.995,
        gae_lambda=1.0,
        normalize_rewards=None,
        normalize_advantages=NormalizationType.Std,
        reduce_actor_objective=lambda obj: antisymmetric_power(obj, 1.5).mean(),
        weigh_actor_objective=lambda obj: 1.0 * obj,
        weigh_entropy_objective=None,
        weigh_critic_objective=lambda obj: 0.5 * obj,
        ppo_max_epochs=10,
        ppo_kl_target=0.025,
        ppo_batch_size=500,
        action_ratio_clip_range=0.1,
        grad_norm_clip_value=1.0,
        callback=Callback(
            on_rollout_done=on_rollout_done,
            on_optimization_done=on_optimization_done,
        ),
        logging_config=PPOLoggingConfig(log_rollout_infos=True),
            torch_device=device,
        ).train(steps_per_iteration)
    
    
    print(f'Training finished for policy {policy_info_str:s}, end score = {score:9.3f}, time = {mitosis_iteration_stopwatch.time_passed():6.2f}')
    
    return {
        'steps_trained': steps_per_iteration, 
        'optimizations_done': optimizations_done, 
        'score': score_ema.get(),
    }

def select_policy_selection_probs(policy_infos: Iterable[MitosisPolicyInfo]) -> np.ndarray:
    scores = np.nan_to_num(np.array([policy_info['score'] for policy_info in policy_infos]), nan=0.0)
    scores = scores / scores.mean()
    scores = softmax(scores, temperature=2.5 / len(scores)**1.0)
    return scores

device = get_torch_device("cuda:0") if True else get_torch_device('cpu')
print(f'using device {device}')

steps_per_iteration = 50_000

num_envs = 16

# mitosis_id = get_current_timestamp()
mitosis_id = '2024-05-28_20.00.00'
policy_db = TinyModelDB[MitosisPolicyInfo](base_path=f'E:/saved_models/rl/MultiAgentCartPole/{nr_carts}/mitosis-{mitosis_id}')
# policy_db = TinyModelDB[PolicyInfo](base_path=f'C:/Users/domin/git/pytorch-starter/saved_models/rl/{env_name}/mitosis-{mitosis_id}')

try:
    print(f'Starting {nr_carts} agent cartpole mitosis with id {mitosis_id}')
    AsyncPolicyMitosis(
        num_workers=3,
        policy_db=policy_db,
        train_policy_function=train_func,
        create_env=lambda: parallelize_env_async(lambda: make_multi_agent_cart_pole_env(None), num_envs),
        new_init_policy_function=init_policy,
        new_wrap_env_function=wrap_env,
        select_policy_selection_probs=select_policy_selection_probs,
        min_base_ancestors=5,
        rng_seed=None,
        initialization_delay=15,
        delay_between_workers=15,
    ).train_with_mitosis(1000)
except KeyboardInterrupt:
    print('keyboard interrupt')
finally:
    print('closing envs')
    time.sleep(2.5)
    print('envs closed')
    policy_db.close()
    print('model db closed')
    

print('done')

using device cuda:0
Starting 6 agent cartpole mitosis with id 2024-05-28_20.00.00
Starting worker 0 with delay = 0
policy selection probs = 
	2024-05-28_23.10.15~6QVFyI: p = 0.111459, scores =   0.028, steps = 500000
	2024-05-28_23.17.44~QlbUFa: p = 0.177386, scores =   0.034, steps = 400000
	2024-05-28_23.18.28~rlRteB: p = 0.181181, scores =   0.034, steps = 550000
	2024-05-28_23.25.50~AZeaFt: p = 0.200964, scores =   0.036, steps = 500000
	2024-05-28_23.33.35~CUWu2J: p = 0.195758, scores =   0.035, steps = 600000
	2024-05-28_23.41.43~p5yas3: p = 0.133253, scores =   0.030, steps = 450000
Started training iteration for policy: 2024-05-29_16.46.30~2Tzt96, parent policy id: 2024-05-28_23.25.50~AZeaFt
Starting worker 1 with delay = 15
policy selection probs = 
	2024-05-28_23.10.15~6QVFyI: p = 0.111459, scores =   0.028, steps = 500000
	2024-05-28_23.17.44~QlbUFa: p = 0.177386, scores =   0.034, steps = 400000
	2024-05-28_23.18.28~rlRteB: p = 0.181181, scores =   0.034, steps = 550000
	20

In [4]:
def record_video():
    import torch
    from tqdm import tqdm
    from src.reinforcement_learning.gym.singleton_vector_env import as_vec_env
    from gymnasium.wrappers import AutoResetWrapper, RecordVideo
    from src.reinforcement_learning.gym.env_wrapping import wrap_env_using_source
    from src.reinforcement_learning.core.policies.policy_initialization import init_policy_using_source
    
    record_env = make_multi_agent_cart_pole_env(render_mode='rgb_array')
    
    policy_db = TinyModelDB[MitosisPolicyInfo](base_path=f'E:/saved_models/rl/MultiAgentCartPole/6/mitosis-2024-05-28_20.00.00')
    print(policy_db)
    
    # policy_entry = max(policy_db.all_entries(), key=lambda entry: entry['model_info']['score'])
    policy_entry = policy_db.fetch_entry('2024-05-29_17.05.37~nRK7nR')
    policy_info : MitosisPolicyInfo = policy_entry['model_info']
    print(policy_entry)

    policy = init_policy_using_source(policy_info['init_policy_source_code'])

    policy_db.load_model_state_dict(policy, policy_entry['model_id'])

    record_env = wrap_env_using_source(record_env, policy_info['wrap_env_source_code'])
    
    try:
        record_env.metadata['render_fps'] = 500 / record_env.physics_steps_per_step
        record_env = AutoResetWrapper(
            RecordVideo(record_env, video_folder=rf'C:\Users\domin\Videos\rl\{get_current_timestamp()}', episode_trigger=lambda ep_nr: True)
        )
        record_env, _ = as_vec_env(record_env)
        # Todo: wrap env
        
        policy.reset_sde_noise(1)
        
        def record(max_steps: int):
            with torch.no_grad():
                obs, info = record_env.reset()
                for step in tqdm(range(max_steps)):
                    actions_dist, _ = policy.process_obs(torch.tensor(obs, device='cpu'))
                    actions = actions_dist.get_actions(deterministic=True).cpu().numpy()
                    obs, reward, terminated, truncated, info = record_env.step(actions)
        
        record(10_000)
    except KeyboardInterrupt:
        print('keyboard interrupt')
    finally:
        print('closing record_env')
        record_env.close()
        print('record_env closed')

record_video()

TinyModelDB(self.base_path = 'E:/saved_models/rl/MultiAgentCartPole/6/mitosis-2024-05-28_20.00.00', self.db_file_name = '_model_db.json')
{'model_id': '2024-05-29_17.05.37~nRK7nR', 'parent_model_id': '2024-05-29_16.56.03~kmTuGa', 'model_info': {'policy_id': '2024-05-29_17.05.37~nRK7nR', 'parent_policy_id': '2024-05-29_16.56.03~kmTuGa', 'score': 4.981492179152891, 'steps_trained': 650000, 'env_steps_trained': 13600000, 'init_policy_source_code': 'def init_policy():\n    import numpy as np\n    import torch\n    from torch import nn\n\n    from src.networks.core.net import Net\n    from src.networks.core.seq_net import SeqNet\n    from src.reinforcement_learning.core.action_selectors.squashed_diag_gaussian_action_selector import \\\n        SquashedDiagGaussianActionSelector\n    from src.reinforcement_learning.core.policies.actor_critic_policy import ActorCriticPolicy\n    from src.networks.skip_nets.additive_skip_connection import AdditiveSkipConnection\n    from src.weight_initializat

  1%|          | 63/10000 [00:01<01:04, 154.88it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-0.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-0.mp4



t:   0%|          | 0/68 [00:00<?, ?it/s, now=None][A
t:  21%|██        | 14/68 [00:00<00:00, 137.22it/s, now=None][A
t:  54%|█████▍    | 37/68 [00:00<00:00, 188.03it/s, now=None][A
t:  85%|████████▌ | 58/68 [00:00<00:00, 194.82it/s, now=None][A
  1%|          | 97/10000 [00:01<03:46, 43.80it/s]           [A

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-0.mp4


 11%|█         | 1063/10000 [00:09<01:12, 122.79it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-1.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-1.mp4



t:   0%|          | 0/1012 [00:00<?, ?it/s, now=None][A
t:   1%|          | 12/1012 [00:00<00:08, 117.62it/s, now=None][A
t:   3%|▎         | 32/1012 [00:00<00:06, 162.81it/s, now=None][A
t:   5%|▌         | 54/1012 [00:00<00:05, 186.86it/s, now=None][A
t:   8%|▊         | 76/1012 [00:00<00:04, 198.97it/s, now=None][A
t:  10%|▉         | 98/1012 [00:00<00:04, 206.45it/s, now=None][A
t:  12%|█▏        | 120/1012 [00:00<00:04, 209.59it/s, now=None][A
t:  14%|█▍        | 142/1012 [00:00<00:04, 212.59it/s, now=None][A
t:  16%|█▌        | 164/1012 [00:00<00:03, 212.90it/s, now=None][A
t:  18%|█▊        | 186/1012 [00:00<00:03, 214.75it/s, now=None][A
t:  21%|██        | 208/1012 [00:01<00:03, 213.72it/s, now=None][A
t:  23%|██▎       | 230/1012 [00:01<00:03, 214.31it/s, now=None][A
t:  25%|██▍       | 252/1012 [00:01<00:03, 214.71it/s, now=None][A
t:  27%|██▋       | 274/1012 [00:01<00:03, 214.36it/s, now=None][A
t:  29%|██▉       | 296/1012 [00:01<00:03, 216.02it/s, now=None

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-1.mp4


 25%|██▌       | 2531/10000 [00:25<01:03, 118.40it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-2.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-2.mp4



t:   0%|          | 0/1456 [00:00<?, ?it/s, now=None][A
t:   1%|          | 12/1456 [00:00<00:12, 117.62it/s, now=None][A
t:   2%|▏         | 30/1456 [00:00<00:09, 152.22it/s, now=None][A
t:   3%|▎         | 50/1456 [00:00<00:08, 173.00it/s, now=None][A
t:   5%|▍         | 71/1456 [00:00<00:07, 186.00it/s, now=None][A
t:   6%|▋         | 93/1456 [00:00<00:06, 197.40it/s, now=None][A
t:   8%|▊         | 115/1456 [00:00<00:06, 202.95it/s, now=None][A
t:   9%|▉         | 137/1456 [00:00<00:06, 207.76it/s, now=None][A
t:  11%|█         | 159/1456 [00:00<00:06, 209.62it/s, now=None][A
t:  12%|█▏        | 181/1456 [00:00<00:06, 210.21it/s, now=None][A
t:  14%|█▍        | 203/1456 [00:01<00:05, 212.52it/s, now=None][A
t:  15%|█▌        | 225/1456 [00:01<00:05, 214.11it/s, now=None][A
t:  17%|█▋        | 247/1456 [00:01<00:05, 214.58it/s, now=None][A
t:  18%|█▊        | 269/1456 [00:01<00:05, 214.26it/s, now=None][A
t:  20%|█▉        | 291/1456 [00:01<00:05, 213.73it/s, now=None

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-2.mp4


 34%|███▎      | 3352/10000 [00:38<00:55, 119.50it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-3.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-3.mp4



t:   0%|          | 0/829 [00:00<?, ?it/s, now=None][A
t:   2%|▏         | 13/829 [00:00<00:06, 127.42it/s, now=None][A
t:   4%|▍         | 35/829 [00:00<00:04, 177.27it/s, now=None][A
t:   7%|▋         | 57/829 [00:00<00:04, 192.96it/s, now=None][A
t:  10%|▉         | 79/829 [00:00<00:03, 202.61it/s, now=None][A
t:  12%|█▏        | 102/829 [00:00<00:03, 209.34it/s, now=None][A
t:  15%|█▌        | 125/829 [00:00<00:03, 214.28it/s, now=None][A
t:  18%|█▊        | 147/829 [00:00<00:03, 214.72it/s, now=None][A
t:  21%|██        | 170/829 [00:00<00:03, 217.44it/s, now=None][A
t:  23%|██▎       | 192/829 [00:00<00:02, 216.87it/s, now=None][A
t:  26%|██▌       | 214/829 [00:01<00:02, 217.15it/s, now=None][A
t:  28%|██▊       | 236/829 [00:01<00:02, 217.34it/s, now=None][A
t:  31%|███       | 258/829 [00:01<00:02, 217.47it/s, now=None][A
t:  34%|███▍      | 281/829 [00:01<00:02, 218.58it/s, now=None][A
t:  37%|███▋      | 303/829 [00:01<00:02, 217.70it/s, now=None][A
t:  39%|█

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-3.mp4


 41%|████      | 4087/10000 [00:48<00:40, 146.53it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-4.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-4.mp4



t:   0%|          | 0/736 [00:00<?, ?it/s, now=None][A
t:   2%|▏         | 13/736 [00:00<00:05, 126.19it/s, now=None][A
t:   4%|▍         | 31/736 [00:00<00:04, 156.52it/s, now=None][A
t:   7%|▋         | 51/736 [00:00<00:03, 176.16it/s, now=None][A
t:  10%|▉         | 73/736 [00:00<00:03, 191.09it/s, now=None][A
t:  13%|█▎        | 95/736 [00:00<00:03, 199.25it/s, now=None][A
t:  16%|█▌        | 117/736 [00:00<00:03, 205.50it/s, now=None][A
t:  19%|█▉        | 139/736 [00:00<00:02, 209.49it/s, now=None][A
t:  22%|██▏       | 161/736 [00:00<00:02, 212.79it/s, now=None][A
t:  25%|██▍       | 183/736 [00:00<00:02, 212.37it/s, now=None][A
t:  28%|██▊       | 205/736 [00:01<00:02, 214.03it/s, now=None][A
t:  31%|███       | 227/736 [00:01<00:02, 213.24it/s, now=None][A
t:  34%|███▍      | 249/736 [00:01<00:02, 215.25it/s, now=None][A
t:  37%|███▋      | 271/736 [00:01<00:02, 216.66it/s, now=None][A
t:  40%|███▉      | 293/736 [00:01<00:02, 216.35it/s, now=None][A
t:  43%|██

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-4.mp4


 52%|█████▏    | 5181/10000 [00:59<00:33, 141.85it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-5.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-5.mp4



t:   0%|          | 0/1092 [00:00<?, ?it/s, now=None][A
t:   1%|          | 13/1092 [00:00<00:08, 124.96it/s, now=None][A
t:   3%|▎         | 33/1092 [00:00<00:06, 164.87it/s, now=None][A
t:   5%|▍         | 54/1092 [00:00<00:05, 182.58it/s, now=None][A
t:   7%|▋         | 76/1092 [00:00<00:05, 196.43it/s, now=None][A
t:   9%|▉         | 98/1092 [00:00<00:04, 203.34it/s, now=None][A
t:  11%|█         | 120/1092 [00:00<00:04, 208.06it/s, now=None][A
t:  13%|█▎        | 142/1092 [00:00<00:04, 210.54it/s, now=None][A
t:  15%|█▌        | 164/1092 [00:00<00:04, 213.42it/s, now=None][A
t:  17%|█▋        | 186/1092 [00:00<00:04, 214.22it/s, now=None][A
t:  19%|█▉        | 208/1092 [00:01<00:04, 215.63it/s, now=None][A
t:  21%|██        | 230/1092 [00:01<00:03, 215.96it/s, now=None][A
t:  23%|██▎       | 252/1092 [00:01<00:03, 217.16it/s, now=None][A
t:  25%|██▌       | 274/1092 [00:01<00:03, 217.66it/s, now=None][A
t:  27%|██▋       | 296/1092 [00:01<00:03, 217.79it/s, now=None

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-5.mp4


 64%|██████▍   | 6413/10000 [01:15<00:27, 128.86it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-6.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-6.mp4



t:   0%|          | 0/1240 [00:00<?, ?it/s, now=None][A
t:   1%|          | 12/1240 [00:00<00:10, 119.97it/s, now=None][A
t:   3%|▎         | 32/1240 [00:00<00:07, 164.12it/s, now=None][A
t:   4%|▍         | 52/1240 [00:00<00:06, 177.93it/s, now=None][A
t:   6%|▌         | 74/1240 [00:00<00:06, 193.13it/s, now=None][A
t:   8%|▊         | 95/1240 [00:00<00:05, 199.12it/s, now=None][A
t:   9%|▉         | 117/1240 [00:00<00:05, 205.73it/s, now=None][A
t:  11%|█         | 139/1240 [00:00<00:05, 207.32it/s, now=None][A
t:  13%|█▎        | 161/1240 [00:00<00:05, 209.07it/s, now=None][A
t:  15%|█▍        | 182/1240 [00:00<00:05, 209.33it/s, now=None][A
t:  16%|█▋        | 204/1240 [00:01<00:04, 211.92it/s, now=None][A
t:  18%|█▊        | 226/1240 [00:01<00:04, 212.21it/s, now=None][A
t:  20%|██        | 248/1240 [00:01<00:04, 214.10it/s, now=None][A
t:  22%|██▏       | 270/1240 [00:01<00:04, 214.57it/s, now=None][A
t:  24%|██▎       | 292/1240 [00:01<00:04, 215.75it/s, now=None

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-6.mp4


 65%|██████▌   | 6518/10000 [01:21<01:07, 51.53it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-7.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-7.mp4



t:   0%|          | 0/103 [00:00<?, ?it/s, now=None][A
t:  11%|█         | 11/103 [00:00<00:00, 105.99it/s, now=None][A
t:  29%|██▉       | 30/103 [00:00<00:00, 151.11it/s, now=None][A
t:  48%|████▊     | 49/103 [00:00<00:00, 167.73it/s, now=None][A
t:  69%|██████▉   | 71/103 [00:00<00:00, 184.09it/s, now=None][A
t:  90%|█████████ | 93/103 [00:00<00:00, 195.34it/s, now=None][A
 65%|██████▌   | 6543/10000 [01:22<01:25, 40.49it/s]          [A

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-7.mp4


 72%|███████▏  | 7208/10000 [01:27<00:20, 134.73it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-8.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-8.mp4



t:   0%|          | 0/695 [00:00<?, ?it/s, now=None][A
t:   2%|▏         | 11/695 [00:00<00:06, 109.98it/s, now=None][A
t:   4%|▍         | 30/695 [00:00<00:04, 156.34it/s, now=None][A
t:   7%|▋         | 51/695 [00:00<00:03, 180.76it/s, now=None][A
t:  11%|█         | 73/695 [00:00<00:03, 193.16it/s, now=None][A
t:  14%|█▎        | 95/695 [00:00<00:03, 199.11it/s, now=None][A
t:  17%|█▋        | 117/695 [00:00<00:02, 204.02it/s, now=None][A
t:  20%|█▉        | 138/695 [00:00<00:02, 205.92it/s, now=None][A
t:  23%|██▎       | 160/695 [00:00<00:02, 208.37it/s, now=None][A
t:  26%|██▌       | 181/695 [00:00<00:02, 208.21it/s, now=None][A
t:  29%|██▉       | 203/695 [00:01<00:02, 209.87it/s, now=None][A
t:  32%|███▏      | 224/695 [00:01<00:02, 208.64it/s, now=None][A
t:  35%|███▌      | 246/695 [00:01<00:02, 210.76it/s, now=None][A
t:  39%|███▊      | 268/695 [00:01<00:02, 212.24it/s, now=None][A
t:  42%|████▏     | 290/695 [00:01<00:01, 212.64it/s, now=None][A
t:  45%|██

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-8.mp4


 82%|████████▏ | 8192/10000 [01:38<00:12, 143.88it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-9.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-9.mp4



t:   0%|          | 0/982 [00:00<?, ?it/s, now=None][A
t:   1%|          | 11/982 [00:00<00:08, 108.89it/s, now=None][A
t:   3%|▎         | 31/982 [00:00<00:05, 158.96it/s, now=None][A
t:   5%|▌         | 52/982 [00:00<00:05, 179.68it/s, now=None][A
t:   8%|▊         | 75/982 [00:00<00:04, 196.46it/s, now=None][A
t:  10%|▉         | 97/982 [00:00<00:04, 204.71it/s, now=None][A
t:  12%|█▏        | 119/982 [00:00<00:04, 208.41it/s, now=None][A
t:  14%|█▍        | 141/982 [00:00<00:03, 212.14it/s, now=None][A
t:  17%|█▋        | 164/982 [00:00<00:03, 215.71it/s, now=None][A
t:  19%|█▉        | 186/982 [00:00<00:03, 215.67it/s, now=None][A
t:  21%|██        | 208/982 [00:01<00:03, 216.32it/s, now=None][A
t:  23%|██▎       | 230/982 [00:01<00:03, 216.10it/s, now=None][A
t:  26%|██▌       | 252/982 [00:01<00:03, 215.96it/s, now=None][A
t:  28%|██▊       | 274/982 [00:01<00:03, 215.86it/s, now=None][A
t:  30%|███       | 296/982 [00:01<00:03, 215.79it/s, now=None][A
t:  32%|██

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-9.mp4


 82%|████████▏ | 8218/10000 [01:43<02:18, 12.89it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-10.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-10.mp4



t:   0%|          | 0/33 [00:00<?, ?it/s, now=None][A
t:  39%|███▉      | 13/33 [00:00<00:00, 126.18it/s, now=None][A
t:  79%|███████▉  | 26/33 [00:00<00:00, 53.51it/s, now=None] [A
 82%|████████▏ | 8247/10000 [01:43<01:26, 20.29it/s]        [A

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-10.mp4


 83%|████████▎ | 8285/10000 [01:44<00:41, 41.52it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-11.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-11.mp4



t:   0%|          | 0/63 [00:00<?, ?it/s, now=None][A
t:  13%|█▎        | 8/63 [00:00<00:00, 79.58it/s, now=None][A
t:  44%|████▍     | 28/63 [00:00<00:00, 146.37it/s, now=None][A
t:  78%|███████▊  | 49/63 [00:00<00:00, 172.32it/s, now=None][A
 83%|████████▎ | 8311/10000 [01:44<00:39, 43.25it/s]         [A

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-11.mp4


 97%|█████████▋| 9742/10000 [01:54<00:01, 153.16it/s]

Moviepy - Building video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-12.mp4.
Moviepy - Writing video C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-12.mp4



t:   0%|          | 0/1466 [00:00<?, ?it/s, now=None][A
t:   1%|          | 11/1466 [00:00<00:13, 107.79it/s, now=None][A
t:   2%|▏         | 30/1466 [00:00<00:09, 153.13it/s, now=None][A
t:   4%|▎         | 52/1466 [00:00<00:07, 179.93it/s, now=None][A
t:   5%|▌         | 74/1466 [00:00<00:07, 193.92it/s, now=None][A
t:   7%|▋         | 96/1466 [00:00<00:06, 201.54it/s, now=None][A
t:   8%|▊         | 117/1466 [00:00<00:06, 204.00it/s, now=None][A
t:   9%|▉         | 139/1466 [00:00<00:06, 207.01it/s, now=None][A
t:  11%|█         | 161/1466 [00:00<00:06, 210.01it/s, now=None][A
t:  12%|█▏        | 183/1466 [00:00<00:06, 210.47it/s, now=None][A
t:  14%|█▍        | 205/1466 [00:01<00:05, 211.42it/s, now=None][A
t:  15%|█▌        | 227/1466 [00:01<00:05, 212.52it/s, now=None][A
t:  17%|█▋        | 249/1466 [00:01<00:05, 213.77it/s, now=None][A
t:  18%|█▊        | 271/1466 [00:01<00:05, 214.96it/s, now=None][A
t:  20%|█▉        | 293/1466 [00:01<00:05, 214.53it/s, now=None

Moviepy - Done !
Moviepy - video ready C:\Users\domin\Videos\rl\2024-05-29_17.17.36\rl-video-episode-12.mp4


100%|██████████| 10000/10000 [02:03<00:00, 81.30it/s]

closing record_env
record_env closed



  logger.warn("Unable to save last video! Did you call close()?")


In [5]:

from tinydb import Query

with TinyModelDB[MitosisPolicyInfo](base_path=f'E:/saved_models/rl/MultiAgentCartPole/6/mitosis-2024-05-28_20.00.00') as policy_db:
    # ids = [
    #     "2024-05-28_22.44.47~SWBrcf",
    #     "2024-05-28_22.53.10~NxiZmB",
    #     "2024-05-28_23.33.57~BuFN2D",
    #     "2024-05-28_23.17.28~tKHbuA",
    #     "2024-05-28_22.28.41~PSkKj8",
    #     "2024-05-28_23.09.23~gv5cdh",
    #     "2024-05-28_22.36.53~VmRNfG",
    #     "2024-05-28_23.34.42~uId3Dg",
    #     "2024-05-28_23.01.59~ZOgs3C",
    #     "2024-05-28_22.29.12~wJcdJe",
    #     "2024-05-28_23.09.31~jf6H1y",
    #     "2024-05-28_23.01.21~DYpgo1",
    #     "2024-05-28_22.53.46~UtNJV6",
    #     "2024-05-28_22.45.04~Zh7iTt",
    #     "2024-05-28_22.28.22~qszXJA",
    #     "2024-05-28_22.12.13~Xmz6UN",
    #     "2024-05-28_22.52.56~UkPQsu",
    #     "2024-05-28_22.36.34~had5vp",
    #     "2024-05-28_22.45.38~bMEaRM",
    #     "2024-05-28_22.20.28~SsufR9",
    #     "2024-05-28_22.37.25~3b05TO",
    #     "2024-05-28_23.01.09~7ZvbTm",
    #     "2024-05-28_22.20.56~JOCugl",
    #     "2024-05-28_23.25.31~3Qf6TW",
    #     "2024-05-28_23.26.37~EUawd0",
    #     "2024-05-28_22.04.18~j8Jw25",
    #     "2024-05-28_22.12.38~Jj57EC",
    #     "2024-05-28_22.04.03~BSz2Jz",
    #     "2024-05-28_22.11.49~9pmZUv",
    #     "2024-05-28_21.33.55~6uzn1n",
    #     "2024-05-28_22.03.48~XPPD7g",
    #     "2024-05-28_21.33.12~rST4Pk",
    #     "2024-05-28_21.20.40~4qsqZ9",
    #     "2024-05-28_21.20.00~cuZISG",
    #     "2024-05-28_21.07.18~Vs4JWw",
    #     "2024-05-28_21.20.40~asJ3it",
    #     "2024-05-28_21.07.18~YJS6T6",
    #     "2024-05-28_20.54.10~6I7W4P",
    #     "2024-05-28_20.53.18~GLWl38",
    #     "2024-05-28_20.41.05~BBv516",
    #     "2024-05-28_20.40.35~EGHSbo",
    #     "2024-05-28_20.40.50~N8trZR",
    #     "2024-05-28_20.53.58~y9xmO4",
    #     "2024-05-29_01.22.48~aqRz9m",
    #     "2024-05-29_01.22.32~xEO93C",
    #     "2024-05-28_22.20.07~FmOrA1",
    #     "2024-05-29_01.22.17~mi8ahU",
    #     "2024-05-28_21.33.55~0WSNco",
    #     "2024-05-28_21.06.38~v0m9db",
    # ]
    # for id in ids:
    #     policy_db.delete_entry(id, delete_state_dict=True)
    
    for entry in policy_db.all_entries():
        entry['model_info']['optimizations_done'] = 0
        policy_db.db.update({'model_info': entry['model_info']}, Query().model_id == entry['model_id'])