In [1]:
import sys 
import math
import gymnasium as gym
from gymnasium.wrappers.record_video import RecordVideo
from gymnasium.wrappers.record_episode_statistics import RecordEpisodeStatistics
import kidspuzzles
from stable_baselines3 import A2C

In [2]:
def linear_scheduler(initial_value):
    """Return a schedule that scales ``initial_value`` by the remaining progress.

    The returned callable takes ``progress_remaining`` and yields
    ``initial_value * progress_remaining``, i.e. the full value when the
    argument is 1 and zero when it is 0.
    """
    def scaled_by_progress(progress_remaining):
        # Straight proportional decay: nothing but a single multiplication.
        current_value = initial_value * progress_remaining
        return current_value

    return scaled_by_progress

def step_scheduler(initial_value, drop_interval, drop_factor):
    """Build a step-decay schedule driven by the remaining training progress.

    Args:
        initial_value: Value returned before the first drop.
        drop_interval: Fraction of total progress between two consecutive
            drops (e.g. 0.2 drops after every 20% of training). Must be > 0.
        drop_factor: Multiplier applied at every drop (e.g. 0.5 halves it).

    Returns:
        A callable ``func(progress_remaining)`` returning
        ``initial_value * drop_factor ** k`` where ``k`` is the number of
        complete ``drop_interval`` spans contained in
        ``1 - progress_remaining``.

    Raises:
        ValueError: If ``drop_interval`` is not strictly positive.
    """
    if drop_interval <= 0:
        raise ValueError("drop_interval must be strictly positive")

    # Tolerance for binary floating-point round-off: (1 - 0.8) / 0.2 evaluates
    # to 0.9999999999999998, which a bare int() would truncate to 0 and thereby
    # skip the drop that is due exactly at the 20% mark.
    tolerance = 1e-9

    def func(progress_remaining):
        elapsed = 1 - progress_remaining
        completed_drops = int(elapsed / drop_interval + tolerance)
        return initial_value * (drop_factor ** completed_drops)
    return func

def cosine_annealing_scheduler(initial_value, T_max, total_timesteps=None):
    """Build a cosine-annealing schedule driven by the remaining training progress.

    The returned callable receives ``progress_remaining`` (a fraction that is 1
    at the start of training and 0 at the end), converts it to elapsed
    timesteps, and follows half a cosine period from ``initial_value`` down to
    0 over ``T_max`` timesteps. Once ``T_max`` timesteps have elapsed the value
    stays at 0.

    Args:
        initial_value: Value returned at the very start of training.
        T_max: Number of timesteps over which the value is annealed to 0.
            Must be > 0.
        total_timesteps: Total number of training timesteps that
            ``progress_remaining`` refers to. Defaults to ``T_max``, i.e. the
            annealing spans the whole training run.

    Returns:
        A callable ``func(progress_remaining)`` returning the annealed value.

    Raises:
        ValueError: If ``T_max`` is not strictly positive.
    """
    if T_max <= 0:
        raise ValueError("T_max must be strictly positive")
    if total_timesteps is None:
        total_timesteps = T_max

    def func(progress_remaining):
        # The argument is a *fraction remaining*, not a step count: turn it
        # into elapsed steps before comparing it with T_max. Dividing the raw
        # fraction by a step count keeps the cosine argument near 0 and the
        # schedule would never decay.
        elapsed_steps = (1 - progress_remaining) * total_timesteps
        # Clamp to [0, 1] so the value holds at 0 after T_max instead of
        # climbing back up the second half of the cosine.
        phase = min(max(elapsed_steps / T_max, 0.0), 1.0)
        return initial_value * (1 + math.cos(math.pi * phase)) / 2
    return func

In [3]:

# --- Experiment configuration -------------------------------------------------
n_digits = 10
# Optional reward overrides for the environment (currently left at env defaults):
# reward_clipped = -0.02
# reward_enter_target_area = 0.01
# reward_exit_target_area = -0.02

# A training video is recorded for every episode whose index is a multiple of this.
record_freq = 200

total_timesteps = 3_000_000
initial_lr = 7e-4
step_drop_factor = 0.5
step_drop_interval = 0.2

lin_lr_scheduler = linear_scheduler(initial_lr)
step_lr_scheduler = step_scheduler(
    initial_value=initial_lr,
    drop_factor=step_drop_factor,
    drop_interval=step_drop_interval,
)
cos_lr_scheduler = cosine_annealing_scheduler(initial_lr, T_max=total_timesteps)

# Schedule key -> (label used in the video/log directory names, learning rate).
# "None" is a constant learning rate; the others are callables of the
# remaining training progress.
lr_schedules = {
    "None": ("None", initial_lr),
    "linear": ("linear", lin_lr_scheduler),
    "step": (f"step_{step_drop_factor}", step_lr_scheduler),
    "cosine": ("cosine", cos_lr_scheduler),
}

# Schedules to train in this execution of the cell; add keys to compare more.
selected_schedules = ["step"]

# --- Training -----------------------------------------------------------------
env = None
for schedule_key in selected_schedules:
    lr_name, lr_scheduler = lr_schedules[schedule_key]

    # Every run gets its own freshly built environment. Re-wrapping one shared
    # env in RecordVideo on each iteration would stack recorders on top of each
    # other, so the previous run's env is closed before a new one is created.
    # The env of the last run is left open and bound to `env`.
    if env is not None:
        env.close()

    video_folder = f"DigitsPuzzle-{n_digits}-{lr_name}"
    env = RecordVideo(
        gym.make(
            'kidspuzzles/DigitsPuzzleEnv-v0',
            render_mode='rgb_array',
            n_digits=n_digits,
            # reward_clipped = reward_clipped,
            # reward_enter_target_area = reward_enter_target_area,
            # reward_exit_target_area = reward_exit_target_area
        ),
        video_folder=f"videos/{video_folder}",
        name_prefix="training",
        episode_trigger=lambda x: x % record_freq == 0,
    )

    model = A2C(
        "MultiInputPolicy",
        env,
        ent_coef=0.01,
        n_steps=64,
        verbose=1,
        device="mps",  # Apple-silicon GPU backend; change when running on other hardware
        learning_rate=lr_scheduler,
        tensorboard_log=f"./logs/a2c_digitspuzzle_nd{n_digits}_{lr_name}/"
    )
    model.learn(total_timesteps=total_timesteps)

  logger.warn(


Using mps device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./logs/a2c_digitspuzzle_nd10_step_0.5/A2C_1
MoviePy - Building video /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-0.mp4.
MoviePy - Writing video /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-0.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-0.mp4




-----------------------------------------
| rollout/              |               |
|    ep_len_mean        | 300           |
|    ep_rew_mean        | -166          |
| time/                 |               |
|    fps                | 231           |
|    iterations         | 100           |
|    time_elapsed       | 27            |
|    total_timesteps    | 6400          |
| train/                |               |
|    entropy_loss       | -3.64         |
|    explained_variance | -0.0023714304 |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | -16.1         |
|    value_loss         | 31.1          |
-----------------------------------------
-----------------------------------------
| rollout/              |               |
|    ep_len_mean        | 300           |
|    ep_rew_mean        | -165          |
| time/                 |               |
|    fps                | 253           |
|    iterations         | 200     

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-200.mp4




----------------------------------------
| rollout/              |              |
|    ep_len_mean        | 300          |
|    ep_rew_mean        | -155         |
| time/                 |              |
|    fps                | 272          |
|    iterations         | 1000         |
|    time_elapsed       | 235          |
|    total_timesteps    | 64000        |
| train/                |              |
|    entropy_loss       | -3.5         |
|    explained_variance | -1.66893e-06 |
|    learning_rate      | 0.0007       |
|    n_updates          | 999          |
|    policy_loss        | -1.52        |
|    value_loss         | 1.81         |
----------------------------------------
-----------------------------------------
| rollout/              |               |
|    ep_len_mean        | 300           |
|    ep_rew_mean        | -152          |
| time/                 |               |
|    fps                | 272           |
|    iterations         | 1100          |
|    time

                                                                        

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-400.mp4
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -115       |
| time/                 |            |
|    fps                | 274        |
|    iterations         | 1900       |
|    time_elapsed       | 442        |
|    total_timesteps    | 121600     |
| train/                |            |
|    entropy_loss       | -2.8       |
|    explained_variance | 0.66690016 |
|    learning_rate      | 0.0007     |
|    n_updates          | 1899       |
|    policy_loss        | -0.565     |
|    value_loss         | 0.607      |
--------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -112        |
| time/                 |             |
|    fps       

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-600.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -92.3       |
| time/                 |             |
|    fps                | 278         |
|    iterations         | 2900        |
|    time_elapsed       | 666         |
|    total_timesteps    | 185600      |
| train/                |             |
|    entropy_loss       | -2.04       |
|    explained_variance | -0.48086345 |
|    learning_rate      | 0.0007      |
|    n_updates          | 2899        |
|    policy_loss        | 3.2         |
|    value_loss         | 4.06        |
---------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -90.5      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 3000       |
|    time_elapsed       | 688        |
|    tot

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-800.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -88.8       |
| time/                 |             |
|    fps                | 278         |
|    iterations         | 3800        |
|    time_elapsed       | 872         |
|    total_timesteps    | 243200      |
| train/                |             |
|    entropy_loss       | -1.35       |
|    explained_variance | -0.75688684 |
|    learning_rate      | 0.0007      |
|    n_updates          | 3799        |
|    policy_loss        | 3.68        |
|    value_loss         | 6.47        |
---------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -86.6       |
| time/                 |             |
|    fps                | 278         |
|    iterations         | 3900        |
|    time_elapsed       | 895         |


                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-1000.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -78.8      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 4700       |
|    time_elapsed       | 1080       |
|    total_timesteps    | 300800     |
| train/                |            |
|    entropy_loss       | -1.48      |
|    explained_variance | 0.05486083 |
|    learning_rate      | 0.0007     |
|    n_updates          | 4699       |
|    policy_loss        | -4.78      |
|    value_loss         | 10.9       |
--------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -78.4       |
| time/                 |             |
|    fps                | 278         |
|    iterations         | 4800        |
|    time_elapsed       | 1102        |
|    total_timest

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-1200.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -73.4      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 5700       |
|    time_elapsed       | 1310       |
|    total_timesteps    | 364800     |
| train/                |            |
|    entropy_loss       | -1.43      |
|    explained_variance | 0.18404305 |
|    learning_rate      | 0.0007     |
|    n_updates          | 5699       |
|    policy_loss        | 0.3        |
|    value_loss         | 0.0631     |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -72.7      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 5800       |
|    time_elapsed       | 1333       |
|    total_timesteps    |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-1400.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -69.1     |
| time/                 |           |
|    fps                | 278       |
|    iterations         | 6600      |
|    time_elapsed       | 1517      |
|    total_timesteps    | 422400    |
| train/                |           |
|    entropy_loss       | -1.41     |
|    explained_variance | 0.1549412 |
|    learning_rate      | 0.0007    |
|    n_updates          | 6599      |
|    policy_loss        | -0.738    |
|    value_loss         | 0.462     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -69.6     |
| time/                 |           |
|    fps                | 278       |
|    iterations         | 6700      |
|    time_elapsed       | 1539      |
|    total_timesteps    | 428800    |
| train/    

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-1600.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -62.7     |
| time/                 |           |
|    fps                | 278       |
|    iterations         | 7600      |
|    time_elapsed       | 1747      |
|    total_timesteps    | 486400    |
| train/                |           |
|    entropy_loss       | -1.91     |
|    explained_variance | 0.5275426 |
|    learning_rate      | 0.0007    |
|    n_updates          | 7599      |
|    policy_loss        | 0.914     |
|    value_loss         | 0.679     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -63.4      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 7700       |
|    time_elapsed       | 1770       |
|    total_timesteps    | 492800     |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-1800.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -61.5      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 8500       |
|    time_elapsed       | 1955       |
|    total_timesteps    | 544000     |
| train/                |            |
|    entropy_loss       | -2.3       |
|    explained_variance | 0.79366475 |
|    learning_rate      | 0.0007     |
|    n_updates          | 8499       |
|    policy_loss        | 0.269      |
|    value_loss         | 0.778      |
--------------------------------------
----------------------------------------
| rollout/              |              |
|    ep_len_mean        | 300          |
|    ep_rew_mean        | -61.6        |
| time/                 |              |
|    fps                | 278          |
|    iterations         | 8600         |
|    time_elapsed       | 1978         |
|    tota

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-2000.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -53.7      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 9400       |
|    time_elapsed       | 2162       |
|    total_timesteps    | 601600     |
| train/                |            |
|    entropy_loss       | -2.17      |
|    explained_variance | 0.87045443 |
|    learning_rate      | 0.00035    |
|    n_updates          | 9399       |
|    policy_loss        | -2.9       |
|    value_loss         | 2.19       |
--------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -53.2       |
| time/                 |             |
|    fps                | 278         |
|    iterations         | 9500        |
|    time_elapsed       | 2185        |
|    total_timest

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-2200.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -48.1       |
| time/                 |             |
|    fps                | 278         |
|    iterations         | 10400       |
|    time_elapsed       | 2393        |
|    total_timesteps    | 665600      |
| train/                |             |
|    entropy_loss       | -2.27       |
|    explained_variance | 0.043009102 |
|    learning_rate      | 0.00035     |
|    n_updates          | 10399       |
|    policy_loss        | 0.503       |
|    value_loss         | 0.141       |
---------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -45.9      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 10500      |
|    time_elapsed       | 2416       |
|    tot

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-2400.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -45.2       |
| time/                 |             |
|    fps                | 278         |
|    iterations         | 11300       |
|    time_elapsed       | 2601        |
|    total_timesteps    | 723200      |
| train/                |             |
|    entropy_loss       | -2.25       |
|    explained_variance | -0.66867626 |
|    learning_rate      | 0.00035     |
|    n_updates          | 11299       |
|    policy_loss        | 1.13        |
|    value_loss         | 0.434       |
---------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -45.3      |
| time/                 |            |
|    fps                | 278        |
|    iterations         | 11400      |
|    time_elapsed       | 2624       |
|    tot

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-2600.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -40.8      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 12200      |
|    time_elapsed       | 2808       |
|    total_timesteps    | 780800     |
| train/                |            |
|    entropy_loss       | -2.1       |
|    explained_variance | 0.18906724 |
|    learning_rate      | 0.00035    |
|    n_updates          | 12199      |
|    policy_loss        | 0.973      |
|    value_loss         | 0.291      |
--------------------------------------
----------------------------------------
| rollout/              |              |
|    ep_len_mean        | 300          |
|    ep_rew_mean        | -39.8        |
| time/                 |              |
|    fps                | 278          |
|    iterations         | 12300        |
|    time_elapsed       | 2831         |
|    tota

                                                                        

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-2800.mp4
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -39.1      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 13200      |
|    time_elapsed       | 3039       |
|    total_timesteps    | 844800     |
| train/                |            |
|    entropy_loss       | -2.03      |
|    explained_variance | 0.24596202 |
|    learning_rate      | 0.00035    |
|    n_updates          | 13199      |
|    policy_loss        | 2.15       |
|    value_loss         | 1.74       |
--------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -39       |
| time/                 |           |
|    fps                

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-3000.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -35.6       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 14100       |
|    time_elapsed       | 3246        |
|    total_timesteps    | 902400      |
| train/                |             |
|    entropy_loss       | -1.04       |
|    explained_variance | 0.030046225 |
|    learning_rate      | 0.00035     |
|    n_updates          | 14099       |
|    policy_loss        | -3.45       |
|    value_loss         | 7.8         |
---------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -35.4     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 14200     |
|    time_elapsed       | 3269      |
|    total_times

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-3200.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -31        |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 15100      |
|    time_elapsed       | 3477       |
|    total_timesteps    | 966400     |
| train/                |            |
|    entropy_loss       | -1.72      |
|    explained_variance | 0.90839785 |
|    learning_rate      | 0.00035    |
|    n_updates          | 15099      |
|    policy_loss        | -1.85      |
|    value_loss         | 1.33       |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -30.2      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 15200      |
|    time_elapsed       | 3500       |
|    total_timesteps    |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-3400.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -24.2     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 16000     |
|    time_elapsed       | 3685      |
|    total_timesteps    | 1024000   |
| train/                |           |
|    entropy_loss       | -1.36     |
|    explained_variance | 0.9574325 |
|    learning_rate      | 0.00035   |
|    n_updates          | 15999     |
|    policy_loss        | 0.187     |
|    value_loss         | 0.154     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -24.8      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 16100      |
|    time_elapsed       | 3707       |
|    total_timesteps    | 1030400    |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-3600.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -18.5     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 16900     |
|    time_elapsed       | 3893      |
|    total_timesteps    | 1081600   |
| train/                |           |
|    entropy_loss       | -1.54     |
|    explained_variance | 0.9457554 |
|    learning_rate      | 0.00035   |
|    n_updates          | 16899     |
|    policy_loss        | -0.907    |
|    value_loss         | 0.579     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -18.7      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 17000      |
|    time_elapsed       | 3916       |
|    total_timesteps    | 1088000    |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-3800.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -21.2       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 17900       |
|    time_elapsed       | 4124        |
|    total_timesteps    | 1145600     |
| train/                |             |
|    entropy_loss       | -0.48       |
|    explained_variance | -0.11506176 |
|    learning_rate      | 0.00035     |
|    n_updates          | 17899       |
|    policy_loss        | -0.0761     |
|    value_loss         | 0.056       |
---------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -21.4       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 18000       |
|    time_elapsed       | 4146        |


                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-4000.mp4
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -14.4      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 18800      |
|    time_elapsed       | 4331       |
|    total_timesteps    | 1203200    |
| train/                |            |
|    entropy_loss       | -1.31      |
|    explained_variance | -0.5160161 |
|    learning_rate      | 0.000175   |
|    n_updates          | 18799      |
|    policy_loss        | 1.54       |
|    value_loss         | 1.9        |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -14.6      |
| time/                 |            |
|    fps           

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-4200.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -15.1      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 19700      |
|    time_elapsed       | 4539       |
|    total_timesteps    | 1260800    |
| train/                |            |
|    entropy_loss       | -0.666     |
|    explained_variance | -0.6784816 |
|    learning_rate      | 0.000175   |
|    n_updates          | 19699      |
|    policy_loss        | -0.0337    |
|    value_loss         | 0.0239     |
--------------------------------------
----------------------------------------
| rollout/              |              |
|    ep_len_mean        | 300          |
|    ep_rew_mean        | -15.1        |
| time/                 |              |
|    fps                | 277          |
|    iterations         | 19800        |
|    time_elapsed       | 4562         |
|    tota

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-4400.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -12.6      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 20700      |
|    time_elapsed       | 4769       |
|    total_timesteps    | 1324800    |
| train/                |            |
|    entropy_loss       | -0.534     |
|    explained_variance | -3.3468866 |
|    learning_rate      | 0.000175   |
|    n_updates          | 20699      |
|    policy_loss        | -0.0268    |
|    value_loss         | 0.0282     |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -12.6      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 20800      |
|    time_elapsed       | 4792       |
|    total_timesteps    |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-4600.mp4




----------------------------------------
| rollout/              |              |
|    ep_len_mean        | 300          |
|    ep_rew_mean        | -12.1        |
| time/                 |              |
|    fps                | 277          |
|    iterations         | 21600        |
|    time_elapsed       | 4977         |
|    total_timesteps    | 1382400      |
| train/                |              |
|    entropy_loss       | -0.362       |
|    explained_variance | -0.023254871 |
|    learning_rate      | 0.000175     |
|    n_updates          | 21599        |
|    policy_loss        | -0.31        |
|    value_loss         | 0.707        |
----------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -12       |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 21700     |
|    time_elapsed       | 4999      |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-4800.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -10.4     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 22600     |
|    time_elapsed       | 5207      |
|    total_timesteps    | 1446400   |
| train/                |           |
|    entropy_loss       | -1.14     |
|    explained_variance | 0.9365311 |
|    learning_rate      | 0.000175  |
|    n_updates          | 22599     |
|    policy_loss        | -1.15     |
|    value_loss         | 0.935     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -11.1      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 22700      |
|    time_elapsed       | 5230       |
|    total_timesteps    | 1452800    |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-5000.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -9.22     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 23500     |
|    time_elapsed       | 5415      |
|    total_timesteps    | 1504000   |
| train/                |           |
|    entropy_loss       | -1.36     |
|    explained_variance | 0.9052209 |
|    learning_rate      | 0.000175  |
|    n_updates          | 23499     |
|    policy_loss        | -0.84     |
|    value_loss         | 0.457     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -8.92      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 23600      |
|    time_elapsed       | 5438       |
|    total_timesteps    | 1510400    |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-5200.mp4
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -9.54     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 24400     |
|    time_elapsed       | 5621      |
|    total_timesteps    | 1561600   |
| train/                |           |
|    entropy_loss       | -1.36     |
|    explained_variance | 0.9828803 |
|    learning_rate      | 0.000175  |
|    n_updates          | 24399     |
|    policy_loss        | -0.682    |
|    value_loss         | 0.173     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -8.8       |
| time/                 |            |
|    fps                | 277       

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-5400.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -7.77     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 25400     |
|    time_elapsed       | 5852      |
|    total_timesteps    | 1625600   |
| train/                |           |
|    entropy_loss       | -0.498    |
|    explained_variance | -1.372685 |
|    learning_rate      | 0.000175  |
|    n_updates          | 25399     |
|    policy_loss        | -0.302    |
|    value_loss         | 0.293     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -7.65      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 25500      |
|    time_elapsed       | 5875       |
|    total_timesteps    | 1632000    |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-5600.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -7.31      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 26300      |
|    time_elapsed       | 6060       |
|    total_timesteps    | 1683200    |
| train/                |            |
|    entropy_loss       | -0.645     |
|    explained_variance | -7.3632183 |
|    learning_rate      | 0.000175   |
|    n_updates          | 26299      |
|    policy_loss        | 0.521      |
|    value_loss         | 0.853      |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -8.07      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 26400      |
|    time_elapsed       | 6083       |
|    total_timesteps    |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-5800.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -6.51       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 27200       |
|    time_elapsed       | 6268        |
|    total_timesteps    | 1740800     |
| train/                |             |
|    entropy_loss       | -0.636      |
|    explained_variance | -0.89828205 |
|    learning_rate      | 0.000175    |
|    n_updates          | 27199       |
|    policy_loss        | 0.2         |
|    value_loss         | 0.176       |
---------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -5.75       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 27300       |
|    time_elapsed       | 6291        |


                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-6000.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -6.91      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 28200      |
|    time_elapsed       | 6499       |
|    total_timesteps    | 1804800    |
| train/                |            |
|    entropy_loss       | -0.223     |
|    explained_variance | 0.04228878 |
|    learning_rate      | 8.75e-05   |
|    n_updates          | 28199      |
|    policy_loss        | -0.0225    |
|    value_loss         | 0.247      |
--------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -6.84     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 28300     |
|    time_elapsed       | 6522      |
|    total_timesteps    | 1811200

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-6200.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -5.32      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 29100      |
|    time_elapsed       | 6706       |
|    total_timesteps    | 1862400    |
| train/                |            |
|    entropy_loss       | -0.513     |
|    explained_variance | -1.6137538 |
|    learning_rate      | 8.75e-05   |
|    n_updates          | 29099      |
|    policy_loss        | -0.0856    |
|    value_loss         | 0.0224     |
--------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -4.96     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 29200     |
|    time_elapsed       | 6729      |
|    total_timesteps    | 1868800

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-6400.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -4.35     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 30100     |
|    time_elapsed       | 6937      |
|    total_timesteps    | 1926400   |
| train/                |           |
|    entropy_loss       | -1.09     |
|    explained_variance | 0.8895342 |
|    learning_rate      | 8.75e-05  |
|    n_updates          | 30099     |
|    policy_loss        | -0.737    |
|    value_loss         | 0.689     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -4.48      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 30200      |
|    time_elapsed       | 6960       |
|    total_timesteps    | 1932800    |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-6600.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -5.08      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 31000      |
|    time_elapsed       | 7145       |
|    total_timesteps    | 1984000    |
| train/                |            |
|    entropy_loss       | -1.24      |
|    explained_variance | 0.27775842 |
|    learning_rate      | 8.75e-05   |
|    n_updates          | 30999      |
|    policy_loss        | 2.6        |
|    value_loss         | 2.63       |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -5.29      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 31100      |
|    time_elapsed       | 7168       |
|    total_timesteps    |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-6800.mp4
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -3.42     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 31900     |
|    time_elapsed       | 7352      |
|    total_timesteps    | 2041600   |
| train/                |           |
|    entropy_loss       | -1.46     |
|    explained_variance | 0.9840599 |
|    learning_rate      | 8.75e-05  |
|    n_updates          | 31899     |
|    policy_loss        | 0.119     |
|    value_loss         | 0.13      |
-------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -3.37       |
| time/                 |             |
|    fps                | 277  

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-7000.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -4.58      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 32900      |
|    time_elapsed       | 7580       |
|    total_timesteps    | 2105600    |
| train/                |            |
|    entropy_loss       | -0.559     |
|    explained_variance | -2.0092835 |
|    learning_rate      | 8.75e-05   |
|    n_updates          | 32899      |
|    policy_loss        | 0.159      |
|    value_loss         | 0.104      |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -4.27      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 33000      |
|    time_elapsed       | 7603       |
|    total_timesteps    |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-7200.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -4.09      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 33800      |
|    time_elapsed       | 7788       |
|    total_timesteps    | 2163200    |
| train/                |            |
|    entropy_loss       | -0.785     |
|    explained_variance | -0.7603687 |
|    learning_rate      | 8.75e-05   |
|    n_updates          | 33799      |
|    policy_loss        | -1.23      |
|    value_loss         | 2.03       |
--------------------------------------
----------------------------------------
| rollout/              |              |
|    ep_len_mean        | 300          |
|    ep_rew_mean        | -3.96        |
| time/                 |              |
|    fps                | 277          |
|    iterations         | 33900        |
|    time_elapsed       | 7811         |
|    tota

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-7400.mp4




----------------------------------------
| rollout/              |              |
|    ep_len_mean        | 300          |
|    ep_rew_mean        | -1.37        |
| time/                 |              |
|    fps                | 277          |
|    iterations         | 34700        |
|    time_elapsed       | 7996         |
|    total_timesteps    | 2220800      |
| train/                |              |
|    entropy_loss       | -0.418       |
|    explained_variance | -0.008366704 |
|    learning_rate      | 8.75e-05     |
|    n_updates          | 34699        |
|    policy_loss        | -0.304       |
|    value_loss         | 0.739        |
----------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -1.09      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 34800      |
|    time_elapsed       | 8019

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-7600.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -2.68       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 35700       |
|    time_elapsed       | 8227        |
|    total_timesteps    | 2284800     |
| train/                |             |
|    entropy_loss       | -0.65       |
|    explained_variance | -0.30703378 |
|    learning_rate      | 8.75e-05    |
|    n_updates          | 35699       |
|    policy_loss        | -0.0841     |
|    value_loss         | 0.0237      |
---------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 298         |
|    ep_rew_mean        | -2.81       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 35800       |
|    time_elapsed       | 8250        |


                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-7800.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -2.72      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 36600      |
|    time_elapsed       | 8435       |
|    total_timesteps    | 2342400    |
| train/                |            |
|    entropy_loss       | -1.5       |
|    explained_variance | -0.5340495 |
|    learning_rate      | 8.75e-05   |
|    n_updates          | 36599      |
|    policy_loss        | 0.93       |
|    value_loss         | 0.548      |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -1.9       |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 36700      |
|    time_elapsed       | 8458       |
|    total_timesteps    |

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-8000.mp4
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -1.3       |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 37600      |
|    time_elapsed       | 8665       |
|    total_timesteps    | 2406400    |
| train/                |            |
|    entropy_loss       | -1.02      |
|    explained_variance | -1.4959147 |
|    learning_rate      | 4.37e-05   |
|    n_updates          | 37599      |
|    policy_loss        | -0.156     |
|    value_loss         | 0.0293     |
--------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -1.29      |
| time/                 |            |
|    fps           

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-8200.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | -1.02      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 38500      |
|    time_elapsed       | 8873       |
|    total_timesteps    | 2464000    |
| train/                |            |
|    entropy_loss       | -1.12      |
|    explained_variance | -0.6406566 |
|    learning_rate      | 4.37e-05   |
|    n_updates          | 38499      |
|    policy_loss        | -0.0792    |
|    value_loss         | 0.0424     |
--------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -1.3      |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 38600     |
|    time_elapsed       | 8896      |
|    total_timesteps    | 2470400

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-8400.mp4




---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -1.41       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 39400       |
|    time_elapsed       | 9080        |
|    total_timesteps    | 2521600     |
| train/                |             |
|    entropy_loss       | -1.11       |
|    explained_variance | 0.119332254 |
|    learning_rate      | 4.37e-05    |
|    n_updates          | 39399       |
|    policy_loss        | 0.194       |
|    value_loss         | 0.235       |
---------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -1.88     |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 39500     |
|    time_elapsed       | 9103      |
|    total_times

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-8600.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -0.618    |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 40400     |
|    time_elapsed       | 9310      |
|    total_timesteps    | 2585600   |
| train/                |           |
|    entropy_loss       | -1.9      |
|    explained_variance | 0.7026917 |
|    learning_rate      | 4.37e-05  |
|    n_updates          | 40399     |
|    policy_loss        | -4.59     |
|    value_loss         | 5.66      |
-------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -0.815      |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 40500       |
|    time_elapsed       | 9333        |
|    total_timesteps    | 2592000 

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-8800.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | 0.0144    |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 41300     |
|    time_elapsed       | 9518      |
|    total_timesteps    | 2643200   |
| train/                |           |
|    entropy_loss       | -1.51     |
|    explained_variance | 0.9182213 |
|    learning_rate      | 4.37e-05  |
|    n_updates          | 41299     |
|    policy_loss        | -0.484    |
|    value_loss         | 0.302     |
-------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -0.191      |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 41400       |
|    time_elapsed       | 9541        |
|    total_timesteps    | 2649600 

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-9000.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | -0.129    |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 42200     |
|    time_elapsed       | 9727      |
|    total_timesteps    | 2700800   |
| train/                |           |
|    entropy_loss       | -1.67     |
|    explained_variance | 0.9146347 |
|    learning_rate      | 4.37e-05  |
|    n_updates          | 42199     |
|    policy_loss        | -0.99     |
|    value_loss         | 0.954     |
-------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | -0.0402     |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 42300       |
|    time_elapsed       | 9750        |
|    total_timesteps    | 2707200 

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-9200.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 300        |
|    ep_rew_mean        | 0.493      |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 43200      |
|    time_elapsed       | 9956       |
|    total_timesteps    | 2764800    |
| train/                |            |
|    entropy_loss       | -2.04      |
|    explained_variance | 0.97137153 |
|    learning_rate      | 4.37e-05   |
|    n_updates          | 43199      |
|    policy_loss        | 0.598      |
|    value_loss         | 0.222      |
--------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | 0.823       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 43300       |
|    time_elapsed       | 9980        |
|    total_timest

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-9400.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 300       |
|    ep_rew_mean        | 1         |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 44100     |
|    time_elapsed       | 10164     |
|    total_timesteps    | 2822400   |
| train/                |           |
|    entropy_loss       | -1.91     |
|    explained_variance | 0.7210729 |
|    learning_rate      | 4.37e-05  |
|    n_updates          | 44099     |
|    policy_loss        | -2.14     |
|    value_loss         | 1.95      |
-------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 300         |
|    ep_rew_mean        | 0.513       |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 44200       |
|    time_elapsed       | 10187       |
|    total_timesteps    | 2828800 

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-9600.mp4




--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 296        |
|    ep_rew_mean        | 2.18       |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 45000      |
|    time_elapsed       | 10372      |
|    total_timesteps    | 2880000    |
| train/                |            |
|    entropy_loss       | -2.1       |
|    explained_variance | -2.6692295 |
|    learning_rate      | 4.37e-05   |
|    n_updates          | 44999      |
|    policy_loss        | 0.991      |
|    value_loss         | 0.472      |
--------------------------------------
---------------------------------------
| rollout/              |             |
|    ep_len_mean        | 294         |
|    ep_rew_mean        | 2.53        |
| time/                 |             |
|    fps                | 277         |
|    iterations         | 45100       |
|    time_elapsed       | 10395       |
|    total_timest

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-9800.mp4




-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 299       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 277       |
|    iterations         | 46000     |
|    time_elapsed       | 10603     |
|    total_timesteps    | 2944000   |
| train/                |           |
|    entropy_loss       | -1.66     |
|    explained_variance | 0.8852365 |
|    learning_rate      | 4.37e-05  |
|    n_updates          | 45999     |
|    policy_loss        | -0.267    |
|    value_loss         | 0.866     |
-------------------------------------
--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 299        |
|    ep_rew_mean        | 2.38       |
| time/                 |            |
|    fps                | 277        |
|    iterations         | 46100      |
|    time_elapsed       | 10626      |
|    total_timesteps    | 2950400    |
| t

                                                                          

MoviePy - Done !
MoviePy - video ready /Users/bapa/Codes/KidsPuzzles/videos/DigitsPuzzle-10-step/training-episode-10000.mp4




In [4]:
import time

# Visual sanity check: let the trained policy act for a fixed number of steps
# while rendering each frame. `model` comes from the training cell above.
vec_env = model.get_env()
observations = vec_env.reset()
reward_sum = 0
for _ in range(50):
    action, _state = model.predict(observations)
    # A Stable-Baselines3 VecEnv step returns a 4-tuple (obs, rewards, dones, infos);
    # `dones` holds one flag per sub-environment, not a single boolean.
    observations, reward, dones, info = vec_env.step(action)
    vec_env.render("human")
    time.sleep(5)  # hold each frame long enough to inspect it by eye

    # Accumulates over all 50 steps, so the total may span an episode boundary.
    reward_sum += reward
    # No manual reset here: a VecEnv resets a finished sub-environment on its
    # own, and the observation returned by step() is already the first one of
    # the new episode. Calling vec_env.reset() again would throw that episode
    # away and start yet another one.

print("Total reward: ", reward_sum)

2025-02-09 01:09:30.953 python[33249:2281154] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-09 01:09:30.953 python[33249:2281154] +[IMKInputSession subclass]: chose IMKInputSession_Modern


Total reward:  [-10.6444435]


In [5]:
# Close the wrapped environment built in the training cell now that the rollout is done.
env.close()