In [1]:
# Make the project root importable so that the `utils.*` modules and the
# locally modified `stable_baselines3` package can be imported below.
# NOTE(review): this is a machine-specific absolute path; adjust it (or derive
# it from the notebook location) when running on another host.
import sys

PROJECT_ROOT = '/data/ad181/RemoteDir/multilevel_ppo'
# Guard the append so that re-running this cell does not keep growing sys.path
# with duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
%matplotlib notebook
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from tqdm.notebook import trange, tqdm

import gym
from stable_baselines3.ppo import PPO, MlpPolicy
from stable_baselines3.ppo_multi_level import PPO_ML
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env.subproc_vec_multi_level_env import SubprocVecMultiLevelEnv
from stable_baselines3.common.envs.multi_level_ressim_env import MultiLevelRessimEnv
from stable_baselines3.common.logger import configure

from utils.custom_eval_callback import CustomEvalCallback, CustomEvalCallbackParallel
from utils.plot_functions import plot_learning
from utils.env_evaluate_functions import eval_actions

In [3]:
# Run configuration.
seed = 1             # default seed (the training cell below loops over its own seeds)
case = 'ppo_2l'      # experiment tag, used as the sub-directory name for outputs
data_dir = './data'  # root directory for all outputs of this notebook
# Derive the log directory from data_dir so the two locations cannot drift apart.
log_dir = data_dir + '/' + case

In [4]:
# Create the output directories up front; exist_ok=True makes the cell safe to re-run.
for directory in (data_dir, log_dir):
    os.makedirs(directory, exist_ok=True)

In [5]:
# Load the pickled training environments into `env_ck_dict`.
# The handle is deliberately not called `input`, which would shadow the
# built-in input() for the rest of the notebook session.
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
with open('../envs_params/env_data_v1/env_train_dict.pkl', 'rb') as pkl_file:
    env_ck_dict = pickle.load(pkl_file)

In [6]:
# Keep only the last `n_levels` entries of `env_ck_dict` (keys fine_level-n_levels+1 .. fine_level)
# and re-key them as 1..n_levels in `env_dict_`; each mapping "new -> original" is printed.
n_levels = 2
fine_level = len(env_ck_dict)
env_dict_ = {}
for i, l in zip(range(n_levels), range(fine_level - n_levels, fine_level)):
    new_key, source_key = i + 1, l + 1
    print(new_key, '->', source_key)
    env_dict_[new_key] = env_ck_dict[source_key]

1 -> 4
2 -> 5


In [7]:
# Train one PPO_ML agent per seed. For every seed the cell:
#   1. creates a seed-specific log directory,
#   2. builds an evaluation callback on the finest-level environment,
#   3. vectorizes one environment per level (N subprocess workers each),
#   4. trains the model, saves it, and closes the vectorized environments.
for seed in range(1,4):
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)
    T = {1:70, 2:5} # n_steps
    N = 50 # number of actors
    M = {1:350, 2:25} # minibatch size
    I = 300 # number of iterations
    K = 20 # number of epochs

    # NOTE(review): true division yields a float (10.0 here), so eval_freq below is
    # also a float (750.0) -- confirm that CustomEvalCallback accepts a non-integer value.
    log_interval = I/30

    # Levels are keyed 1..len(env_dict_), so the largest key is the finest level.
    fine_level = len(env_dict_)

    print('generate callback ...')
    eval_callback = CustomEvalCallback( env_dict_[fine_level],
                                        best_model_save_path=str(log_dir)+'/best_model',
                                        n_eval_episodes=1,
                                        log_path=str(log_dir)+'/results_eval',
                                        eval_freq=log_interval*sum(T.values()) )

    print('vectorize environment ...')

    # generate PPO_ML parameters for MLMC analysis.
    env_dict = {}
    n_steps_dict = {}
    batch_size_dict = {}
    try:
        for level, env in env_dict_.items():
            print(f"vectorize env level {level}")
            env_dict[level] = make_vec_env( MultiLevelRessimEnv,
                                    n_envs=N,
                                    seed=seed,
                                    env_kwargs= {"ressim_params":env.ressim_params, "level":env.level},
                                    vec_env_cls=SubprocVecMultiLevelEnv )
            n_steps_dict[level] = T[level]
            batch_size_dict[level] = M[level]

        # Use fine_level explicitly instead of relying on the loop variable `level`
        # leaking out of the loop above (same key, as the callback already assumes).
        print(env_dict_[fine_level].observation_space)
        print('model definition ..')
        model = PPO_ML(policy=MlpPolicy,
                           env=env_dict,
                           learning_rate = 1e-5,
                           n_steps = n_steps_dict,
                           batch_size = batch_size_dict,
                           n_epochs = K,
                           clip_range = 0.1,
                           ent_coef = 0.001,
                           vf_coef = 0.5,
                           policy_kwargs = dict(net_arch=[150,100,80], log_std_init=-2.9),
                           verbose = 1,
                           seed = seed,
                           target_kl = 0.05,
                           device = "auto")
        # set logger for the model
        new_logger = configure(log_dir)
        model.set_logger(new_logger)
        print('policy learning ..')
        # Total budget: N actors * sum of per-level n_steps * I iterations.
        model.learn(total_timesteps=N*sum(T.values())*I, callback=eval_callback)
        model.save(log_dir+'/PPO', exclude=['env_dict'])
        del model
    finally:
        # Always shut down the subprocess workers, even if construction, training
        # or saving fails; otherwise the worker processes of every level created
        # so far would be left running.
        for vec_env in env_dict.values():
            vec_env.close()


seed 1
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
Box(-1.0, 1.0, (96,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_2l/seed_1
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.578    |
| time/              |          |
|    fps             | 72       |
|    iterations      | 1        |
|    time_elapsed    | 51       |
|    total_timesteps | 3750     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.575       |
| time/                   |             |
|    fps                  | 80          |
|    iterations           | 2           |
|    time_elapsed         | 93          |
|    total_timesteps      | 7500        |
| train/                  |             |
|    approx_kl            | 0.007996517 |
|    clip_fraction        | 0.341       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | -4.02       |
|    learning_rate        | 1e

  for j in range(len(p_1)-1):


Eval num_timesteps=37500, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.594      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.582      |
| time/                   |            |
|    fps                  | 83         |
|    iterations           | 10         |
|    time_elapsed         | 448        |
|    total_timesteps      | 37500      |
| train/                  |            |
|    approx_kl            | 0.02993982 |
|    clip_fraction        | 0.451      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.627      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0305     |
|    n_updates            | 180        |
|    policy_gradient_loss | 0.00

Early stopping at step 16 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.578      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 19         |
|    time_elapsed         | 819        |
|    total_timesteps      | 71250      |
| train/                  |            |
|    approx_kl            | 0.02039597 |
|    clip_fraction        | 0.451      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.748      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0441     |
|    n_updates            | 360        |
|    policy_gradient_loss | 0.0099     |
|    std                  | 0.055      |
|    value_loss           | 0.0088     |
----------------------------------------
Early stopping at step 19 due to reaching m

Early stopping at step 12 due to reaching max kl: 0.08
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.59       |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 28         |
|    time_elapsed         | 1211       |
|    total_timesteps      | 105000     |
| train/                  |            |
|    approx_kl            | 0.04554894 |
|    clip_fraction        | 0.459      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.799      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0528    |
|    n_updates            | 540        |
|    policy_gradient_loss | 0.00287    |
|    std                  | 0.055      |
|    value_loss           | 0.0072     |
----------------------------------------
Early stopping at step 13 due to reaching m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.595      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 37         |
|    time_elapsed         | 1604       |
|    total_timesteps      | 138750     |
| train/                  |            |
|    approx_kl            | 0.03236131 |
|    clip_fraction        | 0.473      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.825      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0167     |
|    n_updates            | 720        |
|    policy_gradient_loss | 0.00543    |
|    std                  | 0.055      |
|    value_loss           | 0.00604    |
----------------------------------------
Early stopping at step 8 due to reaching max kl: 0.02
-----------------------------------------
| 

Early stopping at step 16 due to reaching max kl: 0.09
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.599       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 46          |
|    time_elapsed         | 1999        |
|    total_timesteps      | 172500      |
| train/                  |             |
|    approx_kl            | 0.049407844 |
|    clip_fraction        | 0.427       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.848       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0149     |
|    n_updates            | 900         |
|    policy_gradient_loss | 0.00346     |
|    std                  | 0.055       |
|    value_loss           | 0.00574     |
-----------------------------------------
Early stopping at ste

Early stopping at step 10 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.608       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 55          |
|    time_elapsed         | 2393        |
|    total_timesteps      | 206250      |
| train/                  |             |
|    approx_kl            | 0.012602488 |
|    clip_fraction        | 0.423       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.866       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0703      |
|    n_updates            | 1080        |
|    policy_gradient_loss | 0.00401     |
|    std                  | 0.055       |
|    value_loss           | 0.00473     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.07
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 5         |
|    ep_rew_mean          | 0.608     |
| time/                   |           |
|    fps                  | 86        |
|    iterations           | 64        |
|    time_elapsed         | 2788      |
|    total_timesteps      | 240000    |
| train/                  |           |
|    approx_kl            | 0.0469703 |
|    clip_fraction        | 0.473     |
|    clip_range           | 0.1       |
|    entropy_loss         | -94.8     |
|    explained_variance   | 0.876     |
|    learning_rate        | 1e-05     |
|    loss                 | 0.0186    |
|    n_updates            | 1260      |
|    policy_gradient_loss | 0.0131    |
|    std                  | 0.055     |
|    value_loss           | 0.00483   |
---------------------------------------
Early stopping at step 16 due to reaching max kl: 0.04
----------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.616       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 73          |
|    time_elapsed         | 3182        |
|    total_timesteps      | 273750      |
| train/                  |             |
|    approx_kl            | 0.031241504 |
|    clip_fraction        | 0.471       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.887       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0278      |
|    n_updates            | 1440        |
|    policy_gradient_loss | -0.00215    |
|    std                  | 0.055       |
|    value_loss           | 0.00441     |
-----------------------------------------
Early stopping at step 16 due to reaching max kl: 0.01
---------------------

Early stopping at step 16 due to reaching max kl: 0.09
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.619       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 82          |
|    time_elapsed         | 3574        |
|    total_timesteps      | 307500      |
| train/                  |             |
|    approx_kl            | 0.053771075 |
|    clip_fraction        | 0.465       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.895       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.023      |
|    n_updates            | 1620        |
|    policy_gradient_loss | 0.00397     |
|    std                  | 0.055       |
|    value_loss           | 0.00382     |
-----------------------------------------
Early stopping at ste

Early stopping at step 17 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.622       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 91          |
|    time_elapsed         | 3961        |
|    total_timesteps      | 341250      |
| train/                  |             |
|    approx_kl            | 0.024736673 |
|    clip_fraction        | 0.487       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.896       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0223      |
|    n_updates            | 1800        |
|    policy_gradient_loss | 0.00398     |
|    std                  | 0.055       |
|    value_loss           | 0.00408     |
-----------------------------------------
Early stopping at ste

Early stopping at step 8 due to reaching max kl: 0.02
Eval num_timesteps=375000, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.663       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.626       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 100         |
|    time_elapsed         | 4347        |
|    total_timesteps      | 375000      |
| train/                  |             |
|    approx_kl            | 0.016773865 |
|    clip_fraction        | 0.439       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.891       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.038       |
|    n_updates      

Early stopping at step 10 due to reaching max kl: 0.06
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.625       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 109         |
|    time_elapsed         | 4716        |
|    total_timesteps      | 408750      |
| train/                  |             |
|    approx_kl            | 0.035814498 |
|    clip_fraction        | 0.42        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.899       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0294      |
|    n_updates            | 2160        |
|    policy_gradient_loss | 0.00215     |
|    std                  | 0.055       |
|    value_loss           | 0.00372     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.631       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 118         |
|    time_elapsed         | 5108        |
|    total_timesteps      | 442500      |
| train/                  |             |
|    approx_kl            | 0.023619376 |
|    clip_fraction        | 0.495       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.895       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0504      |
|    n_updates            | 2340        |
|    policy_gradient_loss | -0.00175    |
|    std                  | 0.055       |
|    value_loss           | 0.00392     |
-----------------------------------------
Early stopping at ste

Early stopping at step 7 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.635      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 127        |
|    time_elapsed         | 5502       |
|    total_timesteps      | 476250     |
| train/                  |            |
|    approx_kl            | 0.02182373 |
|    clip_fraction        | 0.429      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.9        |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0527     |
|    n_updates            | 2520       |
|    policy_gradient_loss | 0.000725   |
|    std                  | 0.055      |
|    value_loss           | 0.00361    |
----------------------------------------
Early stopping at step 11 due to reaching ma

Early stopping at step 13 due to reaching max kl: 0.02
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.636      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 136        |
|    time_elapsed         | 5895       |
|    total_timesteps      | 510000     |
| train/                  |            |
|    approx_kl            | 0.02177685 |
|    clip_fraction        | 0.48       |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.904      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0498     |
|    n_updates            | 2700       |
|    policy_gradient_loss | 0.00272    |
|    std                  | 0.055      |
|    value_loss           | 0.00342    |
----------------------------------------
Early stopping at step 11 due to reaching m

Early stopping at step 6 due to reaching max kl: 0.05
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.638      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 145        |
|    time_elapsed         | 6284       |
|    total_timesteps      | 543750     |
| train/                  |            |
|    approx_kl            | 0.03134439 |
|    clip_fraction        | 0.364      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.901      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.068      |
|    n_updates            | 2880       |
|    policy_gradient_loss | 0.00277    |
|    std                  | 0.055      |
|    value_loss           | 0.00365    |
----------------------------------------
-----------------------------------------
| 

Early stopping at step 8 due to reaching max kl: 0.10
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.635      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 154        |
|    time_elapsed         | 6678       |
|    total_timesteps      | 577500     |
| train/                  |            |
|    approx_kl            | 0.05759328 |
|    clip_fraction        | 0.449      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.906      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0412     |
|    n_updates            | 3060       |
|    policy_gradient_loss | 0.00585    |
|    std                  | 0.055      |
|    value_loss           | 0.00338    |
----------------------------------------
Early stopping at step 6 due to reaching max

Early stopping at step 9 due to reaching max kl: 0.01
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 5         |
|    ep_rew_mean          | 0.642     |
| time/                   |           |
|    fps                  | 86        |
|    iterations           | 163       |
|    time_elapsed         | 7068      |
|    total_timesteps      | 611250    |
| train/                  |           |
|    approx_kl            | 0.0173422 |
|    clip_fraction        | 0.423     |
|    clip_range           | 0.1       |
|    entropy_loss         | -94.9     |
|    explained_variance   | 0.907     |
|    learning_rate        | 1e-05     |
|    loss                 | 0.0671    |
|    n_updates            | 3240      |
|    policy_gradient_loss | 0.000139  |
|    std                  | 0.055     |
|    value_loss           | 0.00374   |
---------------------------------------
Early stopping at step 11 due to reaching max kl: 0.02
-----------

Early stopping at step 18 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.644       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 172         |
|    time_elapsed         | 7461        |
|    total_timesteps      | 645000      |
| train/                  |             |
|    approx_kl            | 0.022268355 |
|    clip_fraction        | 0.49        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.899       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0581      |
|    n_updates            | 3420        |
|    policy_gradient_loss | 0.00398     |
|    std                  | 0.0549      |
|    value_loss           | 0.00449     |
-----------------------------------------
---------------------

Early stopping at step 12 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.648       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 181         |
|    time_elapsed         | 7853        |
|    total_timesteps      | 678750      |
| train/                  |             |
|    approx_kl            | 0.027443983 |
|    clip_fraction        | 0.476       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.901       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.031       |
|    n_updates            | 3600        |
|    policy_gradient_loss | 0.00226     |
|    std                  | 0.0549      |
|    value_loss           | 0.00394     |
-----------------------------------------
Early stopping at ste

Early stopping at step 10 due to reaching max kl: 0.02
Eval num_timesteps=712500, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.666      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.646      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 190        |
|    time_elapsed         | 8240       |
|    total_timesteps      | 712500     |
| train/                  |            |
|    approx_kl            | 0.01476346 |
|    clip_fraction        | 0.405      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.9        |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0554     |
|    n_updates   

Early stopping at step 6 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.647       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 199         |
|    time_elapsed         | 8607        |
|    total_timesteps      | 746250      |
| train/                  |             |
|    approx_kl            | 0.019568915 |
|    clip_fraction        | 0.361       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.902       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0531      |
|    n_updates            | 3960        |
|    policy_gradient_loss | -0.00053    |
|    std                  | 0.0549      |
|    value_loss           | 0.00391     |
-----------------------------------------
Early stopping at step

Early stopping at step 6 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.649      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 208        |
|    time_elapsed         | 8994       |
|    total_timesteps      | 780000     |
| train/                  |            |
|    approx_kl            | 0.02582194 |
|    clip_fraction        | 0.412      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.899      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0651     |
|    n_updates            | 4140       |
|    policy_gradient_loss | -0.0005    |
|    std                  | 0.0549     |
|    value_loss           | 0.00388    |
----------------------------------------
Early stopping at step 17 due to reaching ma

Early stopping at step 11 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.646       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 217         |
|    time_elapsed         | 9386        |
|    total_timesteps      | 813750      |
| train/                  |             |
|    approx_kl            | 0.019651432 |
|    clip_fraction        | 0.447       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.901       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0487      |
|    n_updates            | 4320        |
|    policy_gradient_loss | 0.0102      |
|    std                  | 0.0549      |
|    value_loss           | 0.00402     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.01
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.648      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 226        |
|    time_elapsed         | 9776       |
|    total_timesteps      | 847500     |
| train/                  |            |
|    approx_kl            | 0.01183754 |
|    clip_fraction        | 0.405      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.894      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0631     |
|    n_updates            | 4500       |
|    policy_gradient_loss | 0.00301    |
|    std                  | 0.0549     |
|    value_loss           | 0.00392    |
----------------------------------------
Early stopping at step 9 due to reaching max

Early stopping at step 7 due to reaching max kl: 0.10
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.649       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 235         |
|    time_elapsed         | 10163       |
|    total_timesteps      | 881250      |
| train/                  |             |
|    approx_kl            | 0.056971602 |
|    clip_fraction        | 0.409       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.896       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0612      |
|    n_updates            | 4680        |
|    policy_gradient_loss | -0.00109    |
|    std                  | 0.0549      |
|    value_loss           | 0.00406     |
-----------------------------------------
Early stopping at step

Early stopping at step 8 due to reaching max kl: 0.02
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.649      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 244        |
|    time_elapsed         | 10544      |
|    total_timesteps      | 915000     |
| train/                  |            |
|    approx_kl            | 0.01598027 |
|    clip_fraction        | 0.434      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.897      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0563     |
|    n_updates            | 4860       |
|    policy_gradient_loss | -0.000102  |
|    std                  | 0.0549     |
|    value_loss           | 0.00391    |
----------------------------------------
Early stopping at step 7 due to reaching max

Early stopping at step 6 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 253         |
|    time_elapsed         | 10926       |
|    total_timesteps      | 948750      |
| train/                  |             |
|    approx_kl            | 0.026696272 |
|    clip_fraction        | 0.366       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.906       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0597      |
|    n_updates            | 5040        |
|    policy_gradient_loss | 0.00197     |
|    std                  | 0.0549      |
|    value_loss           | 0.00378     |
-----------------------------------------
----------------------

Early stopping at step 8 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 262         |
|    time_elapsed         | 11311       |
|    total_timesteps      | 982500      |
| train/                  |             |
|    approx_kl            | 0.017771628 |
|    clip_fraction        | 0.392       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.902       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0623      |
|    n_updates            | 5220        |
|    policy_gradient_loss | 0.000928    |
|    std                  | 0.0549      |
|    value_loss           | 0.00398     |
-----------------------------------------
Early stopping at step

Early stopping at step 10 due to reaching max kl: 0.07
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.655      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 271        |
|    time_elapsed         | 11696      |
|    total_timesteps      | 1016250    |
| train/                  |            |
|    approx_kl            | 0.04197442 |
|    clip_fraction        | 0.449      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.901      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0487     |
|    n_updates            | 5400       |
|    policy_gradient_loss | 0.00561    |
|    std                  | 0.0549     |
|    value_loss           | 0.00414    |
----------------------------------------
Early stopping at step 8 due to reaching ma

Early stopping at step 6 due to reaching max kl: 0.05
Eval num_timesteps=1050000, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.666       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.655       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 280         |
|    time_elapsed         | 12078       |
|    total_timesteps      | 1050000     |
| train/                  |             |
|    approx_kl            | 0.038837492 |
|    clip_fraction        | 0.377       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.899       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0524      |
|    n_updates     

Early stopping at step 7 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.652       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 289         |
|    time_elapsed         | 12442       |
|    total_timesteps      | 1083750     |
| train/                  |             |
|    approx_kl            | 0.047111467 |
|    clip_fraction        | 0.411       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.9         |
|    learning_rate        | 1e-05       |
|    loss                 | 0.029       |
|    n_updates            | 5760        |
|    policy_gradient_loss | 0.00416     |
|    std                  | 0.0549      |
|    value_loss           | 0.00408     |
-----------------------------------------
Early stopping at step

Early stopping at step 10 due to reaching max kl: 0.08
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.653      |
| time/                   |            |
|    fps                  | 87         |
|    iterations           | 298        |
|    time_elapsed         | 12824      |
|    total_timesteps      | 1117500    |
| train/                  |            |
|    approx_kl            | 0.05145721 |
|    clip_fraction        | 0.437      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.898      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0437     |
|    n_updates            | 5940       |
|    policy_gradient_loss | -0.000255  |
|    std                  | 0.0549     |
|    value_loss           | 0.00394    |
----------------------------------------
Early stopping at step 6 due to reaching ma

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

seed 2
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
Box(-1.0, 1.0, (96,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_2l/seed_2
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.579    |
| time/              |          |
|    fps             | 72       |
|    iterations      | 1        |
|    time_elapsed    | 51       |
|    total_timesteps | 3750     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.574       |
| time/                   |             |
|    fps                  | 78          |
|    iterations           | 2           |
|    time_elapsed         | 95          |
|    total_timesteps      | 7500        |
| train/                  |             |
|    approx_kl            | 0.008252731 |
|    clip_fraction        | 0.357       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | -7.34       |
|    learning_rate        | 1e

Early stopping at step 15 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.585       |
| time/                   |             |
|    fps                  | 83          |
|    iterations           | 11          |
|    time_elapsed         | 491         |
|    total_timesteps      | 41250       |
| train/                  |             |
|    approx_kl            | 0.024672838 |
|    clip_fraction        | 0.453       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.707       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0282      |
|    n_updates            | 200         |
|    policy_gradient_loss | 0.0055      |
|    std                  | 0.055       |
|    value_loss           | 0.0094      |
-----------------------------------------
---------------------

Early stopping at step 12 due to reaching max kl: 0.02
Eval num_timesteps=75000, episode_reward=0.61 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.613       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.589       |
| time/                   |             |
|    fps                  | 84          |
|    iterations           | 20          |
|    time_elapsed         | 883         |
|    total_timesteps      | 75000       |
| train/                  |             |
|    approx_kl            | 0.015962683 |
|    clip_fraction        | 0.435       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.761       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0629      

Early stopping at step 12 due to reaching max kl: 0.03
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 5         |
|    ep_rew_mean          | 0.592     |
| time/                   |           |
|    fps                  | 86        |
|    iterations           | 29        |
|    time_elapsed         | 1252      |
|    total_timesteps      | 108750    |
| train/                  |           |
|    approx_kl            | 0.0209642 |
|    clip_fraction        | 0.429     |
|    clip_range           | 0.1       |
|    entropy_loss         | -94.8     |
|    explained_variance   | 0.788     |
|    learning_rate        | 1e-05     |
|    loss                 | 0.0545    |
|    n_updates            | 560       |
|    policy_gradient_loss | 0.00918   |
|    std                  | 0.055     |
|    value_loss           | 0.00753   |
---------------------------------------
Early stopping at step 8 due to reaching max kl: 0.01
Eval num_ti

Early stopping at step 10 due to reaching max kl: 0.05
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.595      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 38         |
|    time_elapsed         | 1644       |
|    total_timesteps      | 142500     |
| train/                  |            |
|    approx_kl            | 0.03165409 |
|    clip_fraction        | 0.415      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.813      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.024      |
|    n_updates            | 740        |
|    policy_gradient_loss | 0.00287    |
|    std                  | 0.055      |
|    value_loss           | 0.00632    |
----------------------------------------
Early stopping at step 9 due to reaching ma

Early stopping at step 17 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.594       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 47          |
|    time_elapsed         | 2036        |
|    total_timesteps      | 176250      |
| train/                  |             |
|    approx_kl            | 0.047238823 |
|    clip_fraction        | 0.493       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.84        |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0119     |
|    n_updates            | 920         |
|    policy_gradient_loss | 0.00628     |
|    std                  | 0.055       |
|    value_loss           | 0.00546     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.603       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 56          |
|    time_elapsed         | 2424        |
|    total_timesteps      | 210000      |
| train/                  |             |
|    approx_kl            | 0.015791787 |
|    clip_fraction        | 0.422       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.854       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0639      |
|    n_updates            | 1100        |
|    policy_gradient_loss | 0.00427     |
|    std                  | 0.055       |
|    value_loss           | 0.00493     |
-----------------------------------------
Early stopping at step

Early stopping at step 16 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.606      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 65         |
|    time_elapsed         | 2815       |
|    total_timesteps      | 243750     |
| train/                  |            |
|    approx_kl            | 0.02297461 |
|    clip_fraction        | 0.486      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.86       |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0418     |
|    n_updates            | 1280       |
|    policy_gradient_loss | 0.00783    |
|    std                  | 0.055      |
|    value_loss           | 0.00492    |
----------------------------------------
Early stopping at step 17 due to reaching m

Early stopping at step 14 due to reaching max kl: 0.05
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.609      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 74         |
|    time_elapsed         | 3207       |
|    total_timesteps      | 277500     |
| train/                  |            |
|    approx_kl            | 0.03257557 |
|    clip_fraction        | 0.472      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.872      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0405     |
|    n_updates            | 1460       |
|    policy_gradient_loss | 0.00793    |
|    std                  | 0.055      |
|    value_loss           | 0.00487    |
----------------------------------------
Early stopping at step 11 due to reaching m

Early stopping at step 17 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.613       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 83          |
|    time_elapsed         | 3595        |
|    total_timesteps      | 311250      |
| train/                  |             |
|    approx_kl            | 0.021687815 |
|    clip_fraction        | 0.488       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.873       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0318      |
|    n_updates            | 1640        |
|    policy_gradient_loss | 0.0106      |
|    std                  | 0.055       |
|    value_loss           | 0.00488     |
-----------------------------------------
Early stopping at ste

Early stopping at step 8 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.621      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 92         |
|    time_elapsed         | 3980       |
|    total_timesteps      | 345000     |
| train/                  |            |
|    approx_kl            | 0.02056545 |
|    clip_fraction        | 0.403      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.869      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0705     |
|    n_updates            | 1820       |
|    policy_gradient_loss | 0.0039     |
|    std                  | 0.055      |
|    value_loss           | 0.0046     |
----------------------------------------
Early stopping at step 13 due to reaching ma

Early stopping at step 11 due to reaching max kl: 0.02
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.621      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 101        |
|    time_elapsed         | 4368       |
|    total_timesteps      | 378750     |
| train/                  |            |
|    approx_kl            | 0.02023707 |
|    clip_fraction        | 0.476      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.884      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0622     |
|    n_updates            | 2000       |
|    policy_gradient_loss | 0.00577    |
|    std                  | 0.055      |
|    value_loss           | 0.00465    |
----------------------------------------
Early stopping at step 10 due to reaching m

Early stopping at step 15 due to reaching max kl: 0.05
Eval num_timesteps=412500, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.662       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.626       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 110         |
|    time_elapsed         | 4754        |
|    total_timesteps      | 412500      |
| train/                  |             |
|    approx_kl            | 0.035085943 |
|    clip_fraction        | 0.497       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.881       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0299      |
|    n_updates     

Early stopping at step 12 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.63        |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 119         |
|    time_elapsed         | 5120        |
|    total_timesteps      | 446250      |
| train/                  |             |
|    approx_kl            | 0.013848136 |
|    clip_fraction        | 0.42        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.879       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0634      |
|    n_updates            | 2360        |
|    policy_gradient_loss | 0.00437     |
|    std                  | 0.055       |
|    value_loss           | 0.00541     |
-----------------------------------------
Early stopping at ste

Early stopping at step 10 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.626       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 128         |
|    time_elapsed         | 5507        |
|    total_timesteps      | 480000      |
| train/                  |             |
|    approx_kl            | 0.035857268 |
|    clip_fraction        | 0.476       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.883       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0135      |
|    n_updates            | 2540        |
|    policy_gradient_loss | 0.0103      |
|    std                  | 0.055       |
|    value_loss           | 0.00466     |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.06
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.634      |
| time/                   |            |
|    fps                  | 87         |
|    iterations           | 137        |
|    time_elapsed         | 5899       |
|    total_timesteps      | 513750     |
| train/                  |            |
|    approx_kl            | 0.03696988 |
|    clip_fraction        | 0.472      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.884      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0485     |
|    n_updates            | 2720       |
|    policy_gradient_loss | 0.00894    |
|    std                  | 0.055      |
|    value_loss           | 0.0045     |
----------------------------------------
Early stopping at step 10 due to reaching m

Early stopping at step 18 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.634       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 146         |
|    time_elapsed         | 6286        |
|    total_timesteps      | 547500      |
| train/                  |             |
|    approx_kl            | 0.032635294 |
|    clip_fraction        | 0.526       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.883       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0214      |
|    n_updates            | 2900        |
|    policy_gradient_loss | 0.0097      |
|    std                  | 0.055       |
|    value_loss           | 0.00431     |
-----------------------------------------
Early stopping at ste

Early stopping at step 10 due to reaching max kl: 0.09
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.638       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 155         |
|    time_elapsed         | 6673        |
|    total_timesteps      | 581250      |
| train/                  |             |
|    approx_kl            | 0.058802072 |
|    clip_fraction        | 0.463       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.885       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0237      |
|    n_updates            | 3080        |
|    policy_gradient_loss | 0.00906     |
|    std                  | 0.055       |
|    value_loss           | 0.00408     |
-----------------------------------------
Early stopping at ste

Early stopping at step 8 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.639       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 164         |
|    time_elapsed         | 7063        |
|    total_timesteps      | 615000      |
| train/                  |             |
|    approx_kl            | 0.017600423 |
|    clip_fraction        | 0.437       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.887       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0608      |
|    n_updates            | 3260        |
|    policy_gradient_loss | 0.004       |
|    std                  | 0.055       |
|    value_loss           | 0.00429     |
-----------------------------------------
Early stopping at step

Early stopping at step 5 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.643       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 173         |
|    time_elapsed         | 7446        |
|    total_timesteps      | 648750      |
| train/                  |             |
|    approx_kl            | 0.023289006 |
|    clip_fraction        | 0.404       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.89        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0659      |
|    n_updates            | 3440        |
|    policy_gradient_loss | 0.00452     |
|    std                  | 0.055       |
|    value_loss           | 0.00391     |
-----------------------------------------
Early stopping at step

Early stopping at step 13 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.647       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 182         |
|    time_elapsed         | 7829        |
|    total_timesteps      | 682500      |
| train/                  |             |
|    approx_kl            | 0.025660444 |
|    clip_fraction        | 0.468       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.893       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0493      |
|    n_updates            | 3620        |
|    policy_gradient_loss | 0.00123     |
|    std                  | 0.055       |
|    value_loss           | 0.00434     |
-----------------------------------------
Early stopping at ste

Early stopping at step 6 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.648       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 191         |
|    time_elapsed         | 8210        |
|    total_timesteps      | 716250      |
| train/                  |             |
|    approx_kl            | 0.027612796 |
|    clip_fraction        | 0.398       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.893       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0559      |
|    n_updates            | 3800        |
|    policy_gradient_loss | 0.00421     |
|    std                  | 0.055       |
|    value_loss           | 0.00381     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.01
Eval num_timesteps=750000, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.664       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.646       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 200         |
|    time_elapsed         | 8596        |
|    total_timesteps      | 750000      |
| train/                  |             |
|    approx_kl            | 0.014868012 |
|    clip_fraction        | 0.417       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.89        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.065       |
|    n_updates      

Early stopping at step 7 due to reaching max kl: 0.04
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.647      |
| time/                   |            |
|    fps                  | 87         |
|    iterations           | 209        |
|    time_elapsed         | 8956       |
|    total_timesteps      | 783750     |
| train/                  |            |
|    approx_kl            | 0.03420556 |
|    clip_fraction        | 0.457      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.892      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0586     |
|    n_updates            | 4160       |
|    policy_gradient_loss | 0.00618    |
|    std                  | 0.055      |
|    value_loss           | 0.00382    |
----------------------------------------
Early stopping at step 4 due to reaching max

Early stopping at step 3 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.651       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 218         |
|    time_elapsed         | 9339        |
|    total_timesteps      | 817500      |
| train/                  |             |
|    approx_kl            | 0.012073245 |
|    clip_fraction        | 0.325       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.892       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0697      |
|    n_updates            | 4340        |
|    policy_gradient_loss | 0.00119     |
|    std                  | 0.055       |
|    value_loss           | 0.00431     |
-----------------------------------------
Early stopping at step

Early stopping at step 5 due to reaching max kl: 0.02
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.65       |
| time/                   |            |
|    fps                  | 87         |
|    iterations           | 227        |
|    time_elapsed         | 9722       |
|    total_timesteps      | 851250     |
| train/                  |            |
|    approx_kl            | 0.01861415 |
|    clip_fraction        | 0.382      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.894      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.06       |
|    n_updates            | 4520       |
|    policy_gradient_loss | 0.0033     |
|    std                  | 0.055      |
|    value_loss           | 0.0039     |
----------------------------------------
Early stopping at step 7 due to reaching max

Early stopping at step 7 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.651      |
| time/                   |            |
|    fps                  | 87         |
|    iterations           | 236        |
|    time_elapsed         | 10100      |
|    total_timesteps      | 885000     |
| train/                  |            |
|    approx_kl            | 0.02450258 |
|    clip_fraction        | 0.416      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.891      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0541     |
|    n_updates            | 4700       |
|    policy_gradient_loss | -0.00122   |
|    std                  | 0.055      |
|    value_loss           | 0.00421    |
----------------------------------------
Early stopping at step 7 due to reaching max

Early stopping at step 8 due to reaching max kl: 0.10
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 245         |
|    time_elapsed         | 10481       |
|    total_timesteps      | 918750      |
| train/                  |             |
|    approx_kl            | 0.058012135 |
|    clip_fraction        | 0.439       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.889       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0281      |
|    n_updates            | 4880        |
|    policy_gradient_loss | 0.00351     |
|    std                  | 0.055       |
|    value_loss           | 0.00416     |
-----------------------------------------
Early stopping at step

Early stopping at step 8 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.652       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 254         |
|    time_elapsed         | 10858       |
|    total_timesteps      | 952500      |
| train/                  |             |
|    approx_kl            | 0.027882334 |
|    clip_fraction        | 0.441       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.892       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0604      |
|    n_updates            | 5060        |
|    policy_gradient_loss | 0.00462     |
|    std                  | 0.055       |
|    value_loss           | 0.00428     |
-----------------------------------------
Early stopping at step

Early stopping at step 8 due to reaching max kl: 0.06
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.652       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 263         |
|    time_elapsed         | 11239       |
|    total_timesteps      | 986250      |
| train/                  |             |
|    approx_kl            | 0.040658563 |
|    clip_fraction        | 0.471       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.889       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.029       |
|    n_updates            | 5240        |
|    policy_gradient_loss | 0.00702     |
|    std                  | 0.055       |
|    value_loss           | 0.00411     |
-----------------------------------------
Early stopping at step

Early stopping at step 4 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.654       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 272         |
|    time_elapsed         | 11620       |
|    total_timesteps      | 1020000     |
| train/                  |             |
|    approx_kl            | 0.046799947 |
|    clip_fraction        | 0.372       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.887       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0651      |
|    n_updates            | 5420        |
|    policy_gradient_loss | 0.00177     |
|    std                  | 0.055       |
|    value_loss           | 0.00432     |
-----------------------------------------
Early stopping at step

Early stopping at step 10 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.654       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 281         |
|    time_elapsed         | 12003       |
|    total_timesteps      | 1053750     |
| train/                  |             |
|    approx_kl            | 0.021041213 |
|    clip_fraction        | 0.478       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.887       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0594      |
|    n_updates            | 5600        |
|    policy_gradient_loss | 0.0102      |
|    std                  | 0.055       |
|    value_loss           | 0.00471     |
-----------------------------------------
Early stopping at ste

Early stopping at step 4 due to reaching max kl: 0.02
Eval num_timesteps=1087500, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.664       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.654       |
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 290         |
|    time_elapsed         | 12380       |
|    total_timesteps      | 1087500     |
| train/                  |             |
|    approx_kl            | 0.023976453 |
|    clip_fraction        | 0.36        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.886       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0718      |
|    n_updates     

Early stopping at step 9 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.653      |
| time/                   |            |
|    fps                  | 88         |
|    iterations           | 299        |
|    time_elapsed         | 12737      |
|    total_timesteps      | 1121250    |
| train/                  |            |
|    approx_kl            | 0.02701819 |
|    clip_fraction        | 0.467      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.892      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0667     |
|    n_updates            | 5960       |
|    policy_gradient_loss | 0.0072     |
|    std                  | 0.055      |
|    value_loss           | 0.00429    |
----------------------------------------
Early stopping at step 7 due to reaching max

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

seed 3
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
Box(-1.0, 1.0, (96,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_2l/seed_3
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.582    |
| time/              |          |
|    fps             | 71       |
|    iterations      | 1        |
|    time_elapsed    | 52       |
|    total_timesteps | 3750     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.579       |
| time/                   |             |
|    fps                  | 79          |
|    iterations           | 2           |
|    time_elapsed         | 94          |
|    total_timesteps      | 7500        |
| train/                  |             |
|    approx_kl            | 0.009492418 |
|    clip_fraction        | 0.387       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | -4.36       |
|    learning_rate        | 1e

Early stopping at step 14 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.582       |
| time/                   |             |
|    fps                  | 84          |
|    iterations           | 11          |
|    time_elapsed         | 489         |
|    total_timesteps      | 41250       |
| train/                  |             |
|    approx_kl            | 0.017657954 |
|    clip_fraction        | 0.425       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.65        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0565      |
|    n_updates            | 200         |
|    policy_gradient_loss | 0.00315     |
|    std                  | 0.055       |
|    value_loss           | 0.0112      |
-----------------------------------------
---------------------

Early stopping at step 14 due to reaching max kl: 0.02
Eval num_timesteps=75000, episode_reward=0.64 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.636       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.588       |
| time/                   |             |
|    fps                  | 84          |
|    iterations           | 20          |
|    time_elapsed         | 883         |
|    total_timesteps      | 75000       |
| train/                  |             |
|    approx_kl            | 0.017095152 |
|    clip_fraction        | 0.433       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.727       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0414      

Early stopping at step 17 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.595       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 29          |
|    time_elapsed         | 1256        |
|    total_timesteps      | 108750      |
| train/                  |             |
|    approx_kl            | 0.026888479 |
|    clip_fraction        | 0.468       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.784       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0397      |
|    n_updates            | 560         |
|    policy_gradient_loss | 0.00637     |
|    std                  | 0.055       |
|    value_loss           | 0.0078      |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.595       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 38          |
|    time_elapsed         | 1649        |
|    total_timesteps      | 142500      |
| train/                  |             |
|    approx_kl            | 0.017825393 |
|    clip_fraction        | 0.455       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.828       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0558      |
|    n_updates            | 740         |
|    policy_gradient_loss | 0.00416     |
|    std                  | 0.055       |
|    value_loss           | 0.00618     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.02
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.602      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 47         |
|    time_elapsed         | 2041       |
|    total_timesteps      | 176250     |
| train/                  |            |
|    approx_kl            | 0.01359079 |
|    clip_fraction        | 0.378      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.845      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0491     |
|    n_updates            | 920        |
|    policy_gradient_loss | -0.000937  |
|    std                  | 0.055      |
|    value_loss           | 0.00544    |
----------------------------------------
Early stopping at step 8 due to reaching max

Early stopping at step 18 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.601       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 56          |
|    time_elapsed         | 2435        |
|    total_timesteps      | 210000      |
| train/                  |             |
|    approx_kl            | 0.028881453 |
|    clip_fraction        | 0.46        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.861       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0288      |
|    n_updates            | 1100        |
|    policy_gradient_loss | 0.000979    |
|    std                  | 0.055       |
|    value_loss           | 0.00489     |
-----------------------------------------
Early stopping at ste

Early stopping at step 16 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.61        |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 65          |
|    time_elapsed         | 2832        |
|    total_timesteps      | 243750      |
| train/                  |             |
|    approx_kl            | 0.024939252 |
|    clip_fraction        | 0.469       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.873       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0291      |
|    n_updates            | 1280        |
|    policy_gradient_loss | 0.00136     |
|    std                  | 0.055       |
|    value_loss           | 0.00458     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.616       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 74          |
|    time_elapsed         | 3219        |
|    total_timesteps      | 277500      |
| train/                  |             |
|    approx_kl            | 0.016968627 |
|    clip_fraction        | 0.446       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.881       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.052       |
|    n_updates            | 1460        |
|    policy_gradient_loss | 0.00396     |
|    std                  | 0.055       |
|    value_loss           | 0.00438     |
-----------------------------------------
Early stopping at step

Early stopping at step 16 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.617       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 83          |
|    time_elapsed         | 3613        |
|    total_timesteps      | 311250      |
| train/                  |             |
|    approx_kl            | 0.025824396 |
|    clip_fraction        | 0.455       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.88        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.00834     |
|    n_updates            | 1640        |
|    policy_gradient_loss | 0.0041      |
|    std                  | 0.055       |
|    value_loss           | 0.00459     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.622       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 92          |
|    time_elapsed         | 4003        |
|    total_timesteps      | 345000      |
| train/                  |             |
|    approx_kl            | 0.019184284 |
|    clip_fraction        | 0.397       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.885       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0472      |
|    n_updates            | 1820        |
|    policy_gradient_loss | 0.0139      |
|    std                  | 0.055       |
|    value_loss           | 0.00461     |
-----------------------------------------
Early stopping at step

Early stopping at step 11 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.626       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 101         |
|    time_elapsed         | 4398        |
|    total_timesteps      | 378750      |
| train/                  |             |
|    approx_kl            | 0.011194302 |
|    clip_fraction        | 0.419       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.884       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0649      |
|    n_updates            | 2000        |
|    policy_gradient_loss | -0.0041     |
|    std                  | 0.055       |
|    value_loss           | 0.00486     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.02
Eval num_timesteps=412500, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.665      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.628      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 110        |
|    time_elapsed         | 4787       |
|    total_timesteps      | 412500     |
| train/                  |            |
|    approx_kl            | 0.01631487 |
|    clip_fraction        | 0.407      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.895      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0536     |
|    n_updates            | 2180       |

Early stopping at step 19 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.633       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 119         |
|    time_elapsed         | 5159        |
|    total_timesteps      | 446250      |
| train/                  |             |
|    approx_kl            | 0.033255283 |
|    clip_fraction        | 0.475       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.899       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.033       |
|    n_updates            | 2360        |
|    policy_gradient_loss | 0.00542     |
|    std                  | 0.055       |
|    value_loss           | 0.00407     |
-----------------------------------------
Early stopping at ste

Early stopping at step 11 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.633       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 128         |
|    time_elapsed         | 5551        |
|    total_timesteps      | 480000      |
| train/                  |             |
|    approx_kl            | 0.015861819 |
|    clip_fraction        | 0.402       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.891       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0606      |
|    n_updates            | 2540        |
|    policy_gradient_loss | 0.000409    |
|    std                  | 0.055       |
|    value_loss           | 0.00447     |
-----------------------------------------
Early stopping at ste

Early stopping at step 11 due to reaching max kl: 0.06
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.637       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 137         |
|    time_elapsed         | 5940        |
|    total_timesteps      | 513750      |
| train/                  |             |
|    approx_kl            | 0.037228093 |
|    clip_fraction        | 0.46        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.893       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0249      |
|    n_updates            | 2720        |
|    policy_gradient_loss | 0.00723     |
|    std                  | 0.055       |
|    value_loss           | 0.00385     |
-----------------------------------------
Early stopping at ste

Early stopping at step 17 due to reaching max kl: 0.05
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.639      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 146        |
|    time_elapsed         | 6328       |
|    total_timesteps      | 547500     |
| train/                  |            |
|    approx_kl            | 0.03198263 |
|    clip_fraction        | 0.481      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.896      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0117     |
|    n_updates            | 2900       |
|    policy_gradient_loss | 0.000962   |
|    std                  | 0.055      |
|    value_loss           | 0.00418    |
----------------------------------------
Early stopping at step 8 due to reaching ma

Early stopping at step 10 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.643       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 155         |
|    time_elapsed         | 6717        |
|    total_timesteps      | 581250      |
| train/                  |             |
|    approx_kl            | 0.020601235 |
|    clip_fraction        | 0.419       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.898       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0453      |
|    n_updates            | 3080        |
|    policy_gradient_loss | 0.00284     |
|    std                  | 0.0549      |
|    value_loss           | 0.00414     |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.643       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 164         |
|    time_elapsed         | 7110        |
|    total_timesteps      | 615000      |
| train/                  |             |
|    approx_kl            | 0.016409453 |
|    clip_fraction        | 0.45        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.897       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.065       |
|    n_updates            | 3260        |
|    policy_gradient_loss | 0.000464    |
|    std                  | 0.0549      |
|    value_loss           | 0.00414     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.646       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 173         |
|    time_elapsed         | 7496        |
|    total_timesteps      | 648750      |
| train/                  |             |
|    approx_kl            | 0.034608312 |
|    clip_fraction        | 0.442       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.9         |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0508      |
|    n_updates            | 3440        |
|    policy_gradient_loss | 0.0013      |
|    std                  | 0.0549      |
|    value_loss           | 0.00388     |
-----------------------------------------
Early stopping at step

Early stopping at step 12 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.646       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 182         |
|    time_elapsed         | 7885        |
|    total_timesteps      | 682500      |
| train/                  |             |
|    approx_kl            | 0.051155366 |
|    clip_fraction        | 0.467       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.904       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0124      |
|    n_updates            | 3620        |
|    policy_gradient_loss | 0.00791     |
|    std                  | 0.0549      |
|    value_loss           | 0.00372     |
-----------------------------------------
Early stopping at ste

Early stopping at step 14 due to reaching max kl: 0.02
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.646      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 191        |
|    time_elapsed         | 8279       |
|    total_timesteps      | 716250     |
| train/                  |            |
|    approx_kl            | 0.01858769 |
|    clip_fraction        | 0.464      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.903      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0501     |
|    n_updates            | 3800       |
|    policy_gradient_loss | 0.00888    |
|    std                  | 0.0549     |
|    value_loss           | 0.00409    |
----------------------------------------
Early stopping at step 12 due to reaching m

Early stopping at step 7 due to reaching max kl: 0.10
Eval num_timesteps=750000, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.666      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.648      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 200        |
|    time_elapsed         | 8666       |
|    total_timesteps      | 750000     |
| train/                  |            |
|    approx_kl            | 0.05450105 |
|    clip_fraction        | 0.435      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.898      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0125    |
|    n_updates            | 3980       |

Early stopping at step 15 due to reaching max kl: 0.04
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.647      |
| time/                   |            |
|    fps                  | 86         |
|    iterations           | 209        |
|    time_elapsed         | 9029       |
|    total_timesteps      | 783750     |
| train/                  |            |
|    approx_kl            | 0.02923814 |
|    clip_fraction        | 0.495      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.896      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0431     |
|    n_updates            | 4160       |
|    policy_gradient_loss | 0.00256    |
|    std                  | 0.0549     |
|    value_loss           | 0.0042     |
----------------------------------------
Early stopping at step 8 due to reaching ma

Early stopping at step 10 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.651       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 218         |
|    time_elapsed         | 9416        |
|    total_timesteps      | 817500      |
| train/                  |             |
|    approx_kl            | 0.017004507 |
|    clip_fraction        | 0.452       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.89        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0636      |
|    n_updates            | 4340        |
|    policy_gradient_loss | 0.004       |
|    std                  | 0.0549      |
|    value_loss           | 0.00408     |
-----------------------------------------
Early stopping at ste

Early stopping at step 18 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.652       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 227         |
|    time_elapsed         | 9809        |
|    total_timesteps      | 851250      |
| train/                  |             |
|    approx_kl            | 0.016949488 |
|    clip_fraction        | 0.501       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.894       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0641      |
|    n_updates            | 4520        |
|    policy_gradient_loss | -0.000448   |
|    std                  | 0.0549      |
|    value_loss           | 0.00401     |
-----------------------------------------
Early stopping at ste

Early stopping at step 7 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.649       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 236         |
|    time_elapsed         | 10199       |
|    total_timesteps      | 885000      |
| train/                  |             |
|    approx_kl            | 0.016009228 |
|    clip_fraction        | 0.377       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.892       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.057       |
|    n_updates            | 4700        |
|    policy_gradient_loss | 0.000461    |
|    std                  | 0.0549      |
|    value_loss           | 0.00412     |
-----------------------------------------
Early stopping at step

Early stopping at step 10 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 245         |
|    time_elapsed         | 10590       |
|    total_timesteps      | 918750      |
| train/                  |             |
|    approx_kl            | 0.024851602 |
|    clip_fraction        | 0.445       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.893       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.063       |
|    n_updates            | 4880        |
|    policy_gradient_loss | 0.00618     |
|    std                  | 0.0549      |
|    value_loss           | 0.00398     |
-----------------------------------------
Early stopping at ste

Early stopping at step 5 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 254         |
|    time_elapsed         | 10976       |
|    total_timesteps      | 952500      |
| train/                  |             |
|    approx_kl            | 0.012159151 |
|    clip_fraction        | 0.36        |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.888       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0706      |
|    n_updates            | 5060        |
|    policy_gradient_loss | -0.0012     |
|    std                  | 0.0549      |
|    value_loss           | 0.00448     |
-----------------------------------------
Early stopping at step

Early stopping at step 15 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 263         |
|    time_elapsed         | 11361       |
|    total_timesteps      | 986250      |
| train/                  |             |
|    approx_kl            | 0.018487278 |
|    clip_fraction        | 0.482       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.891       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0434      |
|    n_updates            | 5240        |
|    policy_gradient_loss | 0.00483     |
|    std                  | 0.0549      |
|    value_loss           | 0.00402     |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.653       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 272         |
|    time_elapsed         | 11750       |
|    total_timesteps      | 1020000     |
| train/                  |             |
|    approx_kl            | 0.029375732 |
|    clip_fraction        | 0.479       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.888       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0325      |
|    n_updates            | 5420        |
|    policy_gradient_loss | 0.00472     |
|    std                  | 0.0549      |
|    value_loss           | 0.00427     |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.07
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 5         |
|    ep_rew_mean          | 0.655     |
| time/                   |           |
|    fps                  | 86        |
|    iterations           | 281       |
|    time_elapsed         | 12138     |
|    total_timesteps      | 1053750   |
| train/                  |           |
|    approx_kl            | 0.0406888 |
|    clip_fraction        | 0.436     |
|    clip_range           | 0.1       |
|    entropy_loss         | -95       |
|    explained_variance   | 0.883     |
|    learning_rate        | 1e-05     |
|    loss                 | 0.0425    |
|    n_updates            | 5600      |
|    policy_gradient_loss | 0.00244   |
|    std                  | 0.0549    |
|    value_loss           | 0.00464   |
---------------------------------------
Early stopping at step 9 due to reaching max kl: 0.08
-----------

Early stopping at step 11 due to reaching max kl: 0.03
Eval num_timesteps=1087500, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.665       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.654       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 290         |
|    time_elapsed         | 12522       |
|    total_timesteps      | 1087500     |
| train/                  |             |
|    approx_kl            | 0.025507905 |
|    clip_fraction        | 0.434       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.888       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0501      |
|    n_updates    

Early stopping at step 14 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.656       |
| time/                   |             |
|    fps                  | 86          |
|    iterations           | 299         |
|    time_elapsed         | 12888       |
|    total_timesteps      | 1121250     |
| train/                  |             |
|    approx_kl            | 0.024203675 |
|    clip_fraction        | 0.497       |
|    clip_range           | 0.1         |
|    entropy_loss         | -95         |
|    explained_variance   | 0.894       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0538      |
|    n_updates            | 5960        |
|    policy_gradient_loss | 0.00372     |
|    std                  | 0.0549      |
|    value_loss           | 0.00404     |
-----------------------------------------
Early stopping at ste

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in