In [1]:
# Make the project root importable so functions defined there can be accessed.
# NOTE(review): this is a machine-specific absolute path; adjust it when the
# notebook is run on another host.
import sys
PROJECT_ROOT = '/data/ad181/RemoteDir/multilevel_ppo'
# Guard the append so re-running this cell does not stack duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
%matplotlib notebook
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from tqdm.notebook import trange, tqdm

import gym
from stable_baselines3.ppo import PPO, MlpPolicy
from stable_baselines3.ppo_multi_level import PPO_ML
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env.subproc_vec_multi_level_env import SubprocVecMultiLevelEnv
from stable_baselines3.common.envs.multi_level_ressim_env import MultiLevelRessimEnv
from stable_baselines3.common.logger import configure

from utils.custom_eval_callback import CustomEvalCallback, CustomEvalCallbackParallel
from utils.plot_functions import plot_learning
from utils.env_evaluate_functions import eval_actions

In [3]:
# Run configuration: default seed, experiment (case) name and output folders.
seed = 1
case = 'ppo_3l'
data_dir = './data'
log_dir = f'{data_dir}/{case}'  # per-case folder inside the data directory

In [4]:
# Create the output folders; folders that already exist are left untouched.
for out_dir in (data_dir, log_dir):
    os.makedirs(out_dir, exist_ok=True)

In [5]:
# Load the pickled dictionary of training environments; later cells index it
# with 1-based integer level keys.
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
# The file handle is deliberately NOT called `input`: at notebook scope that
# would rebind the builtin `input()` to a closed file object for the whole session.
with open('../envs_params/env_data_v1/env_train_dict.pkl', 'rb') as env_file:
    env_ck_dict = pickle.load(env_file)

In [6]:
# Build `env_dict_`: take the last `n_levels` levels of `env_ck_dict` and re-key
# them as 1..n_levels (the printed lines show "new level -> source level").
# The per-level n_steps and minibatch dictionaries are built later, in the
# training cell, not here.
n_levels = 3
fine_level = len(env_ck_dict)  # keys are used as 1-based levels, so this is the last one
# Fail early with a clear message instead of a KeyError on a non-existent level.
if not 1 <= n_levels <= fine_level:
    raise ValueError(f'n_levels must be between 1 and {fine_level}, got {n_levels}')
env_dict_ = {}
first_source_level = fine_level - n_levels + 1
for new_level, source_level in enumerate(range(first_source_level, fine_level + 1), start=1):
    print(new_level, '->', source_level)
    env_dict_[new_level] = env_ck_dict[source_level]

1 -> 3
2 -> 4
3 -> 5


In [7]:
# Train one PPO_ML model per seed (1, 2, 3) and save each run under
# ./data/<case>/seed_<seed>.
#
# The hyper-parameters do not depend on the seed, so they are defined once
# before the loop. Dictionary keys are the levels of `env_dict_`.
T = {1:80, 2:10, 3:5} # n_steps per level
N = 50 # number of actors (parallel envs created per level)
M = {1:400, 2:50, 3:25} # minibatch size per level
I = 300 # number of iterations
K = 20 # number of epochs

# Integer division keeps `eval_freq` below an int rather than a float.
log_interval = I // 30

fine_level = len(env_dict_)

for seed in range(1, 4):
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)

    print('generate callback ...')
    # Evaluation runs on the finest-level environment.
    eval_callback = CustomEvalCallback( env_dict_[fine_level],
                                        best_model_save_path=log_dir+'/best_model',
                                        n_eval_episodes=1,
                                        log_path=log_dir+'/results_eval',
                                        eval_freq=log_interval*sum(T.values()) )

    print('vectorize environment ...')

    # generate PPO_ML parameters for MLMC analysis.
    env_dict = {}
    n_steps_dict = {}
    batch_size_dict = {}
    try:
        for level, env in env_dict_.items():
            print(f"vectorize env level {level}")
            env_dict[level] = make_vec_env( MultiLevelRessimEnv,
                                    n_envs=N,
                                    seed=seed,
                                    env_kwargs= {"ressim_params":env.ressim_params, "level":env.level},
                                    vec_env_cls=SubprocVecMultiLevelEnv )
            n_steps_dict[level] = T[level]
            batch_size_dict[level] = M[level]

        # Reference the finest level explicitly instead of relying on the
        # loop variable that leaks out of the loop above.
        print(env_dict_[fine_level].observation_space)
        print('model definition ..')
        model = PPO_ML(policy=MlpPolicy,
                       env=env_dict,
                       learning_rate = 1e-5,
                       n_steps = n_steps_dict,
                       batch_size = batch_size_dict,
                       n_epochs = K,
                       clip_range = 0.1,
                       ent_coef = 0.001,
                       vf_coef = 0.5,
                       policy_kwargs = dict(net_arch=[150,100,80], log_std_init=-2.9),
                       verbose = 1,
                       seed = seed,
                       target_kl = 0.05,
                       device = "auto")
        # set logger for the model
        new_logger = configure(log_dir)
        model.set_logger(new_logger)
        print('policy learning ..')
        model.learn(total_timesteps=N*sum(T.values())*I, callback=eval_callback)
        model.save(log_dir+'/PPO', exclude=['env_dict'])
        del model
    finally:
        # Always shut down the vectorized environments, even when training or
        # saving fails, so their worker processes are not left running.
        for vec_env in env_dict.values():
            vec_env.close()


seed 1
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
vectorize env level 3
Box(-1.0, 1.0, (96,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_3l/seed_1
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.582    |
| time/              |          |
|    fps             | 81       |
|    iterations      | 1        |
|    time_elapsed    | 58       |
|    total_timesteps | 4750     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.58         |
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 2            |
|    time_elapsed         | 99           |
|    total_timesteps      | 9500         |
| train/                  |              |
|    approx_kl            | 0.0064921943 |
|    clip_fraction        | 0.313        |
|    clip_range           | 0.1          |
|    entropy_loss         | -94.8        |
|    explained_variance   | -4.14        |
|    learning_r

  for j in range(len(p_1)-1):


Eval num_timesteps=47500, episode_reward=0.60 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.6         |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.58        |
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 10          |
|    time_elapsed         | 455         |
|    total_timesteps      | 47500       |
| train/                  |             |
|    approx_kl            | 0.036009382 |
|    clip_fraction        | 0.464       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.689       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.00366     |
|    n_updates            | 180         |
|    policy

Early stopping at step 13 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.59        |
| time/                   |             |
|    fps                  | 110         |
|    iterations           | 19          |
|    time_elapsed         | 815         |
|    total_timesteps      | 90250       |
| train/                  |             |
|    approx_kl            | 0.043496788 |
|    clip_fraction        | 0.473       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.761       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.00253    |
|    n_updates            | 360         |
|    policy_gradient_loss | 0.0124      |
|    std                  | 0.055       |
|    value_loss           | 0.00924     |
-----------------------------------------
Early stopping at ste

Early stopping at step 5 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.591       |
| time/                   |             |
|    fps                  | 111         |
|    iterations           | 28          |
|    time_elapsed         | 1192        |
|    total_timesteps      | 133000      |
| train/                  |             |
|    approx_kl            | 0.013399586 |
|    clip_fraction        | 0.341       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.796       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0568      |
|    n_updates            | 540         |
|    policy_gradient_loss | 0.00545     |
|    std                  | 0.055       |
|    value_loss           | 0.0081      |
-----------------------------------------
Early stopping at step

Early stopping at step 13 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.597       |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 37          |
|    time_elapsed         | 1568        |
|    total_timesteps      | 175750      |
| train/                  |             |
|    approx_kl            | 0.024962112 |
|    clip_fraction        | 0.458       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.83        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0283      |
|    n_updates            | 720         |
|    policy_gradient_loss | 0.00908     |
|    std                  | 0.055       |
|    value_loss           | 0.00631     |
-----------------------------------------
Early stopping at ste

Early stopping at step 11 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.6         |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 46          |
|    time_elapsed         | 1948        |
|    total_timesteps      | 218500      |
| train/                  |             |
|    approx_kl            | 0.022165732 |
|    clip_fraction        | 0.442       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.861       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0431      |
|    n_updates            | 900         |
|    policy_gradient_loss | 0.00653     |
|    std                  | 0.055       |
|    value_loss           | 0.00526     |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.605       |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 55          |
|    time_elapsed         | 2326        |
|    total_timesteps      | 261250      |
| train/                  |             |
|    approx_kl            | 0.017004125 |
|    clip_fraction        | 0.448       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.872       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0353      |
|    n_updates            | 1080        |
|    policy_gradient_loss | 0.00965     |
|    std                  | 0.055       |
|    value_loss           | 0.00497     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.07
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.604       |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 64          |
|    time_elapsed         | 2700        |
|    total_timesteps      | 304000      |
| train/                  |             |
|    approx_kl            | 0.040494844 |
|    clip_fraction        | 0.468       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.886       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.00839    |
|    n_updates            | 1260        |
|    policy_gradient_loss | 0.0101      |
|    std                  | 0.055       |
|    value_loss           | 0.00473     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.612       |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 73          |
|    time_elapsed         | 3074        |
|    total_timesteps      | 346750      |
| train/                  |             |
|    approx_kl            | 0.030430617 |
|    clip_fraction        | 0.5         |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.887       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0563      |
|    n_updates            | 1440        |
|    policy_gradient_loss | 0.0107      |
|    std                  | 0.055       |
|    value_loss           | 0.00435     |
-----------------------------------------
Early stopping at ste

Early stopping at step 8 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.618       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 82          |
|    time_elapsed         | 3445        |
|    total_timesteps      | 389500      |
| train/                  |             |
|    approx_kl            | 0.030873077 |
|    clip_fraction        | 0.413       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.892       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0709      |
|    n_updates            | 1620        |
|    policy_gradient_loss | 0.009       |
|    std                  | 0.055       |
|    value_loss           | 0.00435     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.621       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 91          |
|    time_elapsed         | 3820        |
|    total_timesteps      | 432250      |
| train/                  |             |
|    approx_kl            | 0.018573014 |
|    clip_fraction        | 0.435       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.9         |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0728      |
|    n_updates            | 1800        |
|    policy_gradient_loss | 0.00919     |
|    std                  | 0.055       |
|    value_loss           | 0.00419     |
-----------------------------------------
Early stopping at step

Early stopping at step 12 due to reaching max kl: 0.02
Eval num_timesteps=475000, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.663       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.627       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 100         |
|    time_elapsed         | 4197        |
|    total_timesteps      | 475000      |
| train/                  |             |
|    approx_kl            | 0.021374762 |
|    clip_fraction        | 0.457       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.895       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0377     

Early stopping at step 13 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.629       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 109         |
|    time_elapsed         | 4546        |
|    total_timesteps      | 517750      |
| train/                  |             |
|    approx_kl            | 0.015701672 |
|    clip_fraction        | 0.449       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.905       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0655      |
|    n_updates            | 2160        |
|    policy_gradient_loss | 0.0111      |
|    std                  | 0.055       |
|    value_loss           | 0.00416     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.02
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.629      |
| time/                   |            |
|    fps                  | 113        |
|    iterations           | 118        |
|    time_elapsed         | 4922       |
|    total_timesteps      | 560500     |
| train/                  |            |
|    approx_kl            | 0.02530717 |
|    clip_fraction        | 0.484      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.908      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0622     |
|    n_updates            | 2340       |
|    policy_gradient_loss | 0.0193     |
|    std                  | 0.055      |
|    value_loss           | 0.0039     |
----------------------------------------
Early stopping at step 11 due to reaching m

Early stopping at step 12 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.631       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 127         |
|    time_elapsed         | 5298        |
|    total_timesteps      | 603250      |
| train/                  |             |
|    approx_kl            | 0.022385836 |
|    clip_fraction        | 0.478       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.903       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0519      |
|    n_updates            | 2520        |
|    policy_gradient_loss | 0.0103      |
|    std                  | 0.055       |
|    value_loss           | 0.00404     |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.635       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 136         |
|    time_elapsed         | 5676        |
|    total_timesteps      | 646000      |
| train/                  |             |
|    approx_kl            | 0.032234017 |
|    clip_fraction        | 0.475       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.907       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0311      |
|    n_updates            | 2700        |
|    policy_gradient_loss | 0.0131      |
|    std                  | 0.055       |
|    value_loss           | 0.00423     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.635       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 145         |
|    time_elapsed         | 6050        |
|    total_timesteps      | 688750      |
| train/                  |             |
|    approx_kl            | 0.024448484 |
|    clip_fraction        | 0.444       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.908       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0482      |
|    n_updates            | 2880        |
|    policy_gradient_loss | 0.0111      |
|    std                  | 0.055       |
|    value_loss           | 0.00388     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.64        |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 154         |
|    time_elapsed         | 6424        |
|    total_timesteps      | 731500      |
| train/                  |             |
|    approx_kl            | 0.027003845 |
|    clip_fraction        | 0.465       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.907       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0437      |
|    n_updates            | 3060        |
|    policy_gradient_loss | 0.0149      |
|    std                  | 0.055       |
|    value_loss           | 0.00381     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.04
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.643      |
| time/                   |            |
|    fps                  | 113        |
|    iterations           | 163        |
|    time_elapsed         | 6804       |
|    total_timesteps      | 774250     |
| train/                  |            |
|    approx_kl            | 0.02571474 |
|    clip_fraction        | 0.419      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.917      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0525     |
|    n_updates            | 3240       |
|    policy_gradient_loss | 0.00472    |
|    std                  | 0.055      |
|    value_loss           | 0.00368    |
----------------------------------------
Early stopping at step 15 due to reaching ma

Early stopping at step 9 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.643       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 172         |
|    time_elapsed         | 7180        |
|    total_timesteps      | 817000      |
| train/                  |             |
|    approx_kl            | 0.035766285 |
|    clip_fraction        | 0.456       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.917       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0592      |
|    n_updates            | 3420        |
|    policy_gradient_loss | 0.0185      |
|    std                  | 0.0549      |
|    value_loss           | 0.00372     |
-----------------------------------------
Early stopping at step

Early stopping at step 14 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.645       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 181         |
|    time_elapsed         | 7556        |
|    total_timesteps      | 859750      |
| train/                  |             |
|    approx_kl            | 0.030704513 |
|    clip_fraction        | 0.466       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.912       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0395      |
|    n_updates            | 3600        |
|    policy_gradient_loss | 0.0101      |
|    std                  | 0.055       |
|    value_loss           | 0.0038      |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.03
Eval num_timesteps=902500, episode_reward=0.67 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.665       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.644       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 190         |
|    time_elapsed         | 7927        |
|    total_timesteps      | 902500      |
| train/                  |             |
|    approx_kl            | 0.037331175 |
|    clip_fraction        | 0.473       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.909       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0538     

Early stopping at step 15 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.647       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 199         |
|    time_elapsed         | 8276        |
|    total_timesteps      | 945250      |
| train/                  |             |
|    approx_kl            | 0.024528222 |
|    clip_fraction        | 0.474       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.91        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0556      |
|    n_updates            | 3960        |
|    policy_gradient_loss | 0.0133      |
|    std                  | 0.055       |
|    value_loss           | 0.00411     |
-----------------------------------------
Early stopping at ste

Early stopping at step 8 due to reaching max kl: 0.04
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.648      |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 208        |
|    time_elapsed         | 8642       |
|    total_timesteps      | 988000     |
| train/                  |            |
|    approx_kl            | 0.03878971 |
|    clip_fraction        | 0.433      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.907      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0577     |
|    n_updates            | 4140       |
|    policy_gradient_loss | 0.00993    |
|    std                  | 0.055      |
|    value_loss           | 0.00384    |
----------------------------------------
Early stopping at step 14 due to reaching ma

Early stopping at step 8 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.648       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 217         |
|    time_elapsed         | 9013        |
|    total_timesteps      | 1030750     |
| train/                  |             |
|    approx_kl            | 0.030778103 |
|    clip_fraction        | 0.435       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.912       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0546      |
|    n_updates            | 4320        |
|    policy_gradient_loss | 0.0083      |
|    std                  | 0.055       |
|    value_loss           | 0.00378     |
-----------------------------------------
Early stopping at step

Early stopping at step 10 due to reaching max kl: 0.06
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.646       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 226         |
|    time_elapsed         | 9384        |
|    total_timesteps      | 1073500     |
| train/                  |             |
|    approx_kl            | 0.068354756 |
|    clip_fraction        | 0.431       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.912       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0446      |
|    n_updates            | 4500        |
|    policy_gradient_loss | 0.00891     |
|    std                  | 0.055       |
|    value_loss           | 0.00407     |
-----------------------------------------
Early stopping at ste

Early stopping at step 11 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.647       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 235         |
|    time_elapsed         | 9747        |
|    total_timesteps      | 1116250     |
| train/                  |             |
|    approx_kl            | 0.028518885 |
|    clip_fraction        | 0.449       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.907       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0345      |
|    n_updates            | 4680        |
|    policy_gradient_loss | 0.00695     |
|    std                  | 0.0549      |
|    value_loss           | 0.00423     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.649       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 244         |
|    time_elapsed         | 10112       |
|    total_timesteps      | 1159000     |
| train/                  |             |
|    approx_kl            | 0.044240084 |
|    clip_fraction        | 0.444       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.906       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0278      |
|    n_updates            | 4860        |
|    policy_gradient_loss | 0.0121      |
|    std                  | 0.0549      |
|    value_loss           | 0.00389     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.06
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.647       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 253         |
|    time_elapsed         | 10480       |
|    total_timesteps      | 1201750     |
| train/                  |             |
|    approx_kl            | 0.041466508 |
|    clip_fraction        | 0.438       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.9         |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0698      |
|    n_updates            | 5040        |
|    policy_gradient_loss | 0.011       |
|    std                  | 0.0549      |
|    value_loss           | 0.00436     |
-----------------------------------------
Early stopping at step

Early stopping at step 7 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.65        |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 262         |
|    time_elapsed         | 10840       |
|    total_timesteps      | 1244500     |
| train/                  |             |
|    approx_kl            | 0.030505354 |
|    clip_fraction        | 0.418       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.891       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0517      |
|    n_updates            | 5220        |
|    policy_gradient_loss | 0.00647     |
|    std                  | 0.0549      |
|    value_loss           | 0.00491     |
-----------------------------------------
Early stopping at step

Early stopping at step 11 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.65        |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 271         |
|    time_elapsed         | 11205       |
|    total_timesteps      | 1287250     |
| train/                  |             |
|    approx_kl            | 0.027014395 |
|    clip_fraction        | 0.46        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.89        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0398      |
|    n_updates            | 5400        |
|    policy_gradient_loss | 0.0106      |
|    std                  | 0.0549      |
|    value_loss           | 0.00487     |
-----------------------------------------
Early stopping at ste

Early stopping at step 10 due to reaching max kl: 0.06
Eval num_timesteps=1330000, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.664       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.651       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 280         |
|    time_elapsed         | 11571       |
|    total_timesteps      | 1330000     |
| train/                  |             |
|    approx_kl            | 0.034577057 |
|    clip_fraction        | 0.481       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.878       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0385      |
|    n_updates    

Early stopping at step 10 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.648       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 289         |
|    time_elapsed         | 11912       |
|    total_timesteps      | 1372750     |
| train/                  |             |
|    approx_kl            | 0.049485426 |
|    clip_fraction        | 0.455       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.886       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0323      |
|    n_updates            | 5760        |
|    policy_gradient_loss | 0.0122      |
|    std                  | 0.0549      |
|    value_loss           | 0.00499     |
-----------------------------------------
Early stopping at ste

Early stopping at step 8 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.65       |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 298        |
|    time_elapsed         | 12280      |
|    total_timesteps      | 1415500    |
| train/                  |            |
|    approx_kl            | 0.04142836 |
|    clip_fraction        | 0.419      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.888      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0154     |
|    n_updates            | 5940       |
|    policy_gradient_loss | 0.00707    |
|    std                  | 0.0549     |
|    value_loss           | 0.00492    |
----------------------------------------
Early stopping at step 6 due to reaching max

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

seed 2
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
vectorize env level 3
Box(-1.0, 1.0, (96,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_3l/seed_2
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.579    |
| time/              |          |
|    fps             | 81       |
|    iterations      | 1        |
|    time_elapsed    | 58       |
|    total_timesteps | 4750     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.58        |
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 2           |
|    time_elapsed         | 100         |
|    total_timesteps      | 9500        |
| train/                  |             |
|    approx_kl            | 0.007150023 |
|    clip_fraction        | 0.308       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | -7.61       |
|    learning_rate        | 1e

Early stopping at step 12 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.581       |
| time/                   |             |
|    fps                  | 107         |
|    iterations           | 11          |
|    time_elapsed         | 485         |
|    total_timesteps      | 52250       |
| train/                  |             |
|    approx_kl            | 0.023623241 |
|    clip_fraction        | 0.458       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.691       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0379      |
|    n_updates            | 200         |
|    policy_gradient_loss | 0.0102      |
|    std                  | 0.055       |
|    value_loss           | 0.0109      |
-----------------------------------------
Early stopping at ste

Early stopping at step 14 due to reaching max kl: 0.04
Eval num_timesteps=95000, episode_reward=0.61 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.613       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.587       |
| time/                   |             |
|    fps                  | 109         |
|    iterations           | 20          |
|    time_elapsed         | 869         |
|    total_timesteps      | 95000       |
| train/                  |             |
|    approx_kl            | 0.029782658 |
|    clip_fraction        | 0.482       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.774       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0189      

Early stopping at step 17 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.593       |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 29          |
|    time_elapsed         | 1225        |
|    total_timesteps      | 137750      |
| train/                  |             |
|    approx_kl            | 0.025131742 |
|    clip_fraction        | 0.507       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.802       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0466      |
|    n_updates            | 560         |
|    policy_gradient_loss | 0.0114      |
|    std                  | 0.055       |
|    value_loss           | 0.00775     |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.599       |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 38          |
|    time_elapsed         | 1599        |
|    total_timesteps      | 180500      |
| train/                  |             |
|    approx_kl            | 0.022454953 |
|    clip_fraction        | 0.442       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.822       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0472      |
|    n_updates            | 740         |
|    policy_gradient_loss | 0.00954     |
|    std                  | 0.055       |
|    value_loss           | 0.0068      |
-----------------------------------------
Early stopping at ste

Early stopping at step 19 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.601       |
| time/                   |             |
|    fps                  | 112         |
|    iterations           | 47          |
|    time_elapsed         | 1975        |
|    total_timesteps      | 223250      |
| train/                  |             |
|    approx_kl            | 0.034214303 |
|    clip_fraction        | 0.524       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.838       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0188      |
|    n_updates            | 920         |
|    policy_gradient_loss | 0.0128      |
|    std                  | 0.055       |
|    value_loss           | 0.00587     |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.604       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 56          |
|    time_elapsed         | 2348        |
|    total_timesteps      | 266000      |
| train/                  |             |
|    approx_kl            | 0.027752563 |
|    clip_fraction        | 0.466       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.856       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.00862     |
|    n_updates            | 1100        |
|    policy_gradient_loss | 0.0113      |
|    std                  | 0.055       |
|    value_loss           | 0.00606     |
-----------------------------------------
Early stopping at ste

Early stopping at step 11 due to reaching max kl: 0.02
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 5         |
|    ep_rew_mean          | 0.607     |
| time/                   |           |
|    fps                  | 113       |
|    iterations           | 65        |
|    time_elapsed         | 2717      |
|    total_timesteps      | 308750    |
| train/                  |           |
|    approx_kl            | 0.0277123 |
|    clip_fraction        | 0.485     |
|    clip_range           | 0.1       |
|    entropy_loss         | -94.8     |
|    explained_variance   | 0.856     |
|    learning_rate        | 1e-05     |
|    loss                 | 0.0318    |
|    n_updates            | 1280      |
|    policy_gradient_loss | 0.0115    |
|    std                  | 0.055     |
|    value_loss           | 0.00542   |
---------------------------------------
Early stopping at step 12 due to reaching max kl: 0.03
----------

Early stopping at step 13 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.611       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 74          |
|    time_elapsed         | 3095        |
|    total_timesteps      | 351500      |
| train/                  |             |
|    approx_kl            | 0.017130233 |
|    clip_fraction        | 0.442       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.86        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0442      |
|    n_updates            | 1460        |
|    policy_gradient_loss | 0.00769     |
|    std                  | 0.055       |
|    value_loss           | 0.00558     |
-----------------------------------------
Early stopping at ste

Early stopping at step 14 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.613       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 83          |
|    time_elapsed         | 3470        |
|    total_timesteps      | 394250      |
| train/                  |             |
|    approx_kl            | 0.049847256 |
|    clip_fraction        | 0.491       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.871       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0388     |
|    n_updates            | 1640        |
|    policy_gradient_loss | 0.0108      |
|    std                  | 0.055       |
|    value_loss           | 0.0051      |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.617       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 92          |
|    time_elapsed         | 3845        |
|    total_timesteps      | 437000      |
| train/                  |             |
|    approx_kl            | 0.051824246 |
|    clip_fraction        | 0.467       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.867       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0498     |
|    n_updates            | 1820        |
|    policy_gradient_loss | 0.0178      |
|    std                  | 0.055       |
|    value_loss           | 0.00546     |
-----------------------------------------
Early stopping at ste

Early stopping at step 14 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.62        |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 101         |
|    time_elapsed         | 4225        |
|    total_timesteps      | 479750      |
| train/                  |             |
|    approx_kl            | 0.027259352 |
|    clip_fraction        | 0.46        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.868       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0162      |
|    n_updates            | 2000        |
|    policy_gradient_loss | 0.0103      |
|    std                  | 0.055       |
|    value_loss           | 0.00537     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.06
Eval num_timesteps=522500, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.655       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.622       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 110         |
|    time_elapsed         | 4597        |
|    total_timesteps      | 522500      |
| train/                  |             |
|    approx_kl            | 0.043166835 |
|    clip_fraction        | 0.515       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.873       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.00681     |
|    n_updates     

Early stopping at step 14 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.623       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 119         |
|    time_elapsed         | 4946        |
|    total_timesteps      | 565250      |
| train/                  |             |
|    approx_kl            | 0.030786937 |
|    clip_fraction        | 0.505       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.873       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0417      |
|    n_updates            | 2360        |
|    policy_gradient_loss | 0.0128      |
|    std                  | 0.055       |
|    value_loss           | 0.00475     |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.631       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 128         |
|    time_elapsed         | 5317        |
|    total_timesteps      | 608000      |
| train/                  |             |
|    approx_kl            | 0.024485284 |
|    clip_fraction        | 0.466       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.87        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0308      |
|    n_updates            | 2540        |
|    policy_gradient_loss | 0.0113      |
|    std                  | 0.055       |
|    value_loss           | 0.00545     |
-----------------------------------------
Early stopping at ste

Early stopping at step 7 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.634       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 137         |
|    time_elapsed         | 5691        |
|    total_timesteps      | 650750      |
| train/                  |             |
|    approx_kl            | 0.026836744 |
|    clip_fraction        | 0.437       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.87        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0379      |
|    n_updates            | 2720        |
|    policy_gradient_loss | 0.0111      |
|    std                  | 0.055       |
|    value_loss           | 0.00531     |
-----------------------------------------
Early stopping at step

Early stopping at step 11 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.634      |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 146        |
|    time_elapsed         | 6062       |
|    total_timesteps      | 693500     |
| train/                  |            |
|    approx_kl            | 0.03250074 |
|    clip_fraction        | 0.485      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.874      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0573     |
|    n_updates            | 2900       |
|    policy_gradient_loss | 0.0136     |
|    std                  | 0.055      |
|    value_loss           | 0.0052     |
----------------------------------------
Early stopping at step 7 due to reaching ma

Early stopping at step 12 due to reaching max kl: 0.08
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.636      |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 155        |
|    time_elapsed         | 6433       |
|    total_timesteps      | 736250     |
| train/                  |            |
|    approx_kl            | 0.04355393 |
|    clip_fraction        | 0.469      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.872      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0243     |
|    n_updates            | 3080       |
|    policy_gradient_loss | 0.0103     |
|    std                  | 0.055      |
|    value_loss           | 0.00529    |
----------------------------------------
Early stopping at step 17 due to reaching m

Early stopping at step 6 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.639       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 164         |
|    time_elapsed         | 6805        |
|    total_timesteps      | 779000      |
| train/                  |             |
|    approx_kl            | 0.023824722 |
|    clip_fraction        | 0.398       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.874       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0463      |
|    n_updates            | 3260        |
|    policy_gradient_loss | 0.00817     |
|    std                  | 0.055       |
|    value_loss           | 0.00519     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.07
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.641      |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 173        |
|    time_elapsed         | 7174       |
|    total_timesteps      | 821750     |
| train/                  |            |
|    approx_kl            | 0.04367234 |
|    clip_fraction        | 0.444      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.868      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0509     |
|    n_updates            | 3440       |
|    policy_gradient_loss | 0.0147     |
|    std                  | 0.055      |
|    value_loss           | 0.00527    |
----------------------------------------
Early stopping at step 7 due to reaching max

Early stopping at step 7 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.643       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 182         |
|    time_elapsed         | 7543        |
|    total_timesteps      | 864500      |
| train/                  |             |
|    approx_kl            | 0.027213389 |
|    clip_fraction        | 0.415       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.866       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0448      |
|    n_updates            | 3620        |
|    policy_gradient_loss | 0.00506     |
|    std                  | 0.055       |
|    value_loss           | 0.00539     |
-----------------------------------------
Early stopping at step

Early stopping at step 11 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.645      |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 191        |
|    time_elapsed         | 7907       |
|    total_timesteps      | 907250     |
| train/                  |            |
|    approx_kl            | 0.03085573 |
|    clip_fraction        | 0.479      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.868      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0398     |
|    n_updates            | 3800       |
|    policy_gradient_loss | 0.0115     |
|    std                  | 0.055      |
|    value_loss           | 0.00543    |
----------------------------------------
Early stopping at step 13 due to reaching m

Early stopping at step 7 due to reaching max kl: 0.03
Eval num_timesteps=950000, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.662       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.645       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 200         |
|    time_elapsed         | 8274        |
|    total_timesteps      | 950000      |
| train/                  |             |
|    approx_kl            | 0.031095063 |
|    clip_fraction        | 0.44        |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.87        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.086       |
|    n_updates      

Early stopping at step 11 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.645       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 209         |
|    time_elapsed         | 8616        |
|    total_timesteps      | 992750      |
| train/                  |             |
|    approx_kl            | 0.042912584 |
|    clip_fraction        | 0.498       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.868       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.00033     |
|    n_updates            | 4160        |
|    policy_gradient_loss | 0.00899     |
|    std                  | 0.055       |
|    value_loss           | 0.00542     |
-----------------------------------------
Early stopping at ste

Early stopping at step 6 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.648       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 218         |
|    time_elapsed         | 8981        |
|    total_timesteps      | 1035500     |
| train/                  |             |
|    approx_kl            | 0.025159033 |
|    clip_fraction        | 0.404       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.867       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0663      |
|    n_updates            | 4340        |
|    policy_gradient_loss | 0.00999     |
|    std                  | 0.055       |
|    value_loss           | 0.00508     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.648       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 227         |
|    time_elapsed         | 9343        |
|    total_timesteps      | 1078250     |
| train/                  |             |
|    approx_kl            | 0.035681512 |
|    clip_fraction        | 0.477       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.865       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.027       |
|    n_updates            | 4520        |
|    policy_gradient_loss | 0.015       |
|    std                  | 0.055       |
|    value_loss           | 0.0057      |
-----------------------------------------
Early stopping at step

Early stopping at step 4 due to reaching max kl: 0.13
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.648      |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 236        |
|    time_elapsed         | 9704       |
|    total_timesteps      | 1121000    |
| train/                  |            |
|    approx_kl            | 0.07423345 |
|    clip_fraction        | 0.393      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.865      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.021      |
|    n_updates            | 4700       |
|    policy_gradient_loss | 0.00982    |
|    std                  | 0.055      |
|    value_loss           | 0.00524    |
----------------------------------------
Early stopping at step 4 due to reaching max

Early stopping at step 5 due to reaching max kl: 0.07
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.649       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 245         |
|    time_elapsed         | 10064       |
|    total_timesteps      | 1163750     |
| train/                  |             |
|    approx_kl            | 0.048985336 |
|    clip_fraction        | 0.436       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.858       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0515      |
|    n_updates            | 4880        |
|    policy_gradient_loss | 0.0135      |
|    std                  | 0.055       |
|    value_loss           | 0.00593     |
-----------------------------------------
Early stopping at step

Early stopping at step 5 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.649       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 254         |
|    time_elapsed         | 10427       |
|    total_timesteps      | 1206500     |
| train/                  |             |
|    approx_kl            | 0.025343293 |
|    clip_fraction        | 0.432       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.85        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0525      |
|    n_updates            | 5060        |
|    policy_gradient_loss | 0.00923     |
|    std                  | 0.055       |
|    value_loss           | 0.00588     |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.10
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.65       |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 263        |
|    time_elapsed         | 10794      |
|    total_timesteps      | 1249250    |
| train/                  |            |
|    approx_kl            | 0.06436578 |
|    clip_fraction        | 0.512      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.846      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.00391    |
|    n_updates            | 5240       |
|    policy_gradient_loss | 0.0166     |
|    std                  | 0.0549     |
|    value_loss           | 0.00656    |
----------------------------------------
Early stopping at step 5 due to reaching max

Early stopping at step 5 due to reaching max kl: 0.09
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.648      |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 272        |
|    time_elapsed         | 11153      |
|    total_timesteps      | 1292000    |
| train/                  |            |
|    approx_kl            | 0.05897382 |
|    clip_fraction        | 0.459      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.846      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0236    |
|    n_updates            | 5420       |
|    policy_gradient_loss | 0.0117     |
|    std                  | 0.0549     |
|    value_loss           | 0.00649    |
----------------------------------------
Early stopping at step 12 due to reaching ma

Early stopping at step 7 due to reaching max kl: 0.07
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.652       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 281         |
|    time_elapsed         | 11517       |
|    total_timesteps      | 1334750     |
| train/                  |             |
|    approx_kl            | 0.049201634 |
|    clip_fraction        | 0.494       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.838       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0382      |
|    n_updates            | 5600        |
|    policy_gradient_loss | 0.0254      |
|    std                  | 0.0549      |
|    value_loss           | 0.0065      |
-----------------------------------------
Early stopping at step

Early stopping at step 7 due to reaching max kl: 0.08
Eval num_timesteps=1377500, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.659      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.656      |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 290        |
|    time_elapsed         | 11880      |
|    total_timesteps      | 1377500    |
| train/                  |            |
|    approx_kl            | 0.05292807 |
|    clip_fraction        | 0.508      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.844      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0172     |
|    n_updates            | 5780       

Early stopping at step 9 due to reaching max kl: 0.04
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.651      |
| time/                   |            |
|    fps                  | 116        |
|    iterations           | 299        |
|    time_elapsed         | 12222      |
|    total_timesteps      | 1420250    |
| train/                  |            |
|    approx_kl            | 0.04056969 |
|    clip_fraction        | 0.572      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.849      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0602     |
|    n_updates            | 5960       |
|    policy_gradient_loss | 0.0243     |
|    std                  | 0.0549     |
|    value_loss           | 0.00626    |
----------------------------------------
Early stopping at step 8 due to reaching max

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

seed 3
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
vectorize env level 3
Box(-1.0, 1.0, (96,), float64)
model definition ..
Using cuda device
Logging to ./data/ppo_3l/seed_3
policy learning ..




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 5        |
|    ep_rew_mean     | 0.586    |
| time/              |          |
|    fps             | 82       |
|    iterations      | 1        |
|    time_elapsed    | 57       |
|    total_timesteps | 4750     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.579       |
| time/                   |             |
|    fps                  | 96          |
|    iterations           | 2           |
|    time_elapsed         | 98          |
|    total_timesteps      | 9500        |
| train/                  |             |
|    approx_kl            | 0.013840185 |
|    clip_fraction        | 0.356       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | -4.62       |
|    learning_rate        | 1e

----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.612      |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.583      |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 10         |
|    time_elapsed         | 441        |
|    total_timesteps      | 47500      |
| train/                  |            |
|    approx_kl            | 0.04541217 |
|    clip_fraction        | 0.487      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.638      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0041    |
|    n_updates            | 180        |
|    policy_gradient_loss | 0.0134     |
|    std                  | 0.055      |
|    value_loss           | 0.013      |
----------------

Early stopping at step 13 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.586       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 19          |
|    time_elapsed         | 796         |
|    total_timesteps      | 90250       |
| train/                  |             |
|    approx_kl            | 0.026061853 |
|    clip_fraction        | 0.469       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.747       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0272      |
|    n_updates            | 360         |
|    policy_gradient_loss | 0.0171      |
|    std                  | 0.055       |
|    value_loss           | 0.00994     |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.596       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 28          |
|    time_elapsed         | 1174        |
|    total_timesteps      | 133000      |
| train/                  |             |
|    approx_kl            | 0.019132864 |
|    clip_fraction        | 0.457       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.795       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0345      |
|    n_updates            | 540         |
|    policy_gradient_loss | 0.00753     |
|    std                  | 0.055       |
|    value_loss           | 0.00764     |
-----------------------------------------
Early stopping at ste

Early stopping at step 11 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.596       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 37          |
|    time_elapsed         | 1542        |
|    total_timesteps      | 175750      |
| train/                  |             |
|    approx_kl            | 0.027491653 |
|    clip_fraction        | 0.448       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.821       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0312      |
|    n_updates            | 720         |
|    policy_gradient_loss | 0.00578     |
|    std                  | 0.055       |
|    value_loss           | 0.00668     |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.6         |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 46          |
|    time_elapsed         | 1916        |
|    total_timesteps      | 218500      |
| train/                  |             |
|    approx_kl            | 0.026161158 |
|    clip_fraction        | 0.467       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.84        |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0384      |
|    n_updates            | 900         |
|    policy_gradient_loss | 0.00921     |
|    std                  | 0.055       |
|    value_loss           | 0.0059      |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.604       |
| time/                   |             |
|    fps                  | 113         |
|    iterations           | 55          |
|    time_elapsed         | 2292        |
|    total_timesteps      | 261250      |
| train/                  |             |
|    approx_kl            | 0.022140034 |
|    clip_fraction        | 0.427       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.857       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0665      |
|    n_updates            | 1080        |
|    policy_gradient_loss | 0.00832     |
|    std                  | 0.055       |
|    value_loss           | 0.00567     |
-----------------------------------------
Early stopping at ste

Early stopping at step 11 due to reaching max kl: 0.06
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.607      |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 64         |
|    time_elapsed         | 2659       |
|    total_timesteps      | 304000     |
| train/                  |            |
|    approx_kl            | 0.03709027 |
|    clip_fraction        | 0.466      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.871      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0135     |
|    n_updates            | 1260       |
|    policy_gradient_loss | 0.0156     |
|    std                  | 0.0551     |
|    value_loss           | 0.00473    |
----------------------------------------
Early stopping at step 10 due to reaching m

Early stopping at step 9 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.613       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 73          |
|    time_elapsed         | 3031        |
|    total_timesteps      | 346750      |
| train/                  |             |
|    approx_kl            | 0.014087887 |
|    clip_fraction        | 0.412       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.873       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0394      |
|    n_updates            | 1440        |
|    policy_gradient_loss | 0.00343     |
|    std                  | 0.0551      |
|    value_loss           | 0.0052      |
-----------------------------------------
Early stopping at step

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.618       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 82          |
|    time_elapsed         | 3406        |
|    total_timesteps      | 389500      |
| train/                  |             |
|    approx_kl            | 0.017111676 |
|    clip_fraction        | 0.439       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.884       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0581      |
|    n_updates            | 1620        |
|    policy_gradient_loss | 0.00925     |
|    std                  | 0.0551      |
|    value_loss           | 0.00479     |
-----------------------------------------
Early stopping at step

Early stopping at step 11 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.62       |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 91         |
|    time_elapsed         | 3777       |
|    total_timesteps      | 432250     |
| train/                  |            |
|    approx_kl            | 0.01997569 |
|    clip_fraction        | 0.432      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.881      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0361     |
|    n_updates            | 1800       |
|    policy_gradient_loss | 0.00737    |
|    std                  | 0.055      |
|    value_loss           | 0.00476    |
----------------------------------------
Early stopping at step 9 due to reaching ma

Early stopping at step 6 due to reaching max kl: 0.01
Eval num_timesteps=475000, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
------------------------------------------
| eval/                   |              |
|    mean_ep_length       | 5            |
|    mean_reward          | 0.663        |
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.623        |
| time/                   |              |
|    fps                  | 114          |
|    iterations           | 100          |
|    time_elapsed         | 4147         |
|    total_timesteps      | 475000       |
| train/                  |              |
|    approx_kl            | 0.0152402455 |
|    clip_fraction        | 0.352        |
|    clip_range           | 0.1          |
|    entropy_loss         | -94.8        |
|    explained_variance   | 0.888        |
|    learning_rate        | 1e-05        |
|    loss            

Early stopping at step 12 due to reaching max kl: 0.03
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.628      |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 109        |
|    time_elapsed         | 4497       |
|    total_timesteps      | 517750     |
| train/                  |            |
|    approx_kl            | 0.01993396 |
|    clip_fraction        | 0.447      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.881      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0337     |
|    n_updates            | 2160       |
|    policy_gradient_loss | 0.00745    |
|    std                  | 0.055      |
|    value_loss           | 0.00468    |
----------------------------------------
Early stopping at step 12 due to reaching m

Early stopping at step 16 due to reaching max kl: 0.08
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.627      |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 118        |
|    time_elapsed         | 4869       |
|    total_timesteps      | 560500     |
| train/                  |            |
|    approx_kl            | 0.04621775 |
|    clip_fraction        | 0.54       |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.891      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0498     |
|    n_updates            | 2340       |
|    policy_gradient_loss | 0.0163     |
|    std                  | 0.055      |
|    value_loss           | 0.00451    |
----------------------------------------
Early stopping at step 13 due to reaching m

Early stopping at step 11 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.631       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 127         |
|    time_elapsed         | 5247        |
|    total_timesteps      | 603250      |
| train/                  |             |
|    approx_kl            | 0.017105011 |
|    clip_fraction        | 0.438       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.895       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0557      |
|    n_updates            | 2520        |
|    policy_gradient_loss | 0.00852     |
|    std                  | 0.055       |
|    value_loss           | 0.00455     |
-----------------------------------------
Early stopping at ste

Early stopping at step 16 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.633       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 136         |
|    time_elapsed         | 5619        |
|    total_timesteps      | 646000      |
| train/                  |             |
|    approx_kl            | 0.028320132 |
|    clip_fraction        | 0.468       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.897       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0178      |
|    n_updates            | 2700        |
|    policy_gradient_loss | 0.0092      |
|    std                  | 0.055       |
|    value_loss           | 0.00444     |
-----------------------------------------
Early stopping at ste

Early stopping at step 12 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.639       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 145         |
|    time_elapsed         | 5992        |
|    total_timesteps      | 688750      |
| train/                  |             |
|    approx_kl            | 0.027176062 |
|    clip_fraction        | 0.476       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.901       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0378      |
|    n_updates            | 2880        |
|    policy_gradient_loss | 0.0128      |
|    std                  | 0.055       |
|    value_loss           | 0.00409     |
-----------------------------------------
Early stopping at ste

Early stopping at step 8 due to reaching max kl: 0.09
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.638       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 154         |
|    time_elapsed         | 6362        |
|    total_timesteps      | 731500      |
| train/                  |             |
|    approx_kl            | 0.045154672 |
|    clip_fraction        | 0.411       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.903       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0139      |
|    n_updates            | 3060        |
|    policy_gradient_loss | 0.00925     |
|    std                  | 0.055       |
|    value_loss           | 0.00398     |
-----------------------------------------
Early stopping at step

Early stopping at step 14 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.641       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 163         |
|    time_elapsed         | 6734        |
|    total_timesteps      | 774250      |
| train/                  |             |
|    approx_kl            | 0.027591819 |
|    clip_fraction        | 0.482       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.903       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0348      |
|    n_updates            | 3240        |
|    policy_gradient_loss | 0.0131      |
|    std                  | 0.055       |
|    value_loss           | 0.00416     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.641       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 172         |
|    time_elapsed         | 7110        |
|    total_timesteps      | 817000      |
| train/                  |             |
|    approx_kl            | 0.032074347 |
|    clip_fraction        | 0.459       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.905       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0287      |
|    n_updates            | 3420        |
|    policy_gradient_loss | 0.0112      |
|    std                  | 0.055       |
|    value_loss           | 0.00399     |
-----------------------------------------
Early stopping at ste

Early stopping at step 5 due to reaching max kl: 0.01
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.641       |
| time/                   |             |
|    fps                  | 114         |
|    iterations           | 181         |
|    time_elapsed         | 7477        |
|    total_timesteps      | 859750      |
| train/                  |             |
|    approx_kl            | 0.016190296 |
|    clip_fraction        | 0.375       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.902       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0526      |
|    n_updates            | 3600        |
|    policy_gradient_loss | 0.00223     |
|    std                  | 0.055       |
|    value_loss           | 0.00386     |
-----------------------------------------
Early stopping at step

Early stopping at step 11 due to reaching max kl: 0.08
Eval num_timesteps=902500, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 5          |
|    mean_reward          | 0.66       |
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.645      |
| time/                   |            |
|    fps                  | 114        |
|    iterations           | 190        |
|    time_elapsed         | 7849       |
|    total_timesteps      | 902500     |
| train/                  |            |
|    approx_kl            | 0.04309203 |
|    clip_fraction        | 0.474      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.904      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0163     |
|    n_updates            | 3780       

Early stopping at step 9 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.644       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 199         |
|    time_elapsed         | 8200        |
|    total_timesteps      | 945250      |
| train/                  |             |
|    approx_kl            | 0.021658316 |
|    clip_fraction        | 0.447       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.906       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0466      |
|    n_updates            | 3960        |
|    policy_gradient_loss | 0.00565     |
|    std                  | 0.055       |
|    value_loss           | 0.00377     |
-----------------------------------------
Early stopping at step

Early stopping at step 11 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.644       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 208         |
|    time_elapsed         | 8574        |
|    total_timesteps      | 988000      |
| train/                  |             |
|    approx_kl            | 0.021851545 |
|    clip_fraction        | 0.462       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.906       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0474      |
|    n_updates            | 4140        |
|    policy_gradient_loss | 0.00856     |
|    std                  | 0.055       |
|    value_loss           | 0.00379     |
-----------------------------------------
Early stopping at ste

Early stopping at step 13 due to reaching max kl: 0.04
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.647       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 217         |
|    time_elapsed         | 8943        |
|    total_timesteps      | 1030750     |
| train/                  |             |
|    approx_kl            | 0.027300293 |
|    clip_fraction        | 0.475       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.905       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0337      |
|    n_updates            | 4320        |
|    policy_gradient_loss | 0.0145      |
|    std                  | 0.055       |
|    value_loss           | 0.00422     |
-----------------------------------------
Early stopping at ste

Early stopping at step 15 due to reaching max kl: 0.05
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.645      |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 226        |
|    time_elapsed         | 9318       |
|    total_timesteps      | 1073500    |
| train/                  |            |
|    approx_kl            | 0.03316216 |
|    clip_fraction        | 0.523      |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.8      |
|    explained_variance   | 0.901      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0228    |
|    n_updates            | 4500       |
|    policy_gradient_loss | 0.0181     |
|    std                  | 0.055      |
|    value_loss           | 0.00392    |
----------------------------------------
Early stopping at step 13 due to reaching m

Early stopping at step 11 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.646       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 235         |
|    time_elapsed         | 9692        |
|    total_timesteps      | 1116250     |
| train/                  |             |
|    approx_kl            | 0.020454383 |
|    clip_fraction        | 0.485       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.905       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0665      |
|    n_updates            | 4680        |
|    policy_gradient_loss | 0.0108      |
|    std                  | 0.055       |
|    value_loss           | 0.00389     |
-----------------------------------------
Early stopping at ste

Early stopping at step 9 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.644       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 244         |
|    time_elapsed         | 10064       |
|    total_timesteps      | 1159000     |
| train/                  |             |
|    approx_kl            | 0.042613238 |
|    clip_fraction        | 0.473       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.903       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0145      |
|    n_updates            | 4860        |
|    policy_gradient_loss | 0.0179      |
|    std                  | 0.055       |
|    value_loss           | 0.00386     |
-----------------------------------------
Early stopping at step

Early stopping at step 10 due to reaching max kl: 0.03
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.642       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 253         |
|    time_elapsed         | 10432       |
|    total_timesteps      | 1201750     |
| train/                  |             |
|    approx_kl            | 0.024708718 |
|    clip_fraction        | 0.478       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.906       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0266      |
|    n_updates            | 5040        |
|    policy_gradient_loss | 0.0184      |
|    std                  | 0.055       |
|    value_loss           | 0.00382     |
-----------------------------------------
Early stopping at ste

Early stopping at step 6 due to reaching max kl: 0.05
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.649       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 262         |
|    time_elapsed         | 10798       |
|    total_timesteps      | 1244500     |
| train/                  |             |
|    approx_kl            | 0.029931888 |
|    clip_fraction        | 0.463       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.8       |
|    explained_variance   | 0.9         |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0161      |
|    n_updates            | 5220        |
|    policy_gradient_loss | 0.0195      |
|    std                  | 0.055       |
|    value_loss           | 0.00436     |
-----------------------------------------
Early stopping at step

Early stopping at step 6 due to reaching max kl: 0.08
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.647       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 271         |
|    time_elapsed         | 11166       |
|    total_timesteps      | 1287250     |
| train/                  |             |
|    approx_kl            | 0.047421064 |
|    clip_fraction        | 0.436       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.898       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.00676     |
|    n_updates            | 5400        |
|    policy_gradient_loss | 0.0167      |
|    std                  | 0.055       |
|    value_loss           | 0.00393     |
-----------------------------------------
Early stopping at step

Early stopping at step 4 due to reaching max kl: 0.03
Eval num_timesteps=1330000, episode_reward=0.66 +/- 0.00
Episode length: 5.00 +/- 0.00
-----------------------------------------
| eval/                   |             |
|    mean_ep_length       | 5           |
|    mean_reward          | 0.659       |
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.645       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 280         |
|    time_elapsed         | 11528       |
|    total_timesteps      | 1330000     |
| train/                  |             |
|    approx_kl            | 0.032643918 |
|    clip_fraction        | 0.455       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.889       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0284      |
|    n_updates     

Early stopping at step 7 due to reaching max kl: 0.04
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5          |
|    ep_rew_mean          | 0.643      |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 289        |
|    time_elapsed         | 11863      |
|    total_timesteps      | 1372750    |
| train/                  |            |
|    approx_kl            | 0.03960452 |
|    clip_fraction        | 0.54       |
|    clip_range           | 0.1        |
|    entropy_loss         | -94.9      |
|    explained_variance   | 0.889      |
|    learning_rate        | 1e-05      |
|    loss                 | 0.0143     |
|    n_updates            | 5760       |
|    policy_gradient_loss | 0.0224     |
|    std                  | 0.0549     |
|    value_loss           | 0.00406    |
----------------------------------------
Early stopping at step 4 due to reaching max

Early stopping at step 2 due to reaching max kl: 0.02
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.645       |
| time/                   |             |
|    fps                  | 115         |
|    iterations           | 298         |
|    time_elapsed         | 12220       |
|    total_timesteps      | 1415500     |
| train/                  |             |
|    approx_kl            | 0.028468246 |
|    clip_fraction        | 0.361       |
|    clip_range           | 0.1         |
|    entropy_loss         | -94.9       |
|    explained_variance   | 0.888       |
|    learning_rate        | 1e-05       |
|    loss                 | 0.0919      |
|    n_updates            | 5940        |
|    policy_gradient_loss | 0.0113      |
|    std                  | 0.0549      |
|    value_loss           | 0.00448     |
-----------------------------------------
Early stopping at step

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in