In [1]:
# Make the project's environment package importable from this notebook.
import sys

# NOTE(review): machine-specific absolute path; adjust it when running elsewhere.
ENVS_DIR = '/data/ad181/RemoteDir/k_variability_in_ressim_env/SPE10_like_envs/'
# Guard the append so that re-running this cell does not pile up duplicate entries.
if ENVS_DIR not in sys.path:
    sys.path.append(ENVS_DIR)

In [2]:
%matplotlib notebook
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt

import gym
from stable_baselines3.ppo import PPO, MlpPolicy
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.callbacks import CallbackList
from utils.custom_eval_callback import CustomEvalCallback
from typing import Callable

from utils.plot_functions import plot_learning

from model.ressim import Grid
from ressim_env import ResSimEnv_v0, ResSimEnv_v1, ResSimEnv_v2
from k_distributions.generate_constr_k import generate_cond_
from utils.env_wrappers import StepReset

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Run configuration: default RNG seed and the name of this experiment case
# (the case name is used to build the output directory).
seed, case = 1, '1ph_v0_clustered'

In [4]:
# Ensure the output tree exists: the shared ./data folder first, then the
# sub-folder for this case. exist_ok=True makes the cell safe to re-run.
for out_dir in ('./data', './data/'+case):
    os.makedirs(out_dir, exist_ok=True)

In [5]:
# Load the pickled training / evaluation environments and replace the training
# permeability fields with the clustered set.
# NOTE: pickle.load can execute arbitrary code stored in the file -- only load
# files from a trusted source.
case_ = '1ph_v0'
# The file handle is named `f` (not `input`) so the builtin input() is not
# shadowed for the rest of the kernel session.
with open('../envs_params/env_data/env_'+case_+'_train.pkl', 'rb') as f:
    env_train = pickle.load(f)

# Indices of the permeability fields kept in the evaluation environment.
rl_indices = [2,4,6,7,9,14,19,21,23]
with open('../envs_params/env_data/env_'+case_+'_eval.pkl', 'rb') as f:
    env_eval = pickle.load(f)
# List-based indexing: presumably k_list is a numpy array -- TODO confirm.
k_list_rl = env_eval.k_list[rl_indices]
env_eval.set_k(k_list_rl)

# A list of evaluation environments (indexed individually in the training cell).
with open('../envs_params/env_data/env_list_'+case_+'_eval.pkl', 'rb') as f:
    envs = pickle.load(f)

# The file name suggests log-permeability in millidarcy; exponentiate and scale
# by 1/1.01325e15 (mD -> m^2, going by the variable name) -- TODO confirm units.
k_train_clustered = np.load('../envs_params/k_data/k_log_md_'+case_+'_train_clustered.npy')
md_m2_conv = 1/1.01325e+15
k_train_clustered = md_m2_conv*np.exp(k_train_clustered)
env_train.set_k(k_train_clustered)

In [6]:
# Wrap every environment in StepReset.
env_train = StepReset(env_train)
env_eval = StepReset(env_eval)
# Rebuild the list: `for env in envs: env = StepReset(env)` only rebinds the
# loop variable and leaves the entries of `envs` unwrapped.
envs = [StepReset(env) for env in envs]

In [7]:
def make_env(env, rank: int, seed: int = 0) -> Callable:
    """
    Build a zero-argument factory that seeds ``env`` and hands it back.

    The returned callable does not copy the environment: it seeds the very
    object passed in and returns that same object.

    :param env: environment instance to return; must provide a ``seed()`` method
    :param rank: (int) index of the subprocess, added to ``seed``
    :param seed: (int) base seed for the RNG
    :return: (Callable) function that calls ``env.seed(seed + rank)`` and returns ``env``
    """
    def _init() -> gym.Env:
        # Offset the base seed by the worker index.
        worker_seed = seed + rank
        env.seed(worker_seed)
        return env
    return _init

In [8]:
# Train one PPO agent per seed. Each seed gets its own log directory, its own
# set of evaluation callbacks, a saved model and two learning-curve figures.
num_cpu = 64  # number of worker processes in the SubprocVecEnv
for seed in range(1,6):
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)
    env_train.seed(seed)
    env_eval.seed(seed)

    # Callbacks: one on the training env, one per entry of `envs`, and one on
    # the evaluation env (the only one given a best-model save path).
    train_callback = CustomEvalCallback(env_train, best_model_save_path=None, n_eval_episodes=1,
                         log_path=str(log_dir)+'/results_train', eval_freq=100)
    callback_list = [train_callback]
    # NOTE(review): assumes `envs` holds at least 25 environments -- confirm.
    for i in range(25):
        envs[i].seed(seed)
        eval_callback = CustomEvalCallback(envs[i], best_model_save_path=None, n_eval_episodes=1,
                         log_path=str(log_dir)+'/results_eval_'+str(i), eval_freq=300)
        callback_list.append(eval_callback)
    eval_callback = CustomEvalCallback(env_eval, best_model_save_path=str(log_dir)+'/best_model_eval', n_eval_episodes=1,
                         log_path=str(log_dir)+'/results_eval', eval_freq=100)
    callback_list.append(eval_callback)
    callback = CallbackList(callback_list)

    # Worker i is seeded with seed + i (see make_env).
    env = SubprocVecEnv([make_env(env_train, i, seed) for i in range(num_cpu)])
#     env = VecMonitor(env, filename=log_dir)
    try:
        print(f'seed {seed}: model definition ..')
        model = PPO(policy=MlpPolicy,
            env=env,
            learning_rate = 1e-5,
            n_steps = 4,
            batch_size = 32,
            n_epochs = 20,
            gamma = 0.99,
            gae_lambda = 0.95,
            clip_range = 0.2,
            clip_range_vf = None,
            ent_coef = 0.001,
            vf_coef = 0.5,
            max_grad_norm = 0.5,
            use_sde= False,
            create_eval_env= False,
            policy_kwargs = dict(net_arch=[4000,2000,800,300], log_std_init=-1.9),
            verbose = 1,
            target_kl =0.05,
            seed = seed,
            device = "auto")
        print(f'seed {seed}: learning ..')
        model.learn(total_timesteps=300000, callback=callback)
        model.save(log_dir+'/PPO')
    finally:
        # Shut the worker processes down explicitly so they do not accumulate
        # across seeds, including when training fails or is interrupted.
        env.close()

    fig = plot_learning(log_dir, case='train')
    fig.savefig(log_dir+'/learn_train.png')
    fig = plot_learning(log_dir, case='eval')
    fig.savefig(log_dir+'/learn_eval.png')

seed 1
seed 1: model definition ..
Using cuda device
seed 1: learning ..




----------------------------
| time/              |     |
|    fps             | 142 |
|    iterations      | 1   |
|    time_elapsed    | 1   |
|    total_timesteps | 256 |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 88          |
|    iterations           | 2           |
|    time_elapsed         | 5           |
|    total_timesteps      | 512         |
| train/                  |             |
|    approx_kl            | 0.010806677 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.92        |
|    explained_variance   | 0.24        |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0405     |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.0442     |
|    std                  | 0.15        |
|    value_loss           | 0.0171      |
-----------------------------------------

---------------------------------------
| time/                   |           |
|    fps                  | 68        |
|    iterations           | 13        |
|    time_elapsed         | 48        |
|    total_timesteps      | 3328      |
| train/                  |           |
|    approx_kl            | 0.0389212 |
|    clip_fraction        | 0.388     |
|    clip_range           | 0.2       |
|    entropy_loss         | 1.94      |
|    explained_variance   | 0.896     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.138    |
|    n_updates            | 240       |
|    policy_gradient_loss | -0.0857   |
|    std                  | 0.149     |
|    value_loss           | 0.0019    |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 68          |
|    iterations           | 14          |
|    time_elapsed         | 52          |
|    total_timesteps      | 35

Early stopping at step 17 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 24          |
|    time_elapsed         | 90          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.075598195 |
|    clip_fraction        | 0.413       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.96        |
|    explained_variance   | 0.903       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.142      |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.087      |
|    std                  | 0.148       |
|    value_loss           | 0.00314     |
-----------------------------------------
Eval num_timesteps=6400, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=6400, episode_reward=0

----------------------------------------
| time/                   |            |
|    fps                  | 64         |
|    iterations           | 34         |
|    time_elapsed         | 135        |
|    total_timesteps      | 8704       |
| train/                  |            |
|    approx_kl            | 0.05424126 |
|    clip_fraction        | 0.435      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.97       |
|    explained_variance   | 0.941      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.113     |
|    n_updates            | 660        |
|    policy_gradient_loss | -0.0882    |
|    std                  | 0.148      |
|    value_loss           | 0.00129    |
----------------------------------------
Early stopping at step 12 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 64         |
|    iterations           | 35         |
| 

----------------------------------------
| time/                   |            |
|    fps                  | 66         |
|    iterations           | 45         |
|    time_elapsed         | 173        |
|    total_timesteps      | 11520      |
| train/                  |            |
|    approx_kl            | 0.02811397 |
|    clip_fraction        | 0.455      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.99       |
|    explained_variance   | 0.922      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.121     |
|    n_updates            | 880        |
|    policy_gradient_loss | -0.0982    |
|    std                  | 0.147      |
|    value_loss           | 0.00134    |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 66          |
|    iterations           | 46          |
|    time_elapsed         | 177         |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 64         |
|    iterations           | 55         |
|    time_elapsed         | 218        |
|    total_timesteps      | 14080      |
| train/                  |            |
|    approx_kl            | 0.07025857 |
|    clip_fraction        | 0.513      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2          |
|    explained_variance   | 0.932      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0926    |
|    n_updates            | 1080       |
|    policy_gradient_loss | -0.0936    |
|    std                  | 0.147      |
|    value_loss           | 0.00139    |
----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 64        |
|    iterations           | 56        |
|    time_elapsed         | 222       |
|    total_timesteps 

-----------------------------------------
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 65          |
|    time_elapsed         | 246         |
|    total_timesteps      | 16640       |
| train/                  |             |
|    approx_kl            | 0.079616465 |
|    clip_fraction        | 0.452       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.01        |
|    explained_variance   | 0.939       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.114      |
|    n_updates            | 1280        |
|    policy_gradient_loss | -0.0831     |
|    std                  | 0.147       |
|    value_loss           | 0.00156     |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 66          |
|    time_elapsed         | 249   

Eval num_timesteps=19200, episode_reward=0.57 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.54 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.53 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.63 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.56 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.60 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.50 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.49 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_reward=0.54 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=19200, episode_rew

Early stopping at step 15 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 62         |
|    iterations           | 82         |
|    time_elapsed         | 337        |
|    total_timesteps      | 20992      |
| train/                  |            |
|    approx_kl            | 0.07577585 |
|    clip_fraction        | 0.481      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.02       |
|    explained_variance   | 0.94       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.125     |
|    n_updates            | 1620       |
|    policy_gradient_loss | -0.0937    |
|    std                  | 0.146      |
|    value_loss           | 0.00165    |
----------------------------------------
Early stopping at step 8 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 

Early stopping at step 3 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 64         |
|    iterations           | 92         |
|    time_elapsed         | 365        |
|    total_timesteps      | 23552      |
| train/                  |            |
|    approx_kl            | 0.07547648 |
|    clip_fraction        | 0.271      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.03       |
|    explained_variance   | 0.937      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0823    |
|    n_updates            | 1820       |
|    policy_gradient_loss | -0.0467    |
|    std                  | 0.146      |
|    value_loss           | 0.00208    |
----------------------------------------
Early stopping at step 12 due to reaching max kl: 0.09
----------------------------------------
| time/                   |            |
|    fps                  | 64

Early stopping at step 17 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 64         |
|    iterations           | 102        |
|    time_elapsed         | 406        |
|    total_timesteps      | 26112      |
| train/                  |            |
|    approx_kl            | 0.07986352 |
|    clip_fraction        | 0.531      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.04       |
|    explained_variance   | 0.952      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.104     |
|    n_updates            | 2020       |
|    policy_gradient_loss | -0.0951    |
|    std                  | 0.145      |
|    value_loss           | 0.00128    |
----------------------------------------
Early stopping at step 5 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 64

----------------------------------------
| time/                   |            |
|    fps                  | 65         |
|    iterations           | 112        |
|    time_elapsed         | 434        |
|    total_timesteps      | 28672      |
| train/                  |            |
|    approx_kl            | 0.04116331 |
|    clip_fraction        | 0.532      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.04       |
|    explained_variance   | 0.945      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.119     |
|    n_updates            | 2220       |
|    policy_gradient_loss | -0.0966    |
|    std                  | 0.145      |
|    value_loss           | 0.00147    |
----------------------------------------
Early stopping at step 3 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 66         |
|    iterations           | 113        |
|  

---------------------------------------
| time/                   |           |
|    fps                  | 67        |
|    iterations           | 122       |
|    time_elapsed         | 460       |
|    total_timesteps      | 31232     |
| train/                  |           |
|    approx_kl            | 0.0418354 |
|    clip_fraction        | 0.572     |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.05      |
|    explained_variance   | 0.917     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.125    |
|    n_updates            | 2420      |
|    policy_gradient_loss | -0.101    |
|    std                  | 0.145     |
|    value_loss           | 0.00136   |
---------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 123         |
|    time_elapsed         | 464         |
|    total_timesteps      | 31

Early stopping at step 7 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 68         |
|    iterations           | 132        |
|    time_elapsed         | 494        |
|    total_timesteps      | 33792      |
| train/                  |            |
|    approx_kl            | 0.08339452 |
|    clip_fraction        | 0.396      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.06       |
|    explained_variance   | 0.937      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.12      |
|    n_updates            | 2620       |
|    policy_gradient_loss | -0.0642    |
|    std                  | 0.145      |
|    value_loss           | 0.00193    |
----------------------------------------
Early stopping at step 6 due to reaching max kl: 0.09
-----------------------------------------
| time/                   |             |
|    fps                  | 6

-----------------------------------------
| time/                   |             |
|    fps                  | 69          |
|    iterations           | 142         |
|    time_elapsed         | 521         |
|    total_timesteps      | 36352       |
| train/                  |             |
|    approx_kl            | 0.068634935 |
|    clip_fraction        | 0.553       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.07        |
|    explained_variance   | 0.952       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.144      |
|    n_updates            | 2820        |
|    policy_gradient_loss | -0.104      |
|    std                  | 0.144       |
|    value_loss           | 0.00101     |
-----------------------------------------
Early stopping at step 9 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 69         |
|    iterations          

Eval num_timesteps=38400, episode_reward=0.53 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=38400, episode_reward=0.50 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=38400, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.49 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=38400, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=38400, episode_reward=0.54 +/- 0.00
Episode length: 5.00 +/- 0.00
New best mean reward!
Eval num_timesteps=38400, episode_reward=0.58 +/- 0.00
Episode length: 5.00 +/- 0.00
----------------------------------------

Early stopping at step 11 due to reaching max kl: 0.08
--------------------------------------
| time/                   |          |
|    fps                  | 66       |
|    iterations           | 159      |
|    time_elapsed         | 616      |
|    total_timesteps      | 40704    |
| train/                  |          |
|    approx_kl            | 0.084116 |
|    clip_fraction        | 0.444    |
|    clip_range           | 0.2      |
|    entropy_loss         | 2.08     |
|    explained_variance   | 0.955    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.115   |
|    n_updates            | 3160     |
|    policy_gradient_loss | -0.0862  |
|    std                  | 0.144    |
|    value_loss           | 0.00116  |
--------------------------------------
Early stopping at step 15 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 66         |
|    iterations           

Early stopping at step 2 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 169         |
|    time_elapsed         | 640         |
|    total_timesteps      | 43264       |
| train/                  |             |
|    approx_kl            | 0.077187926 |
|    clip_fraction        | 0.415       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.08        |
|    explained_variance   | 0.937       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0744     |
|    n_updates            | 3360        |
|    policy_gradient_loss | -0.0343     |
|    std                  | 0.144       |
|    value_loss           | 0.00246     |
-----------------------------------------
Early stopping at step 8 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps    

Early stopping at step 11 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 67         |
|    iterations           | 179        |
|    time_elapsed         | 677        |
|    total_timesteps      | 45824      |
| train/                  |            |
|    approx_kl            | 0.08134904 |
|    clip_fraction        | 0.519      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.09       |
|    explained_variance   | 0.914      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.138     |
|    n_updates            | 3560       |
|    policy_gradient_loss | -0.0896    |
|    std                  | 0.144      |
|    value_loss           | 0.00182    |
----------------------------------------
Early stopping at step 8 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 67

Early stopping at step 4 due to reaching max kl: 0.09
----------------------------------------
| time/                   |            |
|    fps                  | 68         |
|    iterations           | 189        |
|    time_elapsed         | 702        |
|    total_timesteps      | 48384      |
| train/                  |            |
|    approx_kl            | 0.08973224 |
|    clip_fraction        | 0.346      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.09       |
|    explained_variance   | 0.949      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0536    |
|    n_updates            | 3760       |
|    policy_gradient_loss | -0.049     |
|    std                  | 0.143      |
|    value_loss           | 0.00179    |
----------------------------------------
Early stopping at step 5 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 69 

Early stopping at step 14 due to reaching max kl: 0.09
----------------------------------------
| time/                   |            |
|    fps                  | 70         |
|    iterations           | 199        |
|    time_elapsed         | 726        |
|    total_timesteps      | 50944      |
| train/                  |            |
|    approx_kl            | 0.08654318 |
|    clip_fraction        | 0.496      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.1        |
|    explained_variance   | 0.93       |
|    learning_rate        | 1e-05      |
|    loss                 | -0.144     |
|    n_updates            | 3960       |
|    policy_gradient_loss | -0.0915    |
|    std                  | 0.143      |
|    value_loss           | 0.00141    |
----------------------------------------
Early stopping at step 8 due to reaching max kl: 0.09
Eval num_timesteps=51200, episode_reward=0.59 +/- 0.00
Episode length: 5.00 +/- 0.00
Eval num_timesteps=51200, e

----------------------------------------
| time/                   |            |
|    fps                  | 70         |
|    iterations           | 209        |
|    time_elapsed         | 763        |
|    total_timesteps      | 53504      |
| train/                  |            |
|    approx_kl            | 0.06616616 |
|    clip_fraction        | 0.554      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.11       |
|    explained_variance   | 0.951      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.141     |
|    n_updates            | 4160       |
|    policy_gradient_loss | -0.0996    |
|    std                  | 0.143      |
|    value_loss           | 0.0012     |
----------------------------------------
Early stopping at step 18 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 70         |
|    iterations           | 210        |
| 

Early stopping at step 4 due to reaching max kl: 0.09
----------------------------------------
| time/                   |            |
|    fps                  | 71         |
|    iterations           | 219        |
|    time_elapsed         | 786        |
|    total_timesteps      | 56064      |
| train/                  |            |
|    approx_kl            | 0.09153506 |
|    clip_fraction        | 0.36       |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.12       |
|    explained_variance   | 0.935      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0981    |
|    n_updates            | 4360       |
|    policy_gradient_loss | -0.0551    |
|    std                  | 0.143      |
|    value_loss           | 0.00204    |
----------------------------------------
Early stopping at step 3 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 71 

Early stopping at step 8 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 68         |
|    iterations           | 226        |
|    time_elapsed         | 850        |
|    total_timesteps      | 57856      |
| train/                  |            |
|    approx_kl            | 0.07934132 |
|    clip_fraction        | 0.424      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.12       |
|    explained_variance   | 0.948      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.124     |
|    n_updates            | 4500       |
|    policy_gradient_loss | -0.0724    |
|    std                  | 0.142      |
|    value_loss           | 0.00139    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 67         |
|    iterations           | 227        |
|  

-----------------------------------------
| time/                   |             |
|    fps                  | 68          |
|    iterations           | 236         |
|    time_elapsed         | 875         |
|    total_timesteps      | 60416       |
| train/                  |             |
|    approx_kl            | 0.057810836 |
|    clip_fraction        | 0.561       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.13        |
|    explained_variance   | 0.938       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.111      |
|    n_updates            | 4700        |
|    policy_gradient_loss | -0.101      |
|    std                  | 0.142       |
|    value_loss           | 0.00122     |
-----------------------------------------
Early stopping at step 10 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 69          |
|    iterations      

----------------------------------------
| time/                   |            |
|    fps                  | 69         |
|    iterations           | 246        |
|    time_elapsed         | 901        |
|    total_timesteps      | 62976      |
| train/                  |            |
|    approx_kl            | 0.08379954 |
|    clip_fraction        | 0.464      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.13       |
|    explained_variance   | 0.952      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0654    |
|    n_updates            | 4900       |
|    policy_gradient_loss | -0.0727    |
|    std                  | 0.142      |
|    value_loss           | 0.00149    |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 69         |
|    iterations           | 247        |
|    time_elapsed         | 905        |
|    total_times

Early stopping at step 12 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 69         |
|    iterations           | 256        |
|    time_elapsed         | 938        |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.08197748 |
|    clip_fraction        | 0.492      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.14       |
|    explained_variance   | 0.932      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.125     |
|    n_updates            | 5100       |
|    policy_gradient_loss | -0.092     |
|    std                  | 0.142      |
|    value_loss           | 0.00166    |
----------------------------------------
Early stopping at step 19 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 6

Early stopping at step 10 due to reaching max kl: 0.10
---------------------------------------
| time/                   |           |
|    fps                  | 70        |
|    iterations           | 266       |
|    time_elapsed         | 964       |
|    total_timesteps      | 68096     |
| train/                  |           |
|    approx_kl            | 0.0972397 |
|    clip_fraction        | 0.49      |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.15      |
|    explained_variance   | 0.956     |
|    learning_rate        | 1e-05     |
|    loss                 | -0.107    |
|    n_updates            | 5300      |
|    policy_gradient_loss | -0.0833   |
|    std                  | 0.141     |
|    value_loss           | 0.00126   |
---------------------------------------
Early stopping at step 6 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 70         |
|    ite

Early stopping at step 15 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 70         |
|    iterations           | 276        |
|    time_elapsed         | 1000       |
|    total_timesteps      | 70656      |
| train/                  |            |
|    approx_kl            | 0.08109322 |
|    clip_fraction        | 0.52       |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.16       |
|    explained_variance   | 0.937      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.122     |
|    n_updates            | 5500       |
|    policy_gradient_loss | -0.0999    |
|    std                  | 0.141      |
|    value_loss           | 0.00135    |
----------------------------------------
Early stopping at step 7 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 

Early stopping at step 5 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 71         |
|    iterations           | 286        |
|    time_elapsed         | 1022       |
|    total_timesteps      | 73216      |
| train/                  |            |
|    approx_kl            | 0.08399922 |
|    clip_fraction        | 0.438      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.16       |
|    explained_variance   | 0.961      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.108     |
|    n_updates            | 5700       |
|    policy_gradient_loss | -0.0668    |
|    std                  | 0.141      |
|    value_loss           | 0.00147    |
----------------------------------------
Early stopping at step 6 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 71 

Early stopping at step 3 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 72          |
|    iterations           | 296         |
|    time_elapsed         | 1045        |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.080676265 |
|    clip_fraction        | 0.39        |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.17        |
|    explained_variance   | 0.956       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0835     |
|    n_updates            | 5900        |
|    policy_gradient_loss | -0.0586     |
|    std                  | 0.141       |
|    value_loss           | 0.00175     |
-----------------------------------------
Early stopping at step 1 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps    

Early stopping at step 17 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 70         |
|    iterations           | 303        |
|    time_elapsed         | 1094       |
|    total_timesteps      | 77568      |
| train/                  |            |
|    approx_kl            | 0.07725295 |
|    clip_fraction        | 0.558      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.17       |
|    explained_variance   | 0.949      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.123     |
|    n_updates            | 6040       |
|    policy_gradient_loss | -0.094     |
|    std                  | 0.141      |
|    value_loss           | 0.00126    |
----------------------------------------
Early stopping at step 2 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 

Early stopping at step 9 due to reaching max kl: 0.09
-----------------------------------------
| time/                   |             |
|    fps                  | 71          |
|    iterations           | 313         |
|    time_elapsed         | 1114        |
|    total_timesteps      | 80128       |
| train/                  |             |
|    approx_kl            | 0.089310385 |
|    clip_fraction        | 0.455       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.17        |
|    explained_variance   | 0.943       |
|    learning_rate        | 1e-05       |
|    loss                 | -0.0972     |
|    n_updates            | 6240        |
|    policy_gradient_loss | -0.0675     |
|    std                  | 0.141       |
|    value_loss           | 0.00151     |
-----------------------------------------
Early stopping at step 7 due to reaching max kl: 0.08
---------------------------------------
| time/                   |           |
|    fps      

Early stopping at step 5 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 72         |
|    iterations           | 323        |
|    time_elapsed         | 1136       |
|    total_timesteps      | 82688      |
| train/                  |            |
|    approx_kl            | 0.07740557 |
|    clip_fraction        | 0.4        |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.18       |
|    explained_variance   | 0.953      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0878    |
|    n_updates            | 6440       |
|    policy_gradient_loss | -0.0564    |
|    std                  | 0.14       |
|    value_loss           | 0.00168    |
----------------------------------------
Early stopping at step 3 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 72 

Early stopping at step 3 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 72         |
|    iterations           | 333        |
|    time_elapsed         | 1168       |
|    total_timesteps      | 85248      |
| train/                  |            |
|    approx_kl            | 0.07711076 |
|    clip_fraction        | 0.297      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.18       |
|    explained_variance   | 0.952      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.043     |
|    n_updates            | 6640       |
|    policy_gradient_loss | -0.0465    |
|    std                  | 0.14       |
|    value_loss           | 0.00185    |
----------------------------------------
Early stopping at step 5 due to reaching max kl: 0.09
-----------------------------------------
| time/                   |             |
|    fps                  | 7

Early stopping at step 5 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 73         |
|    iterations           | 343        |
|    time_elapsed         | 1187       |
|    total_timesteps      | 87808      |
| train/                  |            |
|    approx_kl            | 0.07889375 |
|    clip_fraction        | 0.379      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.19       |
|    explained_variance   | 0.939      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0975    |
|    n_updates            | 6840       |
|    policy_gradient_loss | -0.0459    |
|    std                  | 0.14       |
|    value_loss           | 0.00184    |
----------------------------------------
Early stopping at step 7 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 74 

Early stopping at step 6 due to reaching max kl: 0.09
----------------------------------------
| time/                   |            |
|    fps                  | 74         |
|    iterations           | 353        |
|    time_elapsed         | 1215       |
|    total_timesteps      | 90368      |
| train/                  |            |
|    approx_kl            | 0.08815889 |
|    clip_fraction        | 0.445      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.19       |
|    explained_variance   | 0.961      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.08      |
|    n_updates            | 7040       |
|    policy_gradient_loss | -0.0693    |
|    std                  | 0.14       |
|    value_loss           | 0.00133    |
----------------------------------------
Early stopping at step 7 due to reaching max kl: 0.08
-----------------------------------------
| time/                   |             |
|    fps                  | 7

Early stopping at step 8 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 75         |
|    iterations           | 363        |
|    time_elapsed         | 1236       |
|    total_timesteps      | 92928      |
| train/                  |            |
|    approx_kl            | 0.08077454 |
|    clip_fraction        | 0.422      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.19       |
|    explained_variance   | 0.958      |
|    learning_rate        | 1e-05      |
|    loss                 | -0.0251    |
|    n_updates            | 7240       |
|    policy_gradient_loss | -0.066     |
|    std                  | 0.14       |
|    value_loss           | 0.00147    |
----------------------------------------
Early stopping at step 19 due to reaching max kl: 0.08
----------------------------------------
| time/                   |            |
|    fps                  | 75

Early stopping at step 5 due to reaching max kl: 0.08
--------------------------------------
| time/                   |          |
|    fps                  | 75       |
|    iterations           | 373      |
|    time_elapsed         | 1262     |
|    total_timesteps      | 95488    |
| train/                  |          |
|    approx_kl            | 0.079888 |
|    clip_fraction        | 0.33     |
|    clip_range           | 0.2      |
|    entropy_loss         | 2.2      |
|    explained_variance   | 0.962    |
|    learning_rate        | 1e-05    |
|    loss                 | -0.0782  |
|    n_updates            | 7440     |
|    policy_gradient_loss | -0.0518  |
|    std                  | 0.14     |
|    value_loss           | 0.00149  |
--------------------------------------
Early stopping at step 9 due to reaching max kl: 0.09
----------------------------------------
| time/                   |            |
|    fps                  | 75         |
|    iterations           | 

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/ad181/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-5cd9c39318b9>", line 45, in <module>
    model.learn(total_timesteps=300000, callback=callback)
  File "/home/ad181/anaconda3/lib/python3.7/site-packages/stable_baselines3/ppo/ppo.py", line 264, in learn
    reset_num_timesteps=reset_num_timesteps,
  File "/home/ad181/anaconda3/lib/python3.7/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 222, in learn
    continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
  File "/home/ad181/anaconda3/lib/python3.7/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 169, in collect_rollouts
    if callback.on_step() is False:
  File "/home/ad181/anaconda3/lib/python3.7/site-packages/stable_baselines3/common/callbacks.py", line 

KeyboardInterrupt: 