In [1]:
# Make the project root importable (presumably so the project-local packages
# imported in the next cell resolve -- confirm against the repository layout).
import os
import sys

# Root of the multilevel_ppo checkout. Override it with the MULTILEVEL_PPO_ROOT
# environment variable instead of editing this cell when running elsewhere.
PROJECT_ROOT = os.environ.get('MULTILEVEL_PPO_ROOT', '/data/ad181/RemoteDir/multilevel_ppo')

# Guard against duplicate entries when the cell is re-run in the same kernel.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
%matplotlib notebook
import numpy as np
import time
import pickle
import os
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from tqdm.notebook import trange, tqdm

import gym
from stable_baselines3.ppo import PPO, MlpPolicy
from stable_baselines3.ppo_multi_level import PPO_ML
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env.subproc_vec_multi_level_env import SubprocVecMultiLevelEnv
from stable_baselines3.common.envs.multi_level_ressim_env import MultiLevelRessimEnv
from stable_baselines3.common.logger import configure

from utils.custom_eval_callback import CustomEvalCallback, CustomEvalCallbackParallel
from utils.plot_functions import plot_learning
from utils.env_evaluate_functions import eval_actions

In [3]:
# Run configuration: RNG seed, experiment name and output locations.
seed = 1
case = 'mlmc_analysis'
data_dir = './data'
# Experiment outputs live in a sub-directory of data_dir named after the case.
log_dir = f'{data_dir}/{case}'

In [4]:
# Create the output directories up front; exist_ok makes re-running the cell harmless.
for out_dir in (data_dir, log_dir):
    os.makedirs(out_dir, exist_ok=True)

In [5]:
# Load the pickled training environments. Later cells index the result by
# integer level and read `.ressim_params` / `.level` from each entry.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# The handle is named `env_file` (not `input`) so the builtin input() is not shadowed.
with open('../envs_params/env_data_v1/env_train_dict.pkl', 'rb') as env_file:
    env_ck_dict = pickle.load(env_file)

In [6]:
# Keep the `n_levels` highest-numbered entries of env_ck_dict and re-key them
# 1..n_levels in env_dict_ (only the environment dictionary is built here).
n_levels = 2
fine_level = len(env_ck_dict)
level_offset = fine_level - n_levels
env_dict_ = {}
for new_level in range(1, n_levels + 1):
    src_level = new_level + level_offset
    # Report the mapping: new level index -> original level index.
    print(new_level, '->', src_level)
    env_dict_[new_level] = env_ck_dict[src_level]

1 -> 3
2 -> 4


In [7]:
# Train PPO_ML and run the MLMC estimator analysis once per seed.
# NOTE: this loop rebinds the notebook-level `seed`, `log_dir` and `fine_level`.
for seed in range(1, 2):
    print(f'seed {seed}')
    log_dir = './data/'+case+'/seed_'+str(seed)
    os.makedirs(log_dir, exist_ok=True)
    T = {1:100, 2:100} # n_steps
    N = 50 # number of actors
    M = {1:500, 2:500} # minibatch size
    I = 1200 # number of iterations
    K = 20 # number of epochs

    # NOTE(review): true division makes this a float (10.0); confirm the
    # consumers accept floats or switch to integer division.
    log_interval = I/120

    fine_level = len(env_dict_)

    print('generate callback ...')
    # NOTE(review): eval_callback is constructed but never passed to the model
    # in this cell -- confirm whether mlmc_analysis should receive it.
    eval_callback = CustomEvalCallback( env_dict_[fine_level],
                                        best_model_save_path=None,
                                        n_eval_episodes=1,
                                        log_path=str(log_dir)+'/results_eval',
                                        eval_freq=log_interval*T[1]  )

    print('vectorize environment ...')

    # generate PPO_ML parameters for MLMC analysis.
    # we choose same n_steps and batch_size values on levels because only fine level values are used in the analysis
    env_dict = {}
    n_steps_dict = {}
    batch_size_dict = {}
    try:
        for level, env in env_dict_.items():
            print(f"vectorize env level {level}")
            env_dict[level] = make_vec_env( MultiLevelRessimEnv,
                                    n_envs=N,
                                    seed=seed,
                                    env_kwargs= {"ressim_params":env.ressim_params, "level":env.level},
                                    vec_env_cls=SubprocVecMultiLevelEnv )
            n_steps_dict[level] = T[level]
            batch_size_dict[level] = M[level]

        # Index with fine_level explicitly instead of relying on the loop
        # variable leaking out of the loop above.
        print(env_dict_[fine_level].observation_space)
        print('model definition ..')
        model = PPO_ML(policy=MlpPolicy,
                           env=env_dict,
                           learning_rate = 1e-6,
                           n_steps = n_steps_dict,
                           batch_size = batch_size_dict,
                           n_epochs = K,
                           clip_range = 0.1,
                           ent_coef = 0.001,
                           vf_coef = 0.5,
                           policy_kwargs = dict(net_arch=[70,70,50], log_std_init=-2.9),
                           verbose = 1,
                           seed = seed,
                           target_kl = 0.05,
                           device = "auto")
        # set logger for the model
        new_logger = configure(log_dir)
        model.set_logger(new_logger)
        print('policy learning and analysis ..')
        # Squared error tolerances; their square roots are passed as eps_array.
        e2 = [1e-2, 1e-3, 1e-4]
        analysis_interval = I/10
        model.mlmc_analysis(total_timesteps=N*T[1]*I,
                            n_expt= 100000,
                            eps_array=np.sqrt(e2),
                            log_interval=log_interval,
                            analysis_interval=analysis_interval,
                            step_comp_time_dict={1:0.15,2:1.0})
        model.save(log_dir+'/PPO', exclude=['env_dict'])
        del model
    finally:
        # Always close the vectorised envs, even if construction, training or
        # saving fails part-way, so no environment is left open.
        for vec_env in env_dict.values():
            vec_env.close()


seed 1
generate callback ...
vectorize environment ...
vectorize env level 1
vectorize env level 2
Box(-1.0, 1.0, (35,), float64)
model definition ..
Using cuda device
Logging to ./data/mlmc_analysis/seed_1
policy learning and analysis ..
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.677         |
| time/                   |               |
|    fps                  | 58            |
|    iterations           | 10            |
|    time_elapsed         | 854           |
|    total_timesteps      | 50000         |
| train/                  |               |
|    approx_kl            | 1.4137933e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | -7.21         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.244         |
|    n_update

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.683         |
| time/                   |               |
|    fps                  | 61            |
|    iterations           | 100           |
|    time_elapsed         | 8157          |
|    total_timesteps      | 500000        |
| train/                  |               |
|    approx_kl            | 1.0966796e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | -3.36         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.135         |
|    n_updates            | 1980          |
|    policy_gradient_loss | -0.000207     |
|    std                  | 0.055         |
|    value_loss           | 0.202         |
-------------------------------------------
--------------------------------

100%|██████████| 2000/2000 [32:48<00:00,  1.02it/s]

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.682         |
| time/                   |               |
|    fps                  | 51            |
|    iterations           | 120           |
|    time_elapsed         | 11645         |
|    total_timesteps      | 600000        |
| train/                  |               |
|    approx_kl            | 1.3106818e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | -2.94         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.115         |
|    n_updates            | 2380          |
|    policy_gradient_loss | -0.000255     |
|    std                  | 0.055         |
|    value_loss           | 0.183         |
-------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [0.0614, 0.0611]                     |
|    V_l                  | [0.99, 0.98]                         |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [197, 1962, 19615]                   |
|    P_mc                 | [0.0246, 0.0873, 0.0529]             |
|    V_mc                 | 0.98                                 |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.688         |
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 200           |
|    time_elapsed         | 17887         |
|    total_timesteps      | 1000000       |
| train/                  |               |
|    approx_kl            | 1.0058953e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | -1.9          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.101         |
|    n_updates            | 3980          |
|    policy_gradient_loss | -0.00022      |
|    std                  | 0.055         |
|    value_loss           | 0.135         |
-------------------------------------------
--------------------------------

100%|██████████| 2000/2000 [32:45<00:00,  1.02it/s]

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.679        |
| time/                   |              |
|    fps                  | 52           |
|    iterations           | 240          |
|    time_elapsed         | 22894        |
|    total_timesteps      | 1200000      |
| train/                  |              |
|    approx_kl            | 9.545524e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.1          |
|    entropy_loss         | 31.1         |
|    explained_variance   | -1.51        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0834       |
|    n_updates            | 4780         |
|    policy_gradient_loss | -0.000179    |
|    std                  | 0.055        |
|    value_loss           | 0.115        |
------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [0.0263, 0.0265]                     |
|    V_l                  | [1.0, 0.99]                          |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [199, 1984, 19840]                   |
|    P_mc                 | [0.0081, 0.0279, 0.0246]             |
|    V_mc                 | 0.99                                 |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.681         |
| time/                   |               |
|    fps                  | 55            |
|    iterations           | 320           |
|    time_elapsed         | 29090         |
|    total_timesteps      | 1600000       |
| train/                  |               |
|    approx_kl            | 8.3924015e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | -0.998        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0775        |
|    n_updates            | 6380          |
|    policy_gradient_loss | -0.000171     |
|    std                  | 0.055         |
|    value_loss           | 0.0918        |
-------------------------------------------
--------------------------------

100%|██████████| 2000/2000 [32:43<00:00,  1.02it/s]

--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 5              |
|    ep_rew_mean          | 0.683          |
| time/                   |                |
|    fps                  | 52             |
|    iterations           | 360            |
|    time_elapsed         | 34075          |
|    total_timesteps      | 1800000        |
| train/                  |                |
|    approx_kl            | 1.05032705e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.1            |
|    entropy_loss         | 31.1           |
|    explained_variance   | -0.724         |
|    learning_rate        | 1e-06          |
|    loss                 | 0.0693         |
|    n_updates            | 7180           |
|    policy_gradient_loss | -0.000252      |
|    std                  | 0.055          |
|    value_loss           | 0.0799         |
--------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [0.0083, 0.0087]                     |
|    V_l                  | [1.0, 0.99]                          |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [199, 1981, 19810]                   |
|    P_mc                 | [0.034, 0.035, 0.0033]               |
|    V_mc                 | 0.99                                 |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 5              |
|    ep_rew_mean          | 0.688          |
| time/                   |                |
|    fps                  | 54             |
|    iterations           | 440            |
|    time_elapsed         | 40210          |
|    total_timesteps      | 2200000        |
| train/                  |                |
|    approx_kl            | 1.43065035e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.1            |
|    entropy_loss         | 31.1           |
|    explained_variance   | -0.416         |
|    learning_rate        | 1e-06          |
|    loss                 | 0.0621         |
|    n_updates            | 8780           |
|    policy_gradient_loss | -0.000302      |
|    std                  | 0.055          |
|    value_loss           | 0.0654         |
--------------------------------------------
----------

100%|██████████| 2000/2000 [32:40<00:00,  1.02it/s]

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.682         |
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 480           |
|    time_elapsed         | 45160         |
|    total_timesteps      | 2400000       |
| train/                  |               |
|    approx_kl            | 1.1297092e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | -0.294        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0627        |
|    n_updates            | 9580          |
|    policy_gradient_loss | -0.000182     |
|    std                  | 0.055         |
|    value_loss           | 0.06          |
-------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [-0.0023, -0.0021]                   |
|    V_l                  | [0.99, 0.99]                         |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [199, 1986, 19853]                   |
|    P_mc                 | [0.0024, 0.0323, -0.0036]            |
|    V_mc                 | 0.99                                 |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.688        |
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 560          |
|    time_elapsed         | 51283        |
|    total_timesteps      | 2800000      |
| train/                  |              |
|    approx_kl            | 1.507846e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.1          |
|    entropy_loss         | 31.1         |
|    explained_variance   | -0.053       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0525       |
|    n_updates            | 11180        |
|    policy_gradient_loss | -0.000235    |
|    std                  | 0.055        |
|    value_loss           | 0.047        |
------------------------------------------
-------------------------------------------
| rollout/

100%|██████████| 2000/2000 [32:23<00:00,  1.03it/s]

--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 5              |
|    ep_rew_mean          | 0.693          |
| time/                   |                |
|    fps                  | 53             |
|    iterations           | 600            |
|    time_elapsed         | 56190          |
|    total_timesteps      | 3000000        |
| train/                  |                |
|    approx_kl            | 1.02295235e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.1            |
|    entropy_loss         | 31.1           |
|    explained_variance   | 0.0688         |
|    learning_rate        | 1e-06          |
|    loss                 | 0.0509         |
|    n_updates            | 11980          |
|    policy_gradient_loss | -0.00022       |
|    std                  | 0.055          |
|    value_loss           | 0.0435         |
--------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [-0.0093, -0.0093]                   |
|    V_l                  | [1.0, 1.0]                           |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [200, 1993, 19926]                   |
|    P_mc                 | [0.0643, -0.0146, -0.0111]           |
|    V_mc                 | 1                                    |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 0.695       |
| time/                   |             |
|    fps                  | 54          |
|    iterations           | 680         |
|    time_elapsed         | 62292       |
|    total_timesteps      | 3400000     |
| train/                  |             |
|    approx_kl            | 8.92884e-06 |
|    clip_fraction        | 0           |
|    clip_range           | 0.1         |
|    entropy_loss         | 31.1        |
|    explained_variance   | 0.184       |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0493      |
|    n_updates            | 13580       |
|    policy_gradient_loss | -0.000223   |
|    std                  | 0.055       |
|    value_loss           | 0.0367      |
-----------------------------------------
-------------------------------------------
| rollout/                |     

100%|██████████| 2000/2000 [32:30<00:00,  1.03it/s]

--------------------------------------------
| rollout/                |                |
|    ep_len_mean          | 5              |
|    ep_rew_mean          | 0.689          |
| time/                   |                |
|    fps                  | 53             |
|    iterations           | 720            |
|    time_elapsed         | 67219          |
|    total_timesteps      | 3600000        |
| train/                  |                |
|    approx_kl            | 1.10167175e-05 |
|    clip_fraction        | 0              |
|    clip_range           | 0.1            |
|    entropy_loss         | 31.1           |
|    explained_variance   | 0.25           |
|    learning_rate        | 1e-06          |
|    loss                 | 0.048          |
|    n_updates            | 14380          |
|    policy_gradient_loss | -0.000221      |
|    std                  | 0.055          |
|    value_loss           | 0.034          |
--------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [-0.0141, -0.0142]                   |
|    V_l                  | [1.0, 1.0]                           |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [200, 1995, 19948]                   |
|    P_mc                 | [0.0468, -0.0022, -0.0111]           |
|    V_mc                 | 1                                    |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.694         |
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 800           |
|    time_elapsed         | 73498         |
|    total_timesteps      | 4000000       |
| train/                  |               |
|    approx_kl            | 1.3518573e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | 0.35          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0448        |
|    n_updates            | 15980         |
|    policy_gradient_loss | -0.000276     |
|    std                  | 0.055         |
|    value_loss           | 0.0296        |
-------------------------------------------
--------------------------------

100%|██████████| 2000/2000 [33:19<00:00,  1.00it/s]

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.704         |
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 840           |
|    time_elapsed         | 78588         |
|    total_timesteps      | 4200000       |
| train/                  |               |
|    approx_kl            | 1.2337596e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | 0.394         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0445        |
|    n_updates            | 16780         |
|    policy_gradient_loss | -0.00024      |
|    std                  | 0.055         |
|    value_loss           | 0.0271        |
-------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [-0.0172, -0.0174]                   |
|    V_l                  | [1.0, 1.0]                           |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [200, 1997, 19969]                   |
|    P_mc                 | [0.0085, -0.0339, -0.0158]           |
|    V_mc                 | 1                                    |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.697         |
| time/                   |               |
|    fps                  | 54            |
|    iterations           | 920           |
|    time_elapsed         | 84963         |
|    total_timesteps      | 4600000       |
| train/                  |               |
|    approx_kl            | 1.1304061e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | 0.46          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0425        |
|    n_updates            | 18380         |
|    policy_gradient_loss | -0.000184     |
|    std                  | 0.055         |
|    value_loss           | 0.0248        |
-------------------------------------------
--------------------------------

100%|██████████| 2000/2000 [32:44<00:00,  1.02it/s]

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.703        |
| time/                   |              |
|    fps                  | 53           |
|    iterations           | 960          |
|    time_elapsed         | 90030        |
|    total_timesteps      | 4800000      |
| train/                  |              |
|    approx_kl            | 1.731928e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.1          |
|    entropy_loss         | 31.1         |
|    explained_variance   | 0.516        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.041        |
|    n_updates            | 19180        |
|    policy_gradient_loss | -0.000207    |
|    std                  | 0.055        |
|    value_loss           | 0.0221       |
------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [-0.0196, -0.0197]                   |
|    V_l                  | [1.0, 1.0]                           |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [200, 1999, 19989]                   |
|    P_mc                 | [-0.0667, 0.007, -0.016]             |
|    V_mc                 | 1                                    |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5            |
|    ep_rew_mean          | 0.709        |
| time/                   |              |
|    fps                  | 54           |
|    iterations           | 1040         |
|    time_elapsed         | 96238        |
|    total_timesteps      | 5200000      |
| train/                  |              |
|    approx_kl            | 1.583585e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.1          |
|    entropy_loss         | 31.1         |
|    explained_variance   | 0.54         |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0416       |
|    n_updates            | 20780        |
|    policy_gradient_loss | -0.000177    |
|    std                  | 0.055        |
|    value_loss           | 0.0218       |
------------------------------------------
-------------------------------------------
| rollout/

100%|██████████| 2000/2000 [32:57<00:00,  1.01it/s]

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.71          |
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 1080          |
|    time_elapsed         | 101235        |
|    total_timesteps      | 5400000       |
| train/                  |               |
|    approx_kl            | 2.2878063e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | 0.577         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0396        |
|    n_updates            | 21580         |
|    policy_gradient_loss | -0.00032      |
|    std                  | 0.055         |
|    value_loss           | 0.0199        |
-------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...
------------------------------------------------------------------
| expt_results/           |                                      |
|    C_l                  | [0.15, 1.0]                          |
|    N                    | 100000                               |
|    P_l                  | [-0.0211, -0.0213]                   |
|    V_l                  | [1.0, 1.0]                           |
| mc_results/             |                                      |
|    C_mc                 | 1                                    |
|    N_mc                 | [201, 2001, 20004]                   |
|    P_mc                 | [-0.0474, 0.0195, -0.0227]           |
|    V_mc                 | 1                                    |
|    eps_mc               | [0.1        0.03162278 0.01      ]   |
| mlmc_results/           |                                      |
|    C_ml                 | [[0.15, 1.15], [0.15, 1.15], [0.1... |


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.71          |
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 1160          |
|    time_elapsed         | 107434        |
|    total_timesteps      | 5800000       |
| train/                  |               |
|    approx_kl            | 1.7296908e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | 0.627         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0394        |
|    n_updates            | 23180         |
|    policy_gradient_loss | -0.000265     |
|    std                  | 0.055         |
|    value_loss           | 0.0178        |
-------------------------------------------
--------------------------------

100%|██████████| 2000/2000 [33:07<00:00,  1.01it/s]

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 5             |
|    ep_rew_mean          | 0.714         |
| time/                   |               |
|    fps                  | 53            |
|    iterations           | 1200          |
|    time_elapsed         | 112474        |
|    total_timesteps      | 6000000       |
| train/                  |               |
|    approx_kl            | 2.3973094e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.1           |
|    entropy_loss         | 31.1          |
|    explained_variance   | 0.643         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0387        |
|    n_updates            | 23980         |
|    policy_gradient_loss | -0.000289     |
|    std                  | 0.055         |
|    value_loss           | 0.0169        |
-------------------------------------------





analysis of MLMC estimator for 100000 number of experimets...


  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in range(len(p_1)-1):
  for j in

  for j in range(len(p_1)-1):
