In [1]:
import warnings
# Silence FutureWarning before the remaining imports run.
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
# This filter references pd.errors, so it has to come after the pandas import.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import feature_manager as fma
import rl.env_simple_crypto_trade as env
import importlib
import rl.models as rla
import config as cf
import tr_utils

In [2]:
# Re-import feature_manager so edits made to the module on disk are used.
importlib.reload(fma)

fm = fma.FeatureManager(target_col="trade_signal")

# Load BTCUSDT data at three timeframes: 4h (trade), 1d (macro), 1w (super).
fm.import_trading_data(symbol="BTCUSDT", trade_timeframe="4h")
fm.import_macro_data(symbol="BTCUSDT", macro_timeframe="1d")
fm.import_super_data(symbol="BTCUSDT", super_timeframe="1w")

# One lag per timeframe; the indicator lists come from the config module.
fm.build_features(
    lags=1,
    macro_lags=1,
    super_lags=1,
    features=cf.BITCOIN_EXTERNAL_INDICATORS + cf.TRADING_TA_INDICATORS,
    macro_features=cf.MACRO_TA_INDICATORS,
    super_features=cf.SUPER_TA_INDICATORS,
)

Imported trading data from ../data/BTCUSDT-4h.csv with 11760 rows
Imported macro data from ../data/BTCUSDT-1d.csv with 1963 rows
Imported super macro data from ../data/BTCUSDT-1w.csv with 247 rows
Calculating external features ...
Calculating TA indicators of trade timeframe
Calculating TA indicators of macro timeframe
Calculating TA indicators of super timeframe
Calculating candlestick of trade timeframe
Calculating candlestick of macro timeframe
Calculating candlestick of super timeframe

Adding features for trade timeframe with lags 1: hashrate, fed_rate, gold, nasdaq, sp500, google_trend, sma_3_10, sma_7_30, sma_14_50, cci7, cci14, cci30, dx7, dx14, dx30, up_bb, low_bb, 

Adding features for macro timeframe with lags 1: sma_3_10, sma_7_30, sma_14_50, rsi7, rsi14, rsi30, cci7, cci14, cci30, dx7, dx14, dx30, up_bb, low_bb, 

Adding features for super timeframe with lags 1: sma_3_10, sma_7_30, sma_14_50, rsi7, rsi14, rsi30, cci7, cci14, cci30, dx7, dx14, dx30, up_bb, low_bb, 

Normali

In [3]:
# Positional split of fm.df: `train` is the 2000 rows preceding the last 2000,
# `trade` is the final 2000 rows.
train, trade = fm.df.iloc[-4000:-2000], fm.df.iloc[-2000:]

In [4]:
# Re-import the environment and config modules so on-disk edits are used.
importlib.reload(env)
importlib.reload(cf)

env_kwargs = cf.TRADE_ENV_PARAMETER
# Observation size: one slot per feature column plus 10 extra slots.
# NOTE(review): the meaning of the constant 10 is defined by CryptoTradingEnv,
# which is not visible here -- confirm it still matches the environment.
state_space = 10 + len(fm.cols)


def _build_env(frame):
    """Create a CryptoTradingEnv over ``frame`` with the shared settings.

    All three environments below differ only in the data frame they wrap, so
    the common arguments live here in one place instead of being repeated.

    Args:
        frame: Data frame passed to the environment as ``df``.

    Returns:
        A new ``env.CryptoTradingEnv`` instance.
    """
    return env.CryptoTradingEnv(
        trade_timeframe="4h",
        df=frame,
        state_space=state_space,
        indicators=fm.cols,
        **env_kwargs,
    )


train_env = _build_env(train)
trade_env = _build_env(trade)
full_env = _build_env(fm.df)

In [5]:
# Re-import rl.models so the agent is built from the current module source.
importlib.reload(rla)
# NOTE(review): full_env wraps fm.df, which also contains the rows sliced out
# as `trade`. An agent trained on it has already seen the data behind
# trade_env -- confirm this is intended rather than env=train_env.
agent = rla.DRLTradeAgent(env=full_env)

In [6]:
importlib.reload(rla)

# --- Disabled alternative: DQN configuration, kept for reference -----------
# DQN_PARAMS = {
#     "learning_rate": 1e-4,
#     "buffer_size": 100_000,
#     "learning_starts": 100_000,
#     "batch_size": 64,
#     "tau": 1.0,
#     "gamma": 0.9999,
#     "train_freq": 4,
#     "target_update_interval": 10000,
#     "exploration_fraction": 0.3,
#     "exploration_initial_eps": 1.0,
#     "exploration_final_eps": 0.05,
# }

# catalog_name = tr_utils.get_name_with_kwargs("dqn",DQN_PARAMS)

# dqn_model = agent.get_model(
#     model_name="dqn",
#     model_kwargs=DQN_PARAMS,
#     seed=100,
#     tensorboard_log=catalog_name
# )

# --- Active configuration: PPO ----------------------------------------------
# Hyper-parameters handed to agent.get_model as model_kwargs.
PPO_PARAMS = dict(
    n_steps=8000,
    ent_coef=0.015,
    learning_rate=0.00025,
    batch_size=800,
)

# Run name built from a label plus the hyper-parameters; it is used as the
# tensorboard log name here and as the checkpoint catalog name during training.
# NOTE(review): the "Logging to" line in the training output shows the label
# and the first key run together ("no_candlen_steps_8000...") -- confirm
# whether a separator is wanted.
catalog_name = tr_utils.get_name_with_kwargs("no_candle", PPO_PARAMS)

ppo_model = agent.get_model(
    model_name="ppo",
    model_kwargs=PPO_PARAMS,
    tensorboard_log=catalog_name,
    seed=100,
)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [7]:
# Start from the PPO model; the name is then rebound to whatever
# agent.train_model returns.
selected_model = ppo_model

# checkpoint=True with save_frequency=20_000 and the shared catalog_name is
# passed so the run is checkpointed under the same name as its tensorboard log.
selected_model = agent.train_model(
    model=selected_model,
    total_timesteps=1_000_000,
    checkpoint=True,
    catalog_name=catalog_name,
    save_frequency=20_000,
    progress_bar=True,
)

Logging to ../logs/tensorboard_log/no_candlen_steps_8000ent_coef_0.015learning_rate_0.00025batch_size_800/PPO_0


Output()

-------------------------------------
| rollout/           |              |
|    ep_len_mean     | 7.68e+03     |
|    ep_rew_mean     | -1.94        |
| time/              |              |
|    fps             | 195          |
|    iterations      | 1            |
|    time_elapsed    | 40           |
|    total_timesteps | 8000         |
| train/             |              |
|    reward          | 0.0009696332 |
-------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -2.58       |
| time/                   |             |
|    fps                  | 210         |
|    iterations           | 2           |
|    time_elapsed         | 75          |
|    total_timesteps      | 16000       |
| train/                  |             |
|    approx_kl            | 0.007690509 |
|    clip_fraction        | 0.00667     |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.0135     |
|    learning_rate        | 0.00025     |
|    loss                 | -0.016      |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.000693   |
|    reward               | -0.00020002 |
|    value_loss           | 0.00189     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -2.41        |
| time/                   |              |
|    fps                  | 215          |
|    iterations           | 3            |
|    time_elapsed         | 111          |
|    total_timesteps      | 24000        |
| train/                  |              |
|    approx_kl            | 0.0030025195 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | 0.00071      |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0196      |
|    n_updates            | 20           |
|    policy_gradient_loss | -4.28e-05    |
|    reward               | 0.005037532  |
|    value_loss           | 0.00191      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -2.28       |
| time/                   |             |
|    fps                  | 228         |
|    iterations           | 6           |
|    time_elapsed         | 210         |
|    total_timesteps      | 48000       |
| train/                  |             |
|    approx_kl            | 0.01153705  |
|    clip_fraction        | 0.0295      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | 1.34e-05    |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0222     |
|    n_updates            | 50          |
|    policy_gradient_loss | -0.00257    |
|    reward               | 0.014894627 |
|    value_loss           | 0.00163     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.93        |
| time/                   |              |
|    fps                  | 230          |
|    iterations           | 7            |
|    time_elapsed         | 243          |
|    total_timesteps      | 56000        |
| train/                  |              |
|    approx_kl            | 0.0057363277 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.07        |
|    explained_variance   | -0.000103    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0137      |
|    n_updates            | 60           |
|    policy_gradient_loss | -4.7e-05     |
|    reward               | 0.0          |
|    value_loss           | 0.00196      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.97        |
| time/                   |              |
|    fps                  | 229          |
|    iterations           | 8            |
|    time_elapsed         | 278          |
|    total_timesteps      | 64000        |
| train/                  |              |
|    approx_kl            | 0.011743849  |
|    clip_fraction        | 0.0089       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.06        |
|    explained_variance   | -0.000378    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0199      |
|    n_updates            | 70           |
|    policy_gradient_loss | -0.00116     |
|    reward               | 0.0047173207 |
|    value_loss           | 0.00185      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -2.05       |
| time/                   |             |
|    fps                  | 231         |
|    iterations           | 9           |
|    time_elapsed         | 311         |
|    total_timesteps      | 72000       |
| train/                  |             |
|    approx_kl            | 0.010432763 |
|    clip_fraction        | 0.0154      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.06       |
|    explained_variance   | 0.00165     |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0165     |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.000998   |
|    reward               | 0.007419468 |
|    value_loss           | 0.00145     |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -2.03         |
| time/                   |               |
|    fps                  | 231           |
|    iterations           | 10            |
|    time_elapsed         | 345           |
|    total_timesteps      | 80000         |
| train/                  |               |
|    approx_kl            | 0.010684297   |
|    clip_fraction        | 0.0136        |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.08         |
|    explained_variance   | -0.00424      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00898      |
|    n_updates            | 90            |
|    policy_gradient_loss | -0.00108      |
|    reward               | 4.4705944e-06 |
|    value_loss           | 0.00225       |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -2.08       |
| time/                   |             |
|    fps                  | 232         |
|    iterations           | 11          |
|    time_elapsed         | 379         |
|    total_timesteps      | 88000       |
| train/                  |             |
|    approx_kl            | 0.010626558 |
|    clip_fraction        | 0.0201      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | 0.000301    |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0154     |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.00199    |
|    reward               | 0.0         |
|    value_loss           | 0.0014      |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -1.97         |
| time/                   |               |
|    fps                  | 232           |
|    iterations           | 12            |
|    time_elapsed         | 412           |
|    total_timesteps      | 96000         |
| train/                  |               |
|    approx_kl            | 0.004656234   |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.07         |
|    explained_variance   | -0.0022       |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0172       |
|    n_updates            | 110           |
|    policy_gradient_loss | -0.000125     |
|    reward               | 0.00030010505 |
|    value_loss           | 0.00181       |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -2          |
| time/                   |             |
|    fps                  | 232         |
|    iterations           | 13          |
|    time_elapsed         | 447         |
|    total_timesteps      | 104000      |
| train/                  |             |
|    approx_kl            | 0.015020452 |
|    clip_fraction        | 0.0413      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.02       |
|    explained_variance   | -0.000476   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.017      |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00326    |
|    reward               | -0.04938215 |
|    value_loss           | 0.00148     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -2.01       |
| time/                   |             |
|    fps                  | 233         |
|    iterations           | 14          |
|    time_elapsed         | 480         |
|    total_timesteps      | 112000      |
| train/                  |             |
|    approx_kl            | 0.007817244 |
|    clip_fraction        | 0.0252      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.975      |
|    explained_variance   | -8.68e-05   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0153     |
|    n_updates            | 130         |
|    policy_gradient_loss | -0.00218    |
|    reward               | 0.009119181 |
|    value_loss           | 0.00152     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.99        |
| time/                   |              |
|    fps                  | 232          |
|    iterations           | 15           |
|    time_elapsed         | 516          |
|    total_timesteps      | 120000       |
| train/                  |              |
|    approx_kl            | 0.0044439803 |
|    clip_fraction        | 0.00717      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1           |
|    explained_variance   | -4.27e-05    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0164      |
|    n_updates            | 140          |
|    policy_gradient_loss | 8.4e-05      |
|    reward               | 0.007830555  |
|    value_loss           | 0.00171      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -2.08        |
| time/                   |              |
|    fps                  | 232          |
|    iterations           | 16           |
|    time_elapsed         | 550          |
|    total_timesteps      | 128000       |
| train/                  |              |
|    approx_kl            | 0.016790533  |
|    clip_fraction        | 0.0234       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.95        |
|    explained_variance   | -0.0005      |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0156      |
|    n_updates            | 150          |
|    policy_gradient_loss | -0.00255     |
|    reward               | 0.0025426361 |
|    value_loss           | 0.00168      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -2.08        |
| time/                   |              |
|    fps                  | 231          |
|    iterations           | 17           |
|    time_elapsed         | 586          |
|    total_timesteps      | 136000       |
| train/                  |              |
|    approx_kl            | 0.011947056  |
|    clip_fraction        | 0.0138       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.864       |
|    explained_variance   | -0.000469    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0104      |
|    n_updates            | 160          |
|    policy_gradient_loss | -0.00194     |
|    reward               | -0.017449537 |
|    value_loss           | 0.00183      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -1.98       |
| time/                   |             |
|    fps                  | 230         |
|    iterations           | 18          |
|    time_elapsed         | 623         |
|    total_timesteps      | 144000      |
| train/                  |             |
|    approx_kl            | 0.008058956 |
|    clip_fraction        | 0.0226      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.758      |
|    explained_variance   | -3.25e-05   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.0119     |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.00247    |
|    reward               | 0.0         |
|    value_loss           | 0.00185     |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -1.97         |
| time/                   |               |
|    fps                  | 230           |
|    iterations           | 19            |
|    time_elapsed         | 658           |
|    total_timesteps      | 152000        |
| train/                  |               |
|    approx_kl            | 0.005272772   |
|    clip_fraction        | 0.0315        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.654        |
|    explained_variance   | -8.19e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.014        |
|    n_updates            | 180           |
|    policy_gradient_loss | -0.00358      |
|    reward               | -0.0033087323 |
|    value_loss           | 0.00191       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.88        |
| time/                   |              |
|    fps                  | 231          |
|    iterations           | 20           |
|    time_elapsed         | 691          |
|    total_timesteps      | 160000       |
| train/                  |              |
|    approx_kl            | 0.0027942425 |
|    clip_fraction        | 0.026        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.6         |
|    explained_variance   | -0.000211    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0116      |
|    n_updates            | 190          |
|    policy_gradient_loss | -0.00275     |
|    reward               | 0.0          |
|    value_loss           | 0.00206      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -1.82       |
| time/                   |             |
|    fps                  | 231         |
|    iterations           | 21          |
|    time_elapsed         | 726         |
|    total_timesteps      | 168000      |
| train/                  |             |
|    approx_kl            | 0.004902636 |
|    clip_fraction        | 0.0251      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.543      |
|    explained_variance   | -4.43e-05   |
|    learning_rate        | 0.00025     |
|    loss                 | -0.00742    |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.00291    |
|    reward               | -0.00020002 |
|    value_loss           | 0.00186     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.75        |
| time/                   |              |
|    fps                  | 231          |
|    iterations           | 22           |
|    time_elapsed         | 759          |
|    total_timesteps      | 176000       |
| train/                  |              |
|    approx_kl            | 0.0037108879 |
|    clip_fraction        | 0.0305       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.468       |
|    explained_variance   | -7.03e-05    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00878     |
|    n_updates            | 210          |
|    policy_gradient_loss | -0.00333     |
|    reward               | 0.0          |
|    value_loss           | 0.00218      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.74        |
| time/                   |              |
|    fps                  | 232          |
|    iterations           | 23           |
|    time_elapsed         | 792          |
|    total_timesteps      | 184000       |
| train/                  |              |
|    approx_kl            | 0.0030467208 |
|    clip_fraction        | 0.0346       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.395       |
|    explained_variance   | -4.9e-05     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0116      |
|    n_updates            | 220          |
|    policy_gradient_loss | -0.00377     |
|    reward               | 0.0068303966 |
|    value_loss           | 0.00276      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.51        |
| time/                   |              |
|    fps                  | 233          |
|    iterations           | 25           |
|    time_elapsed         | 857          |
|    total_timesteps      | 200000       |
| train/                  |              |
|    approx_kl            | 0.0016034447 |
|    clip_fraction        | 0.0107       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.29        |
|    explained_variance   | -0.000175    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00512     |
|    n_updates            | 240          |
|    policy_gradient_loss | -0.00169     |
|    reward               | 0.0          |
|    value_loss           | 0.00305      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.47        |
| time/                   |              |
|    fps                  | 233          |
|    iterations           | 26           |
|    time_elapsed         | 890          |
|    total_timesteps      | 208000       |
| train/                  |              |
|    approx_kl            | 0.0014690564 |
|    clip_fraction        | 0.0232       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.241       |
|    explained_variance   | 0.00049      |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00673     |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.00291     |
|    reward               | 0.0          |
|    value_loss           | 0.00356      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.38        |
| time/                   |              |
|    fps                  | 233          |
|    iterations           | 27           |
|    time_elapsed         | 923          |
|    total_timesteps      | 216000       |
| train/                  |              |
|    approx_kl            | 0.0012162414 |
|    clip_fraction        | 0.0153       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.207       |
|    explained_variance   | -9.81e-05    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00495     |
|    n_updates            | 260          |
|    policy_gradient_loss | -0.00216     |
|    reward               | 0.0          |
|    value_loss           | 0.00394      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.34        |
| time/                   |              |
|    fps                  | 234          |
|    iterations           | 28           |
|    time_elapsed         | 956          |
|    total_timesteps      | 224000       |
| train/                  |              |
|    approx_kl            | 0.0009905486 |
|    clip_fraction        | 0.0106       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.175       |
|    explained_variance   | 1.19e-05     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00269     |
|    n_updates            | 270          |
|    policy_gradient_loss | -0.0017      |
|    reward               | 0.006711064  |
|    value_loss           | 0.00389      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.27        |
| time/                   |              |
|    fps                  | 234          |
|    iterations           | 29           |
|    time_elapsed         | 989          |
|    total_timesteps      | 232000       |
| train/                  |              |
|    approx_kl            | 0.0007840477 |
|    clip_fraction        | 0.0155       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.144       |
|    explained_variance   | 8.24e-05     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.00296     |
|    n_updates            | 280          |
|    policy_gradient_loss | -0.00268     |
|    reward               | 0.0          |
|    value_loss           | 0.00471      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.2         |
| time/                   |              |
|    fps                  | 234          |
|    iterations           | 30           |
|    time_elapsed         | 1022         |
|    total_timesteps      | 240000       |
| train/                  |              |
|    approx_kl            | 0.0004350841 |
|    clip_fraction        | 0.00341      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.127       |
|    explained_variance   | -0.00032     |
|    learning_rate        | 0.00025      |
|    loss                 | -0.000435    |
|    n_updates            | 290          |
|    policy_gradient_loss | -0.000696    |
|    reward               | 0.0075383    |
|    value_loss           | 0.00546      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -1.15        |
| time/                   |              |
|    fps                  | 235          |
|    iterations           | 31           |
|    time_elapsed         | 1053         |
|    total_timesteps      | 248000       |
| train/                  |              |
|    approx_kl            | 0.0004729047 |
|    clip_fraction        | 0.00055      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.111       |
|    explained_variance   | 6.55e-05     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00351      |
|    n_updates            | 300          |
|    policy_gradient_loss | -0.000303    |
|    reward               | 0.0          |
|    value_loss           | 0.00522      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -1.11         |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 32            |
|    time_elapsed         | 1086          |
|    total_timesteps      | 256000        |
| train/                  |               |
|    approx_kl            | 0.00035663258 |
|    clip_fraction        | 0.00264       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.104        |
|    explained_variance   | -4.22e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00315       |
|    n_updates            | 310           |
|    policy_gradient_loss | -0.000486     |
|    reward               | -0.012425786  |
|    value_loss           | 0.00605       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -1.08         |
| time/                   |               |
|    fps                  | 236           |
|    iterations           | 33            |
|    time_elapsed         | 1118          |
|    total_timesteps      | 264000        |
| train/                  |               |
|    approx_kl            | 0.00021256406 |
|    clip_fraction        | 0.000275      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0965       |
|    explained_variance   | 6.38e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00175       |
|    n_updates            | 320           |
|    policy_gradient_loss | -0.000221     |
|    reward               | 0.0007145761  |
|    value_loss           | 0.00641       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -1.04         |
| time/                   |               |
|    fps                  | 236           |
|    iterations           | 34            |
|    time_elapsed         | 1151          |
|    total_timesteps      | 272000        |
| train/                  |               |
|    approx_kl            | 0.0001704834  |
|    clip_fraction        | 0.000375      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0849       |
|    explained_variance   | -0.000136     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00127       |
|    n_updates            | 330           |
|    policy_gradient_loss | -0.000263     |
|    reward               | 0.00023116218 |
|    value_loss           | 0.00533       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -1.01         |
| time/                   |               |
|    fps                  | 236           |
|    iterations           | 35            |
|    time_elapsed         | 1183          |
|    total_timesteps      | 280000        |
| train/                  |               |
|    approx_kl            | 0.00017419543 |
|    clip_fraction        | 0.00272       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0881       |
|    explained_variance   | -0.000294     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0018        |
|    n_updates            | 340           |
|    policy_gradient_loss | -0.00021      |
|    reward               | 0.0           |
|    value_loss           | 0.00679       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.973        |
| time/                   |               |
|    fps                  | 236           |
|    iterations           | 36            |
|    time_elapsed         | 1216          |
|    total_timesteps      | 288000        |
| train/                  |               |
|    approx_kl            | 0.00039778213 |
|    clip_fraction        | 0.00152       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0786       |
|    explained_variance   | 8.82e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.0014        |
|    n_updates            | 350           |
|    policy_gradient_loss | -0.000312     |
|    reward               | 0.0           |
|    value_loss           | 0.00543       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.943        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 37            |
|    time_elapsed         | 1248          |
|    total_timesteps      | 296000        |
| train/                  |               |
|    approx_kl            | 0.00020897211 |
|    clip_fraction        | 0.00467       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.085        |
|    explained_variance   | 6.08e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00222       |
|    n_updates            | 360           |
|    policy_gradient_loss | -0.000543     |
|    reward               | -0.008080069  |
|    value_loss           | 0.00702       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.915        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 38            |
|    time_elapsed         | 1282          |
|    total_timesteps      | 304000        |
| train/                  |               |
|    approx_kl            | 9.593819e-05  |
|    clip_fraction        | 0.00137       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.09         |
|    explained_variance   | -3.58e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000867      |
|    n_updates            | 370           |
|    policy_gradient_loss | -6.01e-05     |
|    reward               | -0.0012786915 |
|    value_loss           | 0.00734       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.889        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 39            |
|    time_elapsed         | 1315          |
|    total_timesteps      | 312000        |
| train/                  |               |
|    approx_kl            | 0.00023022122 |
|    clip_fraction        | 0.000362      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0862       |
|    explained_variance   | 6.62e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00314       |
|    n_updates            | 380           |
|    policy_gradient_loss | -0.000208     |
|    reward               | -0.027387636  |
|    value_loss           | 0.00686       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.855        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 40            |
|    time_elapsed         | 1347          |
|    total_timesteps      | 320000        |
| train/                  |               |
|    approx_kl            | 0.00022134357 |
|    clip_fraction        | 0.00545       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0976       |
|    explained_variance   | -5.25e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | -3.96e-05     |
|    n_updates            | 390           |
|    policy_gradient_loss | -0.000573     |
|    reward               | 0.0           |
|    value_loss           | 0.00677       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.83         |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 41            |
|    time_elapsed         | 1380          |
|    total_timesteps      | 328000        |
| train/                  |               |
|    approx_kl            | 0.00017923489 |
|    clip_fraction        | 0.00045       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.098        |
|    explained_variance   | -4.27e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00153       |
|    n_updates            | 400           |
|    policy_gradient_loss | -0.000126     |
|    reward               | -0.21759304   |
|    value_loss           | 0.00604       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.781        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 43            |
|    time_elapsed         | 1446          |
|    total_timesteps      | 344000        |
| train/                  |               |
|    approx_kl            | 0.00011065826 |
|    clip_fraction        | 0.000525      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0966       |
|    explained_variance   | -0.000432     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00408       |
|    n_updates            | 420           |
|    policy_gradient_loss | -1.34e-05     |
|    reward               | 0.0           |
|    value_loss           | 0.00643       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.75         |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 44            |
|    time_elapsed         | 1479          |
|    total_timesteps      | 352000        |
| train/                  |               |
|    approx_kl            | 0.00042633567 |
|    clip_fraction        | 0.0046        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0841       |
|    explained_variance   | 3.99e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00246      |
|    n_updates            | 430           |
|    policy_gradient_loss | -0.000909     |
|    reward               | 0.0           |
|    value_loss           | 0.00518       |
-------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -0.726      |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 45          |
|    time_elapsed         | 1512        |
|    total_timesteps      | 360000      |
| train/                  |             |
|    approx_kl            | 0.000198378 |
|    clip_fraction        | 0.00424     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0927     |
|    explained_variance   | 1.25e-06    |
|    learning_rate        | 0.00025     |
|    loss                 | 0.001       |
|    n_updates            | 440         |
|    policy_gradient_loss | -0.000391   |
|    reward               | 0.0         |
|    value_loss           | 0.00704     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.68e+03    |
|    ep_rew_mean          | -0.696      |
| time/                   |             |
|    fps                  | 238         |
|    iterations           | 46          |
|    time_elapsed         | 1545        |
|    total_timesteps      | 368000      |
| train/                  |             |
|    approx_kl            | 0.000275406 |
|    clip_fraction        | 0.000425    |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0847     |
|    explained_variance   | 4.17e-07    |
|    learning_rate        | 0.00025     |
|    loss                 | 0.00191     |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.000224   |
|    reward               | 0.0         |
|    value_loss           | 0.00579     |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -0.68        |
| time/                   |              |
|    fps                  | 238          |
|    iterations           | 47           |
|    time_elapsed         | 1579         |
|    total_timesteps      | 376000       |
| train/                  |              |
|    approx_kl            | 0.0003472083 |
|    clip_fraction        | 0.00487      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0833      |
|    explained_variance   | -6.91e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | -0.000252    |
|    n_updates            | 460          |
|    policy_gradient_loss | -0.000859    |
|    reward               | 0.0          |
|    value_loss           | 0.00691      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -0.641       |
| time/                   |              |
|    fps                  | 237          |
|    iterations           | 48           |
|    time_elapsed         | 1614         |
|    total_timesteps      | 384000       |
| train/                  |              |
|    approx_kl            | 0.0002798948 |
|    clip_fraction        | 0.00187      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0785      |
|    explained_variance   | -3.1e-06     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00369      |
|    n_updates            | 470          |
|    policy_gradient_loss | -0.00036     |
|    reward               | 0.0010493664 |
|    value_loss           | 0.00728      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -0.624       |
| time/                   |              |
|    fps                  | 237          |
|    iterations           | 49           |
|    time_elapsed         | 1648         |
|    total_timesteps      | 392000       |
| train/                  |              |
|    approx_kl            | 0.0002758227 |
|    clip_fraction        | 0.00456      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0877      |
|    explained_variance   | -4.94e-05    |
|    learning_rate        | 0.00025      |
|    loss                 | 0.00114      |
|    n_updates            | 480          |
|    policy_gradient_loss | -0.00062     |
|    reward               | 0.0          |
|    value_loss           | 0.00486      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.606        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 50            |
|    time_elapsed         | 1682          |
|    total_timesteps      | 400000        |
| train/                  |               |
|    approx_kl            | 0.00020612453 |
|    clip_fraction        | 0.00554       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | -0.000107     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000841      |
|    n_updates            | 490           |
|    policy_gradient_loss | -0.000571     |
|    reward               | -0.0027066825 |
|    value_loss           | 0.00725       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.588        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 51            |
|    time_elapsed         | 1716          |
|    total_timesteps      | 408000        |
| train/                  |               |
|    approx_kl            | 0.00027073885 |
|    clip_fraction        | 0.0005        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.103        |
|    explained_variance   | -0.000391     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00296       |
|    n_updates            | 500           |
|    policy_gradient_loss | -0.000148     |
|    reward               | 0.0           |
|    value_loss           | 0.00514       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.567        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 52            |
|    time_elapsed         | 1748          |
|    total_timesteps      | 416000        |
| train/                  |               |
|    approx_kl            | 0.00034978995 |
|    clip_fraction        | 0.00167       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.104        |
|    explained_variance   | 3.58e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.000375     |
|    n_updates            | 510           |
|    policy_gradient_loss | -0.000203     |
|    reward               | 0.0021990938  |
|    value_loss           | 0.00548       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.548        |
| time/                   |               |
|    fps                  | 238           |
|    iterations           | 53            |
|    time_elapsed         | 1781          |
|    total_timesteps      | 424000        |
| train/                  |               |
|    approx_kl            | 0.00037601957 |
|    clip_fraction        | 0.00137       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0906       |
|    explained_variance   | -0.000374     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00204       |
|    n_updates            | 520           |
|    policy_gradient_loss | -0.000331     |
|    reward               | 0.00019760216 |
|    value_loss           | 0.0062        |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.529        |
| time/                   |               |
|    fps                  | 238           |
|    iterations           | 54            |
|    time_elapsed         | 1814          |
|    total_timesteps      | 432000        |
| train/                  |               |
|    approx_kl            | 0.00021784028 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0859       |
|    explained_variance   | 0.000165      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00316       |
|    n_updates            | 530           |
|    policy_gradient_loss | -9.38e-05     |
|    reward               | -9.900882e-05 |
|    value_loss           | 0.00692       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.513        |
| time/                   |               |
|    fps                  | 238           |
|    iterations           | 55            |
|    time_elapsed         | 1847          |
|    total_timesteps      | 440000        |
| train/                  |               |
|    approx_kl            | 0.00018614931 |
|    clip_fraction        | 0.00425       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0936       |
|    explained_variance   | -0.000345     |
|    learning_rate        | 0.00025       |
|    loss                 | -9.48e-05     |
|    n_updates            | 540           |
|    policy_gradient_loss | -0.000454     |
|    reward               | 0.04200384    |
|    value_loss           | 0.00661       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.496        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 56            |
|    time_elapsed         | 1882          |
|    total_timesteps      | 448000        |
| train/                  |               |
|    approx_kl            | 0.00024552766 |
|    clip_fraction        | 0.000925      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | 6.89e-05      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000702      |
|    n_updates            | 550           |
|    policy_gradient_loss | 2.32e-06      |
|    reward               | 0.019577555   |
|    value_loss           | 0.00673       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.481        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 57            |
|    time_elapsed         | 1919          |
|    total_timesteps      | 456000        |
| train/                  |               |
|    approx_kl            | 0.00033125884 |
|    clip_fraction        | 0.00175       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0974       |
|    explained_variance   | -0.000391     |
|    learning_rate        | 0.00025       |
|    loss                 | 3.98e-05      |
|    n_updates            | 560           |
|    policy_gradient_loss | -0.000361     |
|    reward               | -0.003941576  |
|    value_loss           | 0.00583       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.464        |
| time/                   |               |
|    fps                  | 237           |
|    iterations           | 58            |
|    time_elapsed         | 1955          |
|    total_timesteps      | 464000        |
| train/                  |               |
|    approx_kl            | 0.00025080593 |
|    clip_fraction        | 0.00375       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0831       |
|    explained_variance   | 6.5e-05       |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00309       |
|    n_updates            | 570           |
|    policy_gradient_loss | -0.000624     |
|    reward               | -0.002782821  |
|    value_loss           | 0.00645       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.448        |
| time/                   |               |
|    fps                  | 236           |
|    iterations           | 59            |
|    time_elapsed         | 1992          |
|    total_timesteps      | 472000        |
| train/                  |               |
|    approx_kl            | 0.00019527553 |
|    clip_fraction        | 0.00307       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0902       |
|    explained_variance   | 0.000104      |
|    learning_rate        | 0.00025       |
|    loss                 | -0.00173      |
|    n_updates            | 580           |
|    policy_gradient_loss | -0.00049      |
|    reward               | 0.0026956615  |
|    value_loss           | 0.00554       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.44         |
| time/                   |               |
|    fps                  | 236           |
|    iterations           | 60            |
|    time_elapsed         | 2027          |
|    total_timesteps      | 480000        |
| train/                  |               |
|    approx_kl            | 0.00021700599 |
|    clip_fraction        | 0.00352       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0915       |
|    explained_variance   | -0.000102     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000793      |
|    n_updates            | 590           |
|    policy_gradient_loss | -0.00048      |
|    reward               | -0.0059578805 |
|    value_loss           | 0.00737       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -0.423       |
| time/                   |              |
|    fps                  | 236          |
|    iterations           | 61           |
|    time_elapsed         | 2064         |
|    total_timesteps      | 488000       |
| train/                  |              |
|    approx_kl            | 0.0002964771 |
|    clip_fraction        | 0.00537      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.102       |
|    explained_variance   | -1.31e-06    |
|    learning_rate        | 0.00025      |
|    loss                 | 0.000376     |
|    n_updates            | 600          |
|    policy_gradient_loss | -0.000466    |
|    reward               | -0.026524222 |
|    value_loss           | 0.00591      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.405        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 63            |
|    time_elapsed         | 2136          |
|    total_timesteps      | 504000        |
| train/                  |               |
|    approx_kl            | 0.00040546228 |
|    clip_fraction        | 0.00235       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | 2.32e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00173       |
|    n_updates            | 620           |
|    policy_gradient_loss | -0.000562     |
|    reward               | 0.0437199     |
|    value_loss           | 0.00521       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.391        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 64            |
|    time_elapsed         | 2172          |
|    total_timesteps      | 512000        |
| train/                  |               |
|    approx_kl            | 0.00038283278 |
|    clip_fraction        | 0.00256       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0983       |
|    explained_variance   | -1.41e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000766      |
|    n_updates            | 630           |
|    policy_gradient_loss | -0.000427     |
|    reward               | -0.042425476  |
|    value_loss           | 0.00614       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.383        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 65            |
|    time_elapsed         | 2208          |
|    total_timesteps      | 520000        |
| train/                  |               |
|    approx_kl            | 0.00015023706 |
|    clip_fraction        | 3.75e-05      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0978       |
|    explained_variance   | -0.000189     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00189       |
|    n_updates            | 640           |
|    policy_gradient_loss | -2.87e-05     |
|    reward               | -0.0018020412 |
|    value_loss           | 0.00606       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.374        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 66            |
|    time_elapsed         | 2242          |
|    total_timesteps      | 528000        |
| train/                  |               |
|    approx_kl            | 0.00013127952 |
|    clip_fraction        | 0.0033        |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.106        |
|    explained_variance   | -0.000152     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00214       |
|    n_updates            | 650           |
|    policy_gradient_loss | -0.000217     |
|    reward               | 0.0           |
|    value_loss           | 0.00689       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.365        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 67            |
|    time_elapsed         | 2275          |
|    total_timesteps      | 536000        |
| train/                  |               |
|    approx_kl            | 0.00034522454 |
|    clip_fraction        | 0.00124       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | -8.02e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000865      |
|    n_updates            | 660           |
|    policy_gradient_loss | -0.000271     |
|    reward               | 0.0           |
|    value_loss           | 0.00565       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.348        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 68            |
|    time_elapsed         | 2309          |
|    total_timesteps      | 544000        |
| train/                  |               |
|    approx_kl            | 0.00030729396 |
|    clip_fraction        | 0.00307       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.109        |
|    explained_variance   | -7.87e-06     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000965      |
|    n_updates            | 670           |
|    policy_gradient_loss | -8.58e-05     |
|    reward               | -0.008621193  |
|    value_loss           | 0.00561       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.328        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 70            |
|    time_elapsed         | 2375          |
|    total_timesteps      | 560000        |
| train/                  |               |
|    approx_kl            | 0.00030541897 |
|    clip_fraction        | 0.00334       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0887       |
|    explained_variance   | 1.31e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00117       |
|    n_updates            | 690           |
|    policy_gradient_loss | -0.000554     |
|    reward               | -0.0032833703 |
|    value_loss           | 0.00605       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.317        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 71            |
|    time_elapsed         | 2409          |
|    total_timesteps      | 568000        |
| train/                  |               |
|    approx_kl            | 0.00023701454 |
|    clip_fraction        | 0.00372       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0989       |
|    explained_variance   | 9.12e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000938      |
|    n_updates            | 700           |
|    policy_gradient_loss | -0.000203     |
|    reward               | 0.0           |
|    value_loss           | 0.00585       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.308        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 72            |
|    time_elapsed         | 2441          |
|    total_timesteps      | 576000        |
| train/                  |               |
|    approx_kl            | 0.00028162388 |
|    clip_fraction        | 0.00211       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.101        |
|    explained_variance   | -1.38e-05     |
|    learning_rate        | 0.00025       |
|    loss                 | -0.0021       |
|    n_updates            | 710           |
|    policy_gradient_loss | -0.000262     |
|    reward               | 0.0           |
|    value_loss           | 0.00581       |
-------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.299        |
| time/                   |               |
|    fps                  | 235           |
|    iterations           | 73            |
|    time_elapsed         | 2475          |
|    total_timesteps      | 584000        |
| train/                  |               |
|    approx_kl            | 0.00017221985 |
|    clip_fraction        | 0.00167       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.102        |
|    explained_variance   | 1.79e-06      |
|    learning_rate        | 0.00025       |
|    loss                 | 0.000633      |
|    n_updates            | 720           |
|    policy_gradient_loss | -0.000369     |
|    reward               | 0.0           |
|    value_loss           | 0.00572       |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.68e+03     |
|    ep_rew_mean          | -0.289       |
| time/                   |              |
|    fps                  | 236          |
|    iterations           | 74           |
|    time_elapsed         | 2506         |
|    total_timesteps      | 592000       |
| train/                  |              |
|    approx_kl            | 0.0003995934 |
|    clip_fraction        | 0.00449      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0907      |
|    explained_variance   | 4.72e-05     |
|    learning_rate        | 0.00025      |
|    loss                 | 0.000844     |
|    n_updates            | 730          |
|    policy_gradient_loss | -0.000801    |
|    reward               | -0.00020002  |
|    value_loss           | 0.00519      |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.68e+03      |
|    ep_rew_mean          | -0.276        |
| time/                   |               |
|    fps                  | 236           |
|    iterations           | 75            |
|    time_elapsed         | 2539          |
|    total_timesteps      | 600000        |
| train/                  |               |
|    approx_kl            | 0.00021159471 |
|    clip_fraction        | 0.00311       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0969       |
|    explained_variance   | -0.000334     |
|    learning_rate        | 0.00025       |
|    loss                 | 0.00124       |
|    n_updates            | 740           |
|    policy_gradient_loss | -0.000197     |
|    reward               | 0.031942457   |
|    value_loss           | 0.00657       |
-------------------------------------------


In [None]:
# Run the selected model against full_env with rendering switched on.
# NOTE(review): later cells read full_env.state_memory, so this call presumably
# has to run first to populate it — confirm against the agent/env code.
agent.predict(selected_model, full_env, render=True)

In [None]:
# NOTE(review): presumably plots results collected by the agent (e.g. from the
# predict call above); what exactly is drawn is defined in the agent class,
# not in this notebook — confirm there.
agent.plot_multiple()

In [None]:
# Collect the states recorded on the environment into a DataFrame for inspection.
# NOTE(review): the layout of state_memory (one entry per step?) is not visible
# in this notebook — confirm against the environment class.
state_df = pd.DataFrame(data=full_env.state_memory)

In [None]:
# Show only the last recorded rows rather than echoing the whole frame, so the
# cell output stays small and bounded.
state_df.tail(10)

In [None]:
# Preview the first 10 rows of state_df.
state_df.head(10)

In [None]:
# Fractional change of "Close" from each row to the next (pandas pct_change).
# Assuming full_env.df is a pandas DataFrame, the first entry is NaN because it
# has no preceding row.
# NOTE(review): this is a plain per-row return despite the name `alpha` —
# confirm the naming is intended.
alpha = full_env.df["Close"].pct_change()

In [None]:
# Summarise the per-row changes (count, mean, std, min, quartiles, max) instead
# of echoing the whole series.
alpha.describe()

In [None]:
# Preview the first rows of the environment's data rather than echoing the
# whole frame.
# NOTE(review): assumes full_env.df is a pandas DataFrame, as its use with
# ["Close"].pct_change() elsewhere in this notebook suggests — confirm.
full_env.df.head()

In [None]:
# Wider preview: the first 30 rows of state_df.
state_df.head(30)