# 1. Import Libraries

## 1A) Import Libraries

In [1]:
# Import the necessary libraries

# gymnasium is a modern version of the gym library used to create and interact with reinforcement learning environments
import gymnasium as gym

# Import PPO (Proximal Policy Optimization) from stable-baselines3, which is a popular RL algorithm
from stable_baselines3 import PPO

# Import the evaluation function to assess the performance of the trained policy
from stable_baselines3.common.evaluation import evaluate_policy

## 1B) Create Env and Test

In [None]:
# Build the standard BipedalWalker task; render_mode="human" is requested so the
# episode can be watched while the random-action test below runs
env = gym.make(
    "BipedalWalker-v3",
    render_mode="human",
)

In [None]:
# Start a fresh episode (no seed or options). Gymnasium's reset() returns an
# (observation, info) pair, so unpack it instead of binding the tuple to `obs`.
obs, info = env.reset()

try:
    # Let the agent take random actions for 1000 steps
    for _ in range(1000):
        # Take a random action sampled from the environment's action space
        action = env.action_space.sample()

        # Step the environment forward using the chosen action.
        # step() returns the new observation (obs), the reward, whether the
        # episode terminated (done), whether it was truncated (truncated),
        # and additional info (info).
        obs, reward, done, truncated, info = env.step(action)

        # If the episode is finished (either done or truncated), start a new one
        if done or truncated:
            obs, info = env.reset()
finally:
    # Close the environment to clean up resources, even if the loop is interrupted
    env.close()

# 2) Train Model for Normal Version with PPO

## 2A) Train Model

In [None]:
# Recreate the environment for training; the previous `env` was closed at the
# end of the random-action test, and no render_mode is requested here.
env = gym.make("BipedalWalker-v3")

In [None]:
# Instantiate PPO with the built-in MLP policy on the training environment;
# verbose=1 turns on the training log output
model = PPO(
    policy="MlpPolicy",
    env=env,
    verbose=1,
)

In [None]:
# Train the agent for one million environment steps
model.learn(total_timesteps=1_000_000)

## 2B) Save Model

In [None]:
# Persist the trained model under the name "ppo_bipedalwalker_1M"
model.save("ppo_bipedalwalker_1M")

In [None]:
# Drop the in-memory model so the evaluation section has to reload it from disk
del model

## 2C) Evaluate Model

In [None]:
# Reload the model saved in section 2B. The name must match the one passed to
# model.save() ("ppo_bipedalwalker_1M"); the previous "ppo_bipedalwalker" did
# not correspond to any file written by this notebook.
model = PPO.load("ppo_bipedalwalker_1M")

In [None]:
# Environment used for evaluation, created with render_mode="human"
env = gym.make("BipedalWalker-v3", render_mode="human")

In [None]:
# Evaluate the loaded model (here, over 10 episodes)
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)

# Report the mean episode reward and its standard deviation
print(f"Ortalama ödül: {mean_reward} ± {std_reward}")

# Release the rendering environment; `env` is rebound to a new environment in
# the next section, so without this call the evaluation env is never closed.
env.close()

# 3) Train Model for Hardcore Version with PPO

## 3A) Test Environment

In [None]:
# Hardcore variant of BipedalWalker (hardcore=True), created with
# render_mode="human" for the random-action test below
env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human")

In [None]:
# Start a fresh episode (no seed or options). Gymnasium's reset() returns an
# (observation, info) pair, so unpack it instead of binding the tuple to `obs`.
obs, info = env.reset()

try:
    # Let the agent take random actions for 1000 steps
    for _ in range(1000):
        # Take a random action sampled from the environment's action space
        action = env.action_space.sample()

        # Step the environment forward using the chosen action.
        # step() returns the new observation (obs), the reward, whether the
        # episode terminated (done), whether it was truncated (truncated),
        # and additional info (info).
        obs, reward, done, truncated, info = env.step(action)

        # If the episode is finished (either done or truncated), start a new one
        if done or truncated:
            obs, info = env.reset()
finally:
    # Close the environment to clean up resources, even if the loop is interrupted
    env.close()

## 3B) Train Model

In [3]:
# Build a fresh hardcore environment for training. The `env` left over from the
# random-action test above was created with render_mode="human" and has already
# been closed, so it must not be handed to PPO when the notebook is run top to
# bottom.
env = gym.make("BipedalWalker-v3", hardcore=True)

# Create the PPO model with a Multi-Layer Perceptron (MLP) policy
model = PPO("MlpPolicy", env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [2]:
# Hardcore variant of BipedalWalker, created without a render_mode.
# NOTE(review): the execution counts show this cell was run before the PPO cell
# above, although it appears after it in the notebook.
env = gym.make("BipedalWalker-v3", hardcore=True)

In [5]:
# Train the hardcore agent for two million environment steps
model.learn(total_timesteps=2_000_000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 362      |
|    ep_rew_mean     | -76.1    |
| time/              |          |
|    fps             | 4365     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 981         |
|    ep_rew_mean          | -64.1       |
| time/                   |             |
|    fps                  | 3558        |
|    iterations           | 2           |
|    time_elapsed         | 1           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.023978543 |
|    clip_fraction        | 0.234       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.37        |
|    explained_variance   | 0.836       |
|    learning_rate        | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 973         |
|    ep_rew_mean          | -65.4       |
| time/                   |             |
|    fps                  | 3110        |
|    iterations           | 11          |
|    time_elapsed         | 7           |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.016823713 |
|    clip_fraction        | 0.169       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.389       |
|    explained_variance   | 0.289       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.88        |
|    n_updates            | 4990        |
|    policy_gradient_loss | -0.0131     |
|    std                  | 0.236       |
|    value_loss           | 9.75        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.01e+03    |
|    ep_rew_mean          | -65.1       |
| time/                   |             |
|    fps                  | 3056        |
|    iterations           | 20          |
|    time_elapsed         | 13          |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.022074915 |
|    clip_fraction        | 0.248       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.455       |
|    explained_variance   | 0.951       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.317       |
|    n_updates            | 5080        |
|    policy_gradient_loss | -0.0125     |
|    std                  | 0.233       |
|    value_loss           | 1.7         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.05e+03    |
|    ep_rew_mean          | -63.7       |
| time/                   |             |
|    fps                  | 3024        |
|    iterations           | 29          |
|    time_elapsed         | 19          |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.015401494 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.569       |
|    explained_variance   | 0.706       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.62        |
|    n_updates            | 5170        |
|    policy_gradient_loss | -0.00748    |
|    std                  | 0.23        |
|    value_loss           | 10.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 978         |
|    ep_rew_mean          | -61.3       |
| time/                   |             |
|    fps                  | 3010        |
|    iterations           | 38          |
|    time_elapsed         | 25          |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.015435314 |
|    clip_fraction        | 0.148       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.573       |
|    explained_variance   | 0.918       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.48        |
|    n_updates            | 5260        |
|    policy_gradient_loss | -0.0127     |
|    std                  | 0.231       |
|    value_loss           | 18          |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 945         |
|    ep_rew_mean          | -61.1       |
| time/                   |             |
|    fps                  | 2999        |
|    iterations           | 47          |
|    time_elapsed         | 32          |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.018823965 |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.643       |
|    explained_variance   | 0.913       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.67        |
|    n_updates            | 5350        |
|    policy_gradient_loss | -0.0156     |
|    std                  | 0.227       |
|    value_loss           | 13          |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 946         |
|    ep_rew_mean          | -60         |
| time/                   |             |
|    fps                  | 2991        |
|    iterations           | 56          |
|    time_elapsed         | 38          |
|    total_timesteps      | 114688      |
| train/                  |             |
|    approx_kl            | 0.022764334 |
|    clip_fraction        | 0.295       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.782       |
|    explained_variance   | 0.536       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.126       |
|    n_updates            | 5440        |
|    policy_gradient_loss | -0.00564    |
|    std                  | 0.218       |
|    value_loss           | 0.793       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 926         |
|    ep_rew_mean          | -57.9       |
| time/                   |             |
|    fps                  | 2984        |
|    iterations           | 65          |
|    time_elapsed         | 44          |
|    total_timesteps      | 133120      |
| train/                  |             |
|    approx_kl            | 0.021401584 |
|    clip_fraction        | 0.242       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.845       |
|    explained_variance   | 0.738       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.909       |
|    n_updates            | 5530        |
|    policy_gradient_loss | -0.0073     |
|    std                  | 0.213       |
|    value_loss           | 4.68        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 885         |
|    ep_rew_mean          | -60.9       |
| time/                   |             |
|    fps                  | 2980        |
|    iterations           | 74          |
|    time_elapsed         | 50          |
|    total_timesteps      | 151552      |
| train/                  |             |
|    approx_kl            | 0.032993257 |
|    clip_fraction        | 0.341       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.939       |
|    explained_variance   | 0.391       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.226       |
|    n_updates            | 5620        |
|    policy_gradient_loss | -0.00599    |
|    std                  | 0.207       |
|    value_loss           | 0.524       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 908        |
|    ep_rew_mean          | -61.4      |
| time/                   |            |
|    fps                  | 2975       |
|    iterations           | 83         |
|    time_elapsed         | 57         |
|    total_timesteps      | 169984     |
| train/                  |            |
|    approx_kl            | 0.04244484 |
|    clip_fraction        | 0.293      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.968      |
|    explained_variance   | 0.935      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.502      |
|    n_updates            | 5710       |
|    policy_gradient_loss | -0.00288   |
|    std                  | 0.207      |
|    value_loss           | 2.79       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 837        |
|    ep_rew_mean          | -66.3      |
| time/                   |            |
|    fps                  | 2973       |
|    iterations           | 92         |
|    time_elapsed         | 63         |
|    total_timesteps      | 188416     |
| train/                  |            |
|    approx_kl            | 0.04943934 |
|    clip_fraction        | 0.382      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.01       |
|    explained_variance   | 0.937      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.706      |
|    n_updates            | 5800       |
|    policy_gradient_loss | -0.00496   |
|    std                  | 0.205      |
|    value_loss           | 5.25       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 798        |
|    ep_rew_mean          | -68        |
| time/                   |            |
|    fps                  | 2969       |
|    iterations           | 101        |
|    time_elapsed         | 69         |
|    total_timesteps      | 206848     |
| train/                  |            |
|    approx_kl            | 0.03342785 |
|    clip_fraction        | 0.31       |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.14       |
|    explained_variance   | 0.88       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.348      |
|    n_updates            | 5890       |
|    policy_gradient_loss | -0.00938   |
|    std                  | 0.197      |
|    value_loss           | 4.74       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 851         |
|    ep_rew_mean          | -65.9       |
| time/                   |             |
|    fps                  | 2965        |
|    iterations           | 110         |
|    time_elapsed         | 75          |
|    total_timesteps      | 225280      |
| train/                  |             |
|    approx_kl            | 0.027191313 |
|    clip_fraction        | 0.267       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.21        |
|    explained_variance   | 0.842       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0325      |
|    n_updates            | 5980        |
|    policy_gradient_loss | -0.00533    |
|    std                  | 0.196       |
|    value_loss           | 0.166       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 886         |
|    ep_rew_mean          | -62.2       |
| time/                   |             |
|    fps                  | 2962        |
|    iterations           | 119         |
|    time_elapsed         | 82          |
|    total_timesteps      | 243712      |
| train/                  |             |
|    approx_kl            | 0.030104186 |
|    clip_fraction        | 0.309       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.3         |
|    explained_variance   | 0.95        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.17        |
|    n_updates            | 6070        |
|    policy_gradient_loss | -0.0122     |
|    std                  | 0.191       |
|    value_loss           | 0.431       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 931         |
|    ep_rew_mean          | -61.3       |
| time/                   |             |
|    fps                  | 2958        |
|    iterations           | 128         |
|    time_elapsed         | 88          |
|    total_timesteps      | 262144      |
| train/                  |             |
|    approx_kl            | 0.034949142 |
|    clip_fraction        | 0.31        |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.36        |
|    explained_variance   | 0.796       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.324       |
|    n_updates            | 6160        |
|    policy_gradient_loss | -0.00912    |
|    std                  | 0.188       |
|    value_loss           | 1.58        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 997         |
|    ep_rew_mean          | -56.3       |
| time/                   |             |
|    fps                  | 2956        |
|    iterations           | 137         |
|    time_elapsed         | 94          |
|    total_timesteps      | 280576      |
| train/                  |             |
|    approx_kl            | 0.026389075 |
|    clip_fraction        | 0.312       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.34        |
|    explained_variance   | -0.533      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.487       |
|    n_updates            | 6250        |
|    policy_gradient_loss | 0.00179     |
|    std                  | 0.189       |
|    value_loss           | 1.14        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.08e+03    |
|    ep_rew_mean          | -51.4       |
| time/                   |             |
|    fps                  | 2953        |
|    iterations           | 146         |
|    time_elapsed         | 101         |
|    total_timesteps      | 299008      |
| train/                  |             |
|    approx_kl            | 0.021581309 |
|    clip_fraction        | 0.223       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.48        |
|    explained_variance   | 0.87        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.21        |
|    n_updates            | 6340        |
|    policy_gradient_loss | -0.0111     |
|    std                  | 0.183       |
|    value_loss           | 4.82        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+03    |
|    ep_rew_mean          | -47         |
| time/                   |             |
|    fps                  | 2950        |
|    iterations           | 155         |
|    time_elapsed         | 107         |
|    total_timesteps      | 317440      |
| train/                  |             |
|    approx_kl            | 0.034281038 |
|    clip_fraction        | 0.319       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.5         |
|    explained_variance   | 0.84        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.012       |
|    n_updates            | 6430        |
|    policy_gradient_loss | -0.00553    |
|    std                  | 0.181       |
|    value_loss           | 0.143       |
-----------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.1e+03     |
|    ep_rew_mean          | -46.3       |
| time/                   |             |
|    fps                  | 2948        |
|    iterations           | 165         |
|    time_elapsed         | 114         |
|    total_timesteps      | 337920      |
| train/                  |             |
|    approx_kl            | 0.019579217 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.54        |
|    explained_variance   | 0.923       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.47        |
|    n_updates            | 6530        |
|    policy_gradient_loss | -0.0167     |
|    std                  | 0.181       |
|    value_loss           | 16.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.04e+03    |
|    ep_rew_mean          | -47.6       |
| time/                   |             |
|    fps                  | 2945        |
|    iterations           | 174         |
|    time_elapsed         | 120         |
|    total_timesteps      | 356352      |
| train/                  |             |
|    approx_kl            | 0.048175603 |
|    clip_fraction        | 0.273       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.47        |
|    explained_variance   | 0.945       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.229       |
|    n_updates            | 6620        |
|    policy_gradient_loss | -0.0118     |
|    std                  | 0.183       |
|    value_loss           | 3.02        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 957         |
|    ep_rew_mean          | -50.9       |
| time/                   |             |
|    fps                  | 2943        |
|    iterations           | 184         |
|    time_elapsed         | 128         |
|    total_timesteps      | 376832      |
| train/                  |             |
|    approx_kl            | 0.022979029 |
|    clip_fraction        | 0.246       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.46        |
|    explained_variance   | 0.911       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.24        |
|    n_updates            | 6720        |
|    policy_gradient_loss | -0.0113     |
|    std                  | 0.183       |
|    value_loss           | 13.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 765         |
|    ep_rew_mean          | -61.6       |
| time/                   |             |
|    fps                  | 2939        |
|    iterations           | 193         |
|    time_elapsed         | 134         |
|    total_timesteps      | 395264      |
| train/                  |             |
|    approx_kl            | 0.018503703 |
|    clip_fraction        | 0.23        |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.51        |
|    explained_variance   | 0.876       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.21        |
|    n_updates            | 6810        |
|    policy_gradient_loss | -0.0136     |
|    std                  | 0.182       |
|    value_loss           | 11.4        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 647         |
|    ep_rew_mean          | -73.8       |
| time/                   |             |
|    fps                  | 2937        |
|    iterations           | 203         |
|    time_elapsed         | 141         |
|    total_timesteps      | 415744      |
| train/                  |             |
|    approx_kl            | 0.018187767 |
|    clip_fraction        | 0.211       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.51        |
|    explained_variance   | 0.92        |
|    learning_rate        | 0.0003      |
|    loss                 | 8.18        |
|    n_updates            | 6910        |
|    policy_gradient_loss | -0.0136     |
|    std                  | 0.181       |
|    value_loss           | 26.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | -76.8     |
| time/                   |           |
|    fps                  | 2936      |
|    iterations           | 212       |
|    time_elapsed         | 147       |
|    total_timesteps      | 434176    |
| train/                  |           |
|    approx_kl            | 0.0479966 |
|    clip_fraction        | 0.353     |
|    clip_range           | 0.2       |
|    entropy_loss         | 1.49      |
|    explained_variance   | 0.823     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.165     |
|    n_updates            | 7000      |
|    policy_gradient_loss | -0.00135  |
|    std                  | 0.182     |
|    value_loss           | 0.615     |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 633     

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 721         |
|    ep_rew_mean          | -76         |
| time/                   |             |
|    fps                  | 2934        |
|    iterations           | 221         |
|    time_elapsed         | 154         |
|    total_timesteps      | 452608      |
| train/                  |             |
|    approx_kl            | 0.031134639 |
|    clip_fraction        | 0.33        |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.46        |
|    explained_variance   | 0.927       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.05        |
|    n_updates            | 7090        |
|    policy_gradient_loss | -0.00528    |
|    std                  | 0.182       |
|    value_loss           | 6.27        |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 701        |
|    ep_rew_mean          | -76.9      |
| time/                   |            |
|    fps                  | 2934       |
|    iterations           | 230        |
|    time_elapsed         | 160        |
|    total_timesteps      | 471040     |
| train/                  |            |
|    approx_kl            | 0.02145445 |
|    clip_fraction        | 0.178      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.5        |
|    explained_variance   | 0.951      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.42       |
|    n_updates            | 7180       |
|    policy_gradient_loss | -0.00618   |
|    std                  | 0.18       |
|    value_loss           | 8.77       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 799         |
|    ep_rew_mean          | -71.3       |
| time/                   |             |
|    fps                  | 2932        |
|    iterations           | 239         |
|    time_elapsed         | 166         |
|    total_timesteps      | 489472      |
| train/                  |             |
|    approx_kl            | 0.029668389 |
|    clip_fraction        | 0.253       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.5         |
|    explained_variance   | 0.913       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.92        |
|    n_updates            | 7270        |
|    policy_gradient_loss | -0.00924    |
|    std                  | 0.181       |
|    value_loss           | 7.38        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 845          |
|    ep_rew_mean          | -68.1        |
| time/                   |              |
|    fps                  | 2931         |
|    iterations           | 248          |
|    time_elapsed         | 173          |
|    total_timesteps      | 507904       |
| train/                  |              |
|    approx_kl            | 0.0140973935 |
|    clip_fraction        | 0.164        |
|    clip_range           | 0.2          |
|    entropy_loss         | 1.64         |
|    explained_variance   | 0.888        |
|    learning_rate        | 0.0003       |
|    loss                 | 8.9          |
|    n_updates            | 7360         |
|    policy_gradient_loss | -0.0134      |
|    std                  | 0.174        |
|    value_loss           | 30.3         |
------------------------------------------
-----------------------------------------
| rollout/  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 853        |
|    ep_rew_mean          | -65.3      |
| time/                   |            |
|    fps                  | 2928       |
|    iterations           | 257        |
|    time_elapsed         | 179        |
|    total_timesteps      | 526336     |
| train/                  |            |
|    approx_kl            | 0.03808733 |
|    clip_fraction        | 0.334      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.65       |
|    explained_variance   | 0.85       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.147      |
|    n_updates            | 7450       |
|    policy_gradient_loss | 0.00401    |
|    std                  | 0.175      |
|    value_loss           | 0.41       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 853         |
|    ep_rew_mean          | -65.3       |
| time/                   |             |
|    fps                  | 2924        |
|    iterations           | 266         |
|    time_elapsed         | 186         |
|    total_timesteps      | 544768      |
| train/                  |             |
|    approx_kl            | 0.034386337 |
|    clip_fraction        | 0.274       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.7         |
|    explained_variance   | 0.963       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.147       |
|    n_updates            | 7540        |
|    policy_gradient_loss | -0.0112     |
|    std                  | 0.171       |
|    value_loss           | 2.21        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 828         |
|    ep_rew_mean          | -66         |
| time/                   |             |
|    fps                  | 2918        |
|    iterations           | 275         |
|    time_elapsed         | 192         |
|    total_timesteps      | 563200      |
| train/                  |             |
|    approx_kl            | 0.022948591 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.78        |
|    explained_variance   | 0.919       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.92        |
|    n_updates            | 7630        |
|    policy_gradient_loss | -0.0185     |
|    std                  | 0.169       |
|    value_loss           | 12.6        |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 753         |
|    ep_rew_mean          | -68.6       |
| time/                   |             |
|    fps                  | 2913        |
|    iterations           | 284         |
|    time_elapsed         | 199         |
|    total_timesteps      | 581632      |
| train/                  |             |
|    approx_kl            | 0.026879106 |
|    clip_fraction        | 0.272       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.83        |
|    explained_variance   | 0.87        |
|    learning_rate        | 0.0003      |
|    loss                 | 2           |
|    n_updates            | 7720        |
|    policy_gradient_loss | -0.0107     |
|    std                  | 0.165       |
|    value_loss           | 12.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 668       |
|    ep_rew_mean          | -71.6     |
| time/                   |           |
|    fps                  | 2908      |
|    iterations           | 293       |
|    time_elapsed         | 206       |
|    total_timesteps      | 600064    |
| train/                  |           |
|    approx_kl            | 0.0412256 |
|    clip_fraction        | 0.299     |
|    clip_range           | 0.2       |
|    entropy_loss         | 1.79      |
|    explained_variance   | 0.919     |
|    learning_rate        | 0.0003    |
|    loss                 | 1.22      |
|    n_updates            | 7810      |
|    policy_gradient_loss | -0.0117   |
|    std                  | 0.169     |
|    value_loss           | 8.15      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 666     

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 600        |
|    ep_rew_mean          | -74.2      |
| time/                   |            |
|    fps                  | 2903       |
|    iterations           | 302        |
|    time_elapsed         | 213        |
|    total_timesteps      | 618496     |
| train/                  |            |
|    approx_kl            | 0.03506654 |
|    clip_fraction        | 0.332      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.82       |
|    explained_variance   | 0.962      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.499      |
|    n_updates            | 7900       |
|    policy_gradient_loss | -0.00752   |
|    std                  | 0.168      |
|    value_loss           | 1.85       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 664         |
|    ep_rew_mean          | -68.8       |
| time/                   |             |
|    fps                  | 2897        |
|    iterations           | 311         |
|    time_elapsed         | 219         |
|    total_timesteps      | 636928      |
| train/                  |             |
|    approx_kl            | 0.039018787 |
|    clip_fraction        | 0.384       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.78        |
|    explained_variance   | -0.802      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.127       |
|    n_updates            | 7990        |
|    policy_gradient_loss | 0.000561    |
|    std                  | 0.169       |
|    value_loss           | 1.14        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 683         |
|    ep_rew_mean          | -66.2       |
| time/                   |             |
|    fps                  | 2891        |
|    iterations           | 320         |
|    time_elapsed         | 226         |
|    total_timesteps      | 655360      |
| train/                  |             |
|    approx_kl            | 0.028967343 |
|    clip_fraction        | 0.305       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.73        |
|    explained_variance   | 0.914       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.58        |
|    n_updates            | 8080        |
|    policy_gradient_loss | -0.0069     |
|    std                  | 0.17        |
|    value_loss           | 5.87        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 720         |
|    ep_rew_mean          | -63         |
| time/                   |             |
|    fps                  | 2884        |
|    iterations           | 329         |
|    time_elapsed         | 233         |
|    total_timesteps      | 673792      |
| train/                  |             |
|    approx_kl            | 0.027588423 |
|    clip_fraction        | 0.214       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.74        |
|    explained_variance   | 0.895       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.42        |
|    n_updates            | 8170        |
|    policy_gradient_loss | -0.0157     |
|    std                  | 0.169       |
|    value_loss           | 17.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 776         |
|    ep_rew_mean          | -59.8       |
| time/                   |             |
|    fps                  | 2878        |
|    iterations           | 338         |
|    time_elapsed         | 240         |
|    total_timesteps      | 692224      |
| train/                  |             |
|    approx_kl            | 0.020295372 |
|    clip_fraction        | 0.183       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.79        |
|    explained_variance   | 0.845       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.3         |
|    n_updates            | 8260        |
|    policy_gradient_loss | -0.0131     |
|    std                  | 0.166       |
|    value_loss           | 15.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 753         |
|    ep_rew_mean          | -59.8       |
| time/                   |             |
|    fps                  | 2871        |
|    iterations           | 347         |
|    time_elapsed         | 247         |
|    total_timesteps      | 710656      |
| train/                  |             |
|    approx_kl            | 0.034521643 |
|    clip_fraction        | 0.338       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.84        |
|    explained_variance   | 0.542       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.869       |
|    n_updates            | 8350        |
|    policy_gradient_loss | -0.00478    |
|    std                  | 0.164       |
|    value_loss           | 3.69        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 792         |
|    ep_rew_mean          | -58.9       |
| time/                   |             |
|    fps                  | 2865        |
|    iterations           | 357         |
|    time_elapsed         | 255         |
|    total_timesteps      | 731136      |
| train/                  |             |
|    approx_kl            | 0.036403082 |
|    clip_fraction        | 0.383       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.86        |
|    explained_variance   | 0.694       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.107       |
|    n_updates            | 8450        |
|    policy_gradient_loss | -0.00122    |
|    std                  | 0.162       |
|    value_loss           | 0.352       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 829         |
|    ep_rew_mean          | -57         |
| time/                   |             |
|    fps                  | 2858        |
|    iterations           | 366         |
|    time_elapsed         | 262         |
|    total_timesteps      | 749568      |
| train/                  |             |
|    approx_kl            | 0.027148178 |
|    clip_fraction        | 0.246       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.94        |
|    explained_variance   | 0.91        |
|    learning_rate        | 0.0003      |
|    loss                 | 6.4         |
|    n_updates            | 8540        |
|    policy_gradient_loss | -0.00334    |
|    std                  | 0.159       |
|    value_loss           | 6.85        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 877         |
|    ep_rew_mean          | -54.4       |
| time/                   |             |
|    fps                  | 2853        |
|    iterations           | 375         |
|    time_elapsed         | 269         |
|    total_timesteps      | 768000      |
| train/                  |             |
|    approx_kl            | 0.032122582 |
|    clip_fraction        | 0.263       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.95        |
|    explained_variance   | 0.914       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.482       |
|    n_updates            | 8630        |
|    policy_gradient_loss | -0.00848    |
|    std                  | 0.159       |
|    value_loss           | 1.71        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 892         |
|    ep_rew_mean          | -52         |
| time/                   |             |
|    fps                  | 2849        |
|    iterations           | 384         |
|    time_elapsed         | 275         |
|    total_timesteps      | 786432      |
| train/                  |             |
|    approx_kl            | 0.022318436 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.04        |
|    explained_variance   | 0.812       |
|    learning_rate        | 0.0003      |
|    loss                 | 12          |
|    n_updates            | 8720        |
|    policy_gradient_loss | -0.015      |
|    std                  | 0.155       |
|    value_loss           | 15.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 950         |
|    ep_rew_mean          | -50.4       |
| time/                   |             |
|    fps                  | 2846        |
|    iterations           | 393         |
|    time_elapsed         | 282         |
|    total_timesteps      | 804864      |
| train/                  |             |
|    approx_kl            | 0.025743674 |
|    clip_fraction        | 0.269       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.07        |
|    explained_variance   | 0.844       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.73        |
|    n_updates            | 8810        |
|    policy_gradient_loss | -0.00556    |
|    std                  | 0.155       |
|    value_loss           | 3.15        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 869         |
|    ep_rew_mean          | -59.3       |
| time/                   |             |
|    fps                  | 2842        |
|    iterations           | 402         |
|    time_elapsed         | 289         |
|    total_timesteps      | 823296      |
| train/                  |             |
|    approx_kl            | 0.020995598 |
|    clip_fraction        | 0.233       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.03        |
|    explained_variance   | 0.92        |
|    learning_rate        | 0.0003      |
|    loss                 | 6.39        |
|    n_updates            | 8900        |
|    policy_gradient_loss | -0.0134     |
|    std                  | 0.156       |
|    value_loss           | 21.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 892         |
|    ep_rew_mean          | -60.5       |
| time/                   |             |
|    fps                  | 2839        |
|    iterations           | 411         |
|    time_elapsed         | 296         |
|    total_timesteps      | 841728      |
| train/                  |             |
|    approx_kl            | 0.037996814 |
|    clip_fraction        | 0.363       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.07        |
|    explained_variance   | 0.824       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.402       |
|    n_updates            | 8990        |
|    policy_gradient_loss | -0.00207    |
|    std                  | 0.153       |
|    value_loss           | 2.24        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 866        |
|    ep_rew_mean          | -62.7      |
| time/                   |            |
|    fps                  | 2836       |
|    iterations           | 420        |
|    time_elapsed         | 303        |
|    total_timesteps      | 860160     |
| train/                  |            |
|    approx_kl            | 0.03623031 |
|    clip_fraction        | 0.363      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.09       |
|    explained_variance   | 0.646      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0329     |
|    n_updates            | 9080       |
|    policy_gradient_loss | -0.0135    |
|    std                  | 0.153      |
|    value_loss           | 0.243      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 918         |
|    ep_rew_mean          | -63.1       |
| time/                   |             |
|    fps                  | 2833        |
|    iterations           | 429         |
|    time_elapsed         | 310         |
|    total_timesteps      | 878592      |
| train/                  |             |
|    approx_kl            | 0.034900796 |
|    clip_fraction        | 0.272       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.09        |
|    explained_variance   | 0.929       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.643       |
|    n_updates            | 9170        |
|    policy_gradient_loss | 0.000621    |
|    std                  | 0.155       |
|    value_loss           | 6.13        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 947        |
|    ep_rew_mean          | -59.8      |
| time/                   |            |
|    fps                  | 2830       |
|    iterations           | 438        |
|    time_elapsed         | 316        |
|    total_timesteps      | 897024     |
| train/                  |            |
|    approx_kl            | 0.02221847 |
|    clip_fraction        | 0.196      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.21       |
|    explained_variance   | 0.857      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.725      |
|    n_updates            | 9260       |
|    policy_gradient_loss | -0.0179    |
|    std                  | 0.149      |
|    value_loss           | 4.05       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 959       |
|    ep_rew_mean          | -57.9     |
| time/                   |           |
|    fps                  | 2827      |
|    iterations           | 447       |
|    time_elapsed         | 323       |
|    total_timesteps      | 915456    |
| train/                  |           |
|    approx_kl            | 0.0361191 |
|    clip_fraction        | 0.324     |
|    clip_range           | 0.2       |
|    entropy_loss         | 2.25      |
|    explained_variance   | 0.935     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.954     |
|    n_updates            | 9350      |
|    policy_gradient_loss | -0.0114   |
|    std                  | 0.148     |
|    value_loss           | 2.9       |
---------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 976       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 933        |
|    ep_rew_mean          | -58.8      |
| time/                   |            |
|    fps                  | 2825       |
|    iterations           | 456        |
|    time_elapsed         | 330        |
|    total_timesteps      | 933888     |
| train/                  |            |
|    approx_kl            | 0.03732694 |
|    clip_fraction        | 0.278      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.21       |
|    explained_variance   | 0.502      |
|    learning_rate        | 0.0003     |
|    loss                 | 20.3       |
|    n_updates            | 9440       |
|    policy_gradient_loss | -0.0163    |
|    std                  | 0.148      |
|    value_loss           | 26.6       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 882        |
|    ep_rew_mean          | -61.4      |
| time/                   |            |
|    fps                  | 2822       |
|    iterations           | 465        |
|    time_elapsed         | 337        |
|    total_timesteps      | 952320     |
| train/                  |            |
|    approx_kl            | 0.03424764 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.17       |
|    explained_variance   | 0.815      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.06       |
|    n_updates            | 9530       |
|    policy_gradient_loss | -0.0128    |
|    std                  | 0.15       |
|    value_loss           | 10.1       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 840        |
|    ep_rew_mean          | -63.1      |
| time/                   |            |
|    fps                  | 2819       |
|    iterations           | 474        |
|    time_elapsed         | 344        |
|    total_timesteps      | 970752     |
| train/                  |            |
|    approx_kl            | 0.05485917 |
|    clip_fraction        | 0.329      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.23       |
|    explained_variance   | 0.943      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.371      |
|    n_updates            | 9620       |
|    policy_gradient_loss | -0.0139    |
|    std                  | 0.146      |
|    value_loss           | 2.71       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 786         |
|    ep_rew_mean          | -66.5       |
| time/                   |             |
|    fps                  | 2817        |
|    iterations           | 483         |
|    time_elapsed         | 351         |
|    total_timesteps      | 989184      |
| train/                  |             |
|    approx_kl            | 0.021455636 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.33        |
|    explained_variance   | 0.924       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.75        |
|    n_updates            | 9710        |
|    policy_gradient_loss | -0.0196     |
|    std                  | 0.143       |
|    value_loss           | 18          |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 797         |
|    ep_rew_mean          | -65.8       |
| time/                   |             |
|    fps                  | 2815        |
|    iterations           | 492         |
|    time_elapsed         | 357         |
|    total_timesteps      | 1007616     |
| train/                  |             |
|    approx_kl            | 0.021693066 |
|    clip_fraction        | 0.302       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.32        |
|    explained_variance   | 0.594       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0318     |
|    n_updates            | 9800        |
|    policy_gradient_loss | 0.00217     |
|    std                  | 0.142       |
|    value_loss           | 0.0173      |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 796        |
|    ep_rew_mean          | -65.9      |
| time/                   |            |
|    fps                  | 2813       |
|    iterations           | 501        |
|    time_elapsed         | 364        |
|    total_timesteps      | 1026048    |
| train/                  |            |
|    approx_kl            | 0.03593486 |
|    clip_fraction        | 0.336      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.35       |
|    explained_variance   | 0.951      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.626      |
|    n_updates            | 9890       |
|    policy_gradient_loss | -0.00683   |
|    std                  | 0.142      |
|    value_loss           | 1.76       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 850         |
|    ep_rew_mean          | -64.3       |
| time/                   |             |
|    fps                  | 2810        |
|    iterations           | 510         |
|    time_elapsed         | 371         |
|    total_timesteps      | 1044480     |
| train/                  |             |
|    approx_kl            | 0.040167466 |
|    clip_fraction        | 0.317       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.4         |
|    explained_variance   | 0.941       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.38        |
|    n_updates            | 9980        |
|    policy_gradient_loss | -0.011      |
|    std                  | 0.14        |
|    value_loss           | 3.43        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 894         |
|    ep_rew_mean          | -63.7       |
| time/                   |             |
|    fps                  | 2809        |
|    iterations           | 519         |
|    time_elapsed         | 378         |
|    total_timesteps      | 1062912     |
| train/                  |             |
|    approx_kl            | 0.035955712 |
|    clip_fraction        | 0.215       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.51        |
|    explained_variance   | 0.967       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.56        |
|    n_updates            | 10070       |
|    policy_gradient_loss | -0.0153     |
|    std                  | 0.135       |
|    value_loss           | 2.07        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 925         |
|    ep_rew_mean          | -61.2       |
| time/                   |             |
|    fps                  | 2807        |
|    iterations           | 528         |
|    time_elapsed         | 385         |
|    total_timesteps      | 1081344     |
| train/                  |             |
|    approx_kl            | 0.034375146 |
|    clip_fraction        | 0.374       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.57        |
|    explained_variance   | 0.799       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.416       |
|    n_updates            | 10160       |
|    policy_gradient_loss | -0.01       |
|    std                  | 0.134       |
|    value_loss           | 2.1         |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 930         |
|    ep_rew_mean          | -60.5       |
| time/                   |             |
|    fps                  | 2806        |
|    iterations           | 537         |
|    time_elapsed         | 391         |
|    total_timesteps      | 1099776     |
| train/                  |             |
|    approx_kl            | 0.019544505 |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.57        |
|    explained_variance   | 0.944       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.73        |
|    n_updates            | 10250       |
|    policy_gradient_loss | -0.0118     |
|    std                  | 0.134       |
|    value_loss           | 13.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 906         |
|    ep_rew_mean          | -61.2       |
| time/                   |             |
|    fps                  | 2805        |
|    iterations           | 546         |
|    time_elapsed         | 398         |
|    total_timesteps      | 1118208     |
| train/                  |             |
|    approx_kl            | 0.030289358 |
|    clip_fraction        | 0.315       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.5         |
|    explained_variance   | 0.971       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.39        |
|    n_updates            | 10340       |
|    policy_gradient_loss | 0.00228     |
|    std                  | 0.136       |
|    value_loss           | 1.64        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 877         |
|    ep_rew_mean          | -61.6       |
| time/                   |             |
|    fps                  | 2804        |
|    iterations           | 555         |
|    time_elapsed         | 405         |
|    total_timesteps      | 1136640     |
| train/                  |             |
|    approx_kl            | 0.028328318 |
|    clip_fraction        | 0.308       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.49        |
|    explained_variance   | 0.96        |
|    learning_rate        | 0.0003      |
|    loss                 | 1.04        |
|    n_updates            | 10430       |
|    policy_gradient_loss | -0.00863    |
|    std                  | 0.137       |
|    value_loss           | 2.49        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 868         |
|    ep_rew_mean          | -63.1       |
| time/                   |             |
|    fps                  | 2803        |
|    iterations           | 564         |
|    time_elapsed         | 412         |
|    total_timesteps      | 1155072     |
| train/                  |             |
|    approx_kl            | 0.054249592 |
|    clip_fraction        | 0.366       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.52        |
|    explained_variance   | 0.0714      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.224       |
|    n_updates            | 10520       |
|    policy_gradient_loss | -0.00345    |
|    std                  | 0.135       |
|    value_loss           | 0.756       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 913        |
|    ep_rew_mean          | -61.6      |
| time/                   |            |
|    fps                  | 2802       |
|    iterations           | 573        |
|    time_elapsed         | 418        |
|    total_timesteps      | 1173504    |
| train/                  |            |
|    approx_kl            | 0.04521198 |
|    clip_fraction        | 0.377      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.61       |
|    explained_variance   | 0.955      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.319      |
|    n_updates            | 10610      |
|    policy_gradient_loss | 0.0101     |
|    std                  | 0.133      |
|    value_loss           | 1.74       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 929         |
|    ep_rew_mean          | -60.1       |
| time/                   |             |
|    fps                  | 2801        |
|    iterations           | 582         |
|    time_elapsed         | 425         |
|    total_timesteps      | 1191936     |
| train/                  |             |
|    approx_kl            | 0.050765485 |
|    clip_fraction        | 0.372       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.64        |
|    explained_variance   | -1.43       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.185       |
|    n_updates            | 10700       |
|    policy_gradient_loss | -0.00146    |
|    std                  | 0.132       |
|    value_loss           | 1.19        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 970         |
|    ep_rew_mean          | -57         |
| time/                   |             |
|    fps                  | 2801        |
|    iterations           | 591         |
|    time_elapsed         | 432         |
|    total_timesteps      | 1210368     |
| train/                  |             |
|    approx_kl            | 0.034628812 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.66        |
|    explained_variance   | 0.923       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.591       |
|    n_updates            | 10790       |
|    policy_gradient_loss | -0.0146     |
|    std                  | 0.131       |
|    value_loss           | 8.36        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 948         |
|    ep_rew_mean          | -55.9       |
| time/                   |             |
|    fps                  | 2800        |
|    iterations           | 600         |
|    time_elapsed         | 438         |
|    total_timesteps      | 1228800     |
| train/                  |             |
|    approx_kl            | 0.051050678 |
|    clip_fraction        | 0.322       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.72        |
|    explained_variance   | 0.966       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.68        |
|    n_updates            | 10880       |
|    policy_gradient_loss | -0.00789    |
|    std                  | 0.128       |
|    value_loss           | 2.51        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 987         |
|    ep_rew_mean          | -52.6       |
| time/                   |             |
|    fps                  | 2799        |
|    iterations           | 609         |
|    time_elapsed         | 445         |
|    total_timesteps      | 1247232     |
| train/                  |             |
|    approx_kl            | 0.017457714 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.72        |
|    explained_variance   | 0.889       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.39        |
|    n_updates            | 10970       |
|    policy_gradient_loss | -0.0155     |
|    std                  | 0.129       |
|    value_loss           | 17.7        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 891        |
|    ep_rew_mean          | -54.4      |
| time/                   |            |
|    fps                  | 2798       |
|    iterations           | 618        |
|    time_elapsed         | 452        |
|    total_timesteps      | 1265664    |
| train/                  |            |
|    approx_kl            | 0.06261064 |
|    clip_fraction        | 0.291      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.78       |
|    explained_variance   | 0.93       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.437      |
|    n_updates            | 11060      |
|    policy_gradient_loss | -0.0185    |
|    std                  | 0.126      |
|    value_loss           | 3.14       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 832         |
|    ep_rew_mean          | -59.5       |
| time/                   |             |
|    fps                  | 2798        |
|    iterations           | 627         |
|    time_elapsed         | 458         |
|    total_timesteps      | 1284096     |
| train/                  |             |
|    approx_kl            | 0.017367473 |
|    clip_fraction        | 0.197       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.8         |
|    explained_variance   | 0.905       |
|    learning_rate        | 0.0003      |
|    loss                 | 10.3        |
|    n_updates            | 11150       |
|    policy_gradient_loss | -0.0164     |
|    std                  | 0.126       |
|    value_loss           | 27.2        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 777         |
|    ep_rew_mean          | -61.6       |
| time/                   |             |
|    fps                  | 2797        |
|    iterations           | 636         |
|    time_elapsed         | 465         |
|    total_timesteps      | 1302528     |
| train/                  |             |
|    approx_kl            | 0.025645804 |
|    clip_fraction        | 0.247       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.77        |
|    explained_variance   | 0.917       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.74        |
|    n_updates            | 11240       |
|    policy_gradient_loss | -0.0135     |
|    std                  | 0.127       |
|    value_loss           | 14.1        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 787         |
|    ep_rew_mean          | -62.4       |
| time/                   |             |
|    fps                  | 2796        |
|    iterations           | 645         |
|    time_elapsed         | 472         |
|    total_timesteps      | 1320960     |
| train/                  |             |
|    approx_kl            | 0.035061143 |
|    clip_fraction        | 0.354       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.83        |
|    explained_variance   | -0.283      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00399    |
|    n_updates            | 11330       |
|    policy_gradient_loss | -0.00765    |
|    std                  | 0.125       |
|    value_loss           | 0.942       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 873         |
|    ep_rew_mean          | -58.9       |
| time/                   |             |
|    fps                  | 2796        |
|    iterations           | 654         |
|    time_elapsed         | 479         |
|    total_timesteps      | 1339392     |
| train/                  |             |
|    approx_kl            | 0.028191283 |
|    clip_fraction        | 0.264       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.9         |
|    explained_variance   | 0.89        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.517       |
|    n_updates            | 11420       |
|    policy_gradient_loss | -0.00669    |
|    std                  | 0.122       |
|    value_loss           | 4.35        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 839        |
|    ep_rew_mean          | -58.6      |
| time/                   |            |
|    fps                  | 2795       |
|    iterations           | 663        |
|    time_elapsed         | 485        |
|    total_timesteps      | 1357824    |
| train/                  |            |
|    approx_kl            | 0.06779629 |
|    clip_fraction        | 0.315      |
|    clip_range           | 0.2        |
|    entropy_loss         | 2.89       |
|    explained_variance   | 0.916      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.11       |
|    n_updates            | 11510      |
|    policy_gradient_loss | -0.0132    |
|    std                  | 0.122      |
|    value_loss           | 1.43       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 915         |
|    ep_rew_mean          | -53.1       |
| time/                   |             |
|    fps                  | 2794        |
|    iterations           | 673         |
|    time_elapsed         | 493         |
|    total_timesteps      | 1378304     |
| train/                  |             |
|    approx_kl            | 0.039127395 |
|    clip_fraction        | 0.358       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.92        |
|    explained_variance   | 0.815       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00264     |
|    n_updates            | 11610       |
|    policy_gradient_loss | -0.00166    |
|    std                  | 0.122       |
|    value_loss           | 0.109       |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 968         |
|    ep_rew_mean          | -50.2       |
| time/                   |             |
|    fps                  | 2793        |
|    iterations           | 682         |
|    time_elapsed         | 500         |
|    total_timesteps      | 1396736     |
| train/                  |             |
|    approx_kl            | 0.031914227 |
|    clip_fraction        | 0.279       |
|    clip_range           | 0.2         |
|    entropy_loss         | 2.95        |
|    explained_variance   | 0.285       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.7         |
|    n_updates            | 11700       |
|    policy_gradient_loss | -0.012      |
|    std                  | 0.121       |
|    value_loss           | 10.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 910         |
|    ep_rew_mean          | -49.4       |
| time/                   |             |
|    fps                  | 2792        |
|    iterations           | 691         |
|    time_elapsed         | 506         |
|    total_timesteps      | 1415168     |
| train/                  |             |
|    approx_kl            | 0.022192162 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.01        |
|    explained_variance   | 0.862       |
|    learning_rate        | 0.0003      |
|    loss                 | 15.6        |
|    n_updates            | 11790       |
|    policy_gradient_loss | -0.0163     |
|    std                  | 0.12        |
|    value_loss           | 16.8        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 884         |
|    ep_rew_mean          | -50.1       |
| time/                   |             |
|    fps                  | 2791        |
|    iterations           | 700         |
|    time_elapsed         | 513         |
|    total_timesteps      | 1433600     |
| train/                  |             |
|    approx_kl            | 0.020143617 |
|    clip_fraction        | 0.19        |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.04        |
|    explained_variance   | 0.879       |
|    learning_rate        | 0.0003      |
|    loss                 | 17.9        |
|    n_updates            | 11880       |
|    policy_gradient_loss | -0.019      |
|    std                  | 0.119       |
|    value_loss           | 43.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 876         |
|    ep_rew_mean          | -51.9       |
| time/                   |             |
|    fps                  | 2790        |
|    iterations           | 709         |
|    time_elapsed         | 520         |
|    total_timesteps      | 1452032     |
| train/                  |             |
|    approx_kl            | 0.056417912 |
|    clip_fraction        | 0.35        |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.06        |
|    explained_variance   | 0.941       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.171       |
|    n_updates            | 11970       |
|    policy_gradient_loss | 0.00353     |
|    std                  | 0.118       |
|    value_loss           | 1.82        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 838         |
|    ep_rew_mean          | -51.9       |
| time/                   |             |
|    fps                  | 2789        |
|    iterations           | 718         |
|    time_elapsed         | 527         |
|    total_timesteps      | 1470464     |
| train/                  |             |
|    approx_kl            | 0.029630207 |
|    clip_fraction        | 0.272       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.07        |
|    explained_variance   | 0.858       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.86        |
|    n_updates            | 12060       |
|    policy_gradient_loss | -0.011      |
|    std                  | 0.118       |
|    value_loss           | 18.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 817        |
|    ep_rew_mean          | -54.1      |
| time/                   |            |
|    fps                  | 2789       |
|    iterations           | 727        |
|    time_elapsed         | 533        |
|    total_timesteps      | 1488896    |
| train/                  |            |
|    approx_kl            | 0.06545612 |
|    clip_fraction        | 0.356      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.08       |
|    explained_variance   | 0.818      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.931      |
|    n_updates            | 12150      |
|    policy_gradient_loss | -0.0114    |
|    std                  | 0.117      |
|    value_loss           | 5.85       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 772         |
|    ep_rew_mean          | -54.2       |
| time/                   |             |
|    fps                  | 2788        |
|    iterations           | 736         |
|    time_elapsed         | 540         |
|    total_timesteps      | 1507328     |
| train/                  |             |
|    approx_kl            | 0.014327294 |
|    clip_fraction        | 0.177       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.13        |
|    explained_variance   | 0.932       |
|    learning_rate        | 0.0003      |
|    loss                 | 16.8        |
|    n_updates            | 12240       |
|    policy_gradient_loss | -0.0214     |
|    std                  | 0.117       |
|    value_loss           | 50.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 813         |
|    ep_rew_mean          | -51.6       |
| time/                   |             |
|    fps                  | 2787        |
|    iterations           | 745         |
|    time_elapsed         | 547         |
|    total_timesteps      | 1525760     |
| train/                  |             |
|    approx_kl            | 0.061721757 |
|    clip_fraction        | 0.44        |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.07        |
|    explained_variance   | 0.968       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.12        |
|    n_updates            | 12330       |
|    policy_gradient_loss | -0.00569    |
|    std                  | 0.119       |
|    value_loss           | 2.75        |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 760        |
|    ep_rew_mean          | -52.6      |
| time/                   |            |
|    fps                  | 2786       |
|    iterations           | 754        |
|    time_elapsed         | 554        |
|    total_timesteps      | 1544192    |
| train/                  |            |
|    approx_kl            | 0.04196246 |
|    clip_fraction        | 0.394      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.08       |
|    explained_variance   | -0.712     |
|    learning_rate        | 0.0003     |
|    loss                 | 0.073      |
|    n_updates            | 12420      |
|    policy_gradient_loss | -0.00784   |
|    std                  | 0.116      |
|    value_loss           | 0.254      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 755         |
|    ep_rew_mean          | -52.9       |
| time/                   |             |
|    fps                  | 2785        |
|    iterations           | 763         |
|    time_elapsed         | 560         |
|    total_timesteps      | 1562624     |
| train/                  |             |
|    approx_kl            | 0.034947418 |
|    clip_fraction        | 0.209       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.18        |
|    explained_variance   | 0.931       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.62        |
|    n_updates            | 12510       |
|    policy_gradient_loss | -0.0151     |
|    std                  | 0.115       |
|    value_loss           | 9.25        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 752         |
|    ep_rew_mean          | -54.9       |
| time/                   |             |
|    fps                  | 2785        |
|    iterations           | 772         |
|    time_elapsed         | 567         |
|    total_timesteps      | 1581056     |
| train/                  |             |
|    approx_kl            | 0.039419897 |
|    clip_fraction        | 0.351       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.15        |
|    explained_variance   | -0.732      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00306    |
|    n_updates            | 12600       |
|    policy_gradient_loss | 0.00399     |
|    std                  | 0.115       |
|    value_loss           | 0.191       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 841        |
|    ep_rew_mean          | -52.7      |
| time/                   |            |
|    fps                  | 2784       |
|    iterations           | 781        |
|    time_elapsed         | 574        |
|    total_timesteps      | 1599488    |
| train/                  |            |
|    approx_kl            | 0.05438366 |
|    clip_fraction        | 0.387      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.21       |
|    explained_variance   | -2.39      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0218    |
|    n_updates            | 12690      |
|    policy_gradient_loss | -0.00395   |
|    std                  | 0.113      |
|    value_loss           | 0.208      |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 817         |
|    ep_rew_mean          | -54.2       |
| time/                   |             |
|    fps                  | 2784        |
|    iterations           | 790         |
|    time_elapsed         | 581         |
|    total_timesteps      | 1617920     |
| train/                  |             |
|    approx_kl            | 0.025012579 |
|    clip_fraction        | 0.228       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.17        |
|    explained_variance   | 0.88        |
|    learning_rate        | 0.0003      |
|    loss                 | 6.14        |
|    n_updates            | 12780       |
|    policy_gradient_loss | -0.0144     |
|    std                  | 0.114       |
|    value_loss           | 32.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 835         |
|    ep_rew_mean          | -52.6       |
| time/                   |             |
|    fps                  | 2783        |
|    iterations           | 799         |
|    time_elapsed         | 587         |
|    total_timesteps      | 1636352     |
| train/                  |             |
|    approx_kl            | 0.031204164 |
|    clip_fraction        | 0.322       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.28        |
|    explained_variance   | 0.831       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0171      |
|    n_updates            | 12870       |
|    policy_gradient_loss | -0.00513    |
|    std                  | 0.111       |
|    value_loss           | 0.238       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 836         |
|    ep_rew_mean          | -50.2       |
| time/                   |             |
|    fps                  | 2782        |
|    iterations           | 808         |
|    time_elapsed         | 594         |
|    total_timesteps      | 1654784     |
| train/                  |             |
|    approx_kl            | 0.035251558 |
|    clip_fraction        | 0.291       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.28        |
|    explained_variance   | 0.88        |
|    learning_rate        | 0.0003      |
|    loss                 | 4.88        |
|    n_updates            | 12960       |
|    policy_gradient_loss | -0.0131     |
|    std                  | 0.111       |
|    value_loss           | 7.41        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 821        |
|    ep_rew_mean          | -51.6      |
| time/                   |            |
|    fps                  | 2782       |
|    iterations           | 817        |
|    time_elapsed         | 601        |
|    total_timesteps      | 1673216    |
| train/                  |            |
|    approx_kl            | 0.05214078 |
|    clip_fraction        | 0.352      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.34       |
|    explained_variance   | 0.724      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.457      |
|    n_updates            | 13050      |
|    policy_gradient_loss | -0.00989   |
|    std                  | 0.109      |
|    value_loss           | 3.84       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 816         |
|    ep_rew_mean          | -52.7       |
| time/                   |             |
|    fps                  | 2781        |
|    iterations           | 826         |
|    time_elapsed         | 608         |
|    total_timesteps      | 1691648     |
| train/                  |             |
|    approx_kl            | 0.023055281 |
|    clip_fraction        | 0.271       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.45        |
|    explained_variance   | 0.914       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.82        |
|    n_updates            | 13140       |
|    policy_gradient_loss | -0.0119     |
|    std                  | 0.106       |
|    value_loss           | 13.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 758        |
|    ep_rew_mean          | -55.8      |
| time/                   |            |
|    fps                  | 2780       |
|    iterations           | 835        |
|    time_elapsed         | 615        |
|    total_timesteps      | 1710080    |
| train/                  |            |
|    approx_kl            | 0.05457684 |
|    clip_fraction        | 0.307      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.47       |
|    explained_variance   | 0.919      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.642      |
|    n_updates            | 13230      |
|    policy_gradient_loss | -0.00756   |
|    std                  | 0.105      |
|    value_loss           | 6.28       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 702       |
|    ep_rew_mean          | -59.1     |
| time/                   |           |
|    fps                  | 2779      |
|    iterations           | 844       |
|    time_elapsed         | 621       |
|    total_timesteps      | 1728512   |
| train/                  |           |
|    approx_kl            | 0.0813694 |
|    clip_fraction        | 0.468     |
|    clip_range           | 0.2       |
|    entropy_loss         | 3.41      |
|    explained_variance   | 0.908     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.423     |
|    n_updates            | 13320     |
|    policy_gradient_loss | 0.00254   |
|    std                  | 0.108     |
|    value_loss           | 2.86      |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 712     

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 727         |
|    ep_rew_mean          | -58.8       |
| time/                   |             |
|    fps                  | 2778        |
|    iterations           | 853         |
|    time_elapsed         | 628         |
|    total_timesteps      | 1746944     |
| train/                  |             |
|    approx_kl            | 0.078552045 |
|    clip_fraction        | 0.429       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.35        |
|    explained_variance   | 0.283       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0213      |
|    n_updates            | 13410       |
|    policy_gradient_loss | -0.00719    |
|    std                  | 0.111       |
|    value_loss           | 0.498       |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 723        |
|    ep_rew_mean          | -58.9      |
| time/                   |            |
|    fps                  | 2777       |
|    iterations           | 862        |
|    time_elapsed         | 635        |
|    total_timesteps      | 1765376    |
| train/                  |            |
|    approx_kl            | 0.08082211 |
|    clip_fraction        | 0.408      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.36       |
|    explained_variance   | 0.882      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.647      |
|    n_updates            | 13500      |
|    policy_gradient_loss | -0.00389   |
|    std                  | 0.11       |
|    value_loss           | 4.65       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 701        |
|    ep_rew_mean          | -60.8      |
| time/                   |            |
|    fps                  | 2776       |
|    iterations           | 871        |
|    time_elapsed         | 642        |
|    total_timesteps      | 1783808    |
| train/                  |            |
|    approx_kl            | 0.08867511 |
|    clip_fraction        | 0.377      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.37       |
|    explained_variance   | 0.91       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.568      |
|    n_updates            | 13590      |
|    policy_gradient_loss | -0.0128    |
|    std                  | 0.109      |
|    value_loss           | 2.31       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 706         |
|    ep_rew_mean          | -60.6       |
| time/                   |             |
|    fps                  | 2775        |
|    iterations           | 880         |
|    time_elapsed         | 649         |
|    total_timesteps      | 1802240     |
| train/                  |             |
|    approx_kl            | 0.060373396 |
|    clip_fraction        | 0.347       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.39        |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.0003      |
|    loss                 | 6.98        |
|    n_updates            | 13680       |
|    policy_gradient_loss | -0.00881    |
|    std                  | 0.11        |
|    value_loss           | 9.11        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 664        |
|    ep_rew_mean          | -62.4      |
| time/                   |            |
|    fps                  | 2775       |
|    iterations           | 889        |
|    time_elapsed         | 656        |
|    total_timesteps      | 1820672    |
| train/                  |            |
|    approx_kl            | 0.03164569 |
|    clip_fraction        | 0.336      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.44       |
|    explained_variance   | 0.848      |
|    learning_rate        | 0.0003     |
|    loss                 | 10.5       |
|    n_updates            | 13770      |
|    policy_gradient_loss | -0.0114    |
|    std                  | 0.108      |
|    value_loss           | 29.1       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 671         |
|    ep_rew_mean          | -62.7       |
| time/                   |             |
|    fps                  | 2774        |
|    iterations           | 898         |
|    time_elapsed         | 662         |
|    total_timesteps      | 1839104     |
| train/                  |             |
|    approx_kl            | 0.053188197 |
|    clip_fraction        | 0.312       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.5         |
|    explained_variance   | 0.949       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.988       |
|    n_updates            | 13860       |
|    policy_gradient_loss | -0.016      |
|    std                  | 0.106       |
|    value_loss           | 6.66        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 677        |
|    ep_rew_mean          | -62.6      |
| time/                   |            |
|    fps                  | 2773       |
|    iterations           | 907        |
|    time_elapsed         | 669        |
|    total_timesteps      | 1857536    |
| train/                  |            |
|    approx_kl            | 0.07924473 |
|    clip_fraction        | 0.512      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.5        |
|    explained_variance   | 0.058      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.552      |
|    n_updates            | 13950      |
|    policy_gradient_loss | 0.0035     |
|    std                  | 0.105      |
|    value_loss           | 1.53       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 736         |
|    ep_rew_mean          | -59.3       |
| time/                   |             |
|    fps                  | 2772        |
|    iterations           | 916         |
|    time_elapsed         | 676         |
|    total_timesteps      | 1875968     |
| train/                  |             |
|    approx_kl            | 0.052472584 |
|    clip_fraction        | 0.374       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.58        |
|    explained_variance   | 0.946       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.08        |
|    n_updates            | 14040       |
|    policy_gradient_loss | -0.00654    |
|    std                  | 0.103       |
|    value_loss           | 4.05        |
-----------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 819        |
|    ep_rew_mean          | -55.3      |
| time/                   |            |
|    fps                  | 2772       |
|    iterations           | 925        |
|    time_elapsed         | 683        |
|    total_timesteps      | 1894400    |
| train/                  |            |
|    approx_kl            | 0.03203822 |
|    clip_fraction        | 0.252      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.57       |
|    explained_variance   | 0.907      |
|    learning_rate        | 0.0003     |
|    loss                 | 5.03       |
|    n_updates            | 14130      |
|    policy_gradient_loss | -0.0102    |
|    std                  | 0.104      |
|    value_loss           | 10.7       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 752         |
|    ep_rew_mean          | -57.6       |
| time/                   |             |
|    fps                  | 2771        |
|    iterations           | 934         |
|    time_elapsed         | 690         |
|    total_timesteps      | 1912832     |
| train/                  |             |
|    approx_kl            | 0.021611463 |
|    clip_fraction        | 0.258       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.64        |
|    explained_variance   | 0.868       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.5        |
|    n_updates            | 14220       |
|    policy_gradient_loss | -0.0175     |
|    std                  | 0.102       |
|    value_loss           | 40.3        |
-----------------------------------------
----------------------------------------
| rollout/                |        

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 777         |
|    ep_rew_mean          | -56.1       |
| time/                   |             |
|    fps                  | 2770        |
|    iterations           | 943         |
|    time_elapsed         | 697         |
|    total_timesteps      | 1931264     |
| train/                  |             |
|    approx_kl            | 0.077048674 |
|    clip_fraction        | 0.427       |
|    clip_range           | 0.2         |
|    entropy_loss         | 3.54        |
|    explained_variance   | 0.463       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.148       |
|    n_updates            | 14310       |
|    policy_gradient_loss | 0.00017     |
|    std                  | 0.104       |
|    value_loss           | 0.732       |
-----------------------------------------
----------------------------------------
| rollout/                |        

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 745        |
|    ep_rew_mean          | -60.1      |
| time/                   |            |
|    fps                  | 2769       |
|    iterations           | 952        |
|    time_elapsed         | 703        |
|    total_timesteps      | 1949696    |
| train/                  |            |
|    approx_kl            | 0.05227442 |
|    clip_fraction        | 0.405      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.62       |
|    explained_variance   | -0.262     |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0267     |
|    n_updates            | 14400      |
|    policy_gradient_loss | -0.00518   |
|    std                  | 0.103      |
|    value_loss           | 0.381      |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 698        |
|    ep_rew_mean          | -61.5      |
| time/                   |            |
|    fps                  | 2769       |
|    iterations           | 961        |
|    time_elapsed         | 710        |
|    total_timesteps      | 1968128    |
| train/                  |            |
|    approx_kl            | 0.04778247 |
|    clip_fraction        | 0.318      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.61       |
|    explained_variance   | 0.951      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.4        |
|    n_updates            | 14490      |
|    policy_gradient_loss | -0.0109    |
|    std                  | 0.102      |
|    value_loss           | 5.13       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 763        |
|    ep_rew_mean          | -59        |
| time/                   |            |
|    fps                  | 2768       |
|    iterations           | 970        |
|    time_elapsed         | 717        |
|    total_timesteps      | 1986560    |
| train/                  |            |
|    approx_kl            | 0.05372051 |
|    clip_fraction        | 0.367      |
|    clip_range           | 0.2        |
|    entropy_loss         | 3.59       |
|    explained_variance   | -0.57      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.527      |
|    n_updates            | 14580      |
|    policy_gradient_loss | -0.00478   |
|    std                  | 0.104      |
|    value_loss           | 1.07       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_me

<stable_baselines3.ppo.ppo.PPO at 0x31a61df10>

## 3C) Save Model

In [6]:
# Persist the trained hardcore-mode PPO model under the name "ppo_bipedalwalker_hardcore_3M"
# NOTE(review): the "3M" suffix presumably refers to the total number of training timesteps - confirm
model.save("ppo_bipedalwalker_hardcore_3M")

## 3D) Evaluate Model

In [7]:
# Rebind `env` to a BipedalWalker environment in hardcore mode with
# on-screen ("human") rendering, so the evaluation below can be watched
env = gym.make(
    "BipedalWalker-v3",
    hardcore=True,
    render_mode="human",
)

In [8]:
# Evaluate the model on the rendering environment (here over 10 episodes);
# the call yields the mean episode reward and its standard deviation
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=10,
)

# Report the result as "mean ± std" (the message text is Turkish for "Mean reward")
print(f"Ortalama ödül: {mean_reward} ± {std_reward}")

2024-09-24 15:39:31.035 python[5914:203471] +[IMKClient subclass]: chose IMKClient_Legacy
2024-09-24 15:39:31.035 python[5914:203471] +[IMKInputSession subclass]: chose IMKInputSession_Legacy


Ortalama ödül: -53.9464984558479 ± 34.84904356642908
