In [1]:
from tqdm import tqdm
import numpy as np

from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.vec_env import VecMonitor, VecNormalize, VecCheckNan
from stable_baselines3.ppo import MlpPolicy

from rlgym.envs import Match
from rlgym.utils.action_parsers import DiscreteAction
from rlgym.utils.obs_builders import AdvancedObs
from rlgym.utils.state_setters import DefaultState, RandomState
from rlgym.utils.terminal_conditions.common_conditions import TimeoutCondition, NoTouchTimeoutCondition, GoalScoredCondition
from rlgym.utils.reward_functions.common_rewards.misc_rewards import EventReward, ConstantReward, VelocityReward, SaveBoostReward
from rlgym.utils.reward_functions.common_rewards.player_ball_rewards import VelocityPlayerToBallReward
from rlgym.utils.reward_functions.common_rewards.ball_goal_rewards import VelocityBallToGoalReward
from rlgym.utils.reward_functions.common_rewards.conditional_rewards import RewardIfBehindBall
from rlgym.utils.reward_functions import CombinedReward
from rlgym_tools.sb3_utils import SB3MultipleInstanceEnv

from egocentric_obs import EgocentricObs
from rewards import RewardIfGoalside, RewardIfShouldShadow1s, PossessionReward, RewardIfPlayerBallY, PlayerBallYDistReward, TimestepReward, MultiplyRewards, RewardIfGrounded

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
frame_skip = 8                                             # Number of ticks to repeat an action
half_life_seconds = 5                                      # Easier to conceptualize, after this many seconds the reward discount is 0.5

fps = 240 / frame_skip
gamma = np.exp(np.log(0.5) / (fps * half_life_seconds))    # Quick mafs
agents_per_match = 2
num_instances = 18
target_steps = 1_000_000
steps = target_steps // (num_instances * agents_per_match) #making sure the experience counts line up properly
batch_size = target_steps//10                              #getting the batch size down to something more manageable - 100k in this case
training_interval = 25_000_000
mmr_save_frequency = 50_000_000

In [3]:
model_name = 'Egor_512'
def exit_save(model):
    model.save(f'models/{model_name}/exit_save')

In [5]:
def get_match():  # Need to use a function so that each instance can call it and produce their own objects
    return Match(
        team_size=1,
        tick_skip=frame_skip,
        reward_function=CombinedReward(
        (
#             RewardIfShouldShadow1s(ConstantReward()),
#             RewardIfGoalside(ConstantReward()),
            SaveBoostReward(),
#             RewardIfPlayerBallY(PlayerBallYDistReward()),
#             RewardIfGrounded(ConstantReward()),
            VelocityReward(), 
#             VelocityPlayerToBallReward(),
#             VelocityBallToGoalReward(),
#             EventReward(
#                 team_goal=100.0,
#                 concede=-100.0,
#                 shot=5.0,
#                 save=30.0,
#                 demo=10.0,
#             ),
        ),
        (0.05, 0.05)),
        # self_play=True,  in rlgym 1.2 'self_play' is depreciated. Uncomment line if using an earlier version and comment out spawn_opponents
        spawn_opponents=True,
#         terminal_conditions=[TimeoutCondition(fps * 100), NoTouchTimeoutCondition(fps * 20), GoalScoredCondition()],
        terminal_conditions=[TimeoutCondition(fps * 10)],
        obs_builder=EgocentricObs(),  # Not that advanced, good default
        state_setter=RandomState(),  # Resets to kickoff position
        action_parser=DiscreteAction(n_bins=9)  # Discrete > Continuous don't @ me
    )

In [6]:
env = SB3MultipleInstanceEnv(get_match, num_instances, wait_time=45)# Start 1 instances, waiting 60 seconds between each
env = VecCheckNan(env)                                # Optional
env = VecMonitor(env)                                 # Recommended, logs mean reward and ep_len to Tensorboard
env = VecNormalize(env, norm_obs=False, gamma=gamma)  # Highly recommended, normalizes rewards

  return np.divide(vec, vecmag(vec))


In [7]:
model_id = 'exit_save'
try:
    model = PPO.load(
        f'models/{model_name}/{model_id}.zip',
        env,
        device="cuda",
        custom_objects={"n_envs": env.num_envs}, #automatically adjusts to users changing instance count, may encounter shaping error otherwise
        # If you need to adjust parameters mid training, you can use the below example as a guide
        #custom_objects={"n_envs": env.num_envs, "n_steps": steps, "batch_size": batch_size, "n_epochs": 10, "learning_rate": 5e-5}
    )
    print(f"Loaded: {model_id}.")
except:
    print("model ({model_id}) not found, creating new model.")
    from torch.nn import ELU
    policy_kwargs = dict(
        activation_fn=ELU,
        net_arch=[256, 256, 256, dict(pi=[512, 512, 512], vf=[512, 512, 512])],
    )

    model = PPO(
        MlpPolicy,
        env,
        n_epochs=10,                                                             # PPO calls for multiple epochs
        policy_kwargs=policy_kwargs,
        learning_rate=5e-5,                                                      # Around this is fairly common for PPO Originally 5e-5
        ent_coef=0.01,                                                           # From PPO Atari
        vf_coef=1.,                                                              # From PPO Atari
        gamma=gamma,                                                             # Gamma as calculated using half-life
        verbose=3,                                                               # Print out all the info as we're going
        batch_size=batch_size,                                                   # Batch size as high as possible within reason
        n_steps=steps,                                                           # Number of steps to perform before optimizing network
        tensorboard_log=f'logs/{model_name}',                                    # `tensorboard --logdir out/logs` in terminal to see graphs
        device="cuda"                                                            # Uses GPU if available
    )

model ({model_id}) not found, creating new model.
Using cuda device


We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=27777 and n_envs=36)
  f"You have specified a mini-batch size of {batch_size},"


In [8]:
# Save model every so often
# Divide by num_envs (number of agents) because callback only increments every time all agents have taken a step
# This saves to specified folder with a specified name
callback = CheckpointCallback(round(5_000_000 / env.num_envs), save_path=f"models/{model_name}", name_prefix=model_name)

try:
    mmr_model_target_count = model.num_timesteps + mmr_save_frequency
    while True:
        #may need to reset timesteps when you're running a different number of instances than when you saved the model
        model.learn(training_interval, callback=callback, reset_num_timesteps=False) #can ignore callback if training_interval < callback target
        model.save(f"models/{model_name}/exit_save")
        if model.num_timesteps >= mmr_model_target_count:
            model.save(f"mmr_models/{model_name}/{model.num_timesteps}")
            mmr_model_target_count += mmr_save_frequency

except KeyboardInterrupt:
    print("Exiting training")

print("Saving model")
exit_save(model)
print("Save complete")


Logging to logs/Egor_512\PPO_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 300       |
|    ep_rew_mean     | 6.4239984 |
| time/              |           |
|    fps             | 4056      |
|    iterations      | 1         |
|    time_elapsed    | 246       |
|    total_timesteps | 999972    |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 6.484916     |
| time/                   |              |
|    fps                  | 4061         |
|    iterations           | 2            |
|    time_elapsed         | 492          |
|    total_timesteps      | 1999944      |
| train/                  |              |
|    approx_kl            | 0.0046764887 |
|    clip_fraction        | 0.0257       |
|    clip_range           | 0.2          |
|    entropy_loss         | -13.1        |
|    explained_va

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 7.6507816    |
| time/                   |              |
|    fps                  | 4404         |
|    iterations           | 11           |
|    time_elapsed         | 2497         |
|    total_timesteps      | 10999692     |
| train/                  |              |
|    approx_kl            | 0.0042357305 |
|    clip_fraction        | 0.0185       |
|    clip_range           | 0.2          |
|    entropy_loss         | -13          |
|    explained_variance   | 0.551        |
|    learning_rate        | 5e-05        |
|    loss                 | -0.00691     |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.00227     |
|    value_loss           | 0.129        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 8.926441     |
| time/                   |              |
|    fps                  | 4449         |
|    iterations           | 21           |
|    time_elapsed         | 4719         |
|    total_timesteps      | 20999412     |
| train/                  |              |
|    approx_kl            | 0.0041937674 |
|    clip_fraction        | 0.0205       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.8        |
|    explained_variance   | 0.555        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0403       |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00253     |
|    value_loss           | 0.174        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 9.506104    |
| time/                   |             |
|    fps                  | 4524        |
|    iterations           | 5           |
|    time_elapsed         | 1104        |
|    total_timesteps      | 30999132    |
| train/                  |             |
|    approx_kl            | 0.004192561 |
|    clip_fraction        | 0.0183      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.8       |
|    explained_variance   | 0.541       |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0447      |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.00236    |
|    value_loss           | 0.174       |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 9.835096     |
| time/                   |              |
|    fps                  | 4547         |
|    iterations           | 15           |
|    time_elapsed         | 3298         |
|    total_timesteps      | 40998852     |
| train/                  |              |
|    approx_kl            | 0.0041622487 |
|    clip_fraction        | 0.0191       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.7        |
|    explained_variance   | 0.535        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0397       |
|    n_updates            | 400          |
|    policy_gradient_loss | -0.00239     |
|    value_loss           | 0.173        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 11.075399    |
| time/                   |              |
|    fps                  | 4534         |
|    iterations           | 25           |
|    time_elapsed         | 5513         |
|    total_timesteps      | 50998572     |
| train/                  |              |
|    approx_kl            | 0.0042720907 |
|    clip_fraction        | 0.0206       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.6        |
|    explained_variance   | 0.52         |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0427       |
|    n_updates            | 500          |
|    policy_gradient_loss | -0.00244     |
|    value_loss           | 0.175        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 300        |
|    ep_rew_mean          | 10.95824   |
| time/                   |            |
|    fps                  | 4515       |
|    iterations           | 9          |
|    time_elapsed         | 1993       |
|    total_timesteps      | 60998292   |
| train/                  |            |
|    approx_kl            | 0.00426395 |
|    clip_fraction        | 0.0196     |
|    clip_range           | 0.2        |
|    entropy_loss         | -12.6      |
|    explained_variance   | 0.537      |
|    learning_rate        | 5e-05      |
|    loss                 | 0.0358     |
|    n_updates            | 600        |
|    policy_gradient_loss | -0.00237   |
|    value_loss           | 0.168      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 11.601548    |
| time/                   |              |
|    fps                  | 4498         |
|    iterations           | 19           |
|    time_elapsed         | 4223         |
|    total_timesteps      | 70998012     |
| train/                  |              |
|    approx_kl            | 0.0041483324 |
|    clip_fraction        | 0.0194       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.5        |
|    explained_variance   | 0.546        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0367       |
|    n_updates            | 700          |
|    policy_gradient_loss | -0.00235     |
|    value_loss           | 0.165        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 12.054404    |
| time/                   |              |
|    fps                  | 4579         |
|    iterations           | 3            |
|    time_elapsed         | 655          |
|    total_timesteps      | 80997732     |
| train/                  |              |
|    approx_kl            | 0.0042399075 |
|    clip_fraction        | 0.0207       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.5        |
|    explained_variance   | 0.564        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0339       |
|    n_updates            | 800          |
|    policy_gradient_loss | -0.00248     |
|    value_loss           | 0.163        |
------------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mea

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 300        |
|    ep_rew_mean          | 12.228578  |
| time/                   |            |
|    fps                  | 4582       |
|    iterations           | 13         |
|    time_elapsed         | 2837       |
|    total_timesteps      | 90997452   |
| train/                  |            |
|    approx_kl            | 0.00419861 |
|    clip_fraction        | 0.0186     |
|    clip_range           | 0.2        |
|    entropy_loss         | -12.5      |
|    explained_variance   | 0.58       |
|    learning_rate        | 5e-05      |
|    loss                 | 0.0352     |
|    n_updates            | 900        |
|    policy_gradient_loss | -0.00226   |
|    value_loss           | 0.162      |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_re

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 12.344623   |
| time/                   |             |
|    fps                  | 4554        |
|    iterations           | 23          |
|    time_elapsed         | 5049        |
|    total_timesteps      | 100997172   |
| train/                  |             |
|    approx_kl            | 0.004176954 |
|    clip_fraction        | 0.0194      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.4       |
|    explained_variance   | 0.59        |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0346      |
|    n_updates            | 1000        |
|    policy_gradient_loss | -0.00229    |
|    value_loss           | 0.163       |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 13.165405    |
| time/                   |              |
|    fps                  | 4593         |
|    iterations           | 7            |
|    time_elapsed         | 1523         |
|    total_timesteps      | 110996892    |
| train/                  |              |
|    approx_kl            | 0.0039736363 |
|    clip_fraction        | 0.0185       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.4        |
|    explained_variance   | 0.6          |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0396       |
|    n_updates            | 1100         |
|    policy_gradient_loss | -0.00221     |
|    value_loss           | 0.167        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 13.360971   |
| time/                   |             |
|    fps                  | 4575        |
|    iterations           | 17          |
|    time_elapsed         | 3714        |
|    total_timesteps      | 120996612   |
| train/                  |             |
|    approx_kl            | 0.004000225 |
|    clip_fraction        | 0.0164      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.3       |
|    explained_variance   | 0.61        |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0465      |
|    n_updates            | 1200        |
|    policy_gradient_loss | -0.00213    |
|    value_loss           | 0.175       |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300 

Logging to logs/Egor_512\PPO_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 300       |
|    ep_rew_mean     | 13.878584 |
| time/              |           |
|    fps             | 4600      |
|    iterations      | 1         |
|    time_elapsed    | 217       |
|    total_timesteps | 130996332 |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 14.273677    |
| time/                   |              |
|    fps                  | 4550         |
|    iterations           | 2            |
|    time_elapsed         | 439          |
|    total_timesteps      | 131996304    |
| train/                  |              |
|    approx_kl            | 0.0039081858 |
|    clip_fraction        | 0.0173       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.2        |
|    explained_va

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 16.176151    |
| time/                   |              |
|    fps                  | 2297         |
|    iterations           | 11           |
|    time_elapsed         | 4788         |
|    total_timesteps      | 140996052    |
| train/                  |              |
|    approx_kl            | 0.0039095217 |
|    clip_fraction        | 0.017        |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.1        |
|    explained_variance   | 0.635        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0806       |
|    n_updates            | 1400         |
|    policy_gradient_loss | -0.0021      |
|    value_loss           | 0.207        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 18.588686    |
| time/                   |              |
|    fps                  | 2992         |
|    iterations           | 21           |
|    time_elapsed         | 7016         |
|    total_timesteps      | 150995772    |
| train/                  |              |
|    approx_kl            | 0.0040715514 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.1        |
|    explained_variance   | 0.586        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.109        |
|    n_updates            | 1500         |
|    policy_gradient_loss | -0.00218     |
|    value_loss           | 0.232        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 18.654448   |
| time/                   |             |
|    fps                  | 4550        |
|    iterations           | 5           |
|    time_elapsed         | 1098        |
|    total_timesteps      | 160995492   |
| train/                  |             |
|    approx_kl            | 0.004376987 |
|    clip_fraction        | 0.0222      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12         |
|    explained_variance   | 0.493       |
|    learning_rate        | 5e-05       |
|    loss                 | 0.118       |
|    n_updates            | 1600        |
|    policy_gradient_loss | -0.00231    |
|    value_loss           | 0.243       |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 18.850502    |
| time/                   |              |
|    fps                  | 4533         |
|    iterations           | 15           |
|    time_elapsed         | 3308         |
|    total_timesteps      | 170995212    |
| train/                  |              |
|    approx_kl            | 0.0044755847 |
|    clip_fraction        | 0.0223       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12          |
|    explained_variance   | 0.433        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.107        |
|    n_updates            | 1700         |
|    policy_gradient_loss | -0.00238     |
|    value_loss           | 0.235        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 19.648643   |
| time/                   |             |
|    fps                  | 4554        |
|    iterations           | 25          |
|    time_elapsed         | 5489        |
|    total_timesteps      | 180994932   |
| train/                  |             |
|    approx_kl            | 0.004443205 |
|    clip_fraction        | 0.0234      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.1       |
|    explained_variance   | 0.403       |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0944      |
|    n_updates            | 1800        |
|    policy_gradient_loss | -0.00239    |
|    value_loss           | 0.22        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 18.540077   |
| time/                   |             |
|    fps                  | 4431        |
|    iterations           | 9           |
|    time_elapsed         | 2030        |
|    total_timesteps      | 190994652   |
| train/                  |             |
|    approx_kl            | 0.004337941 |
|    clip_fraction        | 0.0236      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.1       |
|    explained_variance   | 0.414       |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0815      |
|    n_updates            | 1900        |
|    policy_gradient_loss | -0.00238    |
|    value_loss           | 0.206       |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300 

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 19.091345    |
| time/                   |              |
|    fps                  | 4492         |
|    iterations           | 19           |
|    time_elapsed         | 4228         |
|    total_timesteps      | 200994372    |
| train/                  |              |
|    approx_kl            | 0.0043918965 |
|    clip_fraction        | 0.0243       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.1        |
|    explained_variance   | 0.414        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0723       |
|    n_updates            | 2000         |
|    policy_gradient_loss | -0.00241     |
|    value_loss           | 0.197        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 19.16172     |
| time/                   |              |
|    fps                  | 4524         |
|    iterations           | 3            |
|    time_elapsed         | 663          |
|    total_timesteps      | 210994092    |
| train/                  |              |
|    approx_kl            | 0.0044016694 |
|    clip_fraction        | 0.0217       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.1        |
|    explained_variance   | 0.428        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0641       |
|    n_updates            | 2100         |
|    policy_gradient_loss | -0.00225     |
|    value_loss           | 0.185        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 18.96743     |
| time/                   |              |
|    fps                  | 4496         |
|    iterations           | 13           |
|    time_elapsed         | 2891         |
|    total_timesteps      | 220993812    |
| train/                  |              |
|    approx_kl            | 0.0041933563 |
|    clip_fraction        | 0.0219       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.1        |
|    explained_variance   | 0.432        |
|    learning_rate        | 5e-05        |
|    loss                 | 0.0544       |
|    n_updates            | 2200         |
|    policy_gradient_loss | -0.00235     |
|    value_loss           | 0.178        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 18.903986   |
| time/                   |             |
|    fps                  | 3092        |
|    iterations           | 23          |
|    time_elapsed         | 7438        |
|    total_timesteps      | 230993532   |
| train/                  |             |
|    approx_kl            | 0.004409414 |
|    clip_fraction        | 0.0231      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.1       |
|    explained_variance   | 0.445       |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0482      |
|    n_updates            | 2300        |
|    policy_gradient_loss | -0.00245    |
|    value_loss           | 0.173       |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 19.368206   |
| time/                   |             |
|    fps                  | 4516        |
|    iterations           | 7           |
|    time_elapsed         | 1549        |
|    total_timesteps      | 240993252   |
| train/                  |             |
|    approx_kl            | 0.004361061 |
|    clip_fraction        | 0.0231      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.1       |
|    explained_variance   | 0.455       |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0403      |
|    n_updates            | 2400        |
|    policy_gradient_loss | -0.00247    |
|    value_loss           | 0.168       |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 300         |
|    ep_rew_mean          | 19.272852   |
| time/                   |             |
|    fps                  | 4539        |
|    iterations           | 17          |
|    time_elapsed         | 3744        |
|    total_timesteps      | 250992972   |
| train/                  |             |
|    approx_kl            | 0.004424528 |
|    clip_fraction        | 0.0245      |
|    clip_range           | 0.2         |
|    entropy_loss         | -12.1       |
|    explained_variance   | 0.436       |
|    learning_rate        | 5e-05       |
|    loss                 | 0.0401      |
|    n_updates            | 2500        |
|    policy_gradient_loss | -0.00257    |
|    value_loss           | 0.168       |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300 

Logging to logs/Egor_512\PPO_0
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 300       |
|    ep_rew_mean     | 19.375612 |
| time/              |           |
|    fps             | 4621      |
|    iterations      | 1         |
|    time_elapsed    | 216       |
|    total_timesteps | 260992692 |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 300          |
|    ep_rew_mean          | 18.78283     |
| time/                   |              |
|    fps                  | 4564         |
|    iterations           | 2            |
|    time_elapsed         | 438          |
|    total_timesteps      | 261992664    |
| train/                  |              |
|    approx_kl            | 0.0044645434 |
|    clip_fraction        | 0.0237       |
|    clip_range           | 0.2          |
|    entropy_loss         | -12.1        |
|    explained_va

In [9]:
exit_save(model)