In [None]:
# Install dependencies from pyproject.toml (works on Google Colab)
%pip install -q -e .

In [None]:

from pathlib import Path
import imageio
import gymnasium as gym
import minigrid
from minigrid.wrappers import ImgObsWrapper
from minigrid.wrappers import RGBImgPartialObsWrapper
import random
import numpy as np
import torch as th


import torch.nn as nn
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor, VecNormalize

# Set random seed for reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
th.manual_seed(SEED)
th.cuda.manual_seed_all(SEED)
th.backends.cudnn.deterministic = True
th.backends.cudnn.benchmark = False

CURRENT_DIRECTORY = Path(".")
model_output = CURRENT_DIRECTORY / "ppo_minigrid.zip"
LOG_DIR = CURRENT_DIRECTORY / "logs"
LOG_DIR.mkdir(exist_ok=True)

# uncomment following line to retrain the model
# model_output.unlink(missing_ok=True)


def make_env(seed: int = 0, log_suffix: str = ""):
	"""Create a monitored MiniGrid environment with RGB partial observations."""
	env = gym.make("MiniGrid-Empty-8x8-v0", render_mode="rgb_array", max_episode_steps=100)
	env = RGBImgPartialObsWrapper(env)
	env = ImgObsWrapper(env)
	log_path = LOG_DIR / f"train{log_suffix}"
	log_path.parent.mkdir(parents=True, exist_ok=True)
	env = Monitor(env, log_path.as_posix())
	env.reset(seed=seed)
	env.action_space.seed(seed)
	env.observation_space.seed(seed)
	return env


# Official recommendation: custom feature extractor for MiniGrid image input
class MinigridFeaturesExtractor(BaseFeaturesExtractor):
	def __init__(self, observation_space, features_dim: int = 256):
		super().__init__(observation_space, features_dim)
		n_input_channels = observation_space.shape[0]
		self.cnn = nn.Sequential(
			nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1),
			nn.ReLU(),
			nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
			nn.ReLU(),
			nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
			nn.ReLU(),
			nn.Flatten(),
		)
		with th.no_grad():
			sample = th.as_tensor(observation_space.sample()[None]).float()
			n_flatten = self.cnn(sample).shape[1]
		self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU())

	def forward(self, observations):
		return self.linear(self.cnn(observations))


policy_kwargs = dict(
	features_extractor_class=MinigridFeaturesExtractor,
	features_extractor_kwargs=dict(features_dim=256),
	normalize_images=True,
)


# Create MiniGrid-Empty-8x8-v0 environment with image observation
vec_env = DummyVecEnv([lambda seed=SEED + i: make_env(seed, f"_{i}") for i in range(8)])
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True, clip_obs=10.0)
train_env = VecMonitor(vec_env, LOG_DIR.as_posix())


if not model_output.exists():
	print("Training new model...")
	model = PPO(
		"CnnPolicy",
		train_env,
		policy_kwargs=policy_kwargs,
		learning_rate=2.5e-4,
		n_steps=1024,
		batch_size=1024,
		gamma=0.995,
		gae_lambda=0.9,
		clip_range=0.2,
		ent_coef=0.001,
		vf_coef=0.5,
		max_grad_norm=0.4,
		verbose=1,
	)
	model.learn(total_timesteps=int(1e6), progress_bar=True)
	model.save(model_output)
else:
	print("Model already exists, loading...")
	model = PPO.load(model_output, env=train_env)


# Evaluate the agent
eval_env = DummyVecEnv([lambda seed=1000 + i: make_env(seed, f"_eval_{i}") for i in range(4)])
eval_env = VecNormalize(eval_env, training=False, norm_obs=True, norm_reward=True, clip_obs=10.0)
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=100, render=False)
print(f"Mean reward: {mean_reward} +/- {std_reward}")



Training new model...
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


  return datetime.utcnow().replace(tzinfo=utc)


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 252      |
|    ep_rew_mean     | 0.0266   |
| time/              |          |
|    fps             | 369      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 254         |
|    ep_rew_mean          | 0.0133      |
| time/                   |             |
|    fps                  | 299         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.010304455 |
|    clip_fraction        | 0.0899      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.94       |
|    explained_variance   | -0.451      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0247     |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00503    |
|    value_loss           | 0.000177    |
-----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 227       |
|    ep_rew_mean          | 0.129     |
| time/                   |           |
|    fps                  | 285       |
|    iterations           | 3         |
|    time_elapsed         | 21        |
|    total_timesteps      | 6144      |
| train/                  |           |
|    approx_kl            | 0.0081685 |
|    clip_fraction        | 0.0388    |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.92     |
|    explained_variance   | -3.41     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0216   |
|    n_updates            | 20        |
|    policy_gradient_loss | -0.00258  |
|    value_loss           | 1.39e-05  |
---------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 226         |
|    ep_rew_mean          | 0.132       |
| time/                   |             |
|    fps                  | 284         |
|    iterations           | 4           |
|    time_elapsed         | 28          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.011905246 |
|    clip_fraction        | 0.08        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.88       |
|    explained_variance   | 0.00458     |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0131      |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.00404    |
|    value_loss           | 0.00809     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 213         |
|    ep_rew_mean          | 0.186       |
| time/                   |             |
|    fps                  | 277         |
|    iterations           | 5           |
|    time_elapsed         | 36          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.014530979 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.83       |
|    explained_variance   | 0.0598      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0171      |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.00647    |
|    value_loss           | 0.00204     |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 212        |
|    ep_rew_mean          | 0.195      |
| time/                   |            |
|    fps                  | 276        |
|    iterations           | 6          |
|    time_elapsed         | 44         |
|    total_timesteps      | 12288      |
| train/                  |            |
|    approx_kl            | 0.01143183 |
|    clip_fraction        | 0.122      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.78      |
|    explained_variance   | 0.0896     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0315    |
|    n_updates            | 50         |
|    policy_gradient_loss | -0.00672   |
|    value_loss           | 0.00841    |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 195         |
|    ep_rew_mean          | 0.263       |
| time/                   |             |
|    fps                  | 275         |
|    iterations           | 7           |
|    time_elapsed         | 52          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.008126879 |
|    clip_fraction        | 0.0715      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.73       |
|    explained_variance   | 0.18        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00168    |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0032     |
|    value_loss           | 0.0034      |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 188          |
|    ep_rew_mean          | 0.294        |
| time/                   |              |
|    fps                  | 272          |
|    iterations           | 8            |
|    time_elapsed         | 60           |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0075806053 |
|    clip_fraction        | 0.071        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.7         |
|    explained_variance   | 0.196        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.00503      |
|    n_updates            | 70           |
|    policy_gradient_loss | -0.00692     |
|    value_loss           | 0.0138       |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 173         |
|    ep_rew_mean          | 0.355       |
| time/                   |             |
|    fps                  | 273         |
|    iterations           | 9           |
|    time_elapsed         | 67          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.011130968 |
|    clip_fraction        | 0.0943      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.68       |
|    explained_variance   | 0.301       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0108      |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.00733    |
|    value_loss           | 0.00788     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 125         |
|    ep_rew_mean          | 0.545       |
| time/                   |             |
|    fps                  | 271         |
|    iterations           | 10          |
|    time_elapsed         | 75          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.007684296 |
|    clip_fraction        | 0.0625      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.61       |
|    explained_variance   | 0.285       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0106      |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.00699    |
|    value_loss           | 0.015       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 77          |
|    ep_rew_mean          | 0.725       |
| time/                   |             |
|    fps                  | 270         |
|    iterations           | 11          |
|    time_elapsed         | 83          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.010714591 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.56       |
|    explained_variance   | 0.329       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0114     |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0152     |
|    value_loss           | 0.0235      |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 44          |
|    ep_rew_mean          | 0.845       |
| time/                   |             |
|    fps                  | 271         |
|    iterations           | 12          |
|    time_elapsed         | 90          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.009297152 |
|    clip_fraction        | 0.106       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.45       |
|    explained_variance   | 0.356       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0103      |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.0113     |
|    value_loss           | 0.0259      |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 30.7        |
|    ep_rew_mean          | 0.892       |
| time/                   |             |
|    fps                  | 270         |
|    iterations           | 13          |
|    time_elapsed         | 98          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.012853214 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.33       |
|    explained_variance   | 0.43        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0139     |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0136     |
|    value_loss           | 0.0213      |
-----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 21.4      |
|    ep_rew_mean          | 0.925     |
| time/                   |           |
|    fps                  | 270       |
|    iterations           | 14        |
|    time_elapsed         | 105       |
|    total_timesteps      | 28672     |
| train/                  |           |
|    approx_kl            | 0.0151756 |
|    clip_fraction        | 0.169     |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.2      |
|    explained_variance   | 0.387     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.00286  |
|    n_updates            | 130       |
|    policy_gradient_loss | -0.0206   |
|    value_loss           | 0.0169    |
---------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 16.7         |
|    ep_rew_mean          | 0.941        |
| time/                   |              |
|    fps                  | 270          |
|    iterations           | 15           |
|    time_elapsed         | 113          |
|    total_timesteps      | 30720        |
| train/                  |              |
|    approx_kl            | 0.0094948355 |
|    clip_fraction        | 0.123        |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.02        |
|    explained_variance   | 0.361        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0108       |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.0176      |
|    value_loss           | 0.00727      |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.5        |
|    ep_rew_mean          | 0.949       |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 16          |
|    time_elapsed         | 121         |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.016197221 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.878      |
|    explained_variance   | 0.515       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0121     |
|    n_updates            | 150         |
|    policy_gradient_loss | -0.0206     |
|    value_loss           | 0.00296     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.5        |
|    ep_rew_mean          | 0.952       |
| time/                   |             |
|    fps                  | 270         |
|    iterations           | 17          |
|    time_elapsed         | 128         |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.012442203 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.684      |
|    explained_variance   | 0.548       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0124     |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.023      |
|    value_loss           | 0.00152     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.1        |
|    ep_rew_mean          | 0.954       |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 18          |
|    time_elapsed         | 136         |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.011923253 |
|    clip_fraction        | 0.195       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.506      |
|    explained_variance   | 0.783       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0466     |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.0331     |
|    value_loss           | 0.000393    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 0.958       |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 19          |
|    time_elapsed         | 144         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.014433549 |
|    clip_fraction        | 0.151       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.34       |
|    explained_variance   | 0.629       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0527     |
|    n_updates            | 180         |
|    policy_gradient_loss | -0.0215     |
|    value_loss           | 0.000845    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.9        |
|    ep_rew_mean          | 0.958       |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 20          |
|    time_elapsed         | 152         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.016457103 |
|    clip_fraction        | 0.103       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.229      |
|    explained_variance   | 0.855       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0462     |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0269     |
|    value_loss           | 0.000128    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 0.961       |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 21          |
|    time_elapsed         | 160         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.015660867 |
|    clip_fraction        | 0.0433      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.142      |
|    explained_variance   | 0.794       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0139     |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0102     |
|    value_loss           | 0.000289    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1        |
|    ep_rew_mean          | 0.961       |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 22          |
|    time_elapsed         | 167         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.014396055 |
|    clip_fraction        | 0.0175      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0714     |
|    explained_variance   | 0.959       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0104     |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.0112     |
|    value_loss           | 1.87e-05    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11          |
|    ep_rew_mean          | 0.961       |
| time/                   |             |
|    fps                  | 269         |
|    iterations           | 23          |
|    time_elapsed         | 175         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.001644043 |
|    clip_fraction        | 0.00889     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0414     |
|    explained_variance   | 0.986       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0214     |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.00705    |
|    value_loss           | 1.31e-05    |
-----------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 11.1          |
|    ep_rew_mean          | 0.961         |
| time/                   |               |
|    fps                  | 268           |
|    iterations           | 24            |
|    time_elapsed         | 182           |
|    total_timesteps      | 49152         |
| train/                  |               |
|    approx_kl            | 0.00034918665 |
|    clip_fraction        | 0.00225       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0288       |
|    explained_variance   | 0.994         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.000832     |
|    n_updates            | 230           |
|    policy_gradient_loss | -0.00246      |
|    value_loss           | 2.59e-06      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11           |
|    ep_rew_mean          | 0.961        |
| time/                   |              |
|    fps                  | 269          |
|    iterations           | 25           |
|    time_elapsed         | 190          |
|    total_timesteps      | 51200        |
| train/                  |              |
|    approx_kl            | 0.0010026803 |
|    clip_fraction        | 0.00586      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0188      |
|    explained_variance   | 0.974        |
|    learning_rate        | 0.0003       |
|    loss                 | -7.19e-06    |
|    n_updates            | 240          |
|    policy_gradient_loss | -0.00502     |
|    value_loss           | 3.1e-05      |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11           |
|    ep_rew_mean          | 0.961        |
| time/                   |              |
|    fps                  | 268          |
|    iterations           | 26           |
|    time_elapsed         | 198          |
|    total_timesteps      | 53248        |
| train/                  |              |
|    approx_kl            | 0.0002259549 |
|    clip_fraction        | 0.00278      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0135      |
|    explained_variance   | 0.998        |
|    learning_rate        | 0.0003       |
|    loss                 | -3.97e-06    |
|    n_updates            | 250          |
|    policy_gradient_loss | -0.00343     |
|    value_loss           | 2.16e-06     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 11            |
|    ep_rew_mean          | 0.961         |
| time/                   |               |
|    fps                  | 268           |
|    iterations           | 27            |
|    time_elapsed         | 205           |
|    total_timesteps      | 55296         |
| train/                  |               |
|    approx_kl            | 3.0520838e-05 |
|    clip_fraction        | 0.000879      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0108       |
|    explained_variance   | 0.999         |
|    learning_rate        | 0.0003        |
|    loss                 | -0.000144     |
|    n_updates            | 260           |
|    policy_gradient_loss | -0.00141      |
|    value_loss           | 5.06e-07      |
-------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11           |
|    ep_rew_mean          | 0.961        |
| time/                   |              |
|    fps                  | 268          |
|    iterations           | 28           |
|    time_elapsed         | 213          |
|    total_timesteps      | 57344        |
| train/                  |              |
|    approx_kl            | 3.156127e-05 |
|    clip_fraction        | 0.000439     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.00973     |
|    explained_variance   | 1            |
|    learning_rate        | 0.0003       |
|    loss                 | 5.18e-07     |
|    n_updates            | 270          |
|    policy_gradient_loss | -0.000643    |
|    value_loss           | 5.24e-07     |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 28.1       |
|    ep_rew_mean          | 0.894      |
| time/                   |            |
|    fps                  | 268        |
|    iterations           | 29         |
|    time_elapsed         | 221        |
|    total_timesteps      | 59392      |
| train/                  |            |
|    approx_kl            | 0.10398772 |
|    clip_fraction        | 0.0267     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0226    |
|    explained_variance   | 0.989      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.000663   |
|    n_updates            | 280        |
|    policy_gradient_loss | -0.00438   |
|    value_loss           | 0.000265   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 47         |
|    ep_rew_mean          | 0.825      |
| time/                   |            |
|    fps                  | 268        |
|    iterations           | 30         |
|    time_elapsed         | 228        |
|    total_timesteps      | 61440      |
| train/                  |            |
|    approx_kl            | 0.29233116 |
|    clip_fraction        | 0.197      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.173     |
|    explained_variance   | -0.839     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0379    |
|    n_updates            | 290        |
|    policy_gradient_loss | 0.0435     |
|    value_loss           | 0.00144    |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 64.7        |
|    ep_rew_mean          | 0.763       |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 31          |
|    time_elapsed         | 236         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.013439978 |
|    clip_fraction        | 0.131       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.358      |
|    explained_variance   | 0.112       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00458     |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.00525    |
|    value_loss           | 0.00747     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 47.5        |
|    ep_rew_mean          | 0.832       |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 32          |
|    time_elapsed         | 244         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.015021564 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.345      |
|    explained_variance   | 0.0876      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00122    |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.00738    |
|    value_loss           | 0.0111      |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 19.3        |
|    ep_rew_mean          | 0.932       |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 33          |
|    time_elapsed         | 251         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.027044665 |
|    clip_fraction        | 0.16        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.299      |
|    explained_variance   | 0.222       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0331     |
|    n_updates            | 320         |
|    policy_gradient_loss | -0.0173     |
|    value_loss           | 0.0133      |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.8        |
|    ep_rew_mean          | 0.948       |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 34          |
|    time_elapsed         | 259         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.013827939 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.229      |
|    explained_variance   | 0.451       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0137     |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.0202     |
|    value_loss           | 0.00739     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.6        |
|    ep_rew_mean          | 0.956       |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 35          |
|    time_elapsed         | 266         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.061735045 |
|    clip_fraction        | 0.142       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0657     |
|    explained_variance   | 0.376       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.047      |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.0322     |
|    value_loss           | 0.00195     |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.2       |
|    ep_rew_mean          | 0.96       |
| time/                   |            |
|    fps                  | 268        |
|    iterations           | 36         |
|    time_elapsed         | 274        |
|    total_timesteps      | 73728      |
| train/                  |            |
|    approx_kl            | 0.01864114 |
|    clip_fraction        | 0.0241     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0543    |
|    explained_variance   | 0.445      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00768    |
|    n_updates            | 350        |
|    policy_gradient_loss | -0.00752   |
|    value_loss           | 0.00136    |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12          |
|    ep_rew_mean          | 0.958       |
| time/                   |             |
|    fps                  | 268         |
|    iterations           | 37          |
|    time_elapsed         | 282         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.034024276 |
|    clip_fraction        | 0.0933      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0756     |
|    explained_variance   | 0.602       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0134     |
|    n_updates            | 360         |
|    policy_gradient_loss | 0.0461      |
|    value_loss           | 0.000661    |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 20         |
|    ep_rew_mean          | 0.93       |
| time/                   |            |
|    fps                  | 268        |
|    iterations           | 38         |
|    time_elapsed         | 289        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.08158496 |
|    clip_fraction        | 0.198      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.165     |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0372    |
|    n_updates            | 370        |
|    policy_gradient_loss | 0.114      |
|    value_loss           | 0.000161   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.8       |
|    ep_rew_mean          | 0.955      |
| time/                   |            |
|    fps                  | 268        |
|    iterations           | 39         |
|    time_elapsed         | 297        |
|    total_timesteps      | 79872      |
| train/                  |            |
|    approx_kl            | 0.06071516 |
|    clip_fraction        | 0.119      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.205     |
|    explained_variance   | 0.191      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0348    |
|    n_updates            | 380        |
|    policy_gradient_loss | -0.0162    |
|    value_loss           | 0.00396    |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.6        |
|    ep_rew_mean          | 0.928       |
| time/                   |             |
|    fps                  | 267         |
|    iterations           | 41          |
|    time_elapsed         | 313         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.084637254 |
|    clip_fraction        | 0.275       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.228      |
|    explained_variance   | 0.848       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0891     |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.0608     |
|    value_loss           | 9.8e-05     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.8        |
|    ep_rew_mean          | 0.945       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 42          |
|    time_elapsed         | 322         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.091567636 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.188      |
|    explained_variance   | 0.0857      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0566     |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.0309     |
|    value_loss           | 0.0019      |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 21.7       |
|    ep_rew_mean          | 0.924      |
| time/                   |            |
|    fps                  | 267        |
|    iterations           | 43         |
|    time_elapsed         | 329        |
|    total_timesteps      | 88064      |
| train/                  |            |
|    approx_kl            | 0.06881771 |
|    clip_fraction        | 0.284      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.537     |
|    explained_variance   | 0.367      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.255      |
|    n_updates            | 420        |
|    policy_gradient_loss | -0.0176    |
|    value_loss           | 0.00252    |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.6       |
|    ep_rew_mean          | 0.952      |
| time/                   |            |
|    fps                  | 267        |
|    iterations           | 44         |
|    time_elapsed         | 337        |
|    total_timesteps      | 90112      |
| train/                  |            |
|    approx_kl            | 0.10394019 |
|    clip_fraction        | 0.245      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.457     |
|    explained_variance   | 0.293      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0259    |
|    n_updates            | 430        |
|    policy_gradient_loss | -0.0292    |
|    value_loss           | 0.00299    |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_mean          | 0.957       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 45          |
|    time_elapsed         | 345         |
|    total_timesteps      | 92160       |
| train/                  |             |
|    approx_kl            | 0.038059633 |
|    clip_fraction        | 0.0748      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.211      |
|    explained_variance   | 0.572       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0109     |
|    n_updates            | 440         |
|    policy_gradient_loss | -0.0176     |
|    value_loss           | 0.00122     |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.3       |
|    ep_rew_mean          | 0.96       |
| time/                   |            |
|    fps                  | 267        |
|    iterations           | 46         |
|    time_elapsed         | 352        |
|    total_timesteps      | 94208      |
| train/                  |            |
|    approx_kl            | 0.05231437 |
|    clip_fraction        | 0.0536     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.129     |
|    explained_variance   | 0.691      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00616   |
|    n_updates            | 450        |
|    policy_gradient_loss | -0.0106    |
|    value_loss           | 0.000544   |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.1        |
|    ep_rew_mean          | 0.961       |
| time/                   |             |
|    fps                  | 267         |
|    iterations           | 47          |
|    time_elapsed         | 360         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.044745695 |
|    clip_fraction        | 0.0729      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0518     |
|    explained_variance   | 0.866       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0078      |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.023      |
|    value_loss           | 0.000185    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 0.96        |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 48          |
|    time_elapsed         | 368         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.004192096 |
|    clip_fraction        | 0.00601     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0295     |
|    explained_variance   | 0.804       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00626    |
|    n_updates            | 470         |
|    policy_gradient_loss | -0.00537    |
|    value_loss           | 3.55e-05    |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11           |
|    ep_rew_mean          | 0.961        |
| time/                   |              |
|    fps                  | 267          |
|    iterations           | 49           |
|    time_elapsed         | 375          |
|    total_timesteps      | 100352       |
| train/                  |              |
|    approx_kl            | 0.0017093418 |
|    clip_fraction        | 0.0061       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0168      |
|    explained_variance   | 0.95         |
|    learning_rate        | 0.0003       |
|    loss                 | -1.78e-05    |
|    n_updates            | 480          |
|    policy_gradient_loss | -0.00545     |
|    value_loss           | 5.04e-05     |
------------------------------------------


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 11.2          |
|    ep_rew_mean          | 0.961         |
| time/                   |               |
|    fps                  | 266           |
|    iterations           | 50            |
|    time_elapsed         | 383           |
|    total_timesteps      | 102400        |
| train/                  |               |
|    approx_kl            | 9.0349204e-05 |
|    clip_fraction        | 0.000732      |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.0113       |
|    explained_variance   | 0.993         |
|    learning_rate        | 0.0003        |
|    loss                 | -9.62e-06     |
|    n_updates            | 490           |
|    policy_gradient_loss | -0.00118      |
|    value_loss           | 4.29e-06      |
-------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11         |
|    ep_rew_mean          | 0.961      |
| time/                   |            |
|    fps                  | 267        |
|    iterations           | 51         |
|    time_elapsed         | 390        |
|    total_timesteps      | 104448     |
| train/                  |            |
|    approx_kl            | 0.00068752 |
|    clip_fraction        | 0.00488    |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.00938   |
|    explained_variance   | 0.956      |
|    learning_rate        | 0.0003     |
|    loss                 | 6.03e-05   |
|    n_updates            | 500        |
|    policy_gradient_loss | -0.00349   |
|    value_loss           | 4.69e-05   |
----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 28.7      |
|    ep_rew_mean          | 0.893     |
| time/                   |           |
|    fps                  | 267       |
|    iterations           | 52        |
|    time_elapsed         | 398       |
|    total_timesteps      | 106496    |
| train/                  |           |
|    approx_kl            | 0.3421618 |
|    clip_fraction        | 0.14      |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.107    |
|    explained_variance   | 0.997     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0641   |
|    n_updates            | 510       |
|    policy_gradient_loss | -0.0351   |
|    value_loss           | 0.000317  |
---------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 14.3       |
|    ep_rew_mean          | 0.95       |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 53         |
|    time_elapsed         | 406        |
|    total_timesteps      | 108544     |
| train/                  |            |
|    approx_kl            | 0.16307603 |
|    clip_fraction        | 0.273      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.527     |
|    explained_variance   | -0.324     |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0363    |
|    n_updates            | 520        |
|    policy_gradient_loss | 0.07       |
|    value_loss           | 0.00197    |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 15         |
|    ep_rew_mean          | 0.947      |
| time/                   |            |
|    fps                  | 267        |
|    iterations           | 54         |
|    time_elapsed         | 413        |
|    total_timesteps      | 110592     |
| train/                  |            |
|    approx_kl            | 0.02253037 |
|    clip_fraction        | 0.26       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.263     |
|    explained_variance   | 0.19       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0657    |
|    n_updates            | 530        |
|    policy_gradient_loss | -0.0254    |
|    value_loss           | 0.00346    |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2        |
|    ep_rew_mean          | 0.957       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 55          |
|    time_elapsed         | 421         |
|    total_timesteps      | 112640      |
| train/                  |             |
|    approx_kl            | 0.073046885 |
|    clip_fraction        | 0.268       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.281      |
|    explained_variance   | 0.504       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.056      |
|    n_updates            | 540         |
|    policy_gradient_loss | -0.0416     |
|    value_loss           | 0.00124     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.8        |
|    ep_rew_mean          | 0.959       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 56          |
|    time_elapsed         | 429         |
|    total_timesteps      | 114688      |
| train/                  |             |
|    approx_kl            | 0.043069106 |
|    clip_fraction        | 0.114       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.132      |
|    explained_variance   | 0.745       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0527     |
|    n_updates            | 550         |
|    policy_gradient_loss | -0.0278     |
|    value_loss           | 0.000373    |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 16.8       |
|    ep_rew_mean          | 0.941      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 57         |
|    time_elapsed         | 437        |
|    total_timesteps      | 116736     |
| train/                  |            |
|    approx_kl            | 0.08601675 |
|    clip_fraction        | 0.115      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.113     |
|    explained_variance   | 0.729      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0192    |
|    n_updates            | 560        |
|    policy_gradient_loss | 0.0833     |
|    value_loss           | 0.000405   |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.4        |
|    ep_rew_mean          | 0.956       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 58          |
|    time_elapsed         | 445         |
|    total_timesteps      | 118784      |
| train/                  |             |
|    approx_kl            | 0.024024922 |
|    clip_fraction        | 0.155       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.329      |
|    explained_variance   | 0.267       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0358     |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.0134     |
|    value_loss           | 0.00478     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11          |
|    ep_rew_mean          | 0.961       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 60          |
|    time_elapsed         | 460         |
|    total_timesteps      | 122880      |
| train/                  |             |
|    approx_kl            | 0.029495535 |
|    clip_fraction        | 0.011       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0113     |
|    explained_variance   | 0.955       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00691    |
|    n_updates            | 590         |
|    policy_gradient_loss | -0.0102     |
|    value_loss           | 3.7e-05     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 20.9        |
|    ep_rew_mean          | 0.927       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 61          |
|    time_elapsed         | 468         |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.023817312 |
|    clip_fraction        | 0.186       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.159      |
|    explained_variance   | 0.994       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.092      |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.0396     |
|    value_loss           | 4.19e-05    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 18.3        |
|    ep_rew_mean          | 0.936       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 62          |
|    time_elapsed         | 475         |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.023860686 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.309      |
|    explained_variance   | 0.0504      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0346     |
|    n_updates            | 610         |
|    policy_gradient_loss | -0.0247     |
|    value_loss           | 0.00286     |
-----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 12.8      |
|    ep_rew_mean          | 0.955     |
| time/                   |           |
|    fps                  | 266       |
|    iterations           | 63        |
|    time_elapsed         | 483       |
|    total_timesteps      | 129024    |
| train/                  |           |
|    approx_kl            | 0.1346942 |
|    clip_fraction        | 0.199     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.195    |
|    explained_variance   | 0.31      |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0332   |
|    n_updates            | 620       |
|    policy_gradient_loss | -0.0392   |
|    value_loss           | 0.00204   |
---------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.1       |
|    ep_rew_mean          | 0.961      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 64         |
|    time_elapsed         | 491        |
|    total_timesteps      | 131072     |
| train/                  |            |
|    approx_kl            | 0.18778455 |
|    clip_fraction        | 0.0614     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0706    |
|    explained_variance   | 0.659      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0193    |
|    n_updates            | 630        |
|    policy_gradient_loss | -0.0203    |
|    value_loss           | 0.000671   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 23.8       |
|    ep_rew_mean          | 0.916      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 65         |
|    time_elapsed         | 498        |
|    total_timesteps      | 133120     |
| train/                  |            |
|    approx_kl            | 0.11892122 |
|    clip_fraction        | 0.187      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.187     |
|    explained_variance   | 0.908      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0772    |
|    n_updates            | 640        |
|    policy_gradient_loss | 0.221      |
|    value_loss           | 0.000102   |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 21.6        |
|    ep_rew_mean          | 0.924       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 66          |
|    time_elapsed         | 506         |
|    total_timesteps      | 135168      |
| train/                  |             |
|    approx_kl            | 0.014702764 |
|    clip_fraction        | 0.25        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.453      |
|    explained_variance   | 0.0892      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0273     |
|    n_updates            | 650         |
|    policy_gradient_loss | -0.0294     |
|    value_loss           | 0.00217     |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 18.3       |
|    ep_rew_mean          | 0.936      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 67         |
|    time_elapsed         | 514        |
|    total_timesteps      | 137216     |
| train/                  |            |
|    approx_kl            | 0.04456736 |
|    clip_fraction        | 0.372      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.43      |
|    explained_variance   | 0.289      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0667    |
|    n_updates            | 660        |
|    policy_gradient_loss | -0.0482    |
|    value_loss           | 0.0022     |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.8        |
|    ep_rew_mean          | 0.948       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 68          |
|    time_elapsed         | 521         |
|    total_timesteps      | 139264      |
| train/                  |             |
|    approx_kl            | 0.026721153 |
|    clip_fraction        | 0.27        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.358      |
|    explained_variance   | 0.454       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.059      |
|    n_updates            | 670         |
|    policy_gradient_loss | -0.0401     |
|    value_loss           | 0.00222     |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.2       |
|    ep_rew_mean          | 0.96       |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 69         |
|    time_elapsed         | 529        |
|    total_timesteps      | 141312     |
| train/                  |            |
|    approx_kl            | 0.22456448 |
|    clip_fraction        | 0.157      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.12      |
|    explained_variance   | 0.635      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.035     |
|    n_updates            | 680        |
|    policy_gradient_loss | -0.0316    |
|    value_loss           | 0.000985   |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 18.4        |
|    ep_rew_mean          | 0.935       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 70          |
|    time_elapsed         | 537         |
|    total_timesteps      | 143360      |
| train/                  |             |
|    approx_kl            | 0.013878487 |
|    clip_fraction        | 0.197       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.18       |
|    explained_variance   | 0.846       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0487     |
|    n_updates            | 690         |
|    policy_gradient_loss | -0.0391     |
|    value_loss           | 0.000131    |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12         |
|    ep_rew_mean          | 0.958      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 71         |
|    time_elapsed         | 545        |
|    total_timesteps      | 145408     |
| train/                  |            |
|    approx_kl            | 0.05368741 |
|    clip_fraction        | 0.225      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.361     |
|    explained_variance   | 0.121      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00959    |
|    n_updates            | 700        |
|    policy_gradient_loss | -0.0206    |
|    value_loss           | 0.0027     |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11         |
|    ep_rew_mean          | 0.961      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 72         |
|    time_elapsed         | 552        |
|    total_timesteps      | 147456     |
| train/                  |            |
|    approx_kl            | 0.16038588 |
|    clip_fraction        | 0.0517     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0518    |
|    explained_variance   | 0.641      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00664   |
|    n_updates            | 710        |
|    policy_gradient_loss | -0.0207    |
|    value_loss           | 0.000722   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 15.3       |
|    ep_rew_mean          | 0.946      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 73         |
|    time_elapsed         | 560        |
|    total_timesteps      | 149504     |
| train/                  |            |
|    approx_kl            | 0.02232788 |
|    clip_fraction        | 0.338      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.262     |
|    explained_variance   | 0.92       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0793    |
|    n_updates            | 720        |
|    policy_gradient_loss | -0.0631    |
|    value_loss           | 2.46e-05   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.9       |
|    ep_rew_mean          | 0.955      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 74         |
|    time_elapsed         | 568        |
|    total_timesteps      | 151552     |
| train/                  |            |
|    approx_kl            | 0.07107993 |
|    clip_fraction        | 0.357      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.328     |
|    explained_variance   | 0.189      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.061     |
|    n_updates            | 730        |
|    policy_gradient_loss | -0.0348    |
|    value_loss           | 0.000609   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11         |
|    ep_rew_mean          | 0.961      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 75         |
|    time_elapsed         | 575        |
|    total_timesteps      | 153600     |
| train/                  |            |
|    approx_kl            | 0.27386162 |
|    clip_fraction        | 0.18       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.116     |
|    explained_variance   | 0.544      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.047     |
|    n_updates            | 740        |
|    policy_gradient_loss | -0.0297    |
|    value_loss           | 0.000837   |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 15.6        |
|    ep_rew_mean          | 0.945       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 76          |
|    time_elapsed         | 583         |
|    total_timesteps      | 155648      |
| train/                  |             |
|    approx_kl            | 0.041087754 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.121      |
|    explained_variance   | 0.979       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0669     |
|    n_updates            | 750         |
|    policy_gradient_loss | -0.0427     |
|    value_loss           | 3.47e-05    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.5        |
|    ep_rew_mean          | 0.953       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 77          |
|    time_elapsed         | 591         |
|    total_timesteps      | 157696      |
| train/                  |             |
|    approx_kl            | 0.012227878 |
|    clip_fraction        | 0.226       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.333      |
|    explained_variance   | 0.506       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0404     |
|    n_updates            | 760         |
|    policy_gradient_loss | -0.0394     |
|    value_loss           | 0.00128     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.1        |
|    ep_rew_mean          | 0.957       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 78          |
|    time_elapsed         | 598         |
|    total_timesteps      | 159744      |
| train/                  |             |
|    approx_kl            | 0.018685652 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.183      |
|    explained_variance   | 0.658       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.034      |
|    n_updates            | 770         |
|    policy_gradient_loss | -0.0347     |
|    value_loss           | 0.000741    |
-----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 11.2      |
|    ep_rew_mean          | 0.961     |
| time/                   |           |
|    fps                  | 266       |
|    iterations           | 79        |
|    time_elapsed         | 606       |
|    total_timesteps      | 161792    |
| train/                  |           |
|    approx_kl            | 0.4318135 |
|    clip_fraction        | 0.185     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.0678   |
|    explained_variance   | 0.801     |
|    learning_rate        | 0.0003    |
|    loss                 | -0.0317   |
|    n_updates            | 780       |
|    policy_gradient_loss | -0.0359   |
|    value_loss           | 0.000363  |
---------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11.7         |
|    ep_rew_mean          | 0.959        |
| time/                   |              |
|    fps                  | 266          |
|    iterations           | 80           |
|    time_elapsed         | 614          |
|    total_timesteps      | 163840       |
| train/                  |              |
|    approx_kl            | 0.0035419362 |
|    clip_fraction        | 0.048        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.0769      |
|    explained_variance   | 0.962        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0152      |
|    n_updates            | 790          |
|    policy_gradient_loss | -0.0108      |
|    value_loss           | 3.55e-05     |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.6       |
|    ep_rew_mean          | 0.959      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 81         |
|    time_elapsed         | 622        |
|    total_timesteps      | 165888     |
| train/                  |            |
|    approx_kl            | 0.05430057 |
|    clip_fraction        | 0.0778     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.047     |
|    explained_variance   | 0.844      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0243    |
|    n_updates            | 800        |
|    policy_gradient_loss | -0.0134    |
|    value_loss           | 0.000146   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11         |
|    ep_rew_mean          | 0.961      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 82         |
|    time_elapsed         | 630        |
|    total_timesteps      | 167936     |
| train/                  |            |
|    approx_kl            | 0.22567827 |
|    clip_fraction        | 0.108      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0106    |
|    explained_variance   | 0.871      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0401    |
|    n_updates            | 810        |
|    policy_gradient_loss | -0.0195    |
|    value_loss           | 0.000136   |
----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 11           |
|    ep_rew_mean          | 0.961        |
| time/                   |              |
|    fps                  | 266          |
|    iterations           | 83           |
|    time_elapsed         | 637          |
|    total_timesteps      | 169984       |
| train/                  |              |
|    approx_kl            | 6.237242e-07 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.00195     |
|    explained_variance   | 0.997        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.000994    |
|    n_updates            | 820          |
|    policy_gradient_loss | -0.000628    |
|    value_loss           | 9.77e-07     |
------------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 30         |
|    ep_rew_mean          | 0.89       |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 84         |
|    time_elapsed         | 645        |
|    total_timesteps      | 172032     |
| train/                  |            |
|    approx_kl            | 0.44329295 |
|    clip_fraction        | 0.18       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.162     |
|    explained_variance   | 0.999      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0823    |
|    n_updates            | 830        |
|    policy_gradient_loss | -0.0352    |
|    value_loss           | 3.22e-05   |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 42.1        |
|    ep_rew_mean          | 0.848       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 85          |
|    time_elapsed         | 653         |
|    total_timesteps      | 174080      |
| train/                  |             |
|    approx_kl            | 0.060374554 |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.535      |
|    explained_variance   | -0.111      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0312     |
|    n_updates            | 840         |
|    policy_gradient_loss | 0.154       |
|    value_loss           | 0.00379     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 22.6        |
|    ep_rew_mean          | 0.921       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 86          |
|    time_elapsed         | 660         |
|    total_timesteps      | 176128      |
| train/                  |             |
|    approx_kl            | 0.035096485 |
|    clip_fraction        | 0.252       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.584      |
|    explained_variance   | -0.843      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.019      |
|    n_updates            | 850         |
|    policy_gradient_loss | -0.0274     |
|    value_loss           | 0.00592     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 19.7        |
|    ep_rew_mean          | 0.931       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 87          |
|    time_elapsed         | 668         |
|    total_timesteps      | 178176      |
| train/                  |             |
|    approx_kl            | 0.031941548 |
|    clip_fraction        | 0.266       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.574      |
|    explained_variance   | 0.412       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0294     |
|    n_updates            | 860         |
|    policy_gradient_loss | -0.0307     |
|    value_loss           | 0.00303     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 17.3        |
|    ep_rew_mean          | 0.939       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 88          |
|    time_elapsed         | 675         |
|    total_timesteps      | 180224      |
| train/                  |             |
|    approx_kl            | 0.039458975 |
|    clip_fraction        | 0.285       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.537      |
|    explained_variance   | 0.508       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0171     |
|    n_updates            | 870         |
|    policy_gradient_loss | -0.0373     |
|    value_loss           | 0.00217     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.7        |
|    ep_rew_mean          | 0.952       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 89          |
|    time_elapsed         | 683         |
|    total_timesteps      | 182272      |
| train/                  |             |
|    approx_kl            | 0.037678182 |
|    clip_fraction        | 0.286       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.435      |
|    explained_variance   | 0.581       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0427     |
|    n_updates            | 880         |
|    policy_gradient_loss | -0.0341     |
|    value_loss           | 0.00164     |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.6       |
|    ep_rew_mean          | 0.956      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 90         |
|    time_elapsed         | 691        |
|    total_timesteps      | 184320     |
| train/                  |            |
|    approx_kl            | 0.19900385 |
|    clip_fraction        | 0.181      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.202     |
|    explained_variance   | 0.625      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.044     |
|    n_updates            | 890        |
|    policy_gradient_loss | -0.0373    |
|    value_loss           | 0.000688   |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.4        |
|    ep_rew_mean          | 0.953       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 91          |
|    time_elapsed         | 698         |
|    total_timesteps      | 186368      |
| train/                  |             |
|    approx_kl            | 0.010247037 |
|    clip_fraction        | 0.0846      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.132      |
|    explained_variance   | 0.643       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0454      |
|    n_updates            | 900         |
|    policy_gradient_loss | -0.0146     |
|    value_loss           | 0.000522    |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11         |
|    ep_rew_mean          | 0.961      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 92         |
|    time_elapsed         | 706        |
|    total_timesteps      | 188416     |
| train/                  |            |
|    approx_kl            | 0.07358906 |
|    clip_fraction        | 0.0604     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0944    |
|    explained_variance   | 0.372      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.00486   |
|    n_updates            | 910        |
|    policy_gradient_loss | -0.0133    |
|    value_loss           | 0.00227    |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.2        |
|    ep_rew_mean          | 0.957       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 93          |
|    time_elapsed         | 714         |
|    total_timesteps      | 190464      |
| train/                  |             |
|    approx_kl            | 0.023180477 |
|    clip_fraction        | 0.0888      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.122      |
|    explained_variance   | 0.972       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0324     |
|    n_updates            | 920         |
|    policy_gradient_loss | -0.0183     |
|    value_loss           | 2.1e-05     |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.2        |
|    ep_rew_mean          | 0.961       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 94          |
|    time_elapsed         | 721         |
|    total_timesteps      | 192512      |
| train/                  |             |
|    approx_kl            | 0.055476874 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.121      |
|    explained_variance   | 0.806       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.015      |
|    n_updates            | 930         |
|    policy_gradient_loss | -0.0173     |
|    value_loss           | 0.000165    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.5        |
|    ep_rew_mean          | 0.96        |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 95          |
|    time_elapsed         | 729         |
|    total_timesteps      | 194560      |
| train/                  |             |
|    approx_kl            | 0.032975264 |
|    clip_fraction        | 0.0102      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0127     |
|    explained_variance   | 0.958       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0219     |
|    n_updates            | 940         |
|    policy_gradient_loss | -0.0126     |
|    value_loss           | 2.46e-05    |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.8        |
|    ep_rew_mean          | 0.955       |
| time/                   |             |
|    fps                  | 266         |
|    iterations           | 96          |
|    time_elapsed         | 737         |
|    total_timesteps      | 196608      |
| train/                  |             |
|    approx_kl            | 0.012480363 |
|    clip_fraction        | 0.0397      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0947     |
|    explained_variance   | 0.871       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0257     |
|    n_updates            | 950         |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 0.00012     |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 11.2       |
|    ep_rew_mean          | 0.961      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 97         |
|    time_elapsed         | 745        |
|    total_timesteps      | 198656     |
| train/                  |            |
|    approx_kl            | 0.29927137 |
|    clip_fraction        | 0.303      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0825    |
|    explained_variance   | 0.744      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0462    |
|    n_updates            | 960        |
|    policy_gradient_loss | -0.0461    |
|    value_loss           | 0.000271   |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.3       |
|    ep_rew_mean          | 0.953      |
| time/                   |            |
|    fps                  | 266        |
|    iterations           | 98         |
|    time_elapsed         | 753        |
|    total_timesteps      | 200704     |
| train/                  |            |
|    approx_kl            | 0.11365476 |
|    clip_fraction        | 0.0964     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.065     |
|    explained_variance   | 0.964      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0354    |
|    n_updates            | 970        |
|    policy_gradient_loss | -0.0221    |
|    value_loss           | 6.11e-05   |
----------------------------------------


Mean reward: 0.0 +/- 0.0


In [11]:
def record_video(env, agent):
	frames = []
	state, _ = env.reset()
	# Record the first frame
	frames.append(env.render())

	done = False
	while not done:
		action, _ = agent.predict(state, deterministic=True)
		state, _, terminated, truncated, _ = env.step(action)
		frames.append(env.render())
		done = terminated or truncated

	# Save the recorded frames as a video
	imageio.mimsave(CURRENT_DIRECTORY / "demo.mp4", frames, fps=1)

record_video(env, model)

In [12]:
from IPython.display import Video

Video("./demo.mp4", embed=True)