In [1]:
# !apt-get install -y \
#     libgl1-mesa-dev \
#     libgl1-mesa-glx \
#     libglew-dev \
#     libosmesa6-dev \
#     software-properties-common

# !apt-get install -y patchelf

# !apt-get update --fix-missing
# !pip install mujoco
# !pip install  gymnasium
# !apt-get update --fix-missing
# !pip uninstall stable-baselines3

In [2]:
import os
import time
import wandb
import random
from distutils.util import strtobool

import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal
from torch.utils.tensorboard import SummaryWriter

In [3]:
def make_env(env_id, idx, capture_video, run_name, gamma):
    def thunk():
        if capture_video:
            env = gym.make(env_id, render_mode="rgb_array")
        else:
            env = gym.make(env_id)
        env = gym.wrappers.FlattenObservation(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if capture_video and idx == 0:
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
        env = gym.wrappers.ClipAction(env)
        env = gym.wrappers.NormalizeObservation(env)
        env = gym.wrappers.TransformObservation(env, lambda obs: np.clip(obs, -10, 10))
        env = gym.wrappers.NormalizeReward(env, gamma=gamma)
        env = gym.wrappers.TransformReward(env, lambda reward: np.clip(reward, -10, 10))
        return env

    return thunk

In [4]:
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer

In [5]:
class Agent(nn.Module):

    def __init__(self, envs):
        super().__init__()
        self.critic = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(),64)),
            nn.Tanh(),
            layer_init(nn.Linear(64,64)),
            nn.Tanh(),
            layer_init(nn.Linear(64,1), std=1.0),
        )

        self.actor_mean = nn.Sequential(
            layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(),64)),
            nn.Tanh(),
            layer_init(nn.Linear(64,64)),
            nn.Tanh(),
            layer_init(nn.Linear(64,np.prod(envs.single_action_space.shape)), std=0.01),
        )

        self.actor_logstd = nn.Parameter(torch.zeros(1,np.prod(envs.single_action_space.shape)))

    def get_value(self, x):
        return self.critic(x)

    def get_action_and_value(self, x, action=None):
        action_value = self.critic(x)
        action_mean = self.actor_mean(x)
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean,action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1),action_value

In [6]:
def train(
    seed,
    env_id,
    total_timesteps,
    learning_rate,
    num_envs,
    num_steps,
    gamma,
    gae_lambda,
    num_minibatches,
    update_epochs,
    clip_coef,
    entropy_coef,
    vf_coef,
    max_grad_norm,
    target_kl):

    batch_size = int(num_envs * num_steps)
    minibatch_size = batch_size // num_minibatches

    run_name = f"{env_id}__{seed}__{int(time.time())}"
    wandb.init(
        project="ppo-mujoco-benchmark",
        config={
            "env":env_id,
            "timesteps":total_timesteps,
            "lr":learning_rate,
            "num_steps":num_steps,
            "gamma":gamma,
            "gae_lambda":gae_lambda,
            "num_minibatches":num_minibatches,
            "update_epochs":update_epochs,
            "clip_coef":clip_coef,
            "entropy_coef":entropy_coef,
            "vf_coef":vf_coef,
            "max_grad_norm":max_grad_norm
        },
        sync_tensorboard=True,
        monitor_gym=True,
        name=run_name
    )
    writer = SummaryWriter(f"runs/{run_name}")

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    envs = gym.vector.SyncVectorEnv(
        [make_env(env_id, i, True, run_name, gamma) for i in range(num_envs)]
    )

    agent = Agent(envs).to(device)
    optimizer = optim.Adam(agent.parameters(),lr=learning_rate, eps=1e-5)

    obs = torch.zeros((num_steps, num_envs) + envs.single_observation_space.shape).to(device)
    actions = torch.zeros((num_steps,num_envs) + envs.single_action_space.shape).to(device)
    logprobs = torch.zeros((num_steps, num_envs)).to(device)
    rewards = torch.zeros((num_steps, num_envs)).to(device)
    dones = torch.zeros((num_steps, num_envs)).to(device)
    values = torch.zeros((num_steps, num_envs)).to(device)

    global_step = 0
    start_time = time.time()
    next_obs, _ = envs.reset(seed=seed)
    next_obs = torch.Tensor(next_obs).to(device)
    next_done = torch.zeros(num_envs).to(device)
    num_updates = total_timesteps // batch_size

    for update in range(1, num_updates + 1):

        frac = 1.0 - (update - 1.0) / num_updates
        lrnow = frac * learning_rate
        optimizer.param_groups[0]["lr"] = lrnow

        for step in range(num_steps):

            global_step += 1 * num_envs
            obs[step] = next_obs
            dones[step] = next_done

            with torch.no_grad():
                action, logprob, _, value = agent.get_action_and_value(next_obs)
                values[step] = value.flatten()

            actions[step] = action
            logprobs[step] = logprob

            next_obs, reward, terminated, truncated, infos = envs.step(action.cpu().numpy())
            done = np.logical_or(terminated, truncated)
            rewards[step] = torch.tensor(reward).to(device).view(-1)
            next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor(next_done).to(device)

            if "final_info" not in infos:
                continue

            for info in infos["final_info"]:
                if info is None:
                    continue

                print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                writer.add_scalar("charts/episodic_lenght", info["episode"]["l"], global_step)

        with torch.no_grad():
            next_value = agent.get_value(next_obs).reshape(1, -1)
            advantages = torch.zeros_like(rewards).to(device)
            lastgaelam = 0
            for t in reversed(range(num_steps)):
                if t == num_steps - 1:
                    nextnonterminal = 1.0 - next_done
                    nextvalues = next_value
                else:
                    nextnonterminal = 1.0 - dones[t + 1]
                    nextvalues = values [t + 1]

                delta = rewards[t] + gamma * nextvalues * nextnonterminal - values[t]
                advantages[t] = delta + gamma * gae_lambda * nextnonterminal  * lastgaelam
                lastgaelam = advantages[t]
            returns = advantages + values


        b_obs = obs.reshape((-1, ) + envs.single_observation_space.shape )
        b_logprobs = logprobs.reshape(-1)
        b_actions = actions.reshape((-1, ) + envs.single_action_space.shape)
        b_advantages = advantages.reshape(-1)
        b_returns = returns.reshape(-1)
        b_values = values.reshape(-1)

        b_inds = np.arange(batch_size)
        clipfracs = []

        for epoch in range(update_epochs):
            np.random.shuffle(b_inds)
            for start in range(0, batch_size, minibatch_size):
                end = start + minibatch_size
                mb_inds = b_inds[start:end]

                _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds],b_actions[mb_inds])
                logratio = newlogprob - b_logprobs[mb_inds]
                ratio = logratio.exp()

                with torch.no_grad():
                    old_approx_kl = (-logratio).mean()
                    approx_kl = ((ratio -1) - logratio).mean()

                mb_advantages = b_advantages[mb_inds]
                mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)

                pg_loss1 = -mb_advantages * ratio
                pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef )
                pg_loss = torch.max(pg_loss1, pg_loss2).mean()

                newvalue = newvalue.view(-1)

                v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2
                v_clipped = b_values[mb_inds] + torch.clamp(
                    newvalue - b_values[mb_inds],
                    -clip_coef,
                    clip_coef,
                )
                v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2
                v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
                v_loss = 0.5 * v_loss_max.mean()

                entropy_loss = entropy.mean()
                loss = pg_loss - entropy_coef * entropy_loss + v_loss * vf_coef


                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(agent.parameters(), max_grad_norm)
                optimizer.step()

            if target_kl is not None:
                if approx_kl > target_kl:
                    break

            y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
            var_y = np.var(y_true)
            explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y

            writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step)
            writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
            writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
            writer.add_scalar("losses/entropy", entropy_loss.item(), global_step)
            writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), global_step)
            writer.add_scalar("losses/approx_kl",approx_kl.item(), global_step)
            writer.add_scalar("losses/explained_variance", explained_var, global_step)
            writer.add_scalar("charts/SPS", int(global_step/ (time.time() - start_time)), global_step)

    envs.close()
    writer.close()


In [15]:
seed = 2
total_timesteps = 1000000
learning_rate = 0.00003
num_envs = 1
num_steps = 2048
gamma = 0.99
gae_lambda = 0.95
num_minibatches = 64
update_epochs = 10
clip_coef = 0.5
ent_coef = 0.0
vf_coef = 0.5
max_grad_norm = 0.5
target_kl = None
env = {"hopper":"Hopper-v2","humanoid":"Humanoid-v2","halfCheetah":"HalfCheetah-v2","ant":"Ant-v2"}

In [None]:
train(seed,
      env["hopper"],
      total_timesteps,
      learning_rate,
      num_envs,
      num_steps,
      gamma,
      gae_lambda,
      num_minibatches,
      update_epochs,
      clip_coef,
      ent_coef,
      vf_coef,
      max_grad_norm,
      target_kl)

VBox(children=(Label(value='6.987 MB of 6.987 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
charts/SPS,▁▇████████████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
charts/episodic_lenght,▂▁▁▁▁▂▁▁▂▁▃▃▃▄▃▃▃▅▂▄▃▄▅▅▆▅▅▅▅▆▅▆▇▅▄▆█▆█▇
charts/episodic_return,▁▁▁▁▁▂▁▁▁▁▂▂▃▃▃▃▃▄▂▄▃▄▅▄▅▅▄▅▅▆▅▆▆▅▄▅█▆▇▇
charts/learning_rate,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
global_step,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
losses/approx_kl,▂▃▃▃▄▄▅█▅▅▄▃▄▄▃▅▃▄▅▄▃▃▅▃▆▅▆▄▄▃▃▂▃▃▃▂▂▁▁▁
losses/entropy,███▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
losses/explained_variance,▃▃▁▄▅▇█▇▇███████████████████████████████
losses/old_approx_kl,▄▆▁▅▅█▃▅▇▇▃▅▆▇▆▁▅▃▄▆▇▁▁▅▆▇▆▅▁▄▅▃▄▅▄▅▄▄▄▃
losses/policy_loss,▇▅▄▃▆▃▅▃▄▆▁▄▄▅▄▅▄▄▂▄▅▅▅▂▂▃▄▆▆▄▂▇▄▅▇▇█▇▇▇

0,1
charts/SPS,150.0
charts/episodic_lenght,90.0
charts/episodic_return,619.55664
charts/learning_rate,0.0
global_step,999424.0
losses/approx_kl,0.0
losses/entropy,21.40192
losses/explained_variance,0.85805
losses/old_approx_kl,-0.0002
losses/policy_loss,0.0001


  logger.deprecation(
  logger.deprecation(


Moviepy - Building video /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-0.mp4.
Moviepy - Writing video /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-0.mp4
global_step=21, episodic_return=[22.78595]
Moviepy - Building video /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-1.mp4.
Moviepy - Writing video /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-1.mp4



                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-1.mp4
global_step=51, episodic_return=[36.201855]
global_step=72, episodic_return=[23.97786]
global_step=82, episodic_return=[4.733545]
global_step=99, episodic_return=[10.860237]
global_step=116, episodic_return=[10.610827]
global_step=125, episodic_return=[6.901435]
global_step=135, episodic_return=[8.571838]
Moviepy - Building video /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-8.mp4.
Moviepy - Writing video /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-8.mp4



                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-8.mp4
global_step=155, episodic_return=[20.855782]
global_step=174, episodic_return=[13.612928]
global_step=194, episodic_return=[9.241912]
global_step=217, episodic_return=[22.081654]
global_step=235, episodic_return=[15.023452]
global_step=248, episodic_return=[3.9606402]
global_step=259, episodic_return=[9.447323]
global_step=306, episodic_return=[38.756435]
global_step=323, episodic_return=[8.405559]
global_step=342, episodic_return=[12.99211]
global_step=356, episodic_return=[8.711075]
global_step=374, episodic_return=[18.762356]
global_step=387, episodic_return=[10.6585865]
global_step=434, episodic_return=[74.67044]
global_step=441, episodic_return=[4.609004]
global_step=457, episodic_return=[10.13831]
global_step=480, episodic_return=[25.526222]
global_step=494, episodic_return=[12.102865]
global_step=511, episodic_return=[19.087944]
Moviepy - Building video /notebooks/rl

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-27.mp4
global_step=526, episodic_return=[7.005193]
global_step=540, episodic_return=[13.418036]
global_step=567, episodic_return=[21.080496]
global_step=581, episodic_return=[9.381006]
global_step=601, episodic_return=[21.635185]
global_step=617, episodic_return=[14.461506]
global_step=666, episodic_return=[52.98349]
global_step=688, episodic_return=[22.04763]
global_step=705, episodic_return=[11.505437]
global_step=725, episodic_return=[8.445497]
global_step=744, episodic_return=[18.79848]
global_step=776, episodic_return=[27.377815]
global_step=821, episodic_return=[43.512737]
global_step=848, episodic_return=[14.8426485]
global_step=868, episodic_return=[10.478578]
global_step=889, episodic_return=[24.80492]
global_step=904, episodic_return=[14.426449]
global_step=920, episodic_return=[12.382785]
global_step=936, episodic_return=[15.499781]
global_step=992, episodic_return=[90

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-64.mp4
global_step=1276, episodic_return=[8.004535]
global_step=1288, episodic_return=[10.985145]
global_step=1309, episodic_return=[10.34255]
global_step=1322, episodic_return=[6.2239747]
global_step=1345, episodic_return=[27.626156]
global_step=1358, episodic_return=[11.80439]
global_step=1370, episodic_return=[11.156066]
global_step=1394, episodic_return=[11.034778]
global_step=1408, episodic_return=[11.645918]
global_step=1420, episodic_return=[8.071956]
global_step=1438, episodic_return=[16.569595]
global_step=1469, episodic_return=[27.591425]
global_step=1481, episodic_return=[10.115369]
global_step=1501, episodic_return=[15.680364]
global_step=1524, episodic_return=[17.506306]
global_step=1538, episodic_return=[9.04779]
global_step=1546, episodic_return=[4.864487]
global_step=1572, episodic_return=[15.464795]
global_step=1589, episodic_return=[13.485472]
global_step=1599, 

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-125.mp4
global_step=2409, episodic_return=[7.2866344]
global_step=2437, episodic_return=[36.84675]
global_step=2454, episodic_return=[18.51517]
global_step=2464, episodic_return=[8.035395]
global_step=2475, episodic_return=[7.497376]
global_step=2487, episodic_return=[9.135271]
global_step=2506, episodic_return=[16.270504]
global_step=2518, episodic_return=[8.216037]
global_step=2527, episodic_return=[6.7215886]
global_step=2591, episodic_return=[117.26105]
global_step=2609, episodic_return=[18.196932]
global_step=2620, episodic_return=[8.980855]
global_step=2634, episodic_return=[10.92584]
global_step=2653, episodic_return=[17.657818]
global_step=2676, episodic_return=[10.538136]
global_step=2691, episodic_return=[9.761757]
global_step=2747, episodic_return=[87.47001]
global_step=2765, episodic_return=[11.702009]
global_step=2806, episodic_return=[32.802887]
global_step=2818, ep

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-216.mp4
global_step=4426, episodic_return=[59.68289]
global_step=4448, episodic_return=[12.876695]
global_step=4462, episodic_return=[9.5455475]
global_step=4476, episodic_return=[9.884925]
global_step=4491, episodic_return=[11.848594]
global_step=4511, episodic_return=[17.737097]
global_step=4566, episodic_return=[96.991455]
global_step=4581, episodic_return=[13.478153]
global_step=4607, episodic_return=[10.613042]
global_step=4621, episodic_return=[12.077562]
global_step=4630, episodic_return=[6.78384]
global_step=4639, episodic_return=[4.939935]
global_step=4655, episodic_return=[8.111456]
global_step=4687, episodic_return=[25.536617]
global_step=4715, episodic_return=[14.656013]
global_step=4730, episodic_return=[13.451569]
global_step=4761, episodic_return=[43.18876]
global_step=4821, episodic_return=[99.072716]
global_step=4858, episodic_return=[33.011837]
global_step=4886,

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-343.mp4
global_step=7324, episodic_return=[86.72626]
global_step=7351, episodic_return=[33.298923]
global_step=7364, episodic_return=[11.018105]
global_step=7377, episodic_return=[12.409418]
global_step=7386, episodic_return=[6.1383266]
global_step=7404, episodic_return=[8.863759]
global_step=7420, episodic_return=[8.510678]
global_step=7460, episodic_return=[33.37271]
global_step=7470, episodic_return=[4.419045]
global_step=7500, episodic_return=[51.168705]
global_step=7522, episodic_return=[10.813731]
global_step=7536, episodic_return=[12.26352]
global_step=7568, episodic_return=[32.821354]
global_step=7585, episodic_return=[7.7941227]
global_step=7625, episodic_return=[67.43402]
global_step=7669, episodic_return=[67.75295]
global_step=7712, episodic_return=[33.59924]
global_step=7723, episodic_return=[8.436681]
global_step=7731, episodic_return=[6.041297]
global_step=7741, epi

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-512.mp4
global_step=11457, episodic_return=[48.994717]
global_step=11490, episodic_return=[30.512154]
global_step=11531, episodic_return=[72.457214]
global_step=11548, episodic_return=[19.44528]
global_step=11582, episodic_return=[57.7005]
global_step=11596, episodic_return=[13.23178]
global_step=11606, episodic_return=[5.0945206]
global_step=11633, episodic_return=[33.535778]
global_step=11679, episodic_return=[81.70377]
global_step=11713, episodic_return=[55.6011]
global_step=11741, episodic_return=[46.624146]
global_step=11751, episodic_return=[7.317925]
global_step=11784, episodic_return=[54.481792]
global_step=11814, episodic_return=[49.961365]
global_step=11847, episodic_return=[55.446785]
global_step=11870, episodic_return=[22.82725]
global_step=11920, episodic_return=[88.10466]
global_step=11990, episodic_return=[139.592]
global_step=12029, episodic_return=[66.00359]
glob

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-729.mp4
global_step=17353, episodic_return=[51.027447]
global_step=17373, episodic_return=[21.743185]
global_step=17407, episodic_return=[55.735935]
global_step=17419, episodic_return=[9.244238]
global_step=17451, episodic_return=[54.1162]
global_step=17481, episodic_return=[49.77081]
global_step=17493, episodic_return=[10.490501]
global_step=17501, episodic_return=[4.3196564]
global_step=17532, episodic_return=[53.146824]
global_step=17562, episodic_return=[49.88034]
global_step=17589, episodic_return=[45.72382]
global_step=17607, episodic_return=[9.702725]
global_step=17637, episodic_return=[45.15964]
global_step=17668, episodic_return=[51.532253]
global_step=17697, episodic_return=[26.877377]
global_step=17722, episodic_return=[41.433887]
global_step=17750, episodic_return=[46.89461]
global_step=17761, episodic_return=[8.979247]
global_step=17774, episodic_return=[10.650771]
g

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-1000.mp4
global_step=24917, episodic_return=[39.38205]
global_step=24942, episodic_return=[41.398308]
global_step=24970, episodic_return=[47.060375]
global_step=24995, episodic_return=[42.6407]
global_step=25022, episodic_return=[45.56648]
global_step=25047, episodic_return=[42.36507]
global_step=25074, episodic_return=[45.87702]
global_step=25106, episodic_return=[54.272797]
global_step=25137, episodic_return=[53.703327]
global_step=25164, episodic_return=[44.546772]
global_step=25189, episodic_return=[41.49389]
global_step=25218, episodic_return=[48.680305]
global_step=25243, episodic_return=[42.438713]
global_step=25272, episodic_return=[48.11451]
global_step=25300, episodic_return=[46.081726]
global_step=25320, episodic_return=[21.238157]
global_step=25347, episodic_return=[45.733795]
global_step=25355, episodic_return=[5.4847007]
global_step=25384, episodic_return=[48.052715

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-2000.mp4
global_step=51818, episodic_return=[39.894405]
global_step=51842, episodic_return=[41.095676]
global_step=51867, episodic_return=[43.645607]
global_step=51892, episodic_return=[42.568104]
global_step=51917, episodic_return=[42.525032]
global_step=51943, episodic_return=[44.614376]
global_step=51968, episodic_return=[42.899204]
global_step=51996, episodic_return=[49.228886]
global_step=52020, episodic_return=[41.81355]
global_step=52049, episodic_return=[49.3225]
global_step=52074, episodic_return=[42.48402]
global_step=52097, episodic_return=[39.86651]
global_step=52121, episodic_return=[40.867756]
global_step=52145, episodic_return=[41.121704]
global_step=52168, episodic_return=[39.52782]
global_step=52194, episodic_return=[44.363018]
global_step=52221, episodic_return=[46.74425]
global_step=52245, episodic_return=[41.37885]
global_step=52268, episodic_return=[40.153454

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-3000.mp4
global_step=75123, episodic_return=[36.874752]
global_step=75146, episodic_return=[40.37548]
global_step=75169, episodic_return=[40.129063]
global_step=75191, episodic_return=[38.428143]
global_step=75214, episodic_return=[40.561344]
global_step=75237, episodic_return=[40.418476]
global_step=75259, episodic_return=[39.04573]
global_step=75281, episodic_return=[38.88674]
global_step=75303, episodic_return=[38.342937]
global_step=75324, episodic_return=[36.977]
global_step=75347, episodic_return=[40.054356]
global_step=75370, episodic_return=[39.95095]
global_step=75392, episodic_return=[38.792973]
global_step=75414, episodic_return=[38.1793]
global_step=75437, episodic_return=[39.985565]
global_step=75460, episodic_return=[40.078766]
global_step=75482, episodic_return=[38.65296]
global_step=75506, episodic_return=[41.942745]
global_step=75528, episodic_return=[38.493103]


                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-4000.mp4
global_step=97478, episodic_return=[38.361576]
global_step=97500, episodic_return=[38.581924]
global_step=97522, episodic_return=[38.483864]
global_step=97544, episodic_return=[38.78525]
global_step=97567, episodic_return=[40.667645]
global_step=97587, episodic_return=[35.21237]
global_step=97610, episodic_return=[40.310837]
global_step=97632, episodic_return=[38.92455]
global_step=97654, episodic_return=[38.830612]
global_step=97676, episodic_return=[38.19021]
global_step=97699, episodic_return=[40.412323]
global_step=97722, episodic_return=[40.55012]
global_step=97744, episodic_return=[38.42736]
global_step=97767, episodic_return=[40.141476]
global_step=97790, episodic_return=[40.086254]
global_step=97813, episodic_return=[40.4285]
global_step=97835, episodic_return=[38.867905]
global_step=97857, episodic_return=[38.704964]
global_step=97879, episodic_return=[38.796577

                                                            

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-5000.mp4
global_step=119725, episodic_return=[39.052265]
global_step=119747, episodic_return=[38.98298]
global_step=119773, episodic_return=[45.82729]
global_step=119796, episodic_return=[40.294445]
global_step=119818, episodic_return=[38.634567]
global_step=119841, episodic_return=[39.998886]
global_step=119864, episodic_return=[40.452732]
global_step=119886, episodic_return=[39.008854]
global_step=119907, episodic_return=[37.30182]
global_step=119929, episodic_return=[39.06403]
global_step=119950, episodic_return=[37.243248]
global_step=119973, episodic_return=[40.281105]
global_step=119995, episodic_return=[38.680798]
global_step=120018, episodic_return=[40.350616]
global_step=120041, episodic_return=[40.39728]
global_step=120063, episodic_return=[38.606564]
global_step=120086, episodic_return=[40.330235]
global_step=120109, episodic_return=[39.679253]
global_step=120130, epis

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-6000.mp4
global_step=142040, episodic_return=[38.492676]
global_step=142062, episodic_return=[38.700066]
global_step=142085, episodic_return=[40.12828]
global_step=142108, episodic_return=[40.215332]
global_step=142130, episodic_return=[38.657097]
global_step=142154, episodic_return=[42.756462]
global_step=142176, episodic_return=[38.994595]
global_step=142198, episodic_return=[39.215546]
global_step=142220, episodic_return=[38.92993]
global_step=142243, episodic_return=[40.00016]
global_step=142266, episodic_return=[40.809597]
global_step=142288, episodic_return=[38.831158]
global_step=142310, episodic_return=[38.851353]
global_step=142332, episodic_return=[38.924294]
global_step=142354, episodic_return=[38.44576]
global_step=142377, episodic_return=[40.07586]
global_step=142399, episodic_return=[38.44525]
global_step=142421, episodic_return=[38.99895]
global_step=142443, episod

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-7000.mp4
global_step=164549, episodic_return=[38.59479]
global_step=164571, episodic_return=[38.718872]
global_step=164594, episodic_return=[40.58846]
global_step=164620, episodic_return=[46.06108]
global_step=164643, episodic_return=[40.28395]
global_step=164664, episodic_return=[36.755997]
global_step=164686, episodic_return=[38.42759]
global_step=164708, episodic_return=[38.757065]
global_step=164730, episodic_return=[38.588074]
global_step=164754, episodic_return=[42.429287]
global_step=164776, episodic_return=[38.69905]
global_step=164799, episodic_return=[40.519672]
global_step=164820, episodic_return=[37.140594]
global_step=164843, episodic_return=[39.949604]
global_step=164866, episodic_return=[40.272495]
global_step=164889, episodic_return=[40.658836]
global_step=164911, episodic_return=[38.96208]
global_step=164933, episodic_return=[38.91012]
global_step=164955, episodi

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-8000.mp4
global_step=190082, episodic_return=[46.46753]
global_step=190112, episodic_return=[53.88542]
global_step=190138, episodic_return=[46.620335]
global_step=190170, episodic_return=[57.297344]
global_step=190193, episodic_return=[40.646786]
global_step=190224, episodic_return=[55.75165]
global_step=190248, episodic_return=[42.257065]
global_step=190274, episodic_return=[45.99583]
global_step=190304, episodic_return=[53.638187]
global_step=190330, episodic_return=[46.24725]
global_step=190357, episodic_return=[48.61723]
global_step=190380, episodic_return=[40.113674]
global_step=190406, episodic_return=[46.529617]
global_step=190431, episodic_return=[44.68707]
global_step=190463, episodic_return=[57.296238]
global_step=190495, episodic_return=[57.710667]
global_step=190521, episodic_return=[46.667427]
global_step=190550, episodic_return=[52.31816]
global_step=190578, episodi

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-9000.mp4
global_step=224550, episodic_return=[73.52317]
global_step=224590, episodic_return=[72.30749]
global_step=224627, episodic_return=[68.32905]
global_step=224667, episodic_return=[73.66196]
global_step=224707, episodic_return=[74.13907]
global_step=224748, episodic_return=[73.73415]
global_step=224787, episodic_return=[70.548256]
global_step=224827, episodic_return=[72.609406]
global_step=224863, episodic_return=[65.18446]
global_step=224904, episodic_return=[74.12629]
global_step=224947, episodic_return=[79.16142]
global_step=224985, episodic_return=[69.36971]
global_step=225024, episodic_return=[72.00301]
global_step=225064, episodic_return=[73.675026]
global_step=225102, episodic_return=[68.813]
global_step=225135, episodic_return=[59.713757]
global_step=225172, episodic_return=[67.196526]
global_step=225213, episodic_return=[73.749214]
global_step=225252, episodic_retu

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-10000.mp4
global_step=269935, episodic_return=[92.66995]
global_step=269982, episodic_return=[84.59177]
global_step=270031, episodic_return=[89.57412]
global_step=270081, episodic_return=[91.4024]
global_step=270130, episodic_return=[89.92692]
global_step=270179, episodic_return=[90.4864]
global_step=270227, episodic_return=[88.49407]
global_step=270276, episodic_return=[89.7081]
global_step=270323, episodic_return=[87.646225]
global_step=270369, episodic_return=[83.743835]
global_step=270417, episodic_return=[87.32423]
global_step=270461, episodic_return=[81.982544]
global_step=270504, episodic_return=[78.950035]
global_step=270550, episodic_return=[84.06126]
global_step=270600, episodic_return=[90.61787]
global_step=270647, episodic_return=[86.32619]
global_step=270690, episodic_return=[78.80643]
global_step=270738, episodic_return=[86.92409]
global_step=270789, episodic_return

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-11000.mp4
global_step=320628, episodic_return=[102.13541]
global_step=320682, episodic_return=[98.4784]
global_step=320738, episodic_return=[102.90048]
global_step=320794, episodic_return=[103.25397]
global_step=320852, episodic_return=[107.07735]
global_step=320919, episodic_return=[124.05827]
global_step=320975, episodic_return=[102.782814]
global_step=321031, episodic_return=[103.658424]
global_step=321084, episodic_return=[97.59829]
global_step=321139, episodic_return=[101.12096]
global_step=321195, episodic_return=[101.96222]
global_step=321254, episodic_return=[107.9639]
global_step=321311, episodic_return=[104.20503]
global_step=321387, episodic_return=[139.71957]
global_step=321464, episodic_return=[141.56984]
global_step=321520, episodic_return=[101.77127]
global_step=321578, episodic_return=[107.68336]
global_step=321638, episodic_return=[109.00622]
global_step=321697, 

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Hopper-v2__2__1700150624/rl-video-episode-12000.mp4
global_step=441905, episodic_return=[661.0282]
global_step=442113, episodic_return=[645.3384]
global_step=442337, episodic_return=[722.5835]
global_step=442557, episodic_return=[680.65393]
global_step=442746, episodic_return=[593.9192]
global_step=442948, episodic_return=[649.5858]
global_step=443149, episodic_return=[646.6883]
global_step=443354, episodic_return=[656.63275]
global_step=443580, episodic_return=[735.61945]
global_step=443801, episodic_return=[696.93414]
global_step=443990, episodic_return=[603.1377]
global_step=444192, episodic_return=[651.71204]
global_step=444394, episodic_return=[659.04486]
global_step=444527, episodic_return=[354.9696]
global_step=444721, episodic_return=[608.23926]
global_step=444917, episodic_return=[631.1499]
global_step=445135, episodic_return=[671.35333]
global_step=445272, episodic_return=[371.70688]
global_step=445479, episodi