In [1]:
# !apt-get install -y \
#     libgl1-mesa-dev \
#     libgl1-mesa-glx \
#     libglew-dev \
#     libosmesa6-dev \
#     software-properties-common

# !apt-get install -y patchelf

# !apt-get update --fix-missing
# !pip install stable-baselines3
# !pip install mujoco
# !pip install  --upgrade gymnasium==0.29
# !pip install free-mujoco-py

In [2]:
import os
import time
import wandb
import random
import numpy as np
import gymnasium as gym
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from stable_baselines3.common.buffers import ReplayBuffer

In [3]:
def make_env(env_id, seed, idx, capture_video, run_name):
    def thunk():
        if capture_video and idx == 0:
            env = gym.make(env_id, render_mode="rgb_array")
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
        else:
            env = gym.make(env_id)

        env = gym.wrappers.RecordEpisodeStatistics(env)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env

    return thunk

In [4]:
class QNetwork(nn.Module):

    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod() +  np.prod(env.single_action_space.shape), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 1)

    def forward(self, x, a):
        x = torch.cat([x,a], 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

In [5]:
class Actor(nn.Module):

    def __init__(self, env):
        super().__init__()
        self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod(), 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc_mu = nn.Linear(256, np.prod(env.single_action_space.shape))
        self.register_buffer(
            "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype = torch.float32)
        )

        self.register_buffer(
            "action_bias", torch.tensor((env.action_space.high + env.action_space.low)/ 2.0, dtype =torch.float32 )
        )

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.tanh(self.fc_mu(x))
        return x * self.action_scale + self.action_bias

In [6]:
def train(env_id,
          seed,
          total_timesteps,
          learning_rate,
          buffer_size,
          gamma,
          tau,
          batch_size,
          policy_noise ,
          exploration_noise,
          learning_starts,
          policy_frequency,
          noise_clip):

    run_name = f"{env_id}__{seed}__{int(time.time())}"
    wandb.init(
        project="td3-mujoco-benchmark",
        config={
            "env":env_id,
            "seed":seed,
            "timesteps":total_timesteps,
            "buffer_size":buffer_size,
            "gamma":gamma,
            "tau":tau,
            "batch_size":batch_size,
            "policy_noise":policy_noise,
            "exploration_noise":exploration_noise,
            "learning_starts":learning_starts,
            "policy_frequency":policy_frequency,
            "noise_clip":noise_clip,
        },
        sync_tensorboard=True,
        monitor_gym=True,
        name=run_name
    )
    writer = SummaryWriter(f"runs/{run_name}")


    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    envs = gym.vector.SyncVectorEnv([make_env(env_id, seed, 0, True, run_name)])

    actor = Actor(envs).to(device)
    target_actor = Actor(envs).to(device)
    qf1 = QNetwork(envs).to(device)
    qf2 = QNetwork(envs).to(device)
    qf1_target = QNetwork(envs).to(device)
    qf2_target = QNetwork(envs).to(device)

    target_actor.load_state_dict(actor.state_dict())
    qf1_target.load_state_dict(qf1.state_dict())
    qf2_target.load_state_dict(qf2.state_dict())
    q_optimiser = optim.Adam(list(qf1.parameters()) + list(qf2.parameters()), lr=learning_rate)
    actor_optimiser = optim.Adam(actor.parameters(), lr=learning_rate)


    envs.single_observation_space.dtype = np.float32
    rb = ReplayBuffer(
        buffer_size,
        envs.single_observation_space,
        envs.single_action_space,
        device,
        handle_timeout_termination=False,
    )

    start_time = time.time()

    obs, _ = envs.reset(seed=seed)
    for global_step in range(total_timesteps):

        if global_step < learning_starts:
            actions = np.array([envs.single_action_space.sample() for _ in range(envs.num_envs)])
            # print("GlobalStep:",global_step,":Actions:", actions)
        else:
            with torch.no_grad():
                actions = actor(torch.Tensor(obs).to(device))
                actions += torch.normal(0, actor.action_scale * exploration_noise)
                # print("GlobalStep:",global_step,":Unclippe Actions:", actions)
                actions = actions.cpu().numpy()
                # print("GlobalStep:",global_step,":Actions:", actions)

        next_obs, rewards, terminated, truncated, infos = envs.step(actions)

        if "final_info" in infos:
            for info in infos["final_info"]:
                print(f"global_step={global_step} episodic_return={info['episode']['r']}")
                writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
                writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
                break


        real_next_obs = next_obs.copy()

        for idx, d in enumerate(truncated):
            if d:
                real_next_obs[idx] = infos["final_observation"][idx]

        rb.add(obs, real_next_obs, actions, rewards, terminated, infos)

        obs = next_obs

        if global_step > learning_starts:
            data = rb.sample(batch_size)
            with torch.no_grad():
                clipped_noise = (torch.randn_like(data.actions, device=device) * policy_noise).clamp(
                    -noise_clip, noise_clip
                ) * target_actor.action_scale

                next_state_actions = (target_actor(data.next_observations) + clipped_noise).clamp(
                    envs.single_action_space.low[0], envs.single_action_space.high[0]
                )

                qf1_next_target = qf1_target(data.next_observations, next_state_actions)
                qf2_next_target = qf2_target(data.next_observations, next_state_actions)
                min_qf_next_state_target = torch.min(qf1_next_target, qf2_next_target)
                next_q_value = data.rewards.flatten() + (1 - data.dones.flatten()) * gamma * (min_qf_next_state_target).view(-1)

            qf1_a_values = qf1(data.observations, data.actions).view(-1)
            qf2_a_values = qf2(data.observations, data.actions).view(-1)
            qf1_loss = F.mse_loss(qf1_a_values, next_q_value)
            qf2_loss = F.mse_loss(qf2_a_values, next_q_value)
            qf_loss = qf1_loss + qf2_loss

            q_optimiser.zero_grad()
            qf_loss.backward()
            q_optimiser.step()

            if global_step % policy_frequency == 0:
                actor_loss = -qf1(data.observations, actor(data.observations)).mean()
                actor_optimiser.zero_grad()
                actor_loss.backward()
                actor_optimiser.step()

                for param, target_param in zip(actor.parameters(), target_actor.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
                for param, target_param in zip(qf1.parameters(), qf1_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
                for param, target_param in zip(qf2.parameters(), qf2_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

            if global_step % 100 == 0:
                writer.add_scalar("losses/qf1_values", qf1_a_values.mean().item(), global_step)
                writer.add_scalar("losses/qf2_values", qf2_a_values.mean().item(), global_step)
                writer.add_scalar("losses/qf1_loss", qf1_loss.item(), global_step)
                writer.add_scalar("losses/qf2_loss", qf2_loss.item(), global_step)
                writer.add_scalar("losses/qf_loss", qf_loss.item() / 2.0, global_step)
                writer.add_scalar("losses/actor_loss", actor_loss.item(), global_step)
                writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)

    envs.close()
    writer.close()
    wandb.finish()

In [7]:
env = {"hopper":"Hopper-v2","humanoid":"Humanoid-v2","halfCheetah":"HalfCheetah-v2","ant":"Ant-v2"}
seed = 1
total_timesteps = 1000000
learning_rate = 0.0003
buffer_size = 1000000
gamma = 0.99
tau = 0.005
batch_size = 512
policy_noise = 0.2
exploration_noise = 0.4
learning_starts = 25000
policy_frequency = 2
noise_clip = 0.5

In [8]:
train(
    env["humanoid"],
    seed,
    total_timesteps,
    learning_rate,
    buffer_size,
    gamma,
    tau,
    batch_size,
    policy_noise,
    exploration_noise,
    learning_starts,
    policy_frequency,
    noise_clip
)

[34m[1mwandb[0m: Currently logged in as: [33mchkda[0m. Use [1m`wandb login --relogin`[0m to force relogin


  logger.deprecation(
  logger.deprecation(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


Moviepy - Building video /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-0.mp4.
Moviepy - Writing video /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-0.mp4



                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-0.mp4
global_step=23 episodic_return=[121.24579]
Moviepy - Building video /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-1.mp4.
Moviepy - Writing video /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-1.mp4



                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-1.mp4
global_step=47 episodic_return=[119.5225]
global_step=68 episodic_return=[108.16019]
global_step=85 episodic_return=[83.25718]
global_step=102 episodic_return=[83.582855]
global_step=120 episodic_return=[88.58781]
global_step=154 episodic_return=[163.99673]
global_step=176 episodic_return=[104.62305]
Moviepy - Building video /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-8.mp4.
Moviepy - Writing video /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-8.mp4



                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-8.mp4
global_step=193 episodic_return=[83.57454]
global_step=218 episodic_return=[129.3954]
global_step=237 episodic_return=[95.060486]
global_step=263 episodic_return=[131.10318]
global_step=283 episodic_return=[100.64491]
global_step=311 episodic_return=[138.91057]
global_step=332 episodic_return=[105.19732]
global_step=378 episodic_return=[228.43713]
global_step=409 episodic_return=[161.85236]
global_step=429 episodic_return=[100.045456]
global_step=448 episodic_return=[94.742546]
global_step=481 episodic_return=[165.44429]
global_step=501 episodic_return=[101.72737]
global_step=534 episodic_return=[164.74854]
global_step=582 episodic_return=[234.22275]
global_step=601 episodic_return=[95.23885]
global_step=629 episodic_return=[140.49931]
global_step=647 episodic_return=[89.55818]
global_step=665 episodic_return=[88.441986]
Moviepy - Building video /notebooks/rl-algos/videos

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-27.mp4
global_step=697 episodic_return=[153.09024]
global_step=722 episodic_return=[129.82297]
global_step=742 episodic_return=[98.66413]
global_step=761 episodic_return=[96.38866]
global_step=782 episodic_return=[107.38056]
global_step=800 episodic_return=[89.66012]
global_step=822 episodic_return=[112.00469]
global_step=845 episodic_return=[117.41867]
global_step=868 episodic_return=[118.5095]
global_step=888 episodic_return=[98.70106]
global_step=909 episodic_return=[104.24773]
global_step=941 episodic_return=[161.68684]
global_step=958 episodic_return=[83.54515]
global_step=976 episodic_return=[88.36508]
global_step=999 episodic_return=[116.14726]
global_step=1017 episodic_return=[89.44524]
global_step=1036 episodic_return=[95.87257]
global_step=1069 episodic_return=[159.50243]
global_step=1093 episodic_return=[121.28822]
global_step=1112 episodic_return=[95.75477]
global_s

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-64.mp4
global_step=1487 episodic_return=[101.12164]
global_step=1504 episodic_return=[83.68329]
global_step=1525 episodic_return=[107.244125]
global_step=1566 episodic_return=[203.1454]
global_step=1598 episodic_return=[159.83514]
global_step=1633 episodic_return=[171.99118]
global_step=1654 episodic_return=[107.740524]
global_step=1672 episodic_return=[90.083595]
global_step=1690 episodic_return=[89.38104]
global_step=1731 episodic_return=[201.15257]
global_step=1752 episodic_return=[106.46037]
global_step=1778 episodic_return=[128.38098]
global_step=1796 episodic_return=[88.35544]
global_step=1818 episodic_return=[108.14943]
global_step=1850 episodic_return=[166.39172]
global_step=1880 episodic_return=[146.7388]
global_step=1911 episodic_return=[146.31044]
global_step=1932 episodic_return=[107.803246]
global_step=1949 episodic_return=[83.6345]
global_step=1977 episodic_return

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-125.mp4
global_step=3027 episodic_return=[144.58939]
global_step=3048 episodic_return=[106.03525]
global_step=3066 episodic_return=[88.58104]
global_step=3095 episodic_return=[147.59845]
global_step=3122 episodic_return=[138.32521]
global_step=3140 episodic_return=[90.66418]
global_step=3161 episodic_return=[106.7882]
global_step=3179 episodic_return=[89.2904]
global_step=3203 episodic_return=[121.749435]
global_step=3229 episodic_return=[134.46132]
global_step=3251 episodic_return=[110.218414]
global_step=3269 episodic_return=[88.98237]
global_step=3287 episodic_return=[88.82217]
global_step=3350 episodic_return=[321.02838]
global_step=3385 episodic_return=[182.1854]
global_step=3417 episodic_return=[165.14932]
global_step=3445 episodic_return=[136.80377]
global_step=3462 episodic_return=[83.5225]
global_step=3481 episodic_return=[94.72623]
global_step=3514 episodic_return=[17

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-216.mp4
global_step=5293 episodic_return=[88.408676]
global_step=5316 episodic_return=[113.8076]
global_step=5335 episodic_return=[95.32935]
global_step=5357 episodic_return=[109.604126]
global_step=5378 episodic_return=[107.19118]
global_step=5410 episodic_return=[157.23112]
global_step=5429 episodic_return=[94.87284]
global_step=5448 episodic_return=[96.11315]
global_step=5473 episodic_return=[127.847015]
global_step=5491 episodic_return=[89.25822]
global_step=5532 episodic_return=[204.36157]
global_step=5555 episodic_return=[115.36713]
global_step=5578 episodic_return=[116.2715]
global_step=5603 episodic_return=[128.98735]
global_step=5622 episodic_return=[93.728195]
global_step=5640 episodic_return=[88.29129]
global_step=5670 episodic_return=[155.50348]
global_step=5693 episodic_return=[115.87237]
global_step=5720 episodic_return=[136.88393]
global_step=5741 episodic_return

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-343.mp4
global_step=8452 episodic_return=[105.55876]
global_step=8487 episodic_return=[172.4531]
global_step=8517 episodic_return=[142.25896]
global_step=8542 episodic_return=[128.72295]
global_step=8569 episodic_return=[139.34517]
global_step=8591 episodic_return=[110.078514]
global_step=8625 episodic_return=[167.5211]
global_step=8643 episodic_return=[89.249855]
global_step=8680 episodic_return=[192.07573]
global_step=8700 episodic_return=[99.305145]
global_step=8721 episodic_return=[105.80222]
global_step=8739 episodic_return=[88.8287]
global_step=8757 episodic_return=[87.954475]
global_step=8774 episodic_return=[83.51257]
global_step=8794 episodic_return=[100.63593]
global_step=8826 episodic_return=[161.96991]
global_step=8844 episodic_return=[89.3107]
global_step=8863 episodic_return=[93.26661]
global_step=8883 episodic_return=[101.88394]
global_step=8904 episodic_return=[

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-512.mp4
global_step=12627 episodic_return=[160.32275]
global_step=12654 episodic_return=[136.47627]
global_step=12675 episodic_return=[107.82095]
global_step=12700 episodic_return=[126.63378]
global_step=12732 episodic_return=[163.12189]
global_step=12758 episodic_return=[132.38522]
global_step=12783 episodic_return=[120.29644]
global_step=12814 episodic_return=[152.66998]
global_step=12832 episodic_return=[88.42344]
global_step=12851 episodic_return=[93.60419]
global_step=12869 episodic_return=[88.73578]
global_step=12887 episodic_return=[88.67666]
global_step=12917 episodic_return=[152.02596]
global_step=12934 episodic_return=[83.38426]
global_step=12956 episodic_return=[110.18817]
global_step=12980 episodic_return=[125.92029]
global_step=13002 episodic_return=[106.5965]
global_step=13035 episodic_return=[172.16429]
global_step=13081 episodic_return=[226.78606]
global_step=13

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-729.mp4
global_step=17836 episodic_return=[120.318985]
global_step=17858 episodic_return=[113.17713]
global_step=17888 episodic_return=[151.47075]
global_step=17923 episodic_return=[179.82326]
global_step=17940 episodic_return=[83.193474]
global_step=17965 episodic_return=[124.90668]
global_step=17992 episodic_return=[139.15288]
global_step=18016 episodic_return=[120.962]
global_step=18033 episodic_return=[83.33548]
global_step=18055 episodic_return=[115.15035]
global_step=18087 episodic_return=[165.29156]
global_step=18113 episodic_return=[134.34834]
global_step=18136 episodic_return=[112.31612]
global_step=18159 episodic_return=[113.175514]
global_step=18181 episodic_return=[114.89611]
global_step=18209 episodic_return=[137.98936]
global_step=18249 episodic_return=[205.95726]
global_step=18283 episodic_return=[173.8506]
global_step=18300 episodic_return=[84.46273]
global_step

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-1000.mp4
global_step=24506 episodic_return=[139.44423]
global_step=24532 episodic_return=[132.98662]
global_step=24554 episodic_return=[113.009445]
global_step=24572 episodic_return=[90.05541]
global_step=24601 episodic_return=[143.20116]
global_step=24618 episodic_return=[83.65755]
global_step=24635 episodic_return=[83.879074]
global_step=24652 episodic_return=[83.31864]
global_step=24673 episodic_return=[100.08079]
global_step=24694 episodic_return=[106.07767]
global_step=24721 episodic_return=[131.47441]
global_step=24743 episodic_return=[112.35274]
global_step=24760 episodic_return=[83.47445]
global_step=24781 episodic_return=[105.16341]
global_step=24800 episodic_return=[95.95915]
global_step=24836 episodic_return=[170.78575]
global_step=24864 episodic_return=[139.3794]
global_step=24881 episodic_return=[83.27128]
global_step=24904 episodic_return=[118.28743]
global_step=2

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-2000.mp4
global_step=44886 episodic_return=[208.38916]
global_step=44920 episodic_return=[171.73865]
global_step=44960 episodic_return=[203.37679]
global_step=45008 episodic_return=[242.14088]
global_step=45062 episodic_return=[283.6806]
global_step=45118 episodic_return=[298.25607]
global_step=45165 episodic_return=[245.01291]
global_step=45220 episodic_return=[291.11783]
global_step=45285 episodic_return=[347.96277]
global_step=45338 episodic_return=[282.43976]
global_step=45405 episodic_return=[362.23126]
global_step=45463 episodic_return=[307.37256]
global_step=45525 episodic_return=[335.25598]
global_step=45587 episodic_return=[329.54312]
global_step=45639 episodic_return=[271.0043]
global_step=45704 episodic_return=[356.5568]
global_step=45752 episodic_return=[246.27936]
global_step=45805 episodic_return=[284.57263]
global_step=45853 episodic_return=[251.27408]
global_ste

                                                             

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-3000.mp4
global_step=121117 episodic_return=[418.96213]
global_step=121234 episodic_return=[619.1163]
global_step=121306 episodic_return=[359.69745]
global_step=121382 episodic_return=[336.47214]
global_step=121465 episodic_return=[392.16693]
global_step=121542 episodic_return=[394.16452]
global_step=121610 episodic_return=[354.65936]
global_step=121717 episodic_return=[524.3334]
global_step=121833 episodic_return=[563.55334]
global_step=121925 episodic_return=[409.6653]
global_step=122104 episodic_return=[831.14526]
global_step=122193 episodic_return=[437.92844]
global_step=122311 episodic_return=[578.5964]
global_step=122399 episodic_return=[403.57928]
global_step=122595 episodic_return=[927.372]
global_step=122674 episodic_return=[391.7974]
global_step=122759 episodic_return=[453.1672]
global_step=122930 episodic_return=[812.4788]
global_step=123104 episodic_return=[906.8736

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-4000.mp4
global_step=249421 episodic_return=[907.22473]
global_step=249562 episodic_return=[718.6727]
global_step=249702 episodic_return=[741.14124]
global_step=249879 episodic_return=[831.57697]
global_step=250150 episodic_return=[1410.9055]
global_step=250253 episodic_return=[515.5625]
global_step=250429 episodic_return=[884.6557]
global_step=250513 episodic_return=[446.91547]
global_step=250967 episodic_return=[2282.7744]
global_step=251081 episodic_return=[587.75616]
global_step=251163 episodic_return=[433.38574]
global_step=251303 episodic_return=[746.20447]
global_step=251522 episodic_return=[1176.7308]
global_step=251716 episodic_return=[998.81006]
global_step=251820 episodic_return=[501.41275]
global_step=251902 episodic_return=[439.76907]
global_step=252256 episodic_return=[1718.3176]
global_step=252656 episodic_return=[2048.1333]
global_step=252998 episodic_return=[17

                                                                

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/videos/Humanoid-v2__1__1700377799/rl-video-episode-5000.mp4
global_step=919156 episodic_return=[5263.2183]
global_step=920156 episodic_return=[5258.71]
global_step=921156 episodic_return=[5182.184]
global_step=922156 episodic_return=[5214.099]
global_step=923156 episodic_return=[5197.045]
global_step=924156 episodic_return=[5200.3115]
global_step=924582 episodic_return=[2257.1204]
global_step=925582 episodic_return=[5261.934]
global_step=926582 episodic_return=[5222.3306]
global_step=927582 episodic_return=[5156.303]
global_step=928582 episodic_return=[5265.825]
global_step=929582 episodic_return=[5229.543]
global_step=930582 episodic_return=[5137.8506]
global_step=931582 episodic_return=[5285.85]
global_step=932582 episodic_return=[5204.3286]
global_step=932698 episodic_return=[641.55176]
global_step=933698 episodic_return=[5212.221]
global_step=934698 episodic_return=[5206.1255]
global_step=935698 episodic_return=[5165.8784]


VBox(children=(Label(value='6.524 MB of 6.524 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
charts/SPS,█▅▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
charts/episodic_length,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▂▁▂▂▂▂▁▂▂▃▃██████
charts/episodic_return,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▂▁▂▂▂▂▁▂▂▃▃██████
global_step,▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
losses/actor_loss,█▇▆▆▆▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁
losses/qf1_loss,▁▁▂▃▅▄▄▄▄▄▅▅▆▅▇▇▇▇▇▇█▆▇█▇▆▆▇█▆▇▆▇▆▇▇▅▇▇▆
losses/qf1_values,▁▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇█▇▇▇███████
losses/qf2_loss,▁▁▂▄▅▄▄▄▄▄▄▆▅▇▇█▇█▇▇▇███▇▇█▇▇▇▇▇█▇▆█▇▇▇▆
losses/qf2_values,▁▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇█▇▇▇███████
losses/qf_loss,▁▁▂▄▅▄▄▄▄▄▅▅▆▆▇▇▇█▇▇█▇██▇▇▇▇█▆▇▇▇▇▇▇▆▇▇▆

0,1
charts/SPS,90.0
charts/episodic_length,1000.0
charts/episodic_return,5220.78564
global_step,999900.0
losses/actor_loss,-420.78198
losses/qf1_loss,53.95958
losses/qf1_values,417.00497
losses/qf2_loss,43.33659
losses/qf2_values,418.29453
losses/qf_loss,48.64809
