In [1]:
import os
import random
import time
import wandb
from distutils.util import strtobool
from typing import Callable
import flax
import flax.linen as nn
import gymnasium as gym
import jax
import jax.numpy as jnp
import numpy as np
import optax

from flax.training.train_state import TrainState
from stable_baselines3.common.buffers import ReplayBuffer
from torch.utils.tensorboard import SummaryWriter

In [2]:
def make_env(env_id, seed, idx, capture_video, run_name):
    def thunk():
        if capture_video and idx == 0:
            env = gym.make(env_id, render_mode="rgb_array")
            env = gym.wrappers.RecordVideo(env, f"video/{run_name}")
        else:
            env = gym.make(env_id)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env.action_space.seed(seed)

        return env

    return thunk

In [3]:
class QNetwork(nn.Module):
    action_dim: int

    @nn.compact
    def __call__(self, x: jnp.ndarray):
        x = nn.Dense(120)(x)
        x = nn.relu(x)
        x = nn.Dense(84)(x)
        x = nn.relu(x)
        x = nn.Dense(self.action_dim)(x)
        # x = nn.relu(x)

        return x


In [4]:
class TrainState(TrainState):
    target_params: flax.core.FrozenDict

In [5]:
def linear_schedule(start_e: float, end_e: float, duration: int, t: int):
    slope = (end_e - start_e)/ duration
    return max(slope * t + start_e, end_e)

In [6]:
def evaluate(model_path: str,
             make_env: Callable,
             env_id: str,
             eval_episodes: int,
             run_name: str,
             Model: nn.Module,
             epsilon: float = 0.05,
             capture_video: bool = True,
             seed=1):
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    obs, _ = envs.reset()
    model = Model(action_dim=envs.single_action_space.n)
    q_key = jax.random.PRNGKey(seed)
    params = model.init(q_key, obs)
    with open(model_path, "rb") as f:
        params = flax.serialization.from_bytes(params, f.read())
    model.apply = jax.jit(model.apply)

    episodic_returns = []

    while len(episodic_returns) < eval_episodes:
        if random.random() < epsilon:
            actions = np.array([envs.single_action_space.sample() for _ in range(envs.num_envs)])
        else:
            q_values = model.apply(params,obs)
            actions = q_values.argmax(axis=-1)
            actions = jax.device_get(actions)
        next_obs, _, _, _, infos = envs.step(actions)
        if "final_info" in infos:
            for info in infos["final_info"]:
                if "episode" not in info:
                    continue
                print(f"eval_episode={len(episodic_returns)}, episodic_return={info['episode']['r']}")
                episodic_returns += [info["episode"]["r"]]

        obs = next_obs

    return episodic_returns

In [12]:
envs = {"cartPole":"CartPole-v1","acrobot":"Acrobot-v1","mountainCar":"MountainCar-v0"}
n_timesteps = 500000
learning_rate = 0.00025
buffer_size = 10000
gamma = 0.99
target_network_frequency = 500
max_grad_norm = 0.5
batch_size = 128
start_e = 1
end_e = 0.05
exploration_fraction = 0.3
learning_starts = 10000
train_frequency = 10
seed = 1
capture_video = True
tau = 1
num_envs = 1

In [8]:
def train(env_id,timesteps,learning_rate,buffer_size,gamma,
          target_network_frequency,max_grad_norm,batch_size,
          start_e,end_e,exploration_fraction,learning_starts,
          train_frequency,seed,tau,num_envs):

    run_name = f"{env_id}__{seed}__{int(time.time())}"
    wandb.init(
        project="dqn-classic-control-benchmark",
        config={
            "env":env_id,
            "timesteps":timesteps,
            "lr":learning_rate,
            "buffer_size":buffer_size,
            "gamma":gamma,
            "target_network_frequency":target_network_frequency,
            "batch_size":batch_size,
            "start_e":start_e,
            "end_e":end_e,
            "exploration_fraction":exploration_fraction,
            "learning_starts":learning_starts,
            "train_frequency":train_frequency,
            "seed":seed,
            "tau":tau,
            "num_envs":num_envs,
        },
        sync_tensorboard=True,
        monitor_gym=True,
        name=run_name,
    )
    
    writer = SummaryWriter(f"runs/{run_name}")

    random.seed(seed)
    np.random.seed(seed)
    key = jax.random.PRNGKey(seed)
    key, q_key = jax.random.split(key, 2)

    envs = gym.vector.SyncVectorEnv(
        [make_env(env_id, seed + i, i, True, run_name) for i in range(num_envs)]
    )

    obs, _ = envs.reset(seed=seed)

    q_network = QNetwork(action_dim=envs.single_action_space.n)

    q_state = TrainState.create(
        apply_fn=q_network.apply,
        params=q_network.init(q_key, obs),
        target_params=q_network.init(q_key,obs),
        tx=optax.adam(learning_rate=learning_rate)
    )

    q_network.apply = jax.jit(q_network.apply)
    q_state = q_state.replace(target_params=optax.incremental_update(q_state.params, q_state.target_params, 1))

    rb = ReplayBuffer(
        buffer_size,
        envs.single_observation_space,
        envs.single_action_space,
        "cpu",
        n_envs=num_envs,
        handle_timeout_termination=False,
    )

    @jax.jit
    def update(q_state, observations, actions, next_observations, rewards, dones):
        q_next_target = q_network.apply(q_state.target_params, next_observations)
        q_next_target = jnp.max(q_next_target, axis=-1)
        next_q_value = rewards + (1 - dones) * gamma * q_next_target

        def mse_loss(params):
            q_pred = q_network.apply(params, observations)
            q_pred = q_pred[jnp.arange(q_pred.shape[0]), actions.squeeze()]
            return ((q_pred - next_q_value)**2).mean(), q_pred

        (loss_value,q_pred), grads = jax.value_and_grad(mse_loss,has_aux=True)(q_state.params)
        q_state = q_state.apply_gradients(grads=grads)
        return loss_value, q_pred, q_state

    start_time = time.time()

    obs, _ = envs.reset(seed=seed)
    for global_step in range(timesteps):
        epsilon = linear_schedule(start_e, end_e, exploration_fraction * timesteps, global_step)
        if random.random() < epsilon:
            actions = np.array([envs.single_action_space.sample() for _ in range(envs.num_envs)])
        else:
            q_values = q_network.apply(q_state.params, obs)
            actions = q_values.argmax(axis=-1)
            actions = jax.device_get(actions)

        next_obs, rewards, terminated, truncated, infos = envs.step(actions)

        if "final_info" in infos:
            for info in infos["final_info"]:
                if "episode" not in info:
                    continue

                print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
                writer.add_scalar("charts/episodic_return",info["episode"]["r"], global_step)
                writer.add_scalar("charts/episodic_length",info["episode"]["l"], global_step)
                writer.add_scalar("charts/epsilon",epsilon,global_step)


        real_next_obs = next_obs.copy()
        for idx,d in enumerate(truncated):
            if d and "final_observations" in infos:
                real_next_obs[idx] = infos["final_observations"][idx]
        rb.add(obs, real_next_obs, actions, rewards, terminated, infos)

        obs = next_obs

        if global_step > learning_starts:
            if global_step % train_frequency == 0:
                data = rb.sample(batch_size)

                loss, old_val, q_state = update(
                    q_state,
                    data.observations.numpy(),
                    data.actions.numpy(),
                    data.next_observations.numpy(),
                    data.rewards.flatten().numpy(),
                    data.dones.flatten().numpy(),
                )

                if global_step % 100 == 0:
                    writer.add_scalar("losses/td_loss",jax.device_get(loss),global_step)
                    writer.add_scalar("losses/q_values",jax.device_get(old_val).mean(), global_step)
                    writer.add_scalar("charts/SPS",int(global_step / (time.time() - start_time)),global_step)

            if global_step % target_network_frequency == 0:
                q_state = q_state.replace(
                    target_params=optax.incremental_update(q_state.params, q_state.target_params, tau)
                )

    model_path = f"runs/{run_name}.cleanrl_model"
    with open(model_path,"wb") as f:
        f.write(flax.serialization.to_bytes(q_state.params))

    print("Model Saved")

    episodic_returns = evaluate(
        model_path,
        make_env,
        env_id,
        run_name=f"{run_name}-eval",
        Model=QNetwork,
        epsilon=0.05,
        eval_episodes=100
    )

    for idx, episodic_return in enumerate(episodic_returns):
        writer.add_scalar("eval/episodic_return", episodic_return,idx)

    envs.close()
    writer.close()
    wandb.finish()

In [13]:
train(envs["mountainCar"],n_timesteps,learning_rate,buffer_size,gamma,target_network_frequency,max_grad_norm,batch_size,start_e,end_e,exploration_fraction,learning_starts,train_frequency,seed,tau,num_envs)



Moviepy - Building video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-0.mp4.
Moviepy - Writing video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-0.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-0.mp4
global_step=199, episodic_return=[-200.]
Moviepy - Building video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-1.mp4.
Moviepy - Writing video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-1.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-1.mp4
global_step=399, episodic_return=[-200.]
global_step=599, episodic_return=[-200.]
global_step=799, episodic_return=[-200.]
global_step=999, episodic_return=[-200.]
global_step=1199, episodic_return=[-200.]
global_step=1399, episodic_return=[-200.]
global_step=1599, episodic_return=[-200.]
Moviepy - Building video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-8.mp4.
Moviepy - Writing video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-8.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-8.mp4
global_step=1799, episodic_return=[-200.]
global_step=1999, episodic_return=[-200.]
global_step=2199, episodic_return=[-200.]
global_step=2399, episodic_return=[-200.]
global_step=2599, episodic_return=[-200.]
global_step=2799, episodic_return=[-200.]
global_step=2999, episodic_return=[-200.]
global_step=3199, episodic_return=[-200.]
global_step=3399, episodic_return=[-200.]
global_step=3599, episodic_return=[-200.]
global_step=3799, episodic_return=[-200.]
global_step=3999, episodic_return=[-200.]
global_step=4199, episodic_return=[-200.]
global_step=4399, episodic_return=[-200.]
global_step=4599, episodic_return=[-200.]
global_step=4799, episodic_return=[-200.]
global_step=4999, episodic_return=[-200.]
global_step=5199, episodic_return=[-200.]
global_step=5399, episodic_return=[-200.]
Moviepy - Building video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-27.mp4
global_step=5599, episodic_return=[-200.]
global_step=5799, episodic_return=[-200.]
global_step=5999, episodic_return=[-200.]
global_step=6199, episodic_return=[-200.]
global_step=6399, episodic_return=[-200.]
global_step=6599, episodic_return=[-200.]
global_step=6799, episodic_return=[-200.]
global_step=6999, episodic_return=[-200.]
global_step=7199, episodic_return=[-200.]
global_step=7399, episodic_return=[-200.]
global_step=7599, episodic_return=[-200.]
global_step=7799, episodic_return=[-200.]
global_step=7999, episodic_return=[-200.]
global_step=8199, episodic_return=[-200.]
global_step=8399, episodic_return=[-200.]
global_step=8599, episodic_return=[-200.]
global_step=8799, episodic_return=[-200.]
global_step=8999, episodic_return=[-200.]
global_step=9199, episodic_return=[-200.]
global_step=9399, episodic_return=[-200.]
global_step=9599, episodic_return=[-200.]

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-64.mp4
global_step=12999, episodic_return=[-200.]
global_step=13199, episodic_return=[-200.]
global_step=13399, episodic_return=[-200.]
global_step=13599, episodic_return=[-200.]
global_step=13799, episodic_return=[-200.]
global_step=13999, episodic_return=[-200.]
global_step=14199, episodic_return=[-200.]
global_step=14399, episodic_return=[-200.]
global_step=14599, episodic_return=[-200.]
global_step=14799, episodic_return=[-200.]
global_step=14999, episodic_return=[-200.]
global_step=15199, episodic_return=[-200.]
global_step=15399, episodic_return=[-200.]
global_step=15599, episodic_return=[-200.]
global_step=15799, episodic_return=[-200.]
global_step=15999, episodic_return=[-200.]
global_step=16199, episodic_return=[-200.]
global_step=16399, episodic_return=[-200.]
global_step=16599, episodic_return=[-200.]
global_step=16799, episodic_return=[-200.]
global_step=16999, ep

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-125.mp4
global_step=25199, episodic_return=[-200.]
global_step=25399, episodic_return=[-200.]
global_step=25599, episodic_return=[-200.]
global_step=25799, episodic_return=[-200.]
global_step=25999, episodic_return=[-200.]
global_step=26199, episodic_return=[-200.]
global_step=26399, episodic_return=[-200.]
global_step=26599, episodic_return=[-200.]
global_step=26799, episodic_return=[-200.]
global_step=26999, episodic_return=[-200.]
global_step=27199, episodic_return=[-200.]
global_step=27399, episodic_return=[-200.]
global_step=27599, episodic_return=[-200.]
global_step=27799, episodic_return=[-200.]
global_step=27999, episodic_return=[-200.]
global_step=28199, episodic_return=[-200.]
global_step=28399, episodic_return=[-200.]
global_step=28599, episodic_return=[-200.]
global_step=28799, episodic_return=[-200.]
global_step=28999, episodic_return=[-200.]
global_step=29199, e

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-216.mp4
global_step=43399, episodic_return=[-200.]
global_step=43599, episodic_return=[-200.]
global_step=43799, episodic_return=[-200.]
global_step=43999, episodic_return=[-200.]
global_step=44199, episodic_return=[-200.]
global_step=44399, episodic_return=[-200.]
global_step=44599, episodic_return=[-200.]
global_step=44799, episodic_return=[-200.]
global_step=44999, episodic_return=[-200.]
global_step=45199, episodic_return=[-200.]
global_step=45399, episodic_return=[-200.]
global_step=45599, episodic_return=[-200.]
global_step=45799, episodic_return=[-200.]
global_step=45999, episodic_return=[-200.]
global_step=46199, episodic_return=[-200.]
global_step=46399, episodic_return=[-200.]
global_step=46599, episodic_return=[-200.]
global_step=46799, episodic_return=[-200.]
global_step=46999, episodic_return=[-200.]
global_step=47199, episodic_return=[-200.]
global_step=47399, e

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-343.mp4
global_step=68799, episodic_return=[-200.]
global_step=68999, episodic_return=[-200.]
global_step=69199, episodic_return=[-200.]
global_step=69399, episodic_return=[-200.]
global_step=69599, episodic_return=[-200.]
global_step=69799, episodic_return=[-200.]
global_step=69999, episodic_return=[-200.]
global_step=70199, episodic_return=[-200.]
global_step=70399, episodic_return=[-200.]
global_step=70599, episodic_return=[-200.]
global_step=70799, episodic_return=[-200.]
global_step=70999, episodic_return=[-200.]
global_step=71199, episodic_return=[-200.]
global_step=71399, episodic_return=[-200.]
global_step=71599, episodic_return=[-200.]
global_step=71799, episodic_return=[-200.]
global_step=71999, episodic_return=[-200.]
global_step=72199, episodic_return=[-200.]
global_step=72399, episodic_return=[-200.]
global_step=72599, episodic_return=[-200.]
global_step=72799, e

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-512.mp4
global_step=102599, episodic_return=[-200.]
global_step=102799, episodic_return=[-200.]
global_step=102999, episodic_return=[-200.]
global_step=103199, episodic_return=[-200.]
global_step=103399, episodic_return=[-200.]
global_step=103599, episodic_return=[-200.]
global_step=103799, episodic_return=[-200.]
global_step=103999, episodic_return=[-200.]
global_step=104199, episodic_return=[-200.]
global_step=104399, episodic_return=[-200.]
global_step=104599, episodic_return=[-200.]
global_step=104799, episodic_return=[-200.]
global_step=104999, episodic_return=[-200.]
global_step=105199, episodic_return=[-200.]
global_step=105399, episodic_return=[-200.]
global_step=105599, episodic_return=[-200.]
global_step=105799, episodic_return=[-200.]
global_step=105999, episodic_return=[-200.]
global_step=106199, episodic_return=[-200.]
global_step=106399, episodic_return=[-200.]


                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-729.mp4
global_step=145999, episodic_return=[-200.]
global_step=146199, episodic_return=[-200.]
global_step=146399, episodic_return=[-200.]
global_step=146599, episodic_return=[-200.]
global_step=146799, episodic_return=[-200.]
global_step=146999, episodic_return=[-200.]
global_step=147199, episodic_return=[-200.]
global_step=147399, episodic_return=[-200.]
global_step=147599, episodic_return=[-200.]
global_step=147799, episodic_return=[-200.]
global_step=147999, episodic_return=[-200.]
global_step=148199, episodic_return=[-200.]
global_step=148399, episodic_return=[-200.]
global_step=148599, episodic_return=[-200.]
global_step=148799, episodic_return=[-200.]
global_step=148999, episodic_return=[-200.]
global_step=149199, episodic_return=[-200.]
global_step=149399, episodic_return=[-200.]
global_step=149599, episodic_return=[-200.]
global_step=149799, episodic_return=[-200.]


                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-1000.mp4
global_step=200199, episodic_return=[-200.]
global_step=200399, episodic_return=[-200.]
global_step=200599, episodic_return=[-200.]
global_step=200799, episodic_return=[-200.]
global_step=200999, episodic_return=[-200.]
global_step=201199, episodic_return=[-200.]
global_step=201399, episodic_return=[-200.]
global_step=201599, episodic_return=[-200.]
global_step=201799, episodic_return=[-200.]
global_step=201999, episodic_return=[-200.]
global_step=202199, episodic_return=[-200.]
global_step=202399, episodic_return=[-200.]
global_step=202599, episodic_return=[-200.]
global_step=202799, episodic_return=[-200.]
global_step=202999, episodic_return=[-200.]
global_step=203199, episodic_return=[-200.]
global_step=203399, episodic_return=[-200.]
global_step=203599, episodic_return=[-200.]
global_step=203799, episodic_return=[-200.]
global_step=203999, episodic_return=[-200.]

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298/rl-video-episode-2000.mp4
global_step=400163, episodic_return=[-200.]
global_step=400363, episodic_return=[-200.]
global_step=400563, episodic_return=[-200.]
global_step=400763, episodic_return=[-200.]
global_step=400963, episodic_return=[-200.]
global_step=401163, episodic_return=[-200.]
global_step=401363, episodic_return=[-200.]
global_step=401563, episodic_return=[-200.]
global_step=401763, episodic_return=[-200.]
global_step=401963, episodic_return=[-200.]
global_step=402163, episodic_return=[-200.]
global_step=402363, episodic_return=[-200.]
global_step=402563, episodic_return=[-200.]
global_step=402763, episodic_return=[-200.]
global_step=402963, episodic_return=[-200.]
global_step=403163, episodic_return=[-200.]
global_step=403363, episodic_return=[-200.]
global_step=403563, episodic_return=[-200.]
global_step=403763, episodic_return=[-200.]
global_step=403963, episodic_return=[-200.]

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-0.mp4
eval_episode=0, episodic_return=[-200.]
Moviepy - Building video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-1.mp4.
Moviepy - Writing video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-1.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-1.mp4
eval_episode=1, episodic_return=[-200.]
eval_episode=2, episodic_return=[-200.]
eval_episode=3, episodic_return=[-200.]
eval_episode=4, episodic_return=[-200.]
eval_episode=5, episodic_return=[-200.]
eval_episode=6, episodic_return=[-200.]
eval_episode=7, episodic_return=[-200.]
Moviepy - Building video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-8.mp4.
Moviepy - Writing video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-8.mp4



                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-8.mp4
eval_episode=8, episodic_return=[-200.]
eval_episode=9, episodic_return=[-200.]
eval_episode=10, episodic_return=[-200.]
eval_episode=11, episodic_return=[-200.]
eval_episode=12, episodic_return=[-200.]
eval_episode=13, episodic_return=[-200.]
eval_episode=14, episodic_return=[-200.]
eval_episode=15, episodic_return=[-200.]
eval_episode=16, episodic_return=[-200.]
eval_episode=17, episodic_return=[-200.]
eval_episode=18, episodic_return=[-200.]
eval_episode=19, episodic_return=[-200.]
eval_episode=20, episodic_return=[-200.]
eval_episode=21, episodic_return=[-200.]
eval_episode=22, episodic_return=[-200.]
eval_episode=23, episodic_return=[-200.]
eval_episode=24, episodic_return=[-200.]
eval_episode=25, episodic_return=[-200.]
eval_episode=26, episodic_return=[-200.]
Moviepy - Building video /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episo

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-27.mp4
eval_episode=27, episodic_return=[-200.]
eval_episode=28, episodic_return=[-200.]
eval_episode=29, episodic_return=[-200.]
eval_episode=30, episodic_return=[-200.]
eval_episode=31, episodic_return=[-200.]
eval_episode=32, episodic_return=[-200.]
eval_episode=33, episodic_return=[-200.]
eval_episode=34, episodic_return=[-200.]
eval_episode=35, episodic_return=[-200.]
eval_episode=36, episodic_return=[-200.]
eval_episode=37, episodic_return=[-200.]
eval_episode=38, episodic_return=[-200.]
eval_episode=39, episodic_return=[-200.]
eval_episode=40, episodic_return=[-200.]
eval_episode=41, episodic_return=[-200.]
eval_episode=42, episodic_return=[-200.]
eval_episode=43, episodic_return=[-200.]
eval_episode=44, episodic_return=[-200.]
eval_episode=45, episodic_return=[-200.]
eval_episode=46, episodic_return=[-200.]
eval_episode=47, episodic_return=[-200.]
eval_episode=48

                                                               

Moviepy - Done !
Moviepy - video ready /notebooks/rl-algos/video/MountainCar-v0__1__1699891298-eval/rl-video-episode-64.mp4
eval_episode=64, episodic_return=[-200.]
eval_episode=65, episodic_return=[-200.]
eval_episode=66, episodic_return=[-200.]
eval_episode=67, episodic_return=[-200.]
eval_episode=68, episodic_return=[-200.]
eval_episode=69, episodic_return=[-200.]
eval_episode=70, episodic_return=[-200.]
eval_episode=71, episodic_return=[-200.]
eval_episode=72, episodic_return=[-200.]
eval_episode=73, episodic_return=[-200.]
eval_episode=74, episodic_return=[-200.]
eval_episode=75, episodic_return=[-200.]
eval_episode=76, episodic_return=[-200.]
eval_episode=77, episodic_return=[-200.]
eval_episode=78, episodic_return=[-200.]
eval_episode=79, episodic_return=[-200.]
eval_episode=80, episodic_return=[-200.]
eval_episode=81, episodic_return=[-200.]
eval_episode=82, episodic_return=[-200.]
eval_episode=83, episodic_return=[-200.]
eval_episode=84, episodic_return=[-200.]
eval_episode=85

VBox(children=(Label(value='1.668 MB of 1.668 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
charts/SPS,▄█▇▆▆▅▅▄▃▃▂▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
charts/episodic_length,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
charts/episodic_return,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
charts/epsilon,██▇▆▆▅▅▄▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/episodic_return,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
losses/q_values,█▇▆▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
losses/td_loss,▁▄▄▃▄▃▂█▃▄▄▃▂▃▄▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▁▁▁▂

0,1
charts/SPS,924.0
charts/episodic_length,171.0
charts/episodic_return,-171.0
charts/epsilon,0.05
eval/episodic_return,-200.0
global_step,499958.0
losses/q_values,-91.80875
losses/td_loss,4.10196
