# Working DQN Example on CartPole-v1 Environment

In [4]:
import gym
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
import wandb
from wandb.integration.sb3 import WandbCallback


# Hyperparameters for the DQN run. The same dict is handed to ``wandb.init``
# so every value below is recorded with the run.
config = {
    # Total environment steps to train for. Kept as an integer because it is
    # passed as ``total_timesteps`` to ``model.learn`` (a float such as 5e4
    # would also be logged as 50000.0).
    "n_timesteps": 50_000,
    "policy": "MlpPolicy",
    "learning_rate": 2.3e-3,
    "batch_size": 64,
    "buffer_size": 100000,
    "learning_starts": 1000,
    "gamma": 0.99,
    "target_update_interval": 10,
    "train_freq": 256,
    "gradient_steps": 128,
    # Exploration settings forwarded to DQN's epsilon schedule.
    "exploration_fraction": 0.16,
    "exploration_final_eps": 0.04,
    # Layer sizes for the policy network.
    "policy_kwargs": {"net_arch": [256, 256]},
    "seed": 42,
    "env_name": "CartPole-v1",
}

# Open the Weights & Biases run that the rest of the script logs into.
# ``run.id`` is reused below to name the video, TensorBoard and model folders.
run = wandb.init(
    config=config,
    project="sb3",
    # optional
    save_code=True,
    # auto-upload the videos of agents playing the game
    monitor_gym=True,
    # auto-upload sb3's tensorboard metrics
    sync_tensorboard=True,
)


def make_env():
    """Create the environment named by ``config["env_name"]``, wrapped in ``Monitor``.

    The ``Monitor`` wrapper records stats such as returns.
    """
    return Monitor(gym.make(config["env_name"]))


# One vectorised environment, with a video recorder layered on top.
# A 200-step clip is started whenever the trigger's step argument is a
# multiple of 2000; clips are written under videos/<run id>.
env = VecVideoRecorder(
    DummyVecEnv([make_env]),
    f"videos/{run.id}",
    record_video_trigger=lambda step: step % 2000 == 0,
    video_length=200,
)


# Build the DQN agent. Every hyperparameter listed in the tuple is forwarded
# under its own name straight from ``config``; only the logging options are
# spelled out explicitly.
model = DQN(
    config["policy"],
    env,
    **{
        key: config[key]
        for key in (
            "learning_rate",
            "batch_size",
            "buffer_size",
            "learning_starts",
            "gamma",
            "target_update_interval",
            "train_freq",
            "gradient_steps",
            "exploration_fraction",
            "exploration_final_eps",
            "policy_kwargs",
            "seed",
        )
    },
    tensorboard_log=f"runs/{run.id}",
    verbose=1,
)

# Train the agent, then release resources no matter how training ends.
try:
    model.learn(
        # ``learn`` takes an integer step count; cast in case the configured
        # value was written in float notation (e.g. 5e4).
        total_timesteps=int(config["n_timesteps"]),
        callback=WandbCallback(
            gradient_save_freq=100,
            model_save_path=f"models/{run.id}",
            verbose=0,
        ),
    )
finally:
    # Close the env first so the video recorder can finalise any clip still
    # being written, then end the W&B run. The nested ``finally`` guarantees
    # the run is finished even if closing the env fails, and the outer one
    # covers exceptions and KeyboardInterrupt raised during training.
    try:
        env.close()
    finally:
        run.finish()

Using cuda device




Logging to runs/rbu3wujf/DQN_1
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18.8     |
|    ep_rew_mean      | 18.8     |
|    exploration_rate | 0.991    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 45       |
|    time_elapsed     | 1        |
|    total_timesteps  | 75       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18       |
|    ep_rew_mean      | 18       |
|    exploration_rate | 0.983    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 51       |
|    time_elapsed     | 2        |
|    total_timesteps  | 144      |
----------------------------------
Saving video to /home/curtis/classes/self-driving-cars/videos/rbu3wujf/rl-video-step-0-to-step-200.mp4
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18

0,1
global_step,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▇▇▇█
rollout/ep_len_mean,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▄▄▄▄▄▄▃▄▃▄▄▅▅▅▇▇██▇█
rollout/ep_rew_mean,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▄▄▄▄▄▄▄▃▄▃▄▄▅▅▅▇▇██▇█
rollout/exploration_rate,███▇▇▇▇▆▆▆▅▅▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
time/fps,▁▂▃▄▆▇▇█▆▆▇█▇█▇▇▇█▇▇█▇█▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
train/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/loss,▁▂▂▂▂▂▃▃▄▄▄▄▄▄▅▅▆▇▅▆▆▆▅▆▅▆▅▆▆▅▇▇▇▆▇▇▇▆▇█

0,1
global_step,49190.0
rollout/ep_len_mean,192.78999
rollout/ep_rew_mean,192.78999
rollout/exploration_rate,0.04
time/fps,360.0
train/learning_rate,0.0023
train/loss,0.61324
