In [18]:
# Environment
import gymnasium as gym
import highway_env

# Agent
from stable_baselines3 import DQN

# Visualization utils
%load_ext tensorboard
import sys
from tqdm.notebook import trange

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [19]:
# Reload the TensorBoard notebook extension (the imports cell already ran %load_ext).
%reload_ext tensorboard
# Fetch the HighwayEnv repository for its helper scripts. stderr is discarded,
# so a failed clone produces no visible error in this cell.
!git clone https://github.com/Farama-Foundation/HighwayEnv.git 2> /dev/null
# Put the cloned scripts directory first on the import path so `utils` resolves to it.
sys.path.insert(0, './HighwayEnv/scripts/')
# Video helpers used by the evaluation cell (record_videos wraps the env, show_videos displays the result).
from utils import record_videos, show_videos

In [20]:
# Open TensorBoard on the "highway_dqn" log directory, the same directory the DQN
# training cell passes as tensorboard_log.
%tensorboard --logdir "highway_dqn"

Reusing TensorBoard on port 6006 (pid 16976), started 0:12:45 ago. (Use '!kill 16976' to kill it.)

In [21]:
# DQN hyperparameters, gathered in one mapping so they can be read and tuned together.
dqn_kwargs = {
    'policy_kwargs': {'net_arch': [256, 256]},
    'learning_rate': 5e-4,
    'buffer_size': 15000,
    'learning_starts': 200,
    'batch_size': 32,
    'gamma': 0.8,
    'train_freq': 1,
    'gradient_steps': 1,
    'target_update_interval': 50,
    'exploration_fraction': 0.7,
    'verbose': 1,
    # Logs are written under highway_dqn/, the directory the TensorBoard cell reads.
    'tensorboard_log': 'highway_dqn/',
}

# The environment is given by id, so the algorithm constructs it itself.
model = DQN('MlpPolicy', 'highway-fast-v0', **dqn_kwargs)

# Train for 20,000 timesteps; the trained model is the cell's displayed value.
model.learn(20_000)

Using cpu device
Creating environment from the given name 'highway-fast-v0'
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to highway_dqn/DQN_2
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 9.5      |
|    ep_rew_mean      | 7.44     |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 50       |
|    time_elapsed     | 0        |
|    total_timesteps  | 38       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 12.9     |
|    ep_rew_mean      | 9.65     |
|    exploration_rate | 0.993    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 56       |
|    time_elapsed     | 1        |
|    total_timesteps  | 103      |
----------------------------------
----------------------------------
| rollout/   

<stable_baselines3.dqn.dqn.DQN at 0x36d174f10>

In [22]:
# Roll out the trained agent for a few episodes and record each one to video.
env = gym.make('highway-fast-v0', render_mode='rgb_array')
env = record_videos(env)
for episode in trange(3, desc='Test episodes'):
    obs, info = env.reset()
    # Gymnasium reports episode end through two separate flags: `done`
    # (terminated) and `truncated` (e.g. a time limit). Both must stop the
    # loop, otherwise an agent that never terminates keeps stepping an
    # episode that has already been truncated.
    done = truncated = False
    while not (done or truncated):
        # Greedy action from the trained policy (no exploration noise).
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(int(action))
env.close()
show_videos()

  logger.warn(


Test episodes:   0%|          | 0/3 [00:00<?, ?it/s]

Moviepy - Building video /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-0.mp4.
Moviepy - Writing video /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-0.mp4



  logger.warn(


Moviepy - Done !
Moviepy - video ready /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-0.mp4
Moviepy - Building video /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-1.mp4.
Moviepy - Writing video /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-1.mp4



  logger.warn(


Moviepy - Done !
Moviepy - video ready /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-1.mp4
Moviepy - Building video /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-2.mp4.
Moviepy - Writing video /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-2.mp4



  logger.warn(


Moviepy - Done !
Moviepy - video ready /Users/jonathanhu/autonomous_driving_sim/videos/rl-video-episode-2.mp4


In [23]:
import gymnasium as gym
import highway_env
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import DQN, A2C, PPO, SAC
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

def train_and_evaluate(algorithm, env, total_timesteps=20000, n_eval_episodes=10):
    """Train a fresh model of ``algorithm`` on ``env`` and evaluate it.

    Args:
        algorithm: Algorithm class (e.g. ``DQN``). It is instantiated as
            ``algorithm('MlpPolicy', env, verbose=1)``.
        env: Environment used both for training and for evaluation.
        total_timesteps: Number of timesteps passed to ``model.learn``.
        n_eval_episodes: Number of episodes passed to ``evaluate_policy``.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        A ``(mean_reward, std_reward)`` tuple as produced by
        ``evaluate_policy``.
    """
    model = algorithm('MlpPolicy', env, verbose=1)
    model.learn(total_timesteps=total_timesteps)
    mean_reward, std_reward = evaluate_policy(
        model, env, n_eval_episodes=n_eval_episodes
    )
    return mean_reward, std_reward

# Create the environment
env_name = "highway-fast-v0"
env = make_vec_env(env_name, n_envs=1)

# Algorithms to use.
# SAC is deliberately not listed: it only supports continuous (Box) action
# spaces, while highway-fast-v0 uses discrete meta-actions by default, so
# constructing SAC on this env raises before any training happens.
algorithms = {
    'DQN': DQN,
    'A2C': A2C,
    'PPO': PPO,
}

# Maps algorithm name -> (mean_reward, std_reward) from evaluation.
results = {}

# Train and evaluate each algorithm
for name, alg in algorithms.items():
    print(f"Training and evaluating {name}")
    mean_reward, std_reward = train_and_evaluate(alg, env)
    results[name] = (mean_reward, std_reward)
    print(f"{name} Mean Reward: {mean_reward}, Std Reward: {std_reward}")

# Grouped bar chart: for every algorithm in `results`, draw its mean evaluation
# reward and its standard deviation as two side-by-side bars.
labels = results.keys()
mean_rewards = [mean for mean, _ in results.values()]
std_rewards = [std for _, std in results.values()]

width = 0.35  # width of each individual bar
x = np.arange(len(labels))  # one group position per algorithm

fig, ax = plt.subplots()

# Shift the two bars half a bar-width left/right so each pair is centred on its tick.
rects1 = ax.bar(x - width / 2, mean_rewards, width, label='Mean Reward')
rects2 = ax.bar(x + width / 2, std_rewards, width, label='Std Deviation')

# Axis labelling and legend.
ax.set_title('Performance by Algorithm')
ax.set_ylabel('Scores')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.show()


Training and evaluating DQN
Using cpu device
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.8     |
|    ep_rew_mean      | 10.6     |
|    exploration_rate | 0.972    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 57       |
|    time_elapsed     | 1        |
|    total_timesteps  | 59       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.4     |
|    ep_rew_mean      | 9.85     |
|    exploration_rate | 0.949    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 58       |
|    time_elapsed     | 1        |
|    total_timesteps  | 107      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.39     |
|    n_updates        | 1        |
----------------------------------
----------------------------------
| rollout/

KeyboardInterrupt: 

: 