In [None]:
# imports - ignore
# !pip install gymnasium stable_baselines3[extra] opencv-python highway_env

In [10]:
import gymnasium as gym
from gymnasium.wrappers import RecordVideo
from stable_baselines3 import DQN
import numpy as np
import highway_env  # noqa: F401
import os

In [14]:
# Working directory against which relative paths such as "roundabout_dqn/" are resolved.
# Set the ROUNDABOUT_PROJECT_DIR environment variable to run this notebook on another
# machine; the original location is kept as the default.
PROJECT_DIR = os.environ.get(
    "ROUNDABOUT_PROJECT_DIR",
    '/Users/williamzhang/Documents/StanfordRoundabout/',
)
os.chdir(PROJECT_DIR)


In [None]:
def safety_margin(env, agent_id):
    """
    Calculate the safety margin of the agent in the environment.

    The margin is the Euclidean distance between the agent's vehicle and the
    closest other vehicle on the road, i.e. the same quantity the evaluation
    loop in this notebook tracks as ``min_distance``.

    Args:
        env: Environment whose ``unwrapped`` object exposes ``controlled_vehicles``
            and ``road.vehicles``, each vehicle having a ``position`` vector.
        agent_id: Index of the agent in ``env.unwrapped.controlled_vehicles``.
            NOTE(review): interpreting ``agent_id`` as a list index is an
            assumption - confirm against the intended multi-agent setup.

    Returns:
        float: Distance to the nearest other vehicle, or ``inf`` when the agent
        is the only vehicle on the road.
    """
    base_env = env.unwrapped
    ego = base_env.controlled_vehicles[agent_id]
    ego_position = np.asarray(ego.position)

    # Identity comparison keeps the ego vehicle out of its own distance list.
    distances = [
        float(np.linalg.norm(ego_position - np.asarray(other.position)))
        for other in base_env.road.vehicles
        if other is not ego
    ]
    return min(distances, default=float("inf"))


In [2]:
# Switch for the training cell: the agent is only (re)trained when this is True
TRAIN = False

# Build the roundabout scenario and take an initial observation
env = gym.make("roundabout-v0", render_mode="rgb_array")
obs, info = env.reset()

# DQN hyperparameters collected in one mapping so they are easy to scan and tweak
dqn_kwargs = {
    "policy_kwargs": {"net_arch": [256, 256]},
    "learning_rate": 5e-4,
    "buffer_size": 15000,
    "learning_starts": 200,
    "batch_size": 32,
    "gamma": 0.8,
    "train_freq": 1,
    "gradient_steps": 1,
    "target_update_interval": 50,
    "verbose": 1,
    "tensorboard_log": "roundabout_dqn/",
}

# Instantiate the agent with an MLP policy on top of the environment
model = DQN("MlpPolicy", env, **dqn_kwargs)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [6]:
# Optional training pass: skipped entirely unless TRAIN is switched on
if TRAIN:
    total_timesteps = int(2e4)
    model.learn(total_timesteps=total_timesteps)
    # Persist the trained agent, then drop the in-memory copy
    model.save("roundabout_dqn/model")
    del model

In [15]:
# Load the saved DQN agent from disk and attach it to the current environment
model = DQN.load(
    "roundabout_dqn/model",
    env=env,
    device="cpu",
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
num_environments = 30       # unique environment configurations (seeds)
num_trajectories = 5        # trajectories per environment

HARD_BRAKE_THRESHOLD = 4.0    # m/s²: decelerations stronger than this count as hard braking
REFERENCE_SPEED = 16.0        # divisor that turns the mean speed into avg_speed_ratio
# Action ids counted as lane-change commands
# (presumably LANE_LEFT / LANE_RIGHT - confirm against the env's action type).
LANE_CHANGE_ACTIONS = (0, 2)

METRIC_NAMES = (
    "success",
    "min_distance",
    "avg_speed_ratio",
    "jerk_score",
    "hard_brakes",
    "brake_severity",
    "lane_changes",
    "on_road_frac",
    "cumulative_reward",
)


def run_trajectory(env, model, seed, exploration_seed=None):
    """Roll out one episode and return its metrics.

    Args:
        env: Environment to roll out in; it is reset with ``seed`` so every
            trajectory of one scenario starts from the same initial layout.
        model: Agent queried with ``predict(obs, deterministic=False)`` so that
            repeated rollouts of the same scenario can differ.
        seed: Seed passed to ``env.reset``.
        exploration_seed: When given, passed to ``model.set_random_seed`` before
            the rollout so the agent's stochastic action choices are repeatable
            across notebook runs. ``None`` leaves the agent-side RNGs untouched.

    Returns:
        dict: One value per name in ``METRIC_NAMES``.
    """
    if exploration_seed is not None:
        model.set_random_seed(exploration_seed)

    obs, info = env.reset(seed=seed)
    base_env = env.unwrapped
    done = truncated = False

    speeds = []
    min_dist = float("inf")
    lane_changes = 0
    on_road_steps = 0
    total_steps = 0
    cumulative_reward = 0.0
    prev_action_id = None

    while not (done or truncated):
        action, _ = model.predict(obs, deterministic=False)

        # Count a lane-change command whenever it differs from the previous command.
        # prev_action_id starts as None, so a lane change issued on the very first
        # step is counted as well.
        action_id = np.asarray(action).item()
        if action_id in LANE_CHANGE_ACTIONS and action_id != prev_action_id:
            lane_changes += 1
        prev_action_id = action_id

        obs, reward, done, truncated, info = env.step(action)
        cumulative_reward += reward
        total_steps += 1

        # Use raw vehicle state from the environment (avoids normalization issues)
        ego = base_env.vehicle
        velocity = ego.velocity
        speeds.append(float(np.hypot(velocity[0], velocity[1])))

        for other in base_env.road.vehicles:
            if other is not ego:
                dist = float(np.linalg.norm(ego.position - other.position))
                min_dist = min(min_dist, dist)

        if ego.on_road:
            on_road_steps += 1

    speeds = np.asarray(speeds)
    dt = 1.0 / base_env.config["policy_frequency"]  # time between decisions
    accel = np.diff(speeds) / dt  # m/s²
    jerk = np.diff(accel) / dt    # m/s³

    # Hard braking: number of steps whose deceleration exceeds the threshold
    hard_brakes = int(np.sum(accel < -HARD_BRAKE_THRESHOLD))

    # Brake severity: quadratic penalty for deceleration beyond the threshold,
    # capturing *how hard* the brakes were applied, not just how often
    excess_decel = np.clip(-accel - HARD_BRAKE_THRESHOLD, 0, None)
    brake_severity = float(np.sum(excess_decel**2))

    return {
        "success": 0.0 if base_env.vehicle.crashed else 1.0,
        # NOTE(review): 0.0 is reported when no other vehicle was ever present,
        # which reads as "worst case" downstream - confirm this is intended.
        "min_distance": min_dist if min_dist != float("inf") else 0.0,
        "avg_speed_ratio": np.mean(speeds) / REFERENCE_SPEED,
        "jerk_score": np.mean(jerk**2) if len(jerk) > 0 else 0.0,
        "hard_brakes": hard_brakes,
        "brake_severity": brake_severity,
        "lane_changes": lane_changes,
        "on_road_frac": on_road_steps / max(total_steps, 1),
        "cumulative_reward": cumulative_reward,
    }


# Store metrics per environment: env_metrics[env_idx] = {metric: [traj_values]}
env_metrics = []

for env_idx in range(num_environments):
    seed = env_idx * 100  # deterministic, reproducible seeds

    traj_metrics = {name: [] for name in METRIC_NAMES}

    for traj_idx in range(num_trajectories):
        # Same env seed -> same initial layout; a distinct exploration seed per
        # trajectory -> different but repeatable action sequences.
        result = run_trajectory(
            env,
            model,
            seed,
            exploration_seed=env_idx * num_trajectories + traj_idx,
        )
        for name in METRIC_NAMES:
            traj_metrics[name].append(result[name])

    env_metrics.append(traj_metrics)

# ── Per-environment summary ──
print(f"=== Robustness Metrics ({num_environments} environments × {num_trajectories} trajectories) ===\n")

# Header and row format specs use identical column widths so the table lines up,
# and the rule is sized from the header instead of a hand-counted constant.
header = (
    f"{'Env':>4}  {'Seed':>5}  {'Success':>8}  {'MinDist':>8}  {'SpeedR':>7}  {'Jerk':>9}"
    f"  {'HBrake':>7}  {'BrkSev':>8}  {'LaneCh':>7}  {'OnRoad':>7}  {'Reward':>9}"
)
print(header)
print("-" * len(header))

for env_idx, m in enumerate(env_metrics):
    seed = env_idx * 100
    sr = np.mean(m["success"])
    md = np.mean(m["min_distance"])
    sp = np.mean(m["avg_speed_ratio"])
    jk = np.mean(m["jerk_score"])
    hb = np.mean(m["hard_brakes"])
    bs = np.mean(m["brake_severity"])
    lc = np.mean(m["lane_changes"])
    rd = np.mean(m["on_road_frac"])
    rw = np.mean(m["cumulative_reward"])
    print(
        f"{env_idx:>4}  {seed:>5}  {sr:>8.0%}  {md:>8.3f}  {sp:>7.3f}  {jk:>9.5f}"
        f"  {hb:>7.1f}  {bs:>8.2f}  {lc:>7.1f}  {rd:>7.0%}  {rw:>9.3f}"
    )


def _pool(metric):
    """Flatten one metric across all environments, keeping environment order."""
    return [value for per_env in env_metrics for value in per_env[metric]]


# Pooled per-trajectory values, consumed by the aggregate sections below
all_success = _pool("success")
all_min_dist = _pool("min_distance")
all_speed = _pool("avg_speed_ratio")
all_jerk = _pool("jerk_score")
all_hb = _pool("hard_brakes")
all_bs = _pool("brake_severity")
all_lc = _pool("lane_changes")
all_road = _pool("on_road_frac")
all_reward = _pool("cumulative_reward")

# ── Aggregate summary ──
print("\n=== Aggregate (all trajectories) ===")

# (label, formatted value) pairs; every label is padded to the same width on
# output so the value column stays aligned for all rows.
aggregate_rows = [
    ("Success Rate:", f"{np.mean(all_success):.2%}"),
    ("Min Safety Distance:", f"{np.mean(all_min_dist):.3f} m  (std={np.std(all_min_dist):.3f})"),
    ("Avg Speed Ratio:", f"{np.mean(all_speed):.3f}  (std={np.std(all_speed):.3f})"),
    ("Jerk (lower=smoother):", f"{np.mean(all_jerk):.5f}"),
    ("Hard Brakes/ep:", f"{np.mean(all_hb):.1f}  (count, decel > 4 m/s²)"),
    ("Brake Severity/ep:", f"{np.mean(all_bs):.2f}  (quadratic penalty beyond threshold)"),
    ("Lane Changes/ep:", f"{np.mean(all_lc):.1f}"),
    ("On-Road Fraction:", f"{np.mean(all_road):.2%}"),
    ("Cumulative Reward:", f"{np.mean(all_reward):.3f}  (std={np.std(all_reward):.3f})"),
]
for label, text in aggregate_rows:
    print(f"  {label:<23}{text}")

# ── Within-environment consistency ──
# Spread of a metric across the trajectories that share one scenario: a small
# spread means the policy behaves similarly each time it sees that scenario.
def _per_env_std(metric):
    """Standard deviation of `metric` over the trajectories of each environment."""
    return [np.std(per_env[metric]) for per_env in env_metrics]


reward_stds = _per_env_std("cumulative_reward")
success_stds = _per_env_std("success")

print("\n=== Within-Environment Consistency ===")
print(f"  Avg reward std per env:  {np.mean(reward_stds):.3f}  (lower = more consistent)")
print(f"  Avg success std per env: {np.mean(success_stds):.3f}")

# ── Composite robustness score ──
# Weighted sum of four components, each mapped into [0, 1] before weighting.
w = {"safety": 0.4, "stability": 0.2, "efficiency": 0.2, "road": 0.2}

mean_min_dist = np.mean(all_min_dist)
mean_jerk = np.mean(all_jerk)

norm_safety = np.clip(mean_min_dist / 20.0, 0, 1)        # saturates at a mean distance of 20
norm_stability = 1.0 - np.clip(mean_jerk / 0.1, 0, 1)    # drops to 0 once mean jerk score reaches 0.1
norm_efficiency = np.clip(np.mean(all_speed), 0, 1)
norm_road = np.mean(all_road)

components = {
    "safety": norm_safety,
    "stability": norm_stability,
    "efficiency": norm_efficiency,
    "road": norm_road,
}

# Accumulate in the weight dict's insertion order: safety, stability, efficiency, road
robustness = sum(w[name] * components[name] for name in w)
print(f"\n  Composite Robustness Score: {robustness:.4f}  (range [0, 1])")

In [None]:
# # Change working directory
# import os
# os.chdir('/content/drive/MyDrive/CS238V-Cars/')  # put your folder path here

# # Check current working directory
# !pwd

/content/drive/MyDrive/CS238V-Cars


In [8]:
# Run the trained model greedily and record one video per episode
NUM_VIDEOS = 10

model = DQN.load("roundabout_dqn/model", env=env, device="cpu")

# Wrap the environment so every episode is written to the video folder
env = RecordVideo(
    env, video_folder="roundabout_dqn/videos", episode_trigger=lambda e: True
)
env.unwrapped.config["simulation_frequency"] = 15  # Higher FPS for rendering
env.unwrapped.set_record_video_wrapper(env)

try:
    for _episode in range(NUM_VIDEOS):
        done = truncated = False
        obs, info = env.reset()
        while not (done or truncated):
            # Greedy action from the trained policy
            action, _states = model.predict(obs, deterministic=True)
            # Advance the simulation by one decision step
            obs, reward, done, truncated, info = env.step(action)
            # Render the current frame
            env.render()
finally:
    # Always close the recorder, even if an episode raises part-way through
    env.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  logger.warn(


In [None]:
# !pip freeze > colab_requirements.txt

In [None]:
# imports
# # import numpy
# # import gymnasium
# # import stable_baselines3
# # import cv2
# # import highway_env
# import torch
# import torchvision
# import torchaudio


# # print("numpy:", numpy.__version__)
# # print("gymnasium:", gymnasium.__version__)
# # print("stable_baselines3:", stable_baselines3.__version__)
# # print("opencv-python:", cv2.__version__)
# # print("highway_env:", highway_env.__version__)

# print("torch:", torch.__version__)
# print("torchvision:", torchvision.__version__)
# print("torchaudio:", torchaudio.__version__)


numpy: 2.0.2
gymnasium: 1.2.3
stable_baselines3: 2.7.1
opencv-python: 4.13.0
highway_env: 1.10.2
