In [4]:
import gymnasium as gym
import highway_env
import pandas as pd
import time
import os
from datetime import datetime

# --- Configuration ---
N_EPISODES = 50  # How many full episodes to run
SPEED_LIMIT_MS = 30.0 # The default speed limit in highway-v0 is 30 m/s
BASE_OUTPUT_DIR = "random_baseline_agent" # Base directory for all results

# --- Setup ---
# YOUR CHANGE: Define sub-directories for raw runs and summaries
RAW_RUNS_DIR = os.path.join(BASE_OUTPUT_DIR, "instant_runs")
SUMMARY_DIR = os.path.join(BASE_OUTPUT_DIR, "summary")

# Create all directories if they don't exist
os.makedirs(RAW_RUNS_DIR, exist_ok=True)
os.makedirs(SUMMARY_DIR, exist_ok=True)

# --- Data Storage ---
# We'll store the results of each episode in this list
all_episode_stats = []

print(f"--- Starting Baseline Evaluation ---")
print(f"Running {N_EPISODES} episodes with a RANDOM AGENT.")
print("The simulation window will open. It will be fast because the agent crashes quickly!")

# 1. Create the environment
# YOUR CHANGE: Added render_mode='human' to see the simulation
env = gym.make('highway-v0', render_mode='human')

# Start the main loop to run N_EPISODES
for i in range(N_EPISODES):
    
    # --- Per-Episode Counters ---
    current_episode_reward = 0
    current_episode_steps = 0
    current_episode_speed_sum = 0
    current_episode_lane_changes = 0
    current_episode_speed_limit_violations = 0
    
    # 2. Reset the environment for a new episode
    obs, info = env.reset()
    done = truncated = False
    
    # 3. Inner loop: run one full episode
    while not (done or truncated):
        
        # --- The "Brain": A Random Agent ---
        action = env.action_space.sample() 
        
        # 4. Take the action
        obs, reward, done, truncated, info = env.step(action)
        
        # --- Collect Metrics ---
        current_episode_reward += reward
        current_episode_steps += 1
        
        # Get Ego Vehicle speed (it's the first vehicle in the observation)
        # obs[0] is the ego car, obs[0][3] is its 'vx' (longitudinal velocity)
        ego_speed = obs[0][3] 
        current_episode_speed_sum += ego_speed
        
        # Check for lane change
        if action == 0 or action == 2: # 0=LANE_LEFT, 2=LANE_RIGHT
            current_episode_lane_changes += 1
            
        # Check for speed limit compliance
        if ego_speed > SPEED_LIMIT_MS:
            current_episode_speed_limit_violations += 1
            
        # Optional: Add a tiny sleep to make it more watchable
        # time.sleep(0.01) # You can uncomment this if it runs too fast

    # --- Episode Finished: Save Stats ---
    # `info['crashed']` is True if the episode ended in a collision
    was_collision = info.get('crashed', False)
    
    # Calculate averages for this episode (add 1e-6 to avoid divide by zero if steps=0)
    avg_speed = current_episode_speed_sum / (current_episode_steps + 1e-6)
    speed_compliance_frac = 1.0 - (current_episode_speed_limit_violations / (current_episode_steps + 1e-6))
    
    # Store all metrics in a dictionary
    stats = {
        "episode": i + 1,
        "collision": was_collision,
        "total_reward": current_episode_reward,
        "avg_speed_ms": avg_speed,
        "lane_changes": current_episode_lane_changes,
        "speed_compliance": speed_compliance_frac
    }
    all_episode_stats.append(stats)
    
    # Print progress
    if (i+1) % 10 == 0:
        print(f"  ... Episode {i+1} / {N_EPISODES} complete.")

# --- All Episodes Done: Final Report ---
env.close()
print("\n--- Evaluation Complete. ---")

# Convert the list of dictionaries into a pandas DataFrame
df = pd.DataFrame(all_episode_stats)

# YOUR CHANGE: Save the raw DataFrame to the 'instant_runs' directory
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"run_{timestamp}.csv"
output_path = os.path.join(RAW_RUNS_DIR, filename)
df.to_csv(output_path, index=False)

print(f"Raw results for all {N_EPISODES} episodes saved to: {output_path}")


# --- Calculate and Print Final Metrics Table ---
print("\n--- Final Metrics Summary (Random Agent Baseline) ---")

# We use .mean() to get the average of all episodes
final_metrics = {
    "Collision Rate": df['collision'].mean(),
    "Average Speed (m/s)": df['avg_speed_ms'].mean(),
    "Avg Lane Changes / Episode": df['lane_changes'].mean(),
    "Average Reward / Episode": df['total_reward'].mean(),
    "Speed Limit Compliance": df['speed_compliance'].mean()
}

# YOUR CHANGE: Save the summary metrics to a matching CSV file
# Convert the dictionary to a DataFrame for easy saving
df_summary = pd.DataFrame(list(final_metrics.items()), columns=['Metric', 'Value'])

# Create a new filename for the summary, matching the raw run's timestamp
summary_filename = f"summary_{timestamp}.csv"
summary_output_path = os.path.join(SUMMARY_DIR, summary_filename)
df_summary.to_csv(summary_output_path, index=False)

print(f"Summary metrics file saved to: {summary_output_path}")

# Print as a clean table
for metric, value in final_metrics.items():
    print(f"{metric:<28} | {value:>8.4f}")

--- Starting Baseline Evaluation ---
Running 10 episodes with a RANDOM AGENT.
The simulation window will open. It will be fast because the agent crashes quickly!
  ... Episode 10 / 10 complete.

--- Evaluation Complete. ---
Raw results for all 10 episodes saved to: random_baseline_agent/instant_runs/run_20251114_123336.csv

--- Final Metrics Summary (Random Agent Baseline) ---
Summary metrics file saved to: random_baseline_agent/summary/summary_20251114_123336.csv
Collision Rate               |   0.9000
Average Speed (m/s)          |   0.3110
Avg Lane Changes / Episode   |   6.4000
Average Reward / Episode     |  12.9860
Speed Limit Compliance       |   1.0000
