In [None]:
import gymnasium as gym
import highway_env
import pandas as pd
import numpy as np
import time
import os
from datetime import datetime

# --- Configuration ---
# Number of complete episodes the random agent plays.
N_EPISODES = 50
# Speed limit (m/s) used for the speed-compliance metric.
SPEED_LIMIT_MS = 30.0
# Root folder that receives every CSV produced by this notebook.
BASE_OUTPUT_DIR = "random_baseline_agent"
# A time-to-collision below this many seconds counts as unsafe.
SAFE_TTC_THRESHOLD = 2.0

# --- Setup ---
# Per-episode rows go to "instant_runs", aggregated metrics go to "summary".
RAW_RUNS_DIR, SUMMARY_DIR = (
    os.path.join(BASE_OUTPUT_DIR, leaf) for leaf in ("instant_runs", "summary")
)

# Make sure both output folders exist (no error if they already do).
for _output_dir in (RAW_RUNS_DIR, SUMMARY_DIR):
    os.makedirs(_output_dir, exist_ok=True)

# --- Data Storage ---
# One stats dictionary is appended here per finished episode.
all_episode_stats = []

# Start-up banner, emitted as four separate lines.
print("\n".join([
    "--- Starting ENHANCED Baseline Evaluation ---",
    f"Running {N_EPISODES} episodes with a RANDOM AGENT.",
    "Now capturing: TTC, Jerk, Episode Duration, and more!",
    "The simulation window will open. It will be fast because the agent crashes quickly!",
]))

# 1. Create the environment
env = gym.make('highway-v0', render_mode='human')

# Metrics are measured on the simulator state (env.unwrapped), not on `obs`.
# highway-env's default Kinematics observation is normalised and expresses the
# other vehicles relative to the ego vehicle, so treating obs[0][3] as a speed
# in m/s (and subtracting the ego row from the other rows a second time) gives
# meaningless average speeds, speed-limit checks and time-to-collision values.
sim = env.unwrapped

# Simulated seconds between two agent decisions; used to express acceleration
# and jerk per second rather than per step.  Falls back to 1 Hz if the key is
# missing from the environment configuration.
DECISION_DT_S = 1.0 / sim.config.get("policy_frequency", 1)

# Lateral centre-to-centre distance (m) under which another vehicle is treated
# as being in the ego vehicle's path.
# NOTE(review): assumes the default 4 m lane width (half a lane) - adjust if
# the road is configured differently.
SAME_LANE_TOLERANCE_M = 2.0

# Closing speeds at or below this (m/s) are ignored to avoid huge TTC values.
MIN_CLOSING_SPEED_MS = 0.01

try:
    # Start the main loop to run N_EPISODES
    for i in range(N_EPISODES):

        # --- Per-Episode Counters ---
        current_episode_reward = 0
        current_episode_steps = 0
        current_episode_speed_sum = 0
        current_episode_lane_changes = 0
        current_episode_speed_limit_violations = 0

        # Absolute jerk per step and the smallest TTC of each step in which
        # the ego vehicle was closing in on a vehicle ahead.
        jerk_values = []
        ttc_values = []

        # Wall-clock duration of the episode (includes rendering time).
        episode_start_time = time.perf_counter()

        # 2. Reset the environment for a new episode
        obs, info = env.reset()
        done = truncated = False

        # The ego vehicle is recreated on every reset, so fetch it afterwards.
        ego = sim.vehicle

        # Seed the finite differences with the real initial speed; starting
        # from 0.0 would register the first step as a violent acceleration.
        previous_speed = float(ego.speed)
        previous_acceleration = 0.0

        # 3. Inner loop: run one full episode
        while not (done or truncated):

            # --- The "Brain": A Random Agent ---
            action = env.action_space.sample()

            # 4. Take the action
            obs, reward, done, truncated, info = env.step(action)

            # --- Collect Metrics ---
            current_episode_reward += reward
            current_episode_steps += 1

            # Ego speed in m/s, straight from the simulator.
            ego_speed = float(ego.speed)
            current_episode_speed_sum += ego_speed

            # Finite-difference acceleration (m/s^2) and jerk (m/s^3).
            acceleration = (ego_speed - previous_speed) / DECISION_DT_S
            jerk = (acceleration - previous_acceleration) / DECISION_DT_S
            jerk_values.append(abs(jerk))
            previous_acceleration = acceleration
            previous_speed = ego_speed

            # Time-to-Collision (TTC) with the most critical vehicle ahead.
            min_ttc_this_step = float('inf')
            ego_x, ego_y = ego.position
            ego_vx = ego.velocity[0]

            for other in sim.road.vehicles:
                if other is ego:
                    continue

                gap_x = other.position[0] - ego_x
                gap_y = abs(other.position[1] - ego_y)

                # Only vehicles ahead of the ego and laterally in its path.
                if gap_x <= 0 or gap_y >= SAME_LANE_TOLERANCE_M:
                    continue

                # Positive when the ego vehicle is catching up.
                closing_speed = ego_vx - other.velocity[0]
                if closing_speed > MIN_CLOSING_SPEED_MS:
                    min_ttc_this_step = min(min_ttc_this_step, gap_x / closing_speed)

            if min_ttc_this_step != float('inf'):
                ttc_values.append(float(min_ttc_this_step))

            # Count lane-change commands
            if action in (0, 2):  # 0=LANE_LEFT, 2=LANE_RIGHT
                current_episode_lane_changes += 1

            # Check for speed limit compliance
            if ego_speed > SPEED_LIMIT_MS:
                current_episode_speed_limit_violations += 1

        # --- Episode Finished: Calculate Stats ---
        episode_duration = time.perf_counter() - episode_start_time

        # Check termination reason
        was_collision = bool(info.get('crashed', False))
        # Success means the episode ran to its time limit (truncated) without
        # a crash.  Requiring "not truncated" would make success impossible,
        # because every crash-free episode ends by truncation.
        was_success = bool(truncated) and not was_collision

        # The loop body always runs at least once; max() only guards the division.
        steps = max(current_episode_steps, 1)
        avg_speed = current_episode_speed_sum / steps
        speed_compliance_frac = 1.0 - (current_episode_speed_limit_violations / steps)

        # Jerk statistics
        avg_jerk = np.mean(jerk_values) if jerk_values else 0.0
        max_jerk = np.max(jerk_values) if jerk_values else 0.0

        # TTC statistics (inf = the ego never closed in on a vehicle ahead)
        avg_ttc = np.mean(ttc_values) if ttc_values else float('inf')
        min_ttc = np.min(ttc_values) if ttc_values else float('inf')
        ttc_violations = sum(1 for ttc in ttc_values if ttc < SAFE_TTC_THRESHOLD)
        ttc_violation_rate = ttc_violations / steps

        # Store all metrics in a dictionary
        stats = {
            "episode": i + 1,
            "collision": was_collision,
            "success": was_success,
            "total_reward": current_episode_reward,
            "episode_duration_s": episode_duration,
            "steps": current_episode_steps,
            "avg_speed_ms": avg_speed,
            "lane_changes": current_episode_lane_changes,
            "speed_compliance": speed_compliance_frac,
            "avg_jerk": avg_jerk,
            "max_jerk": max_jerk,
            "avg_ttc": avg_ttc if avg_ttc != float('inf') else -1,  # -1 means no TTC data
            "min_ttc": min_ttc if min_ttc != float('inf') else -1,
            "ttc_violation_rate": ttc_violation_rate
        }
        all_episode_stats.append(stats)

        # Print progress
        if (i+1) % 10 == 0:
            print(f"  ... Episode {i+1} / {N_EPISODES} complete.")
finally:
    # --- All Episodes Done: Final Report ---
    # Always release the render window, even if an episode raised.
    env.close()

print("\n--- Evaluation Complete. ---")

# One row per episode, one column per collected metric.
df = pd.DataFrame(all_episode_stats)

# Raw per-episode data goes to the 'instant_runs' directory, tagged with the
# time at which this report was produced.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = "run_" + timestamp + ".csv"
output_path = os.path.join(RAW_RUNS_DIR, filename)
df.to_csv(output_path, index=False)

print(f"\n‚úÖ Raw results for all {N_EPISODES} episodes saved to: {output_path}")

# --- Calculate and Print Final Metrics Summary ---
_rule = "=" * 70
print("\n" + _rule)
print("--- ENHANCED METRICS SUMMARY (Random Agent Baseline) ---")
print(_rule)


def _mean_of_positive(column_name):
    """Average the strictly positive entries of a df column, or -1 if there are none.

    Episodes without TTC data are stored as -1, so they are excluded here.
    """
    positive_rows = df[df[column_name] > 0]
    if len(positive_rows) > 0:
        return positive_rows[column_name].mean()
    return -1


# Summary label -> per-episode column whose plain mean is reported.
_PLAIN_MEANS = (
    ("Collision Rate", 'collision'),
    ("Success Rate", 'success'),
    ("Average Speed (m/s)", 'avg_speed_ms'),
    ("Avg Episode Duration (s)", 'episode_duration_s'),
    ("Avg Steps per Episode", 'steps'),
    ("Avg Lane Changes / Episode", 'lane_changes'),
    ("Average Reward / Episode", 'total_reward'),
    ("Speed Limit Compliance", 'speed_compliance'),
    ("Avg Jerk (Comfort)", 'avg_jerk'),
    ("Max Jerk (Worst Case)", 'max_jerk'),
)

# Basic metrics (insertion order is the row order of the summary CSV)
final_metrics = {label: df[column].mean() for label, column in _PLAIN_MEANS}
final_metrics["Avg Time-to-Collision (s)"] = _mean_of_positive('avg_ttc')
final_metrics["Min Time-to-Collision (s)"] = _mean_of_positive('min_ttc')
final_metrics["TTC Violation Rate"] = df['ttc_violation_rate'].mean()

# Save the summary metrics
_summary_rows = list(final_metrics.items())
df_summary = pd.DataFrame(_summary_rows, columns=['Metric', 'Value'])
summary_filename = "summary_" + timestamp + ".csv"
summary_output_path = os.path.join(SUMMARY_DIR, summary_filename)
df_summary.to_csv(summary_output_path, index=False)

print(f"\n‚úÖ Summary metrics file saved to: {summary_output_path}")
print("\n" + "-"*70)

# Print as a clean table; the -1 sentinel is shown as N/A
for metric, value in final_metrics.items():
    cell = f"{'N/A':>12}" if value == -1 else f"{value:>12.4f}"
    print(f"{metric:<35} | {cell}")

print(_rule)
print("\nüéØ Metrics saved! Continue to next cells for indicator analysis.")
print(_rule)

--- Starting Baseline Evaluation ---
Running 10 episodes with a RANDOM AGENT.
The simulation window will open. It will be fast because the agent crashes quickly!
  ... Episode 10 / 10 complete.

--- Evaluation Complete. ---
Raw results for all 10 episodes saved to: random_baseline_agent/instant_runs/run_20251114_123336.csv

--- Final Metrics Summary (Random Agent Baseline) ---
Summary metrics file saved to: random_baseline_agent/summary/summary_20251114_123336.csv
Collision Rate               |   0.9000
Average Speed (m/s)          |   0.3110
Avg Lane Changes / Episode   |   6.4000
Average Reward / Episode     |  12.9860
Speed Limit Compliance       |   1.0000


---

## 📊 PART 2: Performance Indicators Calculation

Now we calculate the five key performance indicators defined in Sections 7–8 of README.md:

1. **Safety Index (SI)** - Combines collision rate, TTC, and violations
2. **Efficiency Index (EI)** - Speed and success rate
3. **Comfort Index (CI)** - Smoothness (inverse of jerk and lane changes)
4. **Rule Compliance Index (RCI)** - Traffic rule adherence
5. **Global Performance Score (GPS)** - Weighted aggregate of all indices

### Indicator Weights (from research priorities):
- GPS = **40% Safety** + **30% Efficiency** + **15% Comfort** + **15% Compliance**

In [None]:
# ============================================================================
# INDICATOR CALCULATION MODULE
# ============================================================================
# This section calculates performance indicators from the metrics collected above.
# These indicators provide a comprehensive evaluation framework for comparing
# different RL models (Random Baseline, PPO, DQN, SAC).

# --- Configuration: Indicator Weights ---
# Each index has its own weight table; the tables are assembled into
# INDICATOR_WEIGHTS below, keyed by index name.

# Safety Index: collision avoidance, Time-to-Collision, TTC violation rate
_SAFETY_WEIGHTS = {
    'w_collision': 0.4,
    'w_ttc': 0.4,
    'w_ttc_violations': 0.2,
}

# Efficiency Index: average speed and success rate
_EFFICIENCY_WEIGHTS = {
    'w_speed': 0.5,
    'w_success': 0.5,
}

# Comfort Index: jerk (smoothness) and lane change frequency
_COMFORT_WEIGHTS = {
    'w_jerk': 0.6,
    'w_lane_changes': 0.4,
}

# Rule Compliance Index: speed limit adherence
_RULE_COMPLIANCE_WEIGHTS = {
    'w_speed_compliance': 1.0,
}

# Global Performance Score: share of Safety, Efficiency, Comfort, Compliance
_GLOBAL_WEIGHTS = {
    'a_safety': 0.40,
    'b_efficiency': 0.30,
    'c_comfort': 0.15,
    'd_compliance': 0.15,
}

INDICATOR_WEIGHTS = {
    'safety_index': _SAFETY_WEIGHTS,
    'efficiency_index': _EFFICIENCY_WEIGHTS,
    'comfort_index': _COMFORT_WEIGHTS,
    'rule_compliance_index': _RULE_COMPLIANCE_WEIGHTS,
    'global_performance': _GLOBAL_WEIGHTS,
}

# Normalization constants: values at or above these map to a normalized 1.0
MAX_EXPECTED_JERK = 10.0
MAX_EXPECTED_LANE_CHANGES = 20.0


# --- Helper Function: Extract Metric Value ---
def get_metric_value(metrics_dict, metric_name, default=0.0):
    """Return ``metrics_dict[metric_name]``, or ``default`` when the key is absent.

    A key that is present is returned as stored, even if its value is falsy
    or ``None``.
    """
    if metric_name in metrics_dict:
        return metrics_dict[metric_name]
    return default


# --- 1. Safety Index (SI) ---
def calculate_safety_index(metrics):
    """
    Combine collision rate, average TTC and TTC violation rate into one score.

    SI = w_collision*(1 - CollisionRate)
       + w_ttc*TTC_norm
       + w_ttc_violations*(1 - TTC_ViolationRate)

    TTC_norm is the average TTC divided by SAFE_TTC_THRESHOLD, capped at 1.0.
    A missing or non-positive average TTC (the -1 "no data" sentinel) scores 0.0.
    Higher is safer; the result lies in [0, 1] when both rates lie in [0, 1]
    and the weights sum to 1.
    """
    weights = INDICATOR_WEIGHTS['safety_index']

    crash_rate = get_metric_value(metrics, "Collision Rate")
    mean_ttc = get_metric_value(metrics, "Avg Time-to-Collision (s)", -1)
    violation_rate = get_metric_value(metrics, "TTC Violation Rate")

    # Map the average TTC onto [0, 1]; anything at or above the threshold is 1.0
    ttc_score = min(mean_ttc / SAFE_TTC_THRESHOLD, 1.0) if mean_ttc > 0 else 0.0

    collision_term = weights['w_collision'] * (1 - crash_rate)
    ttc_term = weights['w_ttc'] * ttc_score
    violation_term = weights['w_ttc_violations'] * (1 - violation_rate)

    return collision_term + ttc_term + violation_term


# --- 2. Efficiency Index (EI) ---
def calculate_efficiency_index(metrics):
    """
    Combine average speed and success rate into one score.

    EI = w_speed*min(avg_speed / SPEED_LIMIT_MS, 1.0) + w_success*SuccessRate

    Higher is more efficient. Speeds above the limit earn no extra credit
    because the speed ratio is capped at 1.0.
    """
    weights = INDICATOR_WEIGHTS['efficiency_index']

    mean_speed = get_metric_value(metrics, "Average Speed (m/s)")
    completion_rate = get_metric_value(metrics, "Success Rate")

    # Fraction of the speed limit actually driven, capped at 1.0
    capped_speed_fraction = min(mean_speed / SPEED_LIMIT_MS, 1.0)

    speed_term = weights['w_speed'] * capped_speed_fraction
    success_term = weights['w_success'] * completion_rate

    return speed_term + success_term


# --- 3. Comfort Index (CI) ---
def calculate_comfort_index(metrics):
    """
    Score ride smoothness from average jerk and lane-change count.

    CI = max(1 - (w_jerk*Jerk_norm + w_lane_changes*LaneChanges_norm), 0)

    Each input is divided by its MAX_EXPECTED_* constant and capped at 1.0.
    Higher is more comfortable (smoother); the result is never negative.
    """
    weights = INDICATOR_WEIGHTS['comfort_index']

    mean_jerk = get_metric_value(metrics, "Avg Jerk (Comfort)")
    mean_lane_changes = get_metric_value(metrics, "Avg Lane Changes / Episode")

    # Weighted penalties, each built from an input capped at 1.0
    jerk_penalty = weights['w_jerk'] * min(mean_jerk / MAX_EXPECTED_JERK, 1.0)
    lane_penalty = weights['w_lane_changes'] * min(mean_lane_changes / MAX_EXPECTED_LANE_CHANGES, 1.0)

    # Comfort is whatever remains after the total penalty, floored at zero
    remaining = 1 - (jerk_penalty + lane_penalty)
    return max(remaining, 0.0)


# --- 4. Rule Compliance Index (RCI) ---
def calculate_rule_compliance_index(metrics):
    """
    Return the "Speed Limit Compliance" metric unchanged as the RCI.

    Higher is better compliance. A missing metric yields 0.0, the lookup
    helper's default.
    """
    return get_metric_value(metrics, "Speed Limit Compliance")


# --- 5. Global Performance Score (GPS) ---
def calculate_global_performance_score(si, ei, ci, rci):
    """
    Blend the four indices into one overall score.

    GPS = a_safety*SI + b_efficiency*EI + c_comfort*CI + d_compliance*RCI

    The weights come from INDICATOR_WEIGHTS['global_performance'].
    Higher is better overall performance.
    """
    shares = INDICATOR_WEIGHTS['global_performance']

    safety_part = shares['a_safety'] * si
    efficiency_part = shares['b_efficiency'] * ei
    comfort_part = shares['c_comfort'] * ci
    compliance_part = shares['d_compliance'] * rci

    return safety_part + efficiency_part + comfort_part + compliance_part


# ============================================================================
# CALCULATE ALL INDICATORS
# ============================================================================

_banner = "=" * 70
_divider = "-" * 70

print("\n" + _banner)
print("üî¨ CALCULATING PERFORMANCE INDICATORS")
print(_banner)

# The four component indices come first; GPS is derived from them
SI = calculate_safety_index(final_metrics)
EI = calculate_efficiency_index(final_metrics)
CI = calculate_comfort_index(final_metrics)
RCI = calculate_rule_compliance_index(final_metrics)
GPS = calculate_global_performance_score(SI, EI, CI, RCI)

# Display name -> score, in reporting order
_indicator_labels = (
    "Safety Index (SI)",
    "Efficiency Index (EI)",
    "Comfort Index (CI)",
    "Rule Compliance Index (RCI)",
    "Global Performance Score (GPS)",
)
indicators = dict(zip(_indicator_labels, (SI, EI, CI, RCI, GPS)))

# (minimum score, marker), highest band first; below every band -> last marker
_SCORE_MARKERS = ((0.75, "üü¢"), (0.50, "üü°"), (0.25, "üü†"))

print("\nüìä PERFORMANCE INDICATORS:")
print(_divider)
for indicator_name, value in indicators.items():
    emoji = next((mark for floor, mark in _SCORE_MARKERS if value >= floor), "üî¥")
    print(f"{emoji} {indicator_name:<35} | {value:>8.4f}")

print(_divider)
print(f"\nüéØ OVERALL PERFORMANCE: {GPS:.4f} / 1.0000")

# Verbal interpretation of the GPS, highest band first
_GPS_BANDS = (
    (0.80, "üåü EXCELLENT - Ready for deployment"),
    (0.60, "‚úÖ GOOD - Acceptable performance"),
    (0.40, "‚ö†Ô∏è  FAIR - Needs improvement"),
)
performance_label = next(
    (label for floor, label in _GPS_BANDS if GPS >= floor),
    "‚ùå POOR - Requires significant work",
)

print(f"   {performance_label}")
print(_banner)

# ============================================================================
# SAVE INDICATORS TO FILE
# ============================================================================

INDICATORS_DIR = os.path.join(BASE_OUTPUT_DIR, "indicators")
os.makedirs(INDICATORS_DIR, exist_ok=True)

# Same timestamp as the metrics CSVs, so all files of one run share a tag
_indicator_rows = list(indicators.items())
indicators_df = pd.DataFrame(_indicator_rows, columns=['Indicator', 'Value'])
indicators_filename = "indicators_" + timestamp + ".csv"
indicators_output_path = os.path.join(INDICATORS_DIR, indicators_filename)
indicators_df.to_csv(indicators_output_path, index=False)

print(f"\n‚úÖ Indicators saved to: {indicators_output_path}")

# ============================================================================
# BREAKDOWN ANALYSIS (For Understanding)
# ============================================================================
# Prints the components behind each index. Weight percentages are read from
# INDICATOR_WEIGHTS so the printout stays correct when the weights are edited,
# and each component is normalised as in the matching calculate_* function.

_w_safety = INDICATOR_WEIGHTS['safety_index']
_w_efficiency = INDICATOR_WEIGHTS['efficiency_index']
_w_comfort = INDICATOR_WEIGHTS['comfort_index']
_w_compliance = INDICATOR_WEIGHTS['rule_compliance_index']
_w_global = INDICATOR_WEIGHTS['global_performance']

print("\n" + "="*70)
print("üîç INDICATOR BREAKDOWN (What drives each score?)")
print("="*70)

print("\n1Ô∏è‚É£  SAFETY INDEX (SI) = {:.4f}".format(SI))
print("    Components:")
print(f"    - Collision Avoidance : {1 - get_metric_value(final_metrics, 'Collision Rate'):.4f} (weight: {_w_safety['w_collision']:.0%})")
avg_ttc = get_metric_value(final_metrics, "Avg Time-to-Collision (s)", -1)
# -1 (no TTC data) or any non-positive value scores 0.0, as in the Safety Index
ttc_norm = min(avg_ttc / SAFE_TTC_THRESHOLD, 1.0) if avg_ttc > 0 else 0.0
print(f"    - Time-to-Collision   : {ttc_norm:.4f} (weight: {_w_safety['w_ttc']:.0%})")
print(f"    - TTC Safety Margin   : {1 - get_metric_value(final_metrics, 'TTC Violation Rate'):.4f} (weight: {_w_safety['w_ttc_violations']:.0%})")

print("\n2Ô∏è‚É£  EFFICIENCY INDEX (EI) = {:.4f}".format(EI))
print("    Components:")
# Capped at 1.0 so the printed ratio is the value the Efficiency Index used
speed_ratio = min(get_metric_value(final_metrics, 'Average Speed (m/s)') / SPEED_LIMIT_MS, 1.0)
print(f"    - Speed Ratio         : {speed_ratio:.4f} (weight: {_w_efficiency['w_speed']:.0%})")
print(f"    - Success Rate        : {get_metric_value(final_metrics, 'Success Rate'):.4f} (weight: {_w_efficiency['w_success']:.0%})")

print("\n3Ô∏è‚É£  COMFORT INDEX (CI) = {:.4f}".format(CI))
print("    Components:")
jerk_norm = min(get_metric_value(final_metrics, "Avg Jerk (Comfort)") / MAX_EXPECTED_JERK, 1.0)
lc_norm = min(get_metric_value(final_metrics, "Avg Lane Changes / Episode") / MAX_EXPECTED_LANE_CHANGES, 1.0)
print(f"    - Smoothness (1-Jerk) : {1 - jerk_norm:.4f} (weight: {_w_comfort['w_jerk']:.0%})")
print(f"    - Lane Stability      : {1 - lc_norm:.4f} (weight: {_w_comfort['w_lane_changes']:.0%})")

print("\n4Ô∏è‚É£  RULE COMPLIANCE INDEX (RCI) = {:.4f}".format(RCI))
print("    Components:")
print(f"    - Speed Compliance    : {get_metric_value(final_metrics, 'Speed Limit Compliance'):.4f} (weight: {_w_compliance['w_speed_compliance']:.0%})")

print("\n5Ô∏è‚É£  GLOBAL PERFORMANCE SCORE (GPS) = {:.4f}".format(GPS))
print("    Weighted Combination:")
print(f"    - Safety      ({_w_global['a_safety']:.0%})  : {SI:.4f} ‚Üí {SI * _w_global['a_safety']:.4f}")
print(f"    - Efficiency  ({_w_global['b_efficiency']:.0%})  : {EI:.4f} ‚Üí {EI * _w_global['b_efficiency']:.4f}")
print(f"    - Comfort     ({_w_global['c_comfort']:.0%})  : {CI:.4f} ‚Üí {CI * _w_global['c_comfort']:.4f}")
print(f"    - Compliance  ({_w_global['d_compliance']:.0%})  : {RCI:.4f} ‚Üí {RCI * _w_global['d_compliance']:.4f}")

print("="*70)
print("\n‚ú® Analysis Complete! All metrics and indicators saved.")
print("="*70)

---

## 📈 PART 3: Visualization (Optional)

Run the cell below to create a bar chart of all indicators.

In [None]:
# ============================================================================
# VISUALIZATION: Bar Chart of Indicators
# ============================================================================
# Draws one bar per indicator (SI, EI, CI, RCI, GPS) with its score written
# above it, plus dashed reference lines at 0.5 and 0.75.

import matplotlib.pyplot as plt

# Tick labels, bar heights and fill colours, in display order
indicator_names = ['Safety\nIndex', 'Efficiency\nIndex', 'Comfort\nIndex',
                   'Rule\nCompliance', 'Global\nPerformance']
indicator_values = [SI, EI, CI, RCI, GPS]
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8']

# Create bar chart
fig, ax = plt.subplots(figsize=(12, 6))
_bar_style = dict(color=colors, alpha=0.8, edgecolor='black', linewidth=1.5)
bars = ax.bar(indicator_names, indicator_values, **_bar_style)

# Write each score centred just above its bar
_label_style = dict(ha='center', va='bottom', fontsize=12, fontweight='bold')
for rect, score in zip(bars, indicator_values):
    x_centre = rect.get_x() + rect.get_width()/2.
    ax.text(x_centre, rect.get_height(), f'{score:.3f}', **_label_style)

# Styling
ax.set_ylim(0, 1.0)
ax.set_ylabel('Score (0-1)', fontsize=12, fontweight='bold')
ax.set_title('Performance Indicators - Random Baseline Agent', fontsize=14, fontweight='bold', pad=20)

# (line height, colour, line alpha, caption, caption height in axes coords)
_reference_levels = (
    (0.5, 'gray', 0.5, 'Acceptable (0.5)', 0.51),
    (0.75, 'green', 0.3, 'Good (0.75)', 0.76),
)
for level, tint, opacity, _, _ in _reference_levels:
    ax.axhline(y=level, color=tint, linestyle='--', alpha=opacity, linewidth=1)
ax.grid(axis='y', alpha=0.3)

# Captions for the reference lines, placed near the left edge
for _, tint, _, caption, caption_y in _reference_levels:
    ax.text(0.02, caption_y, caption, transform=ax.transAxes,
            fontsize=9, color=tint, style='italic')

plt.tight_layout()
plt.show()

print("\nüìä Visualization complete!")

---

## 🏆 PART 4: Multi-Model Comparison (Optional)

**After running multiple models**, use the cell below to compare them all side-by-side.

This replaces the need for the separate `analyze_results.py` script!

In [None]:
# ============================================================================
# MULTI-MODEL COMPARISON
# ============================================================================
# Run this cell AFTER you've completed all model runs (Random, PPO, DQN, SAC)
# to generate a side-by-side comparison table

import glob

# --- Configuration ---
# (display name, glob pattern matching that model's summary CSVs)
MODELS_TO_COMPARE = [
    ("Random Baseline", "random_baseline_agent/summary/summary_*.csv"),
    ("PPO Agent", "ppo_agent/summary/summary_*.csv"),
    ("DQN Agent", "dqn_agent/summary/summary_*.csv"),
    ("SAC Agent", "sac_agent/summary/summary_*.csv"),
]

# Enable comparison mode
RUN_COMPARISON = False  # Set to True when you want to compare models

# (category label, comparison_df column, abbreviation shown next to the score)
_BEST_PERFORMER_COLUMNS = [
    ('Global Performance', 'Global_Performance_Score', 'GPS'),
    ('Safety', 'Safety_Index', 'SI'),
    ('Efficiency', 'Efficiency_Index', 'EI'),
    ('Comfort', 'Comfort_Index', 'CI'),
    ('Compliance', 'Rule_Compliance_Index', 'RCI'),
]

# (comparison_df column, legend label, colour, extra bar kwargs), left to right
_COMPARISON_BARS = [
    ('Safety_Index', 'Safety', '#FF6B6B', {}),
    ('Efficiency_Index', 'Efficiency', '#4ECDC4', {}),
    ('Comfort_Index', 'Comfort', '#45B7D1', {}),
    ('Rule_Compliance_Index', 'Compliance', '#FFA07A', {}),
    ('Global_Performance_Score', 'GPS', '#98D8C8', {'edgecolor': 'black', 'linewidth': 2}),
]

if RUN_COMPARISON:
    print("\n" + "="*70)
    print("üèÜ MULTI-MODEL COMPARISON")
    print("="*70)

    comparison_results = []

    for model_name, csv_pattern in MODELS_TO_COMPARE:
        csv_files = glob.glob(csv_pattern)

        if not csv_files:
            print(f"\n‚ö†Ô∏è  No data found for {model_name} (pattern: {csv_pattern})")
            continue

        # Pick the most recently written summary. Modification time is used
        # because ctime on Unix is the last metadata change, not creation.
        latest_csv = max(csv_files, key=os.path.getmtime)

        print(f"\nüìä Loading {model_name} from: {os.path.basename(latest_csv)}")

        # Load metrics as a {metric name: value} mapping
        model_metrics_df = pd.read_csv(latest_csv)
        model_metrics = dict(zip(model_metrics_df['Metric'], model_metrics_df['Value']))

        # Calculate indicators for this model
        model_SI = calculate_safety_index(model_metrics)
        model_EI = calculate_efficiency_index(model_metrics)
        model_CI = calculate_comfort_index(model_metrics)
        model_RCI = calculate_rule_compliance_index(model_metrics)
        model_GPS = calculate_global_performance_score(model_SI, model_EI, model_CI, model_RCI)

        # Store results
        comparison_results.append({
            'Model': model_name,
            'Safety_Index': model_SI,
            'Efficiency_Index': model_EI,
            'Comfort_Index': model_CI,
            'Rule_Compliance_Index': model_RCI,
            'Global_Performance_Score': model_GPS,
            'Collision_Rate': get_metric_value(model_metrics, "Collision Rate"),
            'Success_Rate': get_metric_value(model_metrics, "Success Rate"),
            'Avg_Speed': get_metric_value(model_metrics, "Average Speed (m/s)"),
            'Avg_Reward': get_metric_value(model_metrics, "Average Reward / Episode")
        })

    if comparison_results:
        # Create comparison DataFrame
        comparison_df = pd.DataFrame(comparison_results)

        # Display comparison table
        print("\n" + "="*70)
        print("üìä PERFORMANCE COMPARISON TABLE")
        print("="*70)
        print("\n--- Key Indicators ---")
        print(comparison_df[['Model', 'Global_Performance_Score', 'Safety_Index',
                             'Efficiency_Index', 'Comfort_Index', 'Rule_Compliance_Index']].to_string(index=False))

        print("\n--- Raw Metrics ---")
        print(comparison_df[['Model', 'Collision_Rate', 'Success_Rate',
                             'Avg_Speed', 'Avg_Reward']].to_string(index=False))

        print("="*70)

        # Highlight best performers
        print("\nü•á BEST PERFORMERS:")
        print("-" * 70)

        # Category -> row of the model with the highest score in that column
        best_performers = {
            category: comparison_df.loc[comparison_df[column].idxmax()]
            for category, column, _ in _BEST_PERFORMER_COLUMNS
        }

        for category, column, metric in _BEST_PERFORMER_COLUMNS:
            best_model = best_performers[category]
            score = best_model[column]
            print(f"   {category:<20}: {best_model['Model']:<20} ({metric} = {score:.4f})")

        print("="*70)

        # Save comparison results
        comparison_output_dir = "model_comparison"
        os.makedirs(comparison_output_dir, exist_ok=True)
        comparison_output_path = os.path.join(comparison_output_dir,
                                              f"comparison_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv")
        comparison_df.to_csv(comparison_output_path, index=False)
        print(f"\n‚úÖ Comparison results saved to: {comparison_output_path}")

        # Optional: Create comparison visualization. Best effort: a missing
        # matplotlib or a plotting failure is reported, never raised, because
        # the comparison CSV has already been written.
        try:
            import matplotlib.pyplot as plt

            # One group of five bars per model
            models = comparison_df['Model'].tolist()
            x = np.arange(len(models))
            width = 0.15

            fig, ax = plt.subplots(figsize=(14, 7))

            # Offsets run from -2*width to +2*width so each group is centred
            for slot, (column, label, color, extra_style) in enumerate(_COMPARISON_BARS):
                ax.bar(x + (slot - 2) * width, comparison_df[column], width,
                       label=label, color=color, alpha=0.8, **extra_style)

            # Styling
            ax.set_ylabel('Score (0-1)', fontsize=12, fontweight='bold')
            ax.set_title('Multi-Model Performance Comparison', fontsize=14, fontweight='bold', pad=20)
            ax.set_xticks(x)
            ax.set_xticklabels(models, fontsize=11)
            ax.legend(loc='upper left', fontsize=10)
            ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.3, linewidth=1)
            ax.axhline(y=0.75, color='green', linestyle='--', alpha=0.3, linewidth=1)
            ax.set_ylim(0, 1.0)
            ax.grid(axis='y', alpha=0.3)

            plt.tight_layout()
            plt.show()

            print("\nüìä Comparison visualization complete!")

        except Exception as e:
            print(f"\n‚ö†Ô∏è  Could not create visualization: {e}")

    else:
        print("\n‚ùå No model data found. Make sure you've run simulations for at least one model.")
        print("   Expected directory structure:")
        for _, csv_pattern in MODELS_TO_COMPARE:
            print(f"   - {csv_pattern}")

else:
    print("\nüí° TIP: To compare multiple models:")
    print("   1. Run this notebook for each model (Random, PPO, DQN, SAC)")
    print("   2. Set RUN_COMPARISON = True in this cell")
    print("   3. Re-run this cell to see side-by-side comparison!")
    print("\n   This replaces the need for analyze_results.py üéØ")