# In [None]:  (Jupyter cell prompt left over from the notebook export)
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

_CASE_DIR_PATTERN = re.compile(r'Case_([A-D])_StopToStop_(\d+)_Evenly_Spaced_Stops')
_REQUIRED_COLUMNS = {'Vehicle Type', 'Mean Speed Across Time'}


def _prefixed_subdirs(parent, prefix):
    """Return sorted ``(name, path)`` pairs for directories in *parent* named ``prefix*``."""
    found = []
    # Sorting makes the row order of the final table reproducible; os.listdir
    # returns entries in arbitrary order.
    for name in sorted(os.listdir(parent)):
        path = os.path.join(parent, name)
        # Stray files that merely share the prefix (notes, archives, ...) must
        # not be descended into: os.listdir on a file raises NotADirectoryError.
        if name.startswith(prefix) and os.path.isdir(path):
            found.append((name, path))
    return found


def _labelled_rate_dirs(density_path):
    """Return ``(rate_text, "Low"/"High", path)`` for each usable PassengerRate_ directory."""
    parsed = []
    for name, path in _prefixed_subdirs(density_path, "PassengerRate_"):
        rate_text = name.split("_")[1]
        try:
            rate = float(rate_text)
        except ValueError:
            # Skip only the malformed entry instead of discarding the whole
            # density folder.
            continue
        parsed.append((rate, rate_text, path))

    if len(parsed) < 2:
        return []  # Need at least a low and a high rate to label anything.

    lowest = min(rate for rate, _, _ in parsed)
    # The smallest rate is "Low"; every other rate (including any intermediate
    # ones when more than two exist) is labelled "High".
    return [
        (rate_text, "Low" if rate == lowest else "High", path)
        for rate, rate_text, path in parsed
    ]


def _mean_truck_speed(file_path):
    """Return the mean truck speed stored in one trial CSV, or None if unavailable."""
    if not os.path.isfile(file_path):
        return None
    try:
        trial_df = pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A zero-byte file carries no columns; treat it like a file that lacks
        # the required columns rather than aborting the whole aggregation.
        return None
    if not _REQUIRED_COLUMNS.issubset(trial_df.columns):
        return None

    is_truck = trial_df['Vehicle Type'] == 'truck'
    truck_speeds = trial_df.loc[is_truck, 'Mean Speed Across Time'].dropna()
    if truck_speeds.empty:
        return None
    return truck_speeds.mean()


def aggregate_truck_speeds(base_dir, case_labels, num_trials=50):
    """Collect the per-trial mean truck speed from a nested results tree.

    The tree is expected to be laid out as::

        base_dir/
          Case_<A-D>_StopToStop_<spacing>_Evenly_Spaced_Stops/
            Kappa_<kappa>/
              Density_<density>/
                PassengerRate_<rate>/
                  VehicleData/
                    Trial_<t>_D<density>_K<kappa>_R<rate>_S<spacing>.csv

    Entries that do not match this layout are skipped. Within each density
    folder the smallest passenger rate is labelled ``"Low"`` and every other
    rate ``"High"``; density folders with fewer than two usable rate
    directories are skipped entirely.

    Args:
        base_dir: Root directory containing the ``Case_*`` folders.
        case_labels: Display labels indexed by case letter (``A`` -> index 0,
            ..., ``D`` -> index 3).
        num_trials: Highest trial number to look for; trials ``1..num_trials``
            are probed and missing files are skipped.

    Returns:
        A ``pandas.DataFrame`` with one row per trial file that contained at
        least one non-null truck speed, with columns ``Case``,
        ``Mean Truck Speed``, ``Kappa``, ``Density``, ``Rate`` and
        ``Stop Spacing``. ``Kappa`` and ``Density`` are the strings taken from
        the directory names; ``Stop Spacing`` is an ``int``.

    Raises:
        IndexError: If ``case_labels`` has no entry for a case letter found on
            disk.
        OSError: If ``base_dir`` cannot be listed (e.g. it does not exist).
    """
    data = {
        "Case": [],
        "Mean Truck Speed": [],
        "Kappa": [],
        "Density": [],
        "Rate": [],
        "Stop Spacing": []
    }

    for case_dir, case_path in _prefixed_subdirs(base_dir, "Case_"):
        match = _CASE_DIR_PATTERN.match(case_dir)
        if not match:
            continue
        case_label = case_labels[ord(match.group(1)) - ord('A')]
        stop_spacing = int(match.group(2))

        for kappa_dir, kappa_path in _prefixed_subdirs(case_path, "Kappa_"):
            kappa_value = kappa_dir.split("_")[1]

            for density_dir, density_path in _prefixed_subdirs(kappa_path, "Density_"):
                density_value = density_dir.split("_")[1]

                for rate_text, rate_label, rate_path in _labelled_rate_dirs(density_path):
                    vehicle_data_path = os.path.join(rate_path, "VehicleData")
                    if not os.path.isdir(vehicle_data_path):
                        continue

                    for trial in range(1, num_trials + 1):
                        file_name = (
                            f"Trial_{trial}_D{density_value}_K{kappa_value}"
                            f"_R{rate_text}_S{stop_spacing}.csv"
                        )
                        mean_speed = _mean_truck_speed(
                            os.path.join(vehicle_data_path, file_name)
                        )
                        if mean_speed is None:
                            continue

                        data["Case"].append(case_label)
                        data["Mean Truck Speed"].append(mean_speed)
                        data["Kappa"].append(kappa_value)
                        data["Density"].append(density_value)
                        data["Rate"].append(rate_label)
                        data["Stop Spacing"].append(stop_spacing)

    return pd.DataFrame(data)

def plot_truck_speed_distributions(df, case_labels):
    """Save one box plot of mean truck speed per (Kappa, Density, Rate) combination.

    For every combination of the ``Kappa`` and ``Density`` values found in
    *df* (ordered numerically) and the rate labels ``"Low"`` and ``"High"``,
    the matching rows are drawn as a box plot with ``Stop Spacing`` on the
    x-axis, ``Mean Truck Speed`` on the y-axis and one hue per entry of
    *case_labels*. Outliers are not drawn. Combinations with no rows are
    skipped.

    Args:
        df: Table with at least the columns ``Kappa``, ``Density``, ``Rate``,
            ``Stop Spacing``, ``Mean Truck Speed`` and ``Case``.
        case_labels: Hue order for the ``Case`` column; also determines the
            legend order.

    Returns:
        None. Each figure is written to
        ``TruckSpeed_Kappa_<kappa>_Density_<density>_Rate_<rate>.png`` (a
        relative path) and then closed.
    """
    sns.set_theme(style="whitegrid", context="talk", font_scale=1.2)

    case_colours = ["#1F77B4", "#FF7F0E", "#2CA02C", "#D62728"]

    # Kappa/Density are compared as stored but ordered by their numeric value.
    kappas = sorted(df['Kappa'].unique(), key=float)
    densities = sorted(df['Density'].unique(), key=float)
    combinations = [
        (kappa, density, rate)
        for kappa in kappas
        for density in densities
        for rate in ("Low", "High")
    ]

    for kappa, density, rate in combinations:
        selected = (
            (df['Kappa'] == kappa)
            & (df['Density'] == density)
            & (df['Rate'] == rate)
        )
        panel = df[selected]
        if panel.empty:
            continue

        plt.figure(figsize=(10, 6))
        sns.boxplot(
            data=panel,
            x="Stop Spacing",
            y="Mean Truck Speed",
            hue="Case",
            hue_order=case_labels,
            palette=case_colours,
            showfliers=False,
        )

        # No title is drawn; the combination is encoded in the file name below.
        plt.xlabel(r"$\Delta x_{\mathrm{stop}}$", fontsize=40)
        plt.ylabel(r"$\langle v_{\mathrm{truck}} \rangle_{\mathrm{temporal}}$", fontsize=40)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20)
        plt.legend(title="", fontsize=14)
        plt.grid(True, linestyle="--", alpha=0.6)
        sns.despine()
        plt.tight_layout()

        plt.savefig(
            f"TruckSpeed_Kappa_{kappa}_Density_{density}_Rate_{rate}.png",
            dpi=200,
        )
        plt.close()

# -----------------------------------------------
# Driver: configure inputs, aggregate, then plot
# -----------------------------------------------

# The results folder is resolved three directory levels above the current
# working directory.
base_dir = os.path.abspath(
    os.path.join(os.getcwd(), '../../..', 'With Designated Stops Results')
)

# Legend labels, looked up by case letter during aggregation
# (A -> first entry, ..., D -> last entry).
case_labels = [
    "OO",
    r"$O\tilde{T}$",
    r"$\tilde{J}O$",
    r"$\tilde{J}\tilde{T}$",
]

print("🔄 Aggregating truck speed data...")
df_truck = aggregate_truck_speeds(base_dir, case_labels, num_trials=50)

print("✅ Aggregation complete. Plotting...")
plot_truck_speed_distributions(df_truck, case_labels)

print("✅ All plots saved.")
