In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

# Matches case directories such as "Case_B_StopToStop_5_Evenly_Spaced_Stops";
# group 1 is the case letter, group 2 the stop spacing.
_CASE_DIR_PATTERN = re.compile(r'Case_([A-D])_StopToStop_(\d+)_Evenly_Spaced_Stops')

# Columns a trial CSV must provide to be usable.
_REQUIRED_COLUMNS = frozenset({'Spawning Time', 'Riding Time', 'Riding Status'})

# Riding statuses that are kept in the output.
_KEPT_STATUSES = ('In-Transit', 'Alighted')


def _sorted_subdirs(parent, prefix):
    """Return the sorted names of sub-directories of *parent* starting with *prefix*."""
    return sorted(
        name for name in os.listdir(parent)
        if name.startswith(prefix) and os.path.isdir(os.path.join(parent, name))
    )


def _label_rate_dirs(rate_dirs):
    """Map each ``PassengerRate_<value>`` directory name to "Low" or "High".

    The directory with the smallest numeric value is "Low"; every other
    directory is "High" (so with more than two rates, all non-minimum rates
    share the "High" label).

    Returns None when any name lacks a parsable numeric value or when fewer
    than two rate directories are present, in which case the caller skips the
    whole density folder.
    """
    try:
        values = {name: float(name.split('_')[1]) for name in rate_dirs}
    except (IndexError, ValueError):
        return None
    if len(values) < 2:
        return None
    lowest = min(values.values())
    return {name: "Low" if value == lowest else "High" for name, value in values.items()}


def _read_trial_delays(file_path, t_max_speed, min_spawn_time):
    """Read one trial CSV and return ``(relative_delays, statuses)`` as lists.

    Returns None when the file lacks one of the required columns. Rows are
    kept only if they spawned after *min_spawn_time*, have a kept riding
    status, and have a non-missing riding time.
    """
    df = pd.read_csv(file_path)
    if not _REQUIRED_COLUMNS.issubset(df.columns):
        return None

    keep = (
        (df['Spawning Time'] > min_spawn_time)
        & df['Riding Status'].isin(_KEPT_STATUSES)
        & df['Riding Time'].notna()
    )
    kept = df.loc[keep]
    relative_delay = (kept['Riding Time'] - t_max_speed) / t_max_speed
    return relative_delay.tolist(), kept['Riding Status'].tolist()


def extract_relative_delays_regrouped(base_dir, case_labels, t_max_speed, num_trials=50,
                                      min_spawn_time=6999):
    """Collect per-passenger relative delays from a results directory tree.

    The tree is expected to look like::

        base_dir/Case_<A-D>_StopToStop_<spacing>_Evenly_Spaced_Stops/
            Kappa_<k>/Density_<d>/PassengerRate_<r>/PassengerData/*_S<spacing>*.csv

    Args:
        base_dir: Root directory containing the ``Case_*`` folders.
        case_labels: Display labels indexed by case letter (A -> 0, ..., D -> 3).
        t_max_speed: Reference travel time; the relative delay of a passenger
            is ``(Riding Time - t_max_speed) / t_max_speed``.
        num_trials: Unused; retained so existing callers keep working.
        min_spawn_time: Only rows whose ``Spawning Time`` is strictly greater
            than this value are kept (presumably to drop a warm-up period --
            confirm with the simulation setup).

    Returns:
        A DataFrame with one row per kept passenger and the columns
        ``Case``, ``Relative Delay``, ``Kappa``, ``Rate`` ("Low"/"High"),
        ``Stop Spacing`` (int), ``Status`` and ``Density``. ``Kappa`` and
        ``Density`` are the strings taken from the folder names.

    Raises:
        IndexError: If *case_labels* has no entry for a case letter found on disk.
    """
    columns = ("Case", "Relative Delay", "Kappa", "Rate", "Stop Spacing", "Status", "Density")
    data = {name: [] for name in columns}

    # Directory listings are sorted so the row order is reproducible across runs.
    for case_dir in sorted(os.listdir(base_dir)):
        case_path = os.path.join(base_dir, case_dir)
        match = _CASE_DIR_PATTERN.match(case_dir)
        if match is None or not os.path.isdir(case_path):
            continue

        case_label = case_labels[ord(match.group(1)) - ord('A')]
        stop_spacing = int(match.group(2))
        # The lookahead stops spacing 5 from also matching files tagged "_S50".
        spacing_tag = re.compile(rf"_S{stop_spacing}(?!\d)")

        for kappa_dir in _sorted_subdirs(case_path, 'Kappa_'):
            kappa_value = kappa_dir.split('_')[1]
            kappa_path = os.path.join(case_path, kappa_dir)

            for density_dir in _sorted_subdirs(kappa_path, 'Density_'):
                density_value = density_dir.split('_')[1]
                density_path = os.path.join(kappa_path, density_dir)

                rate_labels = _label_rate_dirs(_sorted_subdirs(density_path, 'PassengerRate_'))
                if rate_labels is None:
                    # Low and High cannot both be identified for this density.
                    continue

                for rate_dir, rate_label in rate_labels.items():
                    passenger_data_path = os.path.join(density_path, rate_dir, 'PassengerData')
                    if not os.path.isdir(passenger_data_path):
                        continue

                    trial_files = sorted(
                        name for name in os.listdir(passenger_data_path)
                        if name.endswith('.csv') and spacing_tag.search(name)
                    )
                    for filename in trial_files:
                        result = _read_trial_delays(
                            os.path.join(passenger_data_path, filename),
                            t_max_speed,
                            min_spawn_time,
                        )
                        if result is None:
                            continue

                        delays, statuses = result
                        count = len(delays)
                        data["Case"].extend([case_label] * count)
                        data["Relative Delay"].extend(delays)
                        data["Kappa"].extend([kappa_value] * count)
                        data["Rate"].extend([rate_label] * count)
                        data["Stop Spacing"].extend([stop_spacing] * count)
                        data["Status"].extend(statuses)
                        data["Density"].extend([density_value] * count)

    return pd.DataFrame(data)


def plot_relative_delays_regrouped(df, case_labels):
    """Save one box plot of relative delay per status/density/kappa/rate combination.

    For each riding status ("Alighted", then "In-Transit") and each
    combination of density, kappa and rate label present in *df*, the matching
    rows are drawn as a box plot of "Relative Delay" against "Stop Spacing",
    coloured by "Case". Combinations without rows are skipped. Outliers are
    not drawn.

    Args:
        df: DataFrame with the columns "Status", "Density", "Kappa", "Rate",
            "Stop Spacing", "Relative Delay" and "Case".
        case_labels: Case labels, used as the hue order of the boxes.

    Each figure is written as a PNG (dpi 200) under a relative file name of
    the form ``RelativeDelay_<status>_Density_<d>_Kappa_<k>_Rate_<r>.png``.
    """
    sns.set_theme(style="ticks", context="talk", font_scale=1.2)

    # Densities and kappas are compared numerically, not as strings.
    densities = sorted(df['Density'].unique(), key=float)
    kappas = sorted(df['Kappa'].unique(), key=float)
    combos = [
        (density, kappa, rate)
        for density in densities
        for kappa in kappas
        for rate in ("Low", "High")
    ]
    case_colors = ["#1F77B4", "#FF7F0E", "#2CA02C", "#D62728"]

    for status in ("Alighted", "In-Transit"):
        rows_for_status = df[df["Status"] == status]

        for density, kappa, rate in combos:
            matches = (
                (rows_for_status["Density"] == density)
                & (rows_for_status["Kappa"] == kappa)
                & (rows_for_status["Rate"] == rate)
            )
            panel = rows_for_status[matches]
            if panel.empty:
                continue

            plt.figure(figsize=(10, 6))
            sns.boxplot(
                x="Stop Spacing",
                y="Relative Delay",
                hue="Case",
                data=panel,
                hue_order=case_labels,
                palette=case_colors,
                showfliers=False,
            )

            # Axis labelling and tick sizes.
            plt.xlabel(r"$\Delta x_{stop}$", fontsize=40)
            plt.ylabel(r"$t_{delay, relative}$", fontsize=42)
            plt.xticks(fontsize=35)
            plt.yticks(fontsize=30)

            plt.grid(True, linestyle='--', alpha=0.6)
            plt.legend(title="", fontsize=22)

            # despine() hides the top/right spines; they are switched back on
            # right after, so the final figure has a full frame.
            sns.despine()
            for frame_edge in plt.gca().spines.values():
                frame_edge.set_visible(True)
            plt.tight_layout()

            out_name = (
                f"RelativeDelay_{status}_Density_{density}"
                f"_Kappa_{kappa}_Rate_{rate}.png"
            )
            plt.savefig(out_name, dpi=200)
            plt.close()


# ========================
# Parameters
# ========================
# Max expected no-delay travel time; riding times are normalised against it.
t_max_speed = 48

# Display labels for cases A-D, in that order.
case_labels = [
    "OO",
    r"$O\tilde{T}$",
    r"$\tilde{J}O$",
    r"$\tilde{J}\tilde{T}$",
]

# Results folder, located three levels above the current working directory.
base_dir = os.path.abspath(
    os.path.join(os.getcwd(), '../../..', 'With Designated Stops Results')
)

# Collect the relative delays, then write one figure per parameter combination.
df_delays = extract_relative_delays_regrouped(
    base_dir,
    case_labels,
    t_max_speed,
    num_trials=50,
)
plot_relative_delays_regrouped(df_delays, case_labels)
