In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
import ast
from typing import Optional, Dict, List
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
# Directory that holds the simulation output folders, param_set_list.csv and the
# results folder (all later paths are resolved relative to the working directory).
# Leave empty to keep the current working directory: os.chdir("") raises
# FileNotFoundError, which would abort the notebook at this cell.
PROJECT_DIR = ""
if PROJECT_DIR:
    os.chdir(PROJECT_DIR)

In [None]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

## Get aggregated results

In [None]:
def get_ratio(num, den):
    """Return ``num / den`` rounded to two decimals, or 0 when ``den`` is zero (falsy)."""
    return round(num / den, 2) if den else 0


def _distance_rows(frame, scope, group):
    """Build long-format rows with the mean/max assignment distance per final status."""
    rows = []
    for status in ("Completed", "Abandoned"):
        distances = frame.loc[frame["status"] == status, "assignment_distance"]
        suffix = status.lower()
        # mean()/max() of an empty selection are NaN; round() passes NaN through.
        rows.append((scope, group, f"avg_assignment_distance_{suffix}", round(distances.mean(), 2)))
        rows.append((scope, group, f"max_assignment_distance_{suffix}", round(distances.max(), 2)))
    return rows


def _passenger_rows(frame, scope, group, denominator):
    """Build count and ratio rows for each passenger status; ratios divide by ``denominator``."""
    rows = []
    for status in ("Completed", "Cancelled", "Abandoned"):
        count = (frame["status"] == status).sum()
        suffix = status.lower()
        rows.append((scope, group, f"{suffix}_count", count))
        rows.append((scope, group, f"ratio_{suffix}", get_ratio(count, denominator)))
    return rows


def _driver_rows(frame, scope, group, reference=None):
    """Build completed/abandoned/matched count and ratio rows for drivers.

    Each ratio divides the count in ``frame`` by the same-named count in ``reference``.
    When ``reference`` is None the counts of ``frame`` itself are used.
    Returns the rows together with the counts so they can serve as a later reference.
    """
    completed = (frame["status"] == "Completed").sum()
    abandoned = (frame["status"] == "Abandoned").sum()
    counts = {"completed": completed, "abandoned": abandoned, "matched": completed + abandoned}
    if reference is None:
        reference = counts

    rows = []
    for name, count in counts.items():
        rows.append((scope, group, name, count))
        rows.append((scope, group, f"{name}_ratio", get_ratio(count, reference[name])))
    return rows, counts


def get_metrics(df: pd.DataFrame, entity_type: str, priority_col: Optional[str] = "all") -> pd.DataFrame:
    """
    Compute completion, cancellation, abandonment ratios and distance metrics
    overall and optionally broken down by priority value.

    Args:
    df (pd.DataFrame): Input dataframe containing at least ["status", "assignment_distance"] and the selected priority columns.
    entity_type (str): Entity type to calculate metrics for, 'passenger' or 'driver'. Any other value yields only the total and distance metrics.
    priority_col (str): "all" (default) to break down by every one of ["priority_0.05", "priority_0.1", "priority_0.2", "priority_0.3"],
        or a single column name starting with "priority_". Any other value (including None) disables the priority breakdown.

    Returns:
    pd.DataFrame: One row per (scope, group) pair with one column per metric. The overall row has
        scope == group == "all"; priority rows use the priority column name as scope and
        "priority" (column value 1) or "nonpriority" (column value 0) as group.
        An empty input returns an empty dataframe with only the "scope" and "group" columns.

    Raises:
    KeyError: If "status", "assignment_distance" or a selected priority column is missing.
    """
    total = len(df)
    if total == 0:
        # Return an (empty) dataframe so callers can still pd.concat the result.
        return pd.DataFrame(columns=["scope", "group"])

    results = [("all", "all", "total", total)]
    results.extend(_distance_rows(df, "all", "all"))

    overall_driver_counts = None
    if entity_type == "passenger":
        results.extend(_passenger_rows(df, "all", "all", total))
    elif entity_type == "driver":
        # NOTE(review): driver ratios are relative to the overall completed/abandoned/matched
        # counts, so in this "all" row they are always 1 (or 0 when the count is 0).
        # Confirm this is intended rather than dividing by `total`.
        driver_rows, overall_driver_counts = _driver_rows(df, "all", "all")
        results.extend(driver_rows)

    # Select priority columns.
    priority_values = [0.05, 0.1, 0.2, 0.3]
    if priority_col == "all":
        selected = [f"priority_{t}" for t in priority_values]
    elif isinstance(priority_col, str) and priority_col.startswith("priority_"):
        selected = [priority_col]
    else:
        selected = []

    # Calculate metrics per priority column, split into priority / non-priority rows.
    for col in selected:
        for val, label in ((1, "priority"), (0, "nonpriority")):
            subset = df[df[col] == val]
            sub_total = len(subset)
            results.append((col, label, "total", sub_total))
            results.append((col, label, "ratio", get_ratio(sub_total, total)))
            results.extend(_distance_rows(subset, col, label))

            if entity_type == "passenger":
                results.extend(_passenger_rows(subset, col, label, sub_total))
            elif entity_type == "driver":
                driver_rows, _ = _driver_rows(subset, col, label, overall_driver_counts)
                results.extend(driver_rows)

    df_long = pd.DataFrame(results, columns=["scope", "group", "metric", "value"])
    # Every (scope, group, metric) triple occurs once, so a plain unstack is enough.
    # Unlike pivot_table it keeps metric columns whose values are all NaN, which keeps
    # the output schema stable across inputs.
    df_wide = df_long.set_index(["scope", "group", "metric"])["value"].unstack("metric").reset_index()

    return df_wide

In [None]:
def load_and_clean(path: str) -> pd.DataFrame:
    """
    Read one simulation outcome CSV and keep only the usable rows.

    The "loc" column is parsed with ast.literal_eval. Rows are kept only when
    arrival_time is greater than 10 and 'status' is not missing.

    Args:
        path (str): Full path to the CSV file.

    Returns:
        pd.DataFrame: The filtered rows, keeping their original index labels.
    """
    raw = pd.read_csv(path, converters={"loc": ast.literal_eval})
    late_arrivals = raw.loc[raw["arrival_time"] > 10]
    return late_arrivals.dropna(subset=["status"])

def process_folder(folder: str, save_dir: Optional[str] = None) -> Dict[str, Optional[pd.DataFrame]]:
    """
    Process a folder of passenger and driver CSVs, compute metrics, and return results.

    For every parameter setting (1..108) and replication (1..3) the files
    ``result_pdf_<setting>_<replication>.csv`` (passengers) and
    ``result_ddf_<setting>_<replication>.csv`` (drivers) are read from
    ``<cwd>/<folder>``. The priority breakdown passed to get_metrics is looked
    up in the module-level ``priority_map``.

    Driver metrics are computed for the "base" and "nearest_neighbor" folders and
    for folders whose name contains "driver". Passenger metrics are computed for
    those folders and for folders whose name contains "passenger".

    Args:
        folder (str): Folder name containing simulation outcome CSV files.
        save_dir (str): Directory to save outputs. When given, the results are written to
            ``<folder>_passenger_results.csv`` and ``<folder>_driver_results.csv`` in it.

    Returns:
        Dict with keys "passenger" and "driver". Each value is the concatenation of all
        per-replication metric tables with an added "setting" column, or None when no
        metrics were computed for that entity.
    """

    print(f"Processing folder: {folder}")
    include_priorities = priority_map.get(folder)
    print(f"Include priorities: {include_priorities}")

    is_baseline = folder in ["base", "nearest_neighbor"]
    wants_driver = is_baseline or "driver" in folder
    # Baseline folders satisfy both the driver and the passenger condition; computing
    # the passenger metrics exactly once per replication avoids duplicated rows.
    wants_passenger = wants_driver or "passenger" in folder

    folder_path = os.path.join(os.getcwd(), folder)
    overall_results = {"passenger": [], "driver": []}

    for setting in range(1, 109):
        print(f"Parameter Setting: {setting}")
        setting_results = {"passenger": [], "driver": []}

        for j in [1, 2, 3]:
            passenger_file = os.path.join(folder_path, f"result_pdf_{setting:03d}_{j:03d}.csv")
            driver_file = os.path.join(folder_path, f"result_ddf_{setting:03d}_{j:03d}.csv")

            passengers_df = load_and_clean(passenger_file)
            drivers_df = load_and_clean(driver_file)

            if wants_driver:
                # Drivers carry no assignment distance of their own; take it from the
                # passenger each driver was assigned to.
                drivers_df = drivers_df.merge(
                    passengers_df[["id", "assignment_distance"]],
                    how="left",
                    left_on="assigned_passenger",
                    right_on="id",
                )
                setting_results["driver"].append(get_metrics(drivers_df, "driver", include_priorities))

            if wants_passenger:
                setting_results["passenger"].append(get_metrics(passengers_df, "passenger", include_priorities))

        for entity in ["passenger", "driver"]:
            if setting_results[entity]:
                df_concat = pd.concat(setting_results[entity], ignore_index=True)
                df_concat["setting"] = setting
                overall_results[entity].append(df_concat)

    final_passenger_df = pd.concat(overall_results["passenger"], ignore_index=True) if overall_results["passenger"] else None
    final_driver_df = pd.concat(overall_results["driver"], ignore_index=True) if overall_results["driver"] else None

    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
        if final_passenger_df is not None:
            final_passenger_df.to_csv(os.path.join(save_dir, f"{folder}_passenger_results.csv"), index=False)
        if final_driver_df is not None:
            final_driver_df.to_csv(os.path.join(save_dir, f"{folder}_driver_results.csv"), index=False)

    return {"passenger": final_passenger_df, "driver": final_driver_df}


In [None]:
def process_all_folders(folders: List[str], save_dir: Optional[str] = None) -> Dict[str, Dict[str, Optional[pd.DataFrame]]]:
    """
    Run process_folder for every folder in a process pool and collect the outcomes.

    A folder whose processing raises is reported on stdout and recorded with
    None for both entities, so one failing folder does not stop the others.

    Args:
    folders (List[str]): List of folder names to process.
    save_dir (str): Directory to save outputs (forwarded to process_folder).

    Returns:
    Dict mapping each folder name to the dict returned by process_folder.
    """

    collected = {}
    with ProcessPoolExecutor() as pool:
        # Remember which folder each future belongs to, since they finish in any order.
        pending = {}
        for name in folders:
            pending[pool.submit(process_folder, name, save_dir)] = name

        progress = tqdm(as_completed(pending), total=len(folders), desc="Processing folders")
        for finished in progress:
            folder_name = pending[finished]
            try:
                collected[folder_name] = finished.result()
                print(f"Completed folder: {folder_name}")
            except Exception as e:
                print(f"Error processing folder {folder_name}: {e}")
                collected[folder_name] = {"passenger": None, "driver": None}
    return collected

In [None]:
# Maps each simulation folder to the priority column(s) its metrics are broken down by:
# the two baselines use every priority column ("all"), each priority run only its own.
priority_map = {
    "base": "all",
    "nearest_neighbor": "all",
    **{
        f"{side}_priority_{level}": f"priority_{level}"
        for level in ("0.05", "0.1", "0.2", "0.3")
        for side in ("passenger", "driver")
    },
}

# Processing order: baselines first, then all passenger-priority runs, then all driver-priority runs.
folders = (
    ["base", "nearest_neighbor"]
    + [name for name in priority_map if name.startswith("passenger_")]
    + [name for name in priority_map if name.startswith("driver_")]
)

In [None]:
# Aggregate every configured folder; the per-folder CSVs are written to <cwd>/results.
all_results = process_all_folders(
    folders=folders,
    save_dir=os.path.join(os.getcwd(), "results"),
)

## Analyze results

In [None]:
def filter_df(df, scope, group):
  """
  Keep the rows of one (scope, group) pair and average them per parameter combination.

  The remaining columns are averaged per ("lambda_p", "lambda_d_ratio", "scope", "group")
  and rounded to two decimals. The input dataframe is not modified.
  """
  matches_scope = df['scope'] == scope
  matches_group = df['group'] == group
  selected_rows = df.loc[matches_scope & matches_group]
  averaged = selected_rows.groupby(['lambda_p', 'lambda_d_ratio', 'scope', 'group']).mean()
  return averaged.round(2).reset_index()

def get_metric_comparison(df_list, algorithm_label_list, metric_list, scope_list):
  """
  Build one comparison table per metric across algorithms and scopes.

  Each dataframe in df_list is paired with the label at the same position in
  algorithm_label_list; zip() stops at the shorter of the two lists. For the scope
  'all' the overall rows (group 'all') are used, for any other scope the rows of
  the 'priority' group. Dataframes that do not contain a scope are skipped for it.

  Returns a dict mapping each metric name to a table indexed by
  ("lambda_p", "lambda_d_ratio") with ("priority", "scope") columns, where
  "priority" holds the algorithm label.
  """
  wanted_columns = ["lambda_p", "lambda_d_ratio", "priority", "scope"] + metric_list
  pieces = []

  for scope in scope_list:
    group = 'all' if scope == 'all' else 'priority'
    for frame, label in zip(df_list, algorithm_label_list):
      if scope not in frame['scope'].unique():
        continue
      averaged = filter_df(frame, scope, group)
      averaged["priority"] = label
      averaged['scope'] = scope
      pieces.append(averaged[wanted_columns])

  combined = pd.concat(pieces, ignore_index=True)

  return {
      metric: combined.pivot_table(index=["lambda_p", "lambda_d_ratio"],
                                   columns=["priority", "scope"],
                                   values=metric)
      for metric in metric_list
  }

In [None]:
# Load the table of parameter settings (comma-separated, read_csv's default delimiter).
params_setting = pd.read_csv("param_set_list.csv")

In [None]:
# Folder (relative to the working directory) that holds the aggregated result CSVs.
results_folder = "results"

In [None]:
# Read the aggregated result CSV files, attach the parameter settings to each of
# them and sort them into passenger and driver tables keyed by file name (no extension).
passenger_results = {}
driver_results = {}

results_dir = os.path.join(os.getcwd(), results_folder)
for result_file in sorted(os.listdir(results_dir)):
  # Skip anything that is not a CSV (checkpoint folders, hidden files, ...);
  # read_csv would fail or produce garbage on those entries.
  result_name, extension = os.path.splitext(result_file)
  if extension.lower() != ".csv":
    continue

  # Reading with index_col=0 and resetting the index keeps the first CSV column as a regular column.
  result_csv = pd.read_csv(os.path.join(results_dir, result_file), index_col=0)
  result_csv.reset_index(drop=False, inplace=True)
  result_csv = result_csv.merge(params_setting, how="left", left_on="setting", right_on="set_id")
  result_csv['lambda_d_ratio'] = result_csv['lambda_d'] / result_csv['lambda_p']
  result_csv.drop(["set_id", "setting", "passenger_patience_before", "driver_patience_before"], axis=1, inplace=True)

  # Classify by the file-name suffix: names such as
  # "driver_priority_0.05_passenger_results" contain both words, so a substring
  # check only works by virtue of the branch order.
  if result_name.endswith("passenger_results"):
    passenger_results[result_name] = result_csv
  elif result_name.endswith("driver_results"):
    driver_results[result_name] = result_csv

### **Passenger priority results**

#### Compare overall results

In [None]:
# Result tables for the overall passenger-priority comparison:
# both baselines followed by every passenger-priority run.
passenger_priority_keys = ["base_passenger_results", "nearest_neighbor_passenger_results"] + [
    f"passenger_priority_{level}_passenger_results" for level in ("0.05", "0.1", "0.2", "0.3")
]

passenger_priority_passenger_df_list = [
    pd.DataFrame(passenger_results[key]) for key in passenger_priority_keys
]

# Metrics reported in the comparison tables.
passenger_metrics = [
    "ratio_completed",
    "ratio_cancelled",
    "ratio_abandoned",
    "avg_assignment_distance_completed",
    "avg_assignment_distance_abandoned",
]

##### BA vs. NN

In [None]:
# Only two labels are supplied, so only the first two tables (base and nearest
# neighbor) take part: get_metric_comparison pairs tables and labels with zip().
metric_comparisons = get_metric_comparison(
    df_list=passenger_priority_passenger_df_list,
    algorithm_label_list=["BA", "NN"],
    metric_list=passenger_metrics,
    scope_list=["all"],
)

In [None]:
# Show each comparison table under the name of its metric.
for k, comparison_table in metric_comparisons.items():
  print(k)
  display(comparison_table)

##### BA vs. TSPMP overall performance

In [None]:
# Result tables for BA vs. TSPMP: the base run followed by every passenger-priority run.
passenger_priority_keys = ["base_passenger_results"] + [
    f"passenger_priority_{level}_passenger_results" for level in ("0.05", "0.1", "0.2", "0.3")
]

passenger_priority_passenger_df_list = [
    pd.DataFrame(passenger_results[key]) for key in passenger_priority_keys
]

# Metrics reported in the comparison tables.
passenger_metrics = [
    "ratio_completed",
    "ratio_cancelled",
    "ratio_abandoned",
    "avg_assignment_distance_completed",
    "avg_assignment_distance_abandoned",
]

In [None]:
# Overall (scope "all") comparison of the base algorithm against each TSPMP share.
metric_comparisons = get_metric_comparison(
    df_list=passenger_priority_passenger_df_list,
    algorithm_label_list=["BA", "TSPMP 05%", "TSPMP 10%", "TSPMP 20%", "TSPMP 30%"],
    metric_list=passenger_metrics,
    scope_list=["all"],
)

In [None]:
# Show each comparison table under the name of its metric.
for k, comparison_table in metric_comparisons.items():
  print(k)
  display(comparison_table)

#### Compare priority results

In [None]:
# Result tables for the priority-group comparison: the base run followed by every
# passenger-priority run.
passenger_priority_keys = ["base_passenger_results"] + [
    f"passenger_priority_{level}_passenger_results" for level in ("0.05", "0.1", "0.2", "0.3")
]

passenger_priority_passenger_df_list = [
    pd.DataFrame(passenger_results[key]) for key in passenger_priority_keys
]

# One label per table above, in the same order.
passenger_algorithm_list = ["BA", "TSPMP 05%", "TSPMP 10%", "TSPMP 20%", "TSPMP 30%"]

# Metrics reported in the comparison tables.
passenger_metrics = [
    "ratio_completed",
    "ratio_cancelled",
    "ratio_abandoned",
    "avg_assignment_distance_completed",
]

In [None]:
# Compare the overall rows and the priority-group rows of every priority column.
metric_comparisons = get_metric_comparison(
    df_list=passenger_priority_passenger_df_list,
    algorithm_label_list=passenger_algorithm_list,
    metric_list=passenger_metrics,
    scope_list=["all", "priority_0.05", "priority_0.1", "priority_0.2", "priority_0.3"],
)

In [None]:
# Show each comparison table under the name of its metric.
for k, comparison_table in metric_comparisons.items():
  print(k)
  display(comparison_table)

### **Driver priority results**

#### Compare overall results

In [None]:
# Passenger-side result tables of the driver-priority runs, preceded by the base run.
driver_priority_passenger_keys = ["base_passenger_results"] + [
    f"driver_priority_{level}_passenger_results" for level in ("0.05", "0.1", "0.2", "0.3")
]

driver_priority_passenger_df_list = [
    pd.DataFrame(passenger_results[key]) for key in driver_priority_passenger_keys
]

# One label per table above, in the same order.
driver_algorithm_list = ["BA", "TSPMD 05%", "TSPMD 10%", "TSPMD 20%", "TSPMD 30%"]

# Passenger metrics reported in the comparison tables.
driver_priority_passenger_metrics = [
    "ratio_completed",
    "ratio_cancelled",
    "ratio_abandoned",
    "avg_assignment_distance_completed",
    "avg_assignment_distance_abandoned",
]

In [None]:
# Overall (scope "all") passenger outcomes under the base algorithm and each TSPMD share.
metric_comparisons = get_metric_comparison(
    df_list=driver_priority_passenger_df_list,
    algorithm_label_list=driver_algorithm_list,
    metric_list=driver_priority_passenger_metrics,
    scope_list=["all"],
)

In [None]:
# Show each comparison table under the name of its metric.
for k, comparison_table in metric_comparisons.items():
  print(k)
  display(comparison_table)

In [None]:
# Driver-side result tables of the driver-priority runs, preceded by the base run.
driver_priority_driver_keys = ["base_driver_results"] + [
    f"driver_priority_{level}_driver_results" for level in ("0.05", "0.1", "0.2", "0.3")
]

driver_priority_driver_df_list = [
    pd.DataFrame(driver_results[key]) for key in driver_priority_driver_keys
]

# One label per table above, in the same order.
driver_algorithm_list = ["BA", "TSPMD 05%", "TSPMD 10%", "TSPMD 20%", "TSPMD 30%"]

# NOTE(review): despite its name this list holds *driver* metrics, and it overwrites
# the passenger metric list that used the same name earlier in the notebook.
driver_priority_passenger_metrics = [
    "matched_ratio",
    "completed_ratio",
    "abandoned_ratio",
    "avg_assignment_distance_completed",
    "avg_assignment_distance_abandoned",
]

In [None]:
# Priority-group driver outcomes for every priority column, per algorithm.
metric_comparisons = get_metric_comparison(
    df_list=driver_priority_driver_df_list,
    algorithm_label_list=driver_algorithm_list,
    metric_list=driver_priority_passenger_metrics,
    scope_list=["priority_0.05", "priority_0.1", "priority_0.2", "priority_0.3"],
)

In [None]:
# Show each comparison table under the name of its metric.
for k, comparison_table in metric_comparisons.items():
  print(k)
  display(comparison_table)