In [None]:
import numpy as np
import pandas as pd
import itertools
from typing import List, Optional, Tuple
from scipy.optimize import linear_sum_assignment
!pip install pulp
from pulp import *
import datetime
import os
import ast
from scipy.spatial import distance
import random
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import psutil
import platform
import shutil
import subprocess
import importlib
import time

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
def get_gpu_info():
    """Describe the NVIDIA GPUs visible to this runtime.

    Runs ``nvidia-smi`` and returns its CSV output (name and total memory,
    one line per GPU) with surrounding whitespace stripped.

    Returns:
      str: The ``nvidia-smi`` output, or "No GPU detected" when the tool is
      missing or exits with an error.
    """
    try:
        # Argument list instead of shell=True: no shell is needed, and a missing
        # binary surfaces as FileNotFoundError (an OSError) rather than shell noise.
        gpu_info = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.SubprocessError, OSError):
        # Only swallow "tool missing / tool failed"; KeyboardInterrupt still propagates.
        return "No GPU detected"
    return gpu_info.decode("utf-8", errors="replace").strip()

def get_package_version(pkg_name):
    """Return the ``__version__`` string of an importable package.

    Args:
      pkg_name (str): Importable module name, e.g. "numpy".

    Returns:
      The module's ``__version__`` attribute, or "Not installed" when the module
      cannot be imported or does not expose ``__version__``.
    """
    try:
        return importlib.import_module(pkg_name).__version__
    except Exception:
        # Best-effort lookup: any ordinary failure is reported as "Not installed",
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return "Not installed"

def system_report():
    """Print the runtime's hardware specs and the versions of the key libraries.

    Reports Python version, logical CPU count, total RAM and root-disk capacity
    (both in GiB, rounded to two decimals), the GPU description and the versions
    of numpy, pandas, scipy and pulp.
    """
    bytes_per_gib = 1024**3

    print("=== Google Colab Runtime Specs ===")
    python_version = platform.python_version()
    print(f"Python version: {python_version}")
    logical_cpus = psutil.cpu_count(logical=True)
    print(f"CPU cores (vCPUs): {logical_cpus}")
    ram_gib = round(psutil.virtual_memory().total / bytes_per_gib, 2)
    print(f"Total RAM: {ram_gib} GB")
    disk_gib = round(shutil.disk_usage('/').total / bytes_per_gib, 2)
    print(f"Disk capacity: {disk_gib} GB")
    gpu_description = get_gpu_info()
    print(f"GPU: {gpu_description}")

    print("\n=== Key Libraries ===")
    for library in ("numpy", "pandas", "scipy", "pulp"):
        library_version = get_package_version(library)
        print(f"{library}: {library_version}")
    print("==================================")

# Print the runtime report once when this cell is executed.
system_report()

## **Generate experiment data**

In [None]:
def generate_spatial_experimental(size: int, scale: float = 2000) -> List[Tuple[float, float]]:
    """Generate arrival locations that are distributed according to the analysis of NYC taxi rides data.

    Each coordinate is an exponentially distributed distance from the center with a
    random sign, rounded to 3 decimals. Draws from the global NumPy random state, so
    seed it with ``np.random.seed`` beforehand for reproducible output.

    Args:
      size (int): Number of locations to be generated.
      scale (float): Avg meters away from the center (per axis).

    Returns:
      List[Tuple[float, float]]: ``size`` (x, y) pairs of Python floats.
    """
    def _signed_offsets() -> np.ndarray:
        # Draw order (magnitudes first, then signs) is kept exactly as before so
        # that previously generated datasets stay reproducible for a given seed.
        magnitudes = np.random.exponential(scale, size)
        signs = np.where(np.random.rand(size) > 0.5, 1, -1)
        return np.round(magnitudes * signs, 3).astype(float)

    # x offsets must be drawn before y offsets (same RNG consumption order as before).
    locx = _signed_offsets()
    locy = _signed_offsets()

    return [(float(x), float(y)) for x, y in zip(locx, locy)]


def generate_priority_df(size: int, priority_probs: List[float]) -> pd.DataFrame:
    """
    Build one shuffled 0/1 priority column per requested probability.

    For each probability, ``round(prob * size)`` entities are flagged with 1 and the
    rest with 0; the flags are then shuffled with the global NumPy random state.

    Args:
        size (int): Number of entities (rows).
        priority_probs (List[float]): Share of priority entities, one per column.

    Returns:
        pd.DataFrame: ``size`` rows and one column named ``priority_{prob}`` per entry.
    """
    def _shuffled_flags(prob):
        n_flagged = round(prob * size)
        flags = np.array([1] * n_flagged + [0] * (size - n_flagged))
        np.random.shuffle(flags)
        return flags

    # Single pass over priority_probs so shuffles happen in the given order.
    named_flags = [(f"priority_{prob}", _shuffled_flags(prob)) for prob in priority_probs]
    column_names = [name for name, _ in named_flags]
    flag_rows = [flags for _, flags in named_flags]

    return pd.DataFrame(np.array(flag_rows).T, columns=column_names)


def generate_data(seed: int, size_set1: int, size_set2:int, priority_prob1_set: List[float], priority_prob2_set: List[float]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Generate the two entity tables of one experiment instance.

    Seeds the global NumPy random state, then draws locations for set 1 and set 2
    followed by the priority flags for set 1 and set 2 (in that order).

    Args:
      seed (int): Random seed for reproducibility.
      size_set1 (int): Number of entities in set 1 (ids prefixed ``p_``).
      size_set2 (int): Number of entities in set 2 (ids prefixed ``d_``).
      priority_prob1_set (List[float]): Priority probabilities for set 1.
      priority_prob2_set (List[float]): Priority probabilities for set 2.

    Returns:
      Tuple[pd.DataFrame, pd.DataFrame]: One frame per set with columns ``id``,
      ``loc`` and one ``priority_{prob}`` column per probability.
    """
    np.random.seed(seed)

    # Keep this draw order: locations of both sets first, then priorities.
    locations_1 = generate_spatial_experimental(size_set1)
    locations_2 = generate_spatial_experimental(size_set2)
    priorities_1 = generate_priority_df(size_set1, priority_prob1_set)
    priorities_2 = generate_priority_df(size_set2, priority_prob2_set)

    def _assemble(prefix, count, locations, priorities):
        # Ids are zero-padded to the number of digits of the set size.
        width = len(str(count))
        ids = [f"{prefix}_{idx:0{width}d}" for idx in range(count)]
        base = pd.DataFrame({'id': ids, 'loc': locations})
        return pd.concat([base, priorities], axis=1)

    frame_1 = _assemble("p", size_set1, locations_1, priorities_1)
    frame_2 = _assemble("d", size_set2, locations_2, priorities_2)
    return frame_1, frame_2

In [None]:
# Directory that receives the generated df1_*/df2_* CSV files; set it before running the generation cell.
data_save_loc = ""

In [None]:
# Sizes and priority shares of set 1 (ids "p_*").
num_set1 = [50]
prob_set1 = [0.05, 0.1, 0.2, 0.3]

# Sizes and priority shares of set 2 (ids "d_*").
num_set2 = [12, 25, 37]
prob_set2 = [0.05, 0.1, 0.2, 0.3]

# NOTE(review): 518, 726 and 85 each appear twice in this list, so the 100 replications
# contain only 97 distinct datasets. Left unchanged to keep existing results reproducible.
seeds = [290, 534, 839, 490, 445, 771, 726, 871, 883, 518, 895, 978, 295, 158, 85, 244, 723, 137, 361, 589, 716, 584, 925, 436, 285, 159, 456, 492, 149, 184, 888, 760, 46, 961, 964, 349, 117, 551,
         24, 174, 354, 342, 719, 198, 935, 452, 279, 809, 132, 171, 849, 109, 945, 30, 529, 724, 150, 940, 800, 714, 503, 249, 92, 463, 906, 562, 677, 844, 597, 608, 25, 55, 624, 563, 145, 851,
         518, 726, 625, 982, 85, 748, 27, 609, 218, 303, 31, 6, 91, 202, 291, 212, 661, 344, 866, 943, 360, 247, 209, 164]

param_set_list = []

# An empty data_save_loc means "current working directory": os.makedirs("") raises
# FileNotFoundError and f"{''}/file.csv" would point at the filesystem root.
output_dir = data_save_loc or "."
os.makedirs(output_dir, exist_ok=True)

# One parameter setting per (set-1 size, set-2 size) pair; one file pair per seed.
for set_count, (n_of_p, n_of_d) in enumerate(itertools.product(num_set1, num_set2), start=1):

    param_set = f"Parameter Setting {set_count}, {n_of_p},  {n_of_d}"
    param_set_list.append(param_set)
    print(param_set)

    for count, seed in enumerate(seeds, start=1):
        df1, df2 = generate_data(seed=seed,
                                 size_set1=n_of_p,
                                 size_set2=n_of_d,
                                 priority_prob1_set=prob_set1,
                                 priority_prob2_set=prob_set2)

        # File names encode the parameter-setting index and the replication index.
        df1.to_csv(os.path.join(output_dir, f"df1_{set_count:03d}_{count:03d}.csv"), index=False)
        df2.to_csv(os.path.join(output_dir, f"df2_{set_count:03d}_{count:03d}.csv"), index=False)


## **Experiments**

In [None]:
def min_cost_matching(distance_matrix: np.ndarray, distance_threshold: float, passenger_priority: List[int], driver_priority: List[int]) -> Tuple[int, int, int, float, Optional[int]]:
    """ Computes the minimum cost matching between drivers and passengers using the Hungarian algorithm (linear_sum_assignment).

    Pairs farther apart than ``distance_threshold`` are infeasible. They are given a
    penalty cost larger than any achievable total feasible cost, so the solver first
    maximizes the number of feasible pairs and then minimizes their total distance.
    Penalized pairs are removed from the reported matching.

    Args:
      distance_matrix (np.ndarray): Distance matrix of shape [num_cars, num_passengers] representing distances between each driver and passenger. Not modified.
      distance_threshold (float): Maximum feasible distance for an assignment. Any distance above this threshold is considered infeasible.
      passenger_priority (List[int]): Priority weight of each passenger. Value `1` means non-priority; any other value is counted as priority.
      driver_priority (List[int]): Priority weight of each driver. Value `1` means non-priority; any other value is counted as priority.

    Returns:
      Tuple ``(number_of_assignments, matched_priority_passengers, matched_priority_drivers, total_cost, objective_value)``.
      ``objective_value`` is always ``None`` when a matching exists. When no pair is
      feasible (or the matrix is empty) the result is ``(0, 0, 0, np.inf, 0)``.

    Raises:
      ValueError: If the matrix is not 2-D or the priority lists do not match its shape.
    """
    mat = np.array(distance_matrix, dtype=float, copy=True)

    if mat.size == 0:
        return 0, 0, 0, np.inf, 0

    if mat.ndim != 2:
        raise ValueError("distance_matrix must be two-dimensional.")

    num_drivers, num_passengers = mat.shape
    if num_drivers != len(driver_priority):
        raise ValueError("Number of drivers and driver_priority list length do not match.")
    if num_passengers != len(passenger_priority):
        raise ValueError("Number of passengers and passenger_priority list length do not match.")

    # Feasibility is decided on the real distances, not on a sentinel comparison,
    # so thresholds/distances of 1e6 and above are handled correctly.
    feasible = mat <= distance_threshold
    if not feasible.any():
        return 0, 0, 0, np.inf, 0

    # Penalty for infeasible pairs: at least the historical 1e6, and always larger
    # than the total cost of any fully feasible matching.
    max_feasible = float(mat[feasible].max())
    penalty = max(1e6, max_feasible * min(mat.shape) + 1.0)
    cost_matrix = np.where(feasible, mat, penalty)

    # Solve the linear sum assignment problem
    driver_indices, passenger_indices = linear_sum_assignment(cost_matrix, maximize=False)

    # Keep only assignments that use a feasible pair
    keep = feasible[driver_indices, passenger_indices]
    driver_indices = driver_indices[keep]
    passenger_indices = passenger_indices[keep]

    total_cost = float(mat[driver_indices, passenger_indices].sum())

    passenger_is_priority = np.asarray(passenger_priority) != 1
    driver_is_priority = np.asarray(driver_priority) != 1
    num_matched_priority_passengers = int(passenger_is_priority[passenger_indices].sum())
    num_matched_priority_drivers = int(driver_is_priority[driver_indices].sum())

    return len(driver_indices), num_matched_priority_passengers, num_matched_priority_drivers, total_cost, None

In [None]:
def constrained_max_weight_matching(distance_matrix: np.ndarray, distance_threshold: float, passenger_priority: List[int], driver_priority: List[int], match_lb: int, cost: float, add_cost_const: bool, add_lb_const: bool, theta: float=1e-6) -> Tuple[int, int, int, float, Optional[float]]:
    """ Computes the constrained maximum weight matching between drivers and passengers using a Mixed-Integer Linear Programming formulation.

    A binary variable is created for every driver/passenger pair within
    ``distance_threshold``; the objective maximizes the sum of
    ``driver_priority[i] + passenger_priority[j]`` over the selected pairs.

    Args:
      distance_matrix (np.ndarray): Distance matrix of shape [num_cars, num_passengers] representing distances between each driver and passenger.
      distance_threshold (float): Maximum feasible distance for an assignment. Any distance above this threshold is considered infeasible.
      passenger_priority (List[int]): Priority weight of each passenger. Value `1` means non-priority; any other value is counted as priority.
      driver_priority (List[int]): Priority weight of each driver. Value `1` means non-priority; any other value is counted as priority.
      match_lb (int): Minimum number of required assignments.
      cost (float): Max. feasible cost.
      add_cost_const (bool): Whether to enforce the total cost constraint ``total distance <= cost * (1 + theta)``.
      add_lb_const (bool): Whether to enforce the lower bound on total assignments.
      theta (float): Relaxation parameter for the cost constraint.

    Returns:
      Tuple ``(number_of_assignments, matched_priority_passengers, matched_priority_drivers, total_cost, objective_value)``.

    Raises:
      ValueError: If there are no participants or the priority lists do not match the matrix shape.
      Exception: If the extracted solution uses a driver or passenger more than once.
    """
    import warnings

    if len(distance_matrix) == 0:
        raise ValueError("No participants!")

    num_drivers, num_passengers = distance_matrix.shape

    if num_drivers != len(driver_priority):
        raise ValueError("Number of drivers and driver_priority list length do not match.")
    if num_passengers != len(passenger_priority):
        raise ValueError("Number of passengers and passenger_priority list length do not match.")

    # The matrix is only read here, so no defensive copy is needed.
    mat = np.asarray(distance_matrix)

    # define the problem
    max_weight_problem = LpProblem("Maximize_Total_Weight", LpMaximize)

    # add decision variables for feasible assignments only
    x = {(i, j): LpVariable(f"x_{i}_{j}", cat='Binary')
         for i in range(num_drivers)
         for j in range(num_passengers)
         if mat[i][j] <= distance_threshold}

    # define the objective function
    max_weight_problem += lpSum((driver_priority[i] + passenger_priority[j]) * var for (i, j), var in x.items())

    # each driver can be assigned to at most one passenger
    for i in range(num_drivers):
        max_weight_problem += lpSum(x[i, j] for j in range(num_passengers) if (i, j) in x) <= 1

    # each passenger can get assigned to at most one driver
    for j in range(num_passengers):
        max_weight_problem += lpSum(x[i, j] for i in range(num_drivers) if (i, j) in x) <= 1

    # cost constraint
    if add_cost_const:
        max_weight_problem += lpSum(mat[i][j] * var for (i, j), var in x.items()) <= cost * (1 + theta)

    # lower bound on the total number of assignments
    if add_lb_const:
        max_weight_problem += lpSum(x.values()) >= match_lb

    # solve the MILP
    max_weight_problem.solve()
    solver_status = LpStatus[max_weight_problem.status]
    if solver_status != "Optimal":
        # The returned numbers are not trustworthy in this case; make that visible
        # instead of silently reporting them as a valid matching.
        warnings.warn(f"constrained_max_weight_matching: solver finished with status '{solver_status}'.")
    objective_value = value(max_weight_problem.objective)

    def _is_selected(var) -> bool:
        # Solvers return floats for binary variables (e.g. 0.9999999) and None for
        # unsolved variables, so compare against 0.5 instead of testing == 1.
        var_value = value(var)
        return var_value is not None and var_value > 0.5

    # extract the solution
    matching = [pair for pair, var in x.items() if _is_selected(var)]
    car_ind = [i for (i, _) in matching]
    pass_ind = [j for (_, j) in matching]

    total_cost = float(sum(mat[i, j] for (i, j) in matching))

    # sanity check on the solver output: a valid matching uses each side at most once
    if len(set(car_ind)) != len(set(pass_ind)):
        raise Exception("Unmatched assignment: number of matched cars and passengers differ.")

    num_matched_priority_p = sum(1 for j in pass_ind if passenger_priority[j] != 1)
    num_matched_priority_d = sum(1 for i in car_ind if driver_priority[i] != 1)

    return len(matching), int(num_matched_priority_p), int(num_matched_priority_d), total_cost, objective_value

In [None]:
def price_of_priority(distance_threshold:float, distance_matrix: np.ndarray, passenger_priority: List[int], driver_priority: List[int], theta_step: float = 0.1, add_lb_const: bool = True) -> pd.DataFrame:
    """
    Compute the price of priority by comparing min-cost matching and constrained max-weight priority matching.

    Steps:
      1. Solve the min-cost matching (row ``min_cost``).
      2. Solve the max-weight matching without a cost constraint (row ``max_priority``);
         its matched priority counts are the target.
      3. If the min-cost matching misses the target, re-solve the max-weight problem
         with the cost capped at ``min_cost * (1 + theta)`` for theta = 0, theta_step,
         2 * theta_step, ... until the target is reached (one row per theta, labelled
         as a percentage). Otherwise a single ``0%`` row with the min-cost values is added.
      4. Repeat the last row with status ``max_priority_min_cost``.

    Args:
      distance_threshold (float): Maximum feasible distance for assignments.
      distance_matrix (np.ndarray): Driver-passenger distance matrix.
      passenger_priority (List[int]): Passenger priority weights (1 = non-priority, anything else = priority).
      driver_priority (List[int]): Driver priority weights (1 = non-priority, anything else = priority).
      theta_step (float, optional): Step size for relaxing cost constraint. Default 0.1.
      add_lb_const (bool, optional): Whether to enforce lower bound on assignments in priority matching. Default True.

    Returns:
      pd.DataFrame: One row per solved problem plus derived ratio columns.

    Raises:
      ValueError: If the relaxation loop is needed and ``theta_step`` is not positive.
    """
    import warnings

    num_priority_passengers = sum(p != 1 for p in passenger_priority)
    num__passengers = len(passenger_priority)
    num_priority_drivers = sum(d != 1 for d in driver_priority)
    num_drivers = len(driver_priority)

    # solve min cost matching
    (number_of_assignments_min_cost,
     num_matched_priority_passengers_min_cost,
     num_matched_priority_drivers_min_cost,
     total_cost_min,
     objective_value_min_cost) = min_cost_matching(distance_matrix,
                                                   distance_threshold,
                                                   passenger_priority,
                                                   driver_priority)

    def _record(status, cost, n_assignments, n_priority_drivers, n_priority_passengers, theta_value, objective):
        # One summary row; key order defines the column order of the result.
        return {"status": status,
                "cost": cost,
                "number_of_assignments": n_assignments,
                "matched_priority_driver": n_priority_drivers,
                "matched_priority_passenger": n_priority_passengers,
                "passenger_priority_gain_over_min_cost": n_priority_passengers - num_matched_priority_passengers_min_cost,
                "theta": theta_value,
                "number_of_passengers": num__passengers,
                "number_of_priority_passengers": num_priority_passengers,
                "number_of_drivers": num_drivers,
                "number_of_priority_drivers": num_priority_drivers,
                "objective_value": objective}

    summary = [_record("min_cost",
                       total_cost_min,
                       number_of_assignments_min_cost,
                       num_matched_priority_drivers_min_cost,
                       num_matched_priority_passengers_min_cost,
                       None,
                       objective_value_min_cost)]

    # solve max weight matching (no cost constraint) to find the priority matching
    (number_of_assignments_max_priority,
     passenger_priority_matching_number,
     driver_priority_matching_number,
     total_cost_max,
     objective_value_max_weight) = constrained_max_weight_matching(distance_matrix=distance_matrix,
                                                                   distance_threshold=distance_threshold,
                                                                   passenger_priority=passenger_priority,
                                                                   driver_priority=driver_priority,
                                                                   match_lb=number_of_assignments_min_cost,
                                                                   cost=total_cost_min,
                                                                   add_cost_const=False,
                                                                   add_lb_const=add_lb_const)
    summary.append(_record("max_priority",
                           total_cost_max,
                           number_of_assignments_max_priority,
                           driver_priority_matching_number,
                           passenger_priority_matching_number,
                           None,
                           objective_value_max_weight))

    def _target_reached(n_priority_passengers, n_priority_drivers):
        return (n_priority_passengers == passenger_priority_matching_number
                and n_priority_drivers == driver_priority_matching_number)

    # Start from the min-cost solution; it is reported as-is when it already hits the target.
    num_matched_priority_passengers = num_matched_priority_passengers_min_cost
    num_matched_priority_drivers = num_matched_priority_drivers_min_cost
    number_of_assignments = number_of_assignments_min_cost
    total_cost = total_cost_min
    objective_value = objective_value_min_cost
    theta = 0

    if not _target_reached(num_matched_priority_passengers_min_cost, num_matched_priority_drivers_min_cost):
        if theta_step <= 0:
            raise ValueError("theta_step must be positive.")

        step = 0
        while True:
            # Derive theta from an integer step count: repeated "theta += theta_step"
            # accumulates float error (e.g. 0.7999999999999999 instead of 0.8).
            theta = round(step * theta_step, 10)
            (number_of_assignments,
             num_matched_priority_passengers,
             num_matched_priority_drivers,
             total_cost,
             objective_value) = constrained_max_weight_matching(distance_matrix=distance_matrix,
                                                                distance_threshold=distance_threshold,
                                                                passenger_priority=passenger_priority,
                                                                driver_priority=driver_priority,
                                                                match_lb=number_of_assignments_min_cost,
                                                                cost=total_cost_min,
                                                                add_cost_const=True,
                                                                add_lb_const=add_lb_const,
                                                                theta=theta)

            summary.append(_record(f"{theta:.0%}",
                                   total_cost,
                                   number_of_assignments,
                                   num_matched_priority_drivers,
                                   num_matched_priority_passengers,
                                   theta,
                                   objective_value))

            if _target_reached(num_matched_priority_passengers, num_matched_priority_drivers):
                break

            # Safety stop: once the budget admits the unconstrained solution, relaxing
            # further cannot change the optimum, so a remaining mismatch (e.g. ties
            # between equally weighted matchings) would otherwise loop forever.
            if total_cost_min * (1 + theta) >= total_cost_max:
                warnings.warn("price_of_priority: cost budget covers the unconstrained priority matching "
                              "but the matched priority counts still differ; stopping the relaxation.")
                break

            step += 1
    else:
        summary.append(_record(f"{theta:.0%}",
                               total_cost,
                               number_of_assignments,
                               num_matched_priority_drivers,
                               num_matched_priority_passengers,
                               theta,
                               objective_value))

    # Final row repeats the last solved state under a fixed label.
    summary.append(_record("max_priority_min_cost",
                           total_cost,
                           number_of_assignments,
                           num_matched_priority_drivers,
                           num_matched_priority_passengers,
                           theta,
                           objective_value))
    summary_df = pd.DataFrame(summary)

    # Relative cost increase over the min-cost matching, rounded to one decimal and shown as a percentage.
    summary_df["perc_deviation_from_min_cost"] = summary_df["cost"].apply(lambda x: f"{round((x - total_cost_min) / total_cost_min, 1):.0%}")
    summary_df["passenger_priority_matching_number"] = passenger_priority_matching_number
    summary_df["driver_priority_matching_number"] = driver_priority_matching_number
    summary_df["matched_passenger_ratio"] = round(summary_df["number_of_assignments"] / summary_df["number_of_passengers"], 2)
    summary_df["matched_priority_passenger_ratio"] = round(summary_df["matched_priority_passenger"] / summary_df["number_of_priority_passengers"], 2)

    return summary_df

In [None]:
def run_experiment(source_folder: str, outdir: str, distance_threshold: float = 5000, theta_step: float = 0.1, add_lb_const: bool = True,
                   param_set_ids: Tuple[int, ...] = (1, 2, 3), num_seeds: int = 100,
                   priority_levels: Tuple[float, ...] = (0.05, 0.1, 0.2, 0.3)) -> None:
    """
    Run simulation experiments across multiple parameter sets and seeds, generates aggregated results and saves them to CSV.

    For every input file pair the cityblock distance matrix between set 2 (rows,
    treated as drivers) and set 1 (columns, treated as passengers) is computed and
    price_of_priority is evaluated once per priority level. Passengers flagged in the
    priority column get weight 100, all other passengers and all drivers get weight 1.

    Args:
      source_folder (str): Directory containing experiment input CSV files. Files must follow the naming pattern "df1_{i:03d}_{j:03d}.csv" and "df2_{i:03d}_{j:03d}.csv".
      outdir (str): Directory where output CSV will be written. An empty string means the current working directory.
      distance_threshold (float): Maximum feasible distance for assignments (default: 5000 meters).
      theta_step (float): Step size for theta parameter in price_of_priority (default: 0.1).
      add_lb_const (bool): Whether to add a lower-bound constraint in price_of_priority (default: True).
      param_set_ids (Tuple[int, ...]): Parameter-setting indices ``i`` to process (default: 1, 2, 3).
      num_seeds (int): Number of replications ``j = 1..num_seeds`` per parameter setting (default: 100).
      priority_levels (Tuple[float, ...]): Priority shares; each needs a ``priority_{level}`` column in df1.

    Raises:
      FileNotFoundError: If an expected input file is missing.
      KeyError: If a priority column is missing from the set-1 file.
    """
    # Create the output directory up front so a bad path fails before the long computation.
    # os.makedirs("") raises, so an empty outdir is left to resolve to the working directory.
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    results_list = []

    for params_set in param_set_ids:
        for replicate in range(1, num_seeds + 1):
            file_name1 = f"df1_{params_set:03d}_{replicate:03d}.csv"
            file_name2 = f"df2_{params_set:03d}_{replicate:03d}.csv"

            path1 = os.path.join(source_folder, file_name1)
            path2 = os.path.join(source_folder, file_name2)

            if not os.path.exists(path1) or not os.path.exists(path2):
                raise FileNotFoundError(f"Missing input files: {path1}, {path2}")

            df1 = pd.read_csv(path1)
            df2 = pd.read_csv(path2)

            # Locations are stored as "(x, y)" strings in the CSV.
            df1['loc'] = df1['loc'].apply(ast.literal_eval)
            df2['loc'] = df2['loc'].apply(ast.literal_eval)

            # rows = df2 entities (drivers), columns = df1 entities (passengers)
            distance_matrix = distance.cdist(list(df2["loc"]), list(df1["loc"]), 'cityblock')

            for priority_level in priority_levels:
                priority_col = f"priority_{priority_level}"
                if priority_col not in df1.columns:
                    raise KeyError(f"Missing priority column: {priority_col}")

                passenger_priority_list = [1 if flag == 0 else 100 for flag in df1[priority_col]]
                driver_priority_list = [1] * len(df2)

                res_df = price_of_priority(
                    distance_threshold=distance_threshold,
                    distance_matrix=distance_matrix,
                    passenger_priority=passenger_priority_list,
                    driver_priority=driver_priority_list,
                    theta_step=theta_step,
                    add_lb_const=add_lb_const
                )

                res_df["priority_level"] = priority_level
                # "seed" holds the replication index from the file name, not the RNG seed value.
                res_df["seed"] = replicate
                res_df["params_set"] = params_set

                results_list.append(res_df)

    results_df = pd.concat(results_list, ignore_index=True)

    today = datetime.datetime.now().strftime('%Y%m%d')
    constraint_tag = "with" if add_lb_const else "without"
    out_path = os.path.join(outdir, f'results_{constraint_tag}_lb_constraint_{num_seeds}_{today}.csv')

    results_df.to_csv(out_path, index=False)

In [None]:
# Folder holding the df1_*/df2_* input CSVs that run_experiment reads.
source_folder = ""
# Folder that receives the results CSV written by run_experiment; the analysis cells read from it too.
outdir = ""

## **Run with the lower bound constraint**

In [None]:
# Run all experiments with the lower bound on the number of assignments enforced.
run_experiment(source_folder, outdir, add_lb_const=True)

## **Run without the lower bound constraint**

In [None]:
# Run all experiments again without the lower bound on the number of assignments.
run_experiment(source_folder, outdir, add_lb_const=False)

## **Analyze Results**

In [None]:
def get_aggregated_results(df: pd.DataFrame, theta_step: float = 0.1) -> List[pd.DataFrame]:
    """
    Aggregate experimental results by theta, parameter set, and priority level.

    Within each (priority level, parameter set) combination every seed is padded to
    the same number of theta steps by repeating its last row, so that seeds which
    reached the priority matching early keep contributing their final values at
    larger thetas. The padded rows are then averaged per theta.

    Args:
      df (pd.DataFrame): Input dataframe containing columns: ["theta", "params_set", "priority_level", "seed", "matched_priority_passenger", "matched_priority_passenger_ratio", "matched_passenger_ratio", "is_priority_matching"]. Rows of each seed are expected in increasing theta order.
      theta_step (float): Spacing of the theta grid used when the results were produced (default: 0.1).

    Returns:
      List[pd.DataFrame]: One aggregated frame per non-empty (priority level, parameter set)
      combination, sorted by theta.
    """
    value_columns = ['theta', 'params_set', 'priority_level', 'matched_priority_passenger',
                     'matched_priority_passenger_ratio', 'matched_passenger_ratio', 'is_priority_matching']
    aggregated_dfs = []

    for priority_level in df["priority_level"].unique():
        for params_set in df["params_set"].unique():

            sub_df = df[(df["params_set"] == params_set) & (df["priority_level"] == priority_level)]
            if sub_df.empty:
                # Combination not present in the data (non-rectangular experiment grid).
                continue

            # round() instead of int(): stored thetas such as 0.7999999999999999 would
            # otherwise be truncated to one step too few.
            max_length = int(round(sub_df["theta"].max() / theta_step)) + 1
            theta_grid = [round(k * theta_step, 10) for k in range(max_length)]
            status_grid = [f"{t:.0%}" for t in theta_grid]
            num_seeds = sub_df["seed"].nunique()

            padded_groups = []
            for _, group in sub_df.groupby('seed'):
                # Pad rows to match max_length by repeating the seed's last row
                rows_to_add = max_length - len(group)
                if rows_to_add > 0:
                    group = pd.concat([group, group.iloc[[-1] * rows_to_add]], ignore_index=True)
                else:
                    group = group.reset_index(drop=True)

                padded_groups.append(group.assign(status=status_grid, theta=theta_grid))

            df_analyze = pd.concat(padded_groups, ignore_index=True)

            agg_df = (df_analyze[value_columns]
                      .groupby(['theta', 'params_set', 'priority_level'])
                      .agg(avg_matched_priority_passenger=('matched_priority_passenger', 'mean'),
                           avg_ratio_of_matched_priority_passengers=('matched_priority_passenger_ratio', 'mean'),
                           number_of_priority_matchings_obtained=('is_priority_matching', 'sum'),
                           avg_ratio_of_matched_passengers=('matched_passenger_ratio', 'mean'),
                           )
                      .reset_index()
                      .sort_values(['theta'])
                      )

            # Share of seeds that reached the priority matching at each theta
            # (denominator is the actual seed count instead of a hard-coded 100).
            agg_df['priority_matching_const_ratio'] = agg_df['number_of_priority_matchings_obtained'] / num_seeds

            aggregated_dfs.append(agg_df)

    return aggregated_dfs

In [None]:
def display_results(dfs: List[pd.DataFrame], palette: str = "viridis") -> None:
    """
    Display line and scatter plots of matched priority agents over theta values.

    One figure is drawn per aggregated frame: a gray line through the average ratio of
    matched priority passengers and, on top, points colored by
    ``priority_matching_const_ratio`` on a fixed 0-1 color scale.

    Args:
      dfs (List[pd.DataFrame]): List of aggregated DataFrames.
      palette (str): Color palette for scatterplot. Default is 'viridis'.
    """
    min_hue = 0
    max_hue = 1

    for df in dfs:
        fig, ax = plt.subplots(figsize=(10, 10))

        sns.lineplot(data=df,
                     x='theta',
                     y='avg_ratio_of_matched_priority_passengers',
                     color='gray',
                     alpha=0.5,
                     ax=ax,
                     zorder=1,
                     )
        sns.scatterplot(
            data=df,
            x='theta',
            y='avg_ratio_of_matched_priority_passengers',
            hue='priority_matching_const_ratio',
            palette=palette,
            hue_norm=(min_hue, max_hue),
            ax=ax,
            zorder=2,
            s=200,
            legend=False,
        )

        ax.set_xlabel(r'Cost Constraint Relaxation Ratio $\theta$')
        ax.set_ylabel('Mean of Matched Priority Agents')

        # Join the distinct values so the title shows "1" rather than a numpy array repr "[1]".
        params_label = ", ".join(str(v) for v in df["params_set"].unique())
        level_label = ", ".join(str(v) for v in df["priority_level"].unique())
        ax.set_title(f'Parameter Setting {params_label}, Priority Level {level_label}')

        ax.grid(True, alpha=0.4)
        ax.tick_params(axis='x', labelrotation=45)
        ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        sns.despine(ax=ax)

        plt.show()
        # Release the figure so many frames do not pile up in memory.
        plt.close(fig)

In [None]:
def display_results_grid(dfs: List[pd.DataFrame], palette: str = "viridis") -> None:
    """
    Display aggregated results on a grid of subplots for multiple parameter settings and priority levels.

    Rows correspond to parameter sets and columns to priority levels, both sorted
    ascending. Each panel shows the average ratio of matched priority passengers over
    theta, with points colored by ``priority_matching_const_ratio`` on a shared 0-1
    color scale. Note that this updates the global matplotlib font-size rcParams.

    Args:
      dfs (List[pd.DataFrame]): List of aggregated DataFrames; each is expected to hold a single params_set / priority_level.
      palette (str): Colormap for scatterplot points.
    """

    plt.rcParams.update({
        "font.size": 20,
        "axes.titlesize": 24,
        "axes.labelsize": 20,
        "xtick.labelsize": 18,
        "ytick.labelsize": 18,
        "legend.fontsize": 18
    })

    min_hue = 0
    max_hue = 1
    norm = plt.Normalize(vmin=min_hue, vmax=max_hue)

    all_params_sets = sorted({df["params_set"].iloc[0] for df in dfs})
    all_priority_levels = sorted({df["priority_level"].iloc[0] for df in dfs})

    # squeeze=False keeps `axes` two-dimensional even for a single row or column,
    # so axes[r_idx, c_idx] below always works.
    fig, axes = plt.subplots(len(all_params_sets), len(all_priority_levels), figsize=(32, 24), squeeze=False)
    plt.subplots_adjust(left=0.05, right=1, top=0.95, bottom=0.08, hspace=0.2, wspace=0.1)

    # Panel titles show the set-2/set-1 size ratio of the three generated parameter settings.
    ratio_labels = {1: '12/50', 2: '25/50', 3: '37/50'}

    for df in dfs:
        params_set = df["params_set"].iloc[0]
        priority_level = df["priority_level"].iloc[0]
        ax = axes[all_params_sets.index(params_set), all_priority_levels.index(priority_level)]

        sns.lineplot(data=df,
                     x='theta',
                     y='avg_ratio_of_matched_priority_passengers',
                     color='gray',
                     alpha=0.5,
                     ax=ax,
                     zorder=1,
                     )
        sns.scatterplot(
            data=df,
            x='theta',
            y='avg_ratio_of_matched_priority_passengers',
            hue='priority_matching_const_ratio',
            palette=palette,
            hue_norm=(min_hue, max_hue),
            ax=ax,
            zorder=2,
            s=200,
            legend=False,
        )

        ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        ax.grid(True, alpha=0.4)

        dataset_ratio = ratio_labels.get(params_set, '')
        ax.set_title(f"Priority {priority_level} - {dataset_ratio}")
        ax.set_xlabel("")
        ax.set_ylabel("")

    # Remove top/right spines of all panels (done before the colorbar axes is added).
    sns.despine(fig=fig)

    # Shared colorbar placed to the right of the grid.
    sm = plt.cm.ScalarMappable(cmap=palette, norm=norm)
    sm.set_array([])
    cax = fig.add_axes([1.05, 0.15, 0.05, 0.7])
    fig.colorbar(sm, cax=cax, orientation='vertical').set_label(label='Priority Matching Formation Ratio', size=24)

    fig.supxlabel(r'Cost Constraint Relaxation Ratio $\theta$', fontsize=24)
    fig.supylabel('Mean of Matched Priority Agents', fontsize=24)

    plt.show()

### **Results with the lower bound constraint**

In [None]:
# Date suffix (YYYYMMDD) of the results file to analyse. run_experiment stamps its output
# with the date of the run, so update this value after re-running the experiments.
results_date_with_lb = "20251018"
file_path = os.path.join(outdir, f"results_with_lb_constraint_100_{results_date_with_lb}.csv")
# The results file written by run_experiment has no "loc" column, so no converter is needed.
results_df_with_constraint = pd.read_csv(file_path)

In [None]:
# Keep only the per-theta rows: the three summary statuses are removed before aggregation.
summary_statuses = ['max_priority_min_cost', 'min_cost', 'max_priority']
is_summary_row = results_df_with_constraint['status'].isin(summary_statuses)
results_df_with_constraint = results_df_with_constraint[~is_summary_row].reset_index(drop=True)

In [None]:
# A row counts as a priority matching when it matches as many priority passengers
# as the unconstrained max-priority solution did.
results_df_with_constraint['is_priority_matching'] = (
    results_df_with_constraint['matched_priority_passenger']
    .eq(results_df_with_constraint['passenger_priority_matching_number'])
)
aggregated_results_with_constraint = get_aggregated_results(results_df_with_constraint)
aggregated_results_with_constraint_concat = pd.concat(
    aggregated_results_with_constraint,
    axis=0,
    ignore_index=True,
)

In [None]:
# Show one summary table per parameter setting, indexed by priority level and theta.
table_keys = ['params_set', 'priority_level', 'theta']
table_values = ['avg_ratio_of_matched_passengers', 'avg_ratio_of_matched_priority_passengers', 'number_of_priority_matchings_obtained']
for params_set_id in [1, 2, 3]:
  per_setting = aggregated_results_with_constraint_concat[aggregated_results_with_constraint_concat['params_set'] == params_set_id]
  display(per_setting.groupby(table_keys)[table_values].mean())

In [None]:
# Plot the aggregated with-constraint results as a grid (rows: parameter sets, columns: priority levels).
display_results_grid(aggregated_results_with_constraint)

### **Results without the lower bound constraint**

In [None]:
# Date suffix (YYYYMMDD) of the results file to analyse. run_experiment stamps its output
# with the date of the run, so update this value after re-running the experiments.
results_date_without_lb = "20251018"
file_path = os.path.join(outdir, f"results_without_lb_constraint_100_{results_date_without_lb}.csv")
# The results file written by run_experiment has no "loc" column, so no converter is needed.
results_df_without_constraint = pd.read_csv(file_path)

In [None]:
# Keep only the per-theta rows: the three summary statuses are removed before aggregation.
summary_statuses = ['max_priority_min_cost', 'min_cost', 'max_priority']
is_summary_row = results_df_without_constraint['status'].isin(summary_statuses)
results_df_without_constraint = results_df_without_constraint[~is_summary_row].reset_index(drop=True)

In [None]:
# A row counts as a priority matching when it matches as many priority passengers
# as the unconstrained max-priority solution did.
results_df_without_constraint['is_priority_matching'] = (
    results_df_without_constraint['matched_priority_passenger']
    .eq(results_df_without_constraint['passenger_priority_matching_number'])
)
aggregated_results_without_constraint = get_aggregated_results(results_df_without_constraint)
aggregated_results_without_constraint_concat = pd.concat(
    aggregated_results_without_constraint,
    axis=0,
    ignore_index=True,
)

In [None]:
# Show one summary table per parameter setting, indexed by priority level and theta.
table_keys = ['params_set', 'priority_level', 'theta']
table_values = ['avg_ratio_of_matched_passengers', 'avg_ratio_of_matched_priority_passengers', 'number_of_priority_matchings_obtained']
for params_set_id in [1, 2, 3]:
  per_setting = aggregated_results_without_constraint_concat[aggregated_results_without_constraint_concat['params_set'] == params_set_id]
  display(per_setting.groupby(table_keys)[table_values].mean())