In [28]:
import json
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")

seeds = []
for i in range(1, 101):
    seeds.append(i)


def time_to_minutes(timestr):
    # Handle '+1' suffix by removing it before parsing
    timestr = timestr.split('+')[0]  # Remove '+1' if present
    hh, mm = timestr.split(':')
    return int(hh) * 60 + int(mm)



def calculate_slack_for_scenario(scenario_data):
    """
    Calculate the slack metric for the given scenario.
    
    Slack is defined as:
        Slack = 1 - (total flight minutes in recovery period / total recovery period aircraft-minutes)
    
    A slack of 1 means no flights during recovery period.
    A slack of 0 means flights occupy the entire recovery period.
    """
    def time_to_minutes(timestr):
        # Handle '+1' suffix by removing it before parsing
        timestr = timestr.split('+')[0]  # Remove '+1' if present
        hh, mm = timestr.split(':')
        return int(hh) * 60 + int(mm)

    
    # Extract scenario start/end times
    # We assume the same date for start and end for simplicity.
    recovery_start_time_str = scenario_data["recovery_start_time"]  
    recovery_end_time_str = scenario_data["recovery_end_time"]      
    
    recovery_start_minutes = time_to_minutes(recovery_start_time_str)
    recovery_end_minutes = time_to_minutes(recovery_end_time_str)
    total_recovery_period_minutes = recovery_end_minutes - recovery_start_minutes
    
    total_aircraft = scenario_data["total_aircraft"]
    
    # Calculate total flight minutes within the recovery period
    flights = scenario_data["flights"]
    total_flights = len(flights)
    total_flight_minutes_in_recovery = 0
    total_flight_minutes_total = 0
    
    for flight_id, flight_data in flights.items():
        dep_time_str = flight_data["DepTime"]  
        arr_time_str = flight_data["ArrTime"] 
        
        dep_minutes = time_to_minutes(dep_time_str)
        arr_minutes = time_to_minutes(arr_time_str)
        
        total_flight_minutes_total += arr_minutes - dep_minutes
        overlap_start = max(dep_minutes, recovery_start_minutes)
        overlap_end = min(arr_minutes, recovery_end_minutes)
        
        if overlap_end > overlap_start:
            flight_overlap = overlap_end - overlap_start
        else:
            flight_overlap = 0
        
        total_flight_minutes_in_recovery += flight_overlap
    
    # Calculate total aircraft-minutes available during the recovery period
    total_recovery_aircraft_minutes = total_recovery_period_minutes * total_aircraft
    
    # Slack calculation
    if total_recovery_aircraft_minutes == 0:
        slack = 1.0
    else:
        slack = 1 - (total_flight_minutes_in_recovery / total_recovery_aircraft_minutes)
    
    return slack, total_flights, total_flight_minutes_total


def extract_disruption_stats(scenario_data):
    """
    Extract disruption statistics:
    - Count of fully disrupted (prob = 1.0)
    - Count of uncertain disruptions (0 < prob < 1.0)
    - Average probability across all aircraft (where an aircraft's probability is the max disruption probability it faces, 
      with 1.0 for fully disrupted and 0.0 if no disruption)
    - Average uncertainty probability (average of all disruptions where 0<prob<1.0, excluding 0 and 1)
    """
    disruptions_info = scenario_data.get('disruptions', {})
    disruptions_list = disruptions_info.get('disruptions', [])
    total_aircraft = disruptions_info.get('total_aircraft', 0)

    if total_aircraft == 0:
        # No aircraft or no disruptions
        return 0, 0, 0.0, 0.0

    fully_disrupted_count = sum(1 for d in disruptions_list if d.get('probability', 0.0) == 1.0)
    uncertain_disruptions = [d for d in disruptions_list if 0.0 < d.get('probability', 0.0) < 1.0]
    uncertain_count = len(uncertain_disruptions)

    aircraft_ids = scenario_data.get('aircraft_ids', [])
    ac_prob_map = {ac: 0.0 for ac in aircraft_ids}  
    
    for d in disruptions_list:
        ac_id = d.get('aircraft_id')
        p = d.get('probability', 0.0)
        # Keep the max probability for that aircraft
        if ac_id in ac_prob_map:
            ac_prob_map[ac_id] = max(ac_prob_map[ac_id], p)

    avg_ac_prob = sum(ac_prob_map.values()) / total_aircraft if total_aircraft > 0 else 0.0

    # Average uncertainty probability (only consider disruptions where 0<prob<1)
    if len(uncertain_disruptions) > 0:
        avg_uncertainty_prob = np.mean([d['probability'] for d in uncertain_disruptions])
    else:
        avg_uncertainty_prob = 0.0

    return fully_disrupted_count, uncertain_count, avg_ac_prob, avg_uncertainty_prob, total_aircraft

# Path to the scenarios folder
scenario_folder_path = "../logs/scenarios/"
latest_folder = max(
    [f for f in os.listdir(scenario_folder_path) if f.startswith("scenario_folder_")],
    key=lambda x: int(x.split('_')[-1].replace('.json', ''))
)

# latest_folder = "scenario_folder_scenario_74.json" # Training/6ac-10-superdiverse

# latest_folder = "scenario_folder_scenario_77.json" # Training/6ac-10000-superdiverse
latest_folder = "scenario_folder_scenario_4.json" # Testing/6ac-700-diverse

file_path = os.path.join(scenario_folder_path, latest_folder)

# Extract scenario ID
scenario_id = file_path.split('_')[-1].split('.')[0]
print(f"Scenario ID: {scenario_id}")

# Load the JSON data
with open(file_path, 'r') as file:
    data = json.load(file)

# Extract the scenarios from the JSON data
scenarios = data['outputs']


# Extract the data_folder (not strictly necessary for slack calculation, but we print it for context)
data_folder = data['data_folder']
print(f"Data Folder: {data_folder}")

# Calculate slack and disruption stats for each scenario and store in a list of dicts
results = []
for scenario_name, scenario_data in scenarios.items():
    scenario_slack, total_flights, total_flight_minutes_total = calculate_slack_for_scenario(scenario_data)
    fully_disrupted_count, uncertain_count, avg_ac_prob, avg_uncertain_prob, total_aircraft = extract_disruption_stats(scenario_data)
    results.append({
        "Scenario": scenario_name,
        "ScenarioSlack": scenario_slack,
        "TotalFlights": total_flights,
        "TotalFlightMinutes": total_flight_minutes_total,
        "FullyDisruptedCount": fully_disrupted_count,
        "UncertainCount": uncertain_count,
        "AvgAircraftProbability": avg_ac_prob,
        "AvgUncertaintyProbability": avg_uncertain_prob,
        "TotalAircraft": total_aircraft
    })

# Convert results to DataFrame
scenarios_df = pd.DataFrame(results)
print(scenarios_df)

# Save the slack results to CSV
# output_file = os.path.join(scenario_folder_path, f"scenario_slack_metrics_{scenario_id}.csv")
# scenarios_df.to_csv(output_file, index=False)
# print(f"Slack metrics saved to {output_file}")


Scenario ID: 4
Data Folder: ../data/Testing/6ac-700-diverse/
                          Scenario  ScenarioSlack  TotalFlights  \
0    deterministic_na_Scenario_001       0.349048            21   
1    deterministic_na_Scenario_002       0.369713            21   
2    deterministic_na_Scenario_003       0.420513            13   
3    deterministic_na_Scenario_004       0.441493            16   
4    deterministic_na_Scenario_005       0.484896            14   
..                             ...            ...           ...   
695        mixed_high_Scenario_096       0.479683            15   
696        mixed_high_Scenario_097       0.460082            13   
697        mixed_high_Scenario_098       0.424324            20   
698        mixed_high_Scenario_099       0.407037            15   
699        mixed_high_Scenario_100       0.428228            18   

     TotalFlightMinutes  FullyDisruptedCount  UncertainCount  \
0                  4101                    2               0   
1     

<div class="alert alert-block alert-success">
</br>
</br>
</br>
<b>DONE: </b>MERGED DATASET
</br>
</br>
</br>
</br>
</div>

In [29]:
import os
import pandas as pd

scenario_folder_path = "../logs/scenarios/"
# unpack results_df
results_df = pd.read_csv(os.path.join(scenario_folder_path, f"t_results_df_in_rapport_seeds_{len(seeds)}.csv"))


# Merge scenario-level info from scenarios_df into results_df
merged_df = results_df.merge(scenarios_df, on='Scenario', how='left')

# Sort models in desired order
model_order = ['proactive', 'myopic', 'reactive', 'greedy_reactive']
merged_df['Model_Type'] = merged_df['Model'].str.extract('(' + '|'.join(model_order) + ')')
merged_df = merged_df.sort_values('Model_Type')
merged_df["Model"] = merged_df["Model_Type"]
merged_df = merged_df.drop('Model_Type', axis=1)
merged_df_backup = merged_df.copy()


# Update model names in merged_df
merged_df['Model'] = merged_df['Model'].apply(lambda x: 
    'DQN Proactive-U' if x.startswith('proactive') else
    'DQN Proactive-N' if x.startswith('myopic') else 
    'DQN Reactive' if x.startswith('reactive') else
    'Greedy Reactive' if x.startswith('greedy_reactive') else
    x
)

print("Inference Results (After Merging):")
print(merged_df)

# Save the merged results to CSV
merged_output_file = os.path.join(scenario_folder_path, f"scenario_inference_metrics_{scenario_id}.csv")
merged_df.to_csv(merged_output_file, index=False)
print(f"Inference results with scenario info saved to {merged_output_file}")



# print all column names
print("==== Columns: ====")
print(merged_df.columns)

print("==== amount of rows: ====")
print(len(merged_df))

print("==== Models: ====")
print(merged_df["Model"].unique())

print('===== len(seeds) =====')
print(len(merged_df['Seed'].unique()))

print('===== len(scenarios) =====')
print(len(merged_df['Scenario'].unique()))





Inference Results (After Merging):
                              Scenario            Model  Seed  TotalReward  \
139999     stochastic_low_Scenario_093  Greedy Reactive   100       -217.6   
130773          mixed_low_Scenario_063  Greedy Reactive    74      24786.2   
130772          mixed_low_Scenario_063  Greedy Reactive    73      24786.2   
130771          mixed_low_Scenario_063  Greedy Reactive    72      19799.3   
130770          mixed_low_Scenario_063  Greedy Reactive    71      19799.3   
...                                ...              ...   ...          ...   
222263  stochastic_medium_Scenario_085     DQN Reactive    64       9612.9   
222262  stochastic_medium_Scenario_085     DQN Reactive    63       -217.0   
222261  stochastic_medium_Scenario_085     DQN Reactive    62       -217.0   
222268  stochastic_medium_Scenario_085     DQN Reactive    69       9612.9   
35000      stochastic_low_Scenario_091     DQN Reactive     1        -90.8   

        TotalDelays  TotalCa

<div class="alert alert-block alert-info">
<b>Check: </b>Comparison of Models Across All Scenarios
</div>

In [30]:
import pandas as pd
import matplotlib.pyplot as plt

# Define model colors and order
model_colors = {
    'DQN Proactive-U': ('orange', 'DQN Proactive-U'),
    'DQN Proactive-N': ('blue', 'DQN Proactive-N'),
    'DQN Reactive': ('green', 'DQN Reactive'),
    'Greedy Reactive': ('darkgrey', 'Greedy Reactive')
}

# First aggregate by Model and Seed, then calculate mean and std across seeds
comparison_table = (
    merged_df
    .groupby(['Model', 'Seed'])
    .agg(
        TotalReward=('TotalReward', 'mean'),
        ScenarioTime=('ScenarioTime', 'mean'), 
        ScenarioSteps=('ScenarioSteps', 'mean'),
        TotalDelays=('TotalDelays', 'mean'),
        TotalCancelledFlights=('TotalCancelledFlights', 'mean'),
        TotalTailSwaps=('TailSwaps', 'mean')
    )
    .groupby('Model')
    .agg(
        Mean_Reward=('TotalReward', 'mean'),
        Std_Reward=('TotalReward', 'std'),
        Mean_Runtime=('ScenarioTime', 'mean'),
        Std_Runtime=('ScenarioTime', 'std'),
        Mean_Steps=('ScenarioSteps', 'mean'),
        Std_Steps=('ScenarioSteps', 'std'),
        Mean_Delays=('TotalDelays', 'mean'),
        Std_Delays=('TotalDelays', 'std'),
        Mean_CancelledFlights=('TotalCancelledFlights', 'mean'),
        Std_CancelledFlights=('TotalCancelledFlights', 'std'),
        Mean_TailSwaps=('TotalTailSwaps', 'mean'),
        Std_TailSwaps=('TotalTailSwaps', 'std')
    )
    .round(2)
)

# Sort the comparison table according to model order
comparison_table['Model_Type'] = comparison_table.index.map(lambda x: next(m for m in model_order if m in x.lower()))
comparison_table = comparison_table.sort_values('Model_Type')
comparison_table = comparison_table.drop('Model_Type', axis=1)

print("Comparison of Models Across All Scenarios:")
print(comparison_table)


Comparison of Models Across All Scenarios:
                 Mean_Reward  Std_Reward  Mean_Runtime  Std_Runtime  \
Model                                                                 
DQN Proactive-N      6198.37      474.22          0.03          0.0   
DQN Proactive-U     10186.83      535.50          0.03          0.0   
DQN Reactive        -4062.91      443.99          0.03          0.0   
Greedy Reactive     32888.75      403.08          0.17          0.0   

                 Mean_Steps  Std_Steps  Mean_Delays  Std_Delays  \
Model                                                             
DQN Proactive-N        7.52       0.03       343.03        4.18   
DQN Proactive-U        7.44       0.03       301.04        3.25   
DQN Reactive           7.95       0.04       226.31        5.54   
Greedy Reactive        6.89       0.02        19.58        2.53   

                 Mean_CancelledFlights  Std_CancelledFlights  Mean_TailSwaps  \
Model                                          