# Generated flight route analysis
- Min, Max, Avg, mean, median distance
- Sectors analysis:
    - Example: VV-TSN VV-LATHA VV-NIXUP VV-CN WS-ESPOB WS-ENREP WS-VEPLI WM-EGOLO WM-ROBMO WM-VMR WS-PU20 WS-VTK -> unique VV4WS2WM2WS1 -> count unique
                                            - > VVWSWMWS -> count

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import groupby

## Load dataframe

In [4]:
PATH = "synthetic_flightplan.csv"
df = pd.read_csv(PATH)
print(df.head())

  origin destination  min_dist_origin  min_dist_dest  route_distances  \
0    PHS         DMK         1.032578       1.305384       174.521034   
1    PHS         DMK         1.032578       1.305384       309.726818   
2    PHS         DMK         1.032578       1.305384       310.535807   
3    PHS         DMK         1.032578       1.305384       347.363798   
4    PHS         DMK         1.032578       1.305384       348.172787   

   total_distances                                              route  
0       176.858995                       VT-PSL VT-PEBLI VT-TL VT-BKK  
1       312.064780  VT-PSL VT-PEBLI VT-TL VT-KRT VT-UBLOD VT-MALKI...  
2       312.873769  VT-PSL VT-PEBLI VT-TL VT-KRT VT-UBLOD VT-PIPOB...  
3       349.701760  VT-PSL VT-GOKON VT-PCB VT-CMP VT-KKN VT-UBLOD ...  
4       350.510749  VT-PSL VT-GOKON VT-PCB VT-CMP VT-KKN VT-UBLOD ...  


## Descriptive flight route analysis

In [17]:
df.total_distances.describe()

count    145751.000000
mean       1405.501531
std         956.220606
min           0.213919
25%         745.155369
50%        1122.979553
75%        1760.735573
max        7020.821435
Name: total_distances, dtype: float64

In [18]:
unique_od_pairs = df.groupby(['origin', 'destination'])
unique_od_pairs.total_distances.describe().sort_values(by='count', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
origin,destination,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ZVK,VTE,100.0,711.049835,67.028404,441.702801,709.722618,734.838413,754.068944,766.671101
ZBY,VTE,100.0,936.358872,93.445092,493.599846,915.447560,967.126492,993.182547,1018.957023
ADL,DPS,100.0,3122.438555,128.087176,2095.112988,3101.557728,3149.459097,3187.706189,3238.107157
ADL,KUL,100.0,3206.537289,46.486918,3119.825384,3168.396937,3196.698608,3253.408040,3274.866804
ADL,SIN,100.0,3195.251112,62.235345,2981.116012,3171.023839,3210.899799,3243.675527,3264.405901
...,...,...,...,...,...,...,...,...,...
UPG,MOH,1.0,218.536137,,218.536137,218.536137,218.536137,218.536137,218.536137
UPG,KXB,1.0,175.720693,,175.720693,175.720693,175.720693,175.720693,175.720693
UPG,KDI,1.0,188.752760,,188.752760,188.752760,188.752760,188.752760,188.752760
AMQ,SXK,1.0,334.078128,,334.078128,334.078128,334.078128,334.078128,334.078128


In [19]:
# Top 10 od_pairs with the highest standard deviation of total_distances
k=10
top_k_od_pairs = unique_od_pairs.total_distances.std().sort_values(ascending=False).head(k)
for i, (od_pair, std) in enumerate(top_k_od_pairs.items()):
    print(f"{i+1}. {od_pair[0]} {od_pair[1]}: {std:.2f}nm")

1. BMU UPG: 825.07nm
2. SOQ BXB: 690.19nm
3. KOE LWE: 688.24nm
4. KOE MOF: 688.24nm
5. BPN PLW: 682.19nm
6. KOE ABU: 652.39nm
7. MKQ DJJ: 617.27nm
8. NRE AMQ: 604.33nm
9. NYW HOX: 600.82nm
10. NYU HEH: 552.65nm


## Sector analysis

In [20]:

def route_to_sector_unqiue_count(route: str, delimiter="-") -> str:
    """ 
    Example:
    "VV-TSN VV-LATHA VV-NIXUP VV-CN WS-ESPOB WS-ENREP WS-VEPLI WM-EGOLO WM-ROBMO WM-VMR WS-PU20 WS-VTK" -> "VV4-WS3-WM3-WS2"
    """
    # Split the route into segments; handle empty input
    segments = route.split()
    if not segments:
        return ""
    
    # Define a key function to extract the two-letter code from each segment
    def key_func(segment):
        return segment.split('-')[0]
    
    # Initialize result list to store code-count pairs
    result = []
    
    # Group consecutive segments by their code and count each group
    for code, group in groupby(segments, key=key_func):
        count = sum(1 for _ in group)  # Count the number of segments in the group
        result.append(code + str(count))  # Append code followed by count
    
    # Join all parts into a single string without separators
    return f'{delimiter}'.join(result)

def route_to_sector_unqiue(route: str, delimiter="-") -> str:
    # Split the route into segments
    segments = route.split()
    # Handle empty input
    if not segments:
        return ""
    
    # Extract the two-letter code from each segment
    codes = [segment.split('-')[0] for segment in segments]
    
    # Remove consecutive duplicates using groupby
    unique_codes = [key for key, _ in groupby(codes)]
    
    # Join the unique codes with hyphens
    return f'{delimiter}'.join(unique_codes)

In [21]:
def filter_routes(df: pd.DataFrame) -> pd.DataFrame:
    """
    """
    idx = df.groupby('sector_with_count_waypoint')['total_distances'].idxmin()
    df_final = df.loc[idx].reset_index(drop=True)
    
    return df_final

In [22]:
unique_count_sectors_df = df.route.apply(route_to_sector_unqiue_count)
df['sector_with_count_waypoint'] = unique_count_sectors_df
unique_sectors_df = df.route.apply(lambda x: len(route_to_sector_unqiue(x).split("-")))
df['count_unique_sector'] = unique_sectors_df
df.tail()

Unnamed: 0,origin,destination,min_dist_origin,min_dist_dest,route_distances,total_distances,route,sector_with_count_waypoint,count_unique_sector
145746,OIR,HKD,0.471177,21.51387,857.91451,879.899557,RJ-ORE RJ-MAIKA RJ-ESASI RJ-PATRA RJ-HWE RJ-TA...,RJ26,1
145747,OIR,HKD,25.485857,21.601246,851.398901,898.486005,RJ-MAIKA RJ-ESASI RJ-PATRA RJ-HWE RJ-ARIKA RJ-...,RJ25,1
145748,OIR,HKD,0.471177,21.601246,876.415758,898.488181,RJ-ORE RJ-MAIKA RJ-ESASI RJ-PATRA RJ-HWE RJ-AR...,RJ26,1
145749,OIR,HKD,25.485857,21.601246,853.255785,900.342888,RJ-MAIKA RJ-ESASI RJ-PATRA RJ-HWE RJ-TAPPI RJ-...,RJ26,1
145750,OIR,HKD,0.471177,21.601246,878.272641,900.345064,RJ-ORE RJ-MAIKA RJ-ESASI RJ-PATRA RJ-HWE RJ-TA...,RJ27,1


In [24]:
od_df_pairs = df.groupby(['origin', 'destination'])

In [None]:
results = pd.DataFrame(columns=["origin","destination","min_dist_origin","min_dist_dest","route_distances","total_distances","route","sector_with_count_waypoint","count_unique_sector"])
for i, (od_pair, od_df) in enumerate(od_df_pairs):
    od_routes = filter_routes(od_df)
    od_routes = od_routes.sort_values(by=["total_distances", "count_unique_sector"]).iloc[:100]
    results = pd.concat([results, od_routes], ignore_index=True)


  results = pd.concat([results, od_routes], ignore_index=True)


In [27]:
results

Unnamed: 0,origin,destination,min_dist_origin,"min_dist_dest,route_distances",total_distances,route,sector_with_count_waypoint,count_unique_sector,min_dist_dest,route_distances
0,ABU,KOE,22.879036,,1182.326011,WA-POVOT WA-KEONG WA-ELBAM WA-BLI WA-GOMAT WA-...,WA7,1,1.263595,1158.183381
1,ABU,KOE,22.879036,,1194.436163,WA-POVOT WA-KEONG WA-ELBAM WA-BLI WA-LMB WA-SM...,WA8,1,1.263595,1170.293533
2,ABU,KOE,22.879036,,1323.300792,WA-POVOT WA-KEONG WA-ELBAM WA-BLI WA-ENTAS WA-...,WA9,1,1.263595,1299.158162
3,ABU,KOE,22.879036,,1619.977359,WA-POVOT WA-KEONG WA-KEVOK WA-GALKO WA-BLI WA-...,WA10,1,1.263595,1595.834728
4,ABU,KOE,22.879036,,1624.326060,WA-POVOT WA-KEONG WA-ELBAM WA-BLI WA-UDONO WA-...,WA11,1,1.263595,1600.183429
...,...,...,...,...,...,...,...,...,...,...
54906,ZVK,VTE,0.507756,,747.203538,VL-SAV VT-OKENA VT-GUROK VT-RAMEI VT-KRT VT-UB...,VL1-VT13-VL1,3,1.846419,744.849363
54907,ZVK,VTE,0.507756,,757.109092,VL-SAV VL-LASAP VL-PAK VT-UBL VT-PASAT VT-SANO...,VL3-VT13-VL1,3,1.846419,754.754918
54908,ZYI,WUH,11.266840,,426.994819,ZP-EKADI ZP-TAXOR ZP-EPGAL ZP-HUY ZG-BODUV ZG-...,ZP4-ZG2-ZH3,3,0.184300,415.543679
54909,ZYI,WUH,6.913668,,436.087056,ZP-MEMAG ZP-EKADI ZP-TAXOR ZP-EPGAL ZP-HUY ZG-...,ZP5-ZG2-ZH3,3,0.184300,428.989088


# Evaluate with trajectory metric