### Dataset

Loading airport and flight connection dataset from https://openflights.org/data.html

In [2]:
import pandas as pd

# Column names are supplied explicitly via `names=`, so pandas reads the first
# line of each file as data rather than as a header.
airport_columns = [
    "Airport ID", "Name", "City", "Country", "IATA", "ICAO",
    "Latitude", "Longitude", "Altitude", "Timezone", "DST",
    "Tz database time zone", "Type", "Source",
]
route_columns = [
    "Airline", "Airline ID",
    "Source airport ID", "Source airport ID",
    "Destination airport", "Destination airport ID",
    "Codeshare", "Stops", "Equipment",
]
# Keep the two ID/code columns for the source airport distinct.
route_columns[2] = "Source airport"

# The literal \N is treated as a missing value; the nullable "Int32" dtype lets
# the ID columns stay integral even when some entries are missing.
df_airports = pd.read_csv(
    "data/airports.dat",
    names=airport_columns,
    dtype={"Airport ID": "Int32"},
    na_values=r"\N",
)

df_routes = pd.read_csv(
    "data/routes.dat",
    names=route_columns,
    dtype=dict.fromkeys(["Source airport ID", "Destination airport ID"], "Int32"),
    na_values=r"\N",
)

In [3]:
# Airport dataset
# Preview the first three rows to check that the column names line up with the data.
df_airports.head(3)

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz database time zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10.0,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10.0,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10.0,U,Pacific/Port_Moresby,airport,OurAirports


In [4]:
# Flight route database
# Preview the first three rows to check that the column names line up with the data.
df_routes.head(3)

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment
0,2B,410.0,AER,2965,KZN,2990,,0,CR2
1,2B,410.0,ASF,2966,KZN,2990,,0,CR2
2,2B,410.0,ASF,2966,MRV,2962,,0,CR2


#### Construct Route Graph

We will construct a simple route graph based on
- geodistance as flight distance
- approximated "ground" distance for close airports

In [141]:
# we construct edge list in dataframe to represent route graph
import numpy as np

#
# calculate flight route (graph in edge list) ...
#

# Attach coordinates to every route by joining the airport table twice: first
# on the source ID, then on the destination ID. Both joins are inner joins, so
# a route is kept only if both of its airport IDs are found in df_airports.
# The second join suffixes the clashing airport columns with _src / _dst.
df_flight_route_graph = (
    df_routes[['Source airport ID', 'Destination airport ID']]
    .merge(
        df_airports[['Airport ID', 'Latitude', 'Longitude']],
        how="inner",
        left_on='Source airport ID',
        right_on='Airport ID',
    )
    .merge(
        df_airports[['Airport ID', 'Latitude', 'Longitude']],
        how="inner",
        left_on='Destination airport ID',
        right_on='Airport ID',
        suffixes=['_src', '_dst'],
    )
)
# calculate (haversine) flight distance
def geodistance(latlon1, latlon2, radius=6371):
    """Return the great-circle (haversine) distance between two points.

    Parameters
    ----------
    latlon1, latlon2 : sequence of two items
        ``(latitude, longitude)`` in degrees. Each item may be a scalar or an
        array-like such as a pandas Series; array-likes are handled
        element-wise through NumPy.
    radius : float, default 6371
        Radius of the sphere. The result is expressed in the same unit; the
        default is the mean Earth radius in kilometres.

    Returns
    -------
    float or array-like
        Distance along the sphere's surface, same shape as the inputs.
    """
    lat1, lon1 = latlon1
    lat2, lon2 = latlon2
    phi1, lambda1, phi2, lambda2 = (np.radians(c) for c in (lat1, lon1, lat2, lon2))
    a = (np.square(np.sin((phi2 - phi1) / 2))
         + np.cos(phi1) * np.cos(phi2) * np.square(np.sin((lambda2 - lambda1) / 2)))
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    return 2 * radius * np.arcsin(np.sqrt(a))
# Turn the joined frame into an edge list: compute the distance per route,
# discard the helper columns that were only needed for that computation, and
# keep strictly positive distances (this removes zero-distance routes; a NaN
# distance also fails the `> 0` comparison and is dropped).
df_flight_route_graph = (
    df_flight_route_graph
    .assign(
        flight_distance=lambda routes: geodistance(
            [routes['Latitude_src'], routes['Longitude_src']],
            [routes['Latitude_dst'], routes['Longitude_dst']],
        )
    )
    .drop(columns=[
        'Airport ID_src', 'Airport ID_dst',
        'Latitude_src', 'Longitude_src',
        'Latitude_dst', 'Longitude_dst',
    ])
    .loc[lambda routes: routes['flight_distance'] > 0]
)

In [144]:
# Show the flight edge list: source ID, destination ID and haversine distance.
df_flight_route_graph

Unnamed: 0,Source airport ID,Destination airport ID,flight_distance
0,2965,2990,1506.825604
1,2966,2990,1040.438320
2,2968,2990,770.508500
3,4029,2990,715.649350
4,4029,2990,715.649350
...,...,...,...
66766,5543,5490,9.111213
66767,5522,5482,302.130756
66768,5543,5482,87.959670
66769,6276,9904,144.852937


In [143]:
from sklearn.metrics.pairwise import haversine_distances

#
# approximate "ground" route distance
#

approimate_factor = 1.5  # ground distance is estimated as haversine distance * this factor
upper_limit = 100 #km
r = 6371  # sphere radius used to scale the haversine result (Earth radius in km)

# Pairwise distance estimate between all airports.
# The masking below is done on a plain NumPy array owned by this cell rather
# than through DataFrame.values: with pandas Copy-on-Write, .values is
# read-only and np.fill_diagonal on it raises.
coords_rad = np.radians(df_airports[['Latitude', 'Longitude']].to_numpy(dtype=float))
ground_dist = r * haversine_distances(coords_rad) * approimate_factor

# An airport is not its own neighbour, and pairs farther apart than
# upper_limit are not considered reachable over ground.
# (np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.)
np.fill_diagonal(ground_dist, np.nan)
ground_dist[ground_dist > upper_limit] = np.nan

# adjacency matrix of all airports (NaN = no ground connection)
df_ground_dist_adj_m = pd.DataFrame(ground_dist,
                                    columns=df_airports['Airport ID'],
                                    index=df_airports['Airport ID'])

# Convert the surviving cells to an edge list. Picking the non-NaN positions
# explicitly (row-major order, same as stack()) avoids depending on stack()
# dropping NaN implicitly, which the newer pandas stack implementation no
# longer does. Taking from the extension array keeps the nullable Int32 dtype
# of the airport IDs.
src_pos, dst_pos = np.nonzero(~np.isnan(ground_dist))
airport_ids = df_airports['Airport ID'].array
df_ground_route_graph = pd.DataFrame({
    "Source airport ID": airport_ids.take(src_pos),
    "Destination airport ID": airport_ids.take(dst_pos),
    "ground_distance": ground_dist[src_pos, dst_pos],
})

In [145]:
# Show the ground edge list: airport pairs whose estimated ground distance is within upper_limit.
df_ground_route_graph

Unnamed: 0,Source airport ID,Destination airport ID,ground_distance
0,1,5420,70.482225
1,3,5437,73.249101
2,5,11018,87.025191
3,5,11019,90.016735
4,5,11020,91.325569
...,...,...,...
17961,14105,8661,82.559362
17962,14105,11988,64.063491
17963,14107,2938,31.714277
17964,14109,4312,54.596735


In [146]:
#
# combine flight & ground routes
#
# Outer join on the (source, destination) pair: an edge present in only one of
# the two graphs is kept, with NaN in the distance column of the other graph.
df_route_graph = df_flight_route_graph.merge(
    df_ground_route_graph,
    how="outer",
    on=["Source airport ID", "Destination airport ID"],
)

In [147]:
# Show the combined edge list; each row carries a flight distance, a ground distance, or both.
df_route_graph

Unnamed: 0,Source airport ID,Destination airport ID,flight_distance,ground_distance
0,2965,2990,1506.825604,
1,2966,2990,1040.438320,
2,2968,2990,770.508500,
3,4029,2990,715.649350,
4,4029,2990,715.649350,
...,...,...,...,...
84401,14105,8661,,82.559362
84402,14105,11988,,64.063491
84403,14107,2938,,31.714277
84404,14109,4312,,54.596735


### Save dataframe

In [148]:
# Write each frame built above to its own pickle file under data/.
# The mapping is iterated in insertion order, so files are written in the
# order listed here.
for pickle_path, frame in {
    'data/df_airports.pkl': df_airports,
    'data/df_routes.pkl': df_routes,
    'data/df_flight_route_graph.pkl': df_flight_route_graph,
    'data/df_ground_route_graph.pkl': df_ground_route_graph,
    'data/df_route_graph.pkl': df_route_graph,
}.items():
    frame.to_pickle(pickle_path)

### Demo

In [6]:
from flight_connection.connection import find_shortest_connection, find_airport, g_flight_routes, g_ground_routes

# Look up a connection between two airports given by their codes.
# NOTE(review): the result appears to be a list of airport IDs, or None when
# no connection is found - confirm against flight_connection.connection.
route = find_shortest_connection(departure_code='MUC', destination_code='BRC')
print(route)

# Translate the IDs on the route into airport names for readability.
if route is None:
    airports = None
else:
    airports = [find_airport(id=airport_id)['Name'] for airport_id in route]
print(airports)


[346, 1218, 3988, 2513]
['Munich Airport', 'Barcelona International Airport', 'Ministro Pistarini International Airport', 'San Carlos De Bariloche Airport']
