# Airport coordinate download
In this notebook we build a dataset containing the coordinates of every airport.
The key idea is to search OpenStreetMap (https://www.openstreetmap.org/#map=5/41.951/-103.359) with a query such as "ATL airport" and save the coordinates of the first result we can find.
If the OSM search fails, we look the coordinates up manually and store them in a dictionary.

In [None]:
import folium
from folium import plugins
import pandas as pd
import plotly.express as px
import os
import numpy as np
import osmnx as ox

In [None]:
# Load the filtered delayed-flights dataset located in the current working directory
dataset_path = f"{os.getcwd()}/airlinedelaycauses_DelayedFlights_Filtered.csv"
df = pd.read_csv(dataset_path)

In [None]:
# Airports whose coordinates were looked up manually.
# Maps airport code -> (latitude, longitude) in decimal degrees.
# Every airport listed here is in the Western Hemisphere, so every longitude
# must be negative (a positive value would place the airport in Asia).
not_found_airport = {
    'OMA': (41.3032, -95.8942),
    'MLU': (32.5102, -92.0377),
    'CPR': (42.9036, -106.4647),
    'TEX': (37.9538, -107.9087),
    'HDN': (40.4886, -107.2191),
    'IYK': (35.6586, -117.8281),
    'COD': (44.5207, -109.0245),
    'CDC': (37.7044, -113.0984),
    'PFN': (30.2121, -85.6828),
    'GTR': (33.4504, -88.5914),
    'EYW': (24.5561, -81.7587),
    'ROW': (33.3083, -104.5259),
    'SUX': (42.4026, -96.3846),
    'BRW': (71.2874, -156.7725),
    'SCC': (70.1948, -148.4652),
    'ADQ': (57.7498, -152.4937),
    'WRG': (56.4843, -132.3698),
    'OTZ': (66.8847, -162.5983),
    'RKS': (41.5942, -109.0652),
    'MKG': (43.1699, -86.2389),
    'AKN': (58.6768, -156.6492),
    'WYS': (44.6884, -111.1177),
    'INL': (48.5662, -93.4004),
}

In [None]:
# Define the function that will be called once for each airport
def get_airport_coordinates(initials):
    """Return the ``(latitude, longitude)`` of the airport with the given code.

    Manually collected coordinates in ``not_found_airport`` take precedence.
    Otherwise the place ``"<initials> airport"`` is downloaded from
    OpenStreetMap and the coordinates of the graph node closest to the mean
    position of all downloaded nodes are returned.

    Parameters
    ----------
    initials : str
        Airport code, e.g. ``"ATL"``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` in decimal degrees.

    Raises
    ------
    ValueError
        If the downloaded graph contains no nodes.
    """
    # Manually looked-up airports never hit OSM
    if initials in not_found_airport:
        res = not_found_airport[initials]
        print(f"s: {initials} - lat {res[0]} - long {res[1]}")
        return res

    # Create a query to get airport data based on the initials
    query = f"{initials} airport"

    # Get the graph of the surrounding area
    graph = ox.graph_from_place(query, network_type='all')

    # Approximate the airport's centroid with the mean node position.
    # (Searching around x=0, y=0 would pick the node closest to lon 0 / lat 0,
    # which has nothing to do with the airport's centre.)
    node_positions = [(data['x'], data['y']) for _, data in graph.nodes(data=True)]
    if not node_positions:
        raise ValueError(f"No OSM nodes found for '{query}'")
    center_x = sum(x for x, _ in node_positions) / len(node_positions)
    center_y = sum(y for _, y in node_positions) / len(node_positions)

    # nearest_nodes expects X = longitude, Y = latitude
    centroid = ox.distance.nearest_nodes(graph, center_x, center_y)

    # Extract the latitude and longitude of the centroid node
    latitude = graph.nodes[centroid]['y']
    longitude = graph.nodes[centroid]['x']
    print(f"s: {initials} - lat {latitude} - long {longitude}")

    return latitude, longitude

In [None]:
# Get a list of each airport mentioned in the dataset at least once.
# Missing origins are dropped: a NaN would otherwise be queried as "nan airport".
airports_initials = df['Origin'].dropna().unique().tolist()
print(len(airports_initials))

# Collect one record per airport; the list is converted to the final dataframe below
airports_coord = []
for i, initials in enumerate(airports_initials):  # For each airport
    print(initials, i)
    # Get latitude and longitude
    lat, lon = get_airport_coordinates(initials)

    # And add them to the list
    airports_coord.append({'Airport': initials, 'Latitudine': lat, 'Longitudine': lon})

# Convert the list to a dataframe
coord_df = pd.DataFrame(airports_coord)
coord_df.head()

In [None]:
# Use the airport code as the index.
# Guarded so that re-running this cell does not raise KeyError once
# 'Airport' has already been moved from the columns into the index.
if coord_df.index.name != 'Airport':
    coord_df = coord_df.set_index('Airport')
coord_df.head()

In [None]:
# Persist the coordinates table to disk as a CSV file
coord_df.to_csv(path_or_buf='airport_coords.csv')