# Geospatial analysis

In [None]:
import pandas as pd
import numpy as np
import folium
import cartopy
import cartopy.crs as ccrs

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Load the cleaned flights table, using the first CSV column as the index.
flights_all = pd.read_csv('../data/clean_data/flights_data.csv', index_col=0)

# Independent copy of only the rows whose origin is Newark (EWR).
ewr_flights = flights_all[flights_all['origin'].eq('EWR')].copy()

In [None]:
# Count the missing values in each column of the EWR flights.
ewr_flights.isna().sum()

# Format ewr_flights for geospatial plotting 
Need to summarise the data per destination airport so that it can be plotted on a map.

88 different destination airports.

Summary columns 
- percentage delayed flights per destination airport sized by total number of flights?
- percentage delayed flights per destination airport coloured/sized by delay time?
- percentage delayed flights selecting top 5 delayed airlines on the route?


In [None]:
# Preview the first rows of the EWR flights.
ewr_flights.head()

In [None]:
# List the column names available for the summary.
ewr_flights.columns

In [None]:
# Mean departure delay for each value of the `flight` column, largest first.
(
    ewr_flights
    .groupby('flight')['dep_delay']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='mean_dep_delay')
)

In [None]:
# Working copy holding only the location, carrier, delay and weather columns.
ewr_geo = ewr_flights[[
    'ori_airport', 'ori_lat', 'ori_lon', 'ori_alt',
    'dest_airport', 'dest_lat', 'dest_lon',
    'carrier_name', 'dep_delay', 'dep_delay_true',
    'air_time', 'distance', 'hour', 'wind_speed', 'visib',
]].copy()

In [None]:
# Mean departure delay per destination airport, largest first.
(
    ewr_geo
    .groupby('dest_airport')['dep_delay']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='mean_dep_delay')
)

In [None]:
# Mean departure delay for every (destination, carrier) pair, largest first.
(
    ewr_geo
    .groupby(['dest_airport', 'carrier_name'])['dep_delay']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='mean_dep_delay_per_carrier')
)

In [None]:
# Mean departure delay per destination airport, largest first.
(
    ewr_geo
    .groupby('dest_airport')['dep_delay']
    .mean()
    .sort_values(ascending=False)
    .reset_index(name='mean_dep_delay')
)

# Write each destination's mean delay onto every row for that destination.
ewr_geo['mean_dep_delay_per_dest'] = (
    ewr_geo.groupby('dest_airport')['dep_delay'].transform('mean')
)

In [None]:
def percent_delay(df, col='dest_airport'):
    """Add per-group delay summary columns to ``df`` in place.

    Three columns are written, each suffixed with ``col``:

    * ``dep_delay_per_<col>``: sum of ``dep_delay_true`` within the group
      (presumably a 0/1 "was delayed" flag; confirm against the cleaning step).
    * ``total_flights_per_<col>``: count of non-null ``dep_delay`` values
      within the group.
    * ``percent_delay_per_<col>``: the first divided by the second, times 100.

    Args:
        df: DataFrame containing ``col``, ``dep_delay`` and ``dep_delay_true``.
        col: Name of the column to group by. Defaults to ``'dest_airport'``,
            the grouping described in the notebook's summary notes.

    Returns:
        None. ``df`` is modified in place.
    """
    # Build the grouping once; all three columns share it.
    grouped = df.groupby(col)
    delayed = grouped['dep_delay_true'].transform('sum')
    total = grouped['dep_delay'].transform('count')

    df['dep_delay_per_' + col] = delayed
    df['total_flights_per_' + col] = total
    # A group with no recorded dep_delay has a count of 0; mask it so the
    # percentage becomes NaN rather than inf.
    df['percent_delay_per_' + col] = delayed / total.where(total != 0) * 100

In [None]:
# Add the delay percentage columns grouped by destination airport
# (percent_delay requires the grouping column to be named).
percent_delay(ewr_geo, 'dest_airport')

In [None]:
# Keep the first row for each (destination, mean delay) combination.
ewr_unique_dest = ewr_geo.drop_duplicates(
    subset=['dest_airport', 'mean_dep_delay_per_dest'],
)

In [None]:
# Narrow to the endpoint coordinates and the per-destination mean delay.
ewr_unique_dest = ewr_unique_dest[[
    'ori_airport', 'ori_lat', 'ori_lon',
    'dest_airport', 'dest_lat', 'dest_lon',
    'mean_dep_delay_per_dest',
]].copy()

In [None]:
# Drop, in place, every row that has a missing value in any remaining column.
ewr_unique_dest.dropna(inplace = True)

In [None]:
# Preview the per-destination table.
ewr_unique_dest.head()

In [None]:
# (rows, columns) of the per-destination table.
ewr_unique_dest.shape

# Ready to plot origin airport to destination airport
Try to colour each route by `mean_dep_delay_per_dest`.

In [None]:
# Default size, resolution and font size for figures rendered in the notebook.
plt.rcParams.update({
    'figure.figsize': [5, 3],
    'figure.dpi': 120,
    'font.size': 10,
})

In [None]:
# background map
# Bounding box as [lon_min, lon_max, lat_min, lat_max] in degrees.
extent = [-150, -40, 5, 70]
ax = plt.axes(projection=ccrs.Mercator())
# Apply the bounding box (it was previously defined but never used); the
# values are geographic degrees, hence the PlateCarree coordinate system.
ax.set_extent(extent, crs=ccrs.PlateCarree())
ax.coastlines()

In [None]:
def main():
    """Show a global Robinson-projection map with a single point marker."""
    figure = plt.figure(figsize=(10, 5))
    world_ax = figure.add_subplot(111, projection=ccrs.Robinson())

    # Show the whole globe instead of zooming to the plotted data's extent.
    world_ax.set_global()

    # Background image plus coastline outlines.
    world_ax.stock_img()
    world_ax.coastlines()

    # Single marker given as (lon, lat) in plain geographic coordinates.
    world_ax.plot(-0.08, 51.53, 'o', transform=ccrs.PlateCarree())

    plt.show()

In [None]:
# Entry-point guard: call main() when __name__ is '__main__'.
if __name__ == '__main__':
    main()

In [None]:
import geopandas as gpd
import plotly.graph_objects as go

In [None]:
ewr_unique_dest.head()

# --- Figure 1: one hard-coded route drawn as a blue line ---
fig = go.Figure()
fig.add_trace(
    go.Scattergeo(
        lat=[40.692501, 28.429399],
        lon=[-74.168701, -81.308998],
        mode='lines',
        line={'width': 1, 'color': 'blue'},
    )
)
fig.update_layout(
    title_text='Connection Map Depicting Flights from Newark',
    height=700,
    width=900,
    margin=dict(t=0, b=0, l=0, r=0, pad=0),
    showlegend=False,
)
fig.show()

# --- Figure 2: one line trace per row of ewr_unique_dest ---
fig = go.Figure()

# Endpoint coordinates and mean delay, zipped row by row.
ori_to_dest = zip(
    ewr_unique_dest['ori_lat'],
    ewr_unique_dest['dest_lat'],
    ewr_unique_dest['ori_lon'],
    ewr_unique_dest['dest_lon'],
    ewr_unique_dest['mean_dep_delay_per_dest'],
)

# The mean delay is unpacked but not yet used for styling the line.
for ori_lat, dest_lat, ori_lon, dest_lon, mean_dep_delay in ori_to_dest:
    route_trace = go.Scattergeo(
        lat=[ori_lat, dest_lat],
        lon=[ori_lon, dest_lon],
        mode='lines',
    )
    fig.add_trace(route_trace)

fig.update_layout(
    title_text='Connection Map Depicting Flights from Newark',
    height=700,
    width=900,
    margin=dict(t=0, b=0, l=0, r=0, pad=0),
    showlegend=False,
)
fig.show()

In [None]:
# (rows, columns) of the per-destination table.
ewr_unique_dest.shape

In [None]:
# Summary statistics for every column, numeric or not.
ewr_unique_dest.describe(include = 'all')

In [None]:
# Candidate colour value: the square of the per-destination mean delay.
# NOTE(review): squaring discards the sign, so a negative mean and a positive
# mean of equal magnitude produce the same value.
ewr_unique_dest['mean_dest_delay_color'] = ewr_unique_dest['mean_dep_delay_per_dest'] ** 2

In [None]:
# Show the colour values rounded to whole numbers (the column is not modified).
ewr_unique_dest['mean_dest_delay_color'].round(0)

In [None]:
# Base folium map centred on the same coordinates used as the Newark
# endpoint in the hard-coded Scattergeo route.
us_map = folium.Map(location=[40.6925, -74.1687], 
zoom_start = 7,
tiles="cartodbpositron")

In [None]:
# Confirm how many missing values remain in each column.
ewr_unique_dest.isna().sum()

In [None]:

    # Demo map for trying out folium.ColorLine on a closed square path.
    test_map = folium.Map([22.5, 22.5], zoom_start=3)

    # Five positions (the last repeats the first) with four colour values,
    # mapped through a four-colour colormap.
    color_line = folium.ColorLine(
        [[0, 0], [0, 45], [45, 45], [45, 0], [0, 0]],
        [0, 1, 2, 3],
        colormap=['b', 'g', 'y', 'r'],
        nb_steps=4,
        weight=10,
        opacity=1).add_to(test_map)

In [None]:
# Small demo map for a green-to-red ColorLine.
test_map = folium.Map(
    location=[51.5, -0.115],
    zoom_start=13,
    tiles='cartodbpositron',
)

# Four positions with three colour values.
folium.ColorLine(
    [
        (51.49, -0.10),
        (51.50, -0.105),
        (51.51, -0.103),
        (51.52, -0.11),
    ],
    colors=[0, 1, 2],
    colormap=['green', 'red'],
    weight=10,
    opacity=0.25,
).add_to(test_map)

test_map

In [None]:
us_map = folium.Map(location=[40.6925, -74.1687],
                    zoom_start=7,
                    tiles="cartodbpositron")

# The table only holds EWR departures, so the origin repeats on every row.
# Draw each distinct origin once instead of stacking one marker per
# destination on the same spot.
origins = ewr_unique_dest.drop_duplicates(['ori_lat', 'ori_lon'])
for origin in origins.itertuples(index=False):
    folium.CircleMarker([origin.ori_lat, origin.ori_lon],
                        radius=5,
                        fill_color="#3db7e4",
                        ).add_to(us_map)

# One red marker and one red connecting line per destination.
for route in ewr_unique_dest.itertuples(index=False):
    folium.CircleMarker([route.dest_lat, route.dest_lon],
                        radius=5,
                        fill_color="red",
                        ).add_to(us_map)

    folium.PolyLine([[route.ori_lat, route.ori_lon],
                     [route.dest_lat, route.dest_lon]],
                    color='red').add_to(us_map)

>>> dict_speed = {}
>>> cities_all = df.index.to_list()
>>> 
>>> for origin in df.index:
...     dict_speed[origin] = {
...         desti: np.random.uniform(3, 30) for desti in cities_all if origin != desti
...     }
...     cities_all.remove(origin)
... 
>>> dict_speed
{'Amsterdam': {'Utrecht': 11.17300608896625, 'Haarlem': 28.078139459927645},
 'Utrecht': {'Haarlem': 17.07939077211872},
 'Haarlem': {}}

In [None]:
# Render the folium map.
us_map

In [None]:
# Largest per-destination mean departure delay.
ewr_unique_dest.mean_dep_delay_per_dest.max()

In [None]:
# Smallest per-destination mean departure delay.
ewr_unique_dest.mean_dep_delay_per_dest.min()

In [None]:
import plotly.graph_objects as go

# Three-point sample trace to check that Scattermapbox renders.
sample_trace = go.Scattermapbox(
    mode="markers+lines",
    lon=[10, 20, 30],
    lat=[10, 20, 30],
    marker=dict(size=10),
)
fig = go.Figure(sample_trace)

# Base map style, zero margins and title set in a single call.
fig.update_layout(
    mapbox_style='carto-positron',
    margin=dict(r=0, t=0, l=0, b=0),
    title_text="percent_delay_all_months",
)

fig.show()

In [None]:
import plotly.express as px

# NOTE(review): only the origin coordinates are passed here, so no
# destination endpoints are plotted; verify this is intended.
test_fig = px.line_mapbox(
    ewr_unique_dest,
    lat='ori_lat',
    lon='ori_lon',
    color='mean_dep_delay_per_dest',
    zoom=9,
    height=600,
    width=1300,
)

# Base map style, zero margins and title set in a single call.
test_fig.update_layout(
    mapbox_style='carto-positron',
    margin=dict(r=0, t=0, l=0, b=0),
    title_text="percent_delay_all_months",
)
test_fig.show()

In [None]:
#https://plotly.com/python/lines-on-maps/
import plotly.graph_objects as go
import pandas as pd

# Example datasets read from the plotly datasets repository.
df_airports = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_february_us_airport_traffic.csv')
df_airports.head()

df_flight_paths = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_february_aa_flight_paths.csv')
df_flight_paths.head()

fig = go.Figure()

# Airport markers; hovering shows the airport name.
fig.add_trace(go.Scattergeo(
    locationmode = 'USA-states',
    lon = df_airports['long'],
    lat = df_airports['lat'],
    hoverinfo = 'text',
    text = df_airports['airport'],
    mode = 'markers',
    marker = dict(
        size = 2,
        color = 'rgb(255, 0, 0)',
        line = dict(
            width = 3,
            color = 'rgba(68, 68, 68, 0)'
        )
    )))

# The largest count is the same for every path, so compute it once
# instead of on every loop iteration.
max_cnt = float(df_flight_paths['cnt'].max())

# One line per flight path, with opacity scaled by cnt relative to the maximum.
for path in df_flight_paths.itertuples(index=False):
    fig.add_trace(
        go.Scattergeo(
            locationmode = 'USA-states',
            lon = [path.start_lon, path.end_lon],
            lat = [path.start_lat, path.end_lat],
            mode = 'lines',
            line = dict(width = 1,color = 'red'),
            opacity = float(path.cnt) / max_cnt,
        )
    )

fig.update_layout(
    title_text = 'Feb. 2011 American Airline flight paths<br>(Hover for airport names)',
    showlegend = False,
    geo = dict(
        scope = 'north america',
        projection_type = 'azimuthal equal area',
        showland = True,
        landcolor = 'rgb(243, 243, 243)',
        countrycolor = 'rgb(204, 204, 204)',
    ),
)

fig.show()

In [None]:
test_fig_2 = go.Figure()

# Cast all four endpoint coordinate columns to float before plotting.
for coord_col in ('ori_lon', 'dest_lon', 'ori_lat', 'dest_lat'):
    ewr_unique_dest[coord_col] = ewr_unique_dest[coord_col].astype(float)

# One origin-to-destination trace per row (styling the line by
# mean_dep_delay_per_dest was tried and left disabled).
for _, route in ewr_unique_dest.iterrows():
    route_trace = go.Scattermapbox(
        mode="lines+markers",
        lon=[route['ori_lon'], route['dest_lon']],
        lat=[route['ori_lat'], route['dest_lat']],
    )
    test_fig_2.add_trace(route_trace)

# Base map style, zero margins and title set in a single call.
test_fig_2.update_layout(
    mapbox_style='carto-positron',
    margin=dict(r=0, t=0, l=0, b=0),
    title_text="percent_delay_all_months",
)
test_fig_2.show()

In [None]:
# Text columns of the form "<origin>, <destination>", with each coordinate
# rounded to four decimal places.
ewr_unique_dest['ori_dest_lat'] = (
    ewr_unique_dest['ori_lat'].round(4).astype(str)
    .str.cat(ewr_unique_dest['dest_lat'].round(4).astype(str), sep=', ')
)
ewr_unique_dest['ori_dest_lon'] = (
    ewr_unique_dest['ori_lon'].round(4).astype(str)
    .str.cat(ewr_unique_dest['dest_lon'].round(4).astype(str), sep=', ')
)

In [None]:
# Text columns of the form "<origin>, <destination>", with each coordinate
# rounded to four decimal places.
# NOTE(review): these two assignments are identical to the ones in the
# preceding cell.
ewr_unique_dest['ori_dest_lat'] = ewr_unique_dest['ori_lat'].round(4).astype(str) + ', ' + ewr_unique_dest['dest_lat'].round(4).astype(str)
ewr_unique_dest['ori_dest_lon'] = ewr_unique_dest['ori_lon'].round(4).astype(str) + ', ' + ewr_unique_dest['dest_lon'].round(4).astype(str)

In [None]:
# Preview the table including the new paired-coordinate text columns.
ewr_unique_dest.head()

In [None]:
# Minimal two-point route for experimenting with line plots: the two
# endpoints of the hard-coded Scattergeo route used earlier in this notebook.
# Both rows carry the same airport label.
test_dataset = pd.DataFrame({
    'lat': [40.692501, 28.429399],
    'lon': [-74.168701, -81.308998],
    'airport': ['Newark Liberty International Airport',
                'Newark Liberty International Airport'],
})

In [None]:
# Origin markers on the folium map. This loop was previously wedged between
# `test_fig =` and its right-hand side, which made the cell a syntax error.
for _, row in ewr_unique_dest.iterrows():

    folium.CircleMarker([row['ori_lat'], row['ori_lon']],
                        radius=5,
                        fill_color="#3db7e4",
                       ).add_to(us_map)

# line_mapbox needs one numeric lat/lon per row, so each route is reshaped
# into two rows (origin first, then destination) that share a dest_airport
# value. The "lat, lat" text columns are not usable as coordinates.
route_keys = ['dest_airport', 'mean_dep_delay_per_dest']
route_ends = pd.concat(
    [
        ewr_unique_dest[route_keys + ['ori_lat', 'ori_lon']]
        .rename(columns={'ori_lat': 'lat', 'ori_lon': 'lon'}),
        ewr_unique_dest[route_keys + ['dest_lat', 'dest_lon']]
        .rename(columns={'dest_lat': 'lat', 'dest_lon': 'lon'}),
    ],
    ignore_index=True,
)

test_fig = px.line_mapbox(
    route_ends,
    lat='lat',
    lon='lon',
    line_group='dest_airport',  # one line per destination
    color='mean_dep_delay_per_dest',
    zoom=9,
    height=600,
    width=1300,
)

test_fig.update_layout(mapbox_style='carto-positron')
test_fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
test_fig.update_layout(title_text="percent_delay_all_months")
test_fig.show()

In [None]:
# line_mapbox needs one numeric lat/lon per row, so each route is reshaped
# into two rows (origin first, then destination) that share a dest_airport
# value. The "lat, lat" text columns are not usable as coordinates.
route_keys = ['dest_airport', 'mean_dep_delay_per_dest']
route_ends = pd.concat(
    [
        ewr_unique_dest[route_keys + ['ori_lat', 'ori_lon']]
        .rename(columns={'ori_lat': 'lat', 'ori_lon': 'lon'}),
        ewr_unique_dest[route_keys + ['dest_lat', 'dest_lon']]
        .rename(columns={'dest_lat': 'lat', 'dest_lon': 'lon'}),
    ],
    ignore_index=True,
)

# Assign the new figure; the layout calls below previously acted on whatever
# `test_fig` an earlier cell had left behind.
test_fig = px.line_mapbox(
    route_ends,
    lat='lat',
    lon='lon',
    line_group='dest_airport',  # one line per destination
    color='mean_dep_delay_per_dest',
    zoom=9,
    height=600,
    width=1300,
)

test_fig.update_layout(mapbox_style='carto-positron')
test_fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
test_fig.update_layout(title_text="percent_delay_all_months")
test_fig.show()