## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import folium

from folium.plugins import HeatMap

from google.colab import drive

In [None]:
pip install meteostat

Collecting meteostat
  Downloading meteostat-1.6.8-py3-none-any.whl.metadata (4.6 kB)
Downloading meteostat-1.6.8-py3-none-any.whl (31 kB)
Installing collected packages: meteostat
Successfully installed meteostat-1.6.8


## Data Loading

In [None]:
# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that contains the project's data files.
path = "/content/drive/MyDrive/Colab Notebooks/Distributed Data Analysis and Mining/Project/data"

# Build the full CSV location first, then read it into a DataFrame.
csv_location = path + "/df_final_with_weather.csv"
data = pd.read_csv(csv_location)

  data = pd.read_csv(path + "/df_final_with_weather.csv")


In [None]:
# Preview the first five rows of the loaded table.
data.head(n=5)

Unnamed: 0,Flight_Date,Operating_Carrier,Flight_Number,Origin_Airport,Destination_Airport,Scheduled_Departure_Time,Actual_Departure_Time,Departure_Delay_Minutes,Taxi_Out_Time,Takeoff_Time,...,city_dest,state_dest,id,distance_km,latitude_orig,longitude_orig,tavg,wspd,wdir,pres
0,2018-01-02,AS,857,SAN,LIH,820,814.0,-6.0,25.0,839.0,...,Hanama'ulu,Hawaii,0,4300.254683,32.73336,-117.192245,15.8,7.2,337.0,1019.5
1,2018-01-04,AS,857,SAN,LIH,820,813.0,-7.0,13.0,826.0,...,Hanama'ulu,Hawaii,1,4300.254683,32.73336,-117.192245,16.6,5.8,0.0,1020.2
2,2018-01-06,AS,857,SAN,LIH,850,847.0,-3.0,30.0,917.0,...,Hanama'ulu,Hawaii,2,4300.254683,32.73336,-117.192245,16.6,6.8,344.0,1020.4
3,2018-01-09,AS,857,SAN,LIH,820,816.0,-4.0,21.0,837.0,...,Hanama'ulu,Hawaii,3,4300.254683,32.73336,-117.192245,17.1,23.4,221.0,0.0
4,2018-01-11,AS,857,SAN,LIH,820,811.0,-9.0,21.0,832.0,...,Hanama'ulu,Hawaii,4,4300.254683,32.73336,-117.192245,14.4,5.8,0.0,1022.3


## Define Routes

In [None]:
# Column groups shared by the summaries in this cell.
route_cols = ['latitude_orig', 'longitude_orig', 'latitude_dest', 'longitude_dest']
airport_cols = ['Origin_Airport', 'latitude_orig', 'longitude_orig']

# Distinct origin/destination coordinate combinations.
unique_routes = data[route_cols].drop_duplicates()

# Number of rows recorded for each coordinate combination.
route_counts = (
    data.groupby(route_cols)
    .size()
    .reset_index(name = 'count')
)

# Distinct (airport code, latitude, longitude) combinations on the origin side.
unique_airports = data[airport_cols].drop_duplicates()

# The 30 coordinate combinations with the highest row count.
top_routes = route_counts.nlargest(n=30, columns='count')

In [None]:
# Row count per origin airport (code plus coordinates).
origin_keys = ['Origin_Airport', 'latitude_orig', 'longitude_orig']
unique_orig_counts = (
    data.groupby(origin_keys)
    .size()
    .reset_index(name='count')
)

# Keep the 30 origins with the highest count.
top_origin = unique_orig_counts.nlargest(n=30, columns='count')

In [None]:
# Show the five busiest origin airports.
top_origin.head(n=5)

Unnamed: 0,Origin_Airport,latitude_orig,longitude_orig,count
21,ATL,33.6378,-84.42927,390046
247,ORD,41.97796,-87.90917,332953
92,DFW,32.89652,-97.046524,279298
91,DEN,39.86067,-104.68536,235989
71,CLT,35.210743,-80.94574,233317


In [None]:
# Row count per destination airport (code plus coordinates).
destination_keys = ['Destination_Airport', 'latitude_dest', 'longitude_dest']
unique_dest_counts = (
    data.groupby(destination_keys)
    .size()
    .reset_index(name='count')
)

# Keep the 30 destinations with the highest count.
top_destinations = unique_dest_counts.nlargest(n=30, columns='count')

# Show the five busiest destination airports.
top_destinations.head(n=5)

Unnamed: 0,Destination_Airport,latitude_dest,longitude_dest,count
21,ATL,33.6378,-84.42927,390079
247,ORD,41.97796,-87.90917,332942
92,DFW,32.89652,-97.046524,279272
91,DEN,39.86067,-104.68536,236020
71,CLT,35.210743,-80.94574,233309


## Main Routes

In [None]:
# End points of the logarithmic line-thickness scale: the smallest and the
# largest count found among the top routes.
top_route_counts = top_routes['count']

min_count = top_route_counts.min()
max_count = top_route_counts.max()

In [None]:
# Draw the most frequent routes on a map; line thickness grows with the
# logarithm of the route count, between min_count and max_count.
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Loop-invariant parts of the logarithmic thickness scale.
log_min = np.log(min_count)
log_span = np.log(max_count) - log_min

# (role, location) pairs that already have a marker, so an airport that takes
# part in several top routes is not covered by a stack of identical markers.
placed_markers = set()

route_columns = zip(
    top_routes['latitude_orig'],
    top_routes['longitude_orig'],
    top_routes['latitude_dest'],
    top_routes['longitude_dest'],
    top_routes['count'],
)

for lat_orig, lon_orig, lat_dest, lon_dest, frequency in route_columns:
    origin = (lat_orig, lon_orig)
    destination = (lat_dest, lon_dest)

    # Position of this route on the log scale: 0 = least frequent, 1 = most
    # frequent. When every route has the same count the span is zero, so fall
    # back to 0 instead of dividing by zero.
    if log_span > 0:
        relative = (np.log(frequency) - log_min) / log_span
    else:
        relative = 0.0

    folium.PolyLine(
        locations=[origin, destination],
        color='blue',
        weight=float(2.5 + 5 * relative),
        opacity=0.7
    ).add_to(m)

    endpoints = (
        ("Origin", origin, 'green'),
        ("Destination", destination, 'red'),
    )
    for role, location, marker_color in endpoints:
        if (role, location) in placed_markers:
            continue
        placed_markers.add((role, location))
        folium.Marker(location=location, popup=role, icon=folium.Icon(color=marker_color)).add_to(m)

m.save("flights_top_routes_map.html")

In [None]:
# Leave the map as the cell's last expression so the notebook renders it inline.
m

## Airports Location

In [None]:
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Appearance shared by every airport dot: small, blue, semi-transparent fill.
dot_style = dict(
    radius=3,
    color='blue',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)

# One dot per distinct origin airport; clicking it shows the airport code.
for _, airport_row in unique_airports.iterrows():
    folium.CircleMarker(
        location=(airport_row['latitude_orig'], airport_row['longitude_orig']),
        popup=airport_row['Origin_Airport'],
        **dot_style
    ).add_to(m)

m.save("airports_unique_map.html")

In [None]:
# Leave the map as the cell's last expression so the notebook renders it inline.
m

## Heat Map

In [None]:
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Every distinct origin airport contributes one [latitude, longitude] point.
coordinate_frame = unique_airports[['latitude_orig', 'longitude_orig']]
airport_coords = coordinate_frame.to_numpy().tolist()

# Build the heat layer from those points and attach it to the map.
heat_layer = HeatMap(airport_coords)
heat_layer.add_to(m)

m.save("airports_heatmap.html")

In [None]:
# Leave the map as the cell's last expression so the notebook renders it inline.
m

### Top Airports by Arrival Volume

In [None]:
# Map the busiest destination airports, sizing each marker by its count.
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Pixel radius given to the busiest airport; every other airport is scaled
# down in proportion to its count. The previous formula, min(count / 10, 10),
# evaluates to 10 for any count of 100 or more, so all markers ended up the
# same size.
# NOTE(review): circle markers are used because their radius is a documented
# option; confirm that replacing the pin icons is acceptable.
max_radius = 20
busiest_count = top_destinations['count'].max()

for _, row in top_destinations.iterrows():
    location = (row['latitude_dest'], row['longitude_dest'])
    airport = row['Destination_Airport']
    count = row['count']

    folium.CircleMarker(
        location=location,
        radius=float(max_radius * count / busiest_count),
        color='green',
        fill=True,
        fill_color='green',
        fill_opacity=0.6,
        popup=airport + ", " + str(count)
    ).add_to(m)

# Use a dedicated file name: saving to "airports_unique_map.html" would
# overwrite the map of all airports that is written under that name.
m.save("top_destination_airports_map.html")

In [None]:
# Leave the map as the cell's last expression so the notebook renders it inline.
m

### Routes into the Most Popular Airport (ATL)

In [None]:
# Keep only the rows whose destination airport is ATL.
arrives_at_atl = data['Destination_Airport'] == "ATL"
atlanta_routes = data[arrives_at_atl]

# Row count per origin/destination coordinate combination among those rows.
coordinate_keys = ['latitude_orig', 'longitude_orig', 'latitude_dest', 'longitude_dest']
atlanta_routes_counts = (
    atlanta_routes.groupby(coordinate_keys)
    .size()
    .reset_index(name='count')
)

In [None]:
# Draw every route that ends at ATL; line thickness grows with the logarithm
# of the route count.
m = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Build the thickness scale from the ATL route counts themselves. Reusing
# min_count / max_count (computed from the overall top routes) pushes any ATL
# route rarer than that minimum below the 2.5 base weight, and far enough
# below it the weight becomes negative.
atl_log_counts = np.log(atlanta_routes_counts['count'])
log_low = atl_log_counts.min()
log_span = atl_log_counts.max() - log_low

origin_color = 'green'  # Color for origin point
destination_color = 'red'  # Color for destination point
origin_size = 6  # Radius of the origin point
destination_size = 8  # Radius of the destination point

# Destinations that already have a marker: the same destination coordinates
# can repeat across rows, so draw each one only once.
drawn_destinations = set()

route_columns = zip(
    atlanta_routes_counts['latitude_orig'],
    atlanta_routes_counts['longitude_orig'],
    atlanta_routes_counts['latitude_dest'],
    atlanta_routes_counts['longitude_dest'],
    atlanta_routes_counts['count'],
)

for lat_orig, lon_orig, lat_dest, lon_dest, frequency in route_columns:
    origin = (lat_orig, lon_orig)
    destination = (lat_dest, lon_dest)

    # Position on the log scale: 0 = rarest route, 1 = most frequent route.
    # A zero span (all counts equal, or a single route) falls back to 0.
    if log_span > 0:
        relative = (np.log(frequency) - log_low) / log_span
    else:
        relative = 0.0

    folium.PolyLine(
        locations=[origin, destination],
        color='gray',
        weight=float(2.5 + 5 * relative),
        opacity=0.7
    ).add_to(m)

    # Origin point for this route.
    folium.CircleMarker(
        location=origin,
        radius=origin_size,
        color=origin_color,
        fill=True,
        fill_color=origin_color,
        fill_opacity=0.6,
        popup="Origin"
    ).add_to(m)

    # Destination point, added only the first time these coordinates appear.
    if destination not in drawn_destinations:
        drawn_destinations.add(destination)
        folium.CircleMarker(
            location=destination,
            radius=destination_size,
            color=destination_color,
            fill=True,
            fill_color=destination_color,
            fill_opacity=0.6,
            popup="Destination"
        ).add_to(m)

m.save("atlanta_route_map.html")

In [None]:
# Leave the map as the cell's last expression so the notebook renders it inline.
m