In [2]:
import altair as alt
import pandas as pd
import numpy as np
import geopandas as gpd
from vega_datasets import data

Load airports dataset and drop redundant columns

In [3]:
airports_url = 'csv/airports_continents.csv'
airports_df = pd.read_csv(airports_url)
airports_df.drop(columns=["Type","Source"],inplace=True)
airports_df

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,TimeZone,DST,Tz Database Time Zone,Continent
3285,3484,Los Angeles International Airport,Los Angeles,United States,LAX,KLAX,33.942501,-118.407997,125,-8,A,America/Los_Angeles,North America


In [4]:
routes_df = pd.read_csv("csv/routes.csv")
routes_df

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2
2,2B,410,ASF,2966,MRV,2962,,0,CR2
3,2B,410,CEK,2968,KZN,2990,,0,CR2
4,2B,410,CEK,2968,OVB,4078,,0,CR2
...,...,...,...,...,...,...,...,...,...
67658,ZL,4178,WYA,6334,ADL,3341,,0,SF3
67659,ZM,19016,DME,4029,FRU,2912,,0,734
67660,ZM,19016,FRU,2912,DME,4029,,0,734
67661,ZM,19016,FRU,2912,OSS,2913,,0,734


In [49]:
merged_data = pd.merge(routes_df, airports_df, how='inner', left_on='Source airport', right_on='IATA')
merged_data = merged_data.rename(columns={'Latitude': 'source_latitude', 'Longitude': 'source_longitude'})
merged_data = pd.merge(merged_data, airports_df, how='inner', left_on='Destination airport', right_on='IATA')
merged_data = merged_data.rename(columns={'Latitude': 'destination_latitude', 'Longitude': 'destination_longitude'})

international_routes = merged_data.where(merged_data["Continent_x"] != merged_data["Continent_y"])
international_routes.dropna(inplace=True)
international_routes_url = "csv/international_routes.csv"
international_routes.to_csv(international_routes_url)

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment,Airport ID_x,...,Country_y,IATA_y,ICAO_y,destination_latitude,destination_longitude,Altitude_y,TimeZone_y,DST_y,Tz Database Time Zone_y,Continent_y
225,BT,333,RIX,3953,TAS,2983,Y,0.0,763,3953.0,...,Uzbekistan,TAS,UTTT,41.257900,69.281197,1417.0,5,U,Asia/Samarkand,Asia
281,OS,491,VIE,1613,EVN,3964,Y,0.0,320 321,1613.0,...,Armenia,EVN,UDYZ,40.147301,44.395901,2838.0,4,E,Asia/Yerevan,Asia
282,TD,1048,VIE,1613,EVN,3964,Y,0.0,321,1613.0,...,Armenia,EVN,UDYZ,40.147301,44.395901,2838.0,4,E,Asia/Yerevan,Asia
283,UA,5209,VIE,1613,EVN,3964,Y,0.0,320 321,1613.0,...,Armenia,EVN,UDYZ,40.147301,44.395901,2838.0,4,E,Asia/Yerevan,Asia
303,Z6,16120,DNK,2941,EVN,3964,Y,0.0,ER4,2941.0,...,Armenia,EVN,UDYZ,40.147301,44.395901,2838.0,4,E,Asia/Yerevan,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62308,SA,4305,DXB,2188,DUR,799,Y,0.0,77W,2188.0,...,South Africa,DUR,FALE,-29.614444,31.119722,295.0,2,U,Africa/Johannesburg,Africa
63563,TX,567,ORY,1386,RUN,916,Y,0.0,747,1386.0,...,Reunion,RUN,FMEE,-20.887100,55.510300,66.0,4,U,Indian/Reunion,Africa
63722,TP,4869,LIS,1638,TMS,973,Y,0.0,313,1638.0,...,Sao Tome and Principe,TMS,FPST,0.378175,6.712150,33.0,0,N,Africa/Sao_Tome,Africa
63723,UA,5209,LIS,1638,TMS,973,Y,0.0,313,1638.0,...,Sao Tome and Principe,TMS,FPST,0.378175,6.712150,33.0,0,N,Africa/Sao_Tome,Africa


Load continents dataset

In [None]:
continents_df = pd.read_csv('csv/continents.csv')
continents_df = continents_df['Continent'].unique()
continents_df = pd.DataFrame(continents_df,columns=['Continent'])
continents_df

Unnamed: 0,Continent
0,Asia
1,Europe
2,Africa
3,Oceania
4,North America
5,Antarctica
6,South America


derive additional "id" and "airports" columns

In [None]:
# create id column
continents_df.insert(0, 'id', continents_df.index+1)
airports_count = []
for continent in continents_df['Continent']:
    airports = len(airports_df.loc[airports_df['Continent'] == continent])
    airports_count.append(airports)

continents_df['airports'] = airports_count
continents_df

Unnamed: 0,id,Continent,airports
0,1,Asia,1518
1,2,Europe,1764
2,3,Africa,745
3,4,Oceania,621
4,5,North America,2333
5,6,Antarctica,8
6,7,South America,709


Load continent geography dataset

In [None]:
continents_url = './world-continents.json'
continents_topo = gpd.read_file(continents_url)
continents_topo

Unnamed: 0,id,continent,geometry
0,,South America,"MULTIPOLYGON (((-68.641 -54.801, -69.067 -54.9..."
1,,Oceania,"MULTIPOLYGON (((144.765 -40.725, 145.713 -40.9..."
2,,North America,"MULTIPOLYGON (((-73.032 21.158, -73.127 20.992..."
3,,Europe,"MULTIPOLYGON (((20.003 39.693, 19.853 40.048, ..."
4,,Asia,"MULTIPOLYGON (((56.280 25.629, 56.359 25.057, ..."
5,,Africa,"MULTIPOLYGON (((11.765 -17.254, 11.820 -16.480..."
6,,Oceania,"MULTIPOLYGON (((144.765 -40.725, 145.713 -40.9..."
7,,North America,"MULTIPOLYGON (((-73.032 21.158, -73.127 20.992..."
8,,Europe,"MULTIPOLYGON (((20.003 39.693, 19.853 40.048, ..."
9,,Asia,"MULTIPOLYGON (((56.280 25.629, 56.359 25.057, ..."


Derive id column from continents dataset to enable lookup transform

In [None]:
continent_ids = []
for continent in continents_topo['continent']:
    for continent_x in continents_df['Continent']:
        if continent == continent_x:
            index = continents_df.loc[continents_df['Continent']
                                      == continent]['id'].to_list()
            continent_ids.append(index[0])

continents_topo['id'] = continent_ids
continents_topo


Unnamed: 0,id,continent,geometry
0,7,South America,"MULTIPOLYGON (((-68.641 -54.801, -69.067 -54.9..."
1,4,Oceania,"MULTIPOLYGON (((144.765 -40.725, 145.713 -40.9..."
2,5,North America,"MULTIPOLYGON (((-73.032 21.158, -73.127 20.992..."
3,2,Europe,"MULTIPOLYGON (((20.003 39.693, 19.853 40.048, ..."
4,1,Asia,"MULTIPOLYGON (((56.280 25.629, 56.359 25.057, ..."
5,3,Africa,"MULTIPOLYGON (((11.765 -17.254, 11.820 -16.480..."
6,4,Oceania,"MULTIPOLYGON (((144.765 -40.725, 145.713 -40.9..."
7,5,North America,"MULTIPOLYGON (((-73.032 21.158, -73.127 20.992..."
8,2,Europe,"MULTIPOLYGON (((20.003 39.693, 19.853 40.048, ..."
9,1,Asia,"MULTIPOLYGON (((56.280 25.629, 56.359 25.057, ..."


Plot visualizations

In [None]:
# interactions
# Create mouseover selection
click_city = alt.selection_single(
    on="click", nearest=True, fields=["Source airport"], empty="none"
)
ex= alt.selection_single()
click_continent = alt.selection_single(
    on="click", nearest=True, fields=["Continent"], empty="none"
)
# dropdown interaction
input_dropdown = alt.binding_select(options=continents_df['Continent'].to_list(), name='Select a Continent:')
select_continent= alt.selection_single(fields=["Continent"], bind=input_dropdown)

# Define which attributes to lookup from airports.csv
lookup_data = alt.LookupData(
    airports_url, key="IATA", fields=["Continent", "Latitude", "Longitude","City"]
)

# .transform_lookup(
#     lookup="Source airport",
#     from_=lookup_data
# ).transform_lookup(
#     lookup="Destination airport",
#     from_=lookup_data,
#     as_=["Destination Continent", "lat2", "lon2","City1"]

# )
connections = alt.Chart(international_routes_url).mark_rule(opacity=0.35).encode(
    latitude="Latitude:Q",
    longitude="Longitude:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q",
    opacity=alt.condition(select_continent, alt.value(80),alt.value(0))
)
.transform_filter(
     (alt.datum.Continent != None) & (alt.datum["Destination Continent"] != alt.datum["Continent"]) 
).transform_filter(
    click_city
)

# .transform_aggregate(
#     routes="max(Total Routes)",
#     groupby=["Source airport","Destination airport"]
# )
points = alt.Chart(routes_url).mark_circle().transform_calculate(
    routes = " "
    
).transform_lookup(
    lookup="Source airport",
    from_=lookup_data,
).transform_lookup(
    lookup="Destination airport",
    from_=lookup_data,
    as_=["Destination Continent","Latitude2", "Longitude2","City2"]
).transform_filter(
    (alt.datum["Destination Continent"] != alt.datum["Continent"]) & (alt.datum.Continent != None)
).add_selection(
    click_city
).encode(
    latitude="Latitude:Q",
    longitude="Longitude:Q",
    size=alt.Size("routes:Q", scale=alt.Scale(range=[0,120]),legend=None),
    order=alt.Order("routes:Q", sort="descending"),
    opacity=alt.condition(select_continent, alt.value(80),alt.value(0)),
)
# Data generators for the background
sphere = alt.sphere()
graticule = alt.graticule()
source = alt.topo_feature(data.world_110m.url, 'countries')

map = alt.layer(alt.Chart(sphere).mark_geoshape(fill='lightblue'),
    alt.Chart(source).mark_geoshape(fill="white",
    stroke='black'), 
    alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5)).encode(color="Continent:N")


continents_chart = alt.layer(map,connections,points
                        ).encode(alt.Tooltip(['City:N',"Continent:N","routes:Q"]),color="Continent:N"
).add_selection(select_continent).project('equirectangular').properties(
                      width=1000, height=500, title="Total Airports by Continent")


continents_bar = alt.Chart(continents_df).mark_bar().encode(
        alt.Y("Continent:N"),alt.X("airports:Q"),

     color=alt.condition(ex, 'airports:Q', alt.value('lightgray')),
).add_selection(ex)

#make a bar that shows international routes for each continent
# domestic_airports = alt.Chart(routes_url).mark_bar().encode(x= "Continent:N", y= "")


# airports_bar = alt.Chart(airports_url).mark_bar().encode(
#     alt.X("Continent:N"),alt.Y("count():Q"),column="Continent:N"
# ).transform_lookup(
#     lookup="Continent", from_=alt.LookupData(data=continents_df,key="continent",fields=["airports","continent"]) ).add_selection(
#     select_continent
# ).transform_filter(select_continent)

vis = (continents_chart & continents_bar ).configure_title(
    fontSize=20,
    anchor='middle',
).configure(background='#f0fff1')


vis 


# map
# add bar chart for all airports per continent
# line chart for busiest airports per continent
# network showing routes between continents
# routes with stops should be color encoded with red and one way routes should be green

# big map should show only international routes
# select interval on map should now show domestic routes


#interactions
#bind interval selection to scale function to zoom https://altair-viz.github.io/user_guide/interactions.html#input-element-binding
#rotate using orthographic projection (not yet available)
#click on continent to shows bar graph of countries and their number of airports
#hover over country continent to show routes ---- done!
# low level
# create dropdown selection for each continent
# show countries in the continent and the active airlines that operate there (color code active variable)
# compare total flights between 

# clicking on continent should show airports

In [None]:
countries_bar = alt.Chart(continents_df).mark_bar().encode(
    alt.Y("count():Q"),alt.X("Country:N")
).transform_lookup(
            lookup="continent", from_=alt.LookupData(airports_url, 'Continent', ['Country']))


In [None]:
# world_map = (
#     alt.Chart(data.world_110m.url)
#     .mark_geoshape(fill="lightgray", stroke="white")
#     .project("equirectangular")
#     .properties(width=1200, height=700)
# )
# airports = (
#     alt.Chart(airport_url)
#     .mark_circle()
#     .encode(longitude="Longitude:Q", latitude="Latitude:Q", tooltip=["Continent:N"],color='DST:N')
# )
# # zoom viz based on user country input
# # show airports in the country (https://altair-viz.github.io/user_guide/interactions.html?highlight=interactive#binding-adding-data-driven-inputs)
# # use scale interaction & select input
# # use selection the change scale based on selection
# # make selection interactive on all views at the same time

# # add network link on continents

# # cartogram heatmap busiest airports (airports with plenty traffic)

# # cartogram, heatmap, network link.
# # airport network around all countries

# world_map + airports
