In [22]:
import altair as alt
import pandas as pd
from vega_datasets import data

Load airports dataset

In [23]:
# Path of the raw airports table, relative to the notebook.
airport_url = 'csv/airports.csv'
# Decode the file as Latin-1 while reading it.
airports_df = pd.read_csv(filepath_or_buffer=airport_url, encoding='latin-1')
# Preview the first five rows.
airports_df.head(n=5)

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,TimeZone,DST,Tz Database Time Zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789002,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports


Derive each country's continent from a second dataset

In [24]:
# Country/entity -> continent lookup table.
continents = pd.read_csv(filepath_or_buffer='csv/continents.csv')
continents

Unnamed: 0,Entity,Code,Year,Continent
0,Abkhazia,OWID_ABK,2015,Asia
1,Afghanistan,AFG,2015,Asia
2,Akrotiri and Dhekelia,OWID_AKD,2015,Asia
3,Albania,ALB,2015,Europe
4,Algeria,DZA,2015,Africa
...,...,...,...,...
280,Yugoslavia,OWID_YGS,2015,Europe
281,Zambia,ZMB,2015,Africa
282,Zanzibar,OWID_ZAN,2015,Africa
283,Zimbabwe,ZWE,2015,Africa


In [25]:
# Drop the "Code" and "Year" attributes; only "Entity" and "Continent" are used below.
# The result is reassigned instead of dropped in place, and errors='ignore' is passed,
# so re-running this cell does not raise KeyError once the columns are already gone.
continents = continents.drop(columns=['Code', 'Year'], errors='ignore')
continents

Unnamed: 0,Entity,Continent
0,Abkhazia,Asia
1,Afghanistan,Asia
2,Akrotiri and Dhekelia,Asia
3,Albania,Europe
4,Algeria,Africa
...,...,...
280,Yugoslavia,Europe
281,Zambia,Africa
282,Zanzibar,Africa
283,Zimbabwe,Africa


Some country names differ between the two datasets, so they must be normalized. First, we copy the original dataframe and replace every country that can be matched with its continent. The result is a single column holding a mix of continent names (for the matched countries) and country names (for the countries that could not be matched).

In [26]:

# For every airport country, find the first row of `continents` whose entity name
# contains the country name (case-insensitive substring test) and replace the country
# with that row's continent. Countries with no such row keep their original name.
#
# Each distinct country is resolved once and the mapping is applied in a single pass,
# instead of re-scanning `continents` and rewriting the whole column for every airport row.
# NOTE(review): the substring test can pair a country with a longer entity name that
# merely contains it; an exact comparison would be stricter but would change which
# names end up unmatched.

temp_df = airports_df.copy()

# (lower-cased entity name, continent) pairs, in the same order as the rows of `continents`.
entity_continent_pairs = [(row[0].lower(), row[1]) for row in continents.values]

country_to_continent = {}
for country in temp_df['Country'].unique():
    needle = country.lower()
    for entity_name, continent_name in entity_continent_pairs:
        if needle in entity_name:
            country_to_continent[country] = continent_name
            break

# Matched countries become their continent; everything else is left as it was.
temp_df['Country'] = temp_df['Country'].map(
    lambda name: country_to_continent.get(name, name))
mixed_continents_and_countries = temp_df['Country'].unique()
mixed_continents_and_countries

array(['Oceania', 'North America', 'Europe', 'Africa', 'South America',
       'Faroe Islands', 'Congo (Brazzaville)', 'Congo (Kinshasa)',
       'Swaziland', 'Asia', 'Czech Republic', 'Antarctica', 'West Bank',
       'Midway Islands', 'Macau', 'Burma', 'East Timor', 'Johnston Atoll',
       'Cocos (Keeling) Islands', 'Wake Island'], dtype=object)

Next we must extract these countries into their own list

In [27]:
# Anything in the mixed column that is not a continent name is a country that could
# not be matched. The lower-cased continent names are collected once, as a set, rather
# than being rebuilt as a list on every iteration.
known_continents = {name.lower() for name in continents['Continent'].to_list()}

unmatched_countries = [
    name for name in mixed_continents_and_countries
    if name.lower() not in known_continents
]

unmatched_countries

['Faroe Islands',
 'Congo (Brazzaville)',
 'Congo (Kinshasa)',
 'Swaziland',
 'Czech Republic',
 'West Bank',
 'Midway Islands',
 'Macau',
 'Burma',
 'East Timor',
 'Johnston Atoll',
 'Cocos (Keeling) Islands',
 'Wake Island']

Finally, we create a copy of the original dataframe and replace the unmatched country names

In [28]:
# Work on a copy so that airports_df itself is not modified by this cell.
new_country_names = airports_df.copy()

# Visit every airport row's country and compare it, case-insensitively, with each
# of the country names that could not be matched to a continent.
for country in airports_df['Country'].to_list():
    for x in unmatched_countries:
        if x.lower() == country.lower():
            # NOTE(review): replace() receives only the value to look for and no
            # replacement value. Presumably this relies on pandas' legacy fallback of
            # filling matched cells from the preceding row (pad/forward-fill), which
            # would give these airports whatever country happens to come before them
            # in the file; newer pandas versions appear to deprecate this call form.
            # TODO confirm the behaviour and replace it with an explicit mapping from
            # each unmatched name to the spelling used in the continents dataset.
            new_country_names['Country'] = new_country_names['Country'].replace(x)

Now we can finally add continent information to the dataset

In [29]:
# Assign a continent to every airport row: a country takes the continent of the first
# `continents` row whose entity name contains the country name (case-insensitive).
#
# Each distinct country is resolved once. If any country has no match, the cell stops
# with an error naming those countries, instead of failing later with a length-mismatch
# error when the shorter list is assigned as a column.
entity_continent_pairs = [
    (row[0].lower(), row[1])  # index 0 is the country/entity name, index 1 the continent
    for row in continents.values
]

continent_by_country = {}
countries_without_continent = []
for country in new_country_names['Country'].unique():
    needle = country.lower()
    for entity_name, continent_name in entity_continent_pairs:
        if needle in entity_name:
            continent_by_country[country] = continent_name
            break
    else:
        # The inner loop finished without a match.
        countries_without_continent.append(country)

if countries_without_continent:
    raise ValueError(
        "No continent found for these countries: "
        f"{sorted(countries_without_continent)}"
    )

continents_list = [
    continent_by_country[name] for name in new_country_names['Country'].to_list()
]
new_country_names['Continent'] = continents_list
airports_df = new_country_names
# export to new csv
airports_df.to_csv('csv/airports_continents.csv', index=False)

 Load updated airports data
 

In [30]:
# Read the airports table that now carries a Continent column and discard the
# "Type" and "Source" columns.
airports_url = 'csv/airports_continents.csv'
airports_df = pd.read_csv(airports_url).drop(columns=["Type", "Source"])
airports_df

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitude,TimeZone,DST,Tz Database Time Zone,Continent
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.081690,145.391998,5282,10,U,Pacific/Port_Moresby,Oceania
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.207080,145.789002,20,10,U,Pacific/Port_Moresby,Oceania
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.826790,144.296005,5388,10,U,Pacific/Port_Moresby,Oceania
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,Oceania
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.443380,147.220001,146,10,U,Pacific/Port_Moresby,Oceania
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7693,14106,Rogachyovo Air Base,Belaya,Russia,\N,ULDA,71.616699,52.478298,272,\N,\N,\N,Europe
7694,14107,Ulan-Ude East Airport,Ulan Ude,Russia,\N,XIUW,51.849998,107.737999,1670,\N,\N,\N,Europe
7695,14108,Krechevitsy Air Base,Novgorod,Russia,\N,ULLK,58.625000,31.385000,85,\N,\N,\N,Europe
7696,14109,Desierto de Atacama Airport,Copiapo,Chile,CPO,SCAT,-27.261200,-70.779198,670,\N,\N,\N,South America


Load routes dataset

In [31]:
# One row per airline route (source airport -> destination airport).
routes_df = pd.read_csv(filepath_or_buffer="csv/routes.csv")
routes_df

Unnamed: 0,Airline,Airline ID,Source airport,Source airport ID,Destination airport,Destination airport ID,Codeshare,Stops,Equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2
2,2B,410,ASF,2966,MRV,2962,,0,CR2
3,2B,410,CEK,2968,KZN,2990,,0,CR2
4,2B,410,CEK,2968,OVB,4078,,0,CR2
...,...,...,...,...,...,...,...,...,...
67658,ZL,4178,WYA,6334,ADL,3341,,0,SF3
67659,ZM,19016,DME,4029,FRU,2912,,0,734
67660,ZM,19016,FRU,2912,DME,4029,,0,734
67661,ZM,19016,FRU,2912,OSS,2913,,0,734


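Merge the airports and routes datasets to derive two new datasets: routes whose source and destination lie on different continents (called "international" below) and routes that stay within a single continent (called "domestic" below).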

In [32]:
# Join every route to its source airport and then to its destination airport. Both are
# inner joins on the IATA code, so a route is kept only if both endpoints are in airports_df.
merged_data = pd.merge(routes_df, airports_df, how='inner', left_on='Source airport', right_on='IATA')
# Rename the source coordinates before the second merge so they stay distinguishable
# from the destination coordinates.
merged_data = merged_data.rename(columns={'Latitude': 'source_latitude', 'Longitude': 'source_longitude'})
merged_data = pd.merge(merged_data, airports_df, how='inner', left_on='Destination airport', right_on='IATA')
# After the second merge the "_x" columns describe the source airport and the "_y"
# columns the destination airport.
merged_data = merged_data.rename(columns={
    'Latitude': 'destination_latitude',
    'Longitude': 'destination_longitude',
    'Continent_x': 'Continent',
    'Continent_y': 'Destination Continent',
    'City_y': 'Destination City',
    'City_x': 'City',
    'Country_y': 'Destination Country',
    'Country_x': 'Country',
    'Altitude_y': 'Destination Altitude',
    'Altitude_x': 'Altitude',
    })

# Split the routes with boolean masks. The previous approach (DataFrame.where followed by
# a blanket dropna) also discarded every route that had a missing value in any column,
# for example an empty "Codeshare" field, not just the rows that failed the condition.

#  define international routes df and path to save to
international_routes = merged_data[
    merged_data["Continent"] != merged_data["Destination Continent"]
].copy()
international_routes_url = "csv/international_routes.csv"

# define domestic route df and path to save to
domestic_routes = merged_data[
    merged_data["Continent"] == merged_data["Destination Continent"]
].copy()
domestic_routes_url = "csv/domestic_routes.csv"


def derive_routes(df, path):
    """Add a per-airport route count to ``df`` (in place) and write it to ``path``.

    Parameters
    ----------
    df : pandas.DataFrame
        Route table with a 'Source airport' column. It is modified in place:
        rows that are entirely empty are removed and a 'Routes' column is added.
    path : str
        Destination of the CSV export.

    The 'Routes' value of a row is the number of rows in ``df`` that share its
    'Source airport'.
    """
    # Only rows that are empty in every column are removed (such rows are produced when a
    # frame is masked with DataFrame.where); partially filled routes are kept.
    df.dropna(how="all", inplace=True)
    df["Routes"] = df.groupby('Source airport')['Source airport'].transform("size")
    df.to_csv(path)


derive_routes(international_routes, international_routes_url)
derive_routes(domestic_routes, domestic_routes_url)

# international_routes is later handed to Altair as an in-memory DataFrame. With all
# routes retained it may exceed Altair's default row limit for inline data, so the
# limit is lifted here to keep the chart cell working.
alt.data_transformers.disable_max_rows()

Load continents dataset

In [33]:
# One row per distinct continent, in order of first appearance in the file.
continent_names = pd.read_csv('csv/continents.csv')['Continent'].unique()
continents_df = pd.DataFrame(continent_names, columns=['Continent'])
continents_df

Unnamed: 0,Continent
0,Asia
1,Europe
2,Africa
3,Oceania
4,North America
5,Antarctica
6,South America


Derive an additional "airports" column holding the number of airports in each continent

In [34]:

# Number of airport rows per continent, in the same order as continents_df.
airports_count = [
    len(airports_df[airports_df['Continent'] == continent_name])
    for continent_name in continents_df['Continent']
]

continents_df['airports'] = airports_count
continents_df

Unnamed: 0,Continent,airports
0,Asia,1518
1,Europe,1764
2,Africa,745
3,Oceania,621
4,North America,2333
5,Antarctica,8
6,South America,709


Plot visualizations

In [53]:
# Opacity is a fraction between 0 (invisible) and 1 (opaque). The conditions below
# previously used 80, which is outside that range; they now use 0.8 for selected marks.
# The route lines use 0.35, the value already set on the rule mark, because an opacity
# encoding takes precedence over the mark-level setting.
VISIBLE_OPACITY = 0.8
CONNECTION_OPACITY = 0.35
HIDDEN_OPACITY = 0

# click interactions
# Clicking near an airport selects its 'Source airport'; nothing is selected initially.
click_city = alt.selection_single(
    on="click", nearest=True, fields=["Source airport"], empty="none"
)
# Clicking a bar selects its continent.
click_continent = alt.selection_single(fields=["Continent"])

# dropdown interaction
input_dropdown = alt.binding_select(
    options=continents_df['Continent'].to_list(), name='Select a Continent:'
)
select_continent = alt.selection_single(fields=["Continent"], bind=input_dropdown)

# line connections between airports
# Only the routes of the clicked airport are drawn (transform_filter on click_city).
connections = alt.Chart(international_routes).mark_rule(opacity=CONNECTION_OPACITY).encode(
    latitude="source_latitude:Q",
    longitude="source_longitude:Q",
    latitude2="destination_latitude:Q",
    longitude2="destination_longitude:Q",
    opacity=alt.condition(
        select_continent, alt.value(CONNECTION_OPACITY), alt.value(HIDDEN_OPACITY)
    ),
).transform_filter(
    click_city
)

# airport plot
# Circle size encodes the route count; drawing in descending 'Routes' order puts the
# larger circles first.
points = alt.Chart(international_routes).mark_circle().encode(
    alt.Tooltip(['City:N', "Continent:N", "Routes:Q"]),
    latitude="source_latitude:Q",
    longitude="source_longitude:Q",
    size=alt.Size("Routes:Q", scale=alt.Scale(range=[0, 200]), legend=None),
    order=alt.Order("Routes:Q", sort="descending"),
    opacity=alt.condition(
        select_continent, alt.value(VISIBLE_OPACITY), alt.value(HIDDEN_OPACITY)
    ),
).add_selection(
    click_city
)

# Data generators for the map background
sphere = alt.sphere()
graticule = alt.graticule()
source = alt.topo_feature(data.world_110m.url, 'countries')

# NOTE(review): the name `map` shadows Python's built-in map(); it is kept unchanged
# here in case other cells refer to it.
map = alt.layer(
    alt.Chart(sphere).mark_geoshape(fill='lightblue'),
    alt.Chart(source).mark_geoshape(fill="white", stroke='black'),
    alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),
).encode(color="Continent:N")

# layer points and connections on the map
continents_chart = alt.layer(
    map, connections, points
).encode(
    color="Continent:N"
).add_selection(
    select_continent
).project(
    'equirectangular'
).properties(
    width=1000,
    height=500,
    title="International route connections between airports around the world",
)

# bar chart to show the total airport count per continent
continents_bar = alt.Chart(continents_df).mark_bar().encode(
    alt.Y("Continent:N"),
    alt.X("airports:Q", title="All Airports"),
    color=alt.condition(
        click_continent,
        alt.Color('airports:Q', scale=alt.Scale(scheme='reds')),
        alt.value('lightgray'),
    ),
).properties(title="All airports in each continent")

# text label for the count of airports per continent
continents_text = continents_bar.mark_text(dx=15).encode(text="airports:Q")

# bar chart showing domestic airports per country
# The data is loaded from the exported CSV and filtered to the continent clicked in
# the bar chart.
domestic_airports_bar = alt.Chart(domestic_routes_url).mark_rect().encode(
    alt.Y("Routes:Q", title="Domestic Routes"),
    alt.X("Country:N", sort=alt.EncodingSortField(field="Altitude", op="count", order='descending')),
    color=alt.Color('Altitude:Q', scale=alt.Scale(scheme='redblue')),
    tooltip=["City:N", "Routes:Q", 'Altitude:Q'],
    opacity=alt.condition(
        click_continent, alt.value(VISIBLE_OPACITY), alt.value(HIDDEN_OPACITY)
    ),
).transform_filter(
    click_continent
).properties(
    title="Domestic airports in each country color encoded by altitude"
)

# final vis with multiple views
# Map and continent bar chart side by side, with the domestic chart underneath.
vis = (
    (continents_chart | (continents_bar + continents_text).add_selection(click_continent))
    & domestic_airports_bar
).resolve_scale(
    color='independent'
).configure_view(
    strokeWidth=5
).configure_title(
    fontSize=20,
    anchor='middle',
).configure(
    background='#f0fff1'
)

vis

In [36]:

# Ideas and TODO notes
#
# Chart ideas:
# - cartogram / heatmap of the busiest airports (airports with plenty of traffic)
# - cartogram, heatmap, network link
# - airport network across all countries
#
# world_map + airports
# Interactions:
# - bind an interval selection to the scale to zoom:
#   https://altair-viz.github.io/user_guide/interactions.html#input-element-binding
# - rotate using an orthographic projection (not yet available)
# - click on a continent to show a bar graph of its countries and their number of airports
# - hover over a country/continent to show routes ---- done!
#
# Low level:
# - create a dropdown selection for each continent
# - show the countries in the continent and the active airlines that operate there
#   (color code the "active" variable)
# compare total flights between