In [11]:
# Load the routes table. Column names are passed explicitly via `names`,
# so the first line of the file is read as data rather than as a header.
routes = pd.read_csv(
    'routes.dat',
    delimiter=',',
    names=[
        'Airline', 'Airline ID',
        'Source airport', 'Source airport ID',
        'Destination airport', 'Destination airport ID',
        'Codeshare', 'Stops', 'Equipment',
    ],
)

# Quick look at the raw table and its per-column missing-value counts
# (printed before any cleaning, so 'Codeshare' still appears here).
print(routes.head())
print(routes.isna().sum())

# Drop unwanted columns.
# DataFrame.drop returns a new frame instead of modifying `routes`, so the
# result has to be assigned back for the column to actually be removed.
routes = routes.drop(columns='Codeshare')

# Remove rows with missing data in Equipment column.
# Assigning the result (rather than inplace=True) keeps both cleaning steps
# in the same explicit "new frame" style.
routes = routes.dropna(subset=['Equipment'])

  Airline Airline ID Source airport Source airport ID Destination airport  \
0      2B        410            AER              2965                 KZN   
1      2B        410            ASF              2966                 KZN   
2      2B        410            ASF              2966                 MRV   
3      2B        410            CEK              2968                 KZN   
4      2B        410            CEK              2968                 OVB   

  Destination airport ID Codeshare  Stops Equipment  
0                   2990       NaN      0       CR2  
1                   2990       NaN      0       CR2  
2                   2962       NaN      0       CR2  
3                   2990       NaN      0       CR2  
4                   4078       NaN      0       CR2  
Airline                       0
Airline ID                    0
Source airport                0
Source airport ID             0
Destination airport           0
Destination airport ID        0
Codeshare            

In [None]:
import altair as alt
import pandas as pd
from vega_datasets import data
# Turn off Altair's row-count limit for chart data.
# This should generally be avoided, but the limit is disabled here on purpose.
alt.data_transformers.disable_max_rows()

AIRPORTS_URL = 'https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat'
ROUTES_URL = 'https://raw.githubusercontent.com/jpatokal/openflights/master/data/routes.dat'

# Airports table: header-less comma-separated file, so names are supplied here.
columnNames = [
    "Airport ID", "Name", "City", "Country", "IATA", "ICAO",
    "Latitude", "Longitude", "Altitude(feet)", "TimeZone", "DST", "Tz",
    "Type", "source",
]
airports = pd.read_csv(AIRPORTS_URL, header=None, names=columnNames)

# Routes table: same format; `columnNames` is rebound to the routes schema.
columnNames = [
    "Airline", "Airline ID", "Origin", "SourceID", "Destination",
    "destinationID", "codeshare", "stops", "equipment",
]
airoutes = pd.read_csv(ROUTES_URL, header=None, names=columnNames)

# Country outlines (TopoJSON 'countries' feature) used as the map backdrop.
worldmap = alt.topo_feature(data.world_110m.url, 'countries')

# Hover selection keyed on the "Origin" field: it follows the mark nearest
# to the pointer, and empty="none" means nothing is selected until a hover.
select_city = alt.selection_single(
    fields=["Origin"],
    on="mouseover",
    nearest=True,
    empty="none",
)

# Airport attributes pulled in by the lookup transforms; rows of `airports`
# are matched on their IATA code.
lookup_fields = ["Name", "Latitude", "Longitude"]
lookup_data = alt.LookupData(data=airports, key="IATA", fields=lookup_fields)

# Base layer: world countries drawn as light-gray shapes with white borders
# in the naturalEarth1 projection, on a 750x500 canvas.
background = (
    alt.Chart(worldmap)
    .mark_geoshape(fill="lightgray", stroke="white")
    .project("naturalEarth1")
    .properties(width=750, height=500)
)


# Route layer: one rule (line segment) per route row.
# Each row is joined to the airport lookup twice -- first on "Origin"
# (adding Name/Latitude/Longitude), then on "Destination" with the looked-up
# fields stored as Name/lat2/lon2 -- and finally filtered by the hover
# selection so only the selected origin's routes are drawn.
connections = (
    alt.Chart(airoutes)
    .mark_rule(opacity=0.35)
    .transform_lookup(lookup="Origin", from_=lookup_data)
    .transform_lookup(
        lookup="Destination",
        from_=lookup_data,
        as_=["Name", "lat2", "lon2"],
    )
    .transform_filter(select_city)
    .encode(
        latitude="Latitude:Q",
        longitude="Longitude:Q",
        latitude2="lat2:Q",
        longitude2="lon2:Q",
    )
)

# Airport layer: one circle per origin airport.
# Rows are counted per "Origin" into a `routes` field, then the airport's
# Name/Latitude/Longitude are attached via the lookup. Circle size encodes
# the route count (range 0-300, no legend) and marks are ordered by that
# count, descending. The hover selection is registered on this layer.
route_count_size = alt.Size(
    "routes:Q",
    scale=alt.Scale(range=[0, 300]),
    legend=None,
)
route_count_order = alt.Order("routes:Q", sort="descending")

points = (
    alt.Chart(airoutes)
    .mark_circle()
    .transform_aggregate(routes="count()", groupby=["Origin"])
    .transform_lookup(lookup="Origin", from_=lookup_data)
    .encode(
        latitude="Latitude:Q",
        longitude="Longitude:Q",
        size=route_count_size,
        order=route_count_order,
        tooltip=["Name:N", "routes:Q"],
    )
    .add_selection(select_city)
    .properties(title='Global Airports Connection Interactive Map')
)

# Stack the three layers (map, routes, airports) and remove the view border.
# NOTE: per the original author, the rendered map is too large to view
# comfortably inline and is exported to HTML instead; no export call is
# made in this cell.
layered_map = background + connections + points
fig = layered_map.configure_view(stroke=None)
fig