In [1]:
import altair as alt
import pandas as pd
import geopandas as gpd
from vega_datasets import data
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
airports_url = "airports-extended.csv"
airports = pd.read_csv(airports_url)
airports

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Longitude,Latitude,Altitude,TimeZone,DST,Tz Database Time Zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.081690,145.391998,5282,10.0,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.207080,145.789002,20,10.0,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.826790,144.296005,5388,10.0,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10.0,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.443380,147.220001,146,10.0,U,Pacific/Port_Moresby,airport,OurAirports
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12663,14107,Ulan-Ude East Airport,Ulan Ude,Russia,\N,XIUW,51.849998,107.737999,1670,,\N,\N,airport,OurAirports
12664,14108,Krechevitsy Air Base,Novgorod,Russia,\N,ULLK,58.625000,31.385000,85,,\N,\N,airport,OurAirports
12665,14109,Desierto de Atacama Airport,Copiapo,Chile,CPO,SCAT,-27.261200,-70.779198,670,,\N,\N,airport,OurAirports
12666,14110,Melitopol Air Base,Melitopol,Ukraine,\N,UKDM,46.880001,35.305000,0,,\N,\N,airport,OurAirports


Derive US airports

In [3]:
us_airports = airports.where(airports["Country"] =="United States")
us_airports.dropna(inplace=True)
us_airports["Airport ID"] = us_airports["Airport ID"].astype(str)
us_airports["Airport ID"] = us_airports["Airport ID"].str.replace(".0","",regex=True)

us_airports["Type"].unique()  #column has unknown values that will be dropped
# only show known values
us_airports = us_airports.loc[(us_airports["Type"] == "airport") | (us_airports["Type"] == "station") | (us_airports["Type"] == "port")]
us_airports


Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Longitude,Latitude,Altitude,TimeZone,DST,Tz Database Time Zone,Type,Source
3223,3411,Barter Island LRRS Airport,Barter Island,United States,BTI,PABA,70.134003,-143.582001,2.0,-9.0,A,America/Anchorage,airport,OurAirports
3224,3412,Wainwright Air Station,Fort Wainwright,United States,\N,PAWT,70.613403,-159.860001,35.0,-9.0,A,America/Anchorage,airport,OurAirports
3225,3413,Cape Lisburne LRRS Airport,Cape Lisburne,United States,LUR,PALU,68.875099,-166.110001,16.0,-9.0,A,America/Anchorage,airport,OurAirports
3226,3414,Point Lay LRRS Airport,Point Lay,United States,PIZ,PPIZ,69.732903,-163.005005,22.0,-9.0,A,America/Anchorage,airport,OurAirports
3227,3415,Hilo International Airport,Hilo,United States,ITO,PHTO,19.721399,-155.048004,38.0,-10.0,N,Pacific/Honolulu,airport,OurAirports
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12264,137,Seldovia Airport,Seldovia,United States,SOV,PASO,59.442402,-151.703995,29.0,-9.0,A,\N,airport,OurAirports
12314,13757,Vidalia Regional Airport,Vidalia,United States,VDI,KVDI,32.192699,-82.371201,275.0,-4.0,A,\N,airport,OurAirports
12315,13758,Granbury Regional Airport,Granbury,United States,\N,KGDJ,32.444401,-97.816902,778.0,-5.0,A,\N,airport,OurAirports
12316,13759,Oswego County Airport,Fulton,United States,\N,KFZY,43.350800,-76.388100,475.0,-4.0,A,\N,airport,OurAirports


Import Us states data

In [4]:
us_cities = pd.read_csv("uscities.csv")   # https://www.kaggle.com/datasets/sergejnuss/united-states-cities-database
us_states = pd.read_csv("us_states.csv")

us_cities  = us_cities.filter(["city","state_id"])

airports_cities = pd.merge(us_airports,us_cities,how="inner", left_on="City",right_on="city")
airports_states = pd.merge(airports_cities,us_states,how="inner", left_on="state_id",right_on="STUSAB")
path = "airports_states.csv"
airports_states.to_csv(path)
airports_states["Type Counts"] = airports_states.groupby(["Type","STATE_NAME"])["Type"].transform("count")

airports_states

Unnamed: 0,Airport ID,Name,City,Country,IATA,ICAO,Longitude,Latitude,Altitude,TimeZone,...,Tz Database Time Zone,Type,Source,city,state_id,STATE,STUSAB,STATE_NAME,STATENS,Type Counts
0,3414,Point Lay LRRS Airport,Point Lay,United States,PIZ,PPIZ,69.732903,-163.005005,22.0,-9.0,...,America/Anchorage,airport,OurAirports,Point Lay,AK,2,AK,Alaska,1785533,216
1,3417,Bettles Airport,Bettles,United States,BTT,PABT,66.913902,-151.529007,647.0,-9.0,...,America/Anchorage,airport,OurAirports,Bettles,AK,2,AK,Alaska,1785533,216
2,34,Fort Yukon Airport,Fort Yukon,United States,FYU,PFYU,66.571503,-145.250000,433.0,-9.0,...,America/Anchorage,airport,OurAirports,Fort Yukon,AK,2,AK,Alaska,1785533,216
3,3429,Iliamna Airport,Iliamna,United States,ILI,PAIL,59.754398,-154.910996,192.0,-9.0,...,America/Anchorage,airport,OurAirports,Iliamna,AK,2,AK,Alaska,1785533,216
4,3438,Merrill Field,Anchorage,United States,MRI,PAMR,61.213501,-149.843994,137.0,-9.0,...,America/Anchorage,airport,OurAirports,Anchorage,AK,2,AK,Alaska,1785533,216
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7243,95,South Norwalk Station,Norwalk,United States,\N,\N,41.096730,-73.421132,36.0,-5.0,...,America/New_York,station,User,Norwalk,CT,9,CT,Connecticut,1779780,10
7244,646,New London Union Station,New London,United States,\N,\N,41.354167,-72.093056,0.0,-5.0,...,\N,station,User,New London,CT,9,CT,Connecticut,1779780,10
7245,135,Meriden Markham Municipal Airport,Meriden,United States,\N,KMMK,41.508701,-72.829498,103.0,-5.0,...,\N,airport,OurAirports,Meriden,CT,9,CT,Connecticut,1779780,15
7246,115,Rockville Amtrak Station,Rockville,United States,\N,\N,39.084497,-77.148261,0.0,-5.0,...,\N,station,User,Rockville,CT,9,CT,Connecticut,1779780,10


In [9]:
states = gpd.read_file("us_states.json")

plot = alt.concat( *  (
    alt.Chart(states,title=type).mark_geoshape().encode(
        color="Type Counts:Q",
        tooltip=['STATE_NAME:N',"Type Counts:Q"],
    ).transform_lookup(
        "name",
        alt.LookupData(
            airports_states[airports_states['Type']==type],
            "STATE_NAME",
            fields=airports_states.columns.to_list())
        ).project(type='albersUsa')
    
    for type in list(airports_states.Type.unique())
    ), columns=3, title="The number of different kinds of 'ports' in US States"
).configure_title(align="center",anchor="middle",fontSize=15, offset=20)

plot