# US Airport Passport Control Wait Times

Click "Show Widgets" above and scroll to the bottom. Open your browser's console to see loading progress. For more information, see https://github.com/abhinavsharma/airport-wait-times

In [4]:
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import seaborn as sns
import nbinteract as nbi
import pandas as pd
from functools import lru_cache
import requests
import numpy as np
import datetime

In [5]:
@lru_cache(maxsize=128)
def get_airport_info(airport_code, fromDate, toDate):
    """Fetch CBP wait-time data for one airport and average it by hour of day.

    Posts a report request to https://awt.cbp.gov, parses the first HTML
    table in the response, and returns the mean of every numeric column
    grouped by the hour of day taken from each row's ``Hour`` field.

    Results are memoised with ``lru_cache``: repeat calls with the same
    arguments return the *same* DataFrame object, so treat it as read-only.

    Args:
        airport_code: Value sent as the ``port`` form field (e.g. ``'SFO'``).
        fromDate: Value sent as the ``rptFrom`` form field (a date string).
        toDate: Value sent as the ``rptTo`` form field (a date string).

    Returns:
        pandas.DataFrame indexed by hour of day. Only hours that actually
        occur in the report are present.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the response contains no ``<table>`` element.
    """
    from io import StringIO  # local import: only needed for pd.read_html below

    # get data from CBP website
    url = "https://awt.cbp.gov"
    data = {
        "port": airport_code,
        "rptFrom": fromDate,
        "rptTo": toDate
    }
    # a timeout keeps a stalled connection from hanging the notebook forever
    res = requests.post(url, data, timeout=60)
    # fail loudly on an HTTP error instead of trying to parse an error page
    res.raise_for_status()

    # parse html for table and load into a pandas dataframe
    # (explicit parser: avoids bs4's "no parser specified" warning and makes
    # the result independent of which optional parsers are installed)
    soup = BeautifulSoup(res.content, 'html.parser')
    tables = soup.find_all('table')
    if not tables:
        raise IndexError(
            "no <table> found in CBP response for port %r (%s - %s)"
            % (airport_code, fromDate, toDate)
        )
    # wrap in StringIO: passing literal HTML text to read_html is deprecated
    df = pd.read_html(StringIO(str(tables[0])))[0]

    # flatten column names from tuples to strings (dict.fromkeys drops
    # repeated header levels while keeping their order)
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [' '.join(list(dict.fromkeys(col))).strip() for col in df.columns.values]

    # create a time series index field: the text before the first '-' in
    # 'Hour' is the interval start, and its first two characters are the hour
    start_hour = df['Hour'].str.split('-').str[0].str[:2]
    df['ts'] = pd.to_datetime(df['Date'] + ' ' + start_hour, format="%m/%d/%Y %H")
    df = df.set_index('ts')

    # aggregate data by hour of day; numeric_only skips the remaining string
    # columns (Date, Hour, ...) which would make mean() raise on pandas >= 2.0
    df_hour_of_day_mean = df.groupby(df.index.hour).mean(numeric_only=True)
    return df_hour_of_day_mean


In [6]:
def get_last_months_from_to_dates(today=None):
    """Return the first and last day of the previous calendar month.

    Args:
        today: Optional reference date (``datetime.date`` or
            ``datetime.datetime``). Defaults to ``datetime.date.today()``.
            "Previous month" is computed relative to this date.

    Returns:
        Tuple ``(fromDate, toDate)`` of strings formatted ``MM/DD/YYYY``.
    """
    if today is None:
        today = datetime.date.today()
    # stepping back one day from the 1st of this month lands on the last
    # day of the previous month, whatever its length
    first = today.replace(day=1)
    lastMonthLastDay = first - datetime.timedelta(days=1)
    toDate = lastMonthLastDay.strftime('%m/%d/%Y')
    lastMonthFirstDay = lastMonthLastDay.replace(day=1)
    fromDate = lastMonthFirstDay.strftime('%m/%d/%Y')
    return (fromDate, toDate)

In [7]:
# Sample data: hour-of-day means for SFO over the previous calendar month.
# Arguments stay positional so this call shares its lru_cache entry with
# the identical call made later from y_f.
fromDate, toDate = get_last_months_from_to_dates()
df = get_airport_info('SFO', fromDate, toDate)
df.head()

Unnamed: 0_level_0,U.S. Citizen Average Wait Time,U.S. Citizen Max Wait Time,Non U.S. Citizen Average Wait Time,Non U.S. Citizen Max Wait Time,All Wait Times Average Wait Time,All Wait Times Max Wait Time,All Number Of Passengers Time Interval 0-15,All Number Of Passengers Time Interval 16-30,All Number Of Passengers Time Interval 31-45,All Number Of Passengers Time Interval 46-60,All Number Of Passengers Time Interval 61-90,All Number Of Passengers Time Interval 91-120,All Number Of Passengers Time Interval 120 plus,All Unnamed: 17_level_1 Excluded,All Unnamed: 18_level_1 Total,All Unnamed: 19_level_1 Flights,All Unnamed: 20_level_1 Booths
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,11.24,29.92,17.04,37.08,14.48,37.12,112.48,49.0,11.52,3.44,0.0,0.0,0.0,7.68,184.12,1.44,6.48
1,7.0,29.0,17.5,37.0,14.0,37.0,74.0,33.0,13.5,0.0,0.0,0.0,0.0,6.5,127.0,1.0,4.5
5,23.272727,49.727273,25.590909,65.0,24.272727,65.136364,148.909091,127.227273,89.727273,39.954545,25.545455,4.727273,0.136364,17.636364,453.863636,1.863636,11.636364
6,18.807692,62.884615,33.576923,84.615385,27.230769,84.615385,303.076923,192.153846,101.884615,53.461538,49.884615,41.615385,5.0,25.269231,772.346154,3.192308,15.269231
7,17.846154,55.961538,36.384615,89.5,29.538462,89.615385,216.461538,128.038462,71.192308,39.461538,55.384615,15.307692,6.615385,17.423077,549.884615,2.538462,17.192308


In [8]:
# Airport codes offered in the airport_code dropdown.
codes = [
    "GUM", "ATL", "AUS", "BWI", "BOS", "CLT", "MDW", "ORD", "CVG", "DFW",
    "DEN", "DTW", "FLL", "FAT", "HNL", "IAH", "SNA", "ONT", "STL", "LAX",
    "SJU", "LAS", "OAK", "MIA", "MSP", "JFK", "EWR", "SJC", "MCO", "SFB",
    "PBI", "PHL", "PHX", "PDX", "RDU", "SMF", "SPN", "SLC", "SAT", "SAN",
    "SFO", "SEA", "TPA", "IAD",
]
# Statistics offered in the statistic dropdown: the columns of the sample frame.
attrs = df.columns.tolist()

In [9]:
def y_f(xs, airport_code, statistic):
    """Return last month's hourly mean of ``statistic`` for each hour in ``xs``.

    Args:
        xs: Array-like of hour-of-day values to look up.
        airport_code: Airport code passed to ``get_airport_info``.
        statistic: Column name of the frame returned by ``get_airport_info``.

    Returns:
        Float ndarray with the same shape as ``xs``. Hours that are missing
        from the data are reported as 0.0.
    """
    (fromDate, toDate) = get_last_months_from_to_dates()
    df = get_airport_info(airport_code, fromDate, toDate)
    value_by_hour = df[statistic].to_dict()

    # otypes pins the output to float. Without it np.vectorize infers the
    # dtype from the first element, so a missing first hour (fallback 0)
    # would make the whole result integer and truncate every mean; it also
    # lets an empty xs through instead of raising.
    lookup = np.vectorize(lambda hour: value_by_hour.get(hour, 0.0), otypes=[float])
    return lookup(xs)
    
def x_f():
    """Return the x-axis values for the chart: the integers 0 through 23."""
    hours_in_day = 24
    return np.arange(0, hours_in_day)

# Chart options handed to nbinteract.
opts = dict(title='x: Hour of day, y: Mean value for last month')

# Interactive bar chart: x_f supplies the x values and y_f the bar heights,
# with `statistic` and `airport_code` chosen from the lists given here.
nbi.bar(
    x_f,
    y_f,
    statistic=attrs,
    airport_code=codes,
    options=opts,
)

VBox(children=(interactive(children=(Dropdown(description='statistic', options=('U.S. Citizen Average Wait  Ti…