# US Airport Passport Control Wait Times

https://github.com/abhinavsharma/airport-wait-times

In [4]:
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import seaborn as sns
import nbinteract as nbi
import pandas as pd
from functools import lru_cache
import requests
import numpy as np
import datetime

In [5]:
@lru_cache(maxsize=128)
def get_airport_info(airport_code, fromDate, toDate):
    """Fetch CBP wait-time data for one airport and average it by hour of day.

    Posts the airport code and date range to the CBP Airport Wait Times
    site, loads the first HTML table of the response into a DataFrame and
    returns the mean of every numeric column grouped by hour of day.

    Results are memoised by ``lru_cache``: repeated calls with the same
    arguments return the *same* DataFrame object, so callers should treat
    it as read-only.

    Args:
        airport_code: Value sent as the ``port`` form field (e.g. ``'SFO'``).
        fromDate: Value sent as the ``rptFrom`` form field.
        toDate: Value sent as the ``rptTo`` form field.

    Returns:
        A DataFrame indexed by hour of day holding the per-hour mean of
        each numeric column of the report.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the response contains no ``<table>`` element.
    """
    # Local import keeps this block self-contained; pandas deprecated
    # passing literal HTML strings to read_html in favour of file-likes.
    from io import StringIO

    # get data from CBP website
    url = "https://awt.cbp.gov"
    data = {
        "port": airport_code,
        "rptFrom": fromDate,
        "rptTo": toDate
    }
    # A timeout stops a stalled connection from hanging the notebook.
    res = requests.post(url, data=data, timeout=30)
    res.raise_for_status()

    # parse html for table and load into a pandas dataframe
    # (explicit parser: avoids bs4's "no parser specified" warning and
    # environment-dependent parser selection)
    soup = BeautifulSoup(res.content, "html.parser")
    tables = soup.find_all('table')
    if not tables:
        raise IndexError(
            "no <table> found in response from %s for port %r (%s - %s)"
            % (url, airport_code, fromDate, toDate)
        )
    df = pd.read_html(StringIO(str(tables[0])))[0]

    # flatten column names from tuples to strings, dropping repeated
    # header levels; plain (non-tuple) names are kept as they are
    df.columns = [
        ' '.join(str(part) for part in dict.fromkeys(col)).strip()
        if isinstance(col, tuple) else str(col).strip()
        for col in df.columns.values
    ]

    # create a time series index field: the date plus the first two
    # characters of the text before the first '-' in the Hour column
    hour_start = df['Hour'].str.split('-').str[0].str[:2]
    df['ts'] = pd.to_datetime(df['Date'] + ' ' + hour_start,
                              format="%m/%d/%Y %H")
    df = df.set_index('ts')

    # aggregate data by hour of day; numeric_only skips the textual
    # Date/Hour columns, which would otherwise make mean() raise on
    # pandas >= 2.0
    df_hour_of_day_mean = df.groupby(df.index.hour).mean(numeric_only=True)
    return df_hour_of_day_mean


In [6]:
def get_last_months_from_to_dates():
    """Return the first and last day of the previous calendar month.

    Returns:
        A ``(from_date, to_date)`` tuple of strings formatted as
        ``MM/DD/YYYY``, computed relative to today's local date.
    """
    date_format = '%m/%d/%Y'
    start_of_this_month = datetime.date.today().replace(day=1)
    # Stepping back one day from the 1st lands on the previous month's end.
    end_of_prev_month = start_of_this_month - datetime.timedelta(days=1)
    start_of_prev_month = end_of_prev_month.replace(day=1)
    return (
        start_of_prev_month.strftime(date_format),
        end_of_prev_month.strftime(date_format),
    )

In [None]:
# sample data: hourly means for SFO over the previous calendar month
fromDate, toDate = get_last_months_from_to_dates()
df = get_airport_info('SFO', fromDate, toDate)
df.head()

In [None]:
# Airport codes passed to nbi.bar as the `airport_code` argument.
codes = [
    "GUM", "ATL", "AUS", "BWI", "BOS", "CLT", "MDW", "ORD", "CVG", "DFW",
    "DEN", "DTW", "FLL", "FAT", "HNL", "IAH", "SNA", "ONT", "STL", "LAX",
    "SJU", "LAS", "OAK", "MIA", "MSP", "JFK", "EWR", "SJC", "MCO", "SFB",
    "PBI", "PHL", "PHX", "PDX", "RDU", "SMF", "SPN", "SLC", "SAT", "SAN",
    "SFO", "SEA", "TPA", "IAD",
]
# Column names of the sample frame, passed to nbi.bar as `statistic`.
attrs = df.columns.tolist()

In [None]:
def y_f(xs, airport_code, statistic):
    """Return last month's hourly mean of ``statistic`` for ``airport_code``.

    Args:
        xs: Array of hour-of-day values to look up (``x_f`` supplies 0-23).
        airport_code: Airport code forwarded to ``get_airport_info``.
        statistic: Name of the column of the hourly-mean frame to read.

    Returns:
        A float array with the same shape as ``xs`` holding the mean for
        each hour, with 0 for hours that have no data.
    """
    (fromDate, toDate) = get_last_months_from_to_dates()
    df = get_airport_info(airport_code, fromDate, toDate)
    hourly_means = df[statistic].to_dict()
    # otypes pins a float result. Without it np.vectorize infers the
    # output dtype from the first element's result, so a missing first
    # hour (integer 0 fallback) would truncate every mean to an int, and
    # an empty xs would raise.
    lookup = np.vectorize(lambda hour: hourly_means.get(hour, 0),
                          otypes=[float])
    return lookup(xs)
    
def x_f():
    """Return the x-axis values: the integers 0 through 23."""
    hours_in_day = 24
    return np.arange(hours_in_day)

# Chart options forwarded to nbi.bar via its `options` argument.
opts = dict(title='x: Hour of day, y: Mean value for last month')

nbi.bar(
    x_f,
    y_f,
    statistic=attrs,
    airport_code=codes,
    options=opts,
)