# US Airport Passport Control Wait Times

I made this notebook to help determine a good time to fly in and out of US airports. The data comes from U.S. Customs and Border Protection (CBP) Airport Wait Times at https://awt.cbp.gov, where the specs can also be found.

In [46]:
import datetime
from functools import lru_cache
from io import StringIO

import matplotlib.pyplot as plt
import nbinteract as nbi
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

In [32]:
@lru_cache(maxsize=128)
def get_airport_info(code):
    """Fetch last month's CBP wait-time report for one airport, averaged by hour of day.

    Parameters
    ----------
    code : str
        Airport code sent as the ``port`` form field (e.g. ``'SFO'``).

    Returns
    -------
    pandas.DataFrame
        Indexed by hour of day (only hours present in the report), with one
        column per numeric report column holding the mean over the previous
        calendar month.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    ValueError
        If the response contains no ``<table>`` element.

    Notes
    -----
    Results are memoised by ``lru_cache``: repeated calls with the same code
    return the *same* DataFrame object, so callers must not mutate it. The
    report date range is computed on the first call and is cached with it.
    """
    # Previous calendar month: the day before the 1st of this month is the
    # last day of last month; its day=1 is the first day of last month.
    today = datetime.date.today()
    last_month_end = today.replace(day=1) - datetime.timedelta(days=1)
    last_month_start = last_month_end.replace(day=1)

    url = "https://awt.cbp.gov"
    payload = {
        "port": code,
        "rptFrom": last_month_start.strftime('%m/%d/%Y'),
        "rptTo": last_month_end.strftime('%m/%d/%Y'),
    }

    # A timeout keeps a stalled connection from hanging the kernel; the status
    # check turns HTTP errors into an exception instead of parsing an error page.
    res = requests.post(url, data=payload, timeout=30)
    res.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(res.content, "html.parser")
    table = soup.find('table')
    if table is None:
        raise ValueError(f"No wait-time table found in the response for port {code!r}")

    # read_html expects a file-like object; passing literal HTML is deprecated.
    df = pd.read_html(StringIO(str(table)))[0]

    # Flatten the multi-row header into single strings, dropping repeated
    # level names. The joined names are used verbatim as column keys downstream.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [' '.join(dict.fromkeys(col)).strip() for col in df.columns.values]

    # Build a timestamp from the date plus the first two characters of the
    # hour range (the text before the first '-').
    hour_start = df['Hour'].str.split('-').str[0].str[:2]
    df['ts'] = pd.to_datetime(df['Date'] + ' ' + hour_start, format="%m/%d/%Y %H")
    df = df.set_index('ts')

    # numeric_only=True skips the text columns, which cannot be averaged.
    return df.groupby(df.index.hour).mean(numeric_only=True)


In [16]:
# Fetch the hour-of-day means for SFO as a sample; later cells reuse `df`
# (its column names populate the statistic choices).
df = get_airport_info('SFO')
df

Unnamed: 0_level_0,U.S. Citizen Average Wait Time,U.S. Citizen Max Wait Time,Non U.S. Citizen Average Wait Time,Non U.S. Citizen Max Wait Time,All Wait Times Average Wait Time,All Wait Times Max Wait Time,All Number Of Passengers Time Interval 0-15,All Number Of Passengers Time Interval 16-30,All Number Of Passengers Time Interval 31-45,All Number Of Passengers Time Interval 46-60,All Number Of Passengers Time Interval 61-90,All Number Of Passengers Time Interval 91-120,All Number Of Passengers Time Interval 120 plus,All Unnamed: 17_level_1 Excluded,All Unnamed: 18_level_1 Total,All Unnamed: 19_level_1 Flights,All Unnamed: 20_level_1 Booths
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,11.24,29.92,17.04,37.08,14.48,37.12,112.48,49.0,11.52,3.44,0.0,0.0,0.0,7.68,184.12,1.44,6.48
1,7.0,29.0,17.5,37.0,14.0,37.0,74.0,33.0,13.5,0.0,0.0,0.0,0.0,6.5,127.0,1.0,4.5
5,23.272727,49.727273,25.590909,65.0,24.272727,65.136364,148.909091,127.227273,89.727273,39.954545,25.545455,4.727273,0.136364,17.636364,453.863636,1.863636,11.636364
6,18.807692,62.884615,33.576923,84.615385,27.230769,84.615385,303.076923,192.153846,101.884615,53.461538,49.884615,41.615385,5.0,25.269231,772.346154,3.192308,15.269231
7,17.846154,55.961538,36.384615,89.5,29.538462,89.615385,216.461538,128.038462,71.192308,39.461538,55.384615,15.307692,6.615385,17.423077,549.884615,2.538462,17.192308
8,15.942857,42.085714,27.4,61.485714,22.542857,61.514286,304.171429,172.8,76.6,30.828571,14.085714,6.657143,3.285714,18.942857,627.371429,3.085714,18.742857
9,12.773585,41.226415,24.396226,54.075472,20.075472,54.09434,224.132075,139.358491,56.169811,13.773585,11.358491,2.754717,1.45283,14.339623,463.339623,2.226415,18.849057
10,12.611111,40.648148,23.759259,56.62963,19.462963,56.62963,256.425926,144.981481,57.444444,21.518519,21.240741,3.685185,1.574074,15.037037,521.907407,2.5,18.574074
11,12.603774,52.245283,32.113208,68.132075,25.54717,68.132075,274.075472,149.811321,83.830189,64.113208,74.0,17.320755,0.320755,19.660377,683.132075,2.792453,18.924528
12,13.456522,51.195652,33.565217,71.434783,26.26087,71.434783,265.543478,185.782609,88.478261,57.26087,68.782609,10.391304,0.413043,19.521739,696.173913,2.217391,21.173913


In [28]:
# Show the flattened column names exactly as they must be spelled when indexing.
df.columns.tolist()

['U.S. Citizen Average Wait  Time',
 'U.S. Citizen Max Wait  Time',
 'Non U.S. Citizen Average Wait  Time',
 'Non U.S. Citizen Max Wait  Time',
 'All Wait Times Average Wait  Time',
 'All Wait Times Max Wait  Time',
 'All Number Of Passengers Time Interval 0-15',
 'All Number Of Passengers Time Interval 16-30',
 'All Number Of Passengers Time Interval 31-45',
 'All Number Of Passengers Time Interval 46-60',
 'All Number Of Passengers Time Interval 61-90',
 'All Number Of Passengers Time Interval 91-120',
 'All Number Of Passengers Time Interval 120 plus',
 'All Unnamed: 17_level_1 Excluded',
 'All Unnamed: 18_level_1 Total',
 'All Unnamed: 19_level_1 Flights',
 'All Unnamed: 20_level_1 Booths']

In [53]:
# Airport codes offered in the interactive chart.
codes = [
    "GUM", "ATL", "AUS", "BWI", "BOS", "CLT", "MDW", "ORD", "CVG", "DFW", "DEN",
    "DTW", "FLL", "FAT", "HNL", "IAH", "SNA", "ONT", "STL", "LAX", "SJU", "LAS",
    "OAK", "MIA", "MSP", "JFK", "EWR", "SJC", "MCO", "SFB", "PBI", "PHL", "PHX",
    "PDX", "RDU", "SMF", "SPN", "SLC", "SAT", "SAN", "SFO", "SEA", "TPA", "IAD",
]
# Statistics offered in the chart: every column of the sample frame.
attrs = df.columns.tolist()

In [40]:
# Peek at one statistic as an {hour: mean} mapping, the shape y_f looks values up in.
df.loc[:, 'U.S. Citizen Max Wait  Time'].to_dict()

{0: 29.92,
 1: 29.0,
 5: 49.72727272727273,
 6: 62.88461538461539,
 7: 55.96153846153846,
 8: 42.08571428571429,
 9: 41.22641509433962,
 10: 40.648148148148145,
 11: 52.24528301886792,
 12: 51.19565217391305,
 13: 46.91836734693877,
 14: 62.96296296296296,
 15: 47.57692307692308,
 16: 51.18518518518518,
 17: 37.82608695652174,
 18: 40.25581395348837,
 19: 45.333333333333336,
 20: 47.51923076923077,
 21: 31.195652173913043,
 22: 23.958333333333332,
 23: 22.88888888888889}

In [56]:
def y_f(xs, airport_code, statistic):
    """Return the chosen statistic for each hour in ``xs``, using 0 where an hour has no data.

    Parameters
    ----------
    xs : array-like of int
        Hours of day to look up.
    airport_code : str
        Airport code passed to ``get_airport_info``.
    statistic : str
        Column name of the frame returned by ``get_airport_info``.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``xs``.
    """
    by_hour = get_airport_info(airport_code)[statistic].to_dict()
    hours = np.asarray(xs)
    # Build the array with an explicit float dtype. np.vectorize infers the
    # dtype from the first element, so a missing first hour (int 0) would
    # truncate every following mean to an integer; it also fails on empty input.
    values = [by_hour.get(h, 0.0) for h in hours.ravel().tolist()]
    return np.array(values, dtype=float).reshape(hours.shape)
    
def x_f():
    """Return the x-axis values for the chart: the integers 0 through 23."""
    hours_in_day = 24
    return np.arange(start=0, stop=hours_in_day, step=1)

# Chart settings: fixed x-range covering the whole day, plus a title that
# explains both axes.
opts = dict(
    xlim=(0, 24),
    title='x: Hour of day, y: Mean value for last month',
)

# Interactive bar chart; the list-valued keyword arguments become the
# `statistic` and `airport_code` choices passed to y_f.
nbi.bar(
    x_f,
    y_f,
    statistic=attrs,
    airport_code=codes,
    options=opts,
)



VBox(children=(interactive(children=(Dropdown(description='statistic', options=('U.S. Citizen Average Wait  Ti…