In [98]:
# Author: University of Washington Center for Human Rights
# Date: 2019-11-25
# License: GPL 3.0 or greater
# Path: ice-air/installment2/analyze/note/global.ipynb

import numpy as np
import pandas as pd
import yaml
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab

In [99]:
# Column-name -> dtype mapping used to read the passenger CSV.
# yaml.safe_load replaces the bare yaml.load(): calling yaml.load() without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError on PyYAML >= 6.
with open('../input/passenger-dtypes.yaml', 'r') as yamlfile:
    column_types = yaml.safe_load(yamlfile)

# Reader options: pipe-delimited, double-quote quoting, gzip-compressed, UTF-8 text;
# MissionDate is parsed into a datetime column.
read_csv_opts = {'sep': '|',
                 'quotechar': '"',
                 'compression': 'gzip',
                 'encoding': 'utf-8',
                 'dtype': column_types,
                 'parse_dates': ['MissionDate'],
                 'infer_datetime_format': True}

df = pd.read_csv('../input/ice-air-passengers.csv.gz', **read_csv_opts)

# Summarise column dtypes and row count of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1826402 entries, 0 to 1826401
Data columns (total 52 columns):
ANumber                         category
LastName                        category
FirstName                       category
DOB                             category
Status                          category
Sex                             category
Convictions                     category
GangMember                      category
ClassLvl                        float32
Age                             float32
MissionDate                     datetime64[ns]
MissionNumber                   uint32
PULOC                           category
DropLoc                         category
StrikeFromList                  float32
ReasonStruck                    category
R-T                             category
Code                            category
CountryOfCitizenship            category
Juvenile                        bool
MissionWeek                     uint8
MissionQuarter                  

In [100]:
# Airport metadata lookup loaded from YAML.
# yaml.safe_load replaces the bare yaml.load(): calling yaml.load() without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError on PyYAML >= 6.
with open('../input/airport_dict.yaml', 'r') as yamlfile:
    airport_dict = yaml.safe_load(yamlfile)

In [101]:
# Build a frame from the airport lookup and flip it so the dict's top-level keys
# become the row index (presumably airport codes -- verify against the YAML).
airport_metadata = pd.DataFrame(airport_dict).transpose()

In [102]:
# df.reset_index(inplace=True)

In [103]:
# Index the passenger records by mission date so they can be selected by date.
df = df.set_index('MissionDate')

In [104]:
# Keep records dated 2018-10-01 through the end of 2019-05-30 (same window as the
# original string slice, whose end label covered the whole of that day).
# A boolean mask is used instead of a label slice because the MissionDate index is
# never sorted in this notebook: slicing a non-monotonic DatetimeIndex with bounds
# that are not present in the index raises KeyError on pandas >= 2.0.
# NOTE(review): the window stops on May 30, not May 31 -- confirm this is intended.
window_start = pd.Timestamp('2018-10-01')
window_end = pd.Timestamp('2019-05-31')  # exclusive upper bound
df = df[(df.index >= window_start) & (df.index < window_end)]

In [105]:
# Group once per location column and reuse the groupers below.
by_pickup_airport = df.groupby('PULOC')
by_dropoff_airport = df.groupby('DropLoc')

# Distinct AlienMasterID values per pickup / drop-off location, and distinct
# MissionID values per pickup location.
pickups = by_pickup_airport['AlienMasterID'].nunique()
dropoffs = by_dropoff_airport['AlienMasterID'].nunique()
missions = by_pickup_airport['MissionID'].nunique()

In [106]:
# Name the count Series; these names become the column labels when the Series
# are concatenated into a single table.
pickups = pickups.rename('total_pickups')
dropoffs = dropoffs.rename('total_dropoffs')
# missions.name = 'total_missions'

In [107]:
# Exploratory leftover: selects 'air_AirportName' within each PULOC group but never
# aggregates, so the cell only echoes the lazy SeriesGroupBy object.
df.groupby('PULOC')['air_AirportName']

<pandas.core.groupby.groupby.SeriesGroupBy object at 0x11eeb63c8>

In [108]:
# Place pickup counts, drop-off counts and airport metadata side by side,
# aligned on their shared index.
temp = pd.concat(
    [pickups, dropoffs, airport_metadata],
    axis='columns',
)

In [109]:
# Keep only airports whose Country is 'USA', busiest pickup locations first.
is_us_airport = temp['Country'] == 'USA'
temp = temp.loc[is_us_airport].sort_values('total_pickups', ascending=False)

In [110]:
# Move the index into an ordinary column (it is renamed from 'index' to 'code'
# in the following cell) and replace it with a fresh integer index.
temp = temp.reset_index()

In [111]:
# Give the former index column a meaningful label.
temp = temp.rename(columns={'index': 'code'})

In [112]:
def _airnav_url(airport_code):
    """Return the airnav.com airport page URL for a single airport code."""
    return f'http://airnav.com/airport/{airport_code}'


# One reference link per airport row.
temp['link'] = temp['code'].map(_airnav_url)

In [113]:
# Coordinates are not included in the exported table.
temp = temp.drop(columns=['LatitudeDecimalDegrees', 'LongitudeDecimalDegrees'])

In [114]:
# Inspect the current column labels before relabelling them.
temp.columns

Index(['code', 'total_pickups', 'total_dropoffs', 'AirportName', 'City',
       'Country', 'State', 'link'],
      dtype='object')

In [115]:
# Relabel the working columns for the exported table.
# An explicit old -> new mapping is used instead of assigning a positional list to
# temp.columns: a positional list silently mislabels data if the column order ever
# changes, whereas the mapping only touches the named columns. The remaining
# columns (AirportName, City, Country, State) already carry their final labels.
temp = temp.rename(columns={
    'code': 'ICAOCode',
    'total_pickups': 'TotalPickups',
    'total_dropoffs': 'TotalDropoffs',
    'link': 'Link',
})

In [116]:
# Preview the first rows of the relabelled airport table.
temp.head()

Unnamed: 0,ICAOCode,TotalPickups,TotalDropoffs,AirportName,City,Country,State,Link
0,KBRO,26987.0,13104.0,Brownsville South Padre Island International A...,Brownsville,USA,TX,http://airnav.com/airport/KBRO
1,KIWA,25098.0,8019.0,Phoenix-Mesa-Gateway Airport,Mesa,USA,AZ,http://airnav.com/airport/KIWA
2,KAEX,20273.0,11375.0,Alexandria International Airport,Alexandria,USA,LA,http://airnav.com/airport/KAEX
3,KELP,7026.0,3736.0,El Paso International Airport,El Paso,USA,TX,http://airnav.com/airport/KELP
4,KIAH,4800.0,1852.0,George Bush Intercontinental Houston Airport,Houston,USA,TX,http://airnav.com/airport/KIAH


In [121]:
# Airports missing from one of the count Series have NaN there; treat that as zero.
temp = temp.fillna({'TotalPickups': 0, 'TotalDropoffs': 0})

In [128]:
# Keep only airports with at least one pickup AND at least one drop-off.
# The original expression closed the parenthesis before the second `>= 1`, i.e.
# ((TotalPickups >= 1) & TotalDropoffs) >= 1, which ANDs the pickup mask with the
# raw drop-off counts and then compares that result to 1 instead of testing the
# drop-off threshold. Each condition is now evaluated on its own before combining.
has_pickups = temp['TotalPickups'] >= 1
has_dropoffs = temp['TotalDropoffs'] >= 1
temp = temp[has_pickups & has_dropoffs]

In [129]:
# Export the filtered US airport table to an Excel workbook.
# NOTE(review): no index= argument is passed, so the DataFrame index is presumably
# written as the first spreadsheet column -- confirm that is wanted.
temp.to_excel('../output/US-airports-FY19.xlsx')