In [86]:
import pandas as pd
import numpy as np
import yaml
import matplotlib.pyplot as plt

In [55]:
# Shared pd.read_csv keyword arguments for the gzipped, pipe-delimited inputs.
csv_opts = dict(
    sep='|',
    quotechar='"',
    compression='gzip',
    encoding='utf-8',
)

In [56]:
# Hand-maintained dtype specifications, one YAML file per input table; each is
# later passed as the `dtype=` argument of pd.read_csv.
#
# yaml.safe_load is used instead of yaml.load: calling yaml.load without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError in
# PyYAML >= 6. safe_load also refuses to construct arbitrary Python objects.
# NOTE(review): assumes these files contain only plain YAML mappings/scalars
# (no python-specific tags) -- confirm against the files in ../hand/.
with open('../hand/arrest_dtypes.yaml', 'r') as yamlfile:
    arrest_dtypes = yaml.safe_load(yamlfile)
with open('../hand/encounter_dtypes.yaml', 'r') as yamlfile:
    encounter_dtypes = yaml.safe_load(yamlfile)
with open('../hand/removal_dtypes.yaml', 'r') as yamlfile:
    removal_dtypes = yaml.safe_load(yamlfile)

In [61]:
# Read the three input extracts with the shared CSV options and the
# per-table dtype specification loaded from YAML.
arrests = pd.read_csv(
    '../input/arrests.csv.gz',
    dtype=arrest_dtypes,
    **csv_opts
)
encounters = pd.read_csv(
    '../input/encounters.csv.gz',
    dtype=encounter_dtypes,
    **csv_opts
)
removals = pd.read_csv(
    '../input/removals.csv.gz',
    dtype=removal_dtypes,
    **csv_opts
)

In [64]:
# Convert each MM/DD/YYYY date column to datetime64, overwriting the column
# on the frame it belongs to.
for _frame, _date_col in (
    (arrests, 'apprehension_date'),
    (encounters, 'event_date'),
    (removals, 'departed_date'),
    (removals, 'removal_date'),
):
    _frame[_date_col] = pd.to_datetime(_frame[_date_col], format='%m/%d/%Y')

In [65]:
# Columns listed here are removed from all three tables.
redacted = ['birth_date']
arrests = arrests.drop(columns=redacted)
removals = removals.drop(columns=redacted)
encounters = encounters.drop(columns=redacted)

In [69]:
# Give every table a 0-based sequential row id; it is the column counted
# in the later groupby.
for _frame in (arrests, encounters, removals):
    _frame['id'] = range(len(_frame))

In [75]:
# Use one short column name, 'aor', for the area-of-responsibility field in
# every table (encounters names its source column differently).
arrests = arrests.rename(columns={'area_of_responsibility': 'aor'})
encounters = encounters.rename(columns={'event_area_of_responsibility': 'aor'})
removals = removals.rename(columns={'area_of_responsibility': 'aor'})

In [76]:
# Remove the literal 'Area of Responsibility' text from each aor label and
# store the result as a categorical column.
arrests.aor = arrests.aor.str.replace('Area of Responsibility', '').astype('category')
encounters.aor = encounters.aor.str.replace('Area of Responsibility', '').astype('category')
removals.aor = removals.aor.str.replace('Area of Responsibility', '').astype('category')

In [120]:
# Hand-maintained mapping used to recode the 'aor' column of each table.
#
# yaml.safe_load is used instead of yaml.load: calling yaml.load without an
# explicit Loader is deprecated since PyYAML 5.1 and raises TypeError in
# PyYAML >= 6.
# NOTE(review): assumes the file is a plain YAML mapping with no
# python-specific tags -- confirm against ../hand/aor_codes.yaml.
with open('../hand/aor_codes.yaml', 'r') as yamlfile:
    aor_codes = yaml.safe_load(yamlfile)

In [122]:
# Trim surrounding whitespace, then recode the labels via the aor_codes mapping.
arrests['aor'] = (
    arrests['aor']
    .str.strip()
    .replace(aor_codes)
)

In [123]:
# Trim surrounding whitespace, then recode the labels via the aor_codes mapping.
encounters['aor'] = (
    encounters['aor']
    .str.strip()
    .replace(aor_codes)
)

In [125]:
# Trim surrounding whitespace, then recode the labels via the aor_codes mapping.
removals['aor'] = (
    removals['aor']
    .str.strip()
    .replace(aor_codes)
)

Using the date range of the AIC report, the counts below are very close to the report's figures; the remaining difference is due either to cleaning decisions or possibly to records that were "back-filled" after the data was released to AIC.

In [82]:
# Count the records whose date falls inside the AIC report window (both end
# dates inclusive), one line of output per table/date column.
#
# A boolean mask is used instead of set_index(...).loc[start:end]: label
# slicing on an unsorted DatetimeIndex raises KeyError in pandas >= 2.0 when
# the bounds are not present in the index, and it also copies each frame just
# to count rows. The date columns were parsed from day-resolution strings, so
# the inclusive comparison selects the same rows as the partial-string slice.
# Rows with a missing date compare False and are not counted.
_window_start = pd.Timestamp('2016-1-1')
_window_end = pd.Timestamp('2018-9-23')
for _frame, _date_col in (
    (arrests, 'apprehension_date'),
    (encounters, 'event_date'),
    (removals, 'removal_date'),
    (removals, 'departed_date'),
):
    print(_frame[_date_col].between(_window_start, _window_end).sum())

381705
1199704
656077
657686


In [99]:
# Load the facility ADP table; the first CSV column becomes the row index.
facil_adp = pd.read_csv(
    '../input/facility_adp_by_fy.csv',
    index_col=0
)

In [134]:
# Preview the first five rows of the table.
facil_adp.head(n=5)

Unnamed: 0,ATL,BAL,BOS,BUF,CHI,DAL,DEN,DET,ELP,HOU,...,NYC,PHI,PHO,SEA,SFR,SLC,SNA,SND,SPM,WAS
FY09 ADP,2179,176,928,555,1200,851,643,722,1843,2274,...,968,1275,3103,1083,580,399,4799,1171,679,638
FY10 ADP,2244,232,864,545,1174,796,529,660,1826,2317,...,978,1276,2990,1213,636,415,4597,1154,703,577
FY11 ADP,2564,311,826,595,1521,861,528,709,1823,2161,...,987,1186,2872,1287,695,493,4614,1116,762,965
FY12 ADP,2539,348,858,484,1543,920,488,631,1707,2634,...,1003,1113,3026,1420,715,484,5647,1117,667,997
FY13 ADP,2287,320,739,440,1455,871,509,528,2019,3365,...,948,1066,3334,1436,846,444,6325,1136,460,895


In [138]:
# October 1st date strings for 2008 through 2017, one per row of facil_adp;
# used to replace that table's index.
t = ['{}-10-01'.format(year) for year in range(2008, 2018)]

In [140]:
# Replace the existing row labels with the dates in `t`, as a DatetimeIndex.
# The assignment raises if len(t) differs from the number of rows.
facil_adp.index = pd.to_datetime(t)

In [148]:
# Partial-string slice on the datetime index: keep rows dated within
# calendar years 2015 through 2017 (both ends inclusive).
facil_adp_subset = facil_adp.loc[slice('2015', '2017')]

In [127]:
# Count arrest ids per (year bucket, aor). 'AS-OCT' is the year-start
# frequency anchored on October, so each bucket begins on October 1st.
# NOTE(review): newer pandas releases spell this alias 'YS-OCT' -- confirm
# against the pandas version in use before changing it.
arrests_by_aor_fy = (
    arrests
    .set_index('apprehension_date')
    .groupby([pd.Grouper(freq='AS-OCT'), 'aor'])['id']
    .count()
)

In [143]:
# Pivot the innermost index level ('aor') into columns, leaving one row per
# year bucket. The name is rebound, so this cell should be run only once
# after the groupby cell.
arrests_by_aor_fy = arrests_by_aor_fy.unstack(level=-1)

In [154]:
# Partial-string slice on the datetime index: keep year buckets dated within
# calendar years 2015 through 2017 (both ends inclusive).
arrests_by_aor_fy_subset = arrests_by_aor_fy.loc[slice('2015', '2017')]

In [155]:
# Ratio of arrest counts to facility ADP, with one row per aor and one column
# per period; the two transposed frames are aligned on their row and column
# labels before dividing.
arrests_by_aor_fy_subset.T.div(facil_adp_subset.T)

apprehension_date,2015-10-01 00:00:00,2016-10-01 00:00:00,2017-10-01 00:00:00
ATL,4.201896,4.19276,4.338475
BAL,4.693182,5.764706,5.223926
BOS,3.347748,4.229851,3.786458
BUF,1.929276,2.330733,2.49921
CHI,4.912953,5.370787,5.686769
DAL,10.460369,16.874362,15.302689
DEN,4.63286,3.808599,4.244992
DET,4.236295,4.010588,3.229391
ELP,0.652491,0.611506,0.84303
HOU,3.944937,4.176416,5.360135
