# Elections
Looking at 2019 and 2020

In [1]:
# Pandas settings to include on import
import pandas as pd
import numpy as np
pd.set_option('display.max_rows',1000)
pd.set_option('display.max_columns',1000)

# Includes commas in outputs > 1,000, and formats as integers if integers
# If not integers, formats to two decimal places
pd.set_option('display.float_format', lambda x: "{:,.0f}".format(x) if x.is_integer()
              else "{:,.2f}".format(x))

In [11]:
def get_pres_data():
    ct = pd.read_csv('../data/ct.csv',dtype={'FIPS':object})
    pres = pd.read_csv('../data/president_counties.csv')
    pres['turnout'] =  pres.total_votes/pres.people
    cols = [i for i in pres.columns]
    cols.append('fips_county')
    pres['fips_countyname_county'] = [str(i.split(',')[0])+' County' 
                                      for i in pres.countyname]
    pres['state'] = [str
                     (i.split(',')[1][1:]) for i in pres.countyname]
    return pres
get_pres_data().head()

Unnamed: 0,FIPS,countyname,state,clinton,trump,johnson,ind_cand,mcmullin,total_votes,AGI,num_returns,num_dependents,num_tot_inc,tot_inc_amt,people,turnout,fips_countyname_county
0,2000,"Alaska, AK",AK,93007,130415,14593,23166,0,246588,24270413,361110,209280,360720,24628251,570390,0.43,Alaska County
1,1107,"Pickens, AL",AL,3972,5456,49,114,0,9542,301640,7530,6000,7500,305093,13530,0.71,Pickens County
2,1007,"Bibb, AL",AL,1874,6738,124,207,0,8819,365681,7980,6340,7960,369326,14320,0.62,Bibb County
3,1079,"Lawrence, AL",AL,3627,10833,250,369,0,14829,601382,13540,9010,13510,606944,22550,0.66,Lawrence County
4,1115,"St. Clair, AL",AL,5589,31651,684,1160,0,38400,1794459,35230,24400,35140,1814483,59630,0.64,St. Clair County


In [12]:
get_pres_data().state.nunique()

51

In [6]:
def get_imm_estimates():
    imm = pd.read_csv('../data/State-county-unauthorized-estimates.csv')
    imm['county_split']=[i.split(',') for i in imm.County]
    imm['county_name']=[i[0] for i in imm.county_split]
    imm = imm.drop('county_split',axis=1)
    imm['countyname_split']=[i.split(' ') for i in imm.county_name]
    imm['County_UA_pop'] = [int(i.replace(',','')) for i in imm['Total Unauthorized Population']]
    imm.drop('Total Unauthorized Population',axis=1, inplace=True)
    # There's sometimes a couple of counties all together
    imm['mult_counties'] = 0
    for i,v in enumerate(imm.countyname_split):
        if v[-1] == 'Counties':
            imm.loc[i,'mult_counties'] = 1
    imm = imm.drop('countyname_split',axis=1)
    return imm
get_imm_estimates().State.nunique()

39

In [8]:
def get_ua_state_estimates():
    ua_state = pd.read_csv('../data/State-unauthorized-estimates.csv')
    ua_state['Statewide_UA_pop'] = [int(i.replace(',','')) for i in ua_state['Total Unauthorized Population']]
    ua_state.drop('Total Unauthorized Population',axis=1, inplace=True)
    return ua_state

get_ua_state_estimates().head()

Unnamed: 0,State,State_init,Statewide_UA_pop
0,California,CA,3019000
1,Texas,TX,1470000
2,New York,NY,850000
3,Florida,FL,610000
4,Illinois,IL,519000


In [13]:
get_ua_state_estimates().State_init.nunique()

48

In [68]:
def apply_fips(lista):
    tup = tuple(lista)
    fips = get_pres_data().set_index(['state','fips_countyname_county'])['FIPS'].to_dict()
    if tup in fips.keys():
        return fips[tup]
    else:
        return 'XX'
    
apply_fips(['AL','Bibb County'])

1007

In [74]:
def get_ice_data():
    ice = pd.read_csv('../data/county_rollup_ice_detainees.csv')
    ice = ice[ice.County!='All']
    ice['pct_ice']=ice.Yes/ice.All
    ice.rename(columns={'All':'total_detained',
                        'Yes':'ICE_custody',
                        'No':'No_ICE_custody'},
               inplace=True)
    # It's annoying that we don't have the state init here, so I'll import it from the state estimates
    state_inits = get_ua_state_estimates().set_index('State')['State_init'].to_dict()
    ice['State_init'] = ice.State.map(state_inits)
    
    # similarly for fips
#     fips = get_pres_data().set_index(['state','fips_countyname_county'])['FIPS'].to_dict()
#     fips_keys = list(fips.keys())
    ice['FIPS'] = ice[['State_init','County']].apply(lambda x: apply_fips(x.values),axis=1)
#     ice['FIPS'] = ice.set_index(['State_init','County']).map(fips)
    return ice
get_ice_data().head()

KeyboardInterrupt: 

In [None]:
get_ice_data().iloc[0][['State_init','County']]

In [39]:
get_pres_data().set_index(
        ['state','fips_countyname_county'])['FIPS'].to_dict()

{('AK', 'Alaska County'): 2000,
 ('AL', 'Pickens County'): 1107,
 ('AL', 'Bibb County'): 1007,
 ('AL', 'Lawrence County'): 1079,
 ('AL', 'St. Clair County'): 1115,
 ('AL', 'Pike County'): 1109,
 ('AL', 'Marengo County'): 1091,
 ('AL', 'Houston County'): 1069,
 ('AL', 'Baldwin County'): 1003,
 ('AL', 'Chilton County'): 1021,
 ('AL', 'Lamar County'): 1075,
 ('AL', 'Winston County'): 1133,
 ('AL', 'Fayette County'): 1057,
 ('AL', 'Escambia County'): 1053,
 ('AL', 'Tallapoosa County'): 1123,
 ('AL', 'Washington County'): 1129,
 ('AL', 'DeKalb County'): 1049,
 ('AL', 'Marshall County'): 1095,
 ('AL', 'Macon County'): 1087,
 ('AL', 'Wilcox County'): 1131,
 ('AL', 'Blount County'): 1009,
 ('AL', 'Coosa County'): 1037,
 ('AL', 'Clarke County'): 1025,
 ('AL', 'Talladega County'): 1121,
 ('AL', 'Franklin County'): 1059,
 ('AL', 'Bullock County'): 1011,
 ('AL', 'Coffee County'): 1031,
 ('AL', 'Colbert County'): 1033,
 ('AL', 'Jackson County'): 1071,
 ('AL', 'Marion County'): 1093,
 ('AL', 'Lee Co

In [46]:
get_ice_data()[['State_init','County']].apply(
    lambda x: get_pres_data().set_index(
        ['state','fips_countyname_county'])
    ['FIPS'].to_dict()
    [
        (x[0],x[1])
    ] if x[1] in [i for i[1] in fips.keys()] else pass,
    axis=1)

SyntaxError: invalid syntax (<ipython-input-46-abdecac56eb3>, line 7)

In [19]:
get_ice_data().State_init.unique()

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
       nan, 'HI', 'ID', 'IL', 'IN', 'IA', 'KY', 'LA', 'ME', 'MD', 'MA',
       'MI', 'MN', 'MS', 'MO', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC',
       'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA',
       'WA', 'WV', 'WI', 'WY'], dtype=object)

In [17]:
len(get_ua_state_estimates().set_index('State')['State_init'].to_dict().keys())

48

## Apply FIPS to the ICE data

In [None]:
def apply_fips(ice, pres):
    fips = pres.set_index(['state','fips_countyname_county'])['FIPS'].to_dict())
    ice['FIPS'] = ice.set_index(['State_init','County']).map(fips)
    return pass

apply_fips(get_ice_data(), get_pres_data())

In [22]:
get_pres_data().set_index(['state','fips_countyname_county'])['FIPS'].to_dict()

{('AK', 'Alaska County'): 2000,
 ('AL', 'Pickens County'): 1107,
 ('AL', 'Bibb County'): 1007,
 ('AL', 'Lawrence County'): 1079,
 ('AL', 'St. Clair County'): 1115,
 ('AL', 'Pike County'): 1109,
 ('AL', 'Marengo County'): 1091,
 ('AL', 'Houston County'): 1069,
 ('AL', 'Baldwin County'): 1003,
 ('AL', 'Chilton County'): 1021,
 ('AL', 'Lamar County'): 1075,
 ('AL', 'Winston County'): 1133,
 ('AL', 'Fayette County'): 1057,
 ('AL', 'Escambia County'): 1053,
 ('AL', 'Tallapoosa County'): 1123,
 ('AL', 'Washington County'): 1129,
 ('AL', 'DeKalb County'): 1049,
 ('AL', 'Marshall County'): 1095,
 ('AL', 'Macon County'): 1087,
 ('AL', 'Wilcox County'): 1131,
 ('AL', 'Blount County'): 1009,
 ('AL', 'Coosa County'): 1037,
 ('AL', 'Clarke County'): 1025,
 ('AL', 'Talladega County'): 1121,
 ('AL', 'Franklin County'): 1059,
 ('AL', 'Bullock County'): 1011,
 ('AL', 'Coffee County'): 1031,
 ('AL', 'Colbert County'): 1033,
 ('AL', 'Jackson County'): 1071,
 ('AL', 'Marion County'): 1093,
 ('AL', 'Lee Co