# Elections
Looking at ICE detention figures in counties with elections in 2019 and 2020

In [104]:
# Pandas settings to include on import
import pandas as pd
import numpy as np
# Let long/wide frames render in full: up to 1,000 rows and 1,000 columns.
pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000

# Float rendering: thousands separators everywhere; whole-number floats are
# shown without decimals, everything else with two decimal places.
pd.options.display.float_format = (
    lambda x: format(x, ',.0f' if x.is_integer() else ',.2f')
)

import matplotlib.pyplot as plt, mpld3

In [82]:
def get_pres_data():
    """Load county-level presidential results and attach FIPS county names.

    Reads ``../data/ct.csv`` (a FIPS -> ``fips_county`` lookup) and
    ``../data/president_counties.csv`` (one row per county) and derives:

    * ``FIPS``             -- rewritten as a zero-padded 5-character string
    * ``full_county_name`` -- upper-cased ``fips_county`` looked up by FIPS
                              (NaN when the code is missing from ``ct.csv``)
    * ``turnout``          -- ``total_votes / people``
    * ``state``            -- text after the first comma of ``countyname`` with
                              its first character removed; this overwrites any
                              ``state`` column already present in the file
    * ``fips_county``      -- text before the first comma of ``countyname``

    A ``countyname`` without a comma yields NaN for ``state`` instead of
    raising.

    Returns
    -------
    pandas.DataFrame
        The results-file columns plus ``full_county_name`` and ``turnout``,
        followed by ``fips_county``.
    """
    ct = pd.read_csv('../data/ct.csv', dtype={'FIPS': object})
    fips_to_name = ct.set_index('FIPS')['fips_county'].to_dict()

    pres = pd.read_csv('../data/president_counties.csv')
    # The results file stores FIPS numerically, so leading zeros must be restored.
    pres['FIPS'] = pres['FIPS'].astype(str).str.zfill(5)
    pres['full_county_name'] = pres['FIPS'].map(fips_to_name).str.upper()
    pres['turnout'] = pres['total_votes'] / pres['people']

    # Snapshot the output columns now: anything derived below is only returned
    # if it is already listed here (``state``) or appended explicitly.
    cols = list(pres.columns)
    if 'fips_county' not in cols:
        cols.append('fips_county')

    name_parts = pres['countyname'].str.split(',')
    pres['fips_county'] = name_parts.str[0]
    # [1:] drops the single character that follows the comma in "County, ST".
    pres['state'] = name_parts.str[1].str[1:]
    return pres[cols]
# Preview the first rows of the assembled presidential-results frame.
get_pres_data().head()

Unnamed: 0,FIPS,countyname,state,clinton,trump,johnson,ind_cand,mcmullin,total_votes,AGI,num_returns,num_dependents,num_tot_inc,tot_inc_amt,people,full_county_name,turnout,fips_county
0,2000,"Alaska, AK",AK,93007,130415,14593,23166,0,246588,24270413,361110,209280,360720,24628251,570390,AK,0.43,Alaska
1,1107,"Pickens, AL",AL,3972,5456,49,114,0,9542,301640,7530,6000,7500,305093,13530,"PICKENS COUNTY, AL",0.71,Pickens
2,1007,"Bibb, AL",AL,1874,6738,124,207,0,8819,365681,7980,6340,7960,369326,14320,"BIBB COUNTY, AL",0.62,Bibb
3,1079,"Lawrence, AL",AL,3627,10833,250,369,0,14829,601382,13540,9010,13510,606944,22550,"LAWRENCE COUNTY, AL",0.66,Lawrence
4,1115,"St. Clair, AL",AL,5589,31651,684,1160,0,38400,1794459,35230,24400,35140,1814483,59630,"ST. CLAIR COUNTY, AL",0.64,St. Clair


In [54]:
# Load the results frame into `pf` and show the rows whose state is AK
# (the recorded output is a single "Alaska, AK" row).
pf = get_pres_data()
pf[pf.state=='AK']

Unnamed: 0,FIPS,countyname,state,clinton,trump,johnson,ind_cand,mcmullin,total_votes,AGI,num_returns,num_dependents,num_tot_inc,tot_inc_amt,people,full_county_name,turnout,fips_county
0,2000,"Alaska, AK",AK,93007,130415,14593,23166,0,246588,24270413,361110,209280,360720,24628251,570390,AK,0.43,Alaska


In [39]:
# (rows, columns) of the results frame.
pf.shape

(3113, 17)

In [3]:
# Number of distinct `state` values in the results frame.
get_pres_data().state.nunique()

51

In [4]:
def get_imm_estimates():
    """Load the county-level unauthorized-population estimates.

    Reads ``../data/State-county-unauthorized-estimates.csv`` and returns it
    with the raw ``Total Unauthorized Population`` column replaced by three
    derived columns, appended in this order:

    * ``county_name``   -- the part of ``County`` before the first comma
    * ``County_UA_pop`` -- ``Total Unauthorized Population`` as an integer,
                           with thousands separators removed
    * ``mult_counties`` -- 1 when the last space-separated word of
                           ``county_name`` is ``Counties`` (the file sometimes
                           reports a couple of counties together), else 0

    Returns
    -------
    pandas.DataFrame
    """
    imm = pd.read_csv('../data/State-county-unauthorized-estimates.csv')

    county_name = imm['County'].str.split(',').str[0]
    # astype(str) first so the parse also works if pandas already read the
    # column as numbers (i.e. no value contained a comma).
    ua_pop = (imm['Total Unauthorized Population']
              .astype(str)
              .str.replace(',', '', regex=False)
              .astype(int))
    # Compare the whole final word, not a suffix, so only names that end in the
    # separate word "Counties" are flagged.
    last_word = county_name.str.split(' ').str[-1]

    return (imm
            .drop('Total Unauthorized Population', axis=1)
            .assign(county_name=county_name,
                    County_UA_pop=ua_pop,
                    mult_counties=last_word.eq('Counties').astype(int)))
# Number of distinct `State` values in the county-level estimates.
get_imm_estimates().State.nunique()

39

In [5]:
def get_ua_state_estimates():
    """Read the statewide unauthorized-population estimates.

    The comma-formatted ``Total Unauthorized Population`` text column is parsed
    to integers and stored as ``Statewide_UA_pop``; the raw column is removed.

    Returns
    -------
    pandas.DataFrame
        Contents of ``../data/State-unauthorized-estimates.csv`` with
        ``Statewide_UA_pop`` in place of ``Total Unauthorized Population``.
    """
    estimates = pd.read_csv('../data/State-unauthorized-estimates.csv')
    # pop() removes the raw column and hands back its values in one step.
    raw_totals = estimates.pop('Total Unauthorized Population')
    estimates['Statewide_UA_pop'] = [int(total.replace(',', '')) for total in raw_totals]
    return estimates

# Preview the first rows of the statewide estimates.
get_ua_state_estimates().head()

Unnamed: 0,State,State_init,Statewide_UA_pop
0,California,CA,3019000
1,Texas,TX,1470000
2,New York,NY,850000
3,Florida,FL,610000
4,Illinois,IL,519000


In [6]:
# Number of distinct state initials present in the statewide estimates.
get_ua_state_estimates().State_init.nunique()

48

In [107]:
def apply_fips(string):
    """Return the FIPS code for an upper-cased county name such as 'BIBB COUNTY, AL'.

    The lookup is rebuilt from ``get_pres_data()`` on every call, keyed by that
    frame's ``full_county_name`` column.

    Raises
    ------
    KeyError
        If ``string`` is not one of the ``full_county_name`` values.
    """
    fips_by_name = get_pres_data().set_index('full_county_name')['FIPS']
    return fips_by_name.to_dict()[string]
    
# Spot-check the lookup with one known county name.
apply_fips('BIBB COUNTY, AL')

'01007'

In [83]:
def get_ice_data():
    """Load the county roll-up of ICE detainees with state initials and FIPS codes.

    Reads ``../data/county_rollup_ice_detainees.csv``, drops the rows whose
    ``County`` is ``'All'`` and returns a frame in which:

    * ``All`` / ``Yes`` / ``No`` are renamed ``total_detained`` /
      ``ICE_custody`` / ``No_ICE_custody``
    * ``pct_ice`` is ``Yes / All``
    * ``State_init`` is looked up from ``get_ua_state_estimates()`` by state
      name, with North Dakota, Puerto Rico, Vermont and the Virgin Islands
      set explicitly
    * ``county_plus_init`` is ``"<County>, <State_init>"`` and
      ``county_plus_init_upper`` its upper-cased form
    * ``FIPS`` is looked up from ``get_pres_data()`` by
      ``county_plus_init_upper`` (NaN when there is no matching
      ``full_county_name``)

    Returns
    -------
    pandas.DataFrame
    """
    ice = pd.read_csv('../data/county_rollup_ice_detainees.csv')
    # .copy() so the assignments below act on an independent frame rather than
    # on a slice of the CSV frame (avoids SettingWithCopyWarning).
    ice = ice[ice['County'] != 'All'].copy()
    ice['pct_ice'] = ice['Yes'] / ice['All']
    ice = ice.rename(columns={'All': 'total_detained',
                              'Yes': 'ICE_custody',
                              'No': 'No_ICE_custody'})

    # The detainee file has no state initials, so borrow them from the state
    # estimates and set these four explicitly; they always take precedence
    # over whatever the estimates provide.
    state_inits = get_ua_state_estimates().set_index('State')['State_init'].to_dict()
    state_inits.update({'North Dakota': 'ND',
                        'Puerto Rico': 'PR',
                        'Vermont': 'VT',
                        'Virgin Islands': 'VI'})
    ice['State_init'] = ice['State'].map(state_inits)

    # Likewise for FIPS: match on the upper-cased "<County>, <ST>" name.
    ice['county_plus_init'] = ice['County'] + ", " + ice['State_init']
    ice['county_plus_init_upper'] = ice['county_plus_init'].str.upper()
    fips_by_name = get_pres_data().set_index('full_county_name')['FIPS'].to_dict()
    ice['FIPS'] = ice['county_plus_init_upper'].map(fips_by_name)
    return ice


In [84]:
%%time
# Build the ICE detainee frame and time how long the full load takes.
ice = get_ice_data()

CPU times: user 195 ms, sys: 9.65 ms, total: 205 ms
Wall time: 226 ms


In [85]:
# Preview the first ten rows of the ICE detainee frame.
ice.head(10)

Unnamed: 0,State,County,total_detained,ICE_custody,No_ICE_custody,pct_ice,State_init,county_plus_init,county_plus_init_upper,FIPS
1,Alabama,Autauga County,67,45,22,0.67,AL,"Autauga County, AL","AUTAUGA COUNTY, AL",1001
2,Alabama,Baldwin County,738,401,337,0.54,AL,"Baldwin County, AL","BALDWIN COUNTY, AL",1003
3,Alabama,Barbour County,11,5,6,0.45,AL,"Barbour County, AL","BARBOUR COUNTY, AL",1005
4,Alabama,Bibb County,14,13,1,0.93,AL,"Bibb County, AL","BIBB COUNTY, AL",1007
5,Alabama,Blount County,43,15,28,0.35,AL,"Blount County, AL","BLOUNT COUNTY, AL",1009
6,Alabama,Bullock County,8,4,4,0.5,AL,"Bullock County, AL","BULLOCK COUNTY, AL",1011
7,Alabama,Butler County,22,16,6,0.73,AL,"Butler County, AL","BUTLER COUNTY, AL",1013
8,Alabama,Calhoun County,109,31,78,0.28,AL,"Calhoun County, AL","CALHOUN COUNTY, AL",1015
9,Alabama,Chambers County,41,33,8,0.8,AL,"Chambers County, AL","CHAMBERS COUNTY, AL",1017
10,Alabama,Cherokee County,22,8,14,0.36,AL,"Cherokee County, AL","CHEROKEE COUNTY, AL",1019


In [102]:
def combine_datasets(ice, pres):
    """Outer-join the ICE detainee frame with the presidential results on FIPS.

    Adds ``pct_clinton`` (``clinton / total_votes``) and ``inc_per_person``
    (``tot_inc_amt / people``) and returns a fixed selection of columns.
    Because the join is an outer join, counties present in only one input are
    kept, with NaN in the columns that come from the other input.

    Parameters
    ----------
    ice : pandas.DataFrame
        Detainee frame (as produced by ``get_ice_data``).
    pres : pandas.DataFrame
        Presidential results frame (as produced by ``get_pres_data``).

    Returns
    -------
    pandas.DataFrame
    """
    keep = [
        'FIPS', 'State', 'County', 'State_init', 'county_plus_init',
        'total_detained', 'ICE_custody', 'No_ICE_custody', 'pct_ice',
        'clinton', 'pct_clinton', 'trump', 'johnson', 'ind_cand', 'mcmullin',
        'total_votes', 'turnout', 'people', 'tot_inc_amt', 'inc_per_person',
    ]
    merged = ice.merge(pres, how='outer', on='FIPS')
    merged = merged.assign(
        pct_clinton=merged['clinton'] / merged['total_votes'],
        inc_per_person=merged['tot_inc_amt'] / merged['people'],
    )
    return merged[keep]
# Build the combined county frame passed to the charts below; both inputs are
# rebuilt from their CSV files here.
cdf = combine_datasets(get_ice_data(), get_pres_data())

## The actual elections part

In [110]:
# Preview the combined rows for Mississippi.
cdf[cdf.State_init=='MS'].head()

Unnamed: 0,FIPS,State,County,State_init,county_plus_init,total_detained,ICE_custody,No_ICE_custody,pct_ice,clinton,pct_clinton,trump,johnson,ind_cand,mcmullin,total_votes,turnout,people,tot_inc_amt,inc_per_person
1073,28001,Mississippi,Adams County,MS,"Adams County, MS",2245,1200.0,1045.0,0.53,7757,0.56,5874,125,205,0,13836,0.63,22040,615151,27.91
1074,28003,Mississippi,Alcorn County,MS,"Alcorn County, MS",1,1.0,,1.0,2684,0.18,11819,167,280,0,14783,0.6,24530,600006,24.46
1075,28007,Mississippi,Attala County,MS,"Attala County, MS",2,1.0,1.0,0.5,3242,0.39,4897,57,117,0,8256,0.6,13690,307058,22.43
1076,28011,Mississippi,Bolivar County,MS,"Bolivar County, MS",8,,8.0,,9046,0.65,4590,100,188,0,13824,0.53,25990,577967,22.24
1077,28013,Mississippi,Calhoun County,MS,"Calhoun County, MS",15,4.0,11.0,0.27,1910,0.3,4390,49,96,0,6396,0.61,10540,205159,19.46


In [187]:
def get_2019_dict(statecountyimm):
    """Map state initials to the counties selected for the 2019 charts.

    Parameters
    ----------
    statecountyimm : pandas.DataFrame
        Frame with ``State_init`` and ``County`` columns.  It supplies the
        county lists for the states taken whole (MS, VA) or nearly whole
        (LA, which leaves out Orleans Parish).

    Returns
    -------
    dict
        ``{state_init: [county name, ...]}`` with keys MS, LA, FL, NJ, NY, PA,
        VA and WA, in that order.  FL, NJ, NY, PA and WA are fixed lists.
    """
    def counties_in(state_init):
        # Every County value recorded for this state, in frame order.
        return list(statecountyimm.County[statecountyimm.State_init == state_init])

    county_19_dict = {}

    county_19_dict['MS'] = counties_in('MS')
    county_19_dict['LA'] = [county for county in counties_in('LA')
                            if county != 'Orleans Parish']

    county_19_dict['FL'] = ['Duval County']

    # 'Morris County' was previously misspelled 'Morriw County', which matched
    # nothing and silently dropped the county from the selection.
    county_19_dict['NJ'] = ['Bergen County', 'Burlington County', 'Hudson County',
                            'Hunterdon County', 'Middlesex County', 'Monmouth County',
                            'Morris County', 'Ocean County', 'Passaic County', 'Somerset County',
                            'Sussex County', 'Warren County']
    county_19_dict['NY'] = ['Albany County', 'Cortland County', 'Dutchess County', 'Fulton County',
                            'Greene County', 'Herkimer County', 'Lewis County', 'Orleans County',
                            'Rensselaer County', 'Rockland County', 'Seneca County',
                            'St. Lawrence County', 'Sullivan County', 'Tioga County',
                            'Warren County', 'Washington County', 'Yates County']
    county_19_dict['PA'] = ['Beaver County', 'Bedford County', 'Berks County', 'Bradford County',
                            'Carbon County', 'Centre County', 'Chester County', 'Clinton County',
                            'Crawford County', 'Dauphin County', 'Elk County', 'Fayette County',
                            'Franklin County', 'Fulton County', 'Indiana County',
                            'Jefferson County', 'Lancaster County', 'Lehigh County',
                            'Luzerne County', 'Mercer County', 'Monroe County',
                            'Montgomery County', 'Montour County', 'Northampton County',
                            'Northumberland County', 'Philadelphia County',
                            'Potter County', 'Snyder County', 'Somerset County',
                            'Sullivan County', 'Warren County', 'Wayne County', 'Westmoreland County',
                            'Wyoming County', 'York County']
    county_19_dict['VA'] = counties_in('VA')
    county_19_dict['WA'] = ['Snohomish County', 'Whatcom County']
    return county_19_dict

In [100]:
def to_tuples(dictionary):
    """Flatten ``{key: [item, ...]}`` into a list of ``(key, item)`` pairs.

    Pairs follow the dictionary's key order and, within a key, the order of
    its items; keys with no items contribute nothing.
    """
    return [(key, item)
            for key, items in dictionary.items()
            for item in items]

In [202]:
def graph_elex(cdf, state="Nationwide", year=2019, detained_num=0, save=False):
    """Scatter-plot detention counts against ICE-custody share for election counties.

    Each point is one county from the ``year`` selection
    (``get_2019_dict`` / ``get_2020_dict``): x is ``total_detained``, y is
    ``pct_ice`` and the colour is ``pct_clinton`` on the ``RdBu`` colormap.

    Parameters
    ----------
    cdf : pandas.DataFrame
        Combined county frame.  The columns read here are ``State_init``,
        ``County``, ``county_plus_init``, ``total_detained``, ``pct_ice`` and
        ``pct_clinton``.
    state : str, default "Nationwide"
        State initials to restrict the chart to, or ``"Nationwide"`` to keep
        every state in the selection.
    year : int, default 2019
        Which selection to plot; only 2019 and 2020 are supported.
    detained_num : int, default 0
        Only counties with strictly more than this many detainees are plotted.
    save : bool, default False
        When true, also write the chart to
        ``../findings/elex_19_20/{year}/elections_{year}_counties_{state}.html``.

    Returns
    -------
    The value of ``mpld3.display`` for the figure.

    Raises
    ------
    ValueError
        If ``year`` is neither 2019 nor 2020.
    """
    if year == 2019:
        tups = to_tuples(get_2019_dict(cdf))
    elif year == 2020:
        tups = to_tuples(get_2020_dict(cdf))
    else:
        raise ValueError(f"year must be 2019 or 2020, got {year!r}")

    # One membership test per row.  This keeps the frame's dtypes and row
    # order, and a (state, county) pair listed twice cannot duplicate rows.
    wanted = set(tups)
    in_selection = pd.Series(
        [pair in wanted for pair in zip(cdf.State_init, cdf.County)],
        dtype=bool,
    ).to_numpy()
    df = cdf.loc[in_selection]

    df = df[df.total_detained > detained_num]
    if state != "Nationwide":
        df = df[df.State_init == state]

    x = df.total_detained
    y = df.pct_ice
    colors = df.pct_clinton
    area = 400
    text = df['county_plus_init']

    plt.rc('font', weight='bold', family='sans-serif', size=12)

    fig, ax = plt.subplots(figsize=(10, 10))
    sct = ax.scatter(x, y, linewidths=2, s=area, edgecolor='w', c=colors, cmap='RdBu')
    sct.set_alpha(0.75)

    # Label a county when it has more than 5,000 detainees or its ICE share is
    # above a tenth of the largest share plotted.  Series.max() skips NaN, and
    # the single `or` test means a point meeting both conditions is labelled once.
    label_cutoff = .1 * y.max()
    for labeli, xi, yi in zip(text, x, y):
        if xi > 5000 or yi > label_cutoff:
            ax.annotate(labeli, xy=(xi, yi))

    hfont = {'fontname': 'DejaVu Sans'}

    ax.set_xlabel('Total Detained', fontsize=16, **hfont)
    ax.set_ylabel('Percentage of Detainees in ICE custody ', fontsize=16, **hfont)
    ax.set_title(f'Counties -- {state}', fontsize=22, **hfont)

    # The detainee floor is only mentioned on the nationwide chart.
    subtitle = f'Elections in {year}'
    if state == "Nationwide":
        subtitle = f'Elections in {year} and {detained_num}+ detentions'
    ax.text(.5, .99, subtitle,
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes, fontsize=16)

    if save:
        mpld3.save_html(fig, f'../findings/elex_19_20/{year}/elections_{year}_counties_{state}.html')
    return mpld3.display(fig)

# graph_elex(cdf,detained_num=300, save=True)

### 2019 state-by-state

In [179]:
# 2019 charts for Virginia, Pennsylvania, New York and New Jersey, each saved
# to HTML.  Virginia is limited to counties with more than one detainee; the
# other three use a floor of zero.
va, pa, ny, nj = [
    graph_elex(cdf, state=abbr, year=2019, detained_num=floor, save=True)
    for abbr, floor in (("VA", 1), ("PA", 0), ("NY", 0), ("NJ", 0))
]


In [189]:
# Louisiana, 2019 (the 2019 selection leaves out Orleans Parish); saved to HTML.
graph_elex(cdf, state="LA", year=2019, detained_num=0, save=True)

In [176]:
# Spot-check the combined row for a single county.
cdf[cdf.county_plus_init=='Rockland County, NY']

Unnamed: 0,FIPS,State,County,State_init,county_plus_init,total_detained,ICE_custody,No_ICE_custody,pct_ice,clinton,pct_clinton,trump,johnson,ind_cand,mcmullin,total_votes,turnout,people,tot_inc_amt,inc_per_person
1367,36087,New York,Rockland County,NY,"Rockland County, NY",1221,905,316,0.74,69342,0.51,60911,2284,4834,0,135087,0.5,270160,12353314,45.73


## 2020

In [190]:
# Preview the first rows of the combined frame.
cdf.head()

Unnamed: 0,FIPS,State,County,State_init,county_plus_init,total_detained,ICE_custody,No_ICE_custody,pct_ice,clinton,pct_clinton,trump,johnson,ind_cand,mcmullin,total_votes,turnout,people,tot_inc_amt,inc_per_person
0,1001,Alabama,Autauga County,AL,"Autauga County, AL",67,45,22,0.67,5936,0.24,18172,546,865,0,24973,0.6,41730,1250116,29.96
1,1003,Alabama,Baldwin County,AL,"Baldwin County, AL",738,401,337,0.54,18458,0.19,72883,2464,3874,0,95215,0.65,146550,5331680,36.38
2,1005,Alabama,Barbour County,AL,"Barbour County, AL",11,5,6,0.45,4871,0.47,5454,93,144,0,10469,0.6,17590,378961,21.54
3,1007,Alabama,Bibb County,AL,"Bibb County, AL",14,13,1,0.93,1874,0.21,6738,124,207,0,8819,0.62,14320,369326,25.79
4,1009,Alabama,Blount County,AL,"Blount County, AL",43,15,28,0.35,2156,0.08,22859,338,573,0,25588,0.66,38690,1043008,26.96


In [206]:
def get_2020_dict(df):
    """Map state initials to the counties selected for the 2020 charts.

    For TX, FL, GA, NM, AZ, MI, MA and OR (in that order) the value is every
    ``County`` that ``df`` records for the state; Florida additionally leaves
    out Duval County.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``State_init`` and ``County`` columns.

    Returns
    -------
    dict
        ``{state_init: [county name, ...]}``.
    """
    def counties_of(state_init):
        return list(df.County[df.State_init == state_init])

    county_20_dict = {}
    for state_init in ('TX', 'FL', 'GA', 'NM', 'AZ', 'MI', 'MA', 'OR'):
        names = counties_of(state_init)
        if state_init == 'FL':
            names = [name for name in names if name != 'Duval County']
        county_20_dict[state_init] = names
    return county_20_dict

In [207]:
# Every county in the 2020 selection with more than 1,000 detainees; saved to HTML.
graph_elex(cdf, state="Nationwide", year=2020, detained_num=1000, save=True)

In [209]:
# One saved chart per state in the 2020 selection.  The return values are
# discarded, so this cell only writes the HTML files.
for s in get_2020_dict(cdf):
    graph_elex(cdf, state=s, year=2020, detained_num=0, save=True)

In [138]:
# Write the combined frame to CSV, leaving out the index column.
cdf.to_csv('../findings/elex_19_20/full_dataset.csv',index=False)