#### Script to Alter Risk Indicators and Geographies 
  

##### Requirements
The Script requires two forms of data:
 1. County level raw data with state abbreviation, 
 2. A decision matrix.  
 
 With these two forms of data, you can select your metrics, indicators, and key states.
 
##### Changing Parameters
 To use the function get_final_scores, you will:
 * update a list to include only states you care about
 * revise a dictionary where the KEY will be your category (or metric) and the value will be a TUPLE containing an abbreviation, and the list of the variable names to be included in your category.
 
 By feeding the risk indicators into a dictionary form and editing which states to include (key  states), the script will populate a CSV. 

In [84]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot  as plt
%matplotlib inline

In [85]:
path = os.getcwd() 
data_root = path + '\\DATA\\'
data_root

'C:\\Users\\Kaipable\\Desktop\\GitHub\\voter-protection-corps\\pollworkers_src\\DATA\\'

In [86]:
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 10000)

In [87]:
#Read In Raw Data
all_df = pd.read_csv(data_root+'all_states_risk_data.csv')
all_df=all_df.replace([np.inf, -np.inf], np.nan)

In [88]:
# Read In Decision Matrix
actions = pd.read_csv('action_map.csv')
actions.columns
actions.head()

Unnamed: 0,Priority Population,Recruiting Difficulty,Demand Risk,Resource Supply,concat,Action
0,Low,Low,Low,Low,LowLowLowLow,Monitor
1,Low,Low,Low,Medium,LowLowLowMedium,Recruit
2,Low,Low,Low,High,LowLowLowHigh,Recruit
3,Low,Low,Medium,Low,LowLowMediumLow,Monitor
4,Low,Low,Medium,Medium,LowLowMediumMedium,Recruit


In [89]:
#Understand  all available variables
print(all_df.columns)
all_df.shape

Index(['Unnamed: 0', 'Full State', 'short_fips', 'State_Abbr', ' County Name', 'Non-white Residents', 'Senior Residents', 'Senior Poll Workers', 'Reported Difficulty ', 'In-Person Voters', 'Inactive Voters', 'Voters Per Location', 'Voters Per Machine', 'Voters Per Pollworker', 'Wait Times', 'Wait Count', '# of Same Day Locations', '# of Machines', '# of Poll Workers', '# of Senior Poll Workers', '# of Voters Removed', '# of Precincts', '#  Mail In Ballots', '% Early Vote', '%  Rejected Ballots', 'Median Age', 'Median Household Income', 'Total Population', 'Total Eligible Voters', '% Senior Poll Worker', '% Votes in Person', '% Inactive Voters', '%Nonwhite', '% Residents Over 60'], dtype='object')


(2831, 34)

### Customize Metrics and  Indicators
Define Your metrics, indicators, and abbreviations Below in metric_map

In [90]:
# Choose Metrics and Geographies   

#The key is the metric, and the indicators are what's included
metric_map = { # <--------
    'priority' : ('PRI',['Non-white Residents','Senior Residents']),
    'recruiting risk' : ('REC',['Senior Poll Workers','Reported Difficulty ']),
     'demand risk' : ('D',['In-Person Voters', 'Inactive Voters']),
    'resource supply' : ('S',['Voters Per Location','Voters Per Machine','Voters Per Pollworker','Wait Times']),
}

#Default is All States -- update list to include only relevant states using the 2 Letter state abbreviation
all_states = [state for state in all_df['State_Abbr'].unique()] #<----- default
key_states = ['TX','OH', 'AZ', 'PA', 'WI', 'NC','FL', 'MI']  #<----- customize this as function input

agg_method='median'
# After filling this out, simply run all chunks below

In [91]:
# Automatically populates based on the dictionary
the_metrics = [i for i in metric_map.keys()]
risk_indicators = [var for k in the_metrics for var in metric_map[k][1]]
agg_metrics = ('mean', 'median', 'max', 'min')
region_ids = ['Unnamed: 0', 'Full State', 'short_fips', 'State_Abbr', ' County Name']

In [92]:
#Generating these percentiles for plug and play
other_vars = [j for j in all_df.columns if (j not in risk_indicators) & (j not in region_ids)]
#other_vars

In [93]:
print('The metrics we are looking at are:', the_metrics)
print('The indicators that compose the metrics are:', risk_indicators)
print('The geographiesbeing looked at are:', key_states)

The metrics we are looking at are: ['priority', 'recruiting risk', 'demand risk', 'resource supply']
The indicators that compose the metrics are: ['Non-white Residents', 'Senior Residents', 'Senior Poll Workers', 'Reported Difficulty ', 'In-Person Voters', 'Inactive Voters', 'Voters Per Location', 'Voters Per Machine', 'Voters Per Pollworker', 'Wait Times']
The geographiesbeing looked at are: ['TX', 'OH', 'AZ', 'PA', 'WI', 'NC', 'FL', 'MI']


In [110]:
def get_percentiles(df):
    df = df[df['State_Abbr'].isin(key_states)]
    for i in metric_map.keys():
        for j in metric_map[i][1]:
            keep_series=df.groupby("State_Abbr")[j].count() >0
            viable_states = keep_series[keep_series==True].index
            temp_df = df[df['State_Abbr'].isin(viable_states)]
            intermed= temp_df.groupby("State_Abbr")[j].rank(pct=True, na_option="keep")
            df["("+metric_map[i][0]+") "+j] = intermed
    for other_var in other_vars:
        keep_series=df.groupby("State_Abbr")[other_var].count() >0
        viable_states = keep_series[keep_series==True].index
        temp_df = df[df['State_Abbr'].isin(viable_states)]
        intermed= temp_df.groupby("State_Abbr")[other_var].rank(pct=True, na_option="keep")
        df["(O) "+other_var.title()] = intermed
    return df
        
def get_agg_columns(df, agg_method="median"):
    funct_map = {'median': np.nanmedian,
            'mean': np.nanmean, 
            'min': np.min,
            'max': np.max}
    for metric in metric_map.keys():
        percent_list = ["("+metric_map[metric][0]+") " + j for j in metric_map[metric][1]] 
        df[metric.title()+" ("+agg_method.title()+")"] = funct_map[agg_method](df[percent_list], axis=1)
        if agg_method not in funct_map.keys():
            print('Please select an acceptable metric: "min", "max", "mean", or "median".')
    return df

def get_final_scores(df, agg_method='median'):
    for i in metric_map.keys():
        keep_series=df.groupby("State_Abbr")[i.title()+" ("+agg_method.title()+")"].count() >0
        viable_states = keep_series[keep_series==True].index
        temp_df = df[df['State_Abbr'].isin(viable_states)]
        rel_scores = temp_df.groupby("State_Abbr")[i.title()+" ("+agg_method.title()+")"].rank(pct=True, na_option="keep")
        rel_heat = rel_scores.apply(lambda x: "High" if x > 0.75  else ("Low" if x <0.5 else (np.nan if np.isnan(x) else "Medium")))
        df[i.title()+" (Final)"] = rel_scores
        df[i.title()+" (LMH)"]= rel_heat
    return df

def get_final_df(df, action_df, metric_map, agg_method):
    agg_method = agg_method
    func_list = [get_percentiles, get_agg_columns, get_final_scores]
    for i in func_list:
        df = i(df)
    final_metrics = [metric for metric in df.columns if "(LMH)" in metric]
    df[final_metrics] = df[final_metrics].fillna("Unknown")
    df['concat']=  df[final_metrics].agg("".join, axis=1)
    final_df = pd.merge(df,action_df[['concat', 'Action']], on='concat', how='left')
    return final_df


In [112]:
scored_df = get_final_df(all_df, actions, metric_map, 'median')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/panda

In [None]:
#Test with Arizona
scored_df[scored_df['State_Abbr']=="AZ"]

The End :)

In [None]:
# Write Output
final_df.to_csv('all_states_risk_scores.csv')
final_df[final_df['State_Abbr'].isin(key_states)].to_csv('key_states_risk_scores.csv')

In [114]:
# #TEST STEP CHUNK
# a = get_percentiles(all_df)
# a.head()

# b=get_agg_columns(a)
# b.head()

# c=get_final_scores(b)
# c.head()

# final_metrics = [metric for metric in c.columns if "(LMH)" in metric]
# c[final_metrics] = c[final_metrics].fillna("Unknown")
# c['concat']=  c[final_metrics].agg("".join, axis=1)
# final_df = pd.merge(c,actions[['concat', 'Action']], on='concat', how='left')

In [101]:
final_df.head()

Unnamed: 0.1,Unnamed: 0,Full State,short_fips,State_Abbr,County Name,Non-white Residents,Senior Residents,Senior Poll Workers,Reported Difficulty,In-Person Voters,Inactive Voters,Voters Per Location,Voters Per Machine,Voters Per Pollworker,Wait Times,Wait Count,# of Same Day Locations,# of Machines,# of Poll Workers,# of Senior Poll Workers,# of Voters Removed,# of Precincts,# Mail In Ballots,% Early Vote,% Rejected Ballots,Median Age,Median Household Income,Total Population,Total Eligible Voters,% Senior Poll Worker,% Votes in Person,% Inactive Voters,%Nonwhite,% Residents Over 60,(PRI) Non-white Residents,(PRI) Senior Residents,(REC) Senior Poll Workers,(REC) Reported Difficulty,(D) In-Person Voters,(D) Inactive Voters,(S) Voters Per Location,(S) Voters Per Machine,(S) Voters Per Pollworker,(S) Wait Times,(O) Wait Count,(O) # Of Same Day Locations,(O) # Of Machines,(O) # Of Poll Workers,(O) # Of Senior Poll Workers,(O) # Of Voters Removed,(O) # Of Precincts,(O) # Mail In Ballots,(O) % Early Vote,(O) % Rejected Ballots,(O) Median Age,(O) Median Household Income,(O) Total Population,(O) Total Eligible Voters,(O) % Senior Poll Worker,(O) % Votes In Person,(O) % Inactive Voters,(O) %Nonwhite,(O) % Residents Over 60,Priority (Median),Recruiting Risk (Median),Demand Risk (Median),Resource Supply (Median),Priority (Final),Priority (LMH),Recruiting Risk (Final),Recruiting Risk (LMH),Demand Risk (Final),Demand Risk (LMH),Resource Supply (Final),Resource Supply (LMH),concat,Action
0,69,ARIZONA,4001,AZ,APACHE COUNTY,55160.0,14758.0,99.0,4.0,13440.0,4971.0,1306.666667,1193.043478,197.410072,,,42.0,46.0,278.0,99.0,4930.0,44.0,-88.0,0.034939,0.01712,34.7,32963.0,71522.0,54880.0,0.356115,0.525842,0.090579,0.771231,0.206342,0.733333,0.333333,0.4,0.466667,0.533333,0.333333,0.2,0.4,0.133333,,,0.666667,0.533333,0.6,0.4,0.4,0.566667,0.066667,0.666667,0.8,0.333333,0.066667,0.4,0.4,0.2,1.0,0.133333,1.0,0.333333,0.533333,0.433333,0.433333,0.2,0.466667,Low,0.333333,Low,0.4,Low,0.166667,Low,LowLowLowLow,Monitor
1,70,ARIZONA,4003,AZ,COCHISE COUNTY,19036.0,36129.0,77.0,4.0,8591.0,16381.0,5131.470588,545.21875,510.146199,,,17.0,160.0,171.0,77.0,8539.0,49.0,34144.0,0.015873,0.016885,40.6,48649.0,126279.0,87235.0,0.450292,0.187058,0.18778,0.150746,0.286105,0.466667,0.6,0.333333,0.466667,0.4,0.6,0.8,0.133333,0.8,,,0.433333,0.866667,0.466667,0.333333,0.6,0.733333,0.6,0.2,0.733333,0.733333,0.533333,0.533333,0.533333,0.266667,0.333333,0.8,0.333333,0.733333,0.533333,0.4,0.5,0.8,0.466667,Low,0.266667,Low,0.466667,Low,0.8,High,LowLowLowHigh,Recruit
2,71,ARIZONA,4005,AZ,COCONINO COUNTY,50715.0,24202.0,161.0,4.0,15391.0,15500.0,1795.254545,836.771186,281.307692,,,55.0,118.0,351.0,161.0,15114.0,71.0,37112.0,0.03278,0.00527,30.6,57616.0,140217.0,98739.0,0.458689,0.275095,0.15698,0.361689,0.172604,0.6,0.466667,0.566667,0.466667,0.666667,0.533333,0.333333,0.266667,0.333333,,,0.733333,0.8,0.8,0.566667,0.733333,0.8,0.666667,0.6,0.533333,0.066667,0.866667,0.6,0.6,0.333333,0.666667,0.666667,0.866667,0.066667,0.533333,0.516667,0.6,0.333333,0.466667,Low,0.533333,Medium,0.6,Medium,0.266667,Low,LowMediumMediumLow,Monitor
3,72,ARIZONA,4007,AZ,GILA COUNTY,12305.0,19272.0,161.0,5.0,3635.0,7433.0,2178.470588,1157.3125,356.096154,,,17.0,32.0,104.0,161.0,4539.0,39.0,17144.0,0.037311,0.025941,49.7,42092.0,53400.0,37034.0,1.548077,0.17455,0.200707,0.230431,0.360899,0.333333,0.4,0.566667,0.9,0.333333,0.4,0.466667,0.333333,0.6,,,0.433333,0.333333,0.333333,0.566667,0.333333,0.466667,0.4,0.733333,0.933333,0.8,0.333333,0.333333,0.333333,1.0,0.2,0.866667,0.6,0.8,0.366667,0.733333,0.366667,0.466667,0.333333,Low,0.933333,High,0.333333,Low,0.4,Low,LowHighLowLow,Monitor
4,73,ARIZONA,4009,AZ,GRAHAM COUNTY,7962.0,7083.0,25.0,3.0,3469.0,3250.0,2350.666667,1511.142857,480.818182,,,9.0,14.0,44.0,25.0,2903.0,22.0,7204.0,0.097627,0.015295,33.3,51352.0,37879.0,21156.0,0.568182,0.321621,0.153621,0.210196,0.18699,0.266667,0.133333,0.133333,0.133333,0.266667,0.266667,0.6,0.533333,0.733333,,,0.233333,0.1,0.133333,0.133333,0.266667,0.266667,0.266667,0.866667,0.666667,0.133333,0.733333,0.2,0.2,0.466667,0.8,0.6,0.466667,0.2,0.2,0.133333,0.266667,0.6,0.2,Low,0.066667,Low,0.266667,Low,0.533333,Medium,LowLowLowMedium,Recruit
