### Script to Alter Risk Indicators and Geographies


The Script requires two forms of data:
 1.) County Level Raw Data with a 2 Letter State Abbreviation, and
 2.) A Decision Matrix 
 
 To use the function get_final_scores, you will:
 * update a list to include only states you care about
 * revise a dictionary where the KEY will be your category (or metric) and the value will be a TUPLE containing an abbreviation, and the list of the variable names to be included in your category.
 
 With these two forms of data, you can select your metrics, indicators, and key states. By feeding the risk indicators into a dictionary form and editing which states to include (key  states), the script will populate a CSV. 

In [673]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot  as plt
%matplotlib inline

In [674]:
path = os.getcwd() 
data_root = path + '\\DATA\\'
data_root

'C:\\Users\\Kaipable\\Desktop\\GitHub\\voter-protection-corps\\pollworkers_src\\DATA\\'

In [675]:
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 10000)

In [676]:
#Read In Raw Data
all_df = pd.read_csv(data_root+'all_states_risk_data.csv')

In [None]:
# Read In Decision Matrix
actions = pd.read_csv('action_map.csv')
actions.columns
actions.head()

In [677]:
#Understand  all available variables
print(all_df.columns)
all_df.shape

Index(['Unnamed: 0', 'Full State', 'short_fips', 'State_Abbr', ' County Name', 'Non-white Residents', 'Senior Residents', 'Senior Poll Workers', 'Reported Difficulty ', 'In-Person Voters', 'Inactive Voters', 'Voters Per Location', 'Voters Per Machine', 'Voters Per Pollworker', 'Wait Times', 'Wait Count', '# of Same Day Locations', '# of Machines', '# of Poll Workers', '# of Senior Poll Workers', '# of Voters Removed', '# of Precincts', '#  Mail In Ballots', '% Early Vote', '%  Rejected Ballots', 'Median Age', 'Median Household Income', 'Total Population', 'Total Eligible Voters', '% Senior Poll Worker', '% Votes in Person', '% Inactive Voters', '%Nonwhite', '% Residents Over 60'], dtype='object')


(2760, 34)

### Customize Metrics and  Indicators
Define Your metrics, indicators, and abbreviations Below in metric_map

In [694]:
# Choose Metrics and Geographies   

#The key is the metric, and the indicators are what's included
metric_map = { # <--------
    'priority' : ('PRI',['Non-white Residents','Senior Residents']),
    'recruiting risk' : ('REC',['Senior Poll Workers','Reported Difficulty ']),
     'demand risk' : ('D',['In-Person Voters', 'Inactive Voters']),
    'resource supply' : ('S',['Voters Per Location','Voters Per Machine','Voters Per Pollworker','Wait Times']),
}

#Default is All States -- update list to include only relevant states using the 2 Letter state abbreviation
key_states = [state for state in all_df['State_Abbr'].unique()] #<-------

# After filling this out, simply run all chunks below

In [679]:
the_metrics = [i for i in metric_map.keys()]
risk_indicators = [var for k in the_metrics for var in metric_map[k][1]]
agg_metrics = ('mean', 'median', 'max', 'min')
region_ids = ['Unnamed: 0', 'Full State', 'short_fips', 'State_Abbr', ' County Name']
other_vars = [j for j in all_df.columns if (j not in risk_indicators) & (j not in region_ids)]

In [698]:
print('The metrics we are looking at are:', the_metrics)
print('The indicators that compose the metrics are:', risk_indicators)
print('The geographiesbeing looked at are:', key_states)

The metrics we are looking at are: ['priority', 'recruiting risk', 'demand risk', 'resource supply']
The indicators that compose the metrics are: ['Non-white Residents', 'Senior Residents', 'Senior Poll Workers', 'Reported Difficulty ', 'In-Person Voters', 'Inactive Voters', 'Voters Per Location', 'Voters Per Machine', 'Voters Per Pollworker', 'Wait Times']
The geographiesbeing looked at are: ['AL', 'AS', 'AZ', 'AR', 'CA', 'CO', 'DE', 'DC', 'FL', 'GA', 'GU', 'HI', 'ID', 'IN', 'IA', 'KS', 'KY', 'LA', 'MD', 'MI', 'MN', 'MS', 'MT', 'NE', 'NV', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'SC', 'SD', 'TN', 'TX', 'UT', 'VI', 'VA', 'WA', 'WV', 'WY']


In [691]:
def get_percentiles(df):
    df = df[df['State_Abbr'].isin(key_states)]
    for i in metric_map.keys():
        for j in metric_map[i][1]:
            keep_series=df.groupby("State_Abbr")[j].count() >0
            viable_states = keep_series[keep_series==True].index
            temp_df = df[df['State_Abbr'].isin(viable_states)]
            intermed= temp_df.groupby("State_Abbr")[j].rank(pct=True, na_option="keep")
            df["("+metric_map[i][0]+") "+j] = intermed
    for other_var in other_vars:
        keep_series=df.groupby("State_Abbr")[other_var].count() >0
        viable_states = keep_series[keep_series==True].index
        temp_df = df[df['State_Abbr'].isin(viable_states)]
        intermed= temp_df.groupby("State_Abbr")[other_var].rank(pct=True, na_option="keep")
        df["(O) "+other_var.title()] = intermed
        return df
        
def get_agg_columns(df):
    funct_map = {'median': np.nanmedian,
            'mean': np.nanmean, 
            'min': np.min,
            'max': np.max}
    for metric in metric_map.keys():
        percent_list = ["("+metric_map[metric][0]+") " + j for j in metric_map[metric][1]] 
        df[metric.title()+" ("+agg_method.title()+")"] = funct_map[agg_method](df[percent_list], axis=1)
        if agg_method not in funct_map.keys():
            print('Please select an acceptable metric: "min", "max", "mean", or "median".')
    return df

def get_final_scores(df):
    for i in metric_map.keys():
        keep_series=df.groupby("State_Abbr")[i.title()+" ("+agg_method.title()+")"].count() >0
        viable_states = keep_series[keep_series==True].index
        temp_df = df[df['State_Abbr'].isin(viable_states)]
        rel_scores = temp_df.groupby("State_Abbr")[i.title()+" ("+agg_method.title()+")"].rank(pct=True, na_option="keep")
        rel_heat = rel_scores.apply(lambda x: "High" if x > 0.75  else ("Low" if x <0.5 else (np.nan if np.isnan(x) else "Medium")))
        df[i.title()+" (Final)"] = rel_scores
        df[i.title()+" (LMH)"]= rel_heat
    return df

def get_final_df(df, action_df, metric_map, agg_method="median"):
    agg_method = agg_method
    func_list = [get_percentiles, get_agg_columns, get_final_scores]
    for i in func_list:
        df = i(df)
    final_metrics = [metric for metric in df.columns if "(LMH)" in metric]
    df[final_metrics] = df[final_metrics].fillna("Unknown")
    df['concat']=  all_df[final_metrics].agg("".join, axis=1)
    final_df = pd.merge(df,action_df[['concat', 'Action']], on='concat', how='left')
    return final_df


In [692]:
scored_df = get_final_df(all_df, actions, 'median', metric_map)

In [693]:
#Test with Arizona
scored_df[scored_df['State_Abbr']=="AZ"]

Unnamed: 0.1,Unnamed: 0,Full State,short_fips,State_Abbr,County Name,Non-white Residents,Senior Residents,Senior Poll Workers,Reported Difficulty,In-Person Voters,Inactive Voters,Voters Per Location,Voters Per Machine,Voters Per Pollworker,Wait Times,Wait Count,# of Same Day Locations,# of Machines,# of Poll Workers,# of Senior Poll Workers,# of Voters Removed,# of Precincts,# Mail In Ballots,% Early Vote,% Rejected Ballots,Median Age,Median Household Income,Total Population,Total Eligible Voters,% Senior Poll Worker,% Votes in Person,% Inactive Voters,%Nonwhite,% Residents Over 60,(PRI) Non-white Residents,(PRI) Senior Residents,(REC) Senior Poll Workers,(REC) Reported Difficulty,(D) In-Person Voters,(D) Inactive Voters,(S) Voters Per Location,(S) Voters Per Machine,(S) Voters Per Pollworker,(S) Wait Times,(O) Wait Count,Priority (Median),Recruiting Risk (Median),Demand Risk (Median),Resource Supply (Median),Priority (Final),Priority (LMH),Recruiting Risk (Final),Recruiting Risk (LMH),Demand Risk (Final),Demand Risk (LMH),Resource Supply (Final),Resource Supply (LMH),concat,Action
68,69,ARIZONA,4001,AZ,APACHE COUNTY,55160.0,14758.0,99.0,4.0,13440.0,4971.0,1306.666667,1193.043478,197.410072,,,42.0,46.0,278.0,99.0,4930.0,44.0,-88.0,0.034939,0.01712,34.7,32963.0,71522.0,54880.0,0.356115,0.525842,0.090579,0.771231,0.206342,0.733333,0.333333,0.4,0.466667,0.533333,0.333333,0.2,0.4,0.133333,,,0.533333,0.433333,0.433333,0.2,0.466667,Low,0.333333,Low,0.4,Low,0.166667,Low,LowLowLowLow,Monitor
69,70,ARIZONA,4003,AZ,COCHISE COUNTY,19036.0,36129.0,77.0,4.0,8591.0,16381.0,5131.470588,545.21875,510.146199,,,17.0,160.0,171.0,77.0,8539.0,49.0,34144.0,0.015873,0.016885,40.6,48649.0,126279.0,87235.0,0.450292,0.187058,0.18778,0.150746,0.286105,0.466667,0.6,0.333333,0.466667,0.4,0.6,0.8,0.133333,0.8,,,0.533333,0.4,0.5,0.8,0.466667,Low,0.266667,Low,0.466667,Low,0.8,High,LowLowLowHigh,Recruit
70,71,ARIZONA,4005,AZ,COCONINO COUNTY,50715.0,24202.0,161.0,4.0,15391.0,15500.0,1795.254545,836.771186,281.307692,,,55.0,118.0,351.0,161.0,15114.0,71.0,37112.0,0.03278,0.00527,30.6,57616.0,140217.0,98739.0,0.458689,0.275095,0.15698,0.361689,0.172604,0.6,0.466667,0.566667,0.466667,0.666667,0.533333,0.333333,0.266667,0.333333,,,0.533333,0.516667,0.6,0.333333,0.466667,Low,0.533333,Medium,0.6,Medium,0.266667,Low,LowMediumMediumLow,Monitor
71,72,ARIZONA,4007,AZ,GILA COUNTY,12305.0,19272.0,161.0,5.0,3635.0,7433.0,2178.470588,1157.3125,356.096154,,,17.0,32.0,104.0,161.0,4539.0,39.0,17144.0,0.037311,0.025941,49.7,42092.0,53400.0,37034.0,1.548077,0.17455,0.200707,0.230431,0.360899,0.333333,0.4,0.566667,0.9,0.333333,0.4,0.466667,0.333333,0.6,,,0.366667,0.733333,0.366667,0.466667,0.333333,Low,0.933333,High,0.333333,Low,0.4,Low,LowHighLowLow,Monitor
72,73,ARIZONA,4009,AZ,GRAHAM COUNTY,7962.0,7083.0,25.0,3.0,3469.0,3250.0,2350.666667,1511.142857,480.818182,,,9.0,14.0,44.0,25.0,2903.0,22.0,7204.0,0.097627,0.015295,33.3,51352.0,37879.0,21156.0,0.568182,0.321621,0.153621,0.210196,0.18699,0.266667,0.133333,0.133333,0.133333,0.266667,0.266667,0.6,0.533333,0.733333,,,0.2,0.133333,0.266667,0.6,0.2,Low,0.066667,Low,0.266667,Low,0.533333,Medium,LowLowLowMedium,Recruit
73,74,ARIZONA,4011,AZ,GREENLEE COUNTY,862.0,1698.0,0.0,5.0,609.0,510.0,1009.0,210.208333,180.178571,,,5.0,24.0,28.0,0.0,872.0,8.0,1905.0,0.732205,0.031195,34.2,60962.0,9504.0,5045.0,0.0,0.234321,0.10109,0.090699,0.178662,0.066667,0.066667,0.066667,0.9,0.066667,0.066667,0.133333,0.066667,0.066667,,,0.066667,0.483333,0.066667,0.066667,0.066667,Low,0.4,Low,0.066667,Low,0.066667,Low,LowLowLowLow,Monitor
74,75,ARIZONA,4012,AZ,LA PAZ COUNTY,4787.0,9337.0,40.0,4.0,1729.0,1966.0,1501.75,750.875,203.627119,,,8.0,16.0,59.0,40.0,1678.0,11.0,3367.0,0.029822,0.004959,56.0,33333.0,20701.0,12014.0,0.677966,0.334818,0.163642,0.231245,0.451041,0.133333,0.2,0.233333,0.466667,0.133333,0.133333,0.266667,0.2,0.2,,,0.166667,0.35,0.133333,0.2,0.133333,Low,0.166667,Low,0.133333,Low,0.166667,Low,LowLowLowLow,Monitor
75,76,ARIZONA,4013,AZ,MARICOPA COUNTY,952099.0,844570.0,1848.0,5.0,262283.0,302686.0,5176.684211,2557.282,909.741017,,,494.0,1000.0,2811.0,1848.0,223369.0,748.0,1181093.0,0.017545,0.002189,36.1,61606.0,4253913.0,2557282.0,0.611718,0.180374,0.118362,0.223817,0.19854,1.0,1.0,1.0,0.9,1.0,1.0,0.866667,0.8,0.933333,,,1.0,0.95,1.0,0.866667,1.0,High,1.0,High,1.0,High,0.866667,High,HighHighHighHigh,Recruit Immediately
76,77,ARIZONA,4015,AZ,MOHAVE COUNTY,19554.0,76142.0,245.0,4.0,21375.0,35602.0,7.108678,3376.622222,448.224189,,,21375.0,45.0,339.0,245.0,13761.0,24.0,50131.0,0.050906,0.005352,50.9,43266.0,206064.0,151948.0,0.698006,0.294645,0.234304,0.094893,0.369507,0.533333,0.733333,0.733333,0.466667,0.8,0.866667,0.066667,0.933333,0.666667,,,0.633333,0.6,0.833333,0.666667,0.666667,Medium,0.666667,Medium,0.866667,High,0.633333,Medium,MediumMediumHighMedium,Recruit Immediately
77,78,ARIZONA,4017,AZ,NAVAJO COUNTY,56771.0,25215.0,132.0,5.0,15134.0,10427.0,1874.307692,1198.327869,261.064286,,,39.0,61.0,280.0,132.0,7209.0,14.0,21643.0,0.023082,0.000649,36.4,40054.0,108705.0,73098.0,0.464789,0.407134,0.142644,0.522248,0.231958,0.8,0.533333,0.466667,0.9,0.6,0.466667,0.4,0.466667,0.266667,,,0.666667,0.683333,0.533333,0.4,0.8,High,0.866667,High,0.533333,Medium,0.333333,Low,HighHighMediumLow,Recruit Immediately


Fill out decision matrix in csv to read in here as "action_map.csv"

In [None]:
final_df.to_csv('all_states_risk_scores.csv')