## Post Processing from scripts/run_EI.R output
This notebook turns a `prec_means` CSV into a CSV of estimated counts for each precinct, which can then be compared against the counts derived from the `prec_means` CSVs of other EI runs. The counts CSVs are stored in the **../counts** folder.

In [1]:
import pandas as pd
import numpy as np
import itertools
# Notebook display settings: show up to 100 columns so wide frames are not truncated.
# (Optional) raise the row limit as well: pd.options.display.max_rows = 500
pd.options.display.max_columns = 100

In [2]:
# Path names, derived from a single run number so that switching to another
# EI run only requires editing RUN_NUMBER.
# NOTE(review): assumes the numeric suffix of the counts file is meant to track
# the EI run number (both were 2 in the hard-coded paths) -- confirm.
RUN_NUMBER = 2
cvap_path = "../resources/TX_cvap_for_EI.csv"
EI_output_path = f"../EI_outputs/run_{RUN_NUMBER}/12G_President_prec_means.csv"
counts_path = f"../counts/gabe_EI_{RUN_NUMBER}.csv"

In [3]:
# Load the per-precinct CVAP table and keep only the columns used downstream:
# every column whose name contains 'CVAP_2012', the X12G_President_CVAP column,
# and the three columns named by CD / SD / HD (copied into the output as
# "assignments" when the counts are saved).
CD, SD, HD = "CD", "sldu172", "sldl358"
cvap_df = pd.read_csv(cvap_path)
cvap_cols = [name for name in cvap_df.columns if 'CVAP_2012' in name]
cvap_cols.extend(["X12G_President_CVAP", CD, SD, HD])
cvap_df = cvap_df[cvap_cols]
cvap_df.head(3)

Unnamed: 0,CVAP_2012,BCVAP_2012,WCVAP_2012,HCVAP_2012,OCVAP_2012,X12G_President_CVAP,CD,sldu172,sldl358
0,1858,281,1439,41,97,0,4,3,8
1,2995,237,2534,199,25,0,4,5,57
2,1225,54,1121,34,16,0,4,3,8


In [4]:
# Parse and format prec_means from the EI output.
#
# Each row of the EI output holds a key in its first column and a value in its
# second. The key is split on "." and read as
#     <head>.<category>.<candidate>.<precinct number>
# (the head is 'beta' and the precinct number is 1-based). The rows are pivoted
# into a wide frame `df` with one row per precinct of cvap_df and one
# "<category>.<candidate>" column per pair.
prec_means = pd.read_csv(EI_output_path)

# think about order here....(just for ease of comparison)
candidates = "ObamaD_12G_President RomneyR_12G_President X12G_President_abstain".split()
categories = "BCVAP_2012 HCVAP_2012 OCVAP_2012 WCVAP_2012 X12G_President_CVAP".split() # CHANGE WHEN COMBINING W,O

print("Initializing df with categories for each candidate...")
cols = [f"{category}.{candidate}" for category in categories for candidate in candidates]
col_position = {col: j for j, col in enumerate(cols)}
n_precincts = len(cvap_df)
proportions = np.zeros(shape=(n_precincts, len(cols)))

print(f'Populating df with data from {EI_output_path}...')
# Fill a plain NumPy array by position and build the frame once at the end.
# The previous chained assignment (df[col][i-1] = val) writes to a temporary
# under pandas Copy-on-Write and would leave df all zeros; row[0]/row[1] also
# relied on deprecated positional Series indexing.
for key, val in zip(prec_means.iloc[:, 0], prec_means.iloc[:, 1]):
    key_parts = key.split(".")  # separate sections of the key
    col = key_parts[1] + "." + key_parts[2]  # drop head 'beta.' and trailing '.#'
    row_idx = int(key_parts[3]) - 1  # precinct numbers in the key are 1-based
    if not 0 <= row_idx < n_precincts:
        # A precinct number outside the CVAP table means the two inputs do not
        # match; fail loudly instead of silently dropping the value.
        raise IndexError(
            f"precinct number {key_parts[3]} in key {key!r} is outside 1..{n_precincts}"
        )
    # An unexpected category/candidate pair raises KeyError here.
    proportions[row_idx, col_position[col]] = val
df = pd.DataFrame(proportions, columns=cols)
print(f'Populated df with {len(df)} precincts of EI proportions')
df.iloc[0:3]

Initializing df with categories for each candidate...
Populating df with data from ../EI_outputs/run_2/12G_President_prec_means.csv...
Populated df with 9082 precincts of EI proportions


Unnamed: 0,BCVAP_2012.ObamaD_12G_President,BCVAP_2012.RomneyR_12G_President,BCVAP_2012.X12G_President_abstain,HCVAP_2012.ObamaD_12G_President,HCVAP_2012.RomneyR_12G_President,HCVAP_2012.X12G_President_abstain,OCVAP_2012.ObamaD_12G_President,OCVAP_2012.RomneyR_12G_President,OCVAP_2012.X12G_President_abstain,WCVAP_2012.ObamaD_12G_President,WCVAP_2012.RomneyR_12G_President,WCVAP_2012.X12G_President_abstain,X12G_President_CVAP.ObamaD_12G_President,X12G_President_CVAP.RomneyR_12G_President,X12G_President_CVAP.X12G_President_abstain
0,0.429034,0.105166,0.465801,0.295512,0.091056,0.613433,0.51698,0.198533,0.284487,0.054567,0.48749,0.457943,0.391665,0.407353,0.200982
1,0.410541,0.105108,0.484352,0.277755,0.094308,0.627937,0.489148,0.204211,0.306641,0.025169,0.446294,0.528537,0.402672,0.397815,0.199512
2,0.471169,0.098247,0.430584,0.330446,0.082936,0.586619,0.52075,0.193919,0.285331,0.055866,0.570238,0.373896,0.3855,0.425355,0.189144


In [5]:
# Multiply the EI proportions by the matching CVAP total for each precinct,
# turning every "<category>.<candidate>" column of df into estimated counts.
#
# Re-run guard: proportions never exceed 1, so if any value is above 1 the
# frame has already been scaled. Checking every value (rather than only the
# first cell) keeps the guard working when precinct 0 happens to have a count
# below 1, e.g. a zero CVAP total.
if (df[cols].to_numpy() <= 1).all():
    for col in cols:
        cvap_total_col = col.split(".")[0]  # "<category>" part names the CVAP column
        # Whole-column multiply, aligned on the shared row index; replaces the
        # per-element chained assignment, which does not write back to df
        # under pandas Copy-on-Write.
        df[col] = df[col] * cvap_df[cvap_total_col]
else:
    print("Error: EI data are not proportions (have you already run this step?)")
df.iloc[0:3]

Unnamed: 0,BCVAP_2012.ObamaD_12G_President,BCVAP_2012.RomneyR_12G_President,BCVAP_2012.X12G_President_abstain,HCVAP_2012.ObamaD_12G_President,HCVAP_2012.RomneyR_12G_President,HCVAP_2012.X12G_President_abstain,OCVAP_2012.ObamaD_12G_President,OCVAP_2012.RomneyR_12G_President,OCVAP_2012.X12G_President_abstain,WCVAP_2012.ObamaD_12G_President,WCVAP_2012.RomneyR_12G_President,WCVAP_2012.X12G_President_abstain,X12G_President_CVAP.ObamaD_12G_President,X12G_President_CVAP.RomneyR_12G_President,X12G_President_CVAP.X12G_President_abstain
0,120.558469,29.551583,130.889949,12.115974,3.733289,25.150737,50.147089,19.257674,27.595237,78.521466,701.498298,658.980236,0.0,0.0,0.0
1,97.298122,24.910494,114.791384,55.273221,18.767312,124.959467,12.228693,5.105279,7.666028,63.779035,1130.907921,1339.313044,0.0,0.0,0.0
2,25.443132,5.305355,23.251514,11.235151,2.819808,19.945041,8.332,3.102701,4.565299,62.625348,639.237338,419.137314,0.0,0.0,0.0


In [6]:
# Copy the CD / SD / HD assignment columns from cvap_df onto df (matched by
# row index), then write the result to counts_path without the index column.
assignment_cols = (CD, SD, HD)
for assignment_col in assignment_cols:
    df[assignment_col] = cvap_df[assignment_col]
df.to_csv(counts_path, index=False)