# Step 1: Match voter file counts to ACS CVAP Counts

First, establish the "count" of registered voters in a given area.  I do this by narrowing to active voters only and those that are currently living at their address (per TargetSmart data).

In [1]:
#import dependencies
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [2]:
# Load .csv files- voter file data and acs data
# Unzip BlockGr file first
acs_data = "Resources/SLDU.csv"
vf_data = "Resources/az_statewide/az_statewide.csv"

# convert .csv data to Pandas dataframe
az_df = pd.read_csv(vf_data, encoding = "ISO-8859-1", dtype={'reg_census_id': object, 'vf_hd': object})
acs_cvap = pd.read_csv(acs_data, encoding = "ISO-8859-1")

In [3]:
az_df_count = az_df['voterbase_id'].count()
az_df_count

4254481

In [4]:
#Building our list of current voters. First, narrow to just active voters
reg_voters = az_df.loc[az_df["vf_voter_status"] == "Active", :]
reg_voters_count = reg_voters['voterbase_id'].count()
reg_voters_count

3770894

In [5]:
#Then, narrow to just those voters with a street level geocode
geo_voters = reg_voters.loc[reg_voters["reg_level"] == "Street", :]
geo_voters_count = geo_voters['voterbase_id'].count()
geo_voters_count

3632837

In [6]:
#Then, remove voters who are no longer living at their current address (according to targetsmart data)
#This will be the list the count of voters we compare to ACS CVAP data
vf_df = geo_voters.loc[geo_voters["voterbase_mover_status"] == "Resides at Current Address", :]
vf_count = vf_df['voterbase_id'].count()
vf_count

3141632

In [7]:
# convert reg_census_id to string, create new census block group column and state fips column from reg_census_id
vf_df.reg_census_id = vf_df.reg_census_id.astype(str)
vf_df['block_group'] = vf_df.reg_census_id.str[:12]
vf_df['state_fips'] = vf_df.reg_census_id.str[:2]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [8]:
# insert check to make sure just records with An Arizona census block are making it through (AZ FIPS code = 04)
az_df = vf_df.loc[vf_df["state_fips"] == "04", :]
az_count = az_df['voterbase_id'].count()
az_count

3141630

In [10]:
az_df.head()

Unnamed: 0,voterbase_id,vf_voter_status,vf_county_name,vf_hd,reg_level,reg_census_id,voterbase_mover_status,tsmr_race,block_group,state_fips
4887,AZ-000000562269,Active,MARICOPA,27,Street,40131149001045,Resides at Current Address,Hispanic,40131149001,4
4889,AZ-6734255,Active,MARICOPA,27,Street,40131142001057,Resides at Current Address,Hispanic,40131142001,4
4891,AZ-000000554541,Active,MARICOPA,27,Street,40131142001053,Resides at Current Address,Hispanic,40131142001,4
4893,AZ-6941812,Active,MARICOPA,24,Street,40131131001008,Resides at Current Address,Caucasian,40131131001,4
4894,AZ-5340916,Active,MARICOPA,24,Street,40131131001001,Resides at Current Address,Caucasian,40131131001,4


In [11]:
# Registered voters by leg district
ld_counts = az_df["vf_hd"].value_counts()
reg_table = pd.DataFrame({"reg_count": ld_counts,
                                 })
reg_table.head(30)

Unnamed: 0,reg_count
12,140710
23,138009
22,134009
1,131697
18,126867
15,124272
17,123756
11,123712
28,117078
6,115273


In [29]:
az_cvap = acs_cvap[acs_cvap['GEONAME'].str.contains("Arizona")]
az_cvap['LD'] = az_cvap['GEOID'].str[9:]
az_cvap = az_cvap.rename(columns={'CVAP_EST': 'CVAP_estimate',
                                   'CVAP_MOE': 'CVAP_margin_of_error',
                                   })
az_cvap = az_cvap.loc[az_cvap["LNTITLE"] == "Total", :]
az_cvap

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,GEONAME,LNTITLE,GEOID,LNNUMBER,TOT_EST,TOT_MOE,ADU_EST,ADU_MOE,CIT_EST,CIT_MOE,CVAP_estimate,CVAP_margin_of_error,LD
715,"State Senate District 1 (2016), Arizona",Total,61000US04001,1,226955,2669,183385,2092,220565,2802,177575,2243,1
728,"State Senate District 2 (2016), Arizona",Total,61000US04002,1,209660,2300,155995,1633,185500,2268,133670,1632,2
741,"State Senate District 3 (2016), Arizona",Total,61000US04003,1,210480,3519,164670,2561,189510,3451,145110,2615,3
754,"State Senate District 4 (2016), Arizona",Total,61000US04004,1,215680,2917,155620,2263,186185,2929,128470,2268,4
767,"State Senate District 5 (2016), Arizona",Total,61000US04005,1,223450,295,183290,232,214780,1083,175075,1034,5
780,"State Senate District 6 (2016), Arizona",Total,61000US04006,1,220045,1937,180910,1706,213205,1963,174330,1809,6
793,"State Senate District 7 (2016), Arizona",Total,61000US04007,1,205145,1525,144800,1200,202245,1553,142200,1194,7
806,"State Senate District 8 (2016), Arizona",Total,61000US04008,1,219725,2717,168095,2023,205200,2766,154105,2069,8
819,"State Senate District 9 (2016), Arizona",Total,61000US04009,1,211470,2934,172975,2053,198420,2747,161040,1994,9
832,"State Senate District 10 (2016), Arizona",Total,61000US04010,1,214185,2844,171010,2117,204800,2818,162330,2077,10


In [37]:
#az_cvap = az_cvap.drop(columns=['GEONAME', 'LNTITLE', 'GEOID', 'CIT_EST', 'CIT_MOE', 'TOT_EST', 'ADU_EST', 'ADU_MOE' ])
az_cvap = az_cvap[["LD","CVAP_estimate"]]
az_cvap.set_index('LD')
az_cvap.head()

Unnamed: 0,LD,CVAP_estimate
715,1,177575
728,2,133670
741,3,145110
754,4,128470
767,5,175075


In [44]:
# Combine ACS CVAP count with voter file registration count by census block group
merged_data = pd.merge(reg_table, az_cvap, left_index=True, right_on='LD')
merged_data = merged_data[["LD", "CVAP_estimate", "reg_count"]]
merged_data.set_index('LD')
merged_data.sort_values(by=['LD'])
merged_data.head(30)

Unnamed: 0,LD,CVAP_estimate,reg_count
858,12,166315,140710
1001,23,183790,138009
988,22,178815,134009
715,1,177575,131697
936,18,170005,126867
897,15,163830,124272
923,17,156055,123756
845,11,165255,123712
1066,28,160545,117078
780,6,174330,115273


In [45]:
#calculate the percent of each LD that is registered
merged_data['reg_potential'] = merged_data.CVAP_estimate - merged_data.reg_count
merged_data['percent_reg'] = merged_data.reg_count / merged_data.CVAP_estimate

In [46]:
#sort data on registration potential
merged_data.sort_values("reg_potential", inplace=True, ascending=False)
merged_data.head(25)

cm = sns.light_palette("green", as_cmap=True)

final_table= merged_data.style.background_gradient(cmap=cm)

final_table

Unnamed: 0,LD,CVAP_estimate,reg_count,reg_potential,percent_reg
793,7,142200,34361,107839,0.241639
806,8,154105,83464,70641,0.541605
1040,26,152105,82219,69886,0.540541
910,16,180570,111801,68769,0.619156
767,5,175075,107660,67415,0.614936
780,6,174330,115273,59057,0.661234
741,3,145110,86757,58353,0.597871
962,20,166305,108090,58215,0.64995
975,21,167760,109824,57936,0.654649
1027,25,171175,113704,57471,0.664256


In [47]:
# Export file as a CSV
merged_data.to_csv("Output/statewide_reg_analysis.csv", index=False, header=True)