In [14]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from itertools import cycle, islice

# This line lets us plot on our ipython notebook
%matplotlib inline

In [15]:
# Read the cleaned 2016 county-level results (comma-separated) into a DataFrame
df_16 = pd.read_csv("proj_data/PAelectionresults16_bar.csv", low_memory=False)

# Show the first five rows as a quick sanity check
df_16.head()

Unnamed: 0,county,GEOID,cand,votes
0,Philadelphia County,42101,Hillary Clinton,560542
1,Philadelphia County,42101,Donald Trump,105418
2,Philadelphia County,42101,Gary Johnson,6786
3,Philadelphia County,42101,Jill Stein,6452
4,Philadelphia County,42101,Darrell Castle,1029


In [16]:
# Dimensions of the loaded results table as (rows, columns)
df_16.shape

(335, 4)

In [17]:
# Check the dtype pandas inferred for the vote counts
df_16['votes'].dtype

dtype('int64')

In [18]:
# Check the dtype pandas inferred for the GEOID column
df_16['GEOID'].dtype

dtype('int64')

In [19]:
# Distinct candidate names that appear in the 'cand' column
df_16['cand'].unique()

array(['Hillary Clinton', 'Donald Trump', 'Gary Johnson', 'Jill Stein',
       'Darrell Castle'], dtype=object)

In [20]:
## Democratic votes per row
# Add column 'democrat': keep `votes` on the 'Hillary Clinton' rows and
# overwrite every other candidate's row with 0.
df_16['democrat'] = df_16['votes'].mask(df_16['cand'].ne('Hillary Clinton'), 0)
df_16

Unnamed: 0,county,GEOID,cand,votes,democrat
0,Philadelphia County,42101,Hillary Clinton,560542,560542
1,Philadelphia County,42101,Donald Trump,105418,0
2,Philadelphia County,42101,Gary Johnson,6786,0
3,Philadelphia County,42101,Jill Stein,6452,0
4,Philadelphia County,42101,Darrell Castle,1029,0
5,Allegheny County,42003,Hillary Clinton,363017,363017
6,Allegheny County,42003,Donald Trump,257488,0
7,Allegheny County,42003,Gary Johnson,15854,0
8,Allegheny County,42003,Jill Stein,5021,0
9,Allegheny County,42003,Darrell Castle,1793,0


In [21]:
## Republican votes per row
# Add column 'republican': the row's `votes` when the candidate is
# 'Donald Trump', otherwise 0.
df_16['republican'] = df_16['votes'].where(df_16['cand'].eq('Donald Trump'), other=0)
df_16

Unnamed: 0,county,GEOID,cand,votes,democrat,republican
0,Philadelphia County,42101,Hillary Clinton,560542,560542,0
1,Philadelphia County,42101,Donald Trump,105418,0,105418
2,Philadelphia County,42101,Gary Johnson,6786,0,0
3,Philadelphia County,42101,Jill Stein,6452,0,0
4,Philadelphia County,42101,Darrell Castle,1029,0,0
5,Allegheny County,42003,Hillary Clinton,363017,363017,0
6,Allegheny County,42003,Donald Trump,257488,0,257488
7,Allegheny County,42003,Gary Johnson,15854,0,0
8,Allegheny County,42003,Jill Stein,5021,0,0
9,Allegheny County,42003,Darrell Castle,1793,0,0


In [22]:
## Third-party votes per row
# Add column 'other': the row's `votes` when the candidate is one of
# 'Gary Johnson', 'Jill Stein' or 'Darrell Castle', otherwise 0.
df_16['other'] = df_16['votes'].where(
    df_16['cand'].isin(['Gary Johnson', 'Jill Stein', 'Darrell Castle']),
    0,
)
df_16

Unnamed: 0,county,GEOID,cand,votes,democrat,republican,other
0,Philadelphia County,42101,Hillary Clinton,560542,560542,0,0
1,Philadelphia County,42101,Donald Trump,105418,0,105418,0
2,Philadelphia County,42101,Gary Johnson,6786,0,0,6786
3,Philadelphia County,42101,Jill Stein,6452,0,0,6452
4,Philadelphia County,42101,Darrell Castle,1029,0,0,1029
5,Allegheny County,42003,Hillary Clinton,363017,363017,0,0
6,Allegheny County,42003,Donald Trump,257488,0,257488,0
7,Allegheny County,42003,Gary Johnson,15854,0,0,15854
8,Allegheny County,42003,Jill Stein,5021,0,0,5021
9,Allegheny County,42003,Darrell Castle,1793,0,0,1793


In [24]:
# Group the per-candidate rows by county name and GEOID
# NOTE(review): the recorded output lists GEOID before county; re-run to refresh it.
grp = df_16.groupby(by=['county', 'GEOID'])

# Sum the democrat, republican and other vote columns within each group
df_16partyvotes = grp[['democrat', 'republican', 'other']].agg('sum')
df_16partyvotes

Unnamed: 0_level_0,Unnamed: 1_level_0,democrat,republican,other
GEOID,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42001,Adams County,14077,31249,1812
42003,Allegheny County,363017,257488,22668
42005,Armstrong County,6849,22676,921
42007,Beaver County,30225,46081,2730
42009,Bedford County,3613,19455,436
42011,Berks County,75169,93094,7671
42013,Blair County,13093,37224,1712
42015,Bradford County,6263,17957,1179
42017,Bucks County,165861,163873,12838
42019,Butler County,26834,61388,3805


In [25]:
# Column labels of the aggregated frame (the group keys live in the index)
df_16partyvotes.columns.tolist()

['democrat', 'republican', 'other']

In [26]:
# Write the 2016 party vote totals per PA county to CSV
# (the group-key index is written as the leading columns)
df_16partyvotes.to_csv(path_or_buf='electionresults16_PAcounties.csv')

### Create dataset with victory margins for the map

In [27]:
# Read the cleaned 2016 county results that carry a victory margin per county
df_16vict = pd.read_csv("proj_data/PAelectionresults16_map.csv", low_memory=False)

# Show the first five rows
df_16vict.head()

Unnamed: 0,state_abbr,county_name,id,victory_margin
0,PA,Adams County,42001,0.364292
1,PA,Allegheny County,42003,-0.164076
2,PA,Armstrong County,42005,0.519838
3,PA,Beaver County,42007,0.200617
4,PA,Bedford County,42009,0.674013


In [29]:
# Read the per-county party vote totals from electionresults16_PAcounties.csv
df_16votes = pd.read_csv("electionresults16_PAcounties.csv", low_memory=False)

# Show the first five rows
df_16votes.head()

Unnamed: 0,GEOID,county,democrat,republican,other
0,42001,Adams County,14077,31249,1812
1,42003,Allegheny County,363017,257488,22668
2,42005,Armstrong County,6849,22676,921
3,42007,Beaver County,30225,46081,2730
4,42009,Bedford County,3613,19455,436


In [30]:
# Compare the dtypes of the two join keys before merging
id_dtype = df_16vict['id'].dtype
geoid_dtype = df_16votes['GEOID'].dtype
print('victory id:', id_dtype, "\n", 'votes GEOID:', geoid_dtype)

victory id: int64 
 votes GEOID: int64


In [31]:
## Merge vote totals with victory margins on the county identifier
## (GEOID in the vote totals, id in the victory-margin table)

# Select the join key and the margin by column name rather than by position,
# so the cell still picks the right columns if the CSV's column order changes.
victory16 = df_16vict[['id', 'victory_margin']]

# Inner join (the merge default): only identifiers present in both tables are kept.
# Both key columns, GEOID and id, are retained in the result.
df16_victvotes = df_16votes.merge(victory16, left_on='GEOID', right_on='id')
df16_victvotes


Unnamed: 0,GEOID,county,democrat,republican,other,id,victory_margin
0,42001,Adams County,14077,31249,1812,42001,0.364292
1,42003,Allegheny County,363017,257488,22668,42003,-0.164076
2,42005,Armstrong County,6849,22676,921,42005,0.519838
3,42007,Beaver County,30225,46081,2730,42007,0.200617
4,42009,Bedford County,3613,19455,436,42009,0.674013
5,42011,Berks County,75169,93094,7671,42011,0.101885
6,42013,Blair County,13093,37224,1712,42013,0.463799
7,42015,Bradford County,6263,17957,1179,42015,0.460412
8,42017,Bucks County,165861,163873,12838,42017,-0.005803
9,42019,Butler County,26834,61388,3805,42019,0.375477


In [32]:
# Write the merged vote totals and victory margins per PA county (2016) to CSV
df16_victvotes.to_csv(path_or_buf='electionresults16_PAcounties2.csv')