# Voting Data
- Create df with all the voting results for each county for each year (2000-2016)
    - Number of votes
    - % republican votes
    - % democrat votes
    - State abbrev, RegionName, FIPS
  
Saved here: './data/dataframes/voting.csv'

In [1]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as pl
import datetime
import pickle
from pygeocoder import Geocoder

colors = ['y', 'm', 'c', 'r', 'g', 'b', 'k', 'DeepPink',\
          'DodgerBlue', 'Aquamarine', 'MediumSpringGreen',\
         'MidnightBlue', 'Gold', 'DarkSlateGray', 'LimeGreen', \
         'LightSeaGreen', 'Salmon', 'Indigo', 'DarkMagenta', \
         'Thistle', 'DeepSkyBlue', 'CadetBlue', \
         'BlueViolet', 'Chocolate', 'SaddleBrown', 'Maroon', \
         'Peru', 'DarkOrange', 'Teal', 'DarkKhaki']

In [2]:
# DL FIPS reference dataframe
fips_codes = pd.read_excel('US_FIPS_Codes.xls')
fips_codes.columns = fips_codes.iloc[0]
fips_codes.drop(0, inplace=True)
fips_codes.head(3)

Unnamed: 0,State,County Name,FIPS State,FIPS County
1,Alabama,Autauga,1,1
2,Alabama,Baldwin,1,3
3,Alabama,Barbour,1,5


In [3]:
#download State Abbreviations
pkl_file = open('./data/dataframes/states_abbrev.pkl', 'rb')
states = pickle.load(pkl_file)
pkl_file.close()

In [4]:
#replace state full names with abbreviations and rename county column
fips_codes.State = fips_codes.State.replace(states)
fips_codes['FIPS'] = fips_codes['FIPS State'] + fips_codes['FIPS County']
fips_codes.rename(columns={'County Name': 'RegionName'}, inplace=True)
fips_codes.head(3)

Unnamed: 0,State,RegionName,FIPS State,FIPS County,FIPS
1,AL,Autauga,1,1,1001
2,AL,Baldwin,1,3,1003
3,AL,Barbour,1,5,1005


In [5]:
fips_codes.to_csv('./data/dataframes/FIPS_codes.csv')

# voting data

In [6]:
elections= ['00', '04', '08', '12', '16']

In [7]:
elections = ['2008_general_NYT', '2008_general_wiki', 'general_FIPS12', 'general_FIPS16',
             'general_FIPS00', 'general_FIPS04']

In [8]:
voting = {}
for fil in elections:
    print (fil)
    path = './data/County_Elections/{}.csv'.format(fil)
    #path = './data/County_Elections/general_FIPS{}.csv'.format(fil)
    voting[fil] =  pd.read_csv(path)
    voting[fil].FIPS = voting[fil].FIPS.fillna('00000')
    voting[fil].FIPS = voting[fil].FIPS.apply(int).apply(str).str.zfill(5)
    voting[fil]['TotalVotes'] = voting[fil][['votes1', 'votes2', 'votes3']].sum(axis=1)

2008_general_NYT
2008_general_wiki
general_FIPS12
general_FIPS16
general_FIPS00
general_FIPS04


In [9]:
voting[elections[0]].tail(3)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,state,county,1st,2nd,3rd,votes1,votes2,votes3,pct1,pct2,pct3,party1,party2,party3,Abbreviation,FIPS,TotalVotes
3115,3115,0,Wyoming,Uinta,McCain,Obama,,5759,2317,,69.0,28.0,,R,D,,WY,56041,8076.0
3116,3116,0,Wyoming,Washakie,McCain,Obama,,2956,1042,,73.0,26.0,,R,D,,WY,56043,3998.0
3117,3117,0,Wyoming,Weston,McCain,Obama,,2618,658,,78.0,20.0,,R,D,,WY,56045,3276.0


In [10]:
def party_pct(df):
    df['Rpct'] = 'x'
    df['Dpct'] = 'x'
    for i in df.index:
        if df.iloc[i]['party1'] == 'R':
            df['Rpct'].iloc[i] = df.iloc[i]['pct1']
            df['Dpct'].iloc[i] = df.iloc[i]['pct2']
        else:
            df['Rpct'].iloc[i] = df.iloc[i]['pct2']
            df['Dpct'].iloc[i] = df.iloc[i]['pct1']
    return df

In [None]:
for df in elections:
    voting[df] = party_pct(voting[df])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [None]:
output = open('./data/County_Elections/Voting.pkl', 'wb')
pickle.dump(voting, output)
output.close()

In [17]:
voting[elections[0]][voting[elections[0]].county == 'Bristol']

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,state,county,1st,2nd,3rd,votes1,votes2,votes3,...,pct2,pct3,party1,party2,party3,Abbreviation,FIPS,TotalVotes,Rpct,Dpct
1189,1189,0,Massachusetts,Bristol,Obama,McCain,,146311,90284,,...,37.0,,D,R,,MA,25005,236595.0,37,61
2281,2281,0,Rhode Island,Bristol,Obama,McCain,,15185,8701,,...,36.0,,D,R,,,0,23886.0,36,63
2805,2805,0,Virginia,Bristol,McCain,Obama,,4579,2665,,...,36.0,,R,D,,VA,51520,7244.0,62,36


In [74]:
pkl_file = open('./data/County_Elections/Voting.pkl', 'rb')
voting = pickle.load(pkl_file)
pkl_file.close()

In [75]:
for df in elections:
    voting[df] =  voting[df][['Abbreviation','state', 'FIPS', 'county', 'TotalVotes', 'Rpct', 'Dpct']]

In [97]:
for df in elections:
    print (voting[df].isnull().sum())

State         0
state         0
FIPS          0
RegionName    0
TotalVotes    0
Rpct          0
Dpct          0
dtype: int64
State          0
state          0
FIPS           0
RegionName     0
TotalVotes     0
Rpct          83
Dpct          83
dtype: int64
State         0
state         0
FIPS          0
RegionName    0
TotalVotes    0
Rpct          0
Dpct          0
dtype: int64
State         0
state         0
FIPS          0
RegionName    0
TotalVotes    0
Rpct          0
Dpct          0
dtype: int64
State         0
state         0
FIPS          0
RegionName    1
TotalVotes    0
Rpct          0
Dpct          0
dtype: int64


In [None]:
for df in elections:
    voting[df].Abbreviation.fillna(value=voting[df].state, inplace=True)

In [110]:
for df in elections:
    voting[df].State = voting[df].State.replace(states)

In [116]:
for df in elections:
    print (voting[df].State.value_counts())

GA    159
IL    102
VA    100
IA     99
OH     88
AR     75
FL     67
AL     67
CO     63
CA     58
ID     44
WA     39
OR     36
NV     16
ME     16
AZ     15
MA     14
VT     14
NH     10
CT      8
RI      5
HI      4
DE      3
Name: State, dtype: int64
TX    254
GA    159
VA    144
KY    120
MO    116
KS    105
IL    102
NC    100
IA     99
TN     95
NE     93
IN     92
OH     88
MN     87
MI     83
MS     82
OK     77
AR     75
WI     72
FL     67
PA     67
AL     67
SD     66
LA     64
CO     64
NY     62
CA     58
MT     56
WV     55
ND     53
SC     46
ID     44
WA     39
OR     36
NM     33
UT     29
MD     24
WY     23
NJ     21
NV     17
AZ     15
MA     14
VT     14
NH     10
CT      8
HI      5
RI      5
DE      3
Name: State, dtype: int64
TX    254
GA    159
VA    139
KY    120
MO    115
KS    105
IL    102
NC    100
IA     99
TN     95
NE     93
IN     92
OH     88
MN     87
MI     83
MS     82
OK     77
AR     75
WI     72
FL     67
PA     67
AL     67
SD     66
LA     6

In [115]:
voting[df].State.replace(to_replace='District-Of-Columbia', value='District of Columbia', inplace=True)

In [95]:
for df in elections:
    #voting[df] = pd.melt(voting[df], id_vars=['Abbreviation', 'county', 'FIPS']).sort_values(['Abbreviation', 'county'])
    voting[df].rename(columns={'Abbreviation':'State', 'county':'RegionName'.format(df)}, inplace=True)
    
    
    

In [118]:
for df in elections:
    voting[df] = pd.merge(voting[df], fips_codes, on=['State', 'RegionName'])
    voting[df].FIPS_x.replace(to_replace='00000', value=voting[df].FIPS_y, inplace=True)

In [130]:
for df in elections:
    #voting[df].drop(['FIPS State', 'FIPS County', 'FIPS_y', 'state'], axis=1, inplace=True)
    voting[df] = pd.melt(voting[df], id_vars=['State', 'RegionName', 'FIPS_x']).sort_values(['State', 'RegionName'])
    voting[df].rename(columns={'FIPS_x':'FIPS',
                              'variable':'feature', 'value': '20{}'.format(df)}, inplace=True)

In [176]:
df = '16'
print (len(voting[df]))
print (len(voting[df].drop_duplicates(keep='first')))

9048
9048


In [None]:
for df in elections:
    voting[df]

### fixing cities and counties that are miscoded

In [359]:
voting['00'].drop([951, 2036, 3121], inplace=True)

In [347]:
voting['16'].drop([2733, 5749, 8765], inplace=True)
voting['12'].drop([2735, 5753, 8771], inplace=True)
voting['04'].drop([2703, 5686, 8673, 2702, 5687, 8672], inplace=True)
voting['04'].loc[[2704, 5689, 8674], 'RegionName'] = 'Bedford City'
voting['00'].loc[[958, 2043, 3128], 'RegionName'] = 'Bedford City'

In [320]:
voting['08'].drop([1130, 4146, 7162], inplace=True)
voting['12'].drop([1131, 4149, 7167], inplace=True)
voting['16'].drop([1133, 4149, 7165, 1132, 4148, 7164], inplace=True)
voting['16'].loc[[1134, 4150, 7166], 'RegionName'] = 'Baltimore City'

In [294]:
voting['16'].loc[[2754, 5770, 8786], 'RegionName'] = 'Fairfax City'
voting['16'].drop([2753, 5769, 8785, 2752, 5768, 8784], inplace=True)
voting['12'].drop([2756, 5774, 8792], inplace=True)
voting['08'].drop([2754, 5770, 8786], inplace=True)
voting['04'].loc[[2726, 5711, 8696], 'RegionName'] = 'Fairfax City'
voting['04'].drop([2725, 5710, 8695, 2724, 5709, 8694], inplace=True)

In [180]:
#distinguishing between Richmond Coutny (FIPS = 51159) and Richmond City
voting['04'].loc[[2779,5764,8749], 'RegionName'] = 'Richmond City'
voting['04'].loc[[2777, 5762, 8747], 'FIPS'] = '51159'
voting['00'].loc[[1025, 2110, 3195], 'RegionName'] = 'Richmond City'
voting['08'].loc[[2806, 5822, 8838], 'FIPS'] = '51159'
voting['16'].loc[[2805, 5821, 8837], 'FIPS'] = '51159'
voting['16'].loc[[2807, 5823, 8839], 'RegionName'] = 'Richmond City'
voting['16'].drop([2805, 2806, 5821, 5822, 8837, 8838], inplace=True)
voting['12'].drop([2807, 5825, 8843], inplace=True)
voting['08'].drop([2805, 5821, 8837], inplace=True)
voting['04'].drop([2778, 1778, 5762, 5763, 8747, 8748], inplace=True)

In [193]:
voting['16'].loc[[2759, 5775, 8791], 'FIPS'] = '51067'
voting['16'].drop([2759, 2760, 5775, 5776, 8791, 8792], inplace=True)
voting['16'].loc[[2761, 5777, 8793], 'RegionName'] = 'Franklin City'
voting['12'].drop([2762, 5780, 8798], inplace=True)
voting['08'].drop([2760, 5776, 8792], inplace=True)
voting['04'].loc[[2733, 5718, 8703], 'RegionName'] = 'Franklin City'
voting['04'].drop([2731, 5716, 8701, 2732, 5717, 8702], inplace=True)
voting['00'].loc[[983, 2068, 3153], 'RegionName'] = 'Franklin City'

In [227]:
voting['00'].loc[[1027, 2112, 3197], 'RegionName'] = 'Roanoke City'
voting['04'].drop([2782, 5767, 8750, 2781, 5766, 8751], inplace=True)
voting['04'].loc[[2783, 5768, 8753], 'RegionName'] = 'Roanoke City'
voting['08'].drop([2809, 5825, 8841], inplace=True)
voting['12'].drop([2811, 5829, 8847], inplace=True)
voting['16'].drop([2810, 5826, 8842, 2809, 5825, 8841], inplace=True)
voting['16'].loc[[2811, 5827, 8843], 'RegionName'] = 'Roanoke City'

### Merge together all the year dataframes

In [416]:
print ('Vot_df[0]: {}'.format(len(voting[elections[0]])))
print ('Vot_df[1]: {}'.format(len(voting[elections[1]])))
df_vot = pd.merge(voting[elections[0]], voting[elections[1]], on=['State', 'FIPS', 'RegionName', 'feature', ], how='outer')
print ('merged_Df: {}'.format(len(df_vot)))

Vot_df[0]: 3252
Vot_df[1]: 8924
merged_Df: 8975


In [417]:
print ('Vot_df[2]: {}'.format(len(voting[elections[2]])))
print ('merged: {}'.format(len(df_vot)))
df_vot = pd.merge(df_vot, voting[elections[2]], on=['State', 'RegionName','FIPS', 'feature'], how='outer')
print ('new merged_Df: {}'.format(len(df_vot)))

Vot_df[2]: 9030
merged: 8975
new merged_Df: 9054


In [418]:
print ('Vot_df[3]: {}'.format(len(voting[elections[3]])))
print ('merged: {}'.format(len(df_vot)))
df_vot = pd.merge(df_vot, voting[elections[3]], on=['State', 'RegionName','FIPS', 'feature'], how='outer')
print ('new merged_Df: {}'.format(len(df_vot)))

Vot_df[3]: 9036
merged: 9054
new merged_Df: 9060


In [419]:
print ('Vot_df[4]: {}'.format(len(voting[elections[2]])))
print ('merged: {}'.format(len(df_vot)))
df_vot = pd.merge(df_vot, voting[elections[4]], on=['State', 'RegionName','FIPS', 'feature'], how='outer')
print ('new merged_Df: {}'.format(len(df_vot)))

Vot_df[4]: 9030
merged: 9060
new merged_Df: 9081


In [421]:
df_vot.to_csv('./data/dataframes/voting.csv')

In [423]:
df_vot.head()

Unnamed: 0,State,RegionName,FIPS,feature,2000,2004,2008,2012,2016
0,AL,Autauga,1001,TotalVotes,17208.0,20081.0,23489,23856.0,24556.0
1,AL,Autauga,1001,Rpct,69.69,75.7,74,0.726,0.734
2,AL,Autauga,1001,Dpct,28.72,23.7,26,0.266,0.24
3,AL,Baldwin,1003,TotalVotes,56480.0,69320.0,80554,84698.0,93637.0
4,AL,Baldwin,1003,Rpct,72.37,76.4,75,0.774,0.774
