In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read and Drop

In [2]:
# Load the raw voting table; the trailing bare expression displays its column index.
raw_csv_path = "Voting.csv"
votes = pd.read_csv(raw_csv_path)
votes.columns

Index(['FIPS', 'stateFIPS', 'state', 'county', 'stateAbb', 'region',
       'earlyVoting', 'democrat', 'republican', 'medianIncome',
       'populationOver18', 'medianAge', 'educTillHS', 'educSomeCollege',
       'educCollegeUp', 'shareWhite', 'shareBlack', 'shareAsian',
       'shareOtherRace', 'shareHispanic', 'prcpMm', 'minTempC', 'maxTempC',
       'prcpMm1015', 'minTempC1015', 'maxTempC1015', 'demSenate', 'repSenate',
       'totalSenate', 'registered', 'numPollWorkers',
       'difficultToObtainPollWorkers', 'voted', 'votedPhysical', 'votedUOCAVA',
       'votedAbsentee', 'votedProvisional', 'votedEarlyVoteCenter',
       'votedByMail', 'votedOther', 'votedOtherExplanation', 'allowSameDay',
       'repState', 'demState', 'closeElection', 'turnout', 'turnout2',
       'haveSenate', 'closeSenate', 'closePresSenate', 'freezing',
       'shareSameDay', 'shareRep'],
      dtype='object')

In [3]:
# Columns removed up front because nothing later in the notebook reads them.
# NOTE: 'voted' is deliberately NOT in this list -- the in-person vote
# reconstruction further down still needs it.
drops = [
    'stateFIPS',
    'educTillHS',
    'educSomeCollege',
    'educCollegeUp',
    'state',
    'shareWhite',
    'shareBlack',
    'shareAsian',
    'shareOtherRace',
    'shareHispanic',
    'demSenate',
    'repSenate',
    'totalSenate',
    'haveSenate',
    'closeSenate',
    'closePresSenate',
    'numPollWorkers',
    'difficultToObtainPollWorkers',
    'allowSameDay',
    'earlyVoting',
    'closeElection',
    'shareSameDay',
]
votes = votes.drop(labels=drops, axis="columns")
votes

Unnamed: 0,FIPS,county,stateAbb,region,democrat,republican,medianIncome,populationOver18,medianAge,prcpMm,...,votedEarlyVoteCenter,votedByMail,votedOther,votedOtherExplanation,repState,demState,turnout,turnout2,freezing,shareRep
0,1001,Autauga,AL,South,5936.0,18172,53.099,41196,37.8,0.000000,...,,,,,1318250,729547.0,0.585202,0.610399,,0.753775
1,1003,Baldwin,AL,South,18458.0,72883,51.365,155240,42.3,0.110000,...,,,,,1318250,729547.0,0.588386,0.619872,,0.797922
2,1005,Barbour,AL,South,4871.0,5454,33.956,20879,38.7,0.000000,...,,,,,1318250,729547.0,0.494516,0.505005,False,0.528232
3,1007,Bibb,AL,South,1874.0,6738,39.776,17816,40.2,0.000000,...,,,,,1318250,729547.0,0.483386,0.496913,False,0.782397
4,1009,Blount,AL,South,2156.0,22859,46.212,44103,40.8,0.000000,...,,,,,1318250,729547.0,0.567195,0.583634,False,0.913812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3105,56037,Sweetwater,WY,West,3231.0,12154,68.233,32468,33.8,0.000000,...,1.0,0.0,,,174419,55973.0,0.473851,,True,0.789990
3106,56039,Teton,WY,West,7314.0,3921,75.594,18212,38.4,0.033333,...,1.0,4663.0,,,174419,55973.0,0.616901,,True,0.348999
3107,56041,Uinta,WY,West,1202.0,6154,53.323,14717,34.8,0.000000,...,1.0,1372.0,,,174419,55973.0,0.499830,,False,0.836596
3108,56043,Washakie,WY,West,532.0,2911,46.212,6279,43.5,0.000000,...,0.0,0.0,,,174419,55973.0,0.548336,,True,0.845484


# Ensuring there is an adequate share of in-person votes

In [4]:
# Treat a missing vote figure as 0 so the counts can be compared and summed below.
# NOTE(review): 'votedOtherExplanation' sounds like a free-text column by name --
# confirm that zero-filling (and later summing) it is intended.
vote_count_cols = [
    'voted',
    'votedPhysical',
    'votedUOCAVA',
    'votedAbsentee',
    'votedProvisional',
    'votedEarlyVoteCenter',
    'votedByMail',
    'votedOther',
    'votedOtherExplanation',
]
votes[vote_count_cols] = votes[vote_count_cols].fillna(0)

In [5]:
# Drop records with no usable in-person information: after the zero fill of the
# vote columns, a row whose 'voted' and 'votedPhysical' are both 0 reported
# neither figure.
#
# The rows are selected with a boolean mask directly instead of going through a
# temporary 'noVotedData' flag column. Creating that flag with
# `.loc[mask, new_col] = True` raises on an empty frame and mutates the frame
# only to filter it one line later. `.copy()` makes the result an independent
# frame so the column assignments in the following cells write to it cleanly.
no_vote_data = votes['votedPhysical'].eq(0) & votes['voted'].eq(0)
votes = votes.loc[~no_vote_data].copy()

## Three cases for deriving in-person and not-in-person vote counts

In [6]:
# Helper column: total of every voting channel other than 'votedPhysical'.
# The columns are added left to right, one at a time.
other_vote_cols = [
    'votedUOCAVA',
    'votedAbsentee',
    'votedProvisional',
    'votedEarlyVoteCenter',
    'votedByMail',
    'votedOther',
    'votedOtherExplanation',
]
other_total = votes[other_vote_cols[0]]
for other_col in other_vote_cols[1:]:
    other_total = other_total + votes[other_col]
votes['sumOtherVotes'] = other_total

In [7]:
# Case 1: both 'voted' and 'votedPhysical' are non-zero (missing values were
# filled with 0 earlier, so non-zero means "reported").
# In-person = physical count; not-in-person = overall total minus physical.
both_reported = votes['votedPhysical'].ne(0) & votes['voted'].ne(0)
votes.loc[both_reported, 'sumInPerson'] = votes['votedPhysical']
votes.loc[both_reported, 'sumNotInPerson'] = votes['voted'].sub(votes['votedPhysical'])

In [8]:
# Case 2: 'voted' is non-zero but the row was not handled by Case 1 (its target
# column is still NaN), i.e. 'votedPhysical' is 0.
# In-person is derived as the overall total minus all other channels;
# not-in-person is the sum of the other channels.
has_total = votes['voted'].ne(0)
needs_in_person = has_total & votes['sumInPerson'].isna()
needs_not_in_person = has_total & votes['sumNotInPerson'].isna()
votes.loc[needs_in_person, 'sumInPerson'] = votes['voted'].sub(votes['sumOtherVotes'])
votes.loc[needs_not_in_person, 'sumNotInPerson'] = votes['sumOtherVotes']

In [9]:
# Case 3: 'votedPhysical' is non-zero but the target column is still NaN after
# the earlier cases, i.e. 'voted' is 0.
# In-person = physical count; not-in-person = sum of the other channels.
has_physical = votes['votedPhysical'].ne(0)
still_missing_in_person = has_physical & votes['sumInPerson'].isna()
still_missing_not_in_person = has_physical & votes['sumNotInPerson'].isna()
votes.loc[still_missing_in_person, 'sumInPerson'] = votes['votedPhysical']
votes.loc[still_missing_not_in_person, 'sumNotInPerson'] = votes['sumOtherVotes']

In [10]:
# The raw per-channel vote columns and the helper sum are no longer needed once
# 'sumInPerson' and 'sumNotInPerson' have been derived, so remove them.
raw_vote_cols = [
    'voted',
    'votedPhysical',
    'votedUOCAVA',
    'votedAbsentee',
    'votedProvisional',
    'votedEarlyVoteCenter',
    'votedByMail',
    'votedOther',
    'votedOtherExplanation',
    'sumOtherVotes',
]
votes = votes.drop(labels=raw_vote_cols, axis="columns")

In [11]:
# Keep only rows where in-person ballots make up at least this fraction of all
# ballots. Rows whose share is NaN fail the comparison and are dropped too.
min_in_person_share = .7
votes['totalVotes'] = votes['sumInPerson'].add(votes['sumNotInPerson'])
votes['inPersonComp'] = votes['sumInPerson'].div(votes['totalVotes'])
votes = votes[votes['inPersonComp'].ge(min_in_person_share)]

# Party Lines

In [12]:
# Third-party votes = total ballots minus the two major-party counts.
# Rows where the major-party sum exceeds the total are discarded first, so
# 'third' is never negative. Rows with a NaN sum fail the comparison and go too.
votes['sumDemRep'] = votes['democrat'].add(votes['republican'])
total_covers_major_parties = votes['totalVotes'].ge(votes['sumDemRep'])
votes = votes[total_covers_major_parties]
votes['third'] = votes['totalVotes'].sub(votes['sumDemRep'])

In [13]:
# Express each party's vote count as a share of registered voters.
# Note: 'shareRep' already exists in the loaded data and is overwritten here.
registered_voters = votes['registered']
party_to_share = (
    ('republican', 'shareRep'),
    ('democrat', 'shareDem'),
    ('third', 'shareThird'),
)
for party_col, share_col in party_to_share:
    votes[share_col] = votes[party_col] / registered_voters

In [14]:
# Flag rows whose 'demState' figure exceeds 'repState' (presumably statewide
# party totals -- verify against the data dictionary). A NaN on either side
# compares as False.
votes['isDem'] = votes['demState'].gt(votes['repState'])
votes.columns

Index(['FIPS', 'county', 'stateAbb', 'region', 'democrat', 'republican',
       'medianIncome', 'populationOver18', 'medianAge', 'prcpMm', 'minTempC',
       'maxTempC', 'prcpMm1015', 'minTempC1015', 'maxTempC1015', 'registered',
       'repState', 'demState', 'turnout', 'turnout2', 'freezing', 'shareRep',
       'sumInPerson', 'sumNotInPerson', 'totalVotes', 'inPersonComp',
       'sumDemRep', 'third', 'shareDem', 'shareThird', 'isDem'],
      dtype='object')

In [15]:
# Remove intermediate and raw columns that the remaining cells do not read.
unused_cols = [
    'sumInPerson',
    'sumNotInPerson',
    'populationOver18',
    'medianAge',
    'totalVotes',
    'inPersonComp',
    'sumDemRep',
    'registered',
    'repState',
    'demState',
    'turnout',
    'turnout2',
]
votes = votes.drop(labels=unused_cols, axis="columns")

# Weather Finding

In [16]:
# Election-day average temperature: midpoint of the day's min and max.
# Rows missing either reading are removed first.
election_day_temp_cols = ['minTempC', 'maxTempC']
votes = votes.dropna(subset=election_day_temp_cols)
votes['avgTemp'] = votes[election_day_temp_cols].mean(axis=1)

In [17]:
# Historical average temperature: midpoint of the '1015' min and max columns
# (presumably a 2010-2015 baseline -- confirm). Rows missing either are removed.
past_temp_cols = ['minTempC1015', 'maxTempC1015']
votes = votes.dropna(subset=past_temp_cols)
votes['avgTempPast'] = votes[past_temp_cols].mean(axis=1)

In [18]:
# Temperature anomaly: election-day average minus the historical average
# (positive means warmer than usual).
votes['tempDiff'] = votes['avgTemp'].sub(votes['avgTempPast'])

In [19]:
# Precipitation anomaly: election-day precipitation minus the historical
# ('1015') precipitation; positive means wetter than usual.
# Both operands must be present. Filtering on 'prcpMm' alone let rows with a
# missing 'prcpMm1015' through, giving a NaN 'rainDiff'. The temperature cells
# require every input to be non-null, and this cell now does the same.
votes = votes.dropna(subset=['prcpMm', 'prcpMm1015'])
votes['rainDiff'] = votes['prcpMm'] - votes['prcpMm1015']

# Exporting

In [20]:
# Write the cleaned table to disk. The (now non-contiguous) row index is written
# as the first, unnamed column.
cleaned_csv_path = "votesCleaned.csv"
votes.to_csv(cleaned_csv_path)