In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Clean up the data - 2000

## Read in the CSV as a DataFrame

In [2]:
# Load the raw 2000 presidential results and preview the first rows.
p2000 = pd.read_csv(filepath_or_buffer="Datasets/2000P.csv")
p2000.head(n=5)

Unnamed: 0,STATE,Popular Vote - Bush (R),Popular Vote - Gore (D),Popular Vote - All Others
0,AL,941173,692611,32488
1,AK,167398,79004,39158
2,AZ,781652,685341,65023
3,AR,472940,422768,26073
4,CA,4567429,5861203,537224


## Rename the columns, since we don't care about the candidates' names

In [3]:
# Map the candidate-specific headers onto party names.
p2000 = p2000.rename(
    columns={
        "STATE": "State",
        "Popular Vote - Bush (R)": "Republican",
        "Popular Vote - Gore (D)": "Democratic",
        "Popular Vote - All Others": "Others",
    }
)
p2000.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others
0,AL,941173,692611,32488
1,AK,167398,79004,39158
2,AZ,781652,685341,65023
3,AR,472940,422768,26073
4,CA,4567429,5861203,537224


## Check whether there are any N/A rows and see how the Total row is labeled.

In [4]:
# Inspect the last five rows (trailing empty rows and the totals row show up here).
p2000.tail(n=5)

Unnamed: 0,State,Republican,Democratic,Others
47,WA,1108864,1247652,130917
48,WV,336475,295497,16152
49,WI,1237279,1242987,118341
50,WY,147947,60481,9923
51,Total:,50456002,50999897,3949201


## Drop any N/A rows and rename the "State" for the total to simply "Total" without any colons or spaces.

In [5]:
# Drop rows containing N/A values; the explicit copy makes the column
# assignment below write to an independent frame.
p2000 = p2000.dropna().copy()
# The totals row is the last row. Use its label as a plain scalar: passing the
# one-row Series itself as `to_replace` is interpreted as dict-like by pandas,
# which is version-dependent for Series.replace with a scalar value.
p2000["State"] = p2000["State"].replace(p2000["State"].iloc[-1], "Total")
p2000.tail()

Unnamed: 0,State,Republican,Democratic,Others
47,WA,1108864,1247652,130917
48,WV,336475,295497,16152
49,WI,1237279,1242987,118341
50,WY,147947,60481,9923
51,Total,50456002,50999897,3949201


## Remove the commas from the vote counts and convert them to integers.

In [6]:
# Strip commas from each vote column and cast the result to int.
p2000 = p2000.assign(
    Republican=p2000["Republican"].str.replace(",", "").astype(int),
    Democratic=p2000["Democratic"].str.replace(",", "").astype(int),
    Others=p2000["Others"].str.replace(",", "").astype(int),
)
p2000.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others
0,AL,941173,692611,32488
1,AK,167398,79004,39158
2,AZ,781652,685341,65023
3,AR,472940,422768,26073
4,CA,4567429,5861203,537224


## Add the Year and Election type

In [7]:
# Tag every row with the election year and type (constant columns).
p2000 = p2000.assign(Year=2000).assign(Election="Pres")
p2000.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others,Year,Election
0,AL,941173,692611,32488,2000,Pres
1,AK,167398,79004,39158,2000,Pres
2,AZ,781652,685341,65023,2000,Pres
3,AR,472940,422768,26073,2000,Pres
4,CA,4567429,5861203,537224,2000,Pres


## Calculate the Percentage of the Democratic Vote

In [8]:
# Democratic share of all votes cast (Republican + Democratic + Others), in percent.
p2000["PercentD"] = (
    p2000["Democratic"]
    .div(p2000["Republican"].add(p2000["Democratic"]).add(p2000["Others"]))
    .mul(100)
)
p2000.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others,Year,Election,PercentD
0,AL,941173,692611,32488,2000,Pres,41.566503
1,AK,167398,79004,39158,2000,Pres,27.66634
2,AZ,781652,685341,65023,2000,Pres,44.734585
3,AR,472940,422768,26073,2000,Pres,45.864256
4,CA,4567429,5861203,537224,2000,Pres,53.449571


# Same for 2004

In [9]:
# Load the raw 2004 presidential results and preview the first rows.
p2004 = pd.read_csv(filepath_or_buffer="Datasets/2004P.csv")
p2004.head(n=5)

Unnamed: 0,STATE,Popular Vote Bush (R),Popular Vote Kerry (D),Popular Vote All Others
0,AL,1176394,693933,13122
1,AK,190889,111025,10684
2,AZ,1104294,893524,14767
3,AR,572898,469953,12094
4,CA,5509826,6745485,166541


In [10]:
# Map the candidate-specific headers onto party names.
p2004 = p2004.rename(
    columns={
        "STATE": "State",
        "Popular Vote Bush (R)": "Republican",
        "Popular Vote Kerry (D)": "Democratic",
        "Popular Vote All Others": "Others",
    }
)
p2004.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others
0,AL,1176394,693933,13122
1,AK,190889,111025,10684
2,AZ,1104294,893524,14767
3,AR,572898,469953,12094
4,CA,5509826,6745485,166541


In [11]:
# Inspect the last five rows (trailing empty rows and the totals row show up here).
p2004.tail(n=5)

Unnamed: 0,State,Republican,Democratic,Others
48,WV,423778.0,326541.0,5568.0
49,WI,1478120.0,1489504.0,29383.0
50,WY,167629.0,70776.0,5023.0
51,Total:,62040610.0,59028444.0,1226291.0
52,,,,


In [12]:
# Drop rows containing N/A values (the tail above shows an empty trailing row);
# the explicit copy makes the column assignment below write to an independent frame.
p2004 = p2004.dropna().copy()
# The totals row is the last row. Use its label as a plain scalar: passing the
# one-row Series itself as `to_replace` is interpreted as dict-like by pandas,
# which is version-dependent for Series.replace with a scalar value.
p2004["State"] = p2004["State"].replace(p2004["State"].iloc[-1], "Total")
p2004.tail()

Unnamed: 0,State,Republican,Democratic,Others
47,WA,1304894,1510201,43989
48,WV,423778,326541,5568
49,WI,1478120,1489504,29383
50,WY,167629,70776,5023
51,Total,62040610,59028444,1226291


In [13]:
# Strip commas from each vote column and cast the result to int.
p2004 = p2004.assign(
    Republican=p2004["Republican"].str.replace(",", "").astype(int),
    Democratic=p2004["Democratic"].str.replace(",", "").astype(int),
    Others=p2004["Others"].str.replace(",", "").astype(int),
)
p2004.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others
0,AL,1176394,693933,13122
1,AK,190889,111025,10684
2,AZ,1104294,893524,14767
3,AR,572898,469953,12094
4,CA,5509826,6745485,166541


In [14]:
# Tag every row with the election year and type (constant columns).
p2004 = p2004.assign(Year=2004).assign(Election="Pres")
p2004.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others,Year,Election
0,AL,1176394,693933,13122,2004,Pres
1,AK,190889,111025,10684,2004,Pres
2,AZ,1104294,893524,14767,2004,Pres
3,AR,572898,469953,12094,2004,Pres
4,CA,5509826,6745485,166541,2004,Pres


In [15]:
# Democratic share of all votes cast (Republican + Democratic + Others), in percent.
p2004["PercentD"] = (
    p2004["Democratic"]
    .div(p2004["Republican"].add(p2004["Democratic"]).add(p2004["Others"]))
    .mul(100)
)
p2004.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others,Year,Election,PercentD
0,AL,1176394,693933,13122,2004,Pres,36.843737
1,AK,190889,111025,10684,2004,Pres,35.516862
2,AZ,1104294,893524,14767,2004,Pres,44.396833
3,AR,572898,469953,12094,2004,Pres,44.54763
4,CA,5509826,6745485,166541,2004,Pres,54.303376


# Same for 2008

In [16]:
# Load the raw 2008 presidential results and preview the first rows.
p2008 = pd.read_csv(filepath_or_buffer="Datasets/2008P.csv")
p2008.head(n=5)

Unnamed: 0,STATE,Obama (D),McCain (R),All Others
0,AL,813479,1266546,19794
1,AK,123594,193841,8762
2,AZ,1034707,1230111,28657
3,AR,422310,638017,26290
4,CA,8274473,5011781,275646


In [17]:
# Map the candidate-specific headers onto party names. Renaming is by header,
# so the file's column order (Democratic before Republican) is left as-is.
p2008 = p2008.rename(
    columns={
        "STATE": "State",
        "McCain (R)": "Republican",
        "Obama (D)": "Democratic",
        "All Others": "Others",
    }
)
p2008.head(n=5)

Unnamed: 0,State,Democratic,Republican,Others
0,AL,813479,1266546,19794
1,AK,123594,193841,8762
2,AZ,1034707,1230111,28657
3,AR,422310,638017,26290
4,CA,8274473,5011781,275646


In [18]:
# Inspect the last five rows (trailing empty rows and the totals row show up here).
p2008.tail(n=5)

Unnamed: 0,State,Democratic,Republican,Others
47,WA,1750848,1229216,56814
48,WV,303857,397466,12128
49,WI,1677211,1262393,43813
50,WY,82868,164958,6832
51,Total:,69498516,59948323,1866981


In [19]:
# NOTE: unlike 2000/2004, dropna() is not applied to the 2008 frame (the call was
# commented out in the original cell).
# The totals row is the last row. Use its label as a plain scalar: passing the
# one-row Series itself as `to_replace` is interpreted as dict-like by pandas,
# which is version-dependent for Series.replace with a scalar value.
p2008["State"] = p2008["State"].replace(p2008["State"].iloc[-1], "Total")
p2008.tail()

Unnamed: 0,State,Democratic,Republican,Others
47,WA,1750848,1229216,56814
48,WV,303857,397466,12128
49,WI,1677211,1262393,43813
50,WY,82868,164958,6832
51,Total,69498516,59948323,1866981


In [20]:
# NOTE(review): the comma-stripping step is disabled for 2008 — presumably this
# CSV's vote columns are already read as numbers; confirm, then delete this cell.
# p2008['Republican'] = p2008['Republican'].str.replace(',', '').astype(int)
# p2008['Democratic'] = p2008['Democratic'].str.replace(',', '').astype(int)
# p2008['Others'] = p2008['Others'].str.replace(',', '').astype(int)
# p2008.head()

In [21]:
# Tag every row with the election year and type (constant columns).
p2008 = p2008.assign(Year=2008).assign(Election="Pres")
p2008.head(n=5)

Unnamed: 0,State,Democratic,Republican,Others,Year,Election
0,AL,813479,1266546,19794,2008,Pres
1,AK,123594,193841,8762,2008,Pres
2,AZ,1034707,1230111,28657,2008,Pres
3,AR,422310,638017,26290,2008,Pres
4,CA,8274473,5011781,275646,2008,Pres


In [22]:
# Democratic share of all votes cast (Republican + Democratic + Others), in percent.
p2008["PercentD"] = (
    p2008["Democratic"]
    .div(p2008["Republican"].add(p2008["Democratic"]).add(p2008["Others"]))
    .mul(100)
)
p2008.head(n=5)

Unnamed: 0,State,Democratic,Republican,Others,Year,Election,PercentD
0,AL,813479,1266546,19794,2008,Pres,38.740434
1,AK,123594,193841,8762,2008,Pres,37.889374
2,AZ,1034707,1230111,28657,2008,Pres,45.115251
3,AR,422310,638017,26290,2008,Pres,38.86466
4,CA,8274473,5011781,275646,2008,Pres,61.012638


# Same for 2012

In [23]:
# Load the raw 2012 presidential results and preview the first rows.
p2012 = pd.read_csv(filepath_or_buffer="Datasets/2012P.csv")
p2012.head(n=5)

Unnamed: 0,STATE,Obama (D),Romney (R),All Others
0,AL,795696,1255925,22717
1,AK,122640,164676,13179
2,AZ,1025232,1233654,40368
3,AR,394409,647744,27315
4,CA,7854285,4839958,344304


In [24]:
# Map the candidate-specific headers onto party names. Renaming is by header,
# so the file's column order (Democratic before Republican) is left as-is.
p2012 = p2012.rename(
    columns={
        "STATE": "State",
        "Romney (R)": "Republican",
        "Obama (D)": "Democratic",
        "All Others": "Others",
    }
)
p2012.head(n=5)

Unnamed: 0,State,Democratic,Republican,Others
0,AL,795696,1255925,22717
1,AK,122640,164676,13179
2,AZ,1025232,1233654,40368
3,AR,394409,647744,27315
4,CA,7854285,4839958,344304


In [25]:
# Inspect the last five rows (trailing empty rows and the totals row show up here).
p2012.tail(n=5)

Unnamed: 0,State,Democratic,Republican,Others
47,WA,1755396,1290670,79450
48,WV,238269,417655,14514
49,WI,1620985,1407966,39483
50,WY,69286,170962,8813
51,Total:,65915795,60933504,2236111


In [26]:
# NOTE: unlike 2000/2004, dropna() is not applied to the 2012 frame (the call was
# commented out in the original cell).
# The totals row is the last row. Use its label as a plain scalar: passing the
# one-row Series itself as `to_replace` is interpreted as dict-like by pandas,
# which is version-dependent for Series.replace with a scalar value.
p2012["State"] = p2012["State"].replace(p2012["State"].iloc[-1], "Total")
p2012.tail()

Unnamed: 0,State,Democratic,Republican,Others
47,WA,1755396,1290670,79450
48,WV,238269,417655,14514
49,WI,1620985,1407966,39483
50,WY,69286,170962,8813
51,Total,65915795,60933504,2236111


In [27]:
# NOTE(review): the comma-stripping step is disabled for 2012 — presumably this
# CSV's vote columns are already read as numbers; confirm, then delete this cell.
# p2012['Republican'] = p2012['Republican'].str.replace(',', '').astype(int)
# p2012['Democratic'] = p2012['Democratic'].str.replace(',', '').astype(int)
# p2012['Others'] = p2012['Others'].str.replace(',', '').astype(int)
# p2012.head()

In [28]:
# Tag every row with the election year and type (constant columns).
p2012 = p2012.assign(Year=2012).assign(Election="Pres")
p2012.head(n=5)

Unnamed: 0,State,Democratic,Republican,Others,Year,Election
0,AL,795696,1255925,22717,2012,Pres
1,AK,122640,164676,13179,2012,Pres
2,AZ,1025232,1233654,40368,2012,Pres
3,AR,394409,647744,27315,2012,Pres
4,CA,7854285,4839958,344304,2012,Pres


In [29]:
# Democratic share of all votes cast (Republican + Democratic + Others), in percent.
p2012["PercentD"] = (
    p2012["Democratic"]
    .div(p2012["Republican"].add(p2012["Democratic"]).add(p2012["Others"]))
    .mul(100)
)
p2012.head(n=5)

Unnamed: 0,State,Democratic,Republican,Others,Year,Election,PercentD
0,AL,795696,1255925,22717,2012,Pres,38.359033
1,AK,122640,164676,13179,2012,Pres,40.812659
2,AZ,1025232,1233654,40368,2012,Pres,44.589767
3,AR,394409,647744,27315,2012,Pres,36.87899
4,CA,7854285,4839958,344304,2012,Pres,60.238959


# Same for 2016

In [30]:
# Load the raw 2016 presidential results and preview the first rows.
p2016 = pd.read_csv(filepath_or_buffer="Datasets/2016P.csv")
p2016.head(n=5)

Unnamed: 0.1,Unnamed: 0,Trump (R),Clinton (D),All Others
0,AL,1318255,729547,75570
1,AK,163387,116454,38767
2,AZ,1252401,1161167,159597
3,AR,684872,380494,65310
4,CA,4483814,8753792,943998


In [31]:
# Map the headers onto party names. In this file the state column is the one
# pandas labelled "Unnamed: 0", so that is the header renamed to "State".
p2016 = p2016.rename(
    columns={
        "Unnamed: 0": "State",
        "Trump (R)": "Republican",
        "Clinton (D)": "Democratic",
        "All Others": "Others",
    }
)
p2016.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others
0,AL,1318255,729547,75570
1,AK,163387,116454,38767
2,AZ,1252401,1161167,159597
3,AR,684872,380494,65310
4,CA,4483814,8753792,943998


In [32]:
# Inspect the last five rows (trailing empty rows and the totals row show up here).
p2016.tail(n=5)

Unnamed: 0,State,Republican,Democratic,Others
48,WV,489371.0,188794.0,36258.0
49,WI,1405284.0,1382536.0,188330.0
50,WY,174419.0,55973.0,25457.0
51,Total:,62984828.0,65853514.0,7830934.0
52,,,,


In [33]:
# Drop rows containing N/A values (the tail above shows an empty trailing row);
# the explicit copy makes the column assignment below write to an independent frame.
p2016 = p2016.dropna().copy()
# The totals row is the last row. Use its label as a plain scalar: passing the
# one-row Series itself as `to_replace` is interpreted as dict-like by pandas,
# which is version-dependent for Series.replace with a scalar value.
p2016["State"] = p2016["State"].replace(p2016["State"].iloc[-1], "Total")
p2016.tail()

Unnamed: 0,State,Republican,Democratic,Others
47,WA,1221747,1742718,352554
48,WV,489371,188794,36258
49,WI,1405284,1382536,188330
50,WY,174419,55973,25457
51,Total,62984828,65853514,7830934


In [34]:
# Strip commas from each vote column and cast the result to int.
p2016 = p2016.assign(
    Republican=p2016["Republican"].str.replace(",", "").astype(int),
    Democratic=p2016["Democratic"].str.replace(",", "").astype(int),
    Others=p2016["Others"].str.replace(",", "").astype(int),
)
p2016.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others
0,AL,1318255,729547,75570
1,AK,163387,116454,38767
2,AZ,1252401,1161167,159597
3,AR,684872,380494,65310
4,CA,4483814,8753792,943998


In [35]:
# Tag every row with the election year and type (constant columns).
p2016 = p2016.assign(Year=2016).assign(Election="Pres")
p2016.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others,Year,Election
0,AL,1318255,729547,75570,2016,Pres
1,AK,163387,116454,38767,2016,Pres
2,AZ,1252401,1161167,159597,2016,Pres
3,AR,684872,380494,65310,2016,Pres
4,CA,4483814,8753792,943998,2016,Pres


In [36]:
# Democratic share of all votes cast (Republican + Democratic + Others), in percent.
p2016["PercentD"] = (
    p2016["Democratic"]
    .div(p2016["Republican"].add(p2016["Democratic"]).add(p2016["Others"]))
    .mul(100)
)
p2016.head(n=5)

Unnamed: 0,State,Republican,Democratic,Others,Year,Election,PercentD
0,AL,1318255,729547,75570,2016,Pres,34.357946
1,AK,163387,116454,38767,2016,Pres,36.550871
2,AZ,1252401,1161167,159597,2016,Pres,45.126022
3,AR,684872,380494,65310,2016,Pres,33.651904
4,CA,4483814,8753792,943998,2016,Pres,61.726389


# Inserting the change in Democratic vote share relative to the previous election

In [37]:
# Change in Democratic vote share (percentage points) from 2012 to 2016, per state.
# Each state's 2012 PercentD is looked up through a State-indexed Series instead
# of filtering both frames once per row. drop_duplicates keeps the first row per
# State, matching the original `.iloc[0]` lookup; a state with no 2012 row now
# yields NaN instead of raising IndexError.
change = p2016["PercentD"] - p2016["State"].map(
    p2012.drop_duplicates("State").set_index("State")["PercentD"]
)
p2016["Change"] = change
p2016.head()

Unnamed: 0,State,Republican,Democratic,Others,Year,Election,PercentD,Change
0,AL,1318255,729547,75570,2016,Pres,34.357946,-4.001087
1,AK,163387,116454,38767,2016,Pres,36.550871,-4.261788
2,AZ,1252401,1161167,159597,2016,Pres,45.126022,0.536255
3,AR,684872,380494,65310,2016,Pres,33.651904,-3.227086
4,CA,4483814,8753792,943998,2016,Pres,61.726389,1.487429


In [38]:
# Change in Democratic vote share (percentage points) from 2008 to 2012, per state.
# Each state's 2008 PercentD is looked up through a State-indexed Series instead
# of filtering both frames once per row. drop_duplicates keeps the first row per
# State, matching the original `.iloc[0]` lookup; a state with no 2008 row now
# yields NaN instead of raising IndexError.
change = p2012["PercentD"] - p2012["State"].map(
    p2008.drop_duplicates("State").set_index("State")["PercentD"]
)
p2012["Change"] = change
p2012.head()

Unnamed: 0,State,Democratic,Republican,Others,Year,Election,PercentD,Change
0,AL,795696,1255925,22717,2012,Pres,38.359033,-0.381401
1,AK,122640,164676,13179,2012,Pres,40.812659,2.923286
2,AZ,1025232,1233654,40368,2012,Pres,44.589767,-0.525484
3,AR,394409,647744,27315,2012,Pres,36.87899,-1.985669
4,CA,7854285,4839958,344304,2012,Pres,60.238959,-0.773679


In [39]:
# Change in Democratic vote share (percentage points) from 2004 to 2008, per state.
# Each state's 2004 PercentD is looked up through a State-indexed Series instead
# of filtering both frames once per row. drop_duplicates keeps the first row per
# State, matching the original `.iloc[0]` lookup; a state with no 2004 row now
# yields NaN instead of raising IndexError.
change = p2008["PercentD"] - p2008["State"].map(
    p2004.drop_duplicates("State").set_index("State")["PercentD"]
)
p2008["Change"] = change
p2008.head()

Unnamed: 0,State,Democratic,Republican,Others,Year,Election,PercentD,Change
0,AL,813479,1266546,19794,2008,Pres,38.740434,1.896697
1,AK,123594,193841,8762,2008,Pres,37.889374,2.372512
2,AZ,1034707,1230111,28657,2008,Pres,45.115251,0.718418
3,AR,422310,638017,26290,2008,Pres,38.86466,-5.682971
4,CA,8274473,5011781,275646,2008,Pres,61.012638,6.709262


In [41]:
# Change in Democratic vote share (percentage points) from 2000 to 2004, per state.
# Each state's 2000 PercentD is looked up through a State-indexed Series instead
# of filtering both frames once per row. drop_duplicates keeps the first row per
# State, matching the original `.iloc[0]` lookup; a state with no 2000 row now
# yields NaN instead of raising IndexError.
change = p2004["PercentD"] - p2004["State"].map(
    p2000.drop_duplicates("State").set_index("State")["PercentD"]
)
p2004["Change"] = change
p2004.head()

Unnamed: 0,State,Republican,Democratic,Others,Year,Election,PercentD,Change
0,AL,1176394,693933,13122,2004,Pres,36.843737,-4.722766
1,AK,190889,111025,10684,2004,Pres,35.516862,7.850522
2,AZ,1104294,893524,14767,2004,Pres,44.396833,-0.337752
3,AR,572898,469953,12094,2004,Pres,44.54763,-1.316626
4,CA,5509826,6745485,166541,2004,Pres,54.303376,0.853805
