In [2]:
#import dependencies
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import sem
from scipy.stats import linregress
from pprint import pprint

#file locations
election_data_2000_csv = "Resources/election_data2000.csv"
census_csv = "Resources/population2000-2016ALL.csv"

In [3]:
#read CSV files
election_data_2000= pd.read_csv(election_data_2000_csv)
census_data= pd.read_csv(census_csv)

In [4]:
#election data 2000
election_data_2000.head()

Unnamed: 0,year,state,state_po,county,candidate,party,candidatevotes,totalvotes
0,2000,Alabama,AL,Autauga,Al Gore,democrat,4942.0,17208
1,2000,Alabama,AL,Autauga,George W. Bush,republican,11993.0,17208
2,2000,Alabama,AL,Autauga,Ralph Nader,green,160.0,17208
3,2000,Alabama,AL,Baldwin,Al Gore,democrat,13997.0,56480
4,2000,Alabama,AL,Baldwin,George W. Bush,republican,40872.0,56480


In [5]:
#Census data
census_data.head()

Unnamed: 0,State,County,Location,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Alabama,Autauga,"Alabama, Autauga",44021,44889,45909,46800,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243
1,Alabama,Baldwin,"Alabama, Baldwin",141342,144875,147957,151509,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601
2,Alabama,Barbour,"Alabama, Barbour",29015,28863,28653,28594,28287,28027,27861,27757,27808,27657,27327,27341,27169,26937,26755,26283,25806
3,Alabama,Bibb,"Alabama, Bibb",19913,21028,21199,21399,21721,22042,22099,22438,22705,22941,22870,22745,22667,22521,22553,22566,22586
4,Alabama,Blount,"Alabama, Blount",51107,51845,52551,53457,54124,54624,55485,56240,57055,57341,57376,57560,57580,57619,57526,57526,57494


In [6]:
#creat 'Location' column in election data
election_data_2000['Location'] = (election_data_2000.state +", " +election_data_2000.county)
election_data_2000.head()

Unnamed: 0,year,state,state_po,county,candidate,party,candidatevotes,totalvotes,Location
0,2000,Alabama,AL,Autauga,Al Gore,democrat,4942.0,17208,"Alabama, Autauga"
1,2000,Alabama,AL,Autauga,George W. Bush,republican,11993.0,17208,"Alabama, Autauga"
2,2000,Alabama,AL,Autauga,Ralph Nader,green,160.0,17208,"Alabama, Autauga"
3,2000,Alabama,AL,Baldwin,Al Gore,democrat,13997.0,56480,"Alabama, Baldwin"
4,2000,Alabama,AL,Baldwin,George W. Bush,republican,40872.0,56480,"Alabama, Baldwin"


In [7]:
#check political parites
election_data_2000['party'].value_counts()

democrat      3152
republican    3152
green         2881
Name: party, dtype: int64

In [8]:
#filter out data to only see democrat party so there are less repeats in data when comparing
democrat_2000 = election_data_2000[election_data_2000['party']=="democrat"]
democrat_2000

Unnamed: 0,year,state,state_po,county,candidate,party,candidatevotes,totalvotes,Location
0,2000,Alabama,AL,Autauga,Al Gore,democrat,4942.0,17208,"Alabama, Autauga"
3,2000,Alabama,AL,Baldwin,Al Gore,democrat,13997.0,56480,"Alabama, Baldwin"
6,2000,Alabama,AL,Barbour,Al Gore,democrat,5188.0,10395,"Alabama, Barbour"
9,2000,Alabama,AL,Bibb,Al Gore,democrat,2710.0,7101,"Alabama, Bibb"
12,2000,Alabama,AL,Blount,Al Gore,democrat,4977.0,17973,"Alabama, Blount"
...,...,...,...,...,...,...,...,...,...
9170,2000,Alaska,AK,District 36,Al Gore,democrat,1945.0,5577,"Alaska, District 36"
9173,2000,Alaska,AK,District 37,Al Gore,democrat,1821.0,5028,"Alaska, District 37"
9176,2000,Alaska,AK,District 38,Al Gore,democrat,2015.0,4976,"Alaska, District 38"
9179,2000,Alaska,AK,District 39,Al Gore,democrat,2282.0,5188,"Alaska, District 39"


In [22]:
#create separate lists of the location column in each dataframe
csv2 = democrat_2000.Location.tolist()
csv3 = census_data.Location.tolist()

In [23]:
#find differences between columns
list_difference = []
for item in csv2:
  if item not in csv3:
    list_difference.append(item)

In [24]:
len(list_difference)

147

In [25]:
list_difference2 = []
for item in csv3:
  if item not in csv2:
    list_difference2.append(item)

In [26]:
len(list_difference2)

144

In [27]:
#create new dataframes from each list, name the only column 'location'
election_locations_2000 = pd.DataFrame(csv2, columns = ['location'])
census_locations_2000 = pd.DataFrame(csv3, columns = ['location'])

In [28]:
election_locations_2000

Unnamed: 0,location
0,"Alabama, Autauga"
1,"Alabama, Baldwin"
2,"Alabama, Barbour"
3,"Alabama, Bibb"
4,"Alabama, Blount"
...,...
3147,"Alaska, District 36"
3148,"Alaska, District 37"
3149,"Alaska, District 38"
3150,"Alaska, District 39"


In [29]:
census_locations_2000

Unnamed: 0,location
0,"Alabama, Autauga"
1,"Alabama, Baldwin"
2,"Alabama, Barbour"
3,"Alabama, Bibb"
4,"Alabama, Blount"
...,...
3139,"Wyoming, Sweetwater"
3140,"Wyoming, Teton"
3141,"Wyoming, Uinta"
3142,"Wyoming, Washakie"


In [30]:
result_locations_2000 = pd.merge(election_locations_2000, census_locations_2000, how='inner', left_on="location", right_on="location",
         left_index=False, right_index=False, sort=True,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate=None)

In [31]:
result_locations_2000

Unnamed: 0,location
0,"Alabama, Autauga"
1,"Alabama, Baldwin"
2,"Alabama, Barbour"
3,"Alabama, Bibb"
4,"Alabama, Blount"
...,...
3000,"Wyoming, Sweetwater"
3001,"Wyoming, Teton"
3002,"Wyoming, Uinta"
3003,"Wyoming, Washakie"


In [38]:
#inner merge the location data with the election data to drop the locations not present in both data sets.
election_result_locations_2000 = pd.merge(result_locations_2000, election_data_2000, how='inner', left_on="location", right_on="Location",
         left_index=False, right_index=False, sort=True,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate=None)

In [39]:
election_result_locations_2000

Unnamed: 0,location,year,state,state_po,county,candidate,party,candidatevotes,totalvotes,Location
0,"Alabama, Autauga",2000,Alabama,AL,Autauga,Al Gore,democrat,4942.0,17208,"Alabama, Autauga"
1,"Alabama, Autauga",2000,Alabama,AL,Autauga,George W. Bush,republican,11993.0,17208,"Alabama, Autauga"
2,"Alabama, Autauga",2000,Alabama,AL,Autauga,Ralph Nader,green,160.0,17208,"Alabama, Autauga"
3,"Alabama, Baldwin",2000,Alabama,AL,Baldwin,Al Gore,democrat,13997.0,56480,"Alabama, Baldwin"
4,"Alabama, Baldwin",2000,Alabama,AL,Baldwin,George W. Bush,republican,40872.0,56480,"Alabama, Baldwin"
...,...,...,...,...,...,...,...,...,...,...
8770,"Wyoming, Washakie",2000,Wyoming,WY,Washakie,George W. Bush,republican,3138.0,4051,"Wyoming, Washakie"
8771,"Wyoming, Washakie",2000,Wyoming,WY,Washakie,Ralph Nader,green,29.0,4051,"Wyoming, Washakie"
8772,"Wyoming, Weston",2000,Wyoming,WY,Weston,Al Gore,democrat,449.0,3060,"Wyoming, Weston"
8773,"Wyoming, Weston",2000,Wyoming,WY,Weston,George W. Bush,republican,2521.0,3060,"Wyoming, Weston"


In [40]:
election_result_locations_2000.count()

location          8775
year              8775
state             8775
state_po          8775
county            8775
candidate         8775
party             8775
candidatevotes    8775
totalvotes        8775
Location          8775
dtype: int64

In [41]:
#export to csv
election_result_locations_2000.to_csv(r'Resources/ELECTION_results_FINAL_LOCATIONS_2000.csv', index = False, header=True)


In [42]:
#merging census data with common locations 

census_result_locations_2000 = pd.merge(result_locations_2000, census_data, how='inner', left_on="location", right_on="Location",
         left_index=False, right_index=False, sort=True,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate=None)

In [43]:
census_result_locations_2000

Unnamed: 0,location,State,County,Location,2000,2001,2002,2003,2004,2005,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,"Alabama, Autauga",Alabama,Autauga,"Alabama, Autauga",44021,44889,45909,46800,48366,49676,...,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243
1,"Alabama, Baldwin",Alabama,Baldwin,"Alabama, Baldwin",141342,144875,147957,151509,156266,162183,...,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601
2,"Alabama, Barbour",Alabama,Barbour,"Alabama, Barbour",29015,28863,28653,28594,28287,28027,...,27757,27808,27657,27327,27341,27169,26937,26755,26283,25806
3,"Alabama, Bibb",Alabama,Bibb,"Alabama, Bibb",19913,21028,21199,21399,21721,22042,...,22438,22705,22941,22870,22745,22667,22521,22553,22566,22586
4,"Alabama, Blount",Alabama,Blount,"Alabama, Blount",51107,51845,52551,53457,54124,54624,...,56240,57055,57341,57376,57560,57580,57619,57526,57526,57494
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3000,"Wyoming, Sweetwater",Wyoming,Sweetwater,"Wyoming, Sweetwater",37552,36899,37428,37450,38026,38739,...,41470,42358,44133,43574,43986,45002,45157,44948,44719,44222
3001,"Wyoming, Teton",Wyoming,Teton,"Wyoming, Teton",18381,18653,18837,19066,19467,19632,...,20472,20988,21232,21296,21414,21624,22315,22773,23047,23234
3002,"Wyoming, Uinta",Wyoming,Uinta,"Wyoming, Uinta",19666,19413,19587,19480,19470,19494,...,20171,20613,21054,21089,20896,20996,20951,20822,20763,20682
3003,"Wyoming, Washakie",Wyoming,Washakie,"Wyoming, Washakie",8252,8068,7988,7976,7960,8022,...,8169,8229,8423,8530,8449,8409,8413,8273,8278,8165


In [44]:
#export census data to csv
census_result_locations_2000.to_csv(r'Resources/CENSUS_results_FINAL_LOCATIONS_2000.csv', index = False, header=True)
