In [1]:
#import dependencies
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import sem
from scipy.stats import linregress
from pprint import pprint

#file locations (paths are relative, inside the Resources folder)
#election file, loaded with pd.read_csv in the next cell
election_data_2004_csv = "Resources/election_data2004.csv"
#census file, loaded with pd.read_csv in the next cell
#NOTE(review): the file name suggests it covers 2000-2016 rather than 2004 only - confirm
census_data_2004_csv = "Resources/population2000-2016ALL.csv"

In [2]:
#load the election and census CSV files into DataFrames
election_data_2004 = pd.read_csv(election_data_2004_csv)
census_data_2004 = pd.read_csv(census_data_2004_csv)


In [3]:
#build a combined "state, county" key in a new Location column to merge on;
#county names alone are not precise enough because some are repeated in multiple states
election_data_2004['Location'] = election_data_2004['state'] + ", " + election_data_2004['county']
election_data_2004

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location
0,2004,Alabama,AL,Autauga,John Kerry,democrat,4758.0,20081,"Alabama, Autauga"
1,2004,Alabama,AL,Autauga,George W. Bush,republican,15196.0,20081,"Alabama, Autauga"
2,2004,Alabama,AL,Baldwin,John Kerry,democrat,15599.0,69320,"Alabama, Baldwin"
3,2004,Alabama,AL,Baldwin,George W. Bush,republican,52971.0,69320,"Alabama, Baldwin"
4,2004,Alabama,AL,Barbour,John Kerry,democrat,4832.0,10777,"Alabama, Barbour"
...,...,...,...,...,...,...,...,...,...
6303,2004,Alaska,AK,District 38,George W. Bush,republican,2459.0,5075,"Alaska, District 38"
6304,2004,Alaska,AK,District 39,John Kerry,democrat,2365.0,5568,"Alaska, District 39"
6305,2004,Alaska,AK,District 39,George W. Bush,republican,2881.0,5568,"Alaska, District 39"
6306,2004,Alaska,AK,District 40,John Kerry,democrat,2328.0,5788,"Alaska, District 40"


In [4]:
#remove the literal text ' County' from the census County names and store the cleaner names in a new column (new_county)
#regex=False keeps this a plain-text replacement whatever the installed pandas uses as its default for `regex`
#NOTE(review): new_county is not referenced by the later cells visible here, which match on the Location column - confirm whether Location should be rebuilt from it
census_data_2004['new_county'] = census_data_2004['County'].str.replace(' County', '', regex=False)


In [34]:
#confirm which political parties are present for this year (expecting only democrat/republican, no green/other/NaN)
#dropna=False makes missing party values show up in the counts; by default value_counts() leaves NaN out,
#so the plain call could not confirm that no NaN is present
election_data_2004['party'].value_counts(dropna=False)

democrat      3154
republican    3154
Name: party, dtype: int64

In [6]:
#keep only the rows whose party is "democrat" so there are fewer repeated locations when comparing
election_data_2004_11 = election_data_2004.loc[election_data_2004['party'] == "democrat"]
election_data_2004_11

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location
0,2004,Alabama,AL,Autauga,John Kerry,democrat,4758.0,20081,"Alabama, Autauga"
2,2004,Alabama,AL,Baldwin,John Kerry,democrat,15599.0,69320,"Alabama, Baldwin"
4,2004,Alabama,AL,Barbour,John Kerry,democrat,4832.0,10777,"Alabama, Barbour"
6,2004,Alabama,AL,Bibb,John Kerry,democrat,2089.0,7600,"Alabama, Bibb"
8,2004,Alabama,AL,Blount,John Kerry,democrat,3938.0,21504,"Alabama, Blount"
...,...,...,...,...,...,...,...,...,...
6298,2004,Alaska,AK,District 36,John Kerry,democrat,3245.0,9241,"Alaska, District 36"
6300,2004,Alaska,AK,District 37,John Kerry,democrat,1970.0,5208,"Alaska, District 37"
6302,2004,Alaska,AK,District 38,John Kerry,democrat,2366.0,5075,"Alaska, District 38"
6304,2004,Alaska,AK,District 39,John Kerry,democrat,2365.0,5568,"Alaska, District 39"


In [7]:
#pull the Location column of each DataFrame out into its own Python list
#csv2 holds the election (democrat rows) locations, csv3 holds the census locations
csv2 = election_data_2004_11['Location'].tolist()
csv3 = census_data_2004['Location'].tolist()

In [24]:
#find the election locations (csv2) that do not appear in the census locations (csv3)
#membership is tested against a set instead of the list so each lookup does not rescan csv3;
#the comprehension keeps the items in csv2 order, including any repeats, exactly like the original loop
census_location_set = set(csv3)
list_difference = [item for item in csv2 if item not in census_location_set]

In [25]:
#print(list_difference)

In [26]:
#find differences in the other direction: census locations (csv3) that do not appear in the election locations (csv2)
#membership is tested against a set instead of the list so each lookup does not rescan csv2;
#the comprehension keeps the items in csv3 order, including any repeats, exactly like the original loop
election_location_set = set(csv2)
list_difference2 = [item for item in csv3 if item not in election_location_set]

In [27]:
#print(list_difference2)

In [28]:
#count the census locations (from csv3) that have no match in the election locations (csv2)
len(list_difference2)

143

In [29]:
#count the election locations (from csv2) that have no match in the census locations (csv3)
len(list_difference)

148

In [14]:
#wrap each location list in a single-column DataFrame; the only column is named 'location'
election_locations_2004 = pd.DataFrame({'location': csv2})
census_locations_2004 = pd.DataFrame({'location': csv3})

In [30]:
#census_locations_2004

In [31]:
#inner-join the election and census location lists so only locations present in both remain
#(shorter, but matching data); sort=True orders the result by the join key
result_locations_2004 = election_locations_2004.merge(
    census_locations_2004,
    how='inner',
    on="location",
    sort=True,
)
            

In [32]:
#view results, 3006 rows.
result_locations_2004

Unnamed: 0,location
0,"Alabama, Autauga"
1,"Alabama, Baldwin"
2,"Alabama, Barbour"
3,"Alabama, Bibb"
4,"Alabama, Blount"
...,...
3001,"Wyoming, Sweetwater"
3002,"Wyoming, Teton"
3003,"Wyoming, Uinta"
3004,"Wyoming, Washakie"


In [18]:
#merge the common locations back onto the full election data (all parties) to keep only rows for matched locations
#the key columns have different names, so both 'location' and 'Location' appear in the result
#NOTE(review): the recorded output of this merge has more than two rows per matched location, which suggests duplicate Location keys - confirm
election_result_locations_2004 = result_locations_2004.merge(
    election_data_2004,
    how='inner',
    left_on="location",
    right_on="Location",
    sort=True,
)

In [19]:
#view results: election rows restricted to the locations found in both datasets
election_result_locations_2004

Unnamed: 0,location,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location
0,"Alabama, Autauga",2004,Alabama,AL,Autauga,John Kerry,democrat,4758.0,20081,"Alabama, Autauga"
1,"Alabama, Autauga",2004,Alabama,AL,Autauga,George W. Bush,republican,15196.0,20081,"Alabama, Autauga"
2,"Alabama, Baldwin",2004,Alabama,AL,Baldwin,John Kerry,democrat,15599.0,69320,"Alabama, Baldwin"
3,"Alabama, Baldwin",2004,Alabama,AL,Baldwin,George W. Bush,republican,52971.0,69320,"Alabama, Baldwin"
4,"Alabama, Barbour",2004,Alabama,AL,Barbour,John Kerry,democrat,4832.0,10777,"Alabama, Barbour"
...,...,...,...,...,...,...,...,...,...,...
6027,"Wyoming, Uinta",2004,Wyoming,WY,Uinta,George W. Bush,republican,6081.0,8081,"Wyoming, Uinta"
6028,"Wyoming, Washakie",2004,Wyoming,WY,Washakie,John Kerry,democrat,855.0,4114,"Wyoming, Washakie"
6029,"Wyoming, Washakie",2004,Wyoming,WY,Washakie,George W. Bush,republican,3200.0,4114,"Wyoming, Washakie"
6030,"Wyoming, Weston",2004,Wyoming,WY,Weston,John Kerry,democrat,580.0,3392,"Wyoming, Weston"


In [20]:
#write the matched ELECTION DATA results to csv (column header row included, DataFrame index left out)
election_result_locations_2004.to_csv(
    r'Resources/ELECTION_results_FINAL_LOCATIONS_2004.csv',
    index=False,
)


In [21]:
#merge the common locations onto the census data to keep only rows for matched locations
#the key columns have different names, so both 'location' and 'Location' appear in the result
census_result_locations_2004 = result_locations_2004.merge(
    census_data_2004,
    how='inner',
    left_on="location",
    right_on="Location",
    sort=True,
)

In [22]:
#view results: census rows restricted to the locations found in both datasets
census_result_locations_2004

Unnamed: 0,location,State,County,Location,2000,2001,2002,2003,2004,2005,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,new_county
0,"Alabama, Autauga",Alabama,Autauga,"Alabama, Autauga",44021,44889,45909,46800,48366,49676,...,53277,54135,54773,55227,54954,54727,54893,54864,55243,Autauga
1,"Alabama, Baldwin",Alabama,Baldwin,"Alabama, Baldwin",141342,144875,147957,151509,156266,162183,...,175827,179406,183112,186558,190145,194885,199183,202939,207601,Baldwin
2,"Alabama, Barbour",Alabama,Barbour,"Alabama, Barbour",29015,28863,28653,28594,28287,28027,...,27808,27657,27327,27341,27169,26937,26755,26283,25806,Barbour
3,"Alabama, Bibb",Alabama,Bibb,"Alabama, Bibb",19913,21028,21199,21399,21721,22042,...,22705,22941,22870,22745,22667,22521,22553,22566,22586,Bibb
4,"Alabama, Blount",Alabama,Blount,"Alabama, Blount",51107,51845,52551,53457,54124,54624,...,57055,57341,57376,57560,57580,57619,57526,57526,57494,Blount
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3001,"Wyoming, Sweetwater",Wyoming,Sweetwater,"Wyoming, Sweetwater",37552,36899,37428,37450,38026,38739,...,42358,44133,43574,43986,45002,45157,44948,44719,44222,Sweetwater
3002,"Wyoming, Teton",Wyoming,Teton,"Wyoming, Teton",18381,18653,18837,19066,19467,19632,...,20988,21232,21296,21414,21624,22315,22773,23047,23234,Teton
3003,"Wyoming, Uinta",Wyoming,Uinta,"Wyoming, Uinta",19666,19413,19587,19480,19470,19494,...,20613,21054,21089,20896,20996,20951,20822,20763,20682,Uinta
3004,"Wyoming, Washakie",Wyoming,Washakie,"Wyoming, Washakie",8252,8068,7988,7976,7960,8022,...,8229,8423,8530,8449,8409,8413,8273,8278,8165,Washakie


In [23]:
#write the matched CENSUS data results to CSV (column header row included, DataFrame index left out)
census_result_locations_2004.to_csv(
    r'Resources/CENSUS_results_FINAL_LOCATIONS_2004.csv',
    index=False,
)
