In [1]:
#import dependencies
%matplotlib inline
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress
from scipy.stats import sem

# Input CSV paths, relative to the notebook's working directory.
census_data_2004_csv = "Resources/population2000-2016ALL.csv"  # census file
election_data_2004_csv = "Resources/election_data2004.csv"  # election file

In [2]:
# Load both source CSVs into DataFrames (election first, then census).
election_data_2004, census_data_2004 = (
    pd.read_csv(csv_path)
    for csv_path in (election_data_2004_csv, census_data_2004_csv)
)


In [3]:
# Build a combined "State, County" key (Location) to merge on. County names
# alone repeat across states, so the state is included for precision.
election_data_2004["Location"] = (
    election_data_2004["state"] + ", " + election_data_2004["county"]
)
election_data_2004

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location
0,2004,Alabama,AL,Autauga,John Kerry,democrat,4758.0,20081,"Alabama, Autauga"
1,2004,Alabama,AL,Autauga,George W. Bush,republican,15196.0,20081,"Alabama, Autauga"
2,2004,Alabama,AL,Baldwin,John Kerry,democrat,15599.0,69320,"Alabama, Baldwin"
3,2004,Alabama,AL,Baldwin,George W. Bush,republican,52971.0,69320,"Alabama, Baldwin"
4,2004,Alabama,AL,Barbour,John Kerry,democrat,4832.0,10777,"Alabama, Barbour"
...,...,...,...,...,...,...,...,...,...
6303,2004,Alaska,AK,District 38,George W. Bush,republican,2459.0,5075,"Alaska, District 38"
6304,2004,Alaska,AK,District 39,John Kerry,democrat,2365.0,5568,"Alaska, District 39"
6305,2004,Alaska,AK,District 39,George W. Bush,republican,2881.0,5568,"Alaska, District 39"
6306,2004,Alaska,AK,District 40,John Kerry,democrat,2328.0,5788,"Alaska, District 40"


In [4]:
# Remove the text " County" from the census county names to get more location
# matches. The cleaned names go into a new column ('new_county'); the original
# 'County' column is left as-is.
# regex=False: " County" is a literal substring, and stating it explicitly keeps
# the result independent of the pandas version's default for `regex`.
census_data_2004['new_county'] = census_data_2004['County'].str.replace(' County', '', regex=False)


In [5]:
# Confirm which parties appear for this year (expecting no green/other/NaN).
# dropna=False is required for the NaN part of that check: value_counts()
# leaves missing values out of the tally by default, so they would go unseen.
election_data_2004['party'].value_counts(dropna=False)

democrat      3154
republican    3154
Name: party, dtype: int64

In [6]:
# Restrict to the democrat rows so each location is not repeated once per
# party when the location lists are compared.
election_data_2004_11 = election_data_2004.loc[
    election_data_2004["party"].eq("democrat")
]
election_data_2004_11

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location
0,2004,Alabama,AL,Autauga,John Kerry,democrat,4758.0,20081,"Alabama, Autauga"
2,2004,Alabama,AL,Baldwin,John Kerry,democrat,15599.0,69320,"Alabama, Baldwin"
4,2004,Alabama,AL,Barbour,John Kerry,democrat,4832.0,10777,"Alabama, Barbour"
6,2004,Alabama,AL,Bibb,John Kerry,democrat,2089.0,7600,"Alabama, Bibb"
8,2004,Alabama,AL,Blount,John Kerry,democrat,3938.0,21504,"Alabama, Blount"
...,...,...,...,...,...,...,...,...,...
6298,2004,Alaska,AK,District 36,John Kerry,democrat,3245.0,9241,"Alaska, District 36"
6300,2004,Alaska,AK,District 37,John Kerry,democrat,1970.0,5208,"Alaska, District 37"
6302,2004,Alaska,AK,District 38,John Kerry,democrat,2366.0,5075,"Alaska, District 38"
6304,2004,Alaska,AK,District 39,John Kerry,democrat,2365.0,5568,"Alaska, District 39"


In [7]:
# Extract the Location keys of each frame as plain Python lists.
# NOTE(review): no visible cell adds a 'Location' column to census_data_2004;
# presumably it is already present in the census CSV -- confirm.
csv2 = election_data_2004_11["Location"].tolist()
csv3 = census_data_2004["Location"].tolist()

In [8]:
# Election locations (csv2) that have no counterpart in the census list (csv3).
# Membership is checked against a set, so each lookup is constant-time instead
# of a scan over the whole census list. The order and any repeats of csv2 are
# preserved in the result.
_census_location_set = set(csv3)
list_difference = [item for item in csv2 if item not in _census_location_set]

In [82]:
#print(list_difference)

In [10]:
# The other direction: census locations (csv3) that have no counterpart in the
# election list (csv2). Membership is checked against a set, so each lookup is
# constant-time instead of a scan over the whole election list. The order and
# any repeats of csv3 are preserved in the result.
_election_location_set = set(csv2)
list_difference2 = [item for item in csv3 if item not in _election_location_set]

In [83]:
#print(list_difference2)

In [12]:
#number of census locations (csv3) that were not found in the election list (csv2)
len(list_difference2)

143

In [13]:
#number of election locations (csv2) that were not found in the census list (csv3)
len(list_difference)

148

In [14]:
# Wrap each key list in a single-column DataFrame (column 'location') so the
# two lists can be merged.
election_locations_2004 = pd.DataFrame(data=csv2, columns=["location"])
census_locations_2004 = pd.DataFrame(data=csv3, columns=["location"])

In [15]:
#census_locations_2004

In [16]:
# Inner-join the two key tables so only locations present in both the election
# and the census data remain; the result is sorted by the join key.
result_locations_2004 = election_locations_2004.merge(
    census_locations_2004,
    how="inner",
    left_on="location",
    right_on="location",
    sort=True,
)
            

In [41]:
# View results (3006 rows noted when this was written). Counts above 1 mark
# location keys that occur more than once in the merged key table.
result_locations_2004["location"].value_counts()


Virginia, Bedford          2
Virginia, Franklin         2
Virginia, Roanoke          2
Virginia, Fairfax          2
Virginia, Richmond         2
                          ..
North Carolina, Watauga    1
Texas, Duval               1
North Carolina, Durham     1
Arkansas, Pulaski          1
Nebraska, Wheeler          1
Name: location, Length: 3001, dtype: int64

In [18]:
# Attach the full election rows to the matched location keys. The key column is
# named 'location' on the left and 'Location' on the right, so both columns are
# present in the result.
election_result_locations_2004 = result_locations_2004.merge(
    election_data_2004,
    how="inner",
    left_on="location",
    right_on="Location",
    sort=True,
)

In [19]:
# Remove the lowercase helper key; the election data's own 'Location' column
# stays in place.
election_result_locations_2004_drop = election_result_locations_2004.drop(
    columns=["location"]
)


In [20]:
# Write the matched ELECTION rows to CSV (header row included, index omitted).
election_result_locations_2004_drop.to_csv(
    "Resources/ELECTION_results_FINAL_LOCATIONS_2004.csv",
    index=False,
)


In [21]:
# Attach the full census rows to the matched location keys. The key column is
# named 'location' on the left and 'Location' on the right, so both columns are
# present in the result.
census_result_locations_2004 = result_locations_2004.merge(
    census_data_2004,
    how="inner",
    left_on="location",
    right_on="Location",
    sort=True,
)

In [22]:
# Keep only the 2004 population figure: drop the lowercase helper key together
# with every other yearly column ('2000' through '2016', skipping '2004').
_other_year_columns = [str(year) for year in range(2000, 2017) if year != 2004]
census_result_locations_2004_drop = census_result_locations_2004.drop(
    columns=["location", *_other_year_columns]
)


In [23]:
# Write the matched CENSUS rows to CSV (header row included, index omitted).
census_result_locations_2004_drop.to_csv(
    "Resources/CENSUS_results_FINAL_LOCATIONS_2004.csv",
    index=False,
)


In [24]:
# Combine the matched election rows with the matched census rows, joining on
# the shared 'Location' key and sorting by it.
census_AND_election_2004_1 = election_result_locations_2004_drop.merge(
    census_result_locations_2004_drop,
    how="inner",
    left_on="Location",
    right_on="Location",
    sort=True,
)

In [25]:
# Display the combined election + census table.
census_AND_election_2004_1

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location,State,County,2004,new_county
0,2004,Alabama,AL,Autauga,John Kerry,democrat,4758.0,20081,"Alabama, Autauga",Alabama,Autauga,48366,Autauga
1,2004,Alabama,AL,Autauga,George W. Bush,republican,15196.0,20081,"Alabama, Autauga",Alabama,Autauga,48366,Autauga
2,2004,Alabama,AL,Baldwin,John Kerry,democrat,15599.0,69320,"Alabama, Baldwin",Alabama,Baldwin,156266,Baldwin
3,2004,Alabama,AL,Baldwin,George W. Bush,republican,52971.0,69320,"Alabama, Baldwin",Alabama,Baldwin,156266,Baldwin
4,2004,Alabama,AL,Barbour,John Kerry,democrat,4832.0,10777,"Alabama, Barbour",Alabama,Barbour,28287,Barbour
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6067,2004,Wyoming,WY,Uinta,George W. Bush,republican,6081.0,8081,"Wyoming, Uinta",Wyoming,Uinta,19470,Uinta
6068,2004,Wyoming,WY,Washakie,John Kerry,democrat,855.0,4114,"Wyoming, Washakie",Wyoming,Washakie,7960,Washakie
6069,2004,Wyoming,WY,Washakie,George W. Bush,republican,3200.0,4114,"Wyoming, Washakie",Wyoming,Washakie,7960,Washakie
6070,2004,Wyoming,WY,Weston,John Kerry,democrat,580.0,3392,"Wyoming, Weston",Wyoming,Weston,6646,Weston


In [26]:
# Drop the lowercase election 'state'/'county' columns and the helper
# 'new_county' column.
census_AND_election_2004_drop = census_AND_election_2004_1.drop(
    columns=["state", "county", "new_county"]
)


In [27]:
# Give the remaining election columns Title_Case names and label the 2004
# population column explicitly.
_column_renames = {
    "year": "Year",
    "state_code": "State_Code",
    "candidate": "Candidate",
    "party": "Party",
    "candidate_votes": "Candidate_Votes",
    "total_votes": "Total_Votes",
    "2004": "County_Pop_2004",
}
census_AND_election_2004_drop_rename = census_AND_election_2004_drop.rename(
    columns=_column_renames
)




In [28]:
# Select the final columns in presentation order; columns not listed here are
# left out of the result.
_final_column_order = [
    "County",
    "State",
    "State_Code",
    "Location",
    "Year",
    "Party",
    "Candidate",
    "Candidate_Votes",
    "Total_Votes",
    "County_Pop_2004",
]
census_AND_election_2004_reorder = census_AND_election_2004_drop_rename[_final_column_order]




In [29]:
# Display the renamed and reordered table.
census_AND_election_2004_reorder

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2004
0,Autauga,Alabama,AL,"Alabama, Autauga",2004,democrat,John Kerry,4758.0,20081,48366
1,Autauga,Alabama,AL,"Alabama, Autauga",2004,republican,George W. Bush,15196.0,20081,48366
2,Baldwin,Alabama,AL,"Alabama, Baldwin",2004,democrat,John Kerry,15599.0,69320,156266
3,Baldwin,Alabama,AL,"Alabama, Baldwin",2004,republican,George W. Bush,52971.0,69320,156266
4,Barbour,Alabama,AL,"Alabama, Barbour",2004,democrat,John Kerry,4832.0,10777,28287
...,...,...,...,...,...,...,...,...,...,...
6067,Uinta,Wyoming,WY,"Wyoming, Uinta",2004,republican,George W. Bush,6081.0,8081,19470
6068,Washakie,Wyoming,WY,"Wyoming, Washakie",2004,democrat,John Kerry,855.0,4114,7960
6069,Washakie,Wyoming,WY,"Wyoming, Washakie",2004,republican,George W. Bush,3200.0,4114,7960
6070,Weston,Wyoming,WY,"Wyoming, Weston",2004,democrat,John Kerry,580.0,3392,6646


In [59]:
# Select the republican rows, then wrap them in a separate DataFrame object
# for the sort and de-duplication steps.
republican_2004 = census_AND_election_2004_reorder[
    census_AND_election_2004_reorder["Party"].eq("republican")
]
republican_2004_1 = pd.DataFrame(data=republican_2004)


In [60]:
# Order the republican rows by their Location key.
republican_2004_1 = republican_2004_1.sort_values(by="Location")


In [61]:
# keep=False discards every row whose Location occurs more than once (no
# representative row is retained), leaving only Locations that appear once.
republican_2004_1 = republican_2004_1.drop_duplicates(subset="Location", keep=False)


In [89]:
# Display the de-duplicated republican rows.
republican_2004_1

Unnamed: 0,Total_Votes,2004_republican_votes,Location
1,20081,15196.0,"Alabama, Autauga"
3,69320,52971.0,"Alabama, Baldwin"
5,10777,5899.0,"Alabama, Barbour"
7,7600,5472.0,"Alabama, Bibb"
9,21504,17386.0,"Alabama, Blount"
...,...,...,...
6063,16272,10653.0,"Wyoming, Sweetwater"
6065,11359,5124.0,"Wyoming, Teton"
6067,8081,6081.0,"Wyoming, Uinta"
6069,4114,3200.0,"Wyoming, Washakie"


In [92]:
# Select the democrat rows, then wrap them in a separate DataFrame object for
# the sort and de-duplication steps.
democrat_2004 = census_AND_election_2004_reorder[
    census_AND_election_2004_reorder["Party"].eq("democrat")
]
democrat_2004_1 = pd.DataFrame(data=democrat_2004)


In [93]:
# Order the democrat rows by their Location key.
democrat_2004_1 = democrat_2004_1.sort_values(by="Location")


In [63]:
# keep=False discards every row whose Location occurs more than once (no
# representative row is retained), leaving only Locations that appear once.
democrat_2004_1 = democrat_2004_1.drop_duplicates(subset="Location", keep=False)
democrat_2004_1

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2004
0,Autauga,Alabama,AL,"Alabama, Autauga",2004,democrat,John Kerry,4758.0,20081,48366
2,Baldwin,Alabama,AL,"Alabama, Baldwin",2004,democrat,John Kerry,15599.0,69320,156266
4,Barbour,Alabama,AL,"Alabama, Barbour",2004,democrat,John Kerry,4832.0,10777,28287
6,Bibb,Alabama,AL,"Alabama, Bibb",2004,democrat,John Kerry,2089.0,7600,21721
8,Blount,Alabama,AL,"Alabama, Blount",2004,democrat,John Kerry,3938.0,21504,54124
...,...,...,...,...,...,...,...,...,...,...
6062,Sweetwater,Wyoming,WY,"Wyoming, Sweetwater",2004,democrat,John Kerry,5208.0,16272,38026
6064,Teton,Wyoming,WY,"Wyoming, Teton",2004,democrat,John Kerry,5972.0,11359,19467
6066,Uinta,Wyoming,WY,"Wyoming, Uinta",2004,democrat,John Kerry,1815.0,8081,19470
6068,Washakie,Wyoming,WY,"Wyoming, Washakie",2004,democrat,John Kerry,855.0,4114,7960


In [94]:
# Display the de-duplicated democrat rows.
democrat_2004_1

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2004
0,Autauga,Alabama,AL,"Alabama, Autauga",2004,democrat,John Kerry,4758.0,20081,48366
2,Baldwin,Alabama,AL,"Alabama, Baldwin",2004,democrat,John Kerry,15599.0,69320,156266
4,Barbour,Alabama,AL,"Alabama, Barbour",2004,democrat,John Kerry,4832.0,10777,28287
6,Bibb,Alabama,AL,"Alabama, Bibb",2004,democrat,John Kerry,2089.0,7600,21721
8,Blount,Alabama,AL,"Alabama, Blount",2004,democrat,John Kerry,3938.0,21504,54124
...,...,...,...,...,...,...,...,...,...,...
6062,Sweetwater,Wyoming,WY,"Wyoming, Sweetwater",2004,democrat,John Kerry,5208.0,16272,38026
6064,Teton,Wyoming,WY,"Wyoming, Teton",2004,democrat,John Kerry,5972.0,11359,19467
6066,Uinta,Wyoming,WY,"Wyoming, Uinta",2004,democrat,John Kerry,1815.0,8081,19470
6068,Washakie,Wyoming,WY,"Wyoming, Washakie",2004,democrat,John Kerry,855.0,4114,7960


In [95]:
# Collect the columns needed downstream and label the vote count by year/party.
# republican_2004_1 is derived from census_AND_election_2004_reorder, whose vote
# column is 'Candidate_Votes'. Selecting '2004_republican_votes' from it
# directly raises KeyError on a fresh kernel, so the column is renamed first.
# rename() ignores labels that are absent, so this also works if the frame
# already carries the renamed column.
republican_2004_2 = (
    republican_2004_1
    .rename(columns={"Candidate_Votes": "2004_republican_votes"})
    [["Total_Votes", "2004_republican_votes", "Location"]]
)
republican_2004_2

Unnamed: 0,Total_Votes,2004_republican_votes,Location
1,20081,15196.0,"Alabama, Autauga"
3,69320,52971.0,"Alabama, Baldwin"
5,10777,5899.0,"Alabama, Barbour"
7,7600,5472.0,"Alabama, Bibb"
9,21504,17386.0,"Alabama, Blount"
...,...,...,...
6063,16272,10653.0,"Wyoming, Sweetwater"
6065,11359,5124.0,"Wyoming, Teton"
6067,8081,6081.0,"Wyoming, Uinta"
6069,4114,3200.0,"Wyoming, Washakie"


In [97]:
# Collect the columns needed downstream and label the vote count by year/party.
democrat_2004_2 = (
    democrat_2004_1[["Total_Votes", "Candidate_Votes", "Location"]]
    .rename(columns={"Candidate_Votes": "2004_democrat_votes"})
)
democrat_2004_2.head()

Unnamed: 0,Total_Votes,2004_democrat_votes,Location
0,20081,4758.0,"Alabama, Autauga"
2,69320,15599.0,"Alabama, Baldwin"
4,10777,4832.0,"Alabama, Barbour"
6,7600,2089.0,"Alabama, Bibb"
8,21504,3938.0,"Alabama, Blount"


In [98]:
# Join the two per-party vote tables on Location. Both inputs carry a
# 'Total_Votes' column, so the merge suffixes them (_x from the republican
# table, _y from the democrat table); only the _x copy is kept, renamed to
# 'Total_Votes_2004'.
election_parties_2004 = (
    republican_2004_2
    .merge(democrat_2004_2, on="Location")
    [["Location", "Total_Votes_x", "2004_republican_votes", "2004_democrat_votes"]]
    .rename(columns={"Total_Votes_x": "Total_Votes_2004"})
)
election_parties_2004


Unnamed: 0,Location,Total_Votes_2004,2004_republican_votes,2004_democrat_votes
0,"Alabama, Autauga",20081,15196.0,4758.0
1,"Alabama, Baldwin",69320,52971.0,15599.0
2,"Alabama, Barbour",10777,5899.0,4832.0
3,"Alabama, Bibb",7600,5472.0,2089.0
4,"Alabama, Blount",21504,17386.0,3938.0
...,...,...,...,...
3311,"Wyoming, Sweetwater",16272,10653.0,5208.0
3312,"Wyoming, Teton",11359,5124.0,5972.0
3313,"Wyoming, Uinta",8081,6081.0,1815.0
3314,"Wyoming, Washakie",4114,3200.0,855.0


In [70]:
# Write the per-location party vote table to CSV (header row included, index
# omitted).
election_parties_2004.to_csv(
    "Resources/FOR_JINHO_2004.csv",
    index=False,
)

In [74]:
# Join the per-location party totals back onto the per-candidate
# election/census rows, matching on Location.
election_census_parties_2004 = election_parties_2004.merge(
    census_AND_election_2004_reorder,
    on="Location",
)
election_census_parties_2004

Unnamed: 0,Location,Total_Votes_2004,2004_republican_votes,2004_democrat_votes,County,State,State_Code,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2004
0,"Alabama, Autauga",20081,15196.0,4758.0,Autauga,Alabama,AL,2004,democrat,John Kerry,4758.0,20081,48366
1,"Alabama, Autauga",20081,15196.0,4758.0,Autauga,Alabama,AL,2004,republican,George W. Bush,15196.0,20081,48366
2,"Alabama, Baldwin",69320,52971.0,15599.0,Baldwin,Alabama,AL,2004,democrat,John Kerry,15599.0,69320,156266
3,"Alabama, Baldwin",69320,52971.0,15599.0,Baldwin,Alabama,AL,2004,republican,George W. Bush,52971.0,69320,156266
4,"Alabama, Barbour",10777,5899.0,4832.0,Barbour,Alabama,AL,2004,democrat,John Kerry,4832.0,10777,28287
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5987,"Wyoming, Uinta",8081,6081.0,1815.0,Uinta,Wyoming,WY,2004,republican,George W. Bush,6081.0,8081,19470
5988,"Wyoming, Washakie",4114,3200.0,855.0,Washakie,Wyoming,WY,2004,democrat,John Kerry,855.0,4114,7960
5989,"Wyoming, Washakie",4114,3200.0,855.0,Washakie,Wyoming,WY,2004,republican,George W. Bush,3200.0,4114,7960
5990,"Wyoming, Weston",3392,2739.0,580.0,Weston,Wyoming,WY,2004,democrat,John Kerry,580.0,3392,6646


In [78]:
# Drop the per-party total columns along with 'Year' and 'Candidate_Votes'.
_columns_to_drop = [
    "Total_Votes_2004",
    "2004_republican_votes",
    "2004_democrat_votes",
    "Year",
    "Candidate_Votes",
]
election_census_parties_2004_drop = election_census_parties_2004.drop(
    columns=_columns_to_drop
)


In [99]:
# Write the final table to CSV (header row included, index omitted).
# The target folder is nested below Resources and no cell in this notebook
# creates it; to_csv does not create missing directories, so make sure the
# folder exists before writing.
_final_output_csv = Path('Resources/Analysis Files/2004/CENSUS_and_POPULATION_FINAL_2004.csv')
_final_output_csv.parent.mkdir(parents=True, exist_ok=True)
election_census_parties_2004_drop.to_csv(_final_output_csv, index=False, header=True)
