In [1]:
#import dependencies
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import sem
from scipy.stats import linregress
from pprint import pprint

# Input CSV paths, relative to the notebook's working directory:
# first the election results file, then the census population file.
election_data_2016_csv, census_data_2016_csv = (
    "Resources/election_data2016.csv",
    "Resources/population2000-2016ALL.csv",
)

In [2]:
# Load both source files into DataFrames with pandas' default CSV parsing
# (election file first, census file second).
election_data_2016, census_data_2016 = (
    pd.read_csv(csv_path)
    for csv_path in (election_data_2016_csv, census_data_2016_csv)
)


In [3]:
# Build a "State, County" key in a new 'Location' column. County names alone
# are not unique (the same name occurs in several states), so the state is
# included to make the later merges more precise.
election_data_2016['Location'] = election_data_2016['state'] + ", " + election_data_2016['county']
election_data_2016

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location
0,2016,Alabama,AL,Autauga,Hillary Clinton,democrat,5936.0,24973,"Alabama, Autauga"
1,2016,Alabama,AL,Autauga,Donald Trump,republican,18172.0,24973,"Alabama, Autauga"
2,2016,Alabama,AL,Baldwin,Hillary Clinton,democrat,18458.0,95215,"Alabama, Baldwin"
3,2016,Alabama,AL,Baldwin,Donald Trump,republican,72883.0,95215,"Alabama, Baldwin"
4,2016,Alabama,AL,Barbour,Hillary Clinton,democrat,4871.0,10469,"Alabama, Barbour"
...,...,...,...,...,...,...,...,...,...
6301,2016,Alaska,AK,District 38,Donald Trump,republican,1143.0,5095,"Alaska, District 38"
6302,2016,Alaska,AK,District 39,Hillary Clinton,democrat,3142.0,5639,"Alaska, District 39"
6303,2016,Alaska,AK,District 39,Donald Trump,republican,1405.0,5639,"Alaska, District 39"
6304,2016,Alaska,AK,District 40,Hillary Clinton,democrat,2338.0,4610,"Alaska, District 40"


In [4]:
# Strip the literal text ' County' from the census county names so that more
# locations match the election data; the cleaned names go in 'new_county'.
# regex=False makes the literal-substring replacement explicit, because the
# default of `regex` differs between pandas 1.x (True) and 2.x (False).
census_data_2016['new_county'] = census_data_2016['County'].str.replace(' County', '', regex=False)


In [5]:
# Confirm which political parties appear in the election data
# (no green/other/NaN entries are present for this year).
election_data_2016.loc[:, 'party'].value_counts()

republican    3153
democrat      3153
Name: party, dtype: int64

In [6]:
# Keep only the democrat rows so that repeated rows are reduced when the
# location lists are compared.
is_democrat_row = election_data_2016['party'].eq("democrat")
election_data_2016_11 = election_data_2016.loc[is_democrat_row]
election_data_2016_11

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location
0,2016,Alabama,AL,Autauga,Hillary Clinton,democrat,5936.0,24973,"Alabama, Autauga"
2,2016,Alabama,AL,Baldwin,Hillary Clinton,democrat,18458.0,95215,"Alabama, Baldwin"
4,2016,Alabama,AL,Barbour,Hillary Clinton,democrat,4871.0,10469,"Alabama, Barbour"
6,2016,Alabama,AL,Bibb,Hillary Clinton,democrat,1874.0,8819,"Alabama, Bibb"
8,2016,Alabama,AL,Blount,Hillary Clinton,democrat,2156.0,25588,"Alabama, Blount"
...,...,...,...,...,...,...,...,...,...
6296,2016,Alaska,AK,District 36,Hillary Clinton,democrat,2693.0,8264,"Alaska, District 36"
6298,2016,Alaska,AK,District 37,Hillary Clinton,democrat,2421.0,5062,"Alaska, District 37"
6300,2016,Alaska,AK,District 38,Hillary Clinton,democrat,2758.0,5095,"Alaska, District 38"
6302,2016,Alaska,AK,District 39,Hillary Clinton,democrat,3142.0,5639,"Alaska, District 39"


In [7]:
# Pull the 'Location' column of each DataFrame out into a plain Python list:
# csv2 holds the election locations, csv3 the census locations.
csv2 = election_data_2016_11['Location'].to_list()
csv3 = census_data_2016['Location'].to_list()

In [8]:
# Election locations (csv2) that do not occur in the census locations (csv3).
# Membership is tested against a set instead of the list, so each lookup is
# constant-time rather than a scan of csv3; the comprehension keeps csv2's
# order and any repeated entries, exactly like the original loop.
census_location_set = set(csv3)
list_difference = [item for item in csv2 if item not in census_location_set]

In [67]:
#print(list_difference)

In [10]:
# Differences in the other direction: census locations (csv3) that do not
# occur in the election locations (csv2).
# Membership is tested against a set instead of the list, so each lookup is
# constant-time rather than a scan of csv2; the comprehension keeps csv3's
# order and any repeated entries, exactly like the original loop.
election_location_set = set(csv2)
list_difference2 = [item for item in csv3 if item not in election_location_set]

In [68]:
#print(list_difference2)

In [12]:
# Number of census locations (csv3) that have no match among the election locations (csv2).
len(list_difference2)

143

In [13]:
# Number of election locations (csv2) that have no match among the census locations (csv3).
len(list_difference)

148

In [14]:
# Wrap each location list in a one-column DataFrame; the column is named
# 'location' in both so they can be joined on it.
election_locations_2016 = pd.DataFrame({'location': csv2})
census_locations_2016 = pd.DataFrame({'location': csv3})

In [15]:
#census_locations_2016

In [16]:
#election_locations_2016

In [17]:
# Inner-join the two location frames so only locations present in BOTH the
# election and the census data remain (a shorter, matching set), sorted by key.
# NOTE(review): a location string that is repeated in either input yields
# repeated rows here, because the join is not restricted to unique keys.
result_locations_2016 = election_locations_2016.merge(
    census_locations_2016,
    on="location",
    how='inner',
    sort=True,
)
            

In [69]:
# View results (3005 rows): count how often each common location occurs.
# A count above 1 means that location string is repeated in the merged frame.
result_locations_2016['location'].value_counts()

Virginia, Fairfax        2
Virginia, Roanoke        2
Virginia, Richmond       2
Virginia, Franklin       2
Texas, Newton            1
                        ..
Wisconsin, Jefferson     1
Minnesota, Kanabec       1
North Dakota, LaMoure    1
Michigan, Van Buren      1
Montana, McCone          1
Name: location, Length: 3001, dtype: int64

In [19]:
# Attach the full election rows to the common locations: inner join of
# result_locations_2016.location against election_data_2016.Location,
# with the output sorted by the join key.
election_result_locations_2016 = result_locations_2016.merge(
    election_data_2016,
    left_on="location",
    right_on="Location",
    how='inner',
    sort=True,
)

In [20]:
# The merged election frame has 6026 rows (per the original run).
# Remove the lower-case 'location' helper column; the capitalised 'Location'
# column from the election data stays.
election_result_locations_2016_drop = election_result_locations_2016.drop(columns=["location"])


In [21]:
# Write the ELECTION DATA results to CSV, with a header row and without the row index.
election_result_locations_2016_drop.to_csv(
    r'Resources/ELECTION_results_FINAL_LOCATIONS_2016.csv',
    header=True,
    index=False,
)


In [22]:
# Attach the full census rows to the common locations: inner join of
# result_locations_2016.location against census_data_2016.Location,
# with the output sorted by the join key.
census_result_locations_2016 = result_locations_2016.merge(
    census_data_2016,
    left_on="location",
    right_on="Location",
    how='inner',
    sort=True,
)

In [23]:
# Drop the lower-case 'location' helper column together with the yearly
# columns '2000' through '2015'; the '2016' column is not in the drop list.
census_result_locations_2016_drop = census_result_locations_2016.drop(
    columns=['location', *(str(year) for year in range(2000, 2016))]
)


In [24]:
# Write the CENSUS results to CSV, with a header row and without the row index.
census_result_locations_2016_drop.to_csv(
    r'Resources/CENSUS_results_FINAL_LOCATIONS_2016.csv',
    header=True,
    index=False,
)


In [25]:
# Combine the filtered election and census frames into one table: inner join
# on the shared 'Location' column, sorted by that key.
census_AND_election_2016_1 = election_result_locations_2016_drop.merge(
    census_result_locations_2016_drop,
    on="Location",
    how='inner',
    sort=True,
)

In [26]:
# Display the combined election + census table produced by the merge on 'Location'.
census_AND_election_2016_1

Unnamed: 0,year,state,state_code,county,candidate,party,candidate_votes,total_votes,Location,State,County,2016,new_county
0,2016,Alabama,AL,Autauga,Hillary Clinton,democrat,5936.0,24973,"Alabama, Autauga",Alabama,Autauga,55243,Autauga
1,2016,Alabama,AL,Autauga,Donald Trump,republican,18172.0,24973,"Alabama, Autauga",Alabama,Autauga,55243,Autauga
2,2016,Alabama,AL,Baldwin,Hillary Clinton,democrat,18458.0,95215,"Alabama, Baldwin",Alabama,Baldwin,207601,Baldwin
3,2016,Alabama,AL,Baldwin,Donald Trump,republican,72883.0,95215,"Alabama, Baldwin",Alabama,Baldwin,207601,Baldwin
4,2016,Alabama,AL,Barbour,Hillary Clinton,democrat,4871.0,10469,"Alabama, Barbour",Alabama,Barbour,25806,Barbour
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6053,2016,Wyoming,WY,Uinta,Donald Trump,republican,6154.0,8470,"Wyoming, Uinta",Wyoming,Uinta,20682,Uinta
6054,2016,Wyoming,WY,Washakie,Hillary Clinton,democrat,532.0,3814,"Wyoming, Washakie",Wyoming,Washakie,8165,Washakie
6055,2016,Wyoming,WY,Washakie,Donald Trump,republican,2911.0,3814,"Wyoming, Washakie",Wyoming,Washakie,8165,Washakie
6056,2016,Wyoming,WY,Weston,Hillary Clinton,democrat,299.0,3526,"Wyoming, Weston",Wyoming,Weston,7220,Weston


In [27]:
# Remove the lower-case 'state' and 'county' columns and the 'new_county'
# helper; the capitalised 'State' and 'County' columns are kept.
census_AND_election_2016_drop = census_AND_election_2016_1.drop(
    columns=['state', 'county', 'new_county']
)


In [28]:
# Give the remaining columns capitalised, descriptive names; '2016' becomes
# 'County_Pop_2016'. Columns not listed in the mapping keep their names.
census_AND_election_2016_drop_rename = census_AND_election_2016_drop.rename(
    columns={
        "year": "Year",
        "state_code": "State_Code",
        "candidate": "Candidate",
        "party": "Party",
        "candidate_votes": "Candidate_Votes",
        "total_votes": "Total_Votes",
        "2016": "County_Pop_2016",
    }
)




In [29]:
# Select the final columns in presentation order: geography first, then the
# election fields, then the 2016 county population.
census_AND_election_2016_reorder = census_AND_election_2016_drop_rename.loc[
    :,
    [
        "County", "State", "State_Code", "Location",
        "Year", "Party", "Candidate",
        "Candidate_Votes", "Total_Votes", "County_Pop_2016",
    ],
]


In [30]:
# Display the renamed and reordered election + census table.
census_AND_election_2016_reorder

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2016
0,Autauga,Alabama,AL,"Alabama, Autauga",2016,democrat,Hillary Clinton,5936.0,24973,55243
1,Autauga,Alabama,AL,"Alabama, Autauga",2016,republican,Donald Trump,18172.0,24973,55243
2,Baldwin,Alabama,AL,"Alabama, Baldwin",2016,democrat,Hillary Clinton,18458.0,95215,207601
3,Baldwin,Alabama,AL,"Alabama, Baldwin",2016,republican,Donald Trump,72883.0,95215,207601
4,Barbour,Alabama,AL,"Alabama, Barbour",2016,democrat,Hillary Clinton,4871.0,10469,25806
...,...,...,...,...,...,...,...,...,...,...
6053,Uinta,Wyoming,WY,"Wyoming, Uinta",2016,republican,Donald Trump,6154.0,8470,20682
6054,Washakie,Wyoming,WY,"Wyoming, Washakie",2016,democrat,Hillary Clinton,532.0,3814,8165
6055,Washakie,Wyoming,WY,"Wyoming, Washakie",2016,republican,Donald Trump,2911.0,3814,8165
6056,Weston,Wyoming,WY,"Wyoming, Weston",2016,democrat,Hillary Clinton,299.0,3526,7220


In [100]:
# Rows of the combined table whose Party is 'republican'.
republican_2016 = census_AND_election_2016_reorder.loc[census_AND_election_2016_reorder["Party"] == 'republican']
# Work on an explicit, independent copy: later cells sort and de-duplicate this
# frame in place, and pd.DataFrame(frame) does not copy its input by default.
republican_2016_1 = republican_2016.copy()
republican_2016_1

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2016
1,Autauga,Alabama,AL,"Alabama, Autauga",2016,republican,Donald Trump,18172.0,24973,55243
3,Baldwin,Alabama,AL,"Alabama, Baldwin",2016,republican,Donald Trump,72883.0,95215,207601
5,Barbour,Alabama,AL,"Alabama, Barbour",2016,republican,Donald Trump,5454.0,10469,25806
7,Bibb,Alabama,AL,"Alabama, Bibb",2016,republican,Donald Trump,6738.0,8819,22586
9,Blount,Alabama,AL,"Alabama, Blount",2016,republican,Donald Trump,22859.0,25588,57494
...,...,...,...,...,...,...,...,...,...,...
6049,Sweetwater,Wyoming,WY,"Wyoming, Sweetwater",2016,republican,Donald Trump,12154.0,17130,44222
6051,Teton,Wyoming,WY,"Wyoming, Teton",2016,republican,Donald Trump,3921.0,12627,23234
6053,Uinta,Wyoming,WY,"Wyoming, Uinta",2016,republican,Donald Trump,6154.0,8470,20682
6055,Washakie,Wyoming,WY,"Wyoming, Washakie",2016,republican,Donald Trump,2911.0,3814,8165


In [101]:
# Sort the republican rows by 'Location' and show the result.
republican_2016_1 = republican_2016_1.sort_values("Location")
republican_2016_1

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2016
1,Autauga,Alabama,AL,"Alabama, Autauga",2016,republican,Donald Trump,18172.0,24973,55243
3,Baldwin,Alabama,AL,"Alabama, Baldwin",2016,republican,Donald Trump,72883.0,95215,207601
5,Barbour,Alabama,AL,"Alabama, Barbour",2016,republican,Donald Trump,5454.0,10469,25806
7,Bibb,Alabama,AL,"Alabama, Bibb",2016,republican,Donald Trump,6738.0,8819,22586
9,Blount,Alabama,AL,"Alabama, Blount",2016,republican,Donald Trump,22859.0,25588,57494
...,...,...,...,...,...,...,...,...,...,...
6049,Sweetwater,Wyoming,WY,"Wyoming, Sweetwater",2016,republican,Donald Trump,12154.0,17130,44222
6051,Teton,Wyoming,WY,"Wyoming, Teton",2016,republican,Donald Trump,3921.0,12627,23234
6053,Uinta,Wyoming,WY,"Wyoming, Uinta",2016,republican,Donald Trump,6154.0,8470,20682
6055,Washakie,Wyoming,WY,"Wyoming, Washakie",2016,republican,Donald Trump,2911.0,3814,8165


In [73]:
# Discard every republican row whose 'Location' occurs more than once.
# keep=False removes ALL rows of a repeated location, not just the extras.
republican_2016_1 = republican_2016_1.drop_duplicates(subset="Location", keep=False)


In [102]:
# Display the republican rows in their current state.
republican_2016_1

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2016
1,Autauga,Alabama,AL,"Alabama, Autauga",2016,republican,Donald Trump,18172.0,24973,55243
3,Baldwin,Alabama,AL,"Alabama, Baldwin",2016,republican,Donald Trump,72883.0,95215,207601
5,Barbour,Alabama,AL,"Alabama, Barbour",2016,republican,Donald Trump,5454.0,10469,25806
7,Bibb,Alabama,AL,"Alabama, Bibb",2016,republican,Donald Trump,6738.0,8819,22586
9,Blount,Alabama,AL,"Alabama, Blount",2016,republican,Donald Trump,22859.0,25588,57494
...,...,...,...,...,...,...,...,...,...,...
6049,Sweetwater,Wyoming,WY,"Wyoming, Sweetwater",2016,republican,Donald Trump,12154.0,17130,44222
6051,Teton,Wyoming,WY,"Wyoming, Teton",2016,republican,Donald Trump,3921.0,12627,23234
6053,Uinta,Wyoming,WY,"Wyoming, Uinta",2016,republican,Donald Trump,6154.0,8470,20682
6055,Washakie,Wyoming,WY,"Wyoming, Washakie",2016,republican,Donald Trump,2911.0,3814,8165


In [103]:
# Rows of the combined table whose Party is 'democrat'.
democrat_2016 = census_AND_election_2016_reorder.loc[census_AND_election_2016_reorder["Party"] == 'democrat']
# Work on an explicit, independent copy: later cells sort and de-duplicate this
# frame in place, and pd.DataFrame(frame) does not copy its input by default.
democrat_2016_1 = democrat_2016.copy()


In [104]:
# Sort the democrat rows by 'Location'.
democrat_2016_1 = democrat_2016_1.sort_values("Location")


In [105]:
# Discard every democrat row whose 'Location' occurs more than once.
# keep=False removes ALL rows of a repeated location, not just the extras.
democrat_2016_1 = democrat_2016_1.drop_duplicates(subset="Location", keep=False)
democrat_2016_1

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2016
0,Autauga,Alabama,AL,"Alabama, Autauga",2016,democrat,Hillary Clinton,5936.0,24973,55243
2,Baldwin,Alabama,AL,"Alabama, Baldwin",2016,democrat,Hillary Clinton,18458.0,95215,207601
4,Barbour,Alabama,AL,"Alabama, Barbour",2016,democrat,Hillary Clinton,4871.0,10469,25806
6,Bibb,Alabama,AL,"Alabama, Bibb",2016,democrat,Hillary Clinton,1874.0,8819,22586
8,Blount,Alabama,AL,"Alabama, Blount",2016,democrat,Hillary Clinton,2156.0,25588,57494
...,...,...,...,...,...,...,...,...,...,...
6048,Sweetwater,Wyoming,WY,"Wyoming, Sweetwater",2016,democrat,Hillary Clinton,3231.0,17130,44222
6050,Teton,Wyoming,WY,"Wyoming, Teton",2016,democrat,Hillary Clinton,7314.0,12627,23234
6052,Uinta,Wyoming,WY,"Wyoming, Uinta",2016,democrat,Hillary Clinton,1202.0,8470,20682
6054,Washakie,Wyoming,WY,"Wyoming, Washakie",2016,democrat,Hillary Clinton,532.0,3814,8165


In [106]:
# Display the democrat rows in their current state.
democrat_2016_1

Unnamed: 0,County,State,State_Code,Location,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2016
0,Autauga,Alabama,AL,"Alabama, Autauga",2016,democrat,Hillary Clinton,5936.0,24973,55243
2,Baldwin,Alabama,AL,"Alabama, Baldwin",2016,democrat,Hillary Clinton,18458.0,95215,207601
4,Barbour,Alabama,AL,"Alabama, Barbour",2016,democrat,Hillary Clinton,4871.0,10469,25806
6,Bibb,Alabama,AL,"Alabama, Bibb",2016,democrat,Hillary Clinton,1874.0,8819,22586
8,Blount,Alabama,AL,"Alabama, Blount",2016,democrat,Hillary Clinton,2156.0,25588,57494
...,...,...,...,...,...,...,...,...,...,...
6048,Sweetwater,Wyoming,WY,"Wyoming, Sweetwater",2016,democrat,Hillary Clinton,3231.0,17130,44222
6050,Teton,Wyoming,WY,"Wyoming, Teton",2016,democrat,Hillary Clinton,7314.0,12627,23234
6052,Uinta,Wyoming,WY,"Wyoming, Uinta",2016,democrat,Hillary Clinton,1202.0,8470,20682
6054,Washakie,Wyoming,WY,"Wyoming, Washakie",2016,democrat,Hillary Clinton,532.0,3814,8165


In [108]:
# collecting required columns...
republican_2016_2 = republican_2016_1[['Total_Votes', 'Candidate_Votes', 'Location', 'State_Code']]
republican_2016_2.columns = ['Total_Votes', 'Candidate_Votes', 'Location', 'State_Code']
republican_2016_2

Unnamed: 0,Total_Votes,Candidate_Votes,Location,State_Code
1,24973,18172.0,"Alabama, Autauga",AL
3,95215,72883.0,"Alabama, Baldwin",AL
5,10469,5454.0,"Alabama, Barbour",AL
7,8819,6738.0,"Alabama, Bibb",AL
9,25588,22859.0,"Alabama, Blount",AL
...,...,...,...,...
6049,17130,12154.0,"Wyoming, Sweetwater",WY
6051,12627,3921.0,"Wyoming, Teton",WY
6053,8470,6154.0,"Wyoming, Uinta",WY
6055,3814,2911.0,"Wyoming, Washakie",WY


In [114]:
# Keep only the columns needed for the per-location party comparison.
# The selection already carries these exact labels, so the original follow-up
# assignment of the same names to `.columns` was redundant and is omitted.
democrat_2016_2 = democrat_2016_1[['Total_Votes', 'Candidate_Votes', 'Location', 'State_Code']]
democrat_2016_2.head()

Unnamed: 0,Total_Votes,Candidate_Votes,Location,State_Code
0,24973,5936.0,"Alabama, Autauga",AL
2,95215,18458.0,"Alabama, Baldwin",AL
4,10469,4871.0,"Alabama, Barbour",AL
6,8819,1874.0,"Alabama, Bibb",AL
8,25588,2156.0,"Alabama, Blount",AL


In [121]:
# Put each location's republican and democrat figures side by side.
# After the join on 'Location', the '_x' columns come from the republican
# frame and the '_y' columns from the democrat frame; the republican side's
# total votes and state code are kept, and the columns get explicit names.
election_parties_2016 = (
    republican_2016_2
    .merge(democrat_2016_2, on='Location')
    .loc[:, ['Location', 'Total_Votes_x', 'Candidate_Votes_x', 'Candidate_Votes_y', 'State_Code_x']]
    .rename(columns={
        'Total_Votes_x': 'Total_Votes_2016',
        'Candidate_Votes_x': '2016_republican_votes',
        'Candidate_Votes_y': '2016_democrat_votes',
        'State_Code_x': 'State_Code',
    })
)
election_parties_2016


Unnamed: 0,Location,Total_Votes_2016,2016_republican_votes,2016_democrat_votes,State_Code
0,"Alabama, Autauga",24973,18172.0,5936.0,AL
1,"Alabama, Baldwin",95215,72883.0,18458.0,AL
2,"Alabama, Barbour",10469,5454.0,4871.0,AL
3,"Alabama, Bibb",8819,6738.0,1874.0,AL
4,"Alabama, Blount",25588,22859.0,2156.0,AL
...,...,...,...,...,...
2992,"Wyoming, Sweetwater",17130,12154.0,3231.0,WY
2993,"Wyoming, Teton",12627,3921.0,7314.0,WY
2994,"Wyoming, Uinta",8470,6154.0,1202.0,WY
2995,"Wyoming, Washakie",3814,2911.0,532.0,WY


In [122]:
# Write the side-by-side party table to CSV, with a header row and without the row index.
election_parties_2016.to_csv(
    r'Resources/FOR_JINHO_2016.csv',
    header=True,
    index=False,
)


In [123]:
# Join the side-by-side party table back onto the combined election + census
# table by 'Location'. Both inputs have a 'State_Code' column, so the result
# carries it twice, as 'State_Code_x' and 'State_Code_y'.
election_census_parties_2016 = election_parties_2016.merge(
    census_AND_election_2016_reorder,
    on='Location',
)
election_census_parties_2016

Unnamed: 0,Location,Total_Votes_2016,2016_republican_votes,2016_democrat_votes,State_Code_x,County,State,State_Code_y,Year,Party,Candidate,Candidate_Votes,Total_Votes,County_Pop_2016
0,"Alabama, Autauga",24973,18172.0,5936.0,AL,Autauga,Alabama,AL,2016,democrat,Hillary Clinton,5936.0,24973,55243
1,"Alabama, Autauga",24973,18172.0,5936.0,AL,Autauga,Alabama,AL,2016,republican,Donald Trump,18172.0,24973,55243
2,"Alabama, Baldwin",95215,72883.0,18458.0,AL,Baldwin,Alabama,AL,2016,democrat,Hillary Clinton,18458.0,95215,207601
3,"Alabama, Baldwin",95215,72883.0,18458.0,AL,Baldwin,Alabama,AL,2016,republican,Donald Trump,72883.0,95215,207601
4,"Alabama, Barbour",10469,5454.0,4871.0,AL,Barbour,Alabama,AL,2016,democrat,Hillary Clinton,4871.0,10469,25806
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5989,"Wyoming, Uinta",8470,6154.0,1202.0,WY,Uinta,Wyoming,WY,2016,republican,Donald Trump,6154.0,8470,20682
5990,"Wyoming, Washakie",3814,2911.0,532.0,WY,Washakie,Wyoming,WY,2016,democrat,Hillary Clinton,532.0,3814,8165
5991,"Wyoming, Washakie",3814,2911.0,532.0,WY,Washakie,Wyoming,WY,2016,republican,Donald Trump,2911.0,3814,8165
5992,"Wyoming, Weston",3526,3033.0,299.0,WY,Weston,Wyoming,WY,2016,democrat,Hillary Clinton,299.0,3526,7220


In [124]:
# Remove the per-party vote columns, 'Year' and 'Candidate_Votes' before export.
# NOTE(review): 'State_Code_x' and 'State_Code_y' are not in this list, so both
# remain in the exported frame - confirm downstream consumers expect that.
election_census_parties_2016_drop = election_census_parties_2016.drop(
    columns=[
        "Total_Votes_2016",
        "2016_republican_votes",
        "2016_democrat_votes",
        "Year",
        "Candidate_Votes",
    ]
)


In [125]:
# Write the final analysis table to CSV, with a header row and without the row index.
election_census_parties_2016_drop.to_csv(
    r'Resources/Analysis Files/2016/CENSUS_and_POPULATION_FINAL_2016.csv',
    header=True,
    index=False,
)
