In [6]:
import pandas as pd
import numpy as np
import os

In [7]:
#Loading in County Data
Age_Race = pd.read_csv('../../../data/us/demographics/acs_2018.csv', encoding='latin1')
County_Pop_Pop60 = pd.read_csv('../../../data/us/demographics/county_populations.csv', encoding='latin1')
Area_Houses = pd.read_csv('../../../data/us/demographics/county_land_areas.csv', encoding='latin1')
Educ = pd.read_csv('../../../data/us/demographics/education.csv', encoding='latin1')
Unemp = pd.read_csv('../../../data/us/demographics/unemployment.csv', delimiter="\t")
Poverty = pd.read_csv('../../../data/us/demographics/poverty.csv', delimiter="\t")
Votes = pd.read_csv('../../../data/us/demographics/countypres_2000-2016.csv', encoding='latin1')

#State Populations
State_Pop = pd.read_csv('../../../data/us/demographics/state_populations.csv', encoding='latin1')

In [8]:
#Changes prefixes of column names
def drop_prefix(self, prefix, replace = ''):
    self.columns = self.columns.str.replace(prefix, replace)
    return self

In [9]:
#Cleaning Voting Data
Votes = Votes[Votes.party != 'green']
Votes = Votes[Votes.party != 'republican'] #Removing unneeded rows
Votes = Votes[Votes.candidate != 'Other']
Votes = Votes[Votes.FIPS <= 10000000]

Votes = Votes.drop(columns=['state', 'state_po', 'county', 'office', 'candidate', 'version']) #removing uneeded columns

Votes.insert(5, "Prop_Blue", Votes.candidatevotes/Votes.totalvotes, True) #Adding column of proportion of democratic
Votes = Votes.drop(columns=['candidatevotes', 'party'])    
Votes = Votes.pivot(index= 'FIPS', columns = 'year') #making FIPS main index
Votes.to_csv('Votes.csv')

In [10]:
#Cleaning the Racial/Age Data
Age_Race.sort_values(by=['FIPS'])
Age_Race = Age_Race[Age_Race.columns.drop(list(Age_Race.filter(regex='Percent')))]
Age_Race = Age_Race[Age_Race.columns.drop(list(Age_Race.filter(regex='ratio')))]
#Dropping unecessary columns
Age_Race = drop_prefix(Age_Race, 'Estimate!!')
Age_Race = drop_prefix(Age_Race, 'SEX AND AGE!!')
Age_Race = drop_prefix(Age_Race, 'RACE!!')
Age_Race = drop_prefix(Age_Race, 'Total population!!') #Changing column title names
Age_Race = drop_prefix(Age_Race, 'One race!!', 'Race_1 ')
Age_Race = drop_prefix(Age_Race, 'Two or more races!!', 'Race_2+ ')
Age_Race = drop_prefix(Age_Race, 'Race alone or in combination with one or more other races!!', 'Race_Total ')
Age_Race = drop_prefix(Age_Race, 'Race alone or in combination with one or more other races!!', 'Race_Total ')
Age_Race = drop_prefix(Age_Race, 'HISPANIC OR LATINO AND', 'Hispanic:')
Age_Race.to_csv('Age_Race.csv')

In [11]:
#Joining Population and Unemployment Data
Pop_Unemp =  pd.merge(County_Pop_Pop60, Unemp, left_on='FIPS', right_on='FIPS')
#Joining this with Land Area/Density Data
Pop_Unemp_Area = pd.merge(Pop_Unemp, Area_Houses, left_on='FIPS', right_on='County FIPS')
Pop_Unemp_Area = Pop_Unemp_Area.drop(columns=['County FIPS', 'Area in square miles - Total area', 'County Name'])
#Joining this with Education Data
Pop_Unemp_Area_Educ = pd.merge(Pop_Unemp_Area, Educ, left_on='FIPS', right_on='FIPS')
#Joining this with Poverty Data
Pop_Unemp_Area_Educ_Pov = pd.merge(Pop_Unemp_Area_Educ, Poverty, left_on='FIPS', right_on='FIPS')
#Joining this with Voting Data
Pop_Unemp_Area_Educ_Pov_Votes = pd.merge(Pop_Unemp_Area_Educ_Pov, Votes, left_on='FIPS', right_on='FIPS')


#Joining Compiled Data with Racial/Age Data (This is incomplete, not all FIPS included)
Demographics_Full = pd.merge(Pop_Unemp_Area_Educ, Age_Race, left_on='FIPS', right_on='FIPS')



In [12]:
print('County_Pop_Pop60: ' + str(len(County_Pop_Pop60)))
print('Area_Houses: ' + str(len(Area_Houses)))
print('Educ: ' + str(len(Educ)))
print('Unemp: ' + str(len(Unemp)))
print('Poverty: ' + str(len(Poverty)))
print('Votes: ' + str(len(Votes)) + '\n')

print('Age_Race: ' + str(len(Age_Race)) + '\n')

print('Pop_Unemp: ' + str(len(Pop_Unemp)))
print('Pop_Unemp_Area: ' + str(len(Pop_Unemp_Area)))
print('Pop_Unemp_Area_Educ: ' + str(len(Pop_Unemp_Area_Educ)))
print('Pop_Unemp_Area_Educ_Pov: ' + str(len(Pop_Unemp_Area_Educ_Pov)))
print('Pop_Unemp_Area_Educ_Pov_Votes: ' + str(len(Pop_Unemp_Area_Educ_Pov_Votes)) + '\n')

print('Demographics_Full: ' + str(len(Demographics_Full)))

County_Pop_Pop60: 3142
Area_Houses: 3221
Educ: 3283
Unemp: 3141
Poverty: 3136
Votes: 3155

Age_Race: 838

Pop_Unemp: 3141
Pop_Unemp_Area: 3139
Pop_Unemp_Area_Educ: 3139
Pop_Unemp_Area_Educ_Pov: 3134
Pop_Unemp_Area_Educ_Pov_Votes: 3114

Demographics_Full: 827


In [13]:
Pop_Unemp_Area_Educ_Pov_Votes.head()

Unnamed: 0,FIPS,total_pop,60plus,State,Area_name_x,Rural_urban_continuum_code_2013,Urban_influence_code_2013,Metro_2013,Civilian_labor_force_2018,Employed_2018,...,"(totalvotes, 2000)","(totalvotes, 2004)","(totalvotes, 2008)","(totalvotes, 2012)","(totalvotes, 2016)","(Prop_Blue, 2000)","(Prop_Blue, 2004)","(Prop_Blue, 2008)","(Prop_Blue, 2012)","(Prop_Blue, 2016)"
0,1001,55036,10523,AL,"Autauga County, AL",2,2,1,25957,25015,...,17208.0,20081.0,23641.0,23932.0,24973.0,0.287192,0.23694,0.25773,0.265878,0.237697
1,1003,203360,53519,AL,"Baldwin County, AL",3,2,1,93849,90456,...,56480.0,69320.0,81413.0,85338.0,95215.0,0.247822,0.225029,0.238119,0.215894,0.193856
2,1005,26201,6150,AL,"Barbour County, AL",6,6,0,8373,7940,...,10395.0,10777.0,11630.0,11509.0,10469.0,0.499086,0.448362,0.489854,0.513685,0.465278
3,1007,22580,4773,AL,"Bibb County, AL",1,1,1,8661,8317,...,7101.0,7600.0,8644.0,8420.0,8819.0,0.381636,0.274868,0.265965,0.26152,0.212496
4,1009,57667,13600,AL,"Blount County, AL",1,1,1,25006,24128,...,17973.0,21504.0,24267.0,24006.0,25588.0,0.276915,0.183129,0.145135,0.123719,0.084258


In [14]:
list(Pop_Unemp_Area_Educ_Pov_Votes.columns)

['FIPS',
 'total_pop',
 '60plus',
 'State',
 'Area_name_x',
 'Rural_urban_continuum_code_2013',
 'Urban_influence_code_2013',
 'Metro_2013',
 'Civilian_labor_force_2018',
 'Employed_2018',
 'Unemployed_2018',
 'Unemployment_rate_2018',
 'Median_Household_Income_2018',
 'Med_HH_Income_Percent_of_State_Total_2018',
 'Area in square miles - Land area',
 '2010 Density per square mile of land area - Population',
 '2010 Density per square mile of land area - Housing units',
 'Less than a high school diploma, 2014-18',
 'High school diploma only, 2014-18',
 "Some college or associate's degree, 2014-18",
 "Bachelor's degree or higher, 2014-18",
 'Percent of adults with less than a high school diploma, 2014-18',
 'Percent of adults with a high school diploma only, 2014-18',
 "Percent of adults completing some college or associate's degree, 2014-18",
 "Percent of adults with a bachelor's degree or higher, 2014-18",
 'Stabr',
 'Area_name_y',
 'Rural-urban_Continuum_Code_2003',
 'Urban_Influence_C

In [15]:
Demographics_Full.head()

Unnamed: 0,FIPS,total_pop,60plus,State,Area_name,Rural_urban_continuum_code_2013,Urban_influence_code_2013,Metro_2013,Civilian_labor_force_2018,Employed_2018,...,Hispanic: Not Hispanic or Latino!!White alone,Hispanic: Not Hispanic or Latino!!Black or African American alone,Hispanic: Not Hispanic or Latino!!American Indian and Alaska Native alone,Hispanic: Not Hispanic or Latino!!Asian alone,Hispanic: Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone,Hispanic: Not Hispanic or Latino!!Some other race alone,Hispanic: Not Hispanic or Latino!!Two or more races,Hispanic: Not Hispanic or Latino!!Race_2+ Two races including Some other race,"Hispanic: Not Hispanic or Latino!!Race_2+ Two races excluding Some other race, and Three or more races",Total housing units
0,1003,203360,53519,AL,"Baldwin County, AL",3,2,1,93849,90456,...,180926,20328,1209,2338,45,148,2896,127,2769,116632
1,1015,115527,27115,AL,"Calhoun County, AL",3,2,1,45972,43833,...,82308,24737,107,426,0,0,2230,293,1937,53888
2,1043,81703,19667,AL,"Cullman County, AL",4,3,0,37830,36589,...,N,N,N,N,N,N,N,N,N,38029
3,1049,71194,15782,AL,"DeKalb County, AL",6,6,0,30129,28979,...,57199,1335,814,173,0,0,1269,0,1269,31656
4,1051,80989,15929,AL,"Elmore County, AL",2,2,1,37215,35932,...,59771,16836,345,522,0,0,1946,56,1890,34416


In [16]:
list(Demographics_Full.columns)

['FIPS',
 'total_pop',
 '60plus',
 'State',
 'Area_name',
 'Rural_urban_continuum_code_2013',
 'Urban_influence_code_2013',
 'Metro_2013',
 'Civilian_labor_force_2018',
 'Employed_2018',
 'Unemployed_2018',
 'Unemployment_rate_2018',
 'Median_Household_Income_2018',
 'Med_HH_Income_Percent_of_State_Total_2018',
 'Area in square miles - Land area',
 '2010 Density per square mile of land area - Population',
 '2010 Density per square mile of land area - Housing units',
 'Less than a high school diploma, 2014-18',
 'High school diploma only, 2014-18',
 "Some college or associate's degree, 2014-18",
 "Bachelor's degree or higher, 2014-18",
 'Percent of adults with less than a high school diploma, 2014-18',
 'Percent of adults with a high school diploma only, 2014-18',
 "Percent of adults completing some college or associate's degree, 2014-18",
 "Percent of adults with a bachelor's degree or higher, 2014-18",
 'Geographic Area Name',
 'Total population',
 'Male',
 'Female',
 'Under 5 years',

In [17]:
#Repeating similar process to above, but changing order of merges to make maximum rows
#Joining Education & Area
Educ_Area = pd.merge(Educ, Area_Houses, left_on='FIPS', right_on='County FIPS')
Educ_Area = Educ_Area.drop(columns=['County FIPS', 'Area in square miles - Total area', 'County Name'])
#Joining this with Population Data
Educ_Area_Pop = pd.merge(Educ_Area, County_Pop_Pop60, left_on='FIPS', right_on='FIPS')
#Joining this with Unemployment Data
Educ_Area_Pop_Unemp = pd.merge(Educ_Area_Pop, Unemp, left_on='FIPS', right_on='FIPS')
#Joining this with Poverty Data
Educ_Area_Pop_Unemp_Pov = pd.merge(Educ_Area_Pop_Unemp, Poverty, left_on='FIPS', right_on='FIPS')
#Joining this with Voting Data
Educ_Area_Pop_Unemp_Pov_Votes = pd.merge(Pop_Unemp_Area_Educ_Pov, Votes, left_on='FIPS', right_on='FIPS')

In [18]:
print('Educ_Area: ' + str(len(Educ_Area)))
print('Educ_Area_Pop: ' + str(len(Educ_Area_Pop)))
print('Educ_Area_Pop_Unemp: ' + str(len(Educ_Area_Pop_Unemp)))
print('Educ_Area_Pop_Unemp_Pov: ' + str(len(Educ_Area_Pop_Unemp_Pov)))
print('Educ_Area_Pop_Unemp_Pov_Votes: ' + str(len(Educ_Area_Pop_Unemp_Pov_Votes)) + '\n')

Educ_Area: 3219
Educ_Area_Pop: 3140
Educ_Area_Pop_Unemp: 3139
Educ_Area_Pop_Unemp_Pov: 3134
Educ_Area_Pop_Unemp_Pov_Votes: 3114



In [19]:
Educ_Area_Pop_Unemp_Pov_Votes.head()

Unnamed: 0,FIPS,total_pop,60plus,State,Area_name_x,Rural_urban_continuum_code_2013,Urban_influence_code_2013,Metro_2013,Civilian_labor_force_2018,Employed_2018,...,"(totalvotes, 2000)","(totalvotes, 2004)","(totalvotes, 2008)","(totalvotes, 2012)","(totalvotes, 2016)","(Prop_Blue, 2000)","(Prop_Blue, 2004)","(Prop_Blue, 2008)","(Prop_Blue, 2012)","(Prop_Blue, 2016)"
0,1001,55036,10523,AL,"Autauga County, AL",2,2,1,25957,25015,...,17208.0,20081.0,23641.0,23932.0,24973.0,0.287192,0.23694,0.25773,0.265878,0.237697
1,1003,203360,53519,AL,"Baldwin County, AL",3,2,1,93849,90456,...,56480.0,69320.0,81413.0,85338.0,95215.0,0.247822,0.225029,0.238119,0.215894,0.193856
2,1005,26201,6150,AL,"Barbour County, AL",6,6,0,8373,7940,...,10395.0,10777.0,11630.0,11509.0,10469.0,0.499086,0.448362,0.489854,0.513685,0.465278
3,1007,22580,4773,AL,"Bibb County, AL",1,1,1,8661,8317,...,7101.0,7600.0,8644.0,8420.0,8819.0,0.381636,0.274868,0.265965,0.26152,0.212496
4,1009,57667,13600,AL,"Blount County, AL",1,1,1,25006,24128,...,17973.0,21504.0,24267.0,24006.0,25588.0,0.276915,0.183129,0.145135,0.123719,0.084258
