In [58]:
import pandas as pd
import numpy as np
import os

In [59]:
#Loading in County Data
#Datasets that are altered & Resaved as such
Age_Race = pd.read_csv('../../../../data/us/demographics/acs_2018.csv', encoding='latin1')
Votes = pd.read_csv('../../../../data/us/demographics/countypres_2000-2016.csv', encoding='latin1')
Educ = pd.read_csv('../../../../data/us/demographics/education.csv', encoding='latin1')
Density = pd.read_csv('../../../../data/us/demographics/county_land_areas.csv', encoding='latin1')
Unemp = pd.read_csv('../../../../data/us/demographics/unemployment.csv', delimiter="\t")
Poverty = pd.read_csv('../../../../data/us/demographics/poverty.csv', delimiter="\t")

#Key to map FIPs values to State and County Name
Key = pd.read_csv('../../../../data/us/processing_data/fips_key.csv', encoding='latin1')
Key = Key.drop(columns=['MSA/PMSA NECMA']) #Dropping unecessary column
Key = Key.set_index('FIPS')

Key.to_csv('Key.csv')

#Datasets that are not altered
Pop_60 = pd.read_csv('../../../../data/us/demographics/county_populations.csv', encoding='latin1')
Pop_60 = Pop_60.set_index('FIPS')

Pop_60.to_csv('Pop_60.csv')

In [60]:
#Changes prefixes of column names
def drop_prefix(self, prefix, replace = ''):
    self.columns = self.columns.str.replace(prefix, replace)
    return self

In [61]:
#Removes all duplicate columns from dataframes
def drop_dup_col(df):
    df = df.loc[:,~df.columns.duplicated()]
    return df

In [62]:
#Cleaning Voting Data
Votes = Votes[Votes.party != 'green']
Votes = Votes[Votes.party != 'republican'] #Removing unneeded rows
Votes = Votes[Votes.candidate != 'Other']

Votes = Votes[Votes.FIPS >= -1] #Removing NaN rows
################################

Votes = Votes.drop(columns=['state', 'state_po', 'county', 'office', 'candidate', 'version']) #removing uneeded columns

Votes.insert(5, "Prop_Blue", Votes.candidatevotes/Votes.totalvotes, True) #Adding column of fraction of pop that vote dem.
Votes = Votes.drop(columns=['candidatevotes', 'party'])    
Votes = Votes.pivot(index= 'FIPS', columns = 'year') #making FIPS main index

Votes.to_csv('Votes.csv')
#Removing the pivot aspect from the Votes Dataset
Votes = pd.read_csv('Votes.csv', encoding='latin1')
Votes = Votes.drop([0,1])

Votes.columns = ['FIPS', 'Total_Votes_2000', 'Total_Votes_2004', 'Total_Votes_2008', 'Total_Votes_2012',\
                 'Total_Votes_2016', 'Frac_Dem_2000', 'Frac_Dem_2004', 'Frac_Dem_2008', 'Frac_Dem_2012', 'Frac_Dem_2016']
Votes.FIPS = Votes.FIPS.astype(str).astype(float).astype(int) #Rewriting the columns names

Votes = Votes.set_index('FIPS')
Votes.to_csv('Votes.csv')

In [63]:
#Cleaning the Racial/Age Data
Age_Race = Age_Race.sort_values(by=['FIPS'])
#removing these percent/ratio values as these are poorly rounded, can be manually computed later
Age_Race = Age_Race[Age_Race.columns.drop(list(Age_Race.filter(regex='Percent')))]
Age_Race = Age_Race[Age_Race.columns.drop(list(Age_Race.filter(regex='ratio')))]
#Dropping unecessary columns prefixes 
Age_Race = drop_prefix(Age_Race, 'Estimate!!')
Age_Race = drop_prefix(Age_Race, 'SEX AND AGE!!')
Age_Race = drop_prefix(Age_Race, 'RACE!!')
Age_Race = drop_prefix(Age_Race, 'Total population!!') #Changing column title names
Age_Race = drop_prefix(Age_Race, 'One race!!', 'Exclusively ')
Age_Race = drop_prefix(Age_Race, 'Two or more races!!', 'Interracial ')
Age_Race = drop_prefix(Age_Race, 'Race alone or in combination with one or more other races!!', 'Total ')
Age_Race = drop_prefix(Age_Race, 'HISPANIC OR LATINO AND ')

#Dropping unecessary columns
Age_Race = drop_dup_col(Age_Race) #Removes duplicate columns
Age_Race = Age_Race[Age_Race.columns.drop(list(Age_Race.filter(regex='.1')))] #removes extra duplicate columns
Age_Race = Age_Race.drop(columns=['Geographic Area Name', 'Total Total population'])

Age_Race = Age_Race.replace('N', 0) #changing NaN values to 0
#####################################

Age_Race = Age_Race.set_index('FIPS')
Age_Race.to_csv('Age_Race.csv')

In [64]:
#Cleaning Education Data, removing state data from county data 
Educ_County = Educ[Educ['FIPS'] % 1000 != 0]
Educ_County = Educ_County.set_index('FIPS')

Educ_County.to_csv('Educ_County.csv')

In [65]:
#Cleaning Density area Data
Density = Density.drop(columns=['County Name']) #Dropping unecessary column
Density.columns = Density.columns.str.replace('County FIPS','FIPS')
Density = Density.set_index('FIPS')

Density.to_csv('Density.csv')

In [66]:
#Cleaning Unemployment area Data
Unemp = Unemp.drop(columns=['State', 'Area_name'])  #Dropping unecessary columns
Unemp = Unemp.set_index('FIPS')

Unemp.to_csv('Unemp.csv')

In [67]:
#Cleaning Poverty area Data
Poverty = Poverty.drop(columns=['Stabr', 'Area_name', 'Rural-urban_Continuum_Code_2013', 'Urban_Influence_Code_2013']) 
#Dropping unecessary columns
Poverty = Poverty.set_index('FIPS')

Poverty.to_csv('Poverty.csv')

In [68]:
print('Votes: ' + str(len(Votes)))
print('Age_Race: ' + str(len(Age_Race)))
print('Educ_County: ' + str(len(Educ_County)))
print('Density: ' + str(len(Density)))
print('Unemp: ' + str(len(Unemp)))
print('Poverty: ' + str(len(Poverty)))
print('Key: ' + str(len(Key)))
print('Pop_60: ' + str(len(Pop_60)))

Votes: 3155
Age_Race: 838
Educ_County: 3230
Density: 3221
Unemp: 3141
Poverty: 3136
Key: 3246
Pop_60: 3142


In [69]:
Votes.head()

Unnamed: 0_level_0,Total_Votes_2000,Total_Votes_2004,Total_Votes_2008,Total_Votes_2012,Total_Votes_2016,Frac_Dem_2000,Frac_Dem_2004,Frac_Dem_2008,Frac_Dem_2012,Frac_Dem_2016
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1001,17208.0,20081.0,23641.0,23932.0,24973.0,0.287192,0.23694,0.25773,0.265878,0.237697
1003,56480.0,69320.0,81413.0,85338.0,95215.0,0.247822,0.225029,0.238119,0.215894,0.193856
1005,10395.0,10777.0,11630.0,11509.0,10469.0,0.499086,0.448362,0.489854,0.513685,0.465278
1007,7101.0,7600.0,8644.0,8420.0,8819.0,0.381636,0.274868,0.265965,0.26152,0.212496
1009,17973.0,21504.0,24267.0,24006.0,25588.0,0.276915,0.183129,0.145135,0.123719,0.084258


In [70]:
Age_Race.head()

Unnamed: 0_level_0,Total population,Male,Female,Under 5 years,5 to 9 years,20 to 24 years,25 to 34 years,35 to 44 years,45 to 54 years,55 to 59 years,...,Not Hispanic or Latino!!White alone,Not Hispanic or Latino!!Black or African American alone,Not Hispanic or Latino!!American Indian and Alaska Native alone,Not Hispanic or Latino!!Asian alone,Not Hispanic or Latino!!Native Hawaiian and Other Pacific Islander alone,Not Hispanic or Latino!!Some other race alone,Not Hispanic or Latino!!Two or more races,Not Hispanic or Latino!!Interracial Two races including Some other race,"Not Hispanic or Latino!!Interracial Two races excluding Some other race, and Three or more races",Total housing units
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1003,218022,105437,112585,10878,9445,10426,23498,25990,30117,16569,...,180926,20328,1209,2338,45,148,2896,127,2769,116632
1015,114277,55054,59223,7142,7158,7451,16273,12608,13616,8305,...,82308,24737,107,426,0,0,2230,293,1937,53888
1043,83442,40794,42648,5329,4798,4500,10274,10131,10843,5407,...,0,0,0,0,0,0,0,0,0,38029
1049,71385,35048,36337,3989,3851,4176,8645,8261,9548,4533,...,57199,1335,814,173,0,0,1269,0,1269,31656
1051,81887,39988,41899,5246,4443,4847,12140,10087,11642,5364,...,59771,16836,345,522,0,0,1946,56,1890,34416


In [71]:
Educ_County.head()

Unnamed: 0_level_0,"Less than a high school diploma, 2014-18","High school diploma only, 2014-18","Some college or associate's degree, 2014-18","Bachelor's degree or higher, 2014-18","Percent of adults with less than a high school diploma, 2014-18","Percent of adults with a high school diploma only, 2014-18","Percent of adults completing some college or associate's degree, 2014-18","Percent of adults with a bachelor's degree or higher, 2014-18"
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1001,4204,12119,10552,10291,11.3,32.6,28.4,27.7
1003,14310,40579,46025,46075,9.7,27.6,31.3,31.3
1005,4901,6486,4566,2220,27.0,35.7,25.1,12.2
1007,2650,7471,3846,1813,16.8,47.3,24.4,11.5
1009,7861,13489,13267,5010,19.8,34.0,33.5,12.6


In [72]:
Density.head()

Unnamed: 0_level_0,Area in square miles - Total area,Area in square miles - Land area,2010 Density per square mile of land area - Population,2010 Density per square mile of land area - Housing units
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1001,604.39,594.44,91.8,37.2
1003,2027.31,1589.78,114.6,65.5
1005,904.52,884.88,31.0,13.4
1007,626.17,622.58,36.8,14.4
1009,650.63,644.78,88.9,37.0


In [73]:
Unemp.head()

Unnamed: 0_level_0,Rural_urban_continuum_code_2013,Urban_influence_code_2013,Metro_2013,Civilian_labor_force_2018,Employed_2018,Unemployed_2018,Unemployment_rate_2018,Median_Household_Income_2018,Med_HH_Income_Percent_of_State_Total_2018
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1001,2,2,1,25957,25015,942,3.6,"$59,338",119.0
1003,3,2,1,93849,90456,3393,3.6,"$57,588",115.5
1005,6,6,0,8373,7940,433,5.2,"$34,382",68.9
1007,1,1,1,8661,8317,344,4.0,"$46,064",92.3
1009,1,1,1,25006,24128,878,3.5,"$50,412",101.1


In [74]:
Poverty.head()

Unnamed: 0_level_0,Rural-urban_Continuum_Code_2003,Urban_Influence_Code_2003,POVALL_2018,CI90LBAll_2018,CI90UBALL_2018,PCTPOVALL_2018,CI90LBALLP_2018,CI90UBALLP_2018,POV017_2018,CI90LB017_2018,...,CI90UB017P_2018,POV517_2018,CI90LB517_2018,CI90UB517_2018,PCTPOV517_2018,CI90LB517P_2018,CI90UB517P_2018,MEDHHINC_2018,CI90LBINC_2018,CI90UBINC_2018
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,2,2,7587,6334,8840,13.8,11.5,16.1,2509,1965,...,23.5,1891,1469,2313,19.5,15.1,23.9,59338,53628,65048
1003,4,5,21069,17390,24748,9.8,8.1,11.5,6442,4723,...,17.6,4534,3223,5845,13.1,9.3,16.9,57588,54437,60739
1005,6,6,6788,5662,7914,30.9,25.8,36.0,2242,1788,...,52.8,1411,1056,1766,36.7,27.5,45.9,34382,31157,37607
1007,1,1,4400,3445,5355,21.8,17.1,26.5,1238,923,...,34.9,840,606,1074,26.3,19.0,33.6,46064,41283,50845
1009,1,1,7527,6045,9009,13.2,10.6,15.8,2374,1784,...,22.5,1520,1053,1987,15.5,10.7,20.3,50412,46157,54667


In [75]:
Key.head()

Unnamed: 0_level_0,ST,COUNTY
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,AL,Autauga County
1003,AL,Baldwin County
1005,AL,Barbour County
1007,AL,Bibb County
1009,AL,Blount County


In [76]:
Pop_60.head()

Unnamed: 0_level_0,total_pop,60plus
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1
1001,55036,10523
1003,203360,53519
1005,26201,6150
1007,22580,4773
1009,57667,13600
