# Import Libraries

In [27]:
import pandas as pd
import numpy as np

# Obtain Data

Read csv with election results into pandas, scrub county names to match formatting of census data, filter for 2020 Trump vote proportion and drop unneeded columns

In [37]:
election_df = pd.read_csv('Data/election_results.csv')
election_df = election_df[(election_df['year']==2020) & (election_df['candidate']=='DONALD J TRUMP')]
election_df['County'] = election_df.apply(lambda x: x['county_name'].title() + ' County, ' + x['state'].title(),
                                            axis=1)
election_df['County'] = election_df['County'].apply(lambda x: x.replace('City County', 'city'))
election_df = election_df[['County', 'candidatevotes', 'totalvotes']]
election_df.columns = ['County', 'Trump Votes', 'Total Votes']
election_df = election_df.groupby(by='County', axis=0).sum()
election_df.reset_index(inplace=True)
print('election_df shape:', election_df.shape)
election_df.head()

election_df shape: (3155, 3)


Unnamed: 0,County,Trump Votes,Total Votes
0,"Abbeville County, South Carolina",8215.0,74598.0
1,"Acadia County, Louisiana",22596.0,28425.0
2,"Accomack County, Virginia",9172.0,50886.0
3,"Ada County, Idaho",130699.0,259389.0
4,"Adair County, Iowa",2922.0,8354.0


Read csv with county areas into pandas, scrub county names to match formatting of census data and drop unneeded columns

In [3]:
#Update Virginia independent cities with city instead of County

area_df = pd.read_csv('Data/area.csv')
area_df = area_df[['Areaname', 'LND010200D']]
area_df.columns = ['County', 'Area']
area_df['County']

state_abbrev_df = pd.read_csv('Data/state_abbrev.csv')
def replace_abbrev(row):
    if len(row.split(',')) == 1:
        return row    
    else:
        state_abbrev = row.split(',')[1][1:]
        state = state_abbrev_df[state_abbrev_df['Code']==state_abbrev]['State'].values[0]
        return row.split(',')[0] + ' County, ' + state
area_df['County'] = area_df['County'].apply(replace_abbrev)
print('area_df shape:', area_df.shape)
area_df.head()

area_df shape: (3198, 2)


Unnamed: 0,County,Area
0,UNITED STATES,3794083.06
1,ALABAMA,52419.02
2,"Autauga County, Alabama",604.45
3,"Baldwin County, Alabama",2026.93
4,"Barbour County, Alabama",904.52


Read csv with county level education data into pandas and drop unneeded columns

In [5]:
education_df = pd.read_csv('Data/education.csv', header=1)
education_df = education_df[['Geographic Area Name',
                            'Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over', 
                            "Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Bachelor's degree or higher"]]
education_df.columns = ['County', 'Persons 25+', 'Persons 25+ w/ Bachelors Degree']
print('education_df shape: ', education_df.shape)
education_df.head()

education_df shape:  (840, 3)


Unnamed: 0,County,Persons 25+,Persons 25+ w/ Bachelors Degree
0,"Baldwin County, Alabama",159717,51471
1,"Calhoun County, Alabama",79084,15257
2,"Cullman County, Alabama",58795,9241
3,"DeKalb County, Alabama",47007,5999
4,"Elmore County, Alabama",57553,14310


Read csv with county level economic data into pandas and drop unneeded columns

In [14]:
economic_df = pd.read_csv('Data/economics.csv', header=1)
economic_df = economic_df[['Geographic Area Name',
                          'Estimate!!INCOME AND BENEFITS (IN 2019 INFLATION-ADJUSTED DOLLARS)!!Per capita income (dollars)',
                          'Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force',
                          'Estimate!!EMPLOYMENT STATUS!!Population 16 years and over!!In labor force!!Civilian labor force!!Unemployed',
                          'Estimate!!OCCUPATION!!Civilian employed population 16 years and over',
                          'Estimate!!INDUSTRY!!Civilian employed population 16 years and over!!Manufacturing']]
economic_df.columns = ['County', 'Per Capita Income', 'Persons 16+ in Labor Force', 'Persons 16+ Unemployed',
                       'Persons 16+ Employed', 'Persons 16+ Manufacturing Employed']
print('economic_df shape: ', economic_df.shape)
economic_df.head()

economic_df shape:  (840, 6)


Unnamed: 0,County,Per Capita Income,Persons 16+ in Labor Force,Persons 16+ Unemployed,Persons 16+ Employed,Persons 16+ Manufacturing Employed
0,"Baldwin County, Alabama",32443,101561,5549,96012,8791
1,"Calhoun County, Alabama",24579,51001,3754,45641,7044
2,"Cullman County, Alabama",23968,38222,1783,36439,5282
3,"DeKalb County, Alabama",21939,34492,1606,32544,9052
4,"Elmore County, Alabama",31396,37693,1014,35358,4634


Read csv with ethnicity data into pandas and drop unneeded columns

In [15]:
ethnicity_df = pd.read_csv('Data/ethnicity.csv', header=1)
ethnicity_df = ethnicity_df[['Geographic Area Name',
                             'Estimate!!SEX AND AGE!!Total population!!Sex ratio (males per 100 females)',
                             'Estimate!!SEX AND AGE!!Total population',
                             'Estimate!!HISPANIC OR LATINO AND RACE!!Total population!!Hispanic or Latino (of any race)',
                             'Estimate!!RACE!!Total population!!One race!!White']]
ethnicity_df.columns = ['County', 'Sex Ratio (M/F)', 'Total Persons', 'Hispanic Persons', 'White Persons']
print('ethnicity_df shape: ', ethnicity_df.shape)
ethnicity_df.head()

ethnicity_df shape:  (840, 5)


Unnamed: 0,County,Sex Ratio (M/F),Total Persons,Hispanic Persons,White Persons
0,"Baldwin County, Alabama",95.7,223234,10534,190912
1,"Calhoun County, Alabama",91.5,113605,4614,82323
2,"Cullman County, Alabama",94.0,83768,3752,N
3,"DeKalb County, Alabama",99.6,71513,10775,59305
4,"Elmore County, Alabama",97.4,81209,2563,61634


Read csv with disability data into pandas and drop unneeded columns

In [16]:
disability_df = pd.read_csv('Data/disability.csv', header=1)
disability_df = disability_df[['Geographic Area Name',
    'Estimate!!Percent with a disability!!Total civilian noninstitutionalized population']]
disability_df.columns = ['County', 'Disability Proportion']
print('disability_df shape: ', disability_df.shape)
disability_df.head()

disability_df shape:  (840, 2)


Unnamed: 0,County,Disability Proportion
0,"Baldwin County, Alabama",14.4
1,"Calhoun County, Alabama",20.0
2,"Cullman County, Alabama",17.5
3,"DeKalb County, Alabama",10.8
4,"Elmore County, Alabama",12.9


Read csv with demographic data into pandas and drop unneeded columns

In [17]:
demographics_df = pd.read_csv('Data/demographics.csv', header=1)
demographics_df = demographics_df[['Geographic Area Name',
                                  'Estimate!!Total!!Total population!!SUMMARY INDICATORS!!Median age (years)']]
demographics_df.columns = ['County', 'Median Age']
print('demographics_df shape: ', demographics_df.shape)
demographics_df.head()

demographics_df shape:  (840, 2)


Unnamed: 0,County,Median Age
0,"Baldwin County, Alabama",43.0
1,"Calhoun County, Alabama",39.6
2,"Cullman County, Alabama",41.9
3,"DeKalb County, Alabama",37.7
4,"Elmore County, Alabama",39.0


Read csv with veteran proprtion into pandas and drop unneeded columns

In [18]:
veterans_df = pd.read_csv('Data/veterans.csv', header=1)
veterans_df = veterans_df[['Geographic Area Name',
                           'Estimate!!Total!!Civilian population 18 years and over',
                           'Estimate!!Veterans!!Civilian population 18 years and over']]
veterans_df.columns = ['County', 'Civilians 18+', 'Veterans 18+']
print('veterans_df shape: ', veterans_df.shape)
veterans_df.head()

veterans_df shape:  (840, 3)


Unnamed: 0,County,Civilians 18+,Veterans 18+
0,"Baldwin County, Alabama",176331,19580
1,"Calhoun County, Alabama",87525,8552
2,"Cullman County, Alabama",64955,4907
3,"DeKalb County, Alabama",53737,3499
4,"Elmore County, Alabama",61837,6512


Read csv with occupational data into pandas and drop unneeded columns

In [19]:
occupation_df = pd.read_csv('Data/occupation.csv', header=1)
occupation_df = occupation_df[(occupation_df['Meaning of NAICS code']=='Mining, quarrying, and oil and gas extraction')
                             & (occupation_df['Meaning of Employment size of establishments']=='All establishments')]
occupation_df = occupation_df[['Geographic Area Name', 'Number of employees']]
occupation_df.reset_index(inplace=True, drop=True)
print('occupation_df shape: ', occupation_df.shape)
occupation_df.head()

occupation_df shape:  (1389, 2)


Unnamed: 0,Geographic Area Name,Number of employees
0,"Autauga County, Alabama",84
1,"Baldwin County, Alabama",69
2,"Barbour County, Alabama",91
3,"Colbert County, Alabama",102
4,"Escambia County, Alabama",224


Read csv with agricultural employment data into pandas and drop unneeded columns

In [33]:
agriculture_df = pd.read_csv('Data/agricultural_employment.csv', header=1)
agriculture_df = agriculture_df[['Geographic Area Name', 
                                 'Estimate!!Total!!Civilian employed population 16 years and over',
                                 'Estimate!!Total!!Civilian employed population 16 years and over!!Natural resources, construction, and maintenance occupations:!!Farming, fishing, and forestry occupations']]
agriculture_df.columns = ['County', 'Persons 16+ Employment', 'Agricultural Employment']
print('agriculture_df shape: ', agriculture_df.shape)
agriculture_df.head()

agriculture_df shape:  (840, 3)


Unnamed: 0,County,Persons 16+ Employment,Agricultural Employment
0,"Baldwin County, Alabama",96012,426
1,"Calhoun County, Alabama",45641,83
2,"Cullman County, Alabama",36439,509
3,"DeKalb County, Alabama",32544,147
4,"Elmore County, Alabama",35358,68


In [36]:
agriculture_df[agriculture_df['County']=='Roanoke city, Virginia']

Unnamed: 0,County,Persons 16+ Employment,Agricultural Employment
774,"Roanoke city, Virginia",48902,0
