In [2]:
# Import dependencies, including SQLAlchemy
import pandas as pd
import numpy as np

from sqlalchemy import create_engine
import psycopg2

from config import db_password

In [3]:
# Create the SQLAlchemy engine for the local "vaccine_hesitancy" Postgres database.
# The dialect name must be "postgresql": SQLAlchemy 1.4+ no longer accepts the
# legacy "postgres://" alias and raises when it tries to load that dialect.
# NOTE(review): db_password is interpolated verbatim, so a password containing
# URL-special characters (e.g. "@" or "/") would need escaping -- confirm.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/vaccine_hesitancy"
engine = create_engine(db_string)

In [4]:
# Load each Postgres table into its own DataFrame.
# The existing `engine` is passed as the connection so all five reads reuse it,
# rather than handing pandas the raw URL string and having it set up a new
# connectable for every call.
census_demographic_df = pd.read_sql_table("census_demographic", con=engine)
vaccine_hesitancy_df = pd.read_sql_table("vaccine_hesitancy_covid", con=engine)
county_statistics_df = pd.read_sql_table("county_statistics", con=engine)
county_size_df = pd.read_sql_table("county_size", con=engine)
full_county_data_df = pd.read_sql_table("full_county_data", con=engine)

In [28]:
# Collect the names of census columns that hold nothing but nulls
census_null = census_demographic_df.columns[census_demographic_df.isna().all()].tolist()

# Remove those columns from the frame itself (mutates the existing object)
census_demographic_df.drop(columns=census_null, inplace=True)

# Preview the first rows of the cleaned frame
census_demographic_df.head()

Unnamed: 0,CountyId,State,County,TotalPop,Men,Women,Hispanic,White,Black,Native,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,1001,Alabama,Autauga County,55036,26899,28137,2.7,75.4,18.9,0.3,...,0.6,1.3,2.5,25.8,24112,74.1,20.2,5.6,0.1,5.2
1,1003,Alabama,Baldwin County,203360,99527,103833,4.4,83.1,9.5,0.8,...,0.8,1.1,5.6,27.0,89527,80.7,12.9,6.3,0.1,5.5
2,1005,Alabama,Barbour County,26201,13976,12225,4.2,45.7,47.8,0.2,...,2.2,1.7,1.3,23.4,8878,74.1,19.1,6.5,0.3,12.4
3,1007,Alabama,Bibb County,22580,12251,10329,2.4,74.6,22.0,0.4,...,0.3,1.7,1.5,30.0,8171,76.0,17.4,6.3,0.3,8.2
4,1009,Alabama,Blount County,57667,28490,29177,9.0,87.4,1.5,0.3,...,0.4,0.4,2.1,35.0,21380,83.9,11.9,4.0,0.1,4.9


In [21]:
# Collect the names of hesitancy columns that hold nothing but nulls
hesitancy_null = vaccine_hesitancy_df.columns[vaccine_hesitancy_df.isna().all()].tolist()

# Remove those columns from the frame itself (mutates the existing object)
vaccine_hesitancy_df.drop(columns=hesitancy_null, inplace=True)

# Preview the first rows of the cleaned frame
vaccine_hesitancy_df.head()

Unnamed: 0,fips_code,county,state,est_hesitant,est_strongly_hesitant,svi,svi_category,cvac,cvac_category,percent_fully_vaccinated,percent_hispanic,percent_american_indian_alaska_native,percent_asian,percent_black,percent_hawaiian_pacific,percent_white,geographical_point,state_code,county_boundary,state_boundary
0,1001,"Autauga County, Alabama",ALABAMA,0.22,0.1,0.44,Moderate Vulnerability,0.61,High Vulnerability,0.114,0.0283,0.0025,0.0103,0.19,0.0001,0.746,POINT (-86.844516 32.756889),AL,MULTIPOLYGON (((-86.90309599999999 32.54062599...,"MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
1,1003,"Baldwin County, Alabama",ALABAMA,0.2,0.1,0.22,Low Vulnerability,0.23,Low Vulnerability,0.176,0.0456,0.0065,0.0092,0.0917,0.0,0.8307,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-87.990684 30.55549, -87.98783...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
2,1005,"Barbour County, Alabama",ALABAMA,0.23,0.11,1.0,Very High Vulnerability,0.89,Very High Vulnerability,0.128,0.0436,0.0029,0.0048,0.4744,0.0,0.4581,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-85.429819 32.045983, -85.4303...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
3,1007,"Bibb County, Alabama",ALABAMA,0.24,0.12,0.6,High Vulnerability,0.76,High Vulnerability,0.115,0.0257,0.0013,0.0012,0.2214,0.0,0.7453,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-87.312265 33.086219, -87.3121...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."
4,1009,"Blount County, Alabama",ALABAMA,0.23,0.11,0.42,Moderate Vulnerability,0.8,High Vulnerability,0.095,0.0926,0.0007,0.0037,0.0153,0.0004,0.8689,POINT (-86.844516 32.756889),AL,"MULTIPOLYGON (((-86.74918799999999 33.997596, ...","MULTIPOLYGON (((-88.139988 34.581703, -88.1352..."


In [23]:
# Collect the names of county-statistics columns that hold nothing but nulls
countystats_null = county_statistics_df.columns[county_statistics_df.isna().all()].tolist()

# Remove those columns from the frame itself (mutates the existing object)
county_statistics_df.drop(columns=countystats_null, inplace=True)

# Preview the first rows of the cleaned frame
county_statistics_df.head()

Unnamed: 0,fips_code,county,state,percentage16_Donald_Trump,percentage16_Hillary_Clinton,total_votes16,votes16_Donald_Trump,votes16_Hillary_Clinton,percentage20_Donald_Trump,percentage20_Joe_Biden,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,1001,autauga county,AL,0.734,0.24,24661.0,18110.0,5908.0,0.715,0.27,...,0.6,1.3,2.5,25.8,24112.0,74.1,20.2,5.6,0.1,5.2
1,1003,baldwin county,AL,0.774,0.196,94090.0,72780.0,18409.0,0.762,0.223,...,0.8,1.1,5.6,27.0,89527.0,80.7,12.9,6.3,0.1,5.5
2,1005,barbour county,AL,0.523,0.467,10390.0,5431.0,4848.0,0.536,0.456,...,2.2,1.7,1.3,23.4,8878.0,74.1,19.1,6.5,0.3,12.4
3,1007,bibb county,AL,0.77,0.214,8748.0,6733.0,1874.0,0.784,0.207,...,0.3,1.7,1.5,30.0,8171.0,76.0,17.4,6.3,0.3,8.2
4,1009,blount county,AL,0.899,0.085,25384.0,22808.0,2150.0,0.896,0.096,...,0.4,0.4,2.1,35.0,21380.0,83.9,11.9,4.0,0.1,4.9


In [24]:
# Collect the names of county-size columns that hold nothing but nulls
countysize_null = county_size_df.columns[county_size_df.isna().all()].tolist()

# Remove those columns from the frame itself (mutates the existing object)
county_size_df.drop(columns=countysize_null, inplace=True)

# Preview the first rows of the cleaned frame
county_size_df.head()

Unnamed: 0,fips_code,county,state,population,density
0,1001,Autauga County,AL,55514,urban
1,1003,Baldwin County,AL,190790,urban
2,1005,Barbour County,AL,27201,rural
3,1007,Bibb County,AL,22597,urban
4,1009,Blount County,AL,57826,urban


In [25]:
# Collect the names of full-county-data columns that hold nothing but nulls
county_null = full_county_data_df.columns[full_county_data_df.isna().all()].tolist()

# Remove those columns from the frame itself (mutates the existing object)
full_county_data_df.drop(columns=county_null, inplace=True)

# Preview the first rows of the cleaned frame
full_county_data_df.head()

Unnamed: 0,fips_code,county,state,est_hesitant,est_strongly_hesitant,svi,svi_category,cvac,cvac_category,percent_fully_vaccinated,...,Walk,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment
0,1001.0,"Autauga County, Alabama",ALABAMA,0.22,0.1,0.44,Moderate Vulnerability,0.61,High Vulnerability,0.114,...,0.6,1.3,2.5,25.8,24112,74.1,20.2,5.6,0.1,5.2
1,1003.0,"Baldwin County, Alabama",ALABAMA,0.2,0.1,0.22,Low Vulnerability,0.23,Low Vulnerability,0.176,...,0.8,1.1,5.6,27.0,89527,80.7,12.9,6.3,0.1,5.5
2,1005.0,"Barbour County, Alabama",ALABAMA,0.23,0.11,1.0,Very High Vulnerability,0.89,Very High Vulnerability,0.128,...,2.2,1.7,1.3,23.4,8878,74.1,19.1,6.5,0.3,12.4
3,1007.0,"Bibb County, Alabama",ALABAMA,0.24,0.12,0.6,High Vulnerability,0.76,High Vulnerability,0.115,...,0.3,1.7,1.5,30.0,8171,76.0,17.4,6.3,0.3,8.2
4,1009.0,"Blount County, Alabama",ALABAMA,0.23,0.11,0.42,Moderate Vulnerability,0.8,High Vulnerability,0.095,...,0.4,0.4,2.1,35.0,21380,83.9,11.9,4.0,0.1,4.9
