# Set up

In [2]:
import json

import pandas as pd
import requests
from sqlalchemy import create_engine, inspect

from config import db_login_info

## Get NYT county COVID data

In [None]:
# Source: The New York Times county-level report on national COVID-19 cases
# and deaths (us-counties.csv in https://github.com/nytimes/covid-19-data).

# URL of the live CSV on the repository's master branch
counties_path = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"

# Download and parse the CSV straight into a dataframe
covid_all_df = pd.read_csv(filepath_or_buffer=counties_path)

#### Build all values dataframe

In [None]:
# Keep only complete rows (a null in any column drops the row), then store
# the FIPS code as an integer.
covid_all_df = covid_all_df.dropna().astype({'fips': 'int'})

In [None]:
# Display the dataframe
#covid_all_df

#### Build latest values dataframe

In [None]:
# Get the most recent reporting date.
# Use the maximum rather than the last row so the result does not depend on
# the row order of the file.
# NOTE(review): assumes 'date' holds ISO "YYYY-MM-DD" strings, so the string
# maximum is also the chronological maximum — confirm against the CSV.
latest_date = covid_all_df['date'].max()

# Make a dataframe holding only that most recent day's rows. The explicit copy
# makes it an independent frame, so later edits cannot touch covid_all_df or
# raise SettingWithCopyWarning.
covid_latest_df = covid_all_df.loc[covid_all_df['date'] == latest_date].copy()

In [None]:
# Display the dataframe
#covid_latest_df

## Get mask use data

In [13]:
# NYT mask-use survey estimates by county
mask_path = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv"
# Read COUNTYFP as text so zero-padded codes such as "01001" keep their leading zero
masks_df = pd.read_csv(mask_path, converters={'COUNTYFP': str})


In [14]:
# Replace the source headers with short lowercase names.
# The assignment is positional, so the list must follow the CSV's column order.
mask_columns = ['fips', 'never', 'rarely', 'sometimes', 'frequently', 'always']
masks_df.columns = mask_columns

# Null values are left untouched; the fill-with-0 step below is disabled.
#masks_df = masks_df.fillna(0)

In [15]:
# Display the mask-use dataframe (the rendered output is truncated by pandas
# to the first and last rows)
masks_df

Unnamed: 0,fips,never,rarely,sometimes,frequently,always
0,01001,0.053,0.074,0.134,0.295,0.444
1,01003,0.083,0.059,0.098,0.323,0.436
2,01005,0.067,0.121,0.120,0.201,0.491
3,01007,0.020,0.034,0.096,0.278,0.572
4,01009,0.053,0.114,0.180,0.194,0.459
...,...,...,...,...,...,...
3137,56037,0.061,0.295,0.230,0.146,0.268
3138,56039,0.095,0.157,0.160,0.247,0.340
3139,56041,0.098,0.278,0.154,0.207,0.264
3140,56043,0.204,0.155,0.069,0.285,0.287


## Get CDC data
#### Source: https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/muzy-jte6

In [3]:
# Load the CDC weekly death counts from the CSV file in the working directory
cdc_path = "Weekly_Counts_of_Deaths_by_State_and_Select_Causes__2019-2020.csv"
cdc_df = pd.read_csv(cdc_path)

In [4]:
# Display the raw CDC dataframe as loaded, before any columns are dropped or renamed
cdc_df

Unnamed: 0,Jurisdiction of Occurrence,MMWR Year,MMWR Week,Week Ending Date,All Cause,Natural Cause,Septicemia (A40-A41),Malignant neoplasms (C00-C97),Diabetes mellitus (E10-E14),Alzheimer disease (G30),...,flag_alz,flag_inflpn,flag_clrd,flag_otherresp,flag_nephr,flag_otherunk,flag_hd,flag_stroke,flag_cov19mcod,flag_cov19ucod
0,Alabama,2019,1,2019-01-05,1077.0,993.0,30.0,198.0,22.0,60.0,...,,,,,,,,,,
1,Alabama,2019,2,2019-01-12,1090.0,994.0,25.0,187.0,24.0,49.0,...,,,,,,,,,,
2,Alabama,2019,3,2019-01-19,1114.0,1042.0,22.0,238.0,18.0,48.0,...,,,,,,,,,,
3,Alabama,2019,4,2019-01-26,1063.0,994.0,21.0,165.0,22.0,50.0,...,,,,,,,,,,
4,Alabama,2019,5,2019-02-02,1095.0,1026.0,18.0,199.0,19.0,52.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4901,United States,2020,35,2020-08-29,55315.0,52044.0,658.0,10781.0,1620.0,2260.0,...,,,,,,,,,,
4902,United States,2020,36,2020-09-05,52108.0,49073.0,610.0,10179.0,1504.0,2266.0,...,,,,,,,,,,
4903,United States,2020,37,2020-09-12,48078.0,45611.0,549.0,9723.0,1403.0,2025.0,...,,,,,,,,,,
4904,United States,2020,38,2020-09-19,38755.0,37168.0,459.0,8323.0,1105.0,1780.0,...,,,,,,,,,,


In [5]:
# Discard the columns at positions 19 through 33; they are not loaded into
# the database.
unused_cdc_columns = list(cdc_df.columns[19:34])
cdc_df = cdc_df.drop(columns=unused_cdc_columns)

# Give the remaining columns database-friendly names.
# The assignment is positional: exactly 19 columns must remain, in this order.
cdc_column_names = [
    'state',
    'year',
    'week',
    'week_ending_date',
    'all_causes',
    'natural_causes',
    'septicemia',
    'malignant_neoplasms',
    'diabetes',
    'alzheimers',
    'influenza_and_pneumonia',
    'chronic_lower_respiratory',
    'other_diseases_of_respiratory',
    'nephritis_nephrotic_syndrome',
    'symptoms_signs_and_abnormal',
    'diseases_of_heart',
    'cerebrovascular_diseases',
    'covid_19_multiple_causes',
    'covid_19_underlying_cause',
]
cdc_df.columns = cdc_column_names

# Missing counts (no data) become 0
cdc_df = cdc_df.fillna(0)

In [6]:
# Display the cleaned CDC dataframe (trimmed columns, renamed headers, nulls filled with 0)
cdc_df

Unnamed: 0,state,year,week,week_ending_date,all_causes,natural_causes,septicemia,malignant_neoplasms,diabetes,alzheimers,influenza_and_pneumonia,chronic_lower_respiratory,other_diseases_of_respiratory,nephritis_nephrotic_syndrome,symptoms_signs_and_abnormal,diseases_of_heart,cerebrovascular_diseases,covid_19_multiple_causes,covid_19_underlying_cause
0,Alabama,2019,1,2019-01-05,1077.0,993.0,30.0,198.0,22.0,60.0,21.0,63.0,14.0,21.0,27.0,261.0,53.0,0.0,0.0
1,Alabama,2019,2,2019-01-12,1090.0,994.0,25.0,187.0,24.0,49.0,18.0,85.0,21.0,13.0,11.0,275.0,65.0,0.0,0.0
2,Alabama,2019,3,2019-01-19,1114.0,1042.0,22.0,238.0,18.0,48.0,31.0,80.0,30.0,25.0,15.0,283.0,53.0,0.0,0.0
3,Alabama,2019,4,2019-01-26,1063.0,994.0,21.0,165.0,22.0,50.0,22.0,113.0,14.0,25.0,23.0,279.0,56.0,0.0,0.0
4,Alabama,2019,5,2019-02-02,1095.0,1026.0,18.0,199.0,19.0,52.0,19.0,80.0,20.0,24.0,21.0,290.0,50.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4901,United States,2020,35,2020-08-29,55315.0,52044.0,658.0,10781.0,1620.0,2260.0,617.0,2327.0,678.0,890.0,3028.0,10795.0,2733.0,5106.0,4587.0
4902,United States,2020,36,2020-09-05,52108.0,49073.0,610.0,10179.0,1504.0,2266.0,626.0,2213.0,698.0,802.0,2971.0,10429.0,2515.0,4162.0,3668.0
4903,United States,2020,37,2020-09-12,48078.0,45611.0,549.0,9723.0,1403.0,2025.0,567.0,2093.0,601.0,754.0,3041.0,9533.0,2526.0,3313.0,2923.0
4904,United States,2020,38,2020-09-19,38755.0,37168.0,459.0,8323.0,1105.0,1780.0,465.0,1742.0,533.0,600.0,2372.0,8047.0,1992.0,1902.0,1680.0


## Get Covid Tracking Project data
Source: https://covidtracking.com

#### Build latest values dataframe

In [2]:
# Fetch the current-values snapshot from the COVID Tracking Project API
covidtracking_current_url = 'https://api.covidtracking.com/v1/states/current.json'
# The timeout keeps the cell from hanging indefinitely if the API is unreachable
covidtracking_current_response = requests.get(covidtracking_current_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as data
covidtracking_current_response.raise_for_status()
covidtracking_current_json = covidtracking_current_response.json()

# Convert to a dataframe
covidtracking_current_df = pd.DataFrame.from_dict(covidtracking_current_json)

In [3]:
# Change the null values (no data) to 0.
# NOTE(review): fillna(0) is applied to every column, so any text column with
# missing values would also receive the integer 0 — confirm that matches the
# target table's column types.
covidtracking_current_df = covidtracking_current_df.fillna(0)

In [4]:
# Remove columns we don't need.
# Columns are chosen by position and dropped in place, working from the
# highest positions (48-54) down to the lowest (15-17). Each drop therefore
# leaves the positions used by the following lines unchanged, so every index
# below refers to the column order of the original frame.
# NOTE(review): positional selection silently targets different columns if the
# API changes its field order — confirm against the current response.
covidtracking_current_df.drop(list(covidtracking_current_df)[48:55], axis=1,inplace=True)
covidtracking_current_df.drop(list(covidtracking_current_df)[39], axis=1,inplace=True)
covidtracking_current_df.drop(list(covidtracking_current_df)[20], axis=1,inplace=True)
covidtracking_current_df.drop(list(covidtracking_current_df)[15:18], axis=1,inplace=True)

In [5]:
# Rename the remaining columns to snake_case names.
# The assignment is positional: the frame must hold exactly these 43 columns
# in this order, otherwise pandas raises a length-mismatch error.
covidtracking_current_columns = [
    'date', 'state',
    'positive', 'probable_cases', 'negative', 'pending',
    'total_test_results',
    'hospitalized_currently', 'hospitalized_cumulative',
    'icu_currently', 'icu_cumulative',
    'ventilator_currently', 'ventilator_cumulative',
    'recovered', 'data_quality_grade', 'deaths', 'hospitalized',
    'total_tests_viral', 'positive_tests_viral', 'negative_tests_viral',
    'positive_cases_viral',
    'deaths_confirmed', 'deaths_probable',
    'total_test_encounters_viral', 'total_tests_people_viral',
    'total_tests_antibody', 'positive_tests_antibody', 'negative_tests_antibody',
    'total_tests_people_antibody', 'positive_tests_people_antibody',
    'negative_tests_people_antibody',
    'total_tests_people_antigen', 'positive_tests_people_antigen',
    'total_tests_antigen', 'positive_tests_antigen',
    'positive_increase', 'negative_increase',
    'total', 'total_test_results_source', 'total_test_results_increase',
    'pos_neg', 'death_increase', 'hospitalized_increase',
]
covidtracking_current_df.columns = covidtracking_current_columns

In [6]:
# Display the dataframe
#covidtracking_current_df

#### Build all values dataframe

In [7]:
# Fetch the national daily history from the COVID Tracking Project API
covidtracking_all_url = 'https://api.covidtracking.com/v1/us/daily.json'
# The timeout keeps the cell from hanging indefinitely if the API is unreachable
covidtracking_all_response = requests.get(covidtracking_all_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as data
covidtracking_all_response.raise_for_status()
covidtracking_all_json = covidtracking_all_response.json()

# Convert to a dataframe
covidtracking_all_df = pd.DataFrame.from_dict(covidtracking_all_json)

In [8]:
# Change the null values (no data) to 0.
# NOTE(review): fillna(0) is applied to every column, so any text column with
# missing values would also receive the integer 0 — confirm that matches the
# target table's column types.
covidtracking_all_df = covidtracking_all_df.fillna(0)

In [9]:
# Remove columns we don't need.
# Columns are chosen by position and dropped in place from the highest
# position (24) down to the lowest (12), so each drop leaves the positions
# used by the following lines unchanged.
# NOTE(review): positional selection silently targets different columns if the
# API changes its field order — confirm against the current response.
covidtracking_all_df.drop(list(covidtracking_all_df)[24], axis=1,inplace=True)
covidtracking_all_df.drop(list(covidtracking_all_df)[16], axis=1,inplace=True)
covidtracking_all_df.drop(list(covidtracking_all_df)[12], axis=1,inplace=True)

In [10]:
# Display the dataframe
#covidtracking_all_df

Unnamed: 0,date,states,positive,negative,pending,hospitalizedCurrently,hospitalizedCumulative,inIcuCurrently,inIcuCumulative,onVentilatorCurrently,...,death,hospitalized,totalTestResults,total,posNeg,deathIncrease,hospitalizedIncrease,negativeIncrease,positiveIncrease,totalTestResultsIncrease
0,20201006,56,7460634,97932855,8680.0,31346.0,414461.0,6438.0,20973.0,1609.0,...,202675.0,414461.0,110226302,0,0,634,-624,722475,38661,823419
1,20201005,56,7421973,97210380,11544.0,30098.0,415085.0,6209.0,20812.0,1515.0,...,202041.0,415085.0,109402883,0,0,326,1519,846256,38133,935481
2,20201004,56,7383840,96364124,11471.0,29944.0,413566.0,5974.0,20729.0,1485.0,...,201715.0,413566.0,108467402,0,0,363,649,824848,38439,955736
3,20201003,56,7345401,95539276,11464.0,30106.0,412917.0,5996.0,20686.0,1501.0,...,201352.0,412917.0,107511666,0,0,741,1166,846390,51372,997984
4,20201002,56,7294029,94692886,10813.0,30697.0,411751.0,6106.0,20612.0,1533.0,...,200611.0,411751.0,106513682,0,0,835,1342,977186,49534,1127307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,20200126,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,0
255,20200125,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,0
256,20200124,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,0
257,20200123,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,1


In [11]:
# List the column names that remain after the drops (these are the API's
# original camelCase names, renamed in the next cell)
covidtracking_all_df.columns

Index(['date', 'states', 'positive', 'negative', 'pending',
       'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently',
       'inIcuCumulative', 'onVentilatorCurrently', 'onVentilatorCumulative',
       'recovered', 'death', 'hospitalized', 'totalTestResults', 'total',
       'posNeg', 'deathIncrease', 'hospitalizedIncrease', 'negativeIncrease',
       'positiveIncrease', 'totalTestResultsIncrease'],
      dtype='object')

In [12]:
# Rename the 22 remaining columns to snake_case names (positional assignment:
# the order must match the frame's current column order).
# NOTE(review): the recorded output of the covidtracking_all load cell in this
# notebook is a ProgrammingError saying column "deaths" does not exist in
# relation "covidtracking_all" — the names below and the table schema
# disagree; reconcile them before loading.
covidtracking_all_columns = [
    'date', 'states',
    'positive', 'negative', 'pending',
    'hospitalized_currently', 'hospitalized_cumulative',
    'icu_currently', 'icu_cumulative',
    'ventilator_currently', 'ventilator_cumulative',
    'recovered', 'deaths', 'hospitalized',
    'total_test_results', 'total', 'pos_neg',
    'death_increase', 'hospitalized_increase',
    'negative_increase', 'positive_increase',
    'total_test_results_increase',
]
covidtracking_all_df.columns = covidtracking_all_columns

In [13]:
# Display the national daily dataframe with its renamed columns
covidtracking_all_df

Unnamed: 0,date,states,positive,negative,pending,hospitalized_currently,hospitalized_cumulative,icu_currently,icu_cumulative,ventilator_currently,...,deaths,hospitalized,total_test_results,total,pos_neg,death_increase,hospitalized_increase,negative_increase,positive_increase,total_test_results_increase
0,20201006,56,7460634,97932855,8680.0,31346.0,414461.0,6438.0,20973.0,1609.0,...,202675.0,414461.0,110226302,0,0,634,-624,722475,38661,823419
1,20201005,56,7421973,97210380,11544.0,30098.0,415085.0,6209.0,20812.0,1515.0,...,202041.0,415085.0,109402883,0,0,326,1519,846256,38133,935481
2,20201004,56,7383840,96364124,11471.0,29944.0,413566.0,5974.0,20729.0,1485.0,...,201715.0,413566.0,108467402,0,0,363,649,824848,38439,955736
3,20201003,56,7345401,95539276,11464.0,30106.0,412917.0,5996.0,20686.0,1501.0,...,201352.0,412917.0,107511666,0,0,741,1166,846390,51372,997984
4,20201002,56,7294029,94692886,10813.0,30697.0,411751.0,6106.0,20612.0,1533.0,...,200611.0,411751.0,106513682,0,0,835,1342,977186,49534,1127307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,20200126,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,0
255,20200125,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,0
256,20200124,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,0
257,20200123,2,0,0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2,0,0,0,0,0,0,1


## Get the county information table

In [None]:
# Wikipedia page holding the county information table
county_url = 'https://en.wikipedia.org/wiki/User:Michael_J/County_table'
# read_html returns a list with one dataframe per HTML table found on the page
county_table = pd.read_html(io=county_url)

In [None]:
# Grab the first table on the page
county_table_df = county_table[0]

# Drop columns we don't need
county_table_df = county_table_df.drop(columns=['Land Areakm²','Land Areami²','Water Areakm²','Water Areami²','Total Areakm²','Total Areami²','Sort [1]','Population(2010)'])

# Rename the remaining columns (positional: six columns must remain, in this order)
county_table_df.columns = ['state','fips','county','county_seat','lat','lon']

# Convert the coordinate text to signed floats.
# The previous approach cut off the first character and then negated every
# longitude, which discards the real sign: a positive longitude was flipped to
# negative, and a value without a sign prefix lost its first digit. Here the
# sign is parsed instead:
#   - the degree symbol and an explicit '+' are removed,
#   - typographic minus/dash characters are normalised to ASCII '-',
#   - the cleaned text is converted with float parsing, keeping its sign.
# NOTE(review): assumes negatives on the page are written with a hyphen, a
# Unicode minus (U+2212) or an en/em dash — confirm against the scraped text.
for coord in ('lat', 'lon'):
    county_table_df[coord] = (
        county_table_df[coord]
        .astype(str)
        .str.strip()
        .str.replace('°', '', regex=False)
        .str.replace('[\u2212\u2013\u2014]', '-', regex=True)
        .str.replace('+', '', regex=False)
        .astype(float)
    )

# Change the null values (no data) to 0
county_table_df = county_table_df.fillna(0)

In [None]:
# Display the dataframe
#county_table_df

## Connect and load to PostgreSQL
### Available dataframes:
<b>NYT:</b> covid_all_df, covid_latest_df <br>
<b>Masks:</b> masks_df <br>
<b>CDC:</b> cdc_df <br>
<b>Covid tracking:</b> covidtracking_current_df, covidtracking_all_df<br>
<b>County info:</b> county_table_df

In [7]:
# Create the SQLAlchemy engine used by the load and query cells below.
#
# db_login_info is imported from config.py and should be a URL of the form:
# "postgresql://<USERNAME>:<PASSWORD>@<HOST>:<PORT>/<DATABASE_NAME>"

engine = create_engine(str(db_login_info))

In [8]:
# Check for tables.
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names and also works on
# older releases.
table_names = inspect(engine).get_table_names()
table_names

['county',
 'covid',
 'covidtracking_current',
 'covidtracking_all',
 'masks',
 'cdc']

In [None]:
# Release the engine's pooled connections.
# NOTE(review): the same `engine` object is used again by the load and query
# cells below; per the SQLAlchemy docs a disposed engine opens a fresh pool on
# next use — confirm this cell is intentionally placed before the loads.
engine.dispose()

### Load data into database

In [None]:
# Append the rows of covid_all_df to the existing 'covid' table
# (append mode: running this cell again inserts the same rows a second time)
covid_all_df.to_sql(
    name='covid',
    con=engine,
    if_exists='append',
    index=False,
)

In [None]:
# Append the rows of masks_df to the existing 'masks' table
# (append mode: running this cell again inserts the same rows a second time)
masks_df.to_sql(
    name='masks',
    con=engine,
    if_exists='append',
    index=False,
)

In [None]:
# Append the rows of cdc_df to the existing 'cdc' table
# (append mode: running this cell again inserts the same rows a second time)
cdc_df.to_sql(
    name='cdc',
    con=engine,
    if_exists='append',
    index=False,
)

In [16]:
# Append the rows of covidtracking_current_df to the existing
# 'covidtracking_current' table
# (append mode: running this cell again inserts the same rows a second time)
covidtracking_current_df.to_sql(
    name='covidtracking_current',
    con=engine,
    if_exists='append',
    index=False,
)

In [17]:
# Append the rows of covidtracking_all_df to the existing 'covidtracking_all' table.
# NOTE(review): the recorded output of this cell is a ProgrammingError
# (UndefinedColumn: column "deaths" of relation "covidtracking_all" does not
# exist) — the dataframe's column names and the table schema must be
# reconciled before this load can succeed.
covidtracking_all_df.to_sql(
    name='covidtracking_all',
    con=engine,
    if_exists='append',
    index=False,
)

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "deaths" of relation "covidtracking_all" does not exist
LINE 1: ...ator_currently, ventilator_cumulative, recovered, deaths, ho...
                                                             ^

[SQL: INSERT INTO covidtracking_all (date, states, positive, negative, pending, hospitalized_currently, hospitalized_cumulative, icu_currently, icu_cumulative, ventilator_currently, ventilator_cumulative, recovered, deaths, hospitalized, total_test_results, total, pos_neg, death_increase, hospitalized_increase, negative_increase, positive_increase, total_test_results_increase) VALUES (%(date)s, %(states)s, %(positive)s, %(negative)s, %(pending)s, %(hospitalized_currently)s, %(hospitalized_cumulative)s, %(icu_currently)s, %(icu_cumulative)s, %(ventilator_currently)s, %(ventilator_cumulative)s, %(recovered)s, %(deaths)s, %(hospitalized)s, %(total_test_results)s, %(total)s, %(pos_neg)s, %(death_increase)s, %(hospitalized_increase)s, %(negative_increase)s, %(positive_increase)s, %(total_test_results_increase)s)]
[parameters: ({'date': 20201006, 'states': 56, 'positive': 7460634, 'negative': 97932855, 'pending': 8680.0, 'hospitalized_currently': 31346.0, 'hospitalized_cumulative': 414461.0, 'icu_currently': 6438.0, 'icu_cumulative': 20973.0, 'ventilator_currently': 1609.0, 'ventilator_cumulative': 2388.0, 'recovered': 2952390.0, 'deaths': 202675.0, 'hospitalized': 414461.0, 'total_test_results': 110226302, 'total': 0, 'pos_neg': 0, 'death_increase': 634, 'hospitalized_increase': -624, 'negative_increase': 722475, 'positive_increase': 38661, 'total_test_results_increase': 823419}, {'date': 20201005, 'states': 56, 'positive': 7421973, 'negative': 97210380, 'pending': 11544.0, 'hospitalized_currently': 30098.0, 'hospitalized_cumulative': 415085.0, 'icu_currently': 6209.0, 'icu_cumulative': 20812.0, 'ventilator_currently': 1515.0, 'ventilator_cumulative': 2370.0, 'recovered': 2935196.0, 'deaths': 202041.0, 'hospitalized': 415085.0, 'total_test_results': 109402883, 'total': 0, 'pos_neg': 0, 'death_increase': 326, 'hospitalized_increase': 1519, 'negative_increase': 846256, 'positive_increase': 38133, 'total_test_results_increase': 935481}, {'date': 20201004, 'states': 56, 'positive': 7383840, 'negative': 96364124, 'pending': 11471.0, 'hospitalized_currently': 29944.0, 'hospitalized_cumulative': 413566.0, 'icu_currently': 5974.0, 'icu_cumulative': 20729.0, 'ventilator_currently': 1485.0, 'ventilator_cumulative': 2362.0, 'recovered': 2911789.0, 'deaths': 201715.0, 'hospitalized': 413566.0, 'total_test_results': 108467402, 'total': 0, 'pos_neg': 0, 'death_increase': 363, 'hospitalized_increase': 649, 'negative_increase': 824848, 'positive_increase': 38439, 'total_test_results_increase': 955736}, {'date': 20201003, 'states': 56, 'positive': 7345401, 'negative': 95539276, 'pending': 11464.0, 'hospitalized_currently': 30106.0, 'hospitalized_cumulative': 412917.0, 'icu_currently': 5996.0, 'icu_cumulative': 20686.0, 'ventilator_currently': 1501.0, 'ventilator_cumulative': 2358.0, 
'recovered': 2897350.0, 'deaths': 201352.0, 'hospitalized': 412917.0, 'total_test_results': 107511666, 'total': 0, 'pos_neg': 0, 'death_increase': 741, 'hospitalized_increase': 1166, 'negative_increase': 846390, 'positive_increase': 51372, 'total_test_results_increase': 997984}, {'date': 20201002, 'states': 56, 'positive': 7294029, 'negative': 94692886, 'pending': 10813.0, 'hospitalized_currently': 30697.0, 'hospitalized_cumulative': 411751.0, 'icu_currently': 6106.0, 'icu_cumulative': 20612.0, 'ventilator_currently': 1533.0, 'ventilator_cumulative': 2348.0, 'recovered': 2873369.0, 'deaths': 200611.0, 'hospitalized': 411751.0, 'total_test_results': 106513682, 'total': 0, 'pos_neg': 0, 'death_increase': 835, 'hospitalized_increase': 1342, 'negative_increase': 977186, 'positive_increase': 49534, 'total_test_results_increase': 1127307}, {'date': 20201001, 'states': 56, 'positive': 7244495, 'negative': 93715700, 'pending': 13003.0, 'hospitalized_currently': 30742.0, 'hospitalized_cumulative': 410409.0, 'icu_currently': 6173.0, 'icu_cumulative': 20492.0, 'ventilator_currently': 1547.0, 'ventilator_cumulative': 2334.0, 'recovered': 2860650.0, 'deaths': 199776.0, 'hospitalized': 410409.0, 'total_test_results': 105386375, 'total': 0, 'pos_neg': 0, 'death_increase': 851, 'hospitalized_increase': 1760, 'negative_increase': 759865, 'positive_increase': 45694, 'total_test_results_increase': 897042}, {'date': 20200930, 'states': 56, 'positive': 7198801, 'negative': 92955835, 'pending': 9431.0, 'hospitalized_currently': 30877.0, 'hospitalized_cumulative': 408649.0, 'icu_currently': 6145.0, 'icu_cumulative': 20390.0, 'ventilator_currently': 1507.0, 'ventilator_cumulative': 2319.0, 'recovered': 2840747.0, 'deaths': 198925.0, 'hospitalized': 408649.0, 'total_test_results': 104489333, 'total': 0, 'pos_neg': 0, 'death_increase': 1061, 'hospitalized_increase': 1590, 'negative_increase': 674267, 'positive_increase': 44424, 'total_test_results_increase': 810800}, {'date': 20200929, 
'states': 56, 'positive': 7154377, 'negative': 92281568, 'pending': 7695.0, 'hospitalized_currently': 30391.0, 'hospitalized_cumulative': 407059.0, 'icu_currently': 6106.0, 'icu_cumulative': 20247.0, 'ventilator_currently': 1495.0, 'ventilator_cumulative': 2298.0, 'recovered': 2813305.0, 'deaths': 197864.0, 'hospitalized': 407059.0, 'total_test_results': 103678533, 'total': 0, 'pos_neg': 0, 'death_increase': 739, 'hospitalized_increase': 1733, 'negative_increase': 707529, 'positive_increase': 36947, 'total_test_results_increase': 825759}  ... displaying 10 of 259 total bound parameter sets ...  {'date': 20200123, 'states': 2, 'positive': 0, 'negative': 0, 'pending': 0.0, 'hospitalized_currently': 0.0, 'hospitalized_cumulative': 0.0, 'icu_currently': 0.0, 'icu_cumulative': 0.0, 'ventilator_currently': 0.0, 'ventilator_cumulative': 0.0, 'recovered': 0.0, 'deaths': 0.0, 'hospitalized': 0.0, 'total_test_results': 2, 'total': 0, 'pos_neg': 0, 'death_increase': 0, 'hospitalized_increase': 0, 'negative_increase': 0, 'positive_increase': 0, 'total_test_results_increase': 1}, {'date': 20200122, 'states': 2, 'positive': 0, 'negative': 0, 'pending': 0.0, 'hospitalized_currently': 0.0, 'hospitalized_cumulative': 0.0, 'icu_currently': 0.0, 'icu_cumulative': 0.0, 'ventilator_currently': 0.0, 'ventilator_cumulative': 0.0, 'recovered': 0.0, 'deaths': 0.0, 'hospitalized': 0.0, 'total_test_results': 1, 'total': 0, 'pos_neg': 0, 'death_increase': 0, 'hospitalized_increase': 0, 'negative_increase': 0, 'positive_increase': 0, 'total_test_results_increase': 0})]
(Background on this error at: http://sqlalche.me/e/13/f405)

In [None]:
# Append the rows of county_table_df to the existing 'county' table
# (append mode: running this cell again inserts the same rows a second time)
county_table_df.to_sql(
    name='county',
    con=engine,
    if_exists='append',
    index=False,
)

### Confirm data load

In [None]:
# Read the 'covid' table back to confirm the load
pd.read_sql_query(sql='select * from covid', con=engine)

In [None]:
# Read the 'masks' table back to confirm the load
pd.read_sql_query(sql='select * from masks', con=engine)

In [None]:
# Read the 'cdc' table back to confirm the load
pd.read_sql_query(sql='select * from cdc', con=engine)

In [None]:
# Read the 'county' table back to confirm the load
pd.read_sql_query(sql='select * from county', con=engine)

In [None]:
# Read the 'covidtracking_all' table back to confirm the load
pd.read_sql_query(sql='select * from covidtracking_all', con=engine)

In [None]:
# Read the 'combined' table to confirm the covid, mask, and county data were joined.
# Run this only after join_tables.sql has been used to join the tables.
pd.read_sql_query(sql='select * from combined', con=engine)