### Links To CDC for Raw Data

## Covid-19 Infection Data
### url-1: https://data.cdc.gov/Case-Surveillance/United-States-COVID-19-Cases-and-Deaths-by-State-o/9mfq-cb36

## Covid-19 Vaccine Distribution
### url-2: https://data.cdc.gov/Vaccinations/COVID-19-Vaccinations-in-the-United-States-Jurisdi/unsk-b7fc

In [1]:
# import dependencies
import pandas as pd
import datetime as dt
import sqlite3
from sqlite3 import Error
from sqlalchemy import create_engine, inspect, func
import psycopg2
import requests
from sodapy import Socrata

# import cdc key for API
from cdc_token import my_token

## USA Monthly Cases by State 

In [31]:
# Create a Socrata client for data.cdc.gov, authenticated with the app token
# imported from cdc_token.
client = Socrata("data.cdc.gov", my_token)

# Request dataset 9mfq-cb36 (url-1 above). sodapy converts the JSON response
# into a Python list of dictionaries. limit=100000 caps the number of rows
# requested.
results = client.get("9mfq-cb36", limit=100000)

# Convert to pandas DataFrame and display the last rows
covid_df = pd.DataFrame.from_records(results)
covid_df.tail()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death
60055,2021-02-09T00:00:00.000,TX,2504556,,,13329.0,2676,43306,207.0,0,2021-02-11T00:00:00.000,Not agree,Not agree,,
60056,2020-11-20T00:00:00.000,FL,913561,,,8217.0,1677,19014,79.0,5,2020-11-20T00:00:00.000,Not agree,Not agree,,
60057,2020-08-17T00:00:00.000,NM,23500,,,92.0,0,682,4.0,0,2020-08-19T00:00:00.000,,Not agree,,
60058,2020-06-17T00:00:00.000,MS,24223,24038.0,185.0,521.0,6,1191,9.0,0,2020-06-19T00:00:00.000,Agree,Agree,1172.0,19.0
60059,2022-05-07T00:00:00.000,MA,1779829,1638144.0,141685.0,0.0,0,20334,0.0,0,2022-05-09T13:14:40.058,Agree,Agree,19198.0,1136.0


In [4]:
# Derive a "YYYY-MM" string from each submission date; later cells group on it.
covid_df = covid_df.assign(
    date=lambda frame: pd.to_datetime(frame["submission_date"])
    .dt.to_period("M")
    .astype(str)
)
covid_df.tail()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death,date
60055,2021-02-09T00:00:00.000,TX,2504556,,,13329.0,2676,43306,207.0,0,2021-02-11T00:00:00.000,Not agree,Not agree,,,2021-02
60056,2020-11-20T00:00:00.000,FL,913561,,,8217.0,1677,19014,79.0,5,2020-11-20T00:00:00.000,Not agree,Not agree,,,2020-11
60057,2020-08-17T00:00:00.000,NM,23500,,,92.0,0,682,4.0,0,2020-08-19T00:00:00.000,,Not agree,,,2020-08
60058,2020-06-17T00:00:00.000,MS,24223,24038.0,185.0,521.0,6,1191,9.0,0,2020-06-19T00:00:00.000,Agree,Agree,1172.0,19.0,2020-06
60059,2022-05-07T00:00:00.000,MA,1779829,1638144.0,141685.0,0.0,0,20334,0.0,0,2022-05-09T13:14:40.058,Agree,Agree,19198.0,1136.0,2022-05


In [5]:
# Check column dtypes: every column is still `object`, so the numeric fields
# have to be converted before any arithmetic.
covid_df.dtypes

submission_date    object
state              object
tot_cases          object
conf_cases         object
prob_cases         object
new_case           object
pnew_case          object
tot_death          object
new_death          object
pnew_death         object
created_at         object
consent_cases      object
consent_deaths     object
conf_death         object
prob_death         object
date               object
dtype: object

In [6]:
# Cast the two counters from object to float, then keep only the columns the
# rest of the notebook uses.
covid_df = covid_df.astype({"tot_cases": float, "tot_death": float})[
    ["date", "state", "tot_cases", "tot_death"]
]
covid_df.head()

Unnamed: 0,date,state,tot_cases,tot_death
0,2021-03,KS,297229.0,4851.0
1,2021-12,ND,163565.0,1907.0
2,2022-01,AS,11.0,0.0
3,2021-11,AL,841461.0,16377.0
4,2022-05,AK,251425.0,1252.0


In [6]:
# covid_test = covid_max.loc[(covid_max['state'] == 'TX')]
# covid_test.head(60)

In [12]:
# group by states to be used later
# state_df = covid_df.groupby('state').mean().reset_index()
# state_df.head()

In [7]:
# Add a Total_recovered column computed as total cases minus total deaths.
# NOTE(review): this treats every non-fatal case as recovered, so still-active
# cases are included in the figure.
covid_df = covid_df.assign(
    Total_recovered=lambda frame: frame["tot_cases"] - frame["tot_death"]
)
covid_df.tail()

Unnamed: 0,date,state,tot_cases,tot_death,Total_recovered
60055,2021-02,TX,2504556.0,43306.0,2461250.0
60056,2020-11,FL,913561.0,19014.0,894547.0
60057,2020-08,NM,23500.0,682.0,22818.0
60058,2020-06,MS,24223.0,1191.0,23032.0
60059,2022-05,MA,1779829.0,20334.0,1759495.0


In [8]:
# Collapse to one row per (month, state) by taking the maximum of every value
# column within the month, then round to 2 decimals.
# NOTE(review): the tot_* columns look cumulative, so the maximum is presumably
# the month-end total rather than a monthly average -- confirm.
covid_monthly_df = (
    covid_df.groupby(["date", "state"])
    .max()
    .reset_index()
    .round(2)
)
covid_monthly_df

Unnamed: 0,date,state,tot_cases,tot_death,Total_recovered
0,2020-01,AK,0.0,0.0,0.0
1,2020-01,AL,0.0,0.0,0.0
2,2020-01,AR,0.0,0.0,0.0
3,2020-01,AS,0.0,0.0,0.0
4,2020-01,AZ,1.0,0.0,1.0
...,...,...,...,...,...
2035,2022-10,VT,133845.0,700.0,133145.0
2036,2022-10,WA,1828918.0,14464.0,1814454.0
2037,2022-10,WI,1884206.0,15374.0,1868832.0
2038,2022-10,WV,605012.0,7479.0,597533.0


In [9]:
# Rename the columns to the capitalised names used by the SQLite tables that
# are created further down in this notebook.
covid_monthly_df = covid_monthly_df.rename(
    columns={
        "date": "Date",
        "state": "State",
        "tot_cases": "Total_cases",
        "tot_death": "Total_death",
    }
)
covid_monthly_df.head()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered
0,2020-01,AK,0.0,0.0,0.0
1,2020-01,AL,0.0,0.0,0.0
2,2020-01,AR,0.0,0.0,0.0
3,2020-01,AS,0.0,0.0,0.0
4,2020-01,AZ,1.0,0.0,1.0


In [10]:
# Express deaths and recoveries as a percentage of total cases (2 decimals).
# Rows with zero cases divide 0 by 0 and give NaN; fillna(0) below turns those
# (and any other missing value in the frame) into 0.
covid_monthly_df["Death_percent"] = (
    covid_monthly_df["Total_death"]
    .div(covid_monthly_df["Total_cases"])
    .mul(100)
    .round(2)
)
covid_monthly_df["Recovery_percent"] = (
    covid_monthly_df["Total_recovered"]
    .div(covid_monthly_df["Total_cases"])
    .mul(100)
    .round(2)
)
covid_monthly_df = covid_monthly_df.fillna(0)

# Independent copy that the infection-map section trims down to one row per state.
covid_infec_df = covid_monthly_df.copy()
covid_monthly_df.tail()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
2035,2022-10,VT,133845.0,700.0,133145.0,0.52,99.48
2036,2022-10,WA,1828918.0,14464.0,1814454.0,0.79,99.21
2037,2022-10,WI,1884206.0,15374.0,1868832.0,0.82,99.18
2038,2022-10,WV,605012.0,7479.0,597533.0,1.24,98.76
2039,2022-10,WY,178032.0,1906.0,176126.0,1.07,98.93


In [11]:
# Save the monthly per-state table as a CSV file. The DataFrame index is
# written as the first column because index=False is not passed.
covid_monthly_df.to_csv('../Data/monthly.csv')

## USA Covid-19 Infection Map Data

In [12]:
# Keep only the last row for each state. covid_infec_df is a copy of the
# grouped monthly frame, which is ordered by month, so keep="last" retains
# each state's most recent month; the index is renumbered from 0.
covid_infec_df = covid_infec_df.drop_duplicates(
    subset="State", keep="last", ignore_index=True
)
covid_infec_df.tail()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
55,2022-10,VT,133845.0,700.0,133145.0,0.52,99.48
56,2022-10,WA,1828918.0,14464.0,1814454.0,0.79,99.21
57,2022-10,WI,1884206.0,15374.0,1868832.0,0.82,99.18
58,2022-10,WV,605012.0,7479.0,597533.0,1.24,98.76
59,2022-10,WY,178032.0,1906.0,176126.0,1.07,98.93


## USA Covid-19 Vaccine Status by States

In [13]:
# Create a Socrata client for data.cdc.gov, authenticated with the app token.
client = Socrata("data.cdc.gov", my_token)

# Request dataset unsk-b7fc (url-2 above); sodapy converts the JSON response
# into a Python list of dictionaries.
# Ask for newest rows first explicitly: a later cell keeps the FIRST row seen
# for each location as its latest snapshot, and without an order clause the
# row order is left to the API.
results = client.get("unsk-b7fc", limit=100000, order="date DESC")

# Convert to pandas DataFrame and display
covid_vac_df = pd.DataFrame.from_records(results)
covid_vac_df.head()

Unnamed: 0,date,mmwr_week,location,distributed,distributed_janssen,distributed_moderna,distributed_pfizer,distributed_novavax,distributed_unk_manuf,dist_per_100k,...,bivalent_booster_18plus,bivalent_booster_18plus_pop_pct,bivalent_booster_65plus,bivalent_booster_65plus_pop_pct,second_booster,administered_bivalent,admin_bivalent_pfr,admin_bivalent_mod,dist_bivalent_pfr,dist_bivalent_mod
0,2022-12-07T00:00:00.000,49,OH,29711725,1008800,10758240,17922085,22600,0,254183,...,1386545,15.2,715407,35.0,,,,,,
1,2022-12-07T00:00:00.000,49,IA,8731035,292700,3283220,5138915,16200,0,276730,...,463323,19.1,250822,45.4,,,,,,
2,2022-12-07T00:00:00.000,49,VI,167860,3200,46240,118220,200,0,157926,...,2412,3.0,1414,6.9,,,,,,
3,2022-12-07T00:00:00.000,49,TN,16312400,529600,6078280,9690420,14100,0,238863,...,536787,10.1,286123,25.0,,,,,,
4,2022-12-07T00:00:00.000,49,ND,1794020,53600,638220,1100000,2200,0,235417,...,87890,15.1,46773,39.0,,,,,,


In [14]:
# Check column dtypes: the columns listed in the output are all `object`, so
# the counters used below need an explicit cast.
covid_vac_df.dtypes

date                     object
mmwr_week                object
location                 object
distributed              object
distributed_janssen      object
                          ...  
administered_bivalent    object
admin_bivalent_pfr       object
admin_bivalent_mod       object
dist_bivalent_pfr        object
dist_bivalent_mod        object
Length: 109, dtype: object

In [15]:
# Derive a "YYYY-MM" string column (Date) from the raw `date` timestamp.
covid_vac_df = covid_vac_df.assign(
    Date=lambda frame: pd.to_datetime(frame["date"])
    .dt.to_period("M")
    .astype(str)
)
covid_vac_df.tail()

Unnamed: 0,date,mmwr_week,location,distributed,distributed_janssen,distributed_moderna,distributed_pfizer,distributed_novavax,distributed_unk_manuf,dist_per_100k,...,bivalent_booster_18plus_pop_pct,bivalent_booster_65plus,bivalent_booster_65plus_pop_pct,second_booster,administered_bivalent,admin_bivalent_pfr,admin_bivalent_mod,dist_bivalent_pfr,dist_bivalent_mod,Date
37075,2020-12-13T00:00:00.000,51,AS,3900,0,0,0,,0,7003,...,,,,,,,,,,2020-12
37076,2020-12-13T00:00:00.000,51,LTC,0,0,0,0,,0,0,...,,,,,,,,,,2020-12
37077,2020-12-13T00:00:00.000,51,VI,975,0,0,0,,0,931,...,,,,,,,,,,2020-12
37078,2020-12-13T00:00:00.000,51,US,13650,0,0,0,,0,4,...,,,,,,,,,,2020-12
37079,2020-12-13T00:00:00.000,51,GU,3900,0,0,0,,0,2353,...,,,,,,,,,,2020-12


In [16]:
# Cast the four vaccine counters from object to float, then keep only the
# columns the rest of the notebook uses.
covid_vac_df = covid_vac_df.astype(
    {
        "distributed": float,
        "dist_per_100k": float,
        "administered": float,
        "admin_per_100k": float,
    }
)[
    [
        "Date",
        "location",
        "distributed",
        "dist_per_100k",
        "administered",
        "admin_per_100k",
    ]
]
covid_vac_df.head()

Unnamed: 0,Date,location,distributed,dist_per_100k,administered,admin_per_100k
0,2022-12,OH,29711725.0,254183.0,20509252.0,175456.0
1,2022-12,IA,8731035.0,276730.0,5950297.0,188595.0
2,2022-12,VI,167860.0,157926.0,153128.0,144066.0
3,2022-12,TN,16312400.0,238863.0,11104577.0,162605.0
4,2022-12,ND,1794020.0,235417.0,1276889.0,167557.0


In [17]:
# Rename the columns to the capitalised names used by the SQLite `vaccine`
# table that is created further down in this notebook.
covid_vac_df = covid_vac_df.rename(
    columns={
        "location": "State",
        "distributed": "Distributed",
        "dist_per_100k": "Dist_per_100k",
        "administered": "Administered",
        "admin_per_100k": "Admin_per_100k",
    }
)
covid_vac_df.tail()

Unnamed: 0,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
37075,2020-12,AS,3900.0,7003.0,0.0,0.0
37076,2020-12,LTC,0.0,0.0,0.0,0.0
37077,2020-12,VI,975.0,931.0,0.0,0.0
37078,2020-12,US,13650.0,4.0,0.0,0.0
37079,2020-12,GU,3900.0,2353.0,0.0,0.0


In [18]:
# Keep a single row per location: the first one encountered; the index is
# renumbered from 0.
# NOTE(review): this yields each location's latest figures only if the rows
# are still in newest-first order, as they appear in the output above. That
# ordering is assumed here, not enforced.
covid_vac_df = covid_vac_df.drop_duplicates(
    subset="State", keep="first", ignore_index=True
)
covid_vac_df.head()

Unnamed: 0,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
0,2022-12,OH,29711725.0,254183.0,20509252.0,175456.0
1,2022-12,IA,8731035.0,276730.0,5950297.0,188595.0
2,2022-12,VI,167860.0,157926.0,153128.0,144066.0
3,2022-12,TN,16312400.0,238863.0,11104577.0,162605.0
4,2022-12,ND,1794020.0,235417.0,1276889.0,167557.0


In [20]:
# Confirm the four counter columns are float64 before the frame is saved.
covid_vac_df.dtypes

Date               object
State              object
Distributed       float64
Dist_per_100k     float64
Administered      float64
Admin_per_100k    float64
dtype: object

In [21]:
# Save the per-location vaccine table as a CSV file. The DataFrame index is
# written as the first column because index=False is not passed.
covid_vac_df.to_csv('../Data/vaccine.csv')

## USA Total Cases and Death by State

In [21]:
# load the data file and read it into a pandas dataframe
# file = "../Data/US_COVID-19_Deaths.csv"
# covid_stats_df = pd.read_csv(file)
# covid_stats_df.head()

In [22]:
# select relevant columns only
# covid_stats_df = covid_stats_df[['State/Territory', 'Total Cases', 'Case Rate per 100000', 'Total Deaths', 'Death Rate per 100000']]
# covid_stats_df.head()

In [23]:
# add a state abbreviation column
# covid_stats_df['State'] = state_df['state']
# covid_stats_df.head()

In [24]:
# rename columns and drop any column with NA
# covid_stats_df = covid_stats_df.rename(columns={"state": "State", "Total Cases": "Total_cases", "Case Rate per 100000": "Case_rate_per_100k", "Total Deaths": "Total_deaths", "Death Rate per 100000": "Death_rate_per_100k"})
# covid_stats_df = covid_stats_df[["State", "Total_cases", "Case_rate_per_100k", "Total_deaths", "Death_rate_per_100k"]].fillna(0)
# covid_stats_df.tail()

## USA COVID-19 data with latitude and longitude to create an interactive map

In [25]:
# upload file and read to pandas dataframe
# file = "../Data/US_COVID-19_stats.csv"
# us_covid_df = pd.read_csv(file)
# us_covid_df.head()

In [26]:
# select relevant columns
# us_covid_df = us_covid_df[["Province_State", "Lat", "Long_", "Confirmed", "Deaths", "Recovered", "Mortality_Rate"]]
# us_covid_df.head()

In [27]:
# add state column to be filled with state abbreviation
# us_covid_df["State"] = "" 
# us_covid_df.head()

In [28]:
# # upload States and their abbreviation
# us_state_abbrev = {
    
#     'Alabama': 'AL',
#     'Alaska': 'AK',
#     'American Samoa': 'AS',
#     'Arizona': 'AZ',
#     'Arkansas': 'AR',
#     'California': 'CA',
#     'Colorado': 'CO',
#     'Connecticut': 'CT',
#     'Delaware': 'DE',
#     'Diamond Princess': 'DP',
#     'District of Columbia': 'DC',
#     'Florida': 'FL',
#     'Georgia': 'GA',
#     'Grand Princess': 'GP',
#     'Guam': 'GU',
#     'Hawaii': 'HI',
#     'Idaho': 'ID',
#     'Illinois': 'IL',
#     'Indiana': 'IN',
#     'Iowa': 'IA',
#     'Kansas': 'KS',
#     'Kentucky': 'KY',
#     'Louisiana': 'LA',
#     'Maine': 'ME',
#     'Maryland': 'MD',
#     'Massachusetts': 'MA',
#     'Michigan': 'MI',
#     'Minnesota': 'MN',
#     'Mississippi': 'MS',
#     'Missouri': 'MO',
#     'Montana': 'MT',
#     'Nebraska': 'NE',
#     'Nevada': 'NV',
#     'New Hampshire': 'NH',
#     'New Jersey': 'NJ',
#     'New Mexico': 'NM',
#     'New York': 'NY',
#     'North Carolina': 'NC',
#     'North Dakota': 'ND',
#     'Northern Mariana Islands':'MP',
#     'Ohio': 'OH',
#     'Oklahoma': 'OK',
#     'Oregon': 'OR',
#     'Pennsylvania': 'PA',
#     'Puerto Rico': 'PR',
#     'Rhode Island': 'RI',
#     'South Carolina': 'SC',
#     'South Dakota': 'SD',
#     'Tennessee': 'TN',
#     'Texas': 'TX',
#     'Utah': 'UT',
#     'Vermont': 'VT',
#     'Virgin Islands': 'VI',
#     'Virginia': 'VA',
#     'Washington': 'WA',
#     'West Virginia': 'WV',
#     'Wisconsin': 'WI',
#     'Wyoming': 'WY'
# }
# abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))

In [29]:
# loop through and assign each state's abbreviation accordingly
# for index, row in us_covid_df.iterrows():
#     us_covid_df.loc[index, "State"] = us_state_abbrev[row[0]]

# us_covid_df.head()

In [None]:
# loop through and fill in Lat/Long for the rows that need manual coordinates (DP = Diamond Princess, GP = Grand Princess)
# for index, row in us_covid_df.iterrows():
#     if row["State"] == "DP":
#         us_covid_df.loc[index, "Lat"] = 14.5214
#         us_covid_df.loc[index, "Long_"] = 120.9709

#     elif row["State"] == "GP":
#         us_covid_df.loc[index, "Lat"] = 32.1584
#         us_covid_df.loc[index, "Long_"] = 117.5676

    # if row["State"] == FSM:
    #     us_covid_df.loc[index, "Lat"] = 6.8874
    #     us_covid_df.loc[index, "Long"] = 158.2150

    # elif row["State"] == NYC:
    #     us_covid_df.loc[index, "Lat"] = 40.7128
    #     us_covid_df.loc[index, "Long"] = -74.0060

    # elif row["State"] == PW:
    #     us_covid_df.loc[index, "Lat"] = 7.5150
    #     us_covid_df.loc[index, "Long"] = 134.5825

    # elif row["State"] == RMI:
    #     us_covid_df.loc[index, "Lat"] = 7.1315
    #     us_covid_df.loc[index, "Long"] = 171.1845

# us_covid_df.head()

In [30]:
# rename columns and add recovery percent column
# us_covid_df = us_covid_df.rename(columns={"Long_": "Long", "Mortality_Rate":"Death_percent"})
# us_covid_df["Recovery_percent"]=us_covid_df.Recovered/us_covid_df.Confirmed
# us_covid_df = us_covid_df[["State", "Lat", "Long", "Death_percent", "Recovery_percent"]].fillna(0)
# us_covid_df.head()

In [32]:
# loop through and fill in Lat/Long for the rows that need manual coordinates (DP = Diamond Princess, GP = Grand Princess)
# for index, row in us_covid_df.iterrows():
#     if row["State"] == "DP":
#         us_covid_df.loc[index, "Lat"] = 14.5214
#         us_covid_df.loc[index, "Long"] = 120.9709

#     elif row["State"] == "GP":
#         us_covid_df.loc[index, "Lat"] = 32.1584
#         us_covid_df.loc[index, "Long"] = 117.5676

    # elif row["State"] == FSM:
    #     us_covid_df.loc[index, "Lat"] = 6.8874
    #     us_covid_df.loc[index, "Long"] = 158.2150

    # elif row["State"] == NYC:
    #     us_covid_df.loc[index, "Lat"] = 40.7128
    #     us_covid_df.loc[index, "Long"] = -74.0060

    # elif row["State"] == PW:
    #     us_covid_df.loc[index, "Lat"] = 7.5150
    #     us_covid_df.loc[index, "Long"] = 134.5825

    # elif row["State"] == RMI:
    #     us_covid_df.loc[index, "Lat"] = 7.1315
    #     us_covid_df.loc[index, "Long"] = 171.1845

# us_covid_df.head()

In [33]:
# merge the two dataframes into one using a left join on State.
# us_covid_combine_df = pd.merge(covid_stats_df, us_covid_df, how='left', on=['State','State'])
# us_covid_combine_df.head()

In [34]:
# save to a csv file
# us_covid_combine_df.to_csv('../Data/states.csv')

## Add Geo property to states in old state file

In [35]:
# Add Lat and Long Columns
# geomap_df = covid_monthly_df.groupby('State').mean().reset_index()
# covid_monthly_df["Long"] = ""
# geomap_df.head()

## World COVID-19 Data by Country

In [36]:
# upload file and read into a pandas dataframe
# file = "../Data/World_COVID-19_Stats.csv"
# world_covid_df = pd.read_csv(file)
# world_covid_df.head()

In [37]:
# select relevant columns only
# world_covid_df = world_covid_df[["Country_Region", "Lat", "Long_", "Confirmed", "Deaths", "Recovered", "Case-Fatality_Ratio"]]
# world_covid_df.head()

In [38]:
# rename columns and add a recovery percent column
# world_covid_df = world_covid_df.rename(columns={"Long_": "Long", "Case-Fatality_Ratio":"Death_percent", "Country_Region": "Country"})
# world_covid_df["Recovery_percent"]=world_covid_df.Recovered/world_covid_df.Confirmed
# world_covid_df.head()

In [39]:
# add missing geoinfo to country
# for index, row in world_covid_df.iterrows(): 
#     if row["Country"] == "Canada":
#         world_covid_df.loc[index, "Lat"] = 56.1304
#         world_covid_df.loc[index, "Long"] = 106.3468
# world_covid_df

In [40]:
# save file as a csv
# world_covid_df.to_csv('../Data/world.csv')

## Build SQLite Database

In [22]:
# import modules
import sqlite3
from sqlite3 import Error

# series of functions to  build database tables in sqlite database.

def create_connection(db_file):
    """Open a connection to the SQLite database stored at ``db_file``.

    :param db_file: database file
    :return: the open Connection object, or None when connecting fails
        (the sqlite3 error is printed, not raised)
    """
    try:
        return sqlite3.connect(db_file)
    except Error as exc:
        # Best effort: report the problem and let the caller deal with None.
        print(exc)
        return None
def create_table(conn, create_table_sql):
    """Execute a CREATE TABLE statement on an open connection.

    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return: None; a sqlite3 error is printed instead of being raised
    """
    try:
        conn.cursor().execute(create_table_sql)
    except Error as exc:
        print(exc)

def drop_table(conn, drop_table_sql):
    """Execute a DROP TABLE statement on an open connection.

    :param conn: Connection object
    :param drop_table_sql: a drop TABLE statement
    :return: None; a sqlite3 error is printed instead of being raised
    """
    try:
        conn.cursor().execute(drop_table_sql)
    except Error as exc:
        print(exc)

# build empty tables with correct columns names
def main(database=r"../DataBase/covid-19.db"):
    """Drop and recreate the empty ``monthly``, ``infection`` and ``vaccine`` tables.

    Each table is dropped (if it exists) and then created again, so every run
    leaves three empty tables ready to be filled from the DataFrames.

    :param database: path of the SQLite database file. Defaults to the
        project database, so a plain ``main()`` call behaves as before.
    :return: None. If the connection cannot be opened a message is printed
        and nothing else happens.
    """
    sql_create_monthly_table = """CREATE TABLE monthly(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Total_cases float,
                                        Total_death float,
                                        Total_recovered float,
                                        Death_percent float,
                                        Recovery_percent float
                                    ); """

    sql_drop_monthly_table = """DROP TABLE if exists monthly;"""

    sql_create_infection_table = """CREATE TABLE infection(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Total_cases float,
                                        Total_death float,
                                        Total_recovered float,
                                        Death_percent float,
                                        Recovery_percent float
                                    ); """

    sql_drop_infection_table = """DROP TABLE if exists infection;"""

    sql_create_vaccine_table = """CREATE TABLE vaccine(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Distributed float,
                                        Dist_per_100k float,
                                        Administered float,
                                        Admin_per_100k float
                                    ); """

    sql_drop_vaccine_table = """DROP TABLE if exists vaccine;"""

    # sql_create_states_table = """CREATE TABLE states(
    #                                     id INTEGER PRIMARY KEY AUTOINCREMENT,
    #                                     State text,
    #                                     Total_cases float,
    #                                     Case_rate_per_100k float,
    #                                     Total_deaths float,
    #                                     Death_rate_per_100k float,
    #                                     Lat float,
    #                                     Long float,
    #                                     Death_percent float,
    #                                     Recovery_percent float
    #                                 );"""

    # sql_drop_states_table = """DROP TABLE if exists states;"""

    # sql_create_world_table = """CREATE TABLE world(
    #                                     id INTEGER PRIMARY KEY AUTOINCREMENT,
    #                                     Country text,
    #                                     Lat float,
    #                                     Long float,
    #                                     Confirmed float,
    #                                     Deaths float,
    #                                     Recovered float,
    #                                     Recovery_percent float,
    #                                     Death_percent float
    #                                 );"""

    # sql_drop_world_table = """DROP TABLE if exists world;"""

    # create a database connection
    conn = create_connection(database)

    # error handling: create_connection returns None when it cannot connect
    if conn is None:
        print("Error! cannot create the database connection.")
        return

    try:
        # create covid_monthly table
        drop_table(conn, sql_drop_monthly_table)
        create_table(conn, sql_create_monthly_table)

        # create covid_infection table
        drop_table(conn, sql_drop_infection_table)
        create_table(conn, sql_create_infection_table)

        # create covid_vaccine table
        drop_table(conn, sql_drop_vaccine_table)
        create_table(conn, sql_create_vaccine_table)

        # create covid_states table
        # drop_table(conn, sql_drop_states_table)
        # create_table(conn, sql_create_states_table)

        # create covid_world table
        # drop_table(conn, sql_drop_world_table)
        # create_table(conn, sql_create_world_table)

        # Make sure nothing is left pending before the connection goes away.
        conn.commit()
    finally:
        # Always release the connection: the next cell opens the same file
        # through SQLAlchemy, and every re-run would otherwise leak a handle.
        conn.close()


if __name__ == '__main__':
    main()

In [23]:
# Open a SQLAlchemy engine on the same SQLite file the tables were created in.
engine = create_engine('sqlite:///../DataBase/covid-19.db')

# Append each DataFrame to its table. index=False keeps the DataFrame index
# out of the insert, so only the named columns are written.
for table_name, frame in (
    ("monthly", covid_monthly_df),
    ("infection", covid_infec_df),
    ("vaccine", covid_vac_df),
):
    frame.to_sql(name=table_name, con=engine, if_exists="append", index=False)
# world_covid_df.to_sql(name='world', con=engine, if_exists = "append", index= False)
# us_covid_combine_df.to_sql(name='states', con=engine, if_exists = "append", index=False)

In [24]:
# Query the monthly table back from the database to check that it was
# created and populated.
pd.read_sql('select * from monthly', engine)

Unnamed: 0,id,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
0,1,2020-01,AK,0.0,0.0,0.0,0.00,0.00
1,2,2020-01,AL,0.0,0.0,0.0,0.00,0.00
2,3,2020-01,AR,0.0,0.0,0.0,0.00,0.00
3,4,2020-01,AS,0.0,0.0,0.0,0.00,0.00
4,5,2020-01,AZ,1.0,0.0,1.0,0.00,100.00
...,...,...,...,...,...,...,...,...
2035,2036,2022-10,VT,133845.0,700.0,133145.0,0.52,99.48
2036,2037,2022-10,WA,1828918.0,14464.0,1814454.0,0.79,99.21
2037,2038,2022-10,WI,1884206.0,15374.0,1868832.0,0.82,99.18
2038,2039,2022-10,WV,605012.0,7479.0,597533.0,1.24,98.76


In [25]:
# Query the vaccine table back from the database to check that it was populated.
pd.read_sql('select * from vaccine', engine)

Unnamed: 0,id,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
0,1,2022-12,OH,29711725.0,254183.0,20509252.0,175456.0
1,2,2022-12,IA,8731035.0,276730.0,5950297.0,188595.0
2,3,2022-12,VI,167860.0,157926.0,153128.0,144066.0
3,4,2022-12,TN,16312400.0,238863.0,11104577.0,162605.0
4,5,2022-12,ND,1794020.0,235417.0,1276889.0,167557.0
...,...,...,...,...,...,...,...
61,62,2022-12,IL,36059795.0,284567.0,26117630.0,206108.0
62,63,2022-12,DE,3123235.0,320738.0,2023149.0,207766.0
63,64,2022-12,MD,20976410.0,346965.0,14265537.0,235962.0
64,65,2022-02,RP,43990.0,204301.0,44526.0,206790.0


In [26]:
# Query the infection table back from the database to check that it was populated.
pd.read_sql('select * from infection', engine)

Unnamed: 0,id,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
0,1,2022-10,AK,284076.0,1356.0,282720.0,0.48,99.52
1,2,2022-10,AL,1530803.0,20526.0,1510277.0,1.34,98.66
2,3,2022-10,AR,956424.0,12386.0,944038.0,1.3,98.7
3,4,2022-10,AS,8251.0,34.0,8217.0,0.41,99.59
4,5,2022-10,AZ,2277635.0,31455.0,2246180.0,1.38,98.62
5,6,2022-10,CA,11309237.0,95604.0,11213633.0,0.85,99.15
6,7,2022-10,CO,1665451.0,13373.0,1652078.0,0.8,99.2
7,8,2022-10,CT,907038.0,11420.0,895618.0,1.26,98.74
8,9,2022-10,DC,169149.0,1392.0,167757.0,0.82,99.18
9,10,2022-10,DE,311538.0,3136.0,308402.0,1.01,98.99


In [27]:
# Make an API call to make sure the Flask app is working. The app must already
# be running locally on port 5000, otherwise this cell raises ConnectionError.
import requests

url = "http://127.0.0.1:5000/api/v1.0/monthly"

# requests has no default timeout; set one so the cell cannot hang forever if
# the server accepts the connection but never answers.
resp = requests.get(url, timeout=10)

# Fail here with a clear HTTP error instead of a confusing JSON decoding error
# in the next cell when the endpoint answers with a 4xx/5xx status.
resp.raise_for_status()
resp

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /api/v1.0/monthly (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001F70B998F98>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))

In [None]:
# Decode the JSON body of the Flask response fetched in the previous cell and
# display it.
data = resp.json()
data