### Links To CDC for Raw Data

## Covid-19 Infection Data
### url-1: https://data.cdc.gov/Case-Surveillance/United-States-COVID-19-Cases-and-Deaths-by-State-o/9mfq-cb36

## Covid-19 Vaccine Distribution
### url-2: https://data.cdc.gov/Vaccinations/COVID-19-Vaccinations-in-the-United-States-Jurisdi/unsk-b7fc

In [1]:
# import dependencies
import pandas as pd
import datetime as dt
import sqlite3
from sqlite3 import Error
from sqlalchemy import create_engine, inspect, func
import psycopg2
import requests
from sodapy import Socrata

# import cdc key for API
from cdc_token import my_token

## USA Monthly Cases by State 

In [2]:
# set a client with a token
client = Socrata("data.cdc.gov", my_token)

# results returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("9mfq-cb36", limit=50000)

# Convert to pandas DataFrame and display
covid_df = pd.DataFrame.from_records(results)
covid_df.tail()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death
49995,2022-02-10T00:00:00.000,PA,2713459,2251172.0,462287.0,4629.0,913.0,42035,138.0,112.0,2022-02-11T14:35:19.435,Agree,Not agree,,
49996,2021-08-13T00:00:00.000,PA,1251646,1046124.0,205522.0,2020.0,387.0,27957,16.0,12.0,2021-08-13T00:00:00.000,Agree,Not agree,,
49997,2021-02-27T00:00:00.000,RMI,4,4.0,0.0,0.0,0.0,0,0.0,0.0,2021-02-28T14:48:18.530,Agree,Agree,0.0,0.0
49998,2020-03-04T00:00:00.000,OH,0,,,0.0,,0,0.0,,2020-03-26T16:22:39.452,Agree,Agree,,
49999,2020-02-19T00:00:00.000,NM,0,,,0.0,,0,0.0,,2020-03-26T16:22:39.452,,Not agree,,


In [3]:
# add date columns which only has year and month for use with groupby later on.. 
covid_df['date'] = pd.to_datetime(covid_df['submission_date']).dt.to_period('M').astype(str)
covid_df.tail()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,conf_death,prob_death,date
49995,2022-02-10T00:00:00.000,PA,2713459,2251172.0,462287.0,4629.0,913.0,42035,138.0,112.0,2022-02-11T14:35:19.435,Agree,Not agree,,,2022-02
49996,2021-08-13T00:00:00.000,PA,1251646,1046124.0,205522.0,2020.0,387.0,27957,16.0,12.0,2021-08-13T00:00:00.000,Agree,Not agree,,,2021-08
49997,2021-02-27T00:00:00.000,RMI,4,4.0,0.0,0.0,0.0,0,0.0,0.0,2021-02-28T14:48:18.530,Agree,Agree,0.0,0.0,2021-02
49998,2020-03-04T00:00:00.000,OH,0,,,0.0,,0,0.0,,2020-03-26T16:22:39.452,Agree,Agree,,,2020-03
49999,2020-02-19T00:00:00.000,NM,0,,,0.0,,0,0.0,,2020-03-26T16:22:39.452,,Not agree,,,2020-02


In [4]:
covid_df.dtypes

submission_date    object
state              object
tot_cases          object
conf_cases         object
prob_cases         object
new_case           object
pnew_case          object
tot_death          object
new_death          object
pnew_death         object
created_at         object
consent_cases      object
consent_deaths     object
conf_death         object
prob_death         object
date               object
dtype: object

In [5]:
# convert objects to float
covid_df["tot_cases"] =  covid_df.tot_cases.astype(float)
covid_df["tot_death"] = covid_df.tot_death.astype(float)

# select only the columns we gonna use and display df
covid_df = covid_df[["date", "state", "tot_cases", "tot_death"]]
covid_df.head()

Unnamed: 0,date,state,tot_cases,tot_death
0,2022-01,KS,621273.0,7162.0
1,2022-01,AS,11.0,0.0
2,2022-04,FL,5866589.0,73860.0
3,2020-08,AR,56199.0,674.0
4,2021-05,PW,0.0,0.0


In [6]:
# covid_test = covid_max.loc[(covid_max['state'] == 'TX')]
# covid_test.head(60)

In [12]:
# group by states to be used later
# state_df = covid_df.groupby('state').mean().reset_index()
# state_df.head()

In [6]:
# add total recovery columns to df
covid_df["Total_recovered"] = covid_df.tot_cases - covid_df.tot_death
covid_df.tail()

Unnamed: 0,date,state,tot_cases,tot_death,Total_recovered
49995,2022-02,PA,2713459.0,42035.0,2671424.0
49996,2021-08,PA,1251646.0,27957.0,1223689.0
49997,2021-02,RMI,4.0,0.0,4.0
49998,2020-03,OH,0.0,0.0,0.0
49999,2020-02,NM,0.0,0.0,0.0


In [7]:
# groupby date and state and take the mean value to get monthly average..
covid_monthly_df = round(covid_df.groupby(["date", "state"]).max().reset_index(), 2)
covid_monthly_df

Unnamed: 0,date,state,tot_cases,tot_death,Total_recovered
0,2020-01,AK,0.0,0.0,0.0
1,2020-01,AL,0.0,0.0,0.0
2,2020-01,AR,0.0,0.0,0.0
3,2020-01,AS,0.0,0.0,0.0
4,2020-01,AZ,1.0,0.0,1.0
...,...,...,...,...,...
1795,2022-06,VT,124290.0,641.0,123649.0
1796,2022-06,WA,1605181.0,13020.0,1592161.0
1797,2022-06,WI,1691977.0,14671.0,1677306.0
1798,2022-06,WV,520896.0,6997.0,513899.0


In [8]:
# rename columns. name gotta be compatable wth sqlite database naming potocol.
covid_monthly_df = covid_monthly_df.rename(columns={"date": "Date", "state": "State", "tot_cases": "Total_cases", "tot_death": "Total_death"})
covid_monthly_df.head()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered
0,2020-01,AK,0.0,0.0,0.0
1,2020-01,AL,0.0,0.0,0.0
2,2020-01,AR,0.0,0.0,0.0
3,2020-01,AS,0.0,0.0,0.0
4,2020-01,AZ,1.0,0.0,1.0


In [9]:
# add death and recovery percentage columns. change NA values to zero
covid_monthly_df['Death_percent'] = round((covid_monthly_df.Total_death/covid_monthly_df.Total_cases)*100, 2)
covid_monthly_df['Recovery_percent'] = round((covid_monthly_df.Total_recovered/covid_monthly_df.Total_cases)*100, 2)
covid_monthly_df=covid_monthly_df.fillna(0)
covid_infec_df = covid_monthly_df.copy()
covid_monthly_df.tail()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
1795,2022-06,VT,124290.0,641.0,123649.0,0.52,99.48
1796,2022-06,WA,1605181.0,13020.0,1592161.0,0.81,99.19
1797,2022-06,WI,1691977.0,14671.0,1677306.0,0.87,99.13
1798,2022-06,WV,520896.0,6997.0,513899.0,1.34,98.66
1799,2022-06,WY,159707.0,1824.0,157883.0,1.14,98.86


In [10]:
# save df as a csv file
covid_monthly_df.to_csv('../Data/monthly.csv')

## USA Covid-19 Infection Map Data

In [11]:
# since data is aggregated using rolling average, we only goinog to keep last month data.
covid_infec_df = covid_infec_df.drop_duplicates(subset=['State'], keep='last', inplace=False, ignore_index=True)
covid_infec_df.tail()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
55,2022-06,VT,124290.0,641.0,123649.0,0.52,99.48
56,2022-06,WA,1605181.0,13020.0,1592161.0,0.81,99.19
57,2022-06,WI,1691977.0,14671.0,1677306.0,0.87,99.13
58,2022-06,WV,520896.0,6997.0,513899.0,1.34,98.66
59,2022-06,WY,159707.0,1824.0,157883.0,1.14,98.86


## USA Covid-19 Vaccine Status by States

In [12]:
# set a client with a token
client = Socrata("data.cdc.gov", my_token)

# results returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("unsk-b7fc", limit=50000)

# Convert to pandas DataFrame and display
covid_vac_df = pd.DataFrame.from_records(results)
covid_vac_df.head()

Unnamed: 0,date,mmwr_week,location,distributed,distributed_janssen,distributed_moderna,distributed_pfizer,distributed_unk_manuf,dist_per_100k,distributed_per_100k_5plus,...,additional_doses_unk_manuf,second_booster_50plus,second_booster_50plus_vax_pct,second_booster_65plus,second_booster_65plus_vax_pct,second_booster_janssen,second_booster_moderna,second_booster_pfizer,second_booster_unk_manuf,second_booster
0,2022-06-09T00:00:00.000,23,PW,46290,3800,30000,12490,0,214982,227212,...,0,616,13.1,237,15.3,0,629,9,0,
1,2022-06-09T00:00:00.000,23,US,754073055,30913500,280553820,442605735,0,227124,241485,...,35715,15208582,24.1,10619959,30.5,15837,7631542,8295655,11994,15955028.0
2,2022-06-09T00:00:00.000,23,SC,10925375,456700,4587540,5881135,0,212196,224976,...,519,151685,17.6,115776,21.7,143,69381,87064,11,
3,2022-06-09T00:00:00.000,23,DD2,6768210,210500,2219740,4299870,38100,0,0,...,0,0,0.0,0,0.0,0,1,6,0,
4,2022-06-09T00:00:00.000,23,GA,22538695,837600,8800040,12901055,0,212280,226273,...,2052,257532,17.9,175073,22.6,304,140693,132400,361,


In [13]:
covid_vac_df.dtypes

date                        object
mmwr_week                   object
location                    object
distributed                 object
distributed_janssen         object
                             ...  
second_booster_janssen      object
second_booster_moderna      object
second_booster_pfizer       object
second_booster_unk_manuf    object
second_booster              object
Length: 91, dtype: object

In [14]:
# add date columns which only has year and month for use with groupby later on.. 
covid_vac_df['Date'] = pd.to_datetime(covid_vac_df['date']).dt.to_period('M').astype(str)
covid_vac_df.tail()

Unnamed: 0,date,mmwr_week,location,distributed,distributed_janssen,distributed_moderna,distributed_pfizer,distributed_unk_manuf,dist_per_100k,distributed_per_100k_5plus,...,second_booster_50plus,second_booster_50plus_vax_pct,second_booster_65plus,second_booster_65plus_vax_pct,second_booster_janssen,second_booster_moderna,second_booster_pfizer,second_booster_unk_manuf,second_booster,Date
35027,2020-12-13T00:00:00.000,51,LTC,0,0,0,0,0,0,0,...,,,,,,,,,,2020-12
35028,2020-12-13T00:00:00.000,51,VI,975,0,0,0,0,931,0,...,,,,,,,,,,2020-12
35029,2020-12-13T00:00:00.000,51,MP,4875,0,0,0,0,8570,0,...,,,,,,,,,,2020-12
35030,2020-12-13T00:00:00.000,51,US,13650,0,0,0,0,4,0,...,,,,,,,,,,2020-12
35031,2020-12-13T00:00:00.000,51,GU,3900,0,0,0,0,2353,0,...,,,,,,,,,,2020-12


In [15]:
# convert objects to float
covid_vac_df["distributed"] =  covid_vac_df.distributed.astype(float)
covid_vac_df["dist_per_100k"] = covid_vac_df.dist_per_100k.astype(float)
covid_vac_df["administered"] =  covid_vac_df.administered.astype(float)
covid_vac_df["admin_per_100k"] = covid_vac_df.admin_per_100k.astype(float)

# select only the columns we gonna use and display df
covid_vac_df = covid_vac_df[["Date", "location", "distributed", "dist_per_100k", "administered", "admin_per_100k"]]
covid_vac_df.head()

Unnamed: 0,Date,location,distributed,dist_per_100k,administered,admin_per_100k
0,2022-06,PW,46290.0,214982.0,48164.0,223686.0
1,2022-06,US,754073055.0,227124.0,590076511.0,177729.0
2,2022-06,SC,10925375.0,212196.0,7691681.0,149390.0
3,2022-06,DD2,6768210.0,0.0,7560317.0,0.0
4,2022-06,GA,22538695.0,212280.0,15235731.0,143497.0


In [16]:
# rename columns. name gotta be compatable wth sqlite database naming potocol.
covid_vac_df = covid_vac_df.rename(columns={"location": "State", "distributed": "Distributed", "dist_per_100k": "Dist_per_100k", "administered": "Administered", "admin_per_100k": "Admin_per_100k"})
covid_vac_df.tail()

Unnamed: 0,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
35027,2020-12,LTC,0.0,0.0,0.0,0.0
35028,2020-12,VI,975.0,931.0,0.0,0.0
35029,2020-12,MP,4875.0,8570.0,0.0,0.0
35030,2020-12,US,13650.0,4.0,0.0,0.0
35031,2020-12,GU,3900.0,2353.0,0.0,0.0


In [17]:
# since data is aggregated using rolling average, we only goinog to keep month Data.
covid_vac_df = covid_vac_df.drop_duplicates(subset=['State'], keep='first', inplace=False, ignore_index=True)
covid_vac_df.head()

Unnamed: 0,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
0,2022-06,PW,46290.0,214982.0,48164.0,223686.0
1,2022-06,US,754073055.0,227124.0,590076511.0,177729.0
2,2022-06,SC,10925375.0,212196.0,7691681.0,149390.0
3,2022-06,DD2,6768210.0,0.0,7560317.0,0.0
4,2022-06,GA,22538695.0,212280.0,15235731.0,143497.0


In [18]:
covid_vac_df.dtypes

Date               object
State              object
Distributed       float64
Dist_per_100k     float64
Administered      float64
Admin_per_100k    float64
dtype: object

In [19]:
# save df as a csv file
covid_vac_df.to_csv('../Data/vaccine.csv')

## USA Total Cases and Death by State

In [21]:
# upload data file and read it ito a pandas dataframe
# file = "../Data/US_COVID-19_Deaths.csv"
# covid_stats_df = pd.read_csv(file)
# covid_stats_df.head()

In [22]:
# select relevant columns only
# covid_stats_df = covid_stats_df[['State/Territory', 'Total Cases', 'Case Rate per 100000', 'Total Deaths', 'Death Rate per 100000']]
# covid_stats_df.head()

In [23]:
# add a state abbreviation column
# covid_stats_df['State'] = state_df['state']
# covid_stats_df.head()

In [24]:
# rename columns and drop any column with NA
# covid_stats_df = covid_stats_df.rename(columns={"state": "State", "Total Cases": "Total_cases", "Case Rate per 100000": "Case_rate_per_100k", "Total Deaths": "Total_deaths", "Death Rate per 100000": "Death_rate_per_100k"})
# covid_stats_df = covid_stats_df[["State", "Total_cases", "Case_rate_per_100k", "Total_deaths", "Death_rate_per_100k"]].fillna(0)
# covid_stats_df.tail()

## USA COVID-19 data with latitude and longtitude to create interactive map

In [25]:
# upload file and read to pandas dataframe
# file = "../Data/US_COVID-19_stats.csv"
# us_covid_df = pd.read_csv(file)
# us_covid_df.head()

In [26]:
# select relevant columns
# us_covid_df = us_covid_df[["Province_State", "Lat", "Long_", "Confirmed", "Deaths", "Recovered", "Mortality_Rate"]]
# us_covid_df.head()

In [27]:
# add state column to be filled with state abbreviation
# us_covid_df["State"] = "" 
# us_covid_df.head()

In [28]:
# # upload States and their abbreviation
# us_state_abbrev = {
    
#     'Alabama': 'AL',
#     'Alaska': 'AK',
#     'American Samoa': 'AS',
#     'Arizona': 'AZ',
#     'Arkansas': 'AR',
#     'California': 'CA',
#     'Colorado': 'CO',
#     'Connecticut': 'CT',
#     'Delaware': 'DE',
#     'Diamond Princess': 'DP',
#     'District of Columbia': 'DC',
#     'Florida': 'FL',
#     'Georgia': 'GA',
#     'Grand Princess': 'GP',
#     'Guam': 'GU',
#     'Hawaii': 'HI',
#     'Idaho': 'ID',
#     'Illinois': 'IL',
#     'Indiana': 'IN',
#     'Iowa': 'IA',
#     'Kansas': 'KS',
#     'Kentucky': 'KY',
#     'Louisiana': 'LA',
#     'Maine': 'ME',
#     'Maryland': 'MD',
#     'Massachusetts': 'MA',
#     'Michigan': 'MI',
#     'Minnesota': 'MN',
#     'Mississippi': 'MS',
#     'Missouri': 'MO',
#     'Montana': 'MT',
#     'Nebraska': 'NE',
#     'Nevada': 'NV',
#     'New Hampshire': 'NH',
#     'New Jersey': 'NJ',
#     'New Mexico': 'NM',
#     'New York': 'NY',
#     'North Carolina': 'NC',
#     'North Dakota': 'ND',
#     'Northern Mariana Islands':'MP',
#     'Ohio': 'OH',
#     'Oklahoma': 'OK',
#     'Oregon': 'OR',
#     'Pennsylvania': 'PA',
#     'Puerto Rico': 'PR',
#     'Rhode Island': 'RI',
#     'South Carolina': 'SC',
#     'South Dakota': 'SD',
#     'Tennessee': 'TN',
#     'Texas': 'TX',
#     'Utah': 'UT',
#     'Vermont': 'VT',
#     'Virgin Islands': 'VI',
#     'Virginia': 'VA',
#     'Washington': 'WA',
#     'West Virginia': 'WV',
#     'Wisconsin': 'WI',
#     'Wyoming': 'WY'
# }
# abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))

In [29]:
# loop through and assign abbreviation acoordinely
# for index, row in us_covid_df.iterrows():
#     us_covid_df.loc[index, "State"] = us_state_abbrev[row[0]]

# us_covid_df.head()

In [None]:
# loop through and assign abbreviation acoordinely
# for index, row in us_covid_df.iterrows():
#     if row["State"] == "DP":
#         us_covid_df.loc[index, "Lat"] = 14.5214
#         us_covid_df.loc[index, "Long_"] = 120.9709

#     elif row["State"] == "GP":
#         us_covid_df.loc[index, "Lat"] = 32.1584
#         us_covid_df.loc[index, "Long_"] = 117.5676

    # if row["State"] == FSM:
    #     us_covid_df.loc[index, "Lat"] = 6.8874
    #     us_covid_df.loc[index, "Long"] = 158.2150

    # elif row["State"] == NYC:
    #     us_covid_df.loc[index, "Lat"] = 40.7128
    #     us_covid_df.loc[index, "Long"] = -74.0060

    # elif row["State"] == PW:
    #     us_covid_df.loc[index, "Lat"] = 7.5150
    #     us_covid_df.loc[index, "Long"] = 134.5825

    # elif row["State"] == RMI:
    #     us_covid_df.loc[index, "Lat"] = 7.1315
    #     us_covid_df.loc[index, "Long"] = 171.1845

# us_covid_df.head()

In [30]:
# rename columns and add recovery percent column
# us_covid_df = us_covid_df.rename(columns={"Long_": "Long", "Mortality_Rate":"Death_percent"})
# us_covid_df["Recovery_percent"]=us_covid_df.Recovered/us_covid_df.Confirmed
# us_covid_df = us_covid_df[["State", "Lat", "Long", "Death_percent", "Recovery_percent"]].fillna(0)
# us_covid_df.head()

In [32]:
# loop through and assign abbreviation acoordinely
# for index, row in us_covid_df.iterrows():
#     if row["State"] == "DP":
#         us_covid_df.loc[index, "Lat"] = 14.5214
#         us_covid_df.loc[index, "Long"] = 120.9709

#     elif row["State"] == "GP":
#         us_covid_df.loc[index, "Lat"] = 32.1584
#         us_covid_df.loc[index, "Long"] = 117.5676

    # elif row["State"] == FSM:
    #     us_covid_df.loc[index, "Lat"] = 6.8874
    #     us_covid_df.loc[index, "Long"] = 158.2150

    # elif row["State"] == NYC:
    #     us_covid_df.loc[index, "Lat"] = 40.7128
    #     us_covid_df.loc[index, "Long"] = -74.0060

    # elif row["State"] == PW:
    #     us_covid_df.loc[index, "Lat"] = 7.5150
    #     us_covid_df.loc[index, "Long"] = 134.5825

    # elif row["State"] == RMI:
    #     us_covid_df.loc[index, "Lat"] = 7.1315
    #     us_covid_df.loc[index, "Long"] = 171.1845

# us_covid_df.head()

In [33]:
# merge two df into one dataframe using left joint.
# us_covid_combine_df = pd.merge(covid_stats_df, us_covid_df, how='left', on=['State','State'])
# us_covid_combine_df.head()

In [34]:
# save to a csv file
# us_covid_combine_df.to_csv('../Data/states.csv')

## Add Geo property to states in old state file

In [35]:
# Add Lat and Long Columns
# geomap_df = covid_monthly_df.groupby('State').mean().reset_index()
# covid_monthly_df["Long"] = ""
# geomap_df.head()

## World COVID-19 Data by Country

In [36]:
# upload file and read into a pandas dataframe
# file = "../Data/World_COVID-19_Stats.csv"
# world_covid_df = pd.read_csv(file)
# world_covid_df.head()

In [37]:
# select relevant columns only
# world_covid_df = world_covid_df[["Country_Region", "Lat", "Long_", "Confirmed", "Deaths", "Recovered", "Case-Fatality_Ratio"]]
# world_covid_df.head()

In [38]:
# rename columns and add a recovery percent column
# world_covid_df = world_covid_df.rename(columns={"Long_": "Long", "Case-Fatality_Ratio":"Death_percent", "Country_Region": "Country"})
# world_covid_df["Recovery_percent"]=world_covid_df.Recovered/world_covid_df.Confirmed
# world_covid_df.head()

In [39]:
# add missing geoinfo to country
# for index, row in world_covid_df.iterrows(): 
#     if row["Country"] == "Canada":
#         world_covid_df.loc[index, "Lat"] = 56.1304
#         world_covid_df.loc[index, "Long"] = 106.3468
# world_covid_df

In [40]:
# save file as a csv
# world_covid_df.to_csv('../Data/world.csv')

## build SQlite DataBase

In [20]:
# import modules
import sqlite3
from sqlite3 import Error

# series of functions to  build database tables in sqlite database.

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)
    return conn
def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Error as e:
        print(e)

def drop_table(conn, drop_table_sql):
    """ drop a table from the drop_table_sql statement
    :param conn: Connection object
    :param drop_table_sql: a drop TABLE statement
    :return:
    """
    try:
        c = conn.cursor()
        c.execute(drop_table_sql)
    except Error as e:
        print(e)

# build empty tables with correct columns names
def main():		
    database = r"../DataBase/covid-19.db"
    sql_create_monthly_table = """CREATE TABLE monthly(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Total_cases float,
                                        Total_death float,
                                        Total_recovered float,
                                        Death_percent float,
                                        Recovery_percent float
                                    ); """

    sql_drop_monthly_table = """DROP TABLE if exists monthly;"""

    sql_create_infection_table = """CREATE TABLE infection(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Total_cases float,
                                        Total_death float,
                                        Total_recovered float,
                                        Death_percent float,
                                        Recovery_percent float
                                    ); """

    sql_drop_infection_table = """DROP TABLE if exists infection;"""

    sql_create_vaccine_table = """CREATE TABLE vaccine(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Distributed float,
                                        Dist_per_100k float,
                                        Administered float,
                                        Admin_per_100k float
                                    ); """

    sql_drop_vaccine_table = """DROP TABLE if exists vaccine;"""
    
    # sql_create_states_table = """CREATE TABLE states(
    #                                     id INTEGER PRIMARY KEY AUTOINCREMENT,
    #                                     State text,
    #                                     Total_cases float,
    #                                     Case_rate_per_100k float,
    #                                     Total_deaths float,
    #                                     Death_rate_per_100k float,
    #                                     Lat float,
    #                                     Long float,
    #                                     Death_percent float,
    #                                     Recovery_percent float
    #                                 );"""

    # sql_drop_states_table = """DROP TABLE if exists states;"""    

    # sql_create_world_table = """CREATE TABLE world(
    #                                     id INTEGER PRIMARY KEY AUTOINCREMENT,
    #                                     Country text,
    #                                     Lat float,
    #                                     Long float,
    #                                     Confirmed float,
    #                                     Deaths float,
    #                                     Recovered float,
    #                                     Recovery_percent float,
    #                                     Death_percent float
    #                                 );"""

    # sql_drop_world_table = """DROP TABLE if exists world;"""

    # create a database connection
    conn = create_connection(database)
    
    # create tables
    if conn is not None:
        # create covid_monthly table
        drop_table(conn, sql_drop_monthly_table)
        create_table(conn, sql_create_monthly_table)

        # create covid_infection table
        drop_table(conn, sql_drop_infection_table)
        create_table(conn, sql_create_infection_table)

        # create covid_vaccine table
        drop_table(conn, sql_drop_vaccine_table)
        create_table(conn, sql_create_vaccine_table)

        # create covid_states table
        # drop_table(conn, sql_drop_states_table)
        # create_table(conn, sql_create_states_table)

        #create covid_world table
        # drop_table(conn, sql_drop_world_table)
        # create_table(conn, sql_create_world_table)
        
    # error handling
    else:
        print("Error! cannot create the database connection.")
if __name__ == '__main__':
    main()

In [21]:
# create engine connection to database
engine = create_engine('sqlite:///../DataBase/covid-19.db')

# # populate tables with information from dataframes
covid_monthly_df.to_sql(name='monthly', con=engine, if_exists = "append", index=False)
covid_infec_df.to_sql(name='infection', con=engine, if_exists = "append", index=False)
covid_vac_df.to_sql(name='vaccine', con=engine, if_exists = "append", index=False)
# world_covid_df.to_sql(name='world', con=engine, if_exists = "append", index= False)
# us_covid_combine_df.to_sql(name='states', con=engine, if_exists = "append", index=False)

In [22]:
# querry monthly data from database. to test and make sure tables are working
pd.read_sql('select * from monthly', engine)

Unnamed: 0,id,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
0,1,2020-01,AK,0.0,0.0,0.0,0.00,0.00
1,2,2020-01,AL,0.0,0.0,0.0,0.00,0.00
2,3,2020-01,AR,0.0,0.0,0.0,0.00,0.00
3,4,2020-01,AS,0.0,0.0,0.0,0.00,0.00
4,5,2020-01,AZ,1.0,0.0,1.0,0.00,100.00
...,...,...,...,...,...,...,...,...
1795,1796,2022-06,VT,124290.0,641.0,123649.0,0.52,99.48
1796,1797,2022-06,WA,1605181.0,13020.0,1592161.0,0.81,99.19
1797,1798,2022-06,WI,1691977.0,14671.0,1677306.0,0.87,99.13
1798,1799,2022-06,WV,520896.0,6997.0,513899.0,1.34,98.66


In [23]:
pd.read_sql('select * from vaccine', engine)

Unnamed: 0,id,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
0,1,2022-06,PW,46290.0,214982.0,48164.0,223686.0
1,2,2022-06,US,754073055.0,227124.0,590076511.0,177729.0
2,3,2022-06,SC,10925375.0,212196.0,7691681.0,149390.0
3,4,2022-06,DD2,6768210.0,0.0,7560317.0,0.0
4,5,2022-06,GA,22538695.0,212280.0,15235731.0,143497.0
...,...,...,...,...,...,...,...
61,62,2022-06,AR,6429680.0,213058.0,4324520.0,143300.0
62,63,2022-06,TX,63769535.0,219926.0,47788655.0,164812.0
63,64,2022-06,LA,8524350.0,183367.0,6358128.0,136769.0
64,65,2022-02,RP,43990.0,204301.0,44526.0,206790.0


In [56]:
pd.read_sql('select * from infection', engine)

Unnamed: 0,id,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
0,1,2022-03,AK,235505.0,1168.0,234337.0,0.5,99.5
1,2,2022-03,AL,1289351.0,18890.0,1270461.0,1.47,98.53
2,3,2022-03,AR,825653.0,10864.0,814789.0,1.32,98.68
3,4,2022-03,AS,443.0,0.0,443.0,0.0,100.0
4,5,2022-03,AZ,1987318.0,28090.0,1959228.0,1.41,98.59
5,6,2022-03,CA,9019412.0,86387.0,8933025.0,0.96,99.04
6,7,2022-03,CO,1325063.0,11840.0,1313224.0,0.89,99.11
7,8,2022-03,CT,729697.0,10648.0,719049.0,1.46,98.54
8,9,2022-03,DC,135223.0,1326.0,133897.0,0.98,99.02
9,10,2022-03,DE,258024.0,2753.0,255271.0,1.07,98.93


In [45]:
# make an API call to make sure Flask app is working 
import requests
url="http://127.0.0.1:5000/api/v1.0/monthly"
resp=requests.get(url)
resp

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /api/v1.0/monthly (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000022F371938D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))

In [None]:
data = resp.json()
data