### Links To CDC for Raw Data

## Covid-19 Infection Data
### url-1: https://data.cdc.gov/Case-Surveillance/United-States-COVID-19-Cases-and-Deaths-by-State-o/9mfq-cb36

## Covid-19 Vaccine Distribution
### url-2: https://data.cdc.gov/Vaccinations/COVID-19-Vaccinations-in-the-United-States-Jurisdi/unsk-b7fc

In [1]:
# import dependencies
import pandas as pd
import datetime as dt
import sqlite3
from sqlite3 import Error
from sqlalchemy import create_engine, inspect, func
import psycopg2
import requests
from sodapy import Socrata

# import cdc key for API
from cdc_token import my_token

## USA Monthly Cases by State 

In [2]:
# set a client with a token
client = Socrata("data.cdc.gov", my_token)

# results returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("9mfq-cb36", limit=50000)

# Convert to pandas DataFrame and display
covid_df = pd.DataFrame.from_records(results)
covid_df.tail()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths
44035,2021-11-09T00:00:00.000,MN,826404,,,7165.0,1408,9006,8365.0,641.0,20.0,4,2021-11-10T15:25:35.567,,Agree
44036,2020-07-19T00:00:00.000,NJ,178541,,,-10.0,21,15706,13732.0,1974.0,34.0,27,2020-07-19T00:00:00.000,Not agree,Agree
44037,2021-02-10T00:00:00.000,FSM,1,1.0,0.0,0.0,0,0,0.0,0.0,0.0,0,2021-02-11T14:50:55.787,Agree,Agree
44038,2022-01-12T00:00:00.000,WY,123743,97745.0,25998.0,989.0,246,1588,1588.0,0.0,0.0,0,2022-01-13T14:34:51.057,Agree,Agree
44039,2020-06-25T00:00:00.000,SC,28858,28772.0,86.0,1303.0,17,686,,,10.0,0,2020-06-25T00:00:00.000,Agree,Not agree


In [3]:
# add date columns which only has year and month for use with groupby later on.. 
covid_df['date'] = pd.to_datetime(covid_df['submission_date']).dt.to_period('M').astype(str)
covid_df.tail()

Unnamed: 0,submission_date,state,tot_cases,conf_cases,prob_cases,new_case,pnew_case,tot_death,conf_death,prob_death,new_death,pnew_death,created_at,consent_cases,consent_deaths,date
44035,2021-11-09T00:00:00.000,MN,826404,,,7165.0,1408,9006,8365.0,641.0,20.0,4,2021-11-10T15:25:35.567,,Agree,2021-11
44036,2020-07-19T00:00:00.000,NJ,178541,,,-10.0,21,15706,13732.0,1974.0,34.0,27,2020-07-19T00:00:00.000,Not agree,Agree,2020-07
44037,2021-02-10T00:00:00.000,FSM,1,1.0,0.0,0.0,0,0,0.0,0.0,0.0,0,2021-02-11T14:50:55.787,Agree,Agree,2021-02
44038,2022-01-12T00:00:00.000,WY,123743,97745.0,25998.0,989.0,246,1588,1588.0,0.0,0.0,0,2022-01-13T14:34:51.057,Agree,Agree,2022-01
44039,2020-06-25T00:00:00.000,SC,28858,28772.0,86.0,1303.0,17,686,,,10.0,0,2020-06-25T00:00:00.000,Agree,Not agree,2020-06


In [4]:
covid_df.dtypes

submission_date    object
state              object
tot_cases          object
conf_cases         object
prob_cases         object
new_case           object
pnew_case          object
tot_death          object
conf_death         object
prob_death         object
new_death          object
pnew_death         object
created_at         object
consent_cases      object
consent_deaths     object
date               object
dtype: object

In [5]:
# convert objects to float
covid_df["tot_cases"] =  covid_df.tot_cases.astype(float)
covid_df["tot_death"] = covid_df.tot_death.astype(float)

# select only the columns we gonna use and display df
covid_df = covid_df[["date", "state", "tot_cases", "tot_death"]]
covid_df.head()

Unnamed: 0,date,state,tot_cases,tot_death
0,2020-06,AL,23339.0,1041.0
1,2021-09,ND,118491.0,1562.0
2,2021-08,MD,473969.0,9881.0
3,2020-05,VT,855.0,52.0
4,2021-02,IL,1130917.0,21336.0


In [25]:
# covid_test = covid_max.loc[(covid_max['state'] == 'TX')]
# covid_test.head(60)

In [6]:
# group by states to be used later
state_df = covid_df.groupby('state').mean().reset_index()
state_df.head()

Unnamed: 0,state,tot_cases,tot_death
0,AK,51420.832425,290.43733
1,AL,387665.171662,7863.705722
2,AR,243619.495913,3921.352861
3,AS,1.839237,0.0
4,AZ,592321.30654,11222.790191


In [7]:
# add total recovery columns to df
covid_df["Total_recovered"] = covid_df.tot_cases - covid_df.tot_death
covid_df.tail()

Unnamed: 0,date,state,tot_cases,tot_death,Total_recovered
44035,2021-11,MN,826404.0,9006.0,817398.0
44036,2020-07,NJ,178541.0,15706.0,162835.0
44037,2021-02,FSM,1.0,0.0,1.0
44038,2022-01,WY,123743.0,1588.0,122155.0
44039,2020-06,SC,28858.0,686.0,28172.0


In [8]:
# groupby date and state and take the mean value to get monthly average..
covid_monthly_df = round(covid_df.groupby(["date", "state"]).max().reset_index(), 2)
covid_monthly_df

Unnamed: 0,date,state,tot_cases,tot_death,Total_recovered
0,2020-01,AK,0.0,0.0,0.0
1,2020-01,AL,0.0,0.0,0.0
2,2020-01,AR,0.0,0.0,0.0
3,2020-01,AS,0.0,0.0,0.0
4,2020-01,AZ,1.0,0.0,1.0
...,...,...,...,...,...
1495,2022-01,VT,92148.0,496.0,91652.0
1496,2022-01,WA,1203311.0,10458.0,1192853.0
1497,2022-01,WI,1470412.0,12010.0,1458402.0
1498,2022-01,WV,422265.0,5645.0,416620.0


In [9]:
# rename columns. name gotta be compatable wth sqlite database naming potocol.
covid_monthly_df = covid_monthly_df.rename(columns={"date": "Date", "state": "State", "tot_cases": "Total_cases", "tot_death": "Total_death"})
covid_monthly_df.head()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered
0,2020-01,AK,0.0,0.0,0.0
1,2020-01,AL,0.0,0.0,0.0
2,2020-01,AR,0.0,0.0,0.0
3,2020-01,AS,0.0,0.0,0.0
4,2020-01,AZ,1.0,0.0,1.0


In [10]:
# add death and recovery percentage columns. change NA values to zero
covid_monthly_df['Death_percent'] = round((covid_monthly_df.Total_death/covid_monthly_df.Total_cases)*100, 2)
covid_monthly_df['Recovery_percent'] = round((covid_monthly_df.Total_recovered/covid_monthly_df.Total_cases)*100, 2)
covid_monthly_df=covid_monthly_df.fillna(0)
covid_monthly_df.head()

Unnamed: 0,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
0,2020-01,AK,0.0,0.0,0.0,0.0,0.0
1,2020-01,AL,0.0,0.0,0.0,0.0,0.0
2,2020-01,AR,0.0,0.0,0.0,0.0,0.0
3,2020-01,AS,0.0,0.0,0.0,0.0,0.0
4,2020-01,AZ,1.0,0.0,1.0,0.0,100.0


In [11]:
# save df as a csv file
covid_monthly_df.to_csv('../Data/monthly.csv')

## USA Covid-19 Vaccine Status by States

In [12]:
# set a client with a token
client = Socrata("data.cdc.gov", my_token)

# results returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("unsk-b7fc", limit=50000)

# Convert to pandas DataFrame and display
covid_vac_df = pd.DataFrame.from_records(results)
covid_vac_df.head()

Unnamed: 0,date,mmwr_week,location,distributed,distributed_janssen,distributed_moderna,distributed_pfizer,distributed_unk_manuf,dist_per_100k,distributed_per_100k_12plus,...,administered_dose1_recip_5pluspop_pct,series_complete_5plus,series_complete_5pluspop_pct,administered_5plus,admin_per_100k_5plus,distributed_per_100k_5plus,series_complete_moderna_5plus,series_complete_pfizer_5plus,series_complete_janssen_5plus,series_complete_unk_manuf_5plus
0,2022-01-25T00:00:00.000,4,CO,11426955,482600,4209500,6734855,0,198428,231499,...,81.8,3912959,72.1,9978644,183886,210576,1405849,2198699,306058,2353
1,2022-01-25T00:00:00.000,4,GA,19734215,806700,7575160,11352355,0,185866,219559,...,67.6,5588867,56.1,13942036,139968,198118,2104040,3162622,311432,10773
2,2022-01-25T00:00:00.000,4,DC,1702685,69100,601860,1031725,0,241259,279241,...,95.0,491663,74.5,1358543,205721,257834,170576,282410,38310,367
3,2022-01-25T00:00:00.000,4,NM,4076715,185500,1588940,2302275,0,194423,228077,...,89.2,1426781,72.2,3621979,183313,206328,546378,771290,107054,2059
4,2022-01-25T00:00:00.000,4,IA,6009475,283500,2271140,3454835,0,190470,224546,...,70.8,1897842,64.1,4808116,162467,203062,702302,1034999,160244,297


In [13]:
covid_vac_df.dtypes

date                               object
mmwr_week                          object
location                           object
distributed                        object
distributed_janssen                object
                                    ...  
distributed_per_100k_5plus         object
series_complete_moderna_5plus      object
series_complete_pfizer_5plus       object
series_complete_janssen_5plus      object
series_complete_unk_manuf_5plus    object
Length: 80, dtype: object

In [14]:
# add date columns which only has year and month for use with groupby later on.. 
covid_vac_df['Date'] = pd.to_datetime(covid_vac_df['date']).dt.to_period('M').astype(str)
covid_vac_df.tail()

Unnamed: 0,date,mmwr_week,location,distributed,distributed_janssen,distributed_moderna,distributed_pfizer,distributed_unk_manuf,dist_per_100k,distributed_per_100k_12plus,...,series_complete_5plus,series_complete_5pluspop_pct,administered_5plus,admin_per_100k_5plus,distributed_per_100k_5plus,series_complete_moderna_5plus,series_complete_pfizer_5plus,series_complete_janssen_5plus,series_complete_unk_manuf_5plus,Date
26387,2020-12-13T00:00:00.000,51,GU,3900,0,0,0,0,2353,0,...,0,0,0,0,0,,,,,2020-12
26388,2020-12-13T00:00:00.000,51,LTC,0,0,0,0,0,0,0,...,0,0,0,0,0,,,,,2020-12
26389,2020-12-13T00:00:00.000,51,MP,4875,0,0,0,0,8570,0,...,0,0,0,0,0,,,,,2020-12
26390,2020-12-13T00:00:00.000,51,VI,975,0,0,0,0,931,0,...,0,0,0,0,0,,,,,2020-12
26391,2020-12-13T00:00:00.000,51,AS,3900,0,0,0,0,7003,0,...,0,0,0,0,0,,,,,2020-12


In [15]:
# convert objects to float
covid_vac_df["distributed"] =  covid_vac_df.distributed.astype(float)
covid_vac_df["dist_per_100k"] = covid_vac_df.dist_per_100k.astype(float)
covid_vac_df["administered"] =  covid_vac_df.administered.astype(float)
covid_vac_df["admin_per_100k"] = covid_vac_df.admin_per_100k.astype(float)

# select only the columns we gonna use and display df
covid_vac_df = covid_vac_df[["Date", "location", "distributed", "dist_per_100k", "administered", "admin_per_100k"]]
covid_vac_df.head()

Unnamed: 0,Date,location,distributed,dist_per_100k,administered,admin_per_100k
0,2022-01,CO,11426955.0,198428.0,9979697.0,173297.0
1,2022-01,GA,19734215.0,185866.0,13943859.0,131330.0
2,2022-01,DC,1702685.0,241259.0,1358690.0,192517.0
3,2022-01,NM,4076715.0,194423.0,3622196.0,172746.0
4,2022-01,IA,6009475.0,190470.0,4808223.0,152397.0


In [16]:
# rename columns. name gotta be compatable wth sqlite database naming potocol.
covid_vac_df = covid_vac_df.rename(columns={"location": "State", "distributed": "Distributed", "dist_per_100k": "Dist_per_100k", "administered": "Administered", "admin_per_100k": "Admin_per_100k"})
covid_vac_df.tail()

Unnamed: 0,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
26387,2020-12,GU,3900.0,2353.0,0.0,0.0
26388,2020-12,LTC,0.0,0.0,0.0,0.0
26389,2020-12,MP,4875.0,8570.0,0.0,0.0
26390,2020-12,VI,975.0,931.0,0.0,0.0
26391,2020-12,AS,3900.0,7003.0,0.0,0.0


In [18]:
# since data is aggregated using rolling average, we only goinog to keep first duplicates of each month.
covid_vac_df = covid_vac_df.drop_duplicates(subset=['State'], keep='first', inplace=False, ignore_index=True)
covid_vac_df.head()

Unnamed: 0,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
0,2022-01,CO,11426955.0,198428.0,9979697.0,173297.0
1,2022-01,GA,19734215.0,185866.0,13943859.0,131330.0
2,2022-01,DC,1702685.0,241259.0,1358690.0,192517.0
3,2022-01,NM,4076715.0,194423.0,3622196.0,172746.0
4,2022-01,IA,6009475.0,190470.0,4808223.0,152397.0


In [19]:
covid_vac_df.dtypes

Date               object
State              object
Distributed       float64
Dist_per_100k     float64
Administered      float64
Admin_per_100k    float64
dtype: object

In [20]:
# save df as a csv file
covid_vac_df.to_csv('../Data/vaccine.csv')

## USA Total Cases and Death by State

In [21]:
# upload data file and read it ito a pandas dataframe
# file = "../Data/US_COVID-19_Deaths.csv"
# covid_stats_df = pd.read_csv(file)
# covid_stats_df.head()

In [22]:
# select relevant columns only
# covid_stats_df = covid_stats_df[['State/Territory', 'Total Cases', 'Case Rate per 100000', 'Total Deaths', 'Death Rate per 100000']]
# covid_stats_df.head()

In [23]:
# add a state abbreviation column
# covid_stats_df['State'] = state_df['state']
# covid_stats_df.head()

In [24]:
# rename columns and drop any column with NA
# covid_stats_df = covid_stats_df.rename(columns={"state": "State", "Total Cases": "Total_cases", "Case Rate per 100000": "Case_rate_per_100k", "Total Deaths": "Total_deaths", "Death Rate per 100000": "Death_rate_per_100k"})
# covid_stats_df = covid_stats_df[["State", "Total_cases", "Case_rate_per_100k", "Total_deaths", "Death_rate_per_100k"]].fillna(0)
# covid_stats_df.tail()

## USA COVID-19 data with latitude and longtitude to create interactive map

In [25]:
# upload file and read to pandas dataframe
# file = "../Data/US_COVID-19_stats.csv"
# us_covid_df = pd.read_csv(file)
# us_covid_df.head()

In [26]:
# select relevant columns
# us_covid_df = us_covid_df[["Province_State", "Lat", "Long_", "Confirmed", "Deaths", "Recovered", "Mortality_Rate"]]
# us_covid_df.head()

In [27]:
# add state column to be filled with state abbreviation
# us_covid_df["State"] = "" 
# us_covid_df.head()

In [28]:
# # upload States and their abbreviation
# us_state_abbrev = {
    
#     'Alabama': 'AL',
#     'Alaska': 'AK',
#     'American Samoa': 'AS',
#     'Arizona': 'AZ',
#     'Arkansas': 'AR',
#     'California': 'CA',
#     'Colorado': 'CO',
#     'Connecticut': 'CT',
#     'Delaware': 'DE',
#     'Diamond Princess': 'DP',
#     'District of Columbia': 'DC',
#     'Florida': 'FL',
#     'Georgia': 'GA',
#     'Grand Princess': 'GP',
#     'Guam': 'GU',
#     'Hawaii': 'HI',
#     'Idaho': 'ID',
#     'Illinois': 'IL',
#     'Indiana': 'IN',
#     'Iowa': 'IA',
#     'Kansas': 'KS',
#     'Kentucky': 'KY',
#     'Louisiana': 'LA',
#     'Maine': 'ME',
#     'Maryland': 'MD',
#     'Massachusetts': 'MA',
#     'Michigan': 'MI',
#     'Minnesota': 'MN',
#     'Mississippi': 'MS',
#     'Missouri': 'MO',
#     'Montana': 'MT',
#     'Nebraska': 'NE',
#     'Nevada': 'NV',
#     'New Hampshire': 'NH',
#     'New Jersey': 'NJ',
#     'New Mexico': 'NM',
#     'New York': 'NY',
#     'North Carolina': 'NC',
#     'North Dakota': 'ND',
#     'Northern Mariana Islands':'MP',
#     'Ohio': 'OH',
#     'Oklahoma': 'OK',
#     'Oregon': 'OR',
#     'Pennsylvania': 'PA',
#     'Puerto Rico': 'PR',
#     'Rhode Island': 'RI',
#     'South Carolina': 'SC',
#     'South Dakota': 'SD',
#     'Tennessee': 'TN',
#     'Texas': 'TX',
#     'Utah': 'UT',
#     'Vermont': 'VT',
#     'Virgin Islands': 'VI',
#     'Virginia': 'VA',
#     'Washington': 'WA',
#     'West Virginia': 'WV',
#     'Wisconsin': 'WI',
#     'Wyoming': 'WY'
# }
# abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))

In [29]:
# loop through and assign abbreviation acoordinely
# for index, row in us_covid_df.iterrows():
#     us_covid_df.loc[index, "State"] = us_state_abbrev[row[0]]

# us_covid_df.head()

In [None]:
# loop through and assign abbreviation acoordinely
# for index, row in us_covid_df.iterrows():
#     if row["State"] == "DP":
#         us_covid_df.loc[index, "Lat"] = 14.5214
#         us_covid_df.loc[index, "Long_"] = 120.9709

#     elif row["State"] == "GP":
#         us_covid_df.loc[index, "Lat"] = 32.1584
#         us_covid_df.loc[index, "Long_"] = 117.5676

    # if row["State"] == FSM:
    #     us_covid_df.loc[index, "Lat"] = 6.8874
    #     us_covid_df.loc[index, "Long"] = 158.2150

    # elif row["State"] == NYC:
    #     us_covid_df.loc[index, "Lat"] = 40.7128
    #     us_covid_df.loc[index, "Long"] = -74.0060

    # elif row["State"] == PW:
    #     us_covid_df.loc[index, "Lat"] = 7.5150
    #     us_covid_df.loc[index, "Long"] = 134.5825

    # elif row["State"] == RMI:
    #     us_covid_df.loc[index, "Lat"] = 7.1315
    #     us_covid_df.loc[index, "Long"] = 171.1845

# us_covid_df.head()

In [30]:
# rename columns and add recovery percent column
# us_covid_df = us_covid_df.rename(columns={"Long_": "Long", "Mortality_Rate":"Death_percent"})
# us_covid_df["Recovery_percent"]=us_covid_df.Recovered/us_covid_df.Confirmed
# us_covid_df = us_covid_df[["State", "Lat", "Long", "Death_percent", "Recovery_percent"]].fillna(0)
# us_covid_df.head()

In [32]:
# loop through and assign abbreviation acoordinely
# for index, row in us_covid_df.iterrows():
#     if row["State"] == "DP":
#         us_covid_df.loc[index, "Lat"] = 14.5214
#         us_covid_df.loc[index, "Long"] = 120.9709

#     elif row["State"] == "GP":
#         us_covid_df.loc[index, "Lat"] = 32.1584
#         us_covid_df.loc[index, "Long"] = 117.5676

    # elif row["State"] == FSM:
    #     us_covid_df.loc[index, "Lat"] = 6.8874
    #     us_covid_df.loc[index, "Long"] = 158.2150

    # elif row["State"] == NYC:
    #     us_covid_df.loc[index, "Lat"] = 40.7128
    #     us_covid_df.loc[index, "Long"] = -74.0060

    # elif row["State"] == PW:
    #     us_covid_df.loc[index, "Lat"] = 7.5150
    #     us_covid_df.loc[index, "Long"] = 134.5825

    # elif row["State"] == RMI:
    #     us_covid_df.loc[index, "Lat"] = 7.1315
    #     us_covid_df.loc[index, "Long"] = 171.1845

# us_covid_df.head()

In [33]:
# merge two df into one dataframe using left joint.
# us_covid_combine_df = pd.merge(covid_stats_df, us_covid_df, how='left', on=['State','State'])
# us_covid_combine_df.head()

In [34]:
# save to a csv file
# us_covid_combine_df.to_csv('../Data/states.csv')

## Add Geo property to states in old state file

In [35]:
# Add Lat and Long Columns
# geomap_df = covid_monthly_df.groupby('State').mean().reset_index()
# covid_monthly_df["Long"] = ""
# geomap_df.head()

## World COVID-19 Data by Country

In [36]:
# upload file and read into a pandas dataframe
# file = "../Data/World_COVID-19_Stats.csv"
# world_covid_df = pd.read_csv(file)
# world_covid_df.head()

In [37]:
# select relevant columns only
# world_covid_df = world_covid_df[["Country_Region", "Lat", "Long_", "Confirmed", "Deaths", "Recovered", "Case-Fatality_Ratio"]]
# world_covid_df.head()

In [38]:
# rename columns and add a recovery percent column
# world_covid_df = world_covid_df.rename(columns={"Long_": "Long", "Case-Fatality_Ratio":"Death_percent", "Country_Region": "Country"})
# world_covid_df["Recovery_percent"]=world_covid_df.Recovered/world_covid_df.Confirmed
# world_covid_df.head()

In [39]:
# add missing geoinfo to country
# for index, row in world_covid_df.iterrows(): 
#     if row["Country"] == "Canada":
#         world_covid_df.loc[index, "Lat"] = 56.1304
#         world_covid_df.loc[index, "Long"] = 106.3468
# world_covid_df

In [40]:
# save file as a csv
# world_covid_df.to_csv('../Data/world.csv')

## build SQlite DataBase

In [41]:
# import modules
import sqlite3
from sqlite3 import Error

# series of functions to  build database tables in sqlite database.

def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return: Connection object or None
    """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)
    return conn
def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except Error as e:
        print(e)

def drop_table(conn, drop_table_sql):
    """ drop a table from the drop_table_sql statement
    :param conn: Connection object
    :param drop_table_sql: a drop TABLE statement
    :return:
    """
    try:
        c = conn.cursor()
        c.execute(drop_table_sql)
    except Error as e:
        print(e)

# build empty tables with correct columns names
def main():		
    database = r"../DataBase/covid-19.db"
    sql_create_monthly_table = """CREATE TABLE monthly(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Total_cases float,
                                        Total_death float,
                                        Total_recovered float,
                                        Death_percent float,
                                        Recovery_percent float
                                    ); """

    sql_drop_monthly_table = """DROP TABLE if exists monthly;"""

    sql_create_vaccine_table = """CREATE TABLE vaccine(
                                        id INTEGER PRIMARY KEY AUTOINCREMENT, 
                                        Date text NOT NULL,
                                        State text,
                                        Distributed float,
                                        Dist_per_100k float,
                                        Administered float,
                                        Admin_per_100k float
                                    ); """

    sql_drop_vaccine_table = """DROP TABLE if exists vaccine;"""
    
    # sql_create_states_table = """CREATE TABLE states(
    #                                     id INTEGER PRIMARY KEY AUTOINCREMENT,
    #                                     State text,
    #                                     Total_cases float,
    #                                     Case_rate_per_100k float,
    #                                     Total_deaths float,
    #                                     Death_rate_per_100k float,
    #                                     Lat float,
    #                                     Long float,
    #                                     Death_percent float,
    #                                     Recovery_percent float
    #                                 );"""

    # sql_drop_states_table = """DROP TABLE if exists states;"""    

    # sql_create_world_table = """CREATE TABLE world(
    #                                     id INTEGER PRIMARY KEY AUTOINCREMENT,
    #                                     Country text,
    #                                     Lat float,
    #                                     Long float,
    #                                     Confirmed float,
    #                                     Deaths float,
    #                                     Recovered float,
    #                                     Recovery_percent float,
    #                                     Death_percent float
    #                                 );"""

    # sql_drop_world_table = """DROP TABLE if exists world;"""

    # create a database connection
    conn = create_connection(database)
    
    # create tables
    if conn is not None:
        # create covid_monthly table
        drop_table(conn, sql_drop_monthly_table)
        create_table(conn, sql_create_monthly_table)

        # create covid_vaccine table
        drop_table(conn, sql_drop_vaccine_table)
        create_table(conn, sql_create_vaccine_table)

        # create covid_states table
        # drop_table(conn, sql_drop_states_table)
        # create_table(conn, sql_create_states_table)

        #create covid_world table
        # drop_table(conn, sql_drop_world_table)
        # create_table(conn, sql_create_world_table)
        
    # error handling
    else:
        print("Error! cannot create the database connection.")
if __name__ == '__main__':
    main()

In [42]:
# create engine connection to database
engine = create_engine('sqlite:///../DataBase/covid-19.db')

# # populate tables with information from dataframes
covid_monthly_df.to_sql(name='monthly', con=engine, if_exists = "append", index=False)
covid_vac_df.to_sql(name='vaccine', con=engine, if_exists = "append", index=False)
# world_covid_df.to_sql(name='world', con=engine, if_exists = "append", index= False)
# us_covid_combine_df.to_sql(name='states', con=engine, if_exists = "append", index=False)

In [43]:
# querry monthly data from database. to test and make sure tables are working
pd.read_sql('select * from monthly', engine)

Unnamed: 0,id,Date,State,Total_cases,Total_death,Total_recovered,Death_percent,Recovery_percent
0,1,2020-01,AK,0.0,0.0,0.0,0.00,0.00
1,2,2020-01,AL,0.0,0.0,0.0,0.00,0.00
2,3,2020-01,AR,0.0,0.0,0.0,0.00,0.00
3,4,2020-01,AS,0.0,0.0,0.0,0.00,0.00
4,5,2020-01,AZ,1.0,0.0,1.0,0.00,100.00
...,...,...,...,...,...,...,...,...
1495,1496,2022-01,VT,92148.0,496.0,91652.0,0.54,99.46
1496,1497,2022-01,WA,1203311.0,10458.0,1192853.0,0.87,99.13
1497,1498,2022-01,WI,1470412.0,12010.0,1458402.0,0.82,99.18
1498,1499,2022-01,WV,422265.0,5645.0,416620.0,1.34,98.66


In [44]:
pd.read_sql('select * from vaccine', engine)

Unnamed: 0,id,Date,State,Distributed,Dist_per_100k,Administered,Admin_per_100k
0,1,2022-01,CO,11426955.0,198428.0,9979697.0,173297.0
1,2,2022-01,GA,19734215.0,185866.0,13943859.0,131330.0
2,3,2022-01,DC,1702685.0,241259.0,1358690.0,192517.0
3,4,2022-01,NM,4076715.0,194423.0,3622196.0,172746.0
4,5,2022-01,IA,6009475.0,190470.0,4808223.0,152397.0
...,...,...,...,...,...,...,...
60,61,2022-01,LA,7501190.0,161358.0,5889679.0,126693.0
61,62,2022-01,WV,3702825.0,206614.0,2578513.0,143878.0
62,63,2022-01,HI,3188520.0,225198.0,2684950.0,189632.0
63,64,2022-01,OR,9179145.0,217632.0,7196203.0,170618.0


In [45]:
# make an API call to make sure Flask app is working 
import requests
url="http://127.0.0.1:5000/api/v1.0/monthly"
resp=requests.get(url)
resp

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /api/v1.0/monthly (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000022F371938D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))

In [None]:
data = resp.json()
data