In [1]:
import requests
import numpy as np
import pandas as pd
import sqlite3

In [2]:
class Covid19:
    """Loader for the CSV files of the CSSEGISandData/COVID-19 GitHub repository.

    Every public method downloads its data with ``pandas.read_csv`` each time
    it is called, so network access is required.

    The date passed to :meth:`get_daily_report` is remembered on the instance
    and used by :meth:`get_time_series` as the default cut-off date.
    """

    _BASE_URL = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data"
    _LOOKUP_URL = _BASE_URL + "/UID_ISO_FIPS_LookUp_Table.csv"
    _DAILY_REPORT_URL = _BASE_URL + "/csse_covid_19_daily_reports/{}.csv"
    _TIME_SERIES_URL = _BASE_URL + "/csse_covid_19_time_series/time_series_covid19_{}_global.csv"

    # Non-date columns of the wide time-series files.
    _TIME_SERIES_META = ('Province/State', 'Country/Region', 'Lat', 'Long')

    # Class-level default so get_time_series() also works on a fresh instance.
    _report_date = None

    def get_lookup_table(self):
        """Download and tidy the UID/ISO/FIPS lookup table.

        ``'*'`` characters are stripped from ``Country_Region`` and
        ``Combined_Key``, ``Population`` becomes nullable ``Int64``, and
        ``Admin2`` / ``Province_State`` are derived from ``Combined_Key``
        split on ``', '``:

        * 1 part  -> both missing
        * 2 parts -> ``Province_State`` is the first part
        * 3 parts -> ``Admin2`` is the first part, ``Province_State`` the second
        * anything else (including a missing key) -> both missing

        Returns:
            pandas.DataFrame with columns UID, Combined_Key, iso2, iso3,
            Country_Region, Province_State, Admin2, Lat, Long_, Population.
        """
        table = pd.read_csv(self._LOOKUP_URL)
        for column in ('Country_Region', 'Combined_Key'):
            # regex=False: '*' is meant literally, not as a regex quantifier.
            table[column] = table[column].str.replace('*', '', regex=False)
        table['Population'] = table['Population'].astype('Int64')

        parts = table['Combined_Key'].str.split(', ')
        n_parts = parts.str.len()
        # .where() leaves NaN wherever the condition is false, so every row
        # gets a value whatever shape its key has.
        table['Admin2'] = parts.str[0].where(n_parts == 3)
        # The second-to-last part is the state for 2- and 3-part keys alike.
        table['Province_State'] = parts.str[-2].where(n_parts.isin([2, 3]))

        return table[['UID', 'Combined_Key',
                      'iso2', 'iso3',
                      'Country_Region', 'Province_State', 'Admin2',
                      'Lat', 'Long_', 'Population']]

    def get_daily_report(self, report_date):
        """Download the daily report for ``report_date``.

        The date is stored on the instance (even when the download fails) and
        becomes the default cut-off of :meth:`get_time_series`.

        Args:
            report_date: Date string in ``mm-dd-yyyy`` format.

        Returns:
            pandas.DataFrame with columns Combined_Key, Last_Update, Confirmed,
            Deaths, Recovered, Incident_Rate, Case_Fatality_Ratio, or ``None``
            (after printing a message) when the report cannot be downloaded,
            parsed, or lacks one of those columns.
        """
        self._report_date = report_date
        columns = ['Combined_Key', 'Last_Update', 'Confirmed', 'Deaths',
                   'Recovered', 'Incident_Rate', 'Case_Fatality_Ratio']
        try:
            report = pd.read_csv(self._DAILY_REPORT_URL.format(report_date))
            report['Combined_Key'] = report['Combined_Key'].str.replace('*', '', regex=False)
            return report[columns]
        except (OSError, KeyError, ValueError):
            # OSError covers HTTP/URL failures, KeyError a report with a
            # different column layout, ValueError unparsable CSV content.
            # Unlike a bare ``except`` this lets KeyboardInterrupt through.
            print("Wrong format or unavailable report date: {}.".format(report_date))
            print("Expecting mm-dd-yyyy format.")
            return None

    def get_time_series(self, report_date=None):
        """Build per-country cumulative and daily confirmed/death counts.

        Args:
            report_date: Optional last date to keep (anything
                ``pandas.to_datetime`` accepts). Defaults to the date last
                passed to :meth:`get_daily_report`; when neither is available
                the full series is returned.

        Returns:
            pandas.DataFrame sorted by date then country, with columns Date
            (``YYYY-MM-DD`` string), Country_Region, Confirmed, Deaths,
            Daily_Cases, Daily_Deaths. The daily columns are the difference
            to the previous date of the same country; the first date keeps
            its cumulative value.
        """
        cutoff = report_date if report_date is not None else self._report_date

        # Align confirmed and deaths on their (Date, Country) keys rather than
        # on row position, so the two files may differ in row order.
        totals = pd.concat(
            [self._load_country_totals('confirmed', 'Confirmed'),
             self._load_country_totals('deaths', 'Deaths')],
            axis=1,
        ).sort_index()
        time_series = totals.reset_index()
        time_series.columns = ['Date', 'Country_Region', 'Confirmed', 'Deaths']

        if cutoff is not None:
            keep = time_series['Date'] <= pd.to_datetime(cutoff)
            time_series = time_series[keep].reset_index(drop=True)
        time_series['Date'] = time_series['Date'].dt.strftime('%Y-%m-%d')

        # Previous cumulative value per country; 0 before the first date.
        previous = time_series.groupby('Country_Region')[['Confirmed', 'Deaths']].shift(1, fill_value=0)
        time_series['Daily_Cases'] = time_series['Confirmed'] - previous['Confirmed']
        time_series['Daily_Deaths'] = time_series['Deaths'] - previous['Deaths']
        return time_series

    def _load_country_totals(self, metric, value_name):
        """Return one global time-series file summed per (Date, Country/Region).

        Args:
            metric: File name fragment, ``'confirmed'`` or ``'deaths'``.
            value_name: Name given to the resulting Series.
        """
        wide = pd.read_csv(self._TIME_SERIES_URL.format(metric))
        date_columns = [c for c in wide.columns if c not in self._TIME_SERIES_META]
        tidy = wide.melt(id_vars=['Country/Region'], value_vars=date_columns,
                         var_name='Date', value_name=value_name)
        tidy['Country/Region'] = tidy['Country/Region'].str.replace('*', '', regex=False)
        tidy['Date'] = pd.to_datetime(tidy['Date'])
        return tidy.groupby(['Date', 'Country/Region'])[value_name].sum()

In [3]:
# Report date in mm-dd-yyyy form.
REPORT_DATE = '03-31-2021'

covid19 = Covid19()
lookup_table = covid19.get_lookup_table()
# Keep this call order: get_daily_report records the date on the instance and
# get_time_series uses that stored date as its cut-off.
daily_report = covid19.get_daily_report(REPORT_DATE)
time_series = covid19.get_time_series()

In [4]:
# Display the lookup table returned by Covid19.get_lookup_table().
lookup_table

Unnamed: 0,UID,Combined_Key,iso2,iso3,Country_Region,Province_State,Admin2,Lat,Long_,Population
0,4,Afghanistan,AF,AFG,Afghanistan,,,33.939110,67.709953,38928341
1,8,Albania,AL,ALB,Albania,,,41.153300,20.168300,2877800
2,12,Algeria,DZ,DZA,Algeria,,,28.033900,1.659600,43851043
3,20,Andorra,AD,AND,Andorra,,,42.506300,1.521800,77265
4,24,Angola,AO,AGO,Angola,,,-11.202700,17.873900,32866268
...,...,...,...,...,...,...,...,...,...,...
4170,84056037,"Sweetwater, Wyoming, US",US,USA,US,Wyoming,Sweetwater,41.659439,-108.882788,42343
4171,84056039,"Teton, Wyoming, US",US,USA,US,Wyoming,Teton,43.935225,-110.589080,23464
4172,84056041,"Uinta, Wyoming, US",US,USA,US,Wyoming,Uinta,41.287818,-110.547578,20226
4173,84056043,"Washakie, Wyoming, US",US,USA,US,Wyoming,Washakie,43.904516,-107.680187,7805


In [5]:
# Display the daily report returned by Covid19.get_daily_report().
daily_report

Unnamed: 0,Combined_Key,Last_Update,Confirmed,Deaths,Recovered,Incident_Rate,Case_Fatality_Ratio
0,Afghanistan,2021-04-01 04:27:05,56454,2484,51550.0,145.020308,4.400043
1,Albania,2021-04-01 04:27:05,125157,2235,91271.0,4349.051359,1.785757
2,Algeria,2021-04-01 04:27:05,117192,3093,81538.0,267.250200,2.639259
3,Andorra,2021-04-01 04:27:05,12010,115,11315.0,15543.907332,0.957535
4,Angola,2021-04-01 04:27:05,22311,537,20493.0,67.884191,2.406884
...,...,...,...,...,...,...,...
3976,Vietnam,2021-04-01 04:27:05,2603,35,2359.0,2.674171,1.344602
3977,West Bank and Gaza,2021-04-01 04:27:05,242353,2627,215429.0,4750.700590,1.083956
3978,Yemen,2021-04-01 04:27:05,4357,888,1676.0,14.608076,20.380996
3979,Zambia,2021-04-01 04:27:05,88418,1208,84592.0,480.951978,1.366238


In [6]:
# Display the per-country time series returned by Covid19.get_time_series().
time_series

Unnamed: 0,Date,Country_Region,Confirmed,Deaths,Daily_Cases,Daily_Deaths
0,2020-01-22,Afghanistan,0,0,0,0
1,2020-01-22,Albania,0,0,0,0
2,2020-01-22,Algeria,0,0,0,0
3,2020-01-22,Andorra,0,0,0,0
4,2020-01-22,Angola,0,0,0,0
...,...,...,...,...,...,...
83515,2021-03-31,Vietnam,2603,35,9,0
83516,2021-03-31,West Bank and Gaza,242353,2627,2288,13
83517,2021-03-31,Yemen,4357,888,110,6
83518,2021-03-31,Zambia,88418,1208,219,6


In [7]:
# Optional: export the three tables as CSV files (uncomment to enable).
#lookup_table.to_csv('lookup_table.csv', index=False)
#daily_report.to_csv('daily_report.csv', index=False)
#time_series.to_csv('time_series.csv', index=False)

In [8]:
# Write the three tables to the local SQLite file 'covid19.db', replacing any
# existing table of the same name.
tables_to_store = {
    'lookup_table': lookup_table,
    'daily_report': daily_report,
    'time_series': time_series,
}

# get_daily_report() returns None when the report could not be loaded; check
# before touching the database so a failure cannot leave it half-updated.
missing_tables = [name for name, frame in tables_to_store.items() if frame is None]
if missing_tables:
    raise ValueError("Cannot store missing table(s): {}".format(', '.join(missing_tables)))

conn = sqlite3.connect('covid19.db')
try:
    for table_name, table_frame in tables_to_store.items():
        table_frame.to_sql(table_name, conn, if_exists='replace', index=False)
    conn.commit()
finally:
    # Always release the database file handle, even if a write fails.
    conn.close()