In [41]:
# import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mc

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

import math
import random

import datetime
import time

import operator

In [42]:
# function that returns yesterday's date, formatted for building the request URL to the GitHub repo of latest data

def getYesterday(today=None):
    """Return the day before ``today`` formatted as ``MM-DD-YYYY``.

    The result is substituted into the daily-report file name requested from
    the data repository, which uses two-digit month and day fields (e.g.
    ``06-05-2020``). Both fields are therefore zero-padded; padding only the
    month produces a non-existent file name on the 1st-9th of each month.

    Parameters
    ----------
    today : datetime.date, optional
        Reference date. Defaults to ``datetime.date.today()`` (local time),
        which matches the original zero-argument behaviour.

    Returns
    -------
    str
        Yesterday's date relative to ``today``, e.g. ``"06-05-2020"``.
    """
    if today is None:
        today = datetime.date.today()

    yesterday = today - datetime.timedelta(days=1)

    # {:02d} pads single-digit months AND days with a leading zero
    return "{:02d}-{:02d}-{}".format(yesterday.month, yesterday.day, yesterday.year)

In [43]:
# load data sets

# Global time-series tables: confirmed cases, deaths and recoveries.
confirmed = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
)

deaths = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
)

recovered = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
)

# Yesterday's daily report is requested on purpose: the repository only
# updates right before midnight, so asking for today's file raises an HTTP error.
latest = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv'.format(getYesterday())
)

In [44]:
# retrieve data from respective columns by date

# Column labels of the confirmed-cases table; the same labels are used to
# slice all three tables below.
columns = confirmed.columns

# Keep every column from the fifth one (position 4) through the last one,
# selected by label, for each of the three tables.
confirmed_cases_by_date, deaths_by_date, recoveries_by_date = (
    frame.loc[:, columns[4]:columns[-1]]
    for frame in (confirmed, deaths, recovered)
)

In [45]:
# initialize lists to store data

# Column labels of the sliced confirmed-cases table; the aggregation loop
# iterates over these.
dates = confirmed_cases_by_date.columns

# Worldwide accumulators -- each receives one entry per element of `dates`.
total_cases, total_deaths = [], []
mortality_rate, recovery_rate = [], []
total_recovered, total_active = [], []

# Per-country accumulators. Every dict has the same three keys, each mapped
# to its own (initially empty) list.
china = {'cases': [], 'deaths': [], 'recoveries': []}
italy = {'cases': [], 'deaths': [], 'recoveries': []}
us = {'cases': [], 'deaths': [], 'recoveries': []}
spain = {'cases': [], 'deaths': [], 'recoveries': []}
france = {'cases': [], 'deaths': [], 'recoveries': []}
germany = {'cases': [], 'deaths': [], 'recoveries': []}
uk = {'cases': [], 'deaths': [], 'recoveries': []}
russia = {'cases': [], 'deaths': [], 'recoveries': []}
india = {'cases': [], 'deaths': [], 'recoveries': []}

In [49]:
def fillDictionary(listTitle, dataSet, date=None):
    """Append each tracked country's total for one date column.

    For every tracked country, the rows of ``dataSet`` whose
    ``'Country/Region'`` equals that country's name are selected, the values
    in the ``date`` column are summed, and the sum is appended to
    ``<country dict>[listTitle]``. The country dicts (``china``, ``italy``,
    ``us``, ``spain``, ``france``, ``germany``, ``uk``, ``russia``,
    ``india``) are notebook-level globals and are mutated in place.

    Parameters
    ----------
    listTitle : str
        Key of the per-country list to append to
        (the notebook uses 'cases', 'deaths' and 'recoveries').
    dataSet : pandas.DataFrame
        Table with a 'Country/Region' column and one column per date.
    date : optional
        Label of the column to sum. When omitted, the notebook-global loop
        variable ``i`` is used, which preserves the original two-argument
        call style. Passing ``date`` explicitly is preferred because it
        removes the dependency on hidden global state.

    Raises
    ------
    NameError
        If ``date`` is omitted and no global ``i`` exists.
    """
    if date is None:
        # Backward compatibility: the original implementation always read the
        # column label from the global loop variable `i`.
        date = i

    # (accumulator dict, value matched against the 'Country/Region' column)
    trackers = (
        (china, 'China'),
        (italy, 'Italy'),
        (us, 'US'),
        (spain, 'Spain'),
        (france, 'France'),
        (germany, 'Germany'),
        (uk, 'United Kingdom'),
        (russia, 'Russia'),
        (india, 'India'),
    )

    regions = dataSet['Country/Region']
    for tracker, country in trackers:
        # Single .loc selection (row mask + column label) instead of chained indexing.
        tracker[listTitle].append(dataSet.loc[regions == country, date].sum())

In [51]:
# Build the worldwide and per-country series, one entry per date column.
# NOTE: the loop variable must stay named `i` -- fillDictionary reads the
# global `i` to decide which column to sum.
# NOTE: every step appends to lists created in an earlier cell, so running
# this cell again without re-running that cell appends a second copy.
for i in dates:
    
    # column totals across all rows of each table for this date
    count_confirmed = confirmed_cases_by_date[i].sum()
    count_deaths = deaths_by_date[i].sum()
    count_recoveries = recoveries_by_date[i].sum()
    
    total_cases.append(count_confirmed)
    total_deaths.append(count_deaths)
    total_recovered.append(count_recoveries)    
    # active = confirmed minus deaths minus recoveries
    total_active.append(count_confirmed - count_deaths - count_recoveries)
    
    # stored as fractions of confirmed cases (not percentages)
    # NOTE(review): there is no guard for count_confirmed == 0
    mortality_rate.append(count_deaths / count_confirmed)
    recovery_rate.append(count_recoveries / count_confirmed)
    
    # per-country sums for the current date (taken from the global `i`)
    fillDictionary('cases', confirmed)
    fillDictionary('deaths', deaths)
    fillDictionary('recoveries', recovered)
    

[17,
 18,
 26,
 42,
 56,
 82,
 131,
 133,
 171,
 213,
 259,
 361,
 425,
 491,
 563,
 633,
 718,
 805,
 905,
 1012,
 1112,
 1117,
 1369,
 1521,
 1663,
 1766,
 1864,
 2003,
 2116,
 2238,
 2238,
 2443,
 2445,
 2595,
 2665,
 2717,
 2746,
 2790,
 2837,
 2872,
 2914,
 2947,
 2983,
 3015,
 3044,
 3072,
 3100,
 3123,
 3139,
 3161,
 3172,
 3180,
 3193,
 3203,
 3217,
 3230,
 3241,
 3249,
 3253,
 3259,
 3274,
 3274,
 3281,
 3285,
 3291,
 3296,
 3299,
 3304,
 3308,
 3309,
 3316,
 3322,
 3326,
 3330,
 3333,
 3335,
 3335,
 3337,
 3339,
 3340,
 3343,
 3343,
 3345,
 3345,
 3346,
 3346,
 4636,
 4636,
 4636,
 4636,
 4636,
 4636,
 4636,
 4636,
 4636,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4637,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 4638,
 46