In [104]:
# import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mc

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

import math
import random

import datetime
import time

import operator

In [105]:
# function to build the date string used to request the latest daily report from the GitHub repo

def getYesterday(today=None):
    """Return the day before `today` as a zero-padded ``MM-DD-YYYY`` string.

    The string is used to build the file name of the latest daily report in
    the data repository, whose files are named with a two-digit month AND a
    two-digit day (e.g. ``06-05-2020.csv``).

    Parameters
    ----------
    today : datetime.date, optional
        Reference date. Defaults to ``datetime.date.today()``; exposed so the
        result can be reproduced for a fixed date.

    Returns
    -------
    str
        Yesterday's date formatted as ``MM-DD-YYYY``.
    """
    if today is None:
        today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)

    # strftime zero-pads both fields; the previous manual formatting padded
    # only the month, so e.g. June 5th became "06-5-2020" and the request failed
    return yesterday.strftime("%m-%d-%Y")

In [106]:
# fetch the source tables from the CSSEGISandData/COVID-19 GitHub repository

# global time series: confirmed cases
confirmed = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
)

# global time series: deaths
deaths = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
)

# global time series: recoveries
recovered = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
)

# the newest daily report is requested for yesterday rather than today:
# the repository only updates right before midnight, so asking for today's
# file would end in an http error
latest_report_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv'.format(getYesterday())
latest = pd.read_csv(latest_report_url)

In [107]:
# keep only the per-date columns of each table

columns = confirmed.keys()

# label-based slice (inclusive on both ends): from the fifth column of
# `confirmed` through its last column; the same labels are applied to all
# three tables
date_span = slice(columns[4], columns[-1])

confirmed_cases_by_date = confirmed.loc[:, date_span]

deaths_by_date = deaths.loc[:, date_span]

recoveries_by_date = recovered.loc[:, date_span]

In [108]:
# initialize lists to store data

# column labels of the date-only table (for a DataFrame, .columns is what .keys() returns)
dates = confirmed_cases_by_date.columns

# world-wide series, filled later with one value per date
total_cases, total_deaths = [], []
mortality_rate, recovery_rate = [], []
total_recovered, total_active = [], []

# per-country series; each dictionary owns three independent lists
china = {'cases': [], 'deaths': [], 'recoveries': []}

italy = {'cases': [], 'deaths': [], 'recoveries': []}

us = {'cases': [], 'deaths': [], 'recoveries': []}

spain = {'cases': [], 'deaths': [], 'recoveries': []}

france = {'cases': [], 'deaths': [], 'recoveries': []}

germany = {'cases': [], 'deaths': [], 'recoveries': []}

uk = {'cases': [], 'deaths': [], 'recoveries': []}

russia = {'cases': [], 'deaths': [], 'recoveries': []}

india = {'cases': [], 'deaths': [], 'recoveries': []}

In [109]:
# function to populate data fields for country dictionaries

def fillDictionary(listTitle, dataSet, date=None):
    """Append one date's per-country total to each country dictionary.

    For every tracked country, the rows of `dataSet` whose 'Country/Region'
    equals that country are selected, the `date` column is summed over them,
    and the sum is appended to ``<country>[listTitle]``. The module-level
    dictionaries china, italy, us, spain, france, germany, uk, russia and
    india are modified in place.

    Parameters
    ----------
    listTitle : str
        Key of the list to extend in each country dictionary
        (the callers use 'cases', 'deaths' and 'recoveries').
    dataSet : pandas.DataFrame
        Table with a 'Country/Region' column and a column labelled `date`.
    date : column label, optional
        Column to sum. When omitted, the global loop variable ``i`` is used,
        which keeps existing two-argument calls made inside ``for i in dates``
        working; passing it explicitly avoids that hidden dependency.

    Returns
    -------
    None
    """
    if date is None:
        # backward compatibility: fall back to the caller cell's loop variable
        date = i

    # (dictionary to fill, name used in the 'Country/Region' column)
    tracked_countries = (
        (china, 'China'),
        (italy, 'Italy'),
        (us, 'US'),
        (spain, 'Spain'),
        (france, 'France'),
        (germany, 'Germany'),
        (uk, 'United Kingdom'),
        (russia, 'Russia'),
        (india, 'India'),
    )

    country_column = dataSet['Country/Region']
    for record, country_name in tracked_countries:
        # a country may span several rows, hence the sum over the matching rows
        record[listTitle].append(dataSet.loc[country_column == country_name, date].sum())

In [110]:
for i in dates:
    # NOTE: the loop variable has to be named `i`; fillDictionary reads it
    # as a global to know which date column to sum.

    # world-wide counts for this date, summed over every row of each table
    count_confirmed, count_deaths, count_recoveries = (
        table[i].sum()
        for table in (confirmed_cases_by_date, deaths_by_date, recoveries_by_date)
    )

    # absolute totals
    total_cases.append(count_confirmed)
    total_deaths.append(count_deaths)
    total_recovered.append(count_recoveries)
    # active = confirmed cases that are neither deaths nor recoveries
    total_active.append(count_confirmed - count_deaths - count_recoveries)

    # rates relative to the confirmed count of the same date
    mortality_rate.append(count_deaths / count_confirmed)
    recovery_rate.append(count_recoveries / count_confirmed)

    # per-country figures for the same date, one call per source table
    for series_name, source_table in (
        ('cases', confirmed),
        ('deaths', deaths),
        ('recoveries', recovered),
    ):
        fillDictionary(series_name, source_table)
 

[548,
 643,
 920,
 1406,
 2075,
 2877,
 5509,
 6087,
 8141,
 9802,
 11891,
 16630,
 19716,
 23707,
 27440,
 30587,
 34110,
 36814,
 39829,
 42354,
 44386,
 44759,
 59895,
 66358,
 68413,
 70513,
 72434,
 74211,
 74619,
 75077,
 75550,
 77001,
 77022,
 77241,
 77754,
 78166,
 78600,
 78928,
 79356,
 79932,
 80136,
 80261,
 80386,
 80537,
 80690,
 80770,
 80823,
 80860,
 80887,
 80921,
 80932,
 80945,
 80977,
 81003,
 81033,
 81058,
 81102,
 81156,
 81250,
 81305,
 81435,
 81498,
 81591,
 81661,
 81782,
 81897,
 81999,
 82122,
 82198,
 82279,
 82361,
 82432,
 82511,
 82543,
 82602,
 82665,
 82718,
 82809,
 82883,
 82941,
 83014,
 83134,
 83213,
 83306,
 83356,
 83403,
 83760,
 83787,
 83805,
 83817,
 83853,
 83868,
 83884,
 83899,
 83909,
 83912,
 83918,
 83940,
 83944,
 83956,
 83959,
 83959,
 83964,
 83966,
 83968,
 83970,
 83975,
 83976,
 83990,
 84010,
 84011,
 84018,
 84024,
 84029,
 84038,
 84044,
 84054,
 84063,
 84063,
 84063,
 84063,
 84081,
 84084,
 84095,
 84102,
 84103,
 8410