In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, mean_absolute_error

import random
import math
import time
import datetime
import operator

In [3]:
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn after this cell.
plt.style.use('fivethirtyeight')
# IPython magic: render matplotlib figures inline in the notebook output.
# (Keep comments off the magic line itself; trailing text would be read as part of the magic's arguments.)
%matplotlib inline

# Import Datasets

In [4]:
# Confirmed-case time series (one column per date) from the CSSEGISandData/COVID-19 GitHub repository.
# The URL points at the master branch, so the set of date columns may differ between runs.
confirmed_cases = pd.read_csv(
    'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
)
# Preview the first rows only.
confirmed_cases.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,33190,33384,33594,33908,34194,34366,34451,34455,34740,34994
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,2964,3038,3106,3188,3278,3371,3454,3571,3667,3752
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,16404,16879,17348,17808,18242,18712,19195,19689,20216,20770
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,855,855,855,855,855,855,855,858,861,862
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,346,386,386,396,458,462,506,525,541,576


In [5]:
# Reported-deaths time series (one column per date) from the CSSEGISandData/COVID-19 GitHub repository.
# The URL points at the master branch, so the set of date columns may differ between runs.
deaths_reported = pd.read_csv(
    'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
)
# Preview the first rows only.
deaths_reported.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,898,920,936,957,971,994,1010,1012,1048,1094
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,79,81,83,83,85,89,93,95,97,101
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,959,968,978,988,996,1004,1011,1018,1028,1040
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,52,52,52,52,52,52,52,52,52,52
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,19,21,21,22,23,23,26,26,26,27


In [6]:
# Recovered-case time series (one column per date) from the CSSEGISandData/COVID-19 GitHub repository.
# The URL points at the master branch, so the set of date columns may differ between runs.
recovered_cases = pd.read_csv(
    'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
)
# Preview the first rows only.
recovered_cases.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,20103,20179,20700,20847,20882,21135,21216,21254,21454,22456
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,1702,1744,1791,1832,1875,1881,1946,2014,2062,2091
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,11884,12094,12329,12637,13124,13124,13743,14019,14295,14792
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,800,800,802,802,803,803,803,803,803,803
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,108,117,117,117,117,118,118,118,118,124


In [7]:
# Snapshot taken from the daily reports folder of the same repository,
# pinned to the 07-15-2020 file (unlike the time series, this URL names a fixed date).
latest_data = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-15-2020.csv'
)
# Preview the first rows only.
latest_data.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,45001.0,Abbeville,South Carolina,US,2020-07-16 04:44:59,34.223334,-82.461707,165,1,0,164.0,"Abbeville, South Carolina, US",672.728014,0.606061
1,22001.0,Acadia,Louisiana,US,2020-07-16 04:44:59,30.295065,-92.414197,1510,46,0,1464.0,"Acadia, Louisiana, US",2433.717463,3.046358
2,51001.0,Accomack,Virginia,US,2020-07-16 04:44:59,37.767072,-75.632346,1045,14,0,1031.0,"Accomack, Virginia, US",3233.692289,1.339713
3,16001.0,Ada,Idaho,US,2020-07-16 04:44:59,43.452658,-116.241552,4836,28,0,4808.0,"Ada, Idaho, US",1004.17993,0.578991
4,19001.0,Adair,Iowa,US,2020-07-16 04:44:59,41.330756,-94.471059,17,0,0,17.0,"Adair, Iowa, US",237.695749,0.0


# Column Info

In [8]:
# Column labels of the confirmed-cases frame: four metadata columns
# ('Province/State', 'Country/Region', 'Lat', 'Long') followed by one column per date.
cols = confirmed_cases.columns
cols

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       ...
       '7/6/20', '7/7/20', '7/8/20', '7/9/20', '7/10/20', '7/11/20', '7/12/20',
       '7/13/20', '7/14/20', '7/15/20'],
      dtype='object', length=180)

In [9]:
# Keep only the date columns of each frame: everything from the fifth label
# (the first date) through the last label. The labels are taken from the
# confirmed-cases frame and reused to slice all three frames.
confirmed, deaths, recoveries = (
    frame.loc[:, cols[4]:cols[-1]]
    for frame in (confirmed_cases, deaths_reported, recovered_cases)
)

In [10]:
# Display the date-only confirmed-cases frame: same rows as confirmed_cases, one column per date.
confirmed

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,0,0,0,0,0,0,0,0,0,0,...,33190,33384,33594,33908,34194,34366,34451,34455,34740,34994
1,0,0,0,0,0,0,0,0,0,0,...,2964,3038,3106,3188,3278,3371,3454,3571,3667,3752
2,0,0,0,0,0,0,0,0,0,0,...,16404,16879,17348,17808,18242,18712,19195,19689,20216,20770
3,0,0,0,0,0,0,0,0,0,0,...,855,855,855,855,855,855,855,858,861,862
4,0,0,0,0,0,0,0,0,0,0,...,346,386,386,396,458,462,506,525,541,576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,0,0,0,0,0,0,0,0,0,0,...,721,724,724,726,727,727,729,732,732,737
262,0,0,0,0,0,0,0,0,0,0,...,1284,1297,1318,1356,1380,1389,1465,1498,1516,1526
263,0,0,0,0,0,0,0,0,0,0,...,311,311,313,314,314,317,317,317,321,321
264,0,0,0,0,0,0,0,0,0,0,...,6262,6315,6364,6410,6457,6506,6552,6596,6643,6695


# Filter Countries


In [14]:
# Date labels, i.e. the column labels of the date-only confirmed frame.
dates = confirmed.columns

# Worldwide per-date series. Every name starts as its own empty list;
# a later cell fills them with one entry per date.
world_cases, total_deaths, total_recovered, total_active = [], [], [], []
mortality_rate, recovery_rate = [], []

# Per-country confirmed cases
china_cases, india_cases, us_cases, brazil_cases, russia_cases = [], [], [], [], []

# Per-country deaths
china_deaths, india_deaths, us_deaths, brazil_deaths, russia_deaths = [], [], [], [], []

# Per-country recoveries
china_recoveries, india_recoveries, us_recoveries, brazil_recoveries, russia_recoveries = [], [], [], [], []

In [15]:
# Build every per-date series from scratch inside this cell. Each list is
# rebound (not appended to), so running the cell again cannot leave duplicated
# entries that no longer line up with `dates`.

def _daily_totals(frame, country=None):
    """Return the column sums of `frame` for every label in `dates`, in `dates` order.

    frame:   DataFrame that has a column for each label in `dates`.
    country: when given, only rows whose 'Country/Region' equals this value are
             summed; `frame` must then contain a 'Country/Region' column.

    Returns a NumPy array with one total per date (zeros when no row matches).
    """
    if country is not None:
        # Filter the rows once per country instead of once per (country, date) pair.
        frame = frame[frame['Country/Region'] == country]
    return frame[dates].sum().to_numpy()


# Countries tracked individually; the order must match the unpacking targets below.
_COUNTRIES = ('China', 'India', 'US', 'Brazil', 'Russia')

# Worldwide totals per date, taken from the date-only frames.
confirmed_by_date = _daily_totals(confirmed)
deaths_by_date = _daily_totals(deaths)
recovered_by_date = _daily_totals(recoveries)

world_cases = list(confirmed_by_date)
total_deaths = list(deaths_by_date)
total_recovered = list(recovered_by_date)
total_active = list(confirmed_by_date - deaths_by_date - recovered_by_date)
# Element-wise ratios; a date with zero confirmed cases yields inf/nan rather than raising.
mortality_rate = list(deaths_by_date / confirmed_by_date)
recovery_rate = list(recovered_by_date / confirmed_by_date)

# Per-country totals per date, taken from the full frames (they carry 'Country/Region').
china_cases, india_cases, us_cases, brazil_cases, russia_cases = (
    list(_daily_totals(confirmed_cases, country)) for country in _COUNTRIES
)
china_deaths, india_deaths, us_deaths, brazil_deaths, russia_deaths = (
    list(_daily_totals(deaths_reported, country)) for country in _COUNTRIES
)
china_recoveries, india_recoveries, us_recoveries, brazil_recoveries, russia_recoveries = (
    list(_daily_totals(recovered_cases, country)) for country in _COUNTRIES
)

# Keep the names the original per-date loop left bound after its final
# iteration (the totals for the last date), in case other cells read them.
if len(dates):
    confirmed_sum, death_sum, recovered_sum = world_cases[-1], total_deaths[-1], total_recovered[-1]

In [38]:
# Pair each date label with the worldwide confirmed total stored at the same position,
# then show the resulting list of "date : total" strings.
number_list = [f"{day} : {world_cases[idx]}" for idx, day in enumerate(dates)]
number_list

['1/22/20 : 555',
 '1/23/20 : 654',
 '1/24/20 : 941',
 '1/25/20 : 1434',
 '1/26/20 : 2118',
 '1/27/20 : 2927',
 '1/28/20 : 5578',
 '1/29/20 : 6166',
 '1/30/20 : 8234',
 '1/31/20 : 9927',
 '2/1/20 : 12038',
 '2/2/20 : 16787',
 '2/3/20 : 19887',
 '2/4/20 : 23898',
 '2/5/20 : 27643',
 '2/6/20 : 30802',
 '2/7/20 : 34395',
 '2/8/20 : 37129',
 '2/9/20 : 40159',
 '2/10/20 : 42768',
 '2/11/20 : 44810',
 '2/12/20 : 45228',
 '2/13/20 : 60381',
 '2/14/20 : 66908',
 '2/15/20 : 69050',
 '2/16/20 : 71234',
 '2/17/20 : 73269',
 '2/18/20 : 75151',
 '2/19/20 : 75651',
 '2/20/20 : 76211',
 '2/21/20 : 76840',
 '2/22/20 : 78601',
 '2/23/20 : 78979',
 '2/24/20 : 79543',
 '2/25/20 : 80396',
 '2/26/20 : 81373',
 '2/27/20 : 82737',
 '2/28/20 : 84114',
 '2/29/20 : 86009',
 '3/1/20 : 88386',
 '3/2/20 : 90360',
 '3/3/20 : 92938',
 '3/4/20 : 95237',
 '3/5/20 : 98028',
 '3/6/20 : 101971',
 '3/7/20 : 106009',
 '3/8/20 : 109960',
 '3/9/20 : 113860',
 '3/10/20 : 118884',
 '3/11/20 : 126547',
 '3/12/20 : 132297',
 '3/

In [23]:
# Report the worldwide totals for the most recent date. The sums are read
# straight from the date-only frames rather than from variables left behind by
# the final iteration of a loop in another cell, so this cell does not depend
# on that loop's leftover state.
latest_date = dates[-1]
print(f"Confirmed sum: {confirmed[latest_date].sum():,}")
print(f"Death sum: {deaths[latest_date].sum():,}")
print(f"Recovered sum: {recoveries[latest_date].sum():,}")

Confirmed sum: 13,555,532
Death sum: 584,132
Recovered sum: 7,559,252
