In [11]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, mean_absolute_error

import random
import math
import time
import datetime
import operator

In [13]:
plt.style.use('fivethirtyeight')
%matplotlib inline

# Import Datasets

In [15]:
confirmed_cases = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
confirmed_cases.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,33190,33384,33594,33908,34194,34366,34451,34455,34740,34994
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,2964,3038,3106,3188,3278,3371,3454,3571,3667,3752
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,16404,16879,17348,17808,18242,18712,19195,19689,20216,20770
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,855,855,855,855,855,855,855,858,861,862
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,346,386,386,396,458,462,506,525,541,576


In [21]:
deaths_reported = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
deaths_reported.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,898,920,936,957,971,994,1010,1012,1048,1094
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,79,81,83,83,85,89,93,95,97,101
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,959,968,978,988,996,1004,1011,1018,1028,1040
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,52,52,52,52,52,52,52,52,52,52
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,19,21,21,22,23,23,26,26,26,27


In [20]:
recovered_cases = pd.read_csv('https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
recovered_cases.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,20103,20179,20700,20847,20882,21135,21216,21254,21454,22456
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,1702,1744,1791,1832,1875,1881,1946,2014,2062,2091
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,11884,12094,12329,12637,13124,13124,13743,14019,14295,14792
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,800,800,802,802,803,803,803,803,803,803
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,108,117,117,117,117,118,118,118,118,124


In [23]:
# retrieved from daily reports
latest_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-15-2020.csv')
latest_data.head()

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,45001.0,Abbeville,South Carolina,US,2020-07-16 04:44:59,34.223334,-82.461707,165,1,0,164.0,"Abbeville, South Carolina, US",672.728014,0.606061
1,22001.0,Acadia,Louisiana,US,2020-07-16 04:44:59,30.295065,-92.414197,1510,46,0,1464.0,"Acadia, Louisiana, US",2433.717463,3.046358
2,51001.0,Accomack,Virginia,US,2020-07-16 04:44:59,37.767072,-75.632346,1045,14,0,1031.0,"Accomack, Virginia, US",3233.692289,1.339713
3,16001.0,Ada,Idaho,US,2020-07-16 04:44:59,43.452658,-116.241552,4836,28,0,4808.0,"Ada, Idaho, US",1004.17993,0.578991
4,19001.0,Adair,Iowa,US,2020-07-16 04:44:59,41.330756,-94.471059,17,0,0,17.0,"Adair, Iowa, US",237.695749,0.0


# Column Info

In [25]:
# fetch all columns from confirmed dataset
cols = confirmed_cases.keys()
cols

Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20',
       ...
       '7/6/20', '7/7/20', '7/8/20', '7/9/20', '7/10/20', '7/11/20', '7/12/20',
       '7/13/20', '7/14/20', '7/15/20'],
      dtype='object', length=180)

In [31]:
# extract the date columns
confirmed = confirmed_cases.loc[:, cols[4] : cols[-1]]
deaths = deaths_reported.loc[:, cols[4] : cols[-1]]
recoveries = recovered_cases.loc[:, cols[4] : cols[-1]]

In [30]:
confirmed

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,7/6/20,7/7/20,7/8/20,7/9/20,7/10/20,7/11/20,7/12/20,7/13/20,7/14/20,7/15/20
0,0,0,0,0,0,0,0,0,0,0,...,20103,20179,20700,20847,20882,21135,21216,21254,21454,22456
1,0,0,0,0,0,0,0,0,0,0,...,1702,1744,1791,1832,1875,1881,1946,2014,2062,2091
2,0,0,0,0,0,0,0,0,0,0,...,11884,12094,12329,12637,13124,13124,13743,14019,14295,14792
3,0,0,0,0,0,0,0,0,0,0,...,800,800,802,802,803,803,803,803,803,803
4,0,0,0,0,0,0,0,0,0,0,...,108,117,117,117,117,118,118,118,118,124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248,0,0,0,0,0,0,0,0,0,0,...,269,279,283,283,284,284,286,286,286,322
249,0,0,0,0,0,0,0,0,0,0,...,575,591,595,619,630,642,659,675,685,694
250,0,0,0,0,0,0,0,0,0,0,...,266,266,272,272,272,296,296,296,302,302
251,0,0,0,0,0,0,0,0,0,0,...,4914,4965,5011,5067,5115,5176,5228,5278,5332,5383
