In [10]:
import warnings, requests, pandas, numpy
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

warnings.filterwarnings("ignore")

In [2]:
## Common root under which both CSV files are published.
BASE_URL = "https://covid.ourworldindata.org/data"

## Download the two ECDC tables in order: the COVID records (`full_data.csv`)
## first, then the locations table (`locations.csv`) that is later joined onto
## them on `location`.
covid_data, population_data = map(
    pandas.read_csv,
    (f"{BASE_URL}/ecdc/full_data.csv", f"{BASE_URL}/ecdc/locations.csv"),
)

In [3]:
## Attach the location attributes to every COVID record (the left join keeps all
## COVID rows), discard the two columns that are not used afterwards, and keep
## only the rows whose population figure refers to 2020.
##
## Written as a single chain without `inplace = True`, so `new_df` is an
## independent frame and no column is ever assigned on a boolean-filtered
## slice (which can raise SettingWithCopyWarning).
new_df = (
    pandas.merge(covid_data, population_data, on = 'location', how = 'left')
    .drop(columns = ['countriesAndTerritories', 'population'])
    .loc[lambda frame: frame.population_year == 2020.0]
    ## Cast only after the filter: every remaining value is exactly 2020.0.
    .astype({'population_year': int})
)

In [4]:
## Inspect the merged frame after the 2020 filter.
new_df

Unnamed: 0,date,location,new_cases,new_deaths,total_cases,total_deaths,continent,population_year
0,2019-12-31,Afghanistan,0,0,0,0,Asia,2020
1,2020-01-01,Afghanistan,0,0,0,0,Asia,2020
2,2020-01-02,Afghanistan,0,0,0,0,Asia,2020
3,2020-01-03,Afghanistan,0,0,0,0,Asia,2020
4,2020-01-04,Afghanistan,0,0,0,0,Asia,2020
...,...,...,...,...,...,...,...,...
8996,2020-04-01,Zimbabwe,1,0,8,1,Africa,2020
8997,2020-04-02,Zimbabwe,0,0,8,1,Africa,2020
8998,2020-04-03,Zimbabwe,0,0,8,1,Africa,2020
8999,2020-04-04,Zimbabwe,1,0,9,1,Africa,2020


In [27]:
## Narrow the merged frame to the rows for India.
new_df = new_df[ new_df.location == 'India' ]

## Keep the two columns used for modelling and skip the first 10 rows by
## position. `.copy()` makes `df` an independent frame, so the in-place edits
## applied to it in later cells do not operate on a slice of `new_df`.
df = new_df[['date', 'total_deaths']].iloc[10:].copy()
df

Unnamed: 0,date,total_deaths
3631,2020-01-10,0
3632,2020-01-11,0
3633,2020-01-12,0
3634,2020-01-13,0
3635,2020-01-14,0
...,...,...
3712,2020-04-01,35
3713,2020-04-02,50
3714,2020-04-03,56
3715,2020-04-04,68


In [28]:
## Only the death counts are used from here on, so the date column is removed.
## Rebinding `df` (instead of `inplace = True`) together with `errors = 'ignore'`
## lets this cell be re-run without a KeyError once 'date' is already gone.
df = df.drop(columns = ['date'], errors = 'ignore')

## Predicting N Days out into the Future.
## NOTE(review): with N = 30 the feature rows left for training are all 0 (see
## the X output further down), so the model has no signal to learn from;
## consider a shorter horizon.
prediction_days = 30

## Target column: total_deaths shifted N rows up, i.e. the value N rows later.
## The last N rows have no later value and become NaN.
df['prediction'] = df['total_deaths'].shift(-prediction_days)

In [29]:
## Create Independent DataSet.
## Convert DataFrame to Numpy Array and Drop Prediction Column.
## `columns =` is used because passing the axis positionally (`drop(labels, 1)`)
## is deprecated since pandas 1.3 and raises TypeError on pandas 2.x.
X = numpy.array(df.drop(columns = ['prediction']))

## Remove the Last N rows Where N is Prediction Days: their target is NaN.
X = X[ : len(df) - prediction_days]

X

array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0]], dtype=int64)

In [30]:
## Build the dependent (target) vector.
## Take the 'prediction' column as a NumPy array and, in the same statement,
## cut off its final `prediction_days` entries (the NaN values left behind by
## the shift).
y = numpy.array(df.loc[:, 'prediction'])[ : -prediction_days]

y

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  1.,  2.,  2.,  2.,  3.,  3.,  3.,
        4.,  4.,  4.,  7.,  9.,  9., 13., 17., 19., 25., 29., 32., 35.,
       50., 56., 68., 77.])

In [31]:
## Split the Dataset into 80 % Training and 20 % Testing.
## A fixed `random_state` makes the shuffle, and therefore every number derived
## from the split, reproducible across kernel restarts.
## NOTE(review): the rows form a time series and are shuffled here; confirm
## that a chronological split is not what is wanted.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42
)

In [32]:
## Set Prediction Days Array to the Last N Feature Rows (N = prediction_days),
## i.e. the rows that were cut from X because they have no target value.
## `columns =` is used because passing the axis positionally (`drop(labels, 1)`)
## is deprecated since pandas 1.3 and raises TypeError on pandas 2.x.
prediction_days_array = numpy.array(df.drop(columns = ['prediction']))[-prediction_days : ]
print(prediction_days_array)

[[ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 0]
 [ 1]
 [ 2]
 [ 2]
 [ 2]
 [ 3]
 [ 3]
 [ 3]
 [ 4]
 [ 4]
 [ 4]
 [ 7]
 [ 9]
 [ 9]
 [13]
 [17]
 [19]
 [25]
 [29]
 [32]
 [35]
 [50]
 [56]
 [68]
 [77]]


In [33]:
## Support Vector Regression with a Radial Basis Function kernel,
## fitted on the training split.
## C and gamma are fixed constants chosen in this cell, not tuned values.

svr_rbf = SVR(C = 1000.0, gamma = 1e-5, kernel = 'rbf')
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=1e-05,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [34]:
## Evaluate the fitted model on the held-out split.
## For a regressor, `score` is the coefficient of determination (R^2), not a
## probability; it is negative when the model does worse than always
## predicting the mean of the test targets.

svr_rbf_confidence = svr_rbf.score(X = x_test, y = y_test)
svr_rbf_confidence

-0.3544582043343647