In [1]:
#import all packages
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import datetime as dt
from datetime import timedelta
from sklearn.svm import SVR
from statsmodels.tsa.api import Holt,SimpleExpSmoothing,ExponentialSmoothing
from sklearn.metrics import mean_squared_error,r2_score
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf

In [2]:
# Load the per-country daily COVID-19 aggregates and preview the first rows.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
csv_path = "C:/Users/Abbas/Desktop/countries-aggregated.csv"
covid = pd.read_csv(csv_path)
covid.head()

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
0,1/22/2020,Afghanistan,0,0,0
1,1/22/2020,Albania,0,0,0
2,1/22/2020,Algeria,0,0,0
3,1/22/2020,Andorra,0,0,0
4,1/22/2020,Angola,0,0,0


In [3]:
# Extract India's rows. The explicit .copy() makes covid_india an independent
# frame, so the column added to it later is not a write onto a slice of `covid`
# (the situation pandas reports as SettingWithCopyWarning).
covid_india = covid[covid["Country"] == "India"].copy()

In [4]:
# Preview the first five India rows
covid_india.head(5)

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths
78,1/22/2020,India,0,0,0
263,1/23/2020,India,0,0,0
448,1/24/2020,India,0,0,0
633,1/25/2020,India,0,0,0
818,1/26/2020,India,0,0,0


In [5]:
# Parse the text "Date" column into real datetimes, stored in a new
# ObservationDate column. .assign() builds a new frame and rebinds the name
# instead of writing a column onto the filtered slice of `covid`, which avoids
# pandas' SettingWithCopyWarning and keeps this cell safe to re-run.
covid_india = covid_india.assign(ObservationDate=pd.to_datetime(covid_india["Date"]))

In [6]:
# Preview the first five India rows, including the ObservationDate column
covid_india.head(5)

Unnamed: 0,Date,Country,Confirmed,Recovered,Deaths,ObservationDate
78,1/22/2020,India,0,0,0,2020-01-22
263,1/23/2020,India,0,0,0,2020-01-23
448,1/24/2020,India,0,0,0,2020-01-24
633,1/25/2020,India,0,0,0,2020-01-25
818,1/26/2020,India,0,0,0,2020-01-26


In [7]:
# Collapse India's rows to one row per ObservationDate, summing the three counters
count_columns = ["Confirmed", "Recovered", "Deaths"]
india_datewise = covid_india.groupby(["ObservationDate"]).agg(
    {column: "sum" for column in count_columns}
)

In [8]:
# Dataset aggregated per date: summed Confirmed, Recovered and Deaths
india_datewise

Unnamed: 0_level_0,Confirmed,Recovered,Deaths
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-22,0,0,0
2020-01-23,0,0,0
2020-01-24,0,0,0
2020-01-25,0,0,0
2020-01-26,0,0,0
2020-01-27,0,0,0
2020-01-28,0,0,0
2020-01-29,0,0,0
2020-01-30,1,0,0
2020-01-31,1,0,0


In [9]:
# Add a "Days Since" column: whole days elapsed since the first date in the index
first_date = india_datewise.index[0]
elapsed = india_datewise.index - first_date
india_datewise["Days Since"] = elapsed.days

In [10]:
# Show the aggregated frame, which now carries the "Days Since" column
india_datewise

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Days Since
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-22,0,0,0,0
2020-01-23,0,0,0,1
2020-01-24,0,0,0,2
2020-01-25,0,0,0,3
2020-01-26,0,0,0,4
2020-01-27,0,0,0,5
2020-01-28,0,0,0,6
2020-01-29,0,0,0,7
2020-01-30,1,0,0,8
2020-01-31,1,0,0,9


In [11]:
# Split by row position: the first 95% of the rows train, the remaining rows validate
split_at = int(india_datewise.shape[0] * 0.95)
train_ml = india_datewise.iloc[:split_at]
valid_ml = india_datewise.iloc[split_at:]

In [12]:
# Support-vector regressor with a degree-10 polynomial kernel (other settings left at defaults)
svm = SVR(kernel="poly", degree=10)

In [13]:
# Fit the SVR: the single feature is "Days Since" (as a column vector), the target is "Confirmed"
days_train = train_ml["Days Since"].to_numpy().reshape(-1, 1)
svm.fit(days_train, train_ml["Confirmed"])

SVR(C=1.0, cache_size=200, coef0=0.0, degree=10, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [14]:
# Linear-regression baseline: fit Confirmed against Days Since, then report RMSE on the validation rows
from sklearn import linear_model

days_train = np.array(train_ml["Days Since"]).reshape(-1, 1)
days_valid = np.array(valid_ml["Days Since"]).reshape(-1, 1)
model_lr = linear_model.LinearRegression()
model_lr.fit(days_train, train_ml["Confirmed"])
prediction_lr = model_lr.predict(days_valid)
rmse_lr = np.sqrt(mean_squared_error(prediction_lr, valid_ml["Confirmed"]))
print("Root Mean Square Error for LR Model: ", rmse_lr)

Root Mean Square Error for LR Model:  14822.4404839606


In [15]:
# Decision-tree model for Confirmed cases.
# "Confirmed" is a continuous count, so this is a regression problem and needs
# DecisionTreeRegressor. A DecisionTreeClassifier would treat every distinct
# count in the training rows as its own class label.
from sklearn import tree

model_dt = tree.DecisionTreeRegressor()
model_dt.fit(np.array(train_ml["Days Since"]).reshape(-1, 1), train_ml["Confirmed"])
prediction_dt = model_dt.predict(np.array(valid_ml["Days Since"]).reshape(-1, 1))
# mean_squared_error takes (y_true, y_pred)
rmse_dt = np.sqrt(mean_squared_error(valid_ml["Confirmed"], prediction_dt))
print("Root Mean Square Error for DT Model: ", rmse_dt)

Root Mean Square Error for DT Model:  4442.537450601852


In [19]:
# Validation error of the SVR fitted on Confirmed cases: report both the RMSE
# and the plain (un-rooted) MSE. The MSE is computed once and reused.
prediction_svm = svm.predict(np.array(valid_ml["Days Since"]).reshape(-1, 1))
# mean_squared_error takes (y_true, y_pred)
mse_svm = mean_squared_error(valid_ml["Confirmed"], prediction_svm)
print("Root Mean Square Error for SVR Model: ", np.sqrt(mse_svm))
print("Mean Square Error for SVR Model: ", mse_svm)

Root Mean Square Error for SVR Model:  580.4222211774005


In [15]:
# SVR predictions for every observed day (the disabled plot below draws them as the fitted curve)
all_days = india_datewise["Days Since"].to_numpy().reshape(-1, 1)
predictions = svm.predict(all_days)
# Plotting code, currently disabled:
#plt.figure(figsize=(11,6))
#plt.plot(india_datewise["Confirmed"],label="Train Confirmed Cases",linewidth=3)
#plt.plot(india_datewise.index,predictions, linestyle='--',label="Best Fit for SVR",color='black')
#plt.xlabel('Time')
#plt.ylabel('Confirmed Cases')
#plt.title("Confirmed Cases Support Vector Machine Regressor Prediction")
#plt.xticks(rotation=90)
#plt.legend()

In [20]:
# Forecast the 7 days after the last observed date: build the future dates and
# ask the SVR for one prediction per future "Days Since" value
last_date = india_datewise.index[-1]
last_day = india_datewise["Days Since"].max()
horizon = range(1, 8)
new_date = [last_date + timedelta(days=step) for step in horizon]
new_prediction_svm = [
    svm.predict(np.array(last_day + step).reshape(-1, 1))[0] for step in horizon
]

In [22]:
# Tabulate the forecast dates next to the SVR confirmed-case predictions; show the last row
forecast_columns = ["Date", "SVM Prediction for Confirmed"]
forecast_rows = list(zip(new_date, new_prediction_svm))
model_predictions = pd.DataFrame(forecast_rows, columns=forecast_columns)
model_predictions.tail(1)

Unnamed: 0,Date,SVM Prediction for Confirmed
6,2020-04-30,37586.138905


In [None]:
# R^2 of the fitted SVR on the validation rows. A score belongs to the
# estimator: the predictions DataFrame has no `score` attribute, so
# `model_predictions.score` raises AttributeError.
svm.score(np.array(valid_ml["Days Since"]).reshape(-1, 1), valid_ml["Confirmed"])

In [26]:
# Refit the same SVR object, this time with "Deaths" as the target
days_train = train_ml["Days Since"].to_numpy().reshape(-1, 1)
svm.fit(days_train, train_ml["Deaths"])

SVR(C=1.0, cache_size=200, coef0=0.0, degree=7, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [27]:
# Validation RMSE of the SVR against the "Deaths" column
days_valid = valid_ml["Days Since"].to_numpy().reshape(-1, 1)
prediction_svm = svm.predict(days_valid)
rmse_svm = np.sqrt(mean_squared_error(prediction_svm, valid_ml["Deaths"]))
print("Root Mean Square Error for SVR Model: ", rmse_svm)

Root Mean Square Error for SVR Model:  23.646770539898387


In [74]:
# SVR predictions for every observed day
all_days = india_datewise["Days Since"].to_numpy().reshape(-1, 1)
predictions = svm.predict(all_days)

In [75]:
#Adding new date
new_date=[]
new_prediction_svm=[]
for i in range(1,18):
    new_date.append(india_datewise.index[-1]+timedelta(days=i))
    new_prediction_svm.append(svm.predict(np.array(india_datewise["Days Since"].max()+i).reshape(-1,1))[0])

In [76]:
# Tabulate the forecast dates next to the SVR death predictions; show the last row
forecast_columns = ["Date", "SVM Prediction for Deaths"]
forecast_rows = list(zip(new_date, new_prediction_svm))
model_predictions = pd.DataFrame(forecast_rows, columns=forecast_columns)
model_predictions.tail(1)

Unnamed: 0,Date,SVM Prediction for Deaths
0,2020-04-24,823.878882
1,2020-04-25,888.018869
2,2020-04-26,956.386219
3,2020-04-27,1029.21061
4,2020-04-28,1106.731595


In [77]:
# Same procedure for recoveries: refit the SVR object with "Recovered" as the target
days_train = train_ml["Days Since"].to_numpy().reshape(-1, 1)
svm.fit(days_train, train_ml["Recovered"])

SVR(C=1.0, cache_size=200, coef0=0.0, degree=7, epsilon=0.1, gamma='scale',
    kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [78]:
# Validation RMSE of the SVR against the "Recovered" column
days_valid = valid_ml["Days Since"].to_numpy().reshape(-1, 1)
prediction_svm = svm.predict(days_valid)
rmse_svm = np.sqrt(mean_squared_error(prediction_svm, valid_ml["Recovered"]))
print("Root Mean Square Error for SVR Model: ", rmse_svm)

Root Mean Square Error for SVR Model:  1756.3295474991912


In [79]:
#Adding new date
new_date=[]
new_prediction_svm=[]
for i in range(1,18):
    new_date.append(india_datewise.index[-1]+timedelta(days=i))
    new_prediction_svm.append(svm.predict(np.array(india_datewise["Days Since"].max()+i).reshape(-1,1))[0])

In [80]:
# Tabulate the forecast dates next to the SVR recovery predictions; show the last row
forecast_columns = ["Date", "SVM Prediction for Recovered"]
forecast_rows = list(zip(new_date, new_prediction_svm))
model_predictions = pd.DataFrame(forecast_rows, columns=forecast_columns)
model_predictions.tail(1)

Unnamed: 0,Date,SVM Prediction for Recovered
0,2020-04-24,2779.717262
1,2020-04-25,2995.870476
2,2020-04-26,3226.269999
3,2020-04-27,3471.68985
4,2020-04-28,3732.937331
