In [None]:
#import essential libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [None]:
# Load the covid19india case time series (one row per calendar day) and
# preview the most recent rows.
# NOTE(review): this is a remote endpoint — confirm it is still served, and
# consider caching a local copy so the notebook stays reproducible.
CASE_TIME_SERIES_URL = "https://api.covid19india.org/csv/latest/case_time_series.csv"
df = pd.read_csv(CASE_TIME_SERIES_URL)
df.tail()

Unnamed: 0,Date,Date_YMD,Daily Confirmed,Total Confirmed,Daily Recovered,Total Recovered,Daily Deceased,Total Deceased
615,6 October 2021,2021-10-06,22605,33892780,24610,33192508,316,449294
616,7 October 2021,2021-10-07,21474,33914254,24959,33217467,277,449571
617,8 October 2021,2021-10-08,19868,33934122,23066,33240533,247,449818
618,9 October 2021,2021-10-09,17941,33952063,23612,33264145,213,450031
619,10 October 2021,2021-10-10,19020,33971083,21583,33285728,193,450224


In [None]:
# Dimensions of the dataset as a (rows, columns) tuple
df.shape

(620, 8)

In [None]:
# Column names, non-null counts, dtypes and memory usage of the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 620 entries, 0 to 619
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Date             620 non-null    object
 1   Date_YMD         620 non-null    object
 2   Daily Confirmed  620 non-null    int64 
 3   Total Confirmed  620 non-null    int64 
 4   Daily Recovered  620 non-null    int64 
 5   Total Recovered  620 non-null    int64 
 6   Daily Deceased   620 non-null    int64 
 7   Total Deceased   620 non-null    int64 
dtypes: int64(6), object(2)
memory usage: 38.9+ KB


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,Daily Confirmed,Total Confirmed,Daily Recovered,Total Recovered,Daily Deceased,Total Deceased
count,620.0,620.0,620.0,620.0,620.0,620.0
mean,54792.069355,12051260.0,53686.658065,11318490.0,726.167742,161468.003226
std,79424.685781,11921950.0,77472.135234,11538610.0,992.592759,152273.819835
min,0.0,1.0,0.0,0.0,0.0,0.0
25%,12120.0,644209.5,11728.5,390715.8,137.0,18560.0
50%,32345.0,9626448.0,34130.0,9078836.0,419.0,139388.0
75%,57638.5,22387490.0,58592.75,18399900.0,854.0,242745.25
max,414280.0,33971080.0,422391.0,33285730.0,6139.0,450224.0


In [None]:
# Extract the cumulative confirmed / recovered / deceased series as NumPy arrays
x, y, z = (
    np.array(df[column])
    for column in ('Total Confirmed', 'Total Recovered', 'Total Deceased')
)
# Total population used to normalise the SIR model.
# NOTE(review): presumably India's population — confirm the source of this figure.
N = 1_370_508_600

In [None]:
# Derive the SIR compartments from the cumulative counts and keep only the
# last 550 rows of each series:
#   sus (susceptible) = population - confirmed
#   inf (infected)    = confirmed - recovered - deceased
#   rec (removed)     = recovered + deceased
sus, inf, rec = (
    np.array(series)[-550:]
    for series in (N - x, x - y - z, y + z)
)

In [None]:
# Estimate the SIR transmission rate (beta) for every time step
# beta is average number of contacts per person per time
def find_beta(sus,inf,N):
  """Return the per-step transmission rate implied by the S and I series.

  Inverts the discrete SIR update ``S[i+1] = S[i] - beta[i]*S[i]*I[i]/N``:

      beta[i] = -(S[i+1] - S[i]) * N / (S[i] * I[i])

  Parameters
  ----------
  sus : array-like
      Susceptible counts, one entry per time step.
  inf : array-like
      Infected counts; must have at least ``len(sus) - 1`` entries.
  N : number
      Total population size.

  Returns
  -------
  numpy.ndarray
      Float array of length ``max(len(sus) - 1, 0)``. Inputs with fewer than
      two points yield an empty array instead of raising. Steps where
      ``sus[i] * inf[i]`` is zero produce ``inf``/``nan`` (with a NumPy
      runtime warning) rather than an exception.
  """
  # Work in floating point so the products cannot overflow an integer dtype.
  sus = np.asarray(sus, dtype=float)
  inf = np.asarray(inf, dtype=float)
  steps = max(len(sus) - 1, 0)
  # People leaving the susceptible compartment between consecutive steps.
  newly_infected = sus[:steps] - sus[1:steps + 1]
  return newly_infected * N / (sus[:steps] * inf[:steps])

In [None]:
# Estimate the SIR removal rate (gamma) for every time step
# gamma is probability of an infectious individual recovering in any time interval
def find_gamma(inf,rec):
  """Return the per-step removal rate implied by the I and R series.

  Inverts the discrete SIR update ``R[i+1] = R[i] + gamma[i]*I[i]``:

      gamma[i] = (R[i+1] - R[i]) / I[i]

  Parameters
  ----------
  inf : array-like
      Infected counts; must have at least ``len(rec) - 1`` entries.
  rec : array-like
      Removed counts, one entry per time step.

  Returns
  -------
  numpy.ndarray
      Float array of length ``max(len(rec) - 1, 0)``. Inputs with fewer than
      two points yield an empty array instead of raising. Steps where
      ``inf[i]`` is zero produce ``inf``/``nan`` (with a NumPy runtime
      warning) rather than an exception.
  """
  inf = np.asarray(inf, dtype=float)
  rec = np.asarray(rec, dtype=float)
  steps = max(len(rec) - 1, 0)
  # People entering the removed compartment between consecutive steps.
  newly_removed = rec[1:steps + 1] - rec[:steps]
  return newly_removed / inf[:steps]

In [None]:
# Estimate the per-step beta and gamma from the observed S, I, R series
beta_actual = find_beta(sus=sus, inf=inf, N=N)
gamma_actual = find_gamma(inf=inf, rec=rec)

In [None]:
# Forecast horizon: how many days ahead to predict
pdays = 30
# Pair each step's beta / gamma with the value observed `pdays` steps later.
# shift(-pdays) leaves NaN in the last `pdays` rows of the shifted column.
beta_df = pd.DataFrame(beta_actual, columns=['beta_actual']).assign(
    beta_shifted=lambda frame: frame['beta_actual'].shift(-pdays)
)
gamma_df = pd.DataFrame(gamma_actual, columns=['gamma_actual']).assign(
    gamma_shifted=lambda frame: frame['gamma_actual'].shift(-pdays)
)

In [None]:
# Training arrays as (n, 1) columns: drop the last `pdays` rows, where the
# shifted target is NaN
beta_actual, beta_shifted = (
    np.array(beta_df[column]).reshape(-1, 1)[:-pdays]
    for column in ('beta_actual', 'beta_shifted')
)
gamma_actual, gamma_shifted = (
    np.array(gamma_df[column]).reshape(-1, 1)[:-pdays]
    for column in ('gamma_actual', 'gamma_shifted')
)
# Prediction inputs: the most recent `pdays` observed values of beta and gamma
beta_test, gamma_test = (
    np.array(frame[column]).reshape(-1, 1)[-pdays:]
    for frame, column in ((beta_df, 'beta_actual'), (gamma_df, 'gamma_actual'))
)

In [None]:
# Fit one linear regression per parameter, mapping the current value to the
# value `pdays` steps later
from sklearn.linear_model import LinearRegression
linreg_beta, linreg_gamma = (
    LinearRegression().fit(features, target)
    for features, target in (
        (beta_actual, beta_shifted),
        (gamma_actual, gamma_shifted),
    )
)

In [None]:
# Fit a degree-15 polynomial regression per parameter: expand the single input
# into polynomial features, then fit an ordinary linear regression on them
from sklearn.preprocessing import PolynomialFeatures
polynom_beta = PolynomialFeatures(degree=15).fit(beta_actual)
x_beta = polynom_beta.transform(beta_actual)
polyreg_beta = LinearRegression().fit(x_beta, beta_shifted)

polynom_gamma = PolynomialFeatures(degree=15).fit(gamma_actual)
x_gamma = polynom_gamma.transform(gamma_actual)
polyreg_gamma = LinearRegression().fit(x_gamma, gamma_shifted)

In [None]:
# Predict the next `pdays` values of beta and gamma with the linear models
lr_pred_beta, lr_pred_gamma = (
    model.predict(inputs)
    for model, inputs in ((linreg_beta, beta_test), (linreg_gamma, gamma_test))
)

In [None]:
# Predict the next `pdays` values of beta and gamma with the polynomial models.
# Use transform(), not fit_transform(): the feature expansions were already
# fitted on the training inputs and must not be re-fitted on prediction data.
poly_pred_beta = polyreg_beta.predict(polynom_beta.transform(beta_test))
poly_pred_gamma = polyreg_gamma.predict(polynom_gamma.transform(gamma_test))

In [None]:
# Allocate the S, I, R forecast arrays for the linear-regression scenario
S_lr = np.zeros(pdays)
I_lr = np.zeros(pdays)
R_lr = np.zeros(pdays)
# Seed index 0 with the most recent observed susceptible, infected and removed counts
S_lr[0] = sus[-1]
I_lr[0] = inf[-1]
R_lr[0] = rec[-1]
# Store the Linear Regression predictions, flattened to 1-D.
# The models were fitted on column-vector targets, so predict() returns shape
# (pdays, 1); without flattening, each beta_lr[i] is a 1-element array and
# assigning expressions built from it into S_lr[i+1] etc. relies on the
# size-1-array-to-scalar conversion that NumPy has deprecated.
beta_lr = np.ravel(lr_pred_beta)
gamma_lr = np.ravel(lr_pred_gamma)

In [None]:
# Integrate the SIR equations forward with Euler's method (one step per day):
#   S[i+1] = S[i] - beta*S*I/N
#   I[i+1] = I[i] + beta*S*I/N - gamma*I
#   R[i+1] = R[i] + gamma*I
# New infections must be ADDED to I and removals subtracted; the previous
# version had the signs inverted, which also broke conservation of S + I + R.
for i in range(len(beta_lr)-1):
  new_infections = beta_lr[i]*S_lr[i]*I_lr[i]/N
  new_removals = gamma_lr[i]*I_lr[i]
  S_lr[i+1] = S_lr[i] - new_infections
  I_lr[i+1] = I_lr[i] + new_infections - new_removals
  R_lr[i+1] = R_lr[i] + new_removals

In [None]:
# Allocate the S, I, R forecast arrays for the polynomial-regression scenario
S_pr = np.zeros(pdays)
I_pr = np.zeros(pdays)
R_pr = np.zeros(pdays)
# Seed index 0 with the most recent observed susceptible, infected and removed counts
S_pr[0] = sus[-1]
I_pr[0] = inf[-1]
R_pr[0] = rec[-1]
# Store the Polynomial Regression predictions, flattened to 1-D.
# The models were fitted on column-vector targets, so predict() returns shape
# (pdays, 1); without flattening, each beta_pr[i] is a 1-element array and
# assigning expressions built from it into S_pr[i+1] etc. relies on the
# size-1-array-to-scalar conversion that NumPy has deprecated.
beta_pr = np.ravel(poly_pred_beta)
gamma_pr = np.ravel(poly_pred_gamma)

In [None]:
# Integrate the SIR equations forward with Euler's method (one step per day):
#   S[i+1] = S[i] - beta*S*I/N
#   I[i+1] = I[i] + beta*S*I/N - gamma*I
#   R[i+1] = R[i] + gamma*I
# New infections must be ADDED to I and removals subtracted; the previous
# version had the signs inverted, which also broke conservation of S + I + R.
for i in range(len(beta_pr)-1):
  new_infections = beta_pr[i]*S_pr[i]*I_pr[i]/N
  new_removals = gamma_pr[i]*I_pr[i]
  S_pr[i+1] = S_pr[i] - new_infections
  I_pr[i+1] = I_pr[i] + new_infections - new_removals
  R_pr[i+1] = R_pr[i] + new_removals

In [None]:
# Fit one decision-tree regressor per parameter, mapping the current value to
# the value `pdays` steps later
from sklearn.tree import DecisionTreeRegressor
tree_beta, tree_gamma = (
    DecisionTreeRegressor().fit(features, target)
    for features, target in (
        (beta_actual, beta_shifted),
        (gamma_actual, gamma_shifted),
    )
)

In [None]:
# Fit one Random Forest regressor per parameter, mapping the current value to
# the value `pdays` steps later.
# - Targets are flattened with np.ravel because scikit-learn expects a 1-D y
#   here and otherwise warns "A column-vector y was passed when a 1d array was
#   expected".
# - random_state is fixed so the forecast is reproducible across runs.
from sklearn.ensemble import RandomForestRegressor
forest_beta = RandomForestRegressor(random_state=42).fit(beta_actual, np.ravel(beta_shifted))
forest_gamma = RandomForestRegressor(random_state=42).fit(gamma_actual, np.ravel(gamma_shifted))


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



In [None]:
# Predict the next `pdays` values of beta and gamma with the decision trees
prediction_beta, prediction_gamma = (
    model.predict(inputs)
    for model, inputs in ((tree_beta, beta_test), (tree_gamma, gamma_test))
)

In [None]:
# Predict the next `pdays` values of beta and gamma with the random forests
predict_rf_beta, predict_rf_gamma = (
    model.predict(inputs)
    for model, inputs in ((forest_beta, beta_test), (forest_gamma, gamma_test))
)

In [None]:
# Allocate the S, I, R forecast arrays for the decision-tree scenario
S_tr, I_tr, R_tr = (np.zeros(pdays) for _ in range(3))
# Seed index 0 with the most recent observed susceptible, infected and removed counts
S_tr[0], I_tr[0], R_tr[0] = sus[-1], inf[-1], rec[-1]
# Keep the Decision Tree Regressor predictions under the names the integration loop uses
beta_tr, gamma_tr = prediction_beta, prediction_gamma

In [None]:
# Integrate the SIR equations forward with Euler's method (one step per day):
#   S[i+1] = S[i] - beta*S*I/N
#   I[i+1] = I[i] + beta*S*I/N - gamma*I
#   R[i+1] = R[i] + gamma*I
# New infections must be ADDED to I and removals subtracted; the previous
# version had the signs inverted, which also broke conservation of S + I + R.
for i in range(len(beta_tr)-1):
  new_infections = beta_tr[i]*S_tr[i]*I_tr[i]/N
  new_removals = gamma_tr[i]*I_tr[i]
  S_tr[i+1] = S_tr[i] - new_infections
  I_tr[i+1] = I_tr[i] + new_infections - new_removals
  R_tr[i+1] = R_tr[i] + new_removals

In [None]:
# Allocate the S, I, R forecast arrays for the random-forest scenario
S_rf, I_rf, R_rf = (np.zeros(pdays) for _ in range(3))
# Seed index 0 with the most recent observed susceptible, infected and removed counts
S_rf[0], I_rf[0], R_rf[0] = sus[-1], inf[-1], rec[-1]
# Keep the Random Forest Regressor predictions under the names the integration loop uses
beta_rf, gamma_rf = predict_rf_beta, predict_rf_gamma

In [None]:
# Integrate the SIR equations forward with Euler's method (one step per day):
#   S[i+1] = S[i] - beta*S*I/N
#   I[i+1] = I[i] + beta*S*I/N - gamma*I
#   R[i+1] = R[i] + gamma*I
# New infections must be ADDED to I and removals subtracted; the previous
# version had the signs inverted, which also broke conservation of S + I + R.
# The loop bound now uses beta_rf (it previously borrowed the decision-tree
# array beta_tr).
for i in range(len(beta_rf)-1):
  new_infections = beta_rf[i]*S_rf[i]*I_rf[i]/N
  new_removals = gamma_rf[i]*I_rf[i]
  S_rf[i+1] = S_rf[i] - new_infections
  I_rf[i+1] = I_rf[i] + new_infections - new_removals
  R_rf[i+1] = R_rf[i] + new_removals

In [None]:
# Work out the first date of the forecast axis: the last observed day.
from datetime import datetime
date = np.array(df['Date_YMD'][-1:])
# Date_YMD holds 'YYYY-MM-DD' strings. Parse with %m (month) — %M means
# *minutes* and silently left the month at January — and validate the value.
last_observed_day = datetime.strptime(str(df['Date_YMD'].iloc[-1]), '%Y-%m-%d')
# Emit ISO format so pd.date_range cannot confuse day and month; the earlier
# 'dd/mm/YYYY' string is parsed month-first by pandas.
date_start = last_observed_day.strftime('%Y-%m-%d')

In [None]:
# Create one date per forecast step. The forecast arrays hold `pdays` values
# (index 0 is the last observed day), so the axis needs `pdays` dates; the
# earlier hard-coded 29 left the final predicted point without a date.
date = pd.date_range(start=date_start, periods=pdays, freq='D')
# Plot the predicted active cases for Linear Regression and Polynomial Regression
fig = go.Figure()
fig.update_layout( title="<b>Future Predictions for LR and PR<b>", xaxis_title="<b>Date<b>", yaxis_title="<b>Active Cases<b>")
fig.add_trace(go.Scatter(x = date, y = I_lr, mode='lines+markers', name='Linear regresssion predictions',line=dict(color='red', width=2)))
fig.add_trace(go.Scatter(x = date, y = I_pr, mode='lines+markers', name='Polynomial Regression Predictions',line=dict(color='blue', width=2)))
fig.show()

In [None]:
# Plot the predicted active cases for the Decision Tree and Random Forest models
fig = go.Figure()
fig.update_layout(
    title="<b>Future Predictions for Decision tree and Random forest<b>",
    xaxis_title="<b>Date<b>",
    yaxis_title="<b>Active Cases<b>",
)
fig.add_traces([
    go.Scatter(x=date, y=series, mode='lines+markers', name=label,
               line=dict(color=colour, width=2))
    for series, label, colour in (
        (I_tr, 'Decision tree predictions', 'green'),
        (I_rf, 'Random forest Predictions', 'blue'),
    )
])
fig.show()