In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
!pip install --upgrade plotly
!pip install pmdarima
print("Done")

Requirement already up-to-date: plotly in /usr/local/lib/python3.7/dist-packages (5.1.0)
Done


In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot, autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.api import VAR
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from pprint import pprint
from math import sqrt
import itertools

from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.ar_model import AR
from pmdarima import auto_arima

import warnings
warnings.filterwarnings("ignore")

In [4]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV, Lasso, LassoCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import LinearSVR
from sklearn import linear_model
import xgboost as xgb

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

SEED = 42

In [5]:
df = pd.read_csv("/content/gdrive/MyDrive/owid-covid-data.csv")

In [6]:
dataset_UK = df[df["location"] == "United Kingdom"]  

In [7]:
dataset_UK.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
       'new_vaccinations_smoothed', 'total_vaccinations_per_hun

In [8]:
features = ["date", "population", "new_deaths", "reproduction_rate", "icu_patients"]

In [9]:
dataset_UK = dataset_UK[dataset_UK["date"] >= "2020-05-01"]

In [10]:
dataset_UK = dataset_UK[features]

In [11]:
dataset_UK.head()

Unnamed: 0,date,population,new_deaths,reproduction_rate,icu_patients
90078,2020-05-01,67886004.0,700.0,0.94,2178.0
90079,2020-05-02,67886004.0,585.0,0.9,2068.0
90080,2020-05-03,67886004.0,253.0,0.87,2009.0
90081,2020-05-04,67886004.0,273.0,0.82,1936.0
90082,2020-05-05,67886004.0,725.0,0.81,1874.0


In [12]:
dataset_UK.tail()

Unnamed: 0,date,population,new_deaths,reproduction_rate,icu_patients
90482,2021-06-09,67886004.0,6.0,1.48,159.0
90483,2021-06-10,67886004.0,7.0,,158.0
90484,2021-06-11,67886004.0,17.0,,
90485,2021-06-12,67886004.0,12.0,,
90486,2021-06-13,67886004.0,8.0,,


In [13]:
dataset_UK.isna().sum()

date                 0
population           0
new_deaths           0
reproduction_rate    4
icu_patients         3
dtype: int64

In [14]:
dataset_UK = dataset_UK[dataset_UK["date"] < "2021-06-10"]

In [15]:
dataset_UK.isna().sum()

date                 0
population           0
new_deaths           0
reproduction_rate    0
icu_patients         0
dtype: int64

In [16]:
dataset_UK["Mortality_Rate"] = " "
dataset_UK["Mortality_Rate"] = dataset_UK["new_deaths"]/dataset_UK["population"]

In [17]:
dataset_UK.head()

Unnamed: 0,date,population,new_deaths,reproduction_rate,icu_patients,Mortality_Rate
90078,2020-05-01,67886004.0,700.0,0.94,2178.0,1e-05
90079,2020-05-02,67886004.0,585.0,0.9,2068.0,9e-06
90080,2020-05-03,67886004.0,253.0,0.87,2009.0,4e-06
90081,2020-05-04,67886004.0,273.0,0.82,1936.0,4e-06
90082,2020-05-05,67886004.0,725.0,0.81,1874.0,1.1e-05


In [18]:
len(dataset_UK)

405

In [19]:
dataset_UK.drop(["population", "new_deaths"], axis = 1, inplace = True)

In [20]:
dataset_UK.tail()

Unnamed: 0,date,reproduction_rate,icu_patients,Mortality_Rate
90478,2021-06-05,1.48,135.0,1.914975e-07
90479,2021-06-06,1.48,145.0,5.892231e-08
90480,2021-06-07,1.48,148.0,1.473058e-08
90481,2021-06-08,1.48,154.0,2.062281e-07
90482,2021-06-09,1.48,159.0,8.838346e-08


In [21]:
dataset_UK.set_index(["date"], inplace = True)

In [22]:
dataset_UK.tail()

Unnamed: 0_level_0,reproduction_rate,icu_patients,Mortality_Rate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-06-05,1.48,135.0,1.914975e-07
2021-06-06,1.48,145.0,5.892231e-08
2021-06-07,1.48,148.0,1.473058e-08
2021-06-08,1.48,154.0,2.062281e-07
2021-06-09,1.48,159.0,8.838346e-08


In [23]:
train = dataset_UK[ : 350]
test = dataset_UK[350 : ]

In [24]:
X_train = train.iloc[ : ,  : -1]
Y_train = train.iloc[ : , -1 : ]

X_test = test.iloc[ : ,  : -1]
Y_test = test.iloc[ : , -1 : ]

In [25]:
estimators = [
              ("rf", RandomForestRegressor(random_state = SEED)),
              ("svr", LinearSVR(random_state = SEED)),
              ("rf1", RandomForestRegressor(random_state = SEED)),
              ("xgb", xgb.XGBRegressor(random_state = SEED)),
              ("knn", KNeighborsRegressor(n_neighbors = 7))
]

reg = StackingRegressor(
    estimators = estimators, 
    final_estimator = RandomForestRegressor(random_state = SEED)
)

reg.fit(X_train, Y_train)
predict = reg.predict(X_test)

r2 = r2_score(Y_test, predict)

print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is {r2}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is -810.2436622602492
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [26]:
regressor = KNeighborsRegressor(n_neighbors = SEED)
regressor.fit(X_train, Y_train)

predict = regressor.predict(X_test)


r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : -1.7847472227485475
The Mean absolute error is : 1.8931023177494953e-07
The Mean squared error score is : 5.692927153211653e-14
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [27]:
estimators = [
              ("knn", KNeighborsRegressor(n_neighbors = 44)),
              ("xgb", xgb.XGBRegressor(random_state = SEED)),

]

reg = StackingRegressor(
    estimators = estimators ,
    final_estimator = KNeighborsRegressor(n_neighbors = 10)
)

reg.fit(X_train, Y_train)
predict = reg.predict(X_test)

r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : -4.244580412219203
The Mean absolute error is : 3.009055110575183e-07
The Mean squared error score is : 1.0721624566863117e-13
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [28]:
lasso = linear_model.Lasso(alpha = 5)
lasso.fit(X_train, Y_train)

predict = lasso.predict(X_test)

r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : -803.8470169124932
The Mean absolute error is : 4.053793535348526e-06
The Mean squared error score is : 1.6453685272877863e-11
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [29]:
estimators = [
              ("knn", KNeighborsRegressor(n_neighbors = 44)),
              ("xgb", xgb.XGBRegressor(random_state = SEED)),
              ("lasso", linear_model.Lasso(alpha = 5))

]

reg = StackingRegressor(
    estimators = estimators ,
    final_estimator = KNeighborsRegressor(n_neighbors = 10)
)

reg.fit(X_train, Y_train)
predict = reg.predict(X_test)

r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : -3.3151997936798656
The Mean absolute error is : 2.718728831993652e-07
The Mean squared error score is : 8.821668938671803e-14
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [30]:
estimators = [
              ("knn", KNeighborsRegressor(n_neighbors = 44)),
              #("xgb", xgb.XGBRegressor(random_state = SEED)),
              ("lasso", linear_model.Lasso(alpha = 5))

]

reg = StackingRegressor(
    estimators = estimators ,
    final_estimator = KNeighborsRegressor(n_neighbors = 10)
)

reg.fit(X_train, Y_train)
predict = reg.predict(X_test)

r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : -4.1431891548593995
The Mean absolute error is : 3.041194551008563e-07
The Mean squared error score is : 1.051434792882335e-13
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [31]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

Y_train = sc.fit_transform(Y_train)
Y_test = sc.fit_transform(Y_test) 

regressor = KNeighborsRegressor(n_neighbors = SEED)
regressor.fit(X_train, Y_train)

predict = regressor.predict(X_test)

r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : 0.2649956184757376
The Mean absolute error is : 0.6330872729493936
The Mean squared error score is : 0.7350043815242621
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [32]:
estimators = [
              ("knn", KNeighborsRegressor(n_neighbors = 44)),
              #("xgb", xgb.XGBRegressor(random_state = SEED)),
              ("lasso", linear_model.Lasso(alpha = 5))

]

reg = StackingRegressor(
    estimators = estimators ,
    final_estimator = KNeighborsRegressor(n_neighbors = 10)
)

reg.fit(X_train, Y_train)
predict = reg.predict(X_test)

r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : 0.2683285210842543
The Mean absolute error is : 0.6372232312193429
The Mean squared error score is : 0.7316714789157454
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------


In [33]:
lasso = linear_model.Lasso(alpha = 5)
lasso.fit(X_train, Y_train)

predict = lasso.predict(X_test)

r2 = r2_score(Y_test, predict)
mae = mean_absolute_error(Y_test, predict)
mse = mean_squared_error(Y_test, predict)
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print(f"The r2 score is : {r2}")
print(f"The Mean absolute error is : {mae}")
print(f"The Mean squared error score is : {mse}")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")
print("------------------------------------------------")

------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
The r2 score is : -2.220446049250313e-16
The Mean absolute error is : 0.7795200620804997
The Mean squared error score is : 0.9999999999999997
------------------------------------------------
------------------------------------------------
------------------------------------------------
------------------------------------------------
