# Import the libraries

In [409]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
import datetime as dt
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn import linear_model
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.linear_model import SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from keras.layers import LSTM
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import cross_val_score

# Read the data
Please change the path if necessary

In [410]:
cad = pd.read_csv(r'C:\Users\hiten\OneDrive\Desktop\clean_data\cleaned-canada-covid.csv')

Select the features and split the data

In [411]:
X = cad.iloc[:, [2, 4, 5, 10]]
y = cad.iloc[:, 3]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [412]:
scorestrain = []
scorestest = []

# Make predictions using various models

## Lasso

In [413]:
model = make_pipeline(
    MinMaxScaler(),
    linear_model.Lasso(alpha=0.1)
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## SVR

In [414]:
model = make_pipeline(
    MinMaxScaler(),
    svm.SVR()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## SGD

In [415]:
model = make_pipeline(
    MinMaxScaler(),
    SGDRegressor(max_iter=1000, tol=1e-3)
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## KNeighbour

In [416]:
model = make_pipeline(
    MinMaxScaler(),
    KNeighborsRegressor()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Gausian Process

In [417]:
model = make_pipeline(
    MinMaxScaler(),
    GaussianProcessRegressor()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Decision Tree

In [418]:
model = make_pipeline(
    MinMaxScaler(),
    tree.DecisionTreeRegressor()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Ensemble Model(Bagging)

In [419]:
model = make_pipeline(
    MinMaxScaler(),
    BaggingRegressor(svm.SVR())
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Ensemble Model(Random Forest)

In [420]:
model = make_pipeline(
    MinMaxScaler(),
    RandomForestRegressor()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Extra Trees

In [421]:
model = make_pipeline(
    MinMaxScaler(),
    ExtraTreesRegressor()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Ada Boost

In [422]:
model = make_pipeline(
    MinMaxScaler(),
    AdaBoostRegressor(
    DecisionTreeRegressor(), n_estimators=300
)
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Gradient Boosting

In [423]:
model = make_pipeline(
    MinMaxScaler(),
    GradientBoostingRegressor()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

## Neural Network(MLP)

In [424]:
model = make_pipeline(
    MinMaxScaler(),
    MLPRegressor()
)
model.fit(X_train, y_train)
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))



## Voting

In [425]:
model = make_pipeline(
    MinMaxScaler(),
    VotingRegressor([('gp', GaussianProcessRegressor()), ('gb', GradientBoostingRegressor()), ('kn', KNeighborsRegressor())])
)
model.fit(X_train, y_train)
#print(model.score(X_test, y_test))
scorestrain.append(model.score(X_train, y_train))
scorestest.append(model.score(X_test, y_test))

# Results

In [427]:
graph = [['Lasso', scorestrain[0], scorestest[0]], 
         ['SVR', scorestrain[1], scorestest[1]], 
         ['SGD', scorestrain[2], scorestest[2]], 
         ['KNeighbour', scorestrain[3], scorestest[3]],
         ['Gausian Process', scorestrain[4], scorestest[4]],
         ['Decision Tree', scorestrain[5], scorestest[5]],
         ['Bagging', scorestrain[6], scorestest[6]],
         ['Random Forest', scorestrain[7], scorestest[7]],
         ['Extra Trees', scorestrain[8], scorestest[8]],
         ['AdaBoost', scorestrain[9], scorestest[9]],
         ['Gradient Boosting', scorestrain[10], scorestest[10]],
         ['MLP', scorestrain[11], scorestest[11]],
         ['Voting', scorestrain[12], scorestest[12]]]
df = pd.DataFrame(graph, columns=['Algorithm', 'Train', 'Test'])
df

Unnamed: 0,Algorithm,Train,Test
0,Lasso,0.547093,0.668016
1,SVR,-0.105804,-0.087252
2,SGD,0.507794,0.616373
3,KNeighbour,0.906239,0.899622
4,Gausian Process,0.930246,0.894475
5,Decision Tree,1.0,0.775734
6,Bagging,-0.073918,-0.056516
7,Random Forest,0.975829,0.884081
8,Extra Trees,1.0,0.886729
9,AdaBoost,0.997318,0.835712


In [429]:
df.sort_values(by=['Test'], ascending=False)

Unnamed: 0,Algorithm,Train,Test
12,Voting,0.934634,0.920371
10,Gradient Boosting,0.938022,0.907564
3,KNeighbour,0.906239,0.899622
4,Gausian Process,0.930246,0.894475
8,Extra Trees,1.0,0.886729
7,Random Forest,0.975829,0.884081
9,AdaBoost,0.997318,0.835712
5,Decision Tree,1.0,0.775734
0,Lasso,0.547093,0.668016
2,SGD,0.507794,0.616373
