In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [2]:
import pickle


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, instead of being left open for the lifetime of
    the kernel.

    NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
    file. Only load ``.sav`` files that come from a trusted source.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Load the trained Best Gradient Boost Regressor model
gb = _load_pickle('gradient_model.sav')

# Load the trained Best KNN model
knn = _load_pickle('knn_model.sav')

# Load the trained Best Random Forest Regressor model
rf = _load_pickle('randomforest_model.sav')

# Load the trained Best SVM model
svm = _load_pickle('svm_model.sav')

# Load the trained Best Linear Regression model
lr = _load_pickle('linear_model.sav')

data = pd.read_csv("Final.csv")

# Features are every column except the three *_scaled columns listed here;
# Facebook_scaled alone is used as the target.
columns = [col for col in data.columns
           if col not in ['Facebook_scaled', 'LinkedIn_scaled', 'GooglePlus_scaled']]
x = data[columns]
y = data[['Facebook_scaled']]

# Extract the test set (30% of the rows) from the dataset.
# Keeping random_state fixed makes train_test_split put the same rows into
# the training and test sets on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=30)

In [3]:
# Features and labels kept by the LASSO variable selection
x1, y1 = pd.read_csv("variable_selection.csv"), pd.read_csv("variable_labels.csv")
# Hold out the test set; the fixed random_state sends the same rows to the
# training and test sets on every run.
x_train1, x_test1, y_train1, y_test1 = train_test_split(
    x1, y1, random_state=30, test_size=0.3
)

# Features and labels kept by the bi-directional elimination
x2, y2 = pd.read_csv("bi_variable_selection.csv"), pd.read_csv("bi_variable_labels.csv")
# Same hold-out procedure and seed as above.
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    x2, y2, random_state=30, test_size=0.3
)

In [7]:
from tensorflow import keras

# Load the two saved Artificial Neural Network models in one assignment:
#   model_variable  - the ANN trained on the LASSO variable selection
#   model_variable2 - the ANN trained on the bi-directional variable selection
model_variable, model_variable2 = (
    keras.models.load_model('DL_variable_model/DL_variable_model/'),
    keras.models.load_model('DL_bi_variable_model/DL_bi_variable_model'),
)

In [8]:
def metrics(name, x_train, y_test, pred):
    """Calculate and return the evaluation metrics for one model.

    Parameters
    ----------
    name : label for the model; returned unchanged as the first element.
    x_train : table with a ``columns`` attribute; only the number of columns
        is used, as the predictor count ``k`` in the adjusted R2 formula.
    y_test : true target values.
    pred : predicted values; ``len(pred)`` is used as the sample count ``n``.

    Returns
    -------
    list
        ``[name, MSE, RMSE, R2_Score, Adjusted_R2_score]``.
        ``Adjusted_R2_score`` is NaN when ``n - k - 1 <= 0``, because the
        adjustment is undefined there (it would divide by zero or by a
        negative number of degrees of freedom).
    """
    k = len(x_train.columns)
    n = len(pred)

    # Each score is computed once and reused below.
    MSE = mean_squared_error(y_test, pred)
    RMSE = np.sqrt(MSE)
    R2_Score = r2_score(y_test, pred)

    degrees_of_freedom = n - k - 1
    if degrees_of_freedom > 0:
        Adjusted_R2_score = 1 - ((1 - R2_Score) * (n - 1) / degrees_of_freedom)
    else:
        Adjusted_R2_score = float("nan")

    return [name, MSE, RMSE, R2_Score, Adjusted_R2_score]
from tabulate import tabulate

# Build one metrics row per model, all scored on held-out test data.
# The pickled models use the full feature split; each ANN uses the split of
# the variable-selection dataset it is paired with.
mydata = [
    metrics("SVM", x_train, y_test, svm.predict(x_test)),
    metrics("Gradient Boost", x_train, y_test, gb.predict(x_test)),
    metrics("KNN", x_train, y_test, knn.predict(x_test)),
    metrics("Random Forest", x_train, y_test, rf.predict(x_test)),
    metrics("Linear Regression", x_train, y_test, lr.predict(x_test)),
    metrics("ANN 1st Variable Selection", x_train1, y_test1,
            model_variable.predict(x_test1)),
    metrics("ANN BI directional Elimination", x_train2, y_test2,
            model_variable2.predict(x_test2)),
]

# Column headers for the comparison table, in the order metrics() returns them.
head = ["Model", "MSE", "RMSE", "R2_Score", "Adjusted_R2_score"]
print(tabulate(mydata, headers=head, tablefmt="grid"))

+--------------------------------+-----------+----------+------------+---------------------+
| Model                          |       MSE |     RMSE |   R2_Score |   Adjusted_R2_score |
+================================+===========+==========+============+=====================+
| SVM                            | 0.118923  | 0.344853 |   0.871885 |            0.866715 |
+--------------------------------+-----------+----------+------------+---------------------+
| Gradient Boost                 | 0.0958367 | 0.309575 |   0.896756 |            0.89259  |
+--------------------------------+-----------+----------+------------+---------------------+
| KNN                            | 0.0865688 | 0.294226 |   0.90674  |            0.902977 |
+--------------------------------+-----------+----------+------------+---------------------+
| Random Forest                  | 0.0705562 | 0.265624 |   0.923991 |            0.920923 |
+--------------------------------+-----------+----------+------------+---------------------+
| Linear Regression              | 0.602376  | 0.776129 |   0.351067 |