# Testing Results and Model Comparison

**Importing necessary libraries**

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import joblib
from tensorflow import keras
from preprocess_script import *
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, f1_score,  precision_score, recall_score

**Loading trained models**

In [2]:
# Restore the three joblib-serialised estimators in one pass.
# joblib.load is pickle-based, so these files must come from a trusted source.
rfc, svc, nb = map(joblib.load, ('rfc.pkl', 'svc.pkl', 'nb.pkl'))

# The neural network is stored in Keras' own format under the 'ann5' path.
ann = keras.models.load_model('ann5')

**Reading test dataset**

In [3]:
# Read test.csv (path is relative to the current working directory) into a DataFrame.
test = pd.read_csv('test.csv')

In [4]:
# Run the frame through preprocess() — presumably provided by the wildcard import
# from preprocess_script; confirm.
# NOTE(review): `test` is rebound to the processed result, so re-running this cell on
# its own passes already-processed data to preprocess() again. Re-run the read_csv
# cell first when repeating this step.
test = preprocess(test)

In [5]:
# Target column and the columns that must not be fed to the models.
label = "is_bad_review"
ignore_cols = [label, "review", "review_clean"]

# Every remaining column, in its original order, is treated as a model input.
features = list(filter(lambda column: column not in ignore_cols, test.columns))

# Split the frame into the feature matrix and the target vector.
X_test = test.loc[:, features]
y_test = test.loc[:, label]

**Test results for Random Forest**

In [6]:
# Predictions of the model loaded from rfc.pkl for every row of the test features.
y_rfc = rfc.predict(X_test)

In [7]:
# Report each metric for the random forest predictions, one line per metric.
# NOTE(review): y_rfc comes from rfc.predict(), presumably hard class labels, so the
# "AUC Score" line is not derived from predicted probabilities — confirm this is intended.
rfc_metrics = (
    ('Accuracy', accuracy_score),
    ('AUC Score', roc_auc_score),
    ('F1 Score', f1_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
)
for metric_name, metric_fn in rfc_metrics:
    print(f'{metric_name} : {metric_fn(y_test, y_rfc)}')

Accuracy : 0.8424545454545455
AUC Score : 0.832262629728107
F1 Score : 0.7999999999999999
Precision : 0.8223013048635824
Recall : 0.778876404494382


**Test results for Support Vector Classifier**

In [8]:
# Predictions of the model loaded from svc.pkl for every row of the test features.
y_svc = svc.predict(X_test)

In [9]:
# Report each metric for the support vector classifier predictions, one line per metric.
# NOTE(review): y_svc comes from svc.predict(), presumably hard class labels, so the
# "AUC Score" line is not derived from decision scores — confirm this is intended.
svc_metrics = (
    ('Accuracy', accuracy_score),
    ('AUC Score', roc_auc_score),
    ('F1 Score', f1_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
)
for metric_name, metric_fn in svc_metrics:
    print(f'{metric_name} : {metric_fn(y_test, y_svc)}')

Accuracy : 0.8567272727272728
AUC Score : 0.851271978728879
F1 Score : 0.8228815464149247
Precision : 0.82306654676259
Recall : 0.8226966292134832


**Test results for Multinomial Naive Bayes**

In [10]:
# The naive Bayes model is scored without the 'compound' column, so build a
# dedicated feature frame for it before predicting.
X_test_nb = X_test.drop(columns='compound')
y_nb = nb.predict(X_test_nb)

In [11]:
# Report each metric for the naive Bayes predictions, one line per metric.
# NOTE(review): y_nb comes from nb.predict(), presumably hard class labels, so the
# "AUC Score" line is not derived from predicted probabilities — confirm this is intended.
nb_metrics = (
    ('Accuracy', accuracy_score),
    ('AUC Score', roc_auc_score),
    ('F1 Score', f1_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
)
for metric_name, metric_fn in nb_metrics:
    print(f'{metric_name} : {metric_fn(y_test, y_nb)}')

Accuracy : 0.8181818181818182
AUC Score : 0.8056128312891329
F1 Score : 0.7670083876980429
Precision : 0.7963231736816643
Recall : 0.7397752808988765


**Test results for ANN**

In [12]:
# Cut-off applied to the network output: values below it become class 0, the rest class 1.
ANN_THRESHOLD = 0.5

# Raw network outputs, kept under their own name so the un-thresholded scores stay
# available (e.g. for a score-based ROC-AUC) after the class labels are derived.
y_ann_prob = ann.predict(X_test)

# Vectorised replacement for the per-row Python loop: same rule (< threshold -> 0,
# otherwise 1) and the same shape and dtype as the model output, but the scores are
# no longer overwritten in place.
y_ann = np.where(y_ann_prob < ANN_THRESHOLD, 0, 1).astype(y_ann_prob.dtype)

In [13]:
# Report each metric for the neural network predictions, one line per metric.
# NOTE(review): y_ann holds 0/1 labels obtained by thresholding the network output,
# so the "AUC Score" line is computed from hard predictions rather than from scores.
ann_metrics = (
    ('Accuracy', accuracy_score),
    ('AUC Score', roc_auc_score),
    ('F1 Score', f1_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
)
for metric_name, metric_fn in ann_metrics:
    print(f'{metric_name} : {metric_fn(y_test, y_ann)}')

Accuracy : 0.8572727272727273
AUC Score : 0.8531709409040226
F1 Score : 0.8250111457868925
Precision : 0.8184431667403803
Recall : 0.8316853932584269


**Table of all the computed results**

In [14]:
# Tab-separated comparison table: a header line followed by one row per model,
# with every metric rounded to 4 decimal places.
print('\t \t Accuracy \t ROC-AUC \t F1 Score \t Precision \t Recall')

# Metrics in column order (accuracy, ROC-AUC, F1, precision, recall).
summary_metrics = (accuracy_score, roc_auc_score, f1_score, precision_score, recall_score)

# Each row pairs its layout (the tab padding differs per model name) with the
# predictions to score.
summary_rows = (
    ('Random Forest \t {} \t {} \t {} \t \t {} \t {}', y_rfc),
    ('SVC \t \t {} \t {} \t {} \t {} \t {}', y_svc),
    ('Naive Bayes \t {} \t {} \t {} \t \t {} \t {}', y_nb),
    ('ANN \t\t {} \t {} \t {} \t \t {} \t {}', y_ann),
)

for row_template, row_pred in summary_rows:
    row_scores = [round(metric(y_test, row_pred), 4) for metric in summary_metrics]
    print(row_template.format(*row_scores))

	 	 Accuracy 	 ROC-AUC 	 F1 Score 	 Precision 	 Recall
Random Forest 	 0.8425 	 0.8323 	 0.8 	 	 0.8223 	 0.7789
SVC 	 	 0.8567 	 0.8513 	 0.8229 	 0.8231 	 0.8227
Naive Bayes 	 0.8182 	 0.8056 	 0.767 	 	 0.7963 	 0.7398
ANN 		 0.8573 	 0.8532 	 0.825 	 	 0.8184 	 0.8317
