In [None]:
#Import the necessary libraries
import pandas as pd
import numpy as np
import time
import pickle
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Load the evaluation data: both CSV files are concatenated into a single frame.
# NOTE(review): the first file is named "...Train..." -- confirm that the stored
# models were not fitted on it, otherwise the reported scores are optimistic.
df_test1 = pd.read_csv("Dataset2_2Train_new2.csv")
df_test2 = pd.read_csv("Dataset2_2Test_new2.csv")
df_test = pd.concat([df_test1, df_test2], axis=0, ignore_index=True)
# Shuffle with a fixed seed so a fresh Restart & Run All yields the same row order.
df_test = df_test.sample(frac=1, random_state=42).reset_index(drop=True)
# Raw text column and label column as NumPy arrays for the evaluation functions.
X_test1 = df_test['final_cleaned_text'].values
y_test = df_test['label'].values

In [None]:
# Load the fitted TF-IDF vectorizer and the five stored model files from disk.
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)

tfidf_vect_fit = _read_pickle("tfidf_vect_fit2.pkl")
tfidf_mlp_best_model = _read_pickle("tfidf_mlp_model2.pkl")
tfidf_knn_best_model = _read_pickle("tfidf_knn_model2.pkl")
tfidf_rf_best_model = _read_pickle("tfidf_rf_model2.pkl")
tfidf_lr_best_model = _read_pickle("tfidf_lr_model2.pkl")
tfidf_svc_best_model = _read_pickle("tfidf_svc_model2.pkl")

In [None]:
#Testing of TFIDF features of testing data using MLP
def check_best_model_MLP(X_test, y_test, tfidf_mlp_best_model, tfidf_vect_fit):
    """Evaluate the stored MLP model on TF-IDF features and print its metrics.

    Prints, in order: the classification report, the confusion matrix,
    precision, recall, F-score, accuracy and the elapsed time. Returns None.

    Parameters
    ----------
    X_test : array-like
        Raw text documents; missing entries are replaced with ''.
    y_test : array-like
        True labels. They must be binary, because the confusion matrix is
        unpacked into exactly four cells.
    tfidf_mlp_best_model : object
        Model exposing ``predict``.
    tfidf_vect_fit : object
        Vectorizer exposing ``transform`` whose result supports ``toarray``.

    Raises
    ------
    ValueError
        If the confusion matrix does not contain exactly four cells.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by clock changes.
    start_time = time.perf_counter()
    # Replace missing documents with '' so the vectorizer never receives NaN/None.
    documents = pd.Series(X_test).fillna('')
    X_test_feature = tfidf_vect_fit.transform(documents).toarray()
    y_pred = tfidf_mlp_best_model.predict(X_test_feature)

    print(classification_report(y_test, y_pred))
    # Build the confusion matrix once and reuse it for printing and the manual metrics.
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    tn, fp, fn, tp = cm.ravel()

    # Guard every ratio: a zero denominator (no predicted or no actual positives)
    # would otherwise yield NaN, so report 0.0 in that case.
    p = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    r = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    fscore = (2 * p * r) / (p + r) if (p + r) > 0 else 0.0
    print("Precision:",p)
    print("Recall:",r)
    print("FScore:",fscore)

    accuracy = accuracy_score(y_test, y_pred)
    print("Testing Accuracy:",accuracy)
    end_time = time.perf_counter()
    print(f"The total testing time is {end_time-start_time} seconds")
# Run the MLP evaluation on the loaded evaluation texts and labels.
check_best_model_MLP(X_test1, y_test, tfidf_mlp_best_model, tfidf_vect_fit)

In [None]:
#Testing of TFIDF features of testing data using KNN
def check_best_model_KNN(X_test, y_test, tfidf_knn_best_model, tfidf_vect_fit):
    """Evaluate the stored KNN model on TF-IDF features and print its metrics.

    Prints, in order: the classification report, the confusion matrix,
    precision, recall, F-score, accuracy and the elapsed time. Returns None.

    Parameters
    ----------
    X_test : array-like
        Raw text documents; missing entries are replaced with ''.
    y_test : array-like
        True labels. They must be binary, because the confusion matrix is
        unpacked into exactly four cells.
    tfidf_knn_best_model : object
        Model exposing ``predict``.
    tfidf_vect_fit : object
        Vectorizer exposing ``transform`` whose result supports ``toarray``.

    Raises
    ------
    ValueError
        If the confusion matrix does not contain exactly four cells.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by clock changes.
    start_time = time.perf_counter()
    # Replace missing documents with '' so the vectorizer never receives NaN/None.
    documents = pd.Series(X_test).fillna('')
    X_test_feature = tfidf_vect_fit.transform(documents).toarray()
    y_pred = tfidf_knn_best_model.predict(X_test_feature)

    print(classification_report(y_test, y_pred))
    # Build the confusion matrix once and reuse it for printing and the manual metrics.
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    tn, fp, fn, tp = cm.ravel()

    # Guard every ratio: a zero denominator (no predicted or no actual positives)
    # would otherwise yield NaN, so report 0.0 in that case.
    p = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    r = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    fscore = (2 * p * r) / (p + r) if (p + r) > 0 else 0.0
    print("Precision:",p)
    print("Recall:",r)
    print("FScore:",fscore)

    accuracy = accuracy_score(y_test, y_pred)
    print("Testing Accuracy:",accuracy)
    end_time = time.perf_counter()
    print(f"The total testing time is {end_time-start_time} seconds")
# Run the KNN evaluation on the loaded evaluation texts and labels.
check_best_model_KNN(X_test1, y_test, tfidf_knn_best_model, tfidf_vect_fit)

In [None]:
#Testing of TFIDF features of testing data using RF
def check_best_model_RF(X_test, y_test, tfidf_rf_best_model, tfidf_vect_fit):
    """Evaluate the stored RF model on TF-IDF features and print its metrics.

    Prints, in order: the classification report, the confusion matrix,
    precision, recall, F-score, accuracy and the elapsed time. Returns None.

    Parameters
    ----------
    X_test : array-like
        Raw text documents; missing entries are replaced with ''.
    y_test : array-like
        True labels. They must be binary, because the confusion matrix is
        unpacked into exactly four cells.
    tfidf_rf_best_model : object
        Model exposing ``predict``.
    tfidf_vect_fit : object
        Vectorizer exposing ``transform`` whose result supports ``toarray``.

    Raises
    ------
    ValueError
        If the confusion matrix does not contain exactly four cells.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by clock changes.
    start_time = time.perf_counter()
    # Replace missing documents with '' so the vectorizer never receives NaN/None.
    documents = pd.Series(X_test).fillna('')
    X_test_feature = tfidf_vect_fit.transform(documents).toarray()
    y_pred = tfidf_rf_best_model.predict(X_test_feature)

    print(classification_report(y_test, y_pred))
    # Build the confusion matrix once and reuse it for printing and the manual metrics.
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    tn, fp, fn, tp = cm.ravel()

    # Guard every ratio: a zero denominator (no predicted or no actual positives)
    # would otherwise yield NaN, so report 0.0 in that case.
    p = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    r = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    fscore = (2 * p * r) / (p + r) if (p + r) > 0 else 0.0
    print("Precision:",p)
    print("Recall:",r)
    print("FScore:",fscore)

    accuracy = accuracy_score(y_test, y_pred)
    print("Testing Accuracy:",accuracy)
    end_time = time.perf_counter()
    print(f"The total testing time is {end_time-start_time} seconds")
# Run the RF evaluation on the loaded evaluation texts and labels.
check_best_model_RF(X_test1, y_test, tfidf_rf_best_model, tfidf_vect_fit)

In [None]:
#Testing of TFIDF features of testing data using LR
def check_best_model_LR(X_test, y_test, tfidf_lr_best_model, tfidf_vect_fit):
    """Evaluate the stored LR model on TF-IDF features and print its metrics.

    Prints, in order: the classification report, the confusion matrix,
    precision, recall, F-score, accuracy and the elapsed time. Returns None.

    Parameters
    ----------
    X_test : array-like
        Raw text documents; missing entries are replaced with ''.
    y_test : array-like
        True labels. They must be binary, because the confusion matrix is
        unpacked into exactly four cells.
    tfidf_lr_best_model : object
        Model exposing ``predict``.
    tfidf_vect_fit : object
        Vectorizer exposing ``transform`` whose result supports ``toarray``.

    Raises
    ------
    ValueError
        If the confusion matrix does not contain exactly four cells.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by clock changes.
    start_time = time.perf_counter()
    # Replace missing documents with '' so the vectorizer never receives NaN/None.
    documents = pd.Series(X_test).fillna('')
    X_test_feature = tfidf_vect_fit.transform(documents).toarray()
    y_pred = tfidf_lr_best_model.predict(X_test_feature)

    print(classification_report(y_test, y_pred))
    # Build the confusion matrix once and reuse it for printing and the manual metrics.
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    tn, fp, fn, tp = cm.ravel()

    # Guard every ratio: a zero denominator (no predicted or no actual positives)
    # would otherwise yield NaN, so report 0.0 in that case.
    p = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    r = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    fscore = (2 * p * r) / (p + r) if (p + r) > 0 else 0.0
    print("Precision:",p)
    print("Recall:",r)
    print("FScore:",fscore)

    accuracy = accuracy_score(y_test, y_pred)
    print("Testing Accuracy:",accuracy)
    end_time = time.perf_counter()
    print(f"The total testing time is {end_time-start_time} seconds")
# Run the LR evaluation on the loaded evaluation texts and labels.
check_best_model_LR(X_test1, y_test, tfidf_lr_best_model, tfidf_vect_fit)

In [None]:
#Testing of TFIDF features of testing data using SVC
def check_best_model_SVC(X_test, y_test, tfidf_svc_best_model, tfidf_vect_fit):
    """Evaluate the stored SVC model on TF-IDF features and print its metrics.

    Prints, in order: the classification report, the confusion matrix,
    precision, recall, F-score, accuracy and the elapsed time. Returns None.

    Parameters
    ----------
    X_test : array-like
        Raw text documents; missing entries are replaced with ''.
    y_test : array-like
        True labels. They must be binary, because the confusion matrix is
        unpacked into exactly four cells.
    tfidf_svc_best_model : object
        Model exposing ``predict``.
    tfidf_vect_fit : object
        Vectorizer exposing ``transform`` whose result supports ``toarray``.

    Raises
    ------
    ValueError
        If the confusion matrix does not contain exactly four cells.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by clock changes.
    start_time = time.perf_counter()
    # Replace missing documents with '' so the vectorizer never receives NaN/None.
    documents = pd.Series(X_test).fillna('')
    X_test_feature = tfidf_vect_fit.transform(documents).toarray()
    y_pred = tfidf_svc_best_model.predict(X_test_feature)

    print(classification_report(y_test, y_pred))
    # Build the confusion matrix once and reuse it for printing and the manual metrics.
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    tn, fp, fn, tp = cm.ravel()

    # Guard every ratio: a zero denominator (no predicted or no actual positives)
    # would otherwise yield NaN, so report 0.0 in that case.
    p = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    r = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    fscore = (2 * p * r) / (p + r) if (p + r) > 0 else 0.0
    print("Precision:",p)
    print("Recall:",r)
    print("FScore:",fscore)

    accuracy = accuracy_score(y_test, y_pred)
    print("Testing Accuracy:",accuracy)
    end_time = time.perf_counter()
    print(f"The total testing time is {end_time-start_time} seconds")
# Run the SVC evaluation on the loaded evaluation texts and labels.
check_best_model_SVC(X_test1, y_test, tfidf_svc_best_model, tfidf_vect_fit)

In [None]:
# Print the parameters reported by the loaded SVC model's get_params().
all_params = tfidf_svc_best_model.get_params()
print("All parameters:", all_params)

In [None]:
# Print the parameters reported by the loaded MLP model's get_params().
all_params = tfidf_mlp_best_model.get_params()
print("All parameters:", all_params)

In [None]:
# Print the parameters reported by the loaded KNN model's get_params().
all_params = tfidf_knn_best_model.get_params()
print("All parameters:", all_params)

In [None]:
# Print the parameters reported by the loaded LR model's get_params().
all_params = tfidf_lr_best_model.get_params()
print("All parameters:", all_params)

In [None]:
# Print the parameters reported by the loaded RF model's get_params().
all_params = tfidf_rf_best_model.get_params()
print("All parameters:", all_params)