In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import json

In [None]:
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, roc_auc_score, f1_score

In [None]:
# Mount Google Drive at /content/gdrive; the read_pickle and load_model calls
# in the following cells use paths under this mount point.
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [None]:
# Root folder on the mounted Drive that holds every cleaned SCAM input pickle.
INPUT_DIR = '/content/gdrive/My Drive/BT4222/Codes/Cleaned Input Data'


def _read_input(subfolder, filename):
    """Unpickle `INPUT_DIR/<subfolder>/<filename>` and return the stored object."""
    # NOTE(review): unpickling executes arbitrary code; only load files you produced yourself.
    return pd.read_pickle(INPUT_DIR + '/' + subfolder + '/' + filename)


## Full-text train/test split
X_train = _read_input('Full Text SCAM', 'X_train_fSC.pkl')
y_train = _read_input('Full Text SCAM', 'y_train_fSC.pkl')
X_test = _read_input('Full Text SCAM', 'X_test_fSC.pkl')
y_test = _read_input('Full Text SCAM', 'y_test_fSC.pkl')

## TF-IDF with top 15 features
X_train_tfidf = _read_input('TFIDF SCAM', 'X_train_tfSC.pkl')
X_test_tfidf = _read_input('TFIDF SCAM', 'X_test_tfSC.pkl')

## TF-IDF GloVe with top 15 features
X_train_tfidf_glove = _read_input('TFIDF Glove SCAM', 'X_train_tfglSC.pkl')
X_test_tfidf_glove = _read_input('TFIDF Glove SCAM', 'X_test_tfglSC.pkl')

## TF-IDF FastText (cc) with top 15 features
X_train_tfidf_cc = _read_input('TFIDF CC SCAM', 'X_train_tfccSC.pkl')
X_test_tfidf_cc = _read_input('TFIDF CC SCAM', 'X_test_tfccSC.pkl')

## InferSent features, stored together with their own label files
X_train_infersent = _read_input('InferSent SCAM', 'X_train_infSC.pkl')
X_test_infersent = _read_input('InferSent SCAM', 'X_test_infSC.pkl')
y_train_infersent = _read_input('InferSent SCAM', 'y_train_infSC.pkl')
y_test_infersent = _read_input('InferSent SCAM', 'y_test_infSC.pkl')

In [None]:
# Folder on the mounted Drive that contains one saved Keras model per embedding.
SAVED_MODELS_DIR = '/content/gdrive/My Drive/BT4222/Code (Final Submission)/Scam Models/Saved Models'

# Each model is restored from its own sub-path; names mirror the keys used in `datasets`.
model_tfidf = load_model(SAVED_MODELS_DIR + '/model_Tfidf')
model_tfidf_glove = load_model(SAVED_MODELS_DIR + '/model_Tfidf_glove')
model_tfidf_cc = load_model(SAVED_MODELS_DIR + '/model_Tfidf_cc')
model_infersent = load_model(SAVED_MODELS_DIR + '/model_Infersent')

In [None]:
# Registry of everything needed to evaluate each embedding: its train/test
# features, the matching labels, and the fitted model. The three TF-IDF
# variants share `y_train` / `y_test`; InferSent uses its own label objects.
datasets = {
    name: {
        'X_train': train_features,
        'y_train': train_labels,
        'X_test': test_features,
        'y_test': test_labels,
        'Model': fitted_model,
    }
    for name, train_features, train_labels, test_features, test_labels, fitted_model in (
        ('Tfidf', X_train_tfidf, y_train, X_test_tfidf, y_test, model_tfidf),
        ('Tfidf_cc', X_train_tfidf_cc, y_train, X_test_tfidf_cc, y_test, model_tfidf_cc),
        ('Tfidf_glove', X_train_tfidf_glove, y_train, X_test_tfidf_glove, y_test, model_tfidf_glove),
        ('Infersent', X_train_infersent, y_train_infersent, X_test_infersent, y_test_infersent, model_infersent),
    )
}

# Metric table, one zero-initialised row per registered dataset; filled in by
# the evaluation loop.
results = {
    name: {'Accuracy': 0, 'AUC': 0, 'FPR': 0, 'Sensitivity': 0, 'F-Score': 0}
    for name in datasets
}

In [None]:
def predict(model, X_test, y_test):
    """Score a trained binary classifier on a held-out test set.

    Parameters
    ----------
    model
        Object exposing Keras-style ``evaluate`` and ``predict`` methods.
        ``evaluate`` must return exactly two values, unpacked here as
        ``(loss, accuracy)`` -- presumably the model was compiled with
        accuracy as its only metric; TODO confirm.
    X_test
        Test features, passed unchanged to ``model.evaluate`` / ``model.predict``.
    y_test
        Ground-truth labels for ``X_test`` (assumed to be 0/1 -- TODO confirm).

    Returns
    -------
    tuple
        ``(accuracy, auc, fpr, tpr, f1)``: accuracy as reported by
        ``model.evaluate``, ROC AUC of the raw scores, false-positive rate,
        true-positive rate (sensitivity) and F1 of the 0.5-thresholded labels.
    """
    # The loss is not reported, so only the accuracy is kept.
    _, accuracy = model.evaluate(X_test, y_test)

    # Run inference once and reuse the raw scores for both the thresholded
    # class labels and the AUC (a second forward pass would give the same
    # numbers at twice the cost).
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob > 0.5).astype("int32")

    auc = roc_auc_score(y_test, y_pred_prob)

    # For a binary problem the flattened confusion matrix is (tn, fp, fn, tp).
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    fpr = fp / (fp + tn)
    tpr = tp / (tp + fn)

    f1 = f1_score(y_test, y_pred)
    return (accuracy, auc, fpr, tpr, f1)

# Evaluate every registered model on its own test split and store the metrics.
# The loop deliberately avoids assigning to the notebook-level names `model`,
# `X_test` and `y_test`: rebinding them here would leave them pointing at the
# last dataset, so re-running the cell that builds `datasets` afterwards would
# silently pair the TF-IDF features with the wrong labels.
for name, entry in datasets.items():
    accuracy, auc, fpr, tpr, f1 = predict(entry['Model'], entry['X_test'], entry['y_test'])
    results[name].update({
        'Accuracy': accuracy,
        'AUC': auc,
        'FPR': fpr,
        'Sensitivity': tpr,
        'F-Score': f1,
    })



In [None]:
# Render the metric table as indented JSON with alphabetically sorted keys.
report = json.dumps(results, sort_keys=True, indent=4)
print(report)

{
    "Infersent": {
        "AUC": 0.8537597746729422,
        "Accuracy": 0.8147100210189819,
        "F-Score": 0.8680765357502518,
        "FPR": 0.34977578475336324,
        "Sensitivity": 0.890495867768595
    },
    "Tfidf": {
        "AUC": 0.9975518478091376,
        "Accuracy": 0.97062748670578,
        "F-Score": 0.9771309771309772,
        "FPR": 0.03018867924528302,
        "Sensitivity": 0.9710743801652892
    },
    "Tfidf_cc": {
        "AUC": 0.9973920162170592,
        "Accuracy": 0.9826435446739197,
        "F-Score": 0.9865841073271415,
        "FPR": 0.026415094339622643,
        "Sensitivity": 0.987603305785124
    },
    "Tfidf_glove": {
        "AUC": 0.9971698113207548,
        "Accuracy": 0.9732977151870728,
        "F-Score": 0.9794661190965094,
        "FPR": 0.04905660377358491,
        "Sensitivity": 0.9855371900826446
    }
}
