In [54]:
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.metrics import f1_score

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Directory the pickled models (*.pkl) are loaded from.
# Exactly one option should be active at a time; switch experiments by moving
# the comment marker rather than stacking assignments (a second assignment
# silently overrides the first).
# root_load_model_path = '../../../models/sentiment_analysis/hyperparameter_tuning'
root_load_model_path = '../../../models/sentiment_analysis/hyperparameter_tuning_and_oversampling'
# root_load_model_path = '../../../models/sentiment_analysis/baseline'
# root_load_model_path = '../../../models/sentiment_analysis/oversampling'

In [57]:
# Name fragments used to build the model file names:
# '<classifier>_with_<vectorizer>.pkl' under root_load_model_path.
vectorizers = [
    'tfidf_vectorizer',
    'count_vectorizer',
]
classifiers = [
    'logistic_regression',
    'linear_svc',
    'multinomial_nb',
    'decision_tree',
    'random_forest',
]

# Maps '<classifier>_with_<vectorizer>' -> unpickled model object.
models = {}

for c in classifiers:
    for v in vectorizers:
        name = f'{c}_with_{v}'
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file, so only load model files produced by a trusted source.
        # The `with` block guarantees the file handle is closed after loading.
        with open(f'{root_load_model_path}/{name}.pkl', 'rb') as model_file:
            models[name] = pickle.load(model_file)

In [58]:
# Read the processed reviews (gzip-compressed JSON, one record per row)
# into a DataFrame.
df_reviews = pd.read_json(
    '../../../data/processed/reviews.json.gz',
    orient="records",
    compression="gzip",
)

In [None]:
# Single-column frames: the text fed to the models and its sentiment label.
x = df_reviews.loc[:, ['cleaned_review']]
y = df_reviews.loc[:, ['sentiment']]

# Hold out 25% of the rows as the test set; the fixed seed makes the split
# reproducible.
# NOTE(review): presumably this must mirror the split used when the models
# were trained, otherwise test rows may have been seen in training - confirm.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=42,
)

# Plain arrays of test texts / labels, as consumed by the evaluation loop.
x_test_final = x_test.loc[:, 'cleaned_review'].values
y_test_final = y_test.loc[:, 'sentiment'].values

In [59]:
def cal_sensitivity(cm):
    """Return the sensitivity (true positive rate), rounded to 2 decimals.

    Parameters
    ----------
    cm : 2x2 array supporting ``cm[row, col]`` indexing
        Confusion matrix where ``cm[1, 0]`` holds the false negatives and
        ``cm[1, 1]`` the true positives (class 1 is the positive class).

    Returns
    -------
    float
        TP / (FN + TP), rounded to two decimal places.
    """
    false_neg, true_pos = cm[1, 0], cm[1, 1]
    actual_positives = float(false_neg + true_pos)
    return round(true_pos / actual_positives, 2)

def cal_specificity(cm):
    """Return the specificity (true negative rate), rounded to 2 decimals.

    Parameters
    ----------
    cm : 2x2 array supporting ``cm[row, col]`` indexing
        Confusion matrix where ``cm[0, 0]`` holds the true negatives and
        ``cm[0, 1]`` the false positives (class 0 is the negative class).

    Returns
    -------
    float
        TN / (TN + FP), rounded to two decimal places.
    """
    true_neg, false_pos = cm[0, 0], cm[0, 1]
    actual_negatives = float(true_neg + false_pos)
    return round(true_neg / actual_negatives, 2)

def comparison_table(f1_weighted_score_dict):
    """Turn a ``{model name: score}`` mapping into a one-column DataFrame.

    Parameters
    ----------
    f1_weighted_score_dict : dict
        Maps each model name to its score.

    Returns
    -------
    pandas.DataFrame
        Indexed by the dict keys (in insertion order), with a single
        ``'f1_weighted_score'`` column holding the dict values.
    """
    model_names = list(f1_weighted_score_dict.keys())
    scores = list(f1_weighted_score_dict.values())
    return pd.DataFrame({'f1_weighted_score': scores}, index=model_names)

In [60]:
# Evaluate every loaded model on the held-out test set and collect each
# model's weighted-average F1 score, keyed by model name.
f1_weighted_score_test_dict = {}

for key, model in models.items():
    print(f"Evaluate Model [{key}]:")
    print(model)

    y_pred = model.predict(x_test_final)
    f1_weighted_score_test = f1_score(y_test_final, y_pred, average='weighted')
    f1_weighted_score_test_dict[key] = f1_weighted_score_test

    print('\nEvaluation using hold-out validation (test set):')
    print(f'weighted average f1 score: {f1_weighted_score_test}')

    print("\nClassification Report:")
    print(classification_report(y_test_final, y_pred, labels=[0, 1]))

    print("\nConfusion Matrix:")
    # labels=[0, 1] pins the row/column order so that cm[1, 1] is the
    # true-positive cell expected by cal_sensitivity / cal_specificity.
    cm = confusion_matrix(y_test_final, y_pred, labels=[0, 1])
    cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
    fig, ax = plt.subplots(figsize=(4, 4))
    cm_display.plot(ax=ax)
    plt.show()
    # Release the figure explicitly: one figure is created per model, and
    # unclosed figures accumulate in memory on backends that keep them open.
    plt.close(fig)
    print("sensitivity (true positive): {}".format(cal_sensitivity(cm)))
    print("specificity (true negative): {}".format(cal_specificity(cm)))

    print('----------------------------------------------------------\n')

In [62]:
# Summarise the weighted-average F1 score of every evaluated model in one table.
print('Evaluation Metric for Different Models Using Testing Set:')
test_score_table = comparison_table(f1_weighted_score_test_dict)
print(test_score_table)

                                           weighted_avg_f1_score
logistic_regression_with_tfidf_vectorizer               0.888771
logistic_regression_with_count_vectorizer               0.888222
linear_svc_with_tfidf_vectorizer                        0.890962
linear_svc_with_count_vectorizer                        0.892553
multinomial_nb_with_tfidf_vectorizer                    0.880817
multinomial_nb_with_count_vectorizer                    0.892577
decision_tree_with_tfidf_vectorizer                     0.846673
decision_tree_with_count_vectorizer                     0.862782
random_forest_with_tfidf_vectorizer                     0.915327
random_forest_with_count_vectorizer                     0.891617
