## Packages and Assets

In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report

import seaborn as sns

import matplotlib.pyplot as plt

import pickle


## Dependencies

In [None]:
# Read the preprocessed test split and keep its 'label' column as the ground truth
# that every model in this notebook is scored against.
test = pd.read_csv(filepath_or_buffer='../../assets/data/splits/test/preprocessed.csv')
y_test = test.loc[:, 'label']

In [None]:
# Each pickle holds a two-item bundle that unpacks into (vectorizer, best model).
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so these
# files must come from a trusted source.
with open('../../assets/traditional_assets/cv_set.pkl', mode='rb') as cv_file:
    cv_vec, cv_best_model = pickle.load(cv_file)

with open('../../assets/traditional_assets/tfidf_set.pkl', mode='rb') as tfidf_file:
    tfidf_vec, tfidf_best_model = pickle.load(tfidf_file)


## Functions

In [None]:
def viewPredictedRows(X_test, y_test, y_pred):
    """Build a row-by-row comparison of true labels and predictions.

    Parameters
    ----------
    X_test
        Values stored in the 'title' column, one per row.
    y_test
        True labels.
    y_pred
        Predicted labels, same length as ``y_test``.

    Returns
    -------
    pandas.DataFrame
        Columns 'y_test', 'y_pred', 'correct' and 'title' (in that order), where
        'correct' is the string 'Correct' when the two labels are equal and
        'Incorrect' otherwise.
    """
    def _verdict(is_match):
        # Turn the boolean comparison result into a readable tag.
        if is_match:
            return 'Correct'
        return 'Incorrect'

    table = pd.DataFrame({'y_test': y_test, 'y_pred': y_pred})
    matches = table['y_test'] == table['y_pred']
    table['correct'] = matches.apply(_verdict)
    table['title'] = X_test
    return table


In [None]:
def show_confusion_matrix(cm, labels=('Negative', 'Positive')):
    """Print a heading and draw a confusion matrix as an annotated heatmap.

    Parameters
    ----------
    cm
        Confusion matrix to plot (rows are the truth, columns the predictions).
        Cells are annotated with the integer format 'd', so the values are
        expected to be integer counts.
    labels
        Class names used for both axes' tick labels, in the same order as the
        rows/columns of ``cm``. Defaults to the binary ('Negative', 'Positive')
        pair so existing single-argument calls behave as before.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    tick_labels = list(labels)

    print("Confusion Matrix")
    # Explicit figure/axes pair so the heatmap and its labels target the same axes.
    _, ax = plt.subplots(figsize=(10, 7))

    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Truth')
    ax.set_title('Confusion Matrix')
    plt.show()

## Predictions with the best traditional model for each vectorization

### CountVectorizer

In [None]:
# Vectorize the test titles with cv_vec; .toarray() converts the transformed matrix
# to a dense array so each column can be labelled with the vectorizer's feature name.
X_test = cv_vec.transform(test['title']).toarray()
X_test_names = pd.DataFrame(data=X_test, columns=cv_vec.get_feature_names_out())
# Bare last expression so the notebook renders the labelled feature table.
X_test_names

In [None]:
# Score the cv_vec features with the model loaded alongside that vectorizer, then
# report the per-class metrics and plot the confusion matrix.
y_pred = cv_best_model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))
cm = confusion_matrix(y_test, y_pred)
show_confusion_matrix(cm)

In [None]:
# Pair every test title with its true label, its predicted label and a
# Correct/Incorrect tag for manual inspection.
df_results = viewPredictedRows(X_test=test['title'], y_test=y_test, y_pred=y_pred)
df_results


### TFIDF

In [None]:
# Vectorize the test titles with tfidf_vec; .toarray() converts the transformed matrix
# to a dense array so each column can be labelled with the vectorizer's feature name.
X_test = tfidf_vec.transform(test['title']).toarray()
X_test_names = pd.DataFrame(data=X_test, columns=tfidf_vec.get_feature_names_out())
# Bare last expression so the notebook renders the labelled feature table.
X_test_names

In [None]:
# X_test now holds tfidf_vec features, so it must be scored with tfidf_best_model
# (the model unpickled together with tfidf_vec), not with the CountVectorizer model.
y_pred = tfidf_best_model.predict(X_test)
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
show_confusion_matrix(cm)

In [None]:
# Pair every test title with its true label, its predicted label and a
# Correct/Incorrect tag for manual inspection.
df_results = viewPredictedRows(X_test=test['title'], y_test=y_test, y_pred=y_pred)
df_results