In [3]:
# Import libraries
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# For evaluation
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

# For loading models
import joblib
from tensorflow.keras.models import load_model

# Plot styling: apply seaborn's "whitegrid" theme to the figures drawn below.
sns.set(style="whitegrid")

# Directories, relative to the kernel's working directory, from which this
# notebook reads the test-split CSVs (PROCESSED) and the saved model
# artifacts (MODELS).
MODELS = "../../models/"
PROCESSED = "../../data/processed/"


In [4]:
# Load the test split from PROCESSED.
#
# X_test is the "clean_text" column only. A text that is empty is stored in
# the CSV as an empty field, which read_csv returns as NaN (a float); the
# TF-IDF vectorizer rejects NaN documents at transform time. Replace those
# with "" and force every entry to str. Rows are filled, never dropped, so
# X_test stays row-aligned with y_test.
X_test = (
    pd.read_csv(os.path.join(PROCESSED, "X_test.csv"))["clean_text"]
    .fillna("")
    .astype(str)
)

# Labels are kept exactly as stored in y_test.csv (a DataFrame).
y_test = pd.read_csv(os.path.join(PROCESSED, "y_test.csv"))


In [5]:
# Load the saved logistic-regression model and its TF-IDF vectorizer, then
# predict on the test texts.
#
# MODELS is a relative path, so it resolves against the kernel's working
# directory. Check both artifacts up front and report their absolute
# locations, so a wrong working directory or a missing artifact is obvious
# from the error message.
logreg_path = os.path.join(MODELS, "logreg_model.pkl")
tfidf_path = os.path.join(MODELS, "tfidf.pkl")

missing_artifacts = [
    os.path.abspath(artifact)
    for artifact in (logreg_path, tfidf_path)
    if not os.path.isfile(artifact)
]
if missing_artifacts:
    raise FileNotFoundError(
        "Model artifact(s) not found: "
        + ", ".join(missing_artifacts)
        + ". Create the artifacts or correct MODELS / the working directory."
    )

# NOTE: joblib.load unpickles arbitrary Python objects -- only load
# artifacts that come from a trusted source.
logreg_model = joblib.load(logreg_path)
tfidf = joblib.load(tfidf_path)

# Transform test set with the already-fitted vectorizer (transform, not fit)
X_test_vec = tfidf.transform(X_test)
y_pred = logreg_model.predict(X_test_vec)

# Per-class precision / recall / F1 summary for the logistic-regression
# predictions, printed under a banner line.
lr_report = classification_report(y_test, y_pred)
print("=== Logistic Regression ===")
print(lr_report)

# Confusion matrix
#
# Fix the row/column order explicitly (sorted union of the true and predicted
# labels, which is what confusion_matrix uses by default) and reuse it for the
# tick labels, so the heatmap axes show the actual class labels instead of
# positional indices.
class_labels = np.unique(np.concatenate([np.ravel(y_test), np.ravel(y_pred)]))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots()
sns.heatmap(
    cm,
    annot=True,
    fmt="d",  # cells hold integer counts
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
# Rows of cm are true labels, columns are predicted labels.
ax.set(title="LR Confusion Matrix", xlabel="Predicted", ylabel="True")
plt.show()

# ROC curve
#
# Column 1 of predict_proba holds the probability of logreg_model.classes_[1].
# Pass that class as pos_label so the scores and the "positive" class always
# agree, and so roc_curve also works when the labels are not {0, 1} / {-1, 1}
# (e.g. strings). For 0/1 labels this is identical to the default.
# The [:, 1] column selection is only meaningful for a two-class model.
positive_class = logreg_model.classes_[1]
positive_scores = logreg_model.predict_proba(X_test_vec)[:, 1]

fpr, tpr, _ = roc_curve(y_test, positive_scores, pos_label=positive_class)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, label=f"LR (AUC = {roc_auc:.2f})")
ax.plot([0, 1], [0, 1], '--')  # diagonal reference line
ax.set(
    title="ROC Curve - Logistic Regression",
    xlabel="False Positive Rate",
    ylabel="True Positive Rate",
)
ax.legend()
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: '../../models/logreg_model.pkl'