# 📈 Model Evaluation & Feature Analysis

In [None]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# NOTE(review): TfidfVectorizer is not referenced by name in this cell; the
# import is kept in case other cells rely on it.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Load the preprocessed reviews: 'processed' holds the text fed to the
# vectorizer and 'sentiment' holds the target label.
df = pd.read_csv('../models/processed_reviews.csv')
# pandas reads empty CSV fields back as NaN, and TfidfVectorizer.transform
# rejects NaN documents. Replacing them with '' keeps the row count (and
# therefore the split below) unchanged while making every document a string.
X = df['processed'].fillna('')
y = df['sentiment']

# NOTE: joblib.load unpickles arbitrary objects; only load artifacts that were
# produced by this project / a trusted source.
tfidf = joblib.load('../models/tfidf_vectorizer.pkl')
model = joblib.load('../models/best_model.pkl')

# Stratified 80/20 split with a fixed seed so the held-out set is reproducible.
# NOTE(review): presumably this mirrors the split used when the model was
# trained; if it does not, the "test" rows may have been seen in training.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
# Only transform (never re-fit) so the features match the loaded model's vocabulary.
X_test_vec = tfidf.transform(X_test)

In [None]:
# Score the held-out split with the loaded model and print per-class metrics.
y_pred = model.predict(X_test_vec)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Pin the matrix row/column order to model.classes_. Without `labels`,
# confusion_matrix orders by the labels that happen to appear in
# y_test/y_pred, which can disagree with the tick labels below when a class
# is absent from both.
class_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
# Show the tokens with the largest positive and negative weights for models
# that expose linear coefficients via `coef_`.
if hasattr(model, 'coef_'):
    feature_names = tfidf.get_feature_names_out()

    coef_matrix = model.coef_
    # Some linear models expose coef_ as a scipy sparse matrix, which
    # np.argsort cannot rank; densify it first.
    if hasattr(coef_matrix, 'toarray'):
        coef_matrix = coef_matrix.toarray()
    # Normalise to 2-D so a single coefficient vector and a per-class matrix
    # are handled by the same loop.
    coef_matrix = np.atleast_2d(np.asarray(coef_matrix))

    n_rows = coef_matrix.shape[0]
    class_labels = getattr(model, 'classes_', ())
    # Rows map one-to-one onto classes only when the counts agree (e.g.
    # one-vs-rest); otherwise (e.g. one-vs-one) fall back to the row index.
    rows_match_classes = n_rows == len(class_labels)

    for row_idx, coefs in enumerate(coef_matrix):
        # With a single row the output is exactly the original report; with
        # several rows each one gets a header instead of silently showing
        # only row 0.
        if n_rows > 1:
            if rows_match_classes:
                print(f"\n=== Coefficients for class: {class_labels[row_idx]} ===")
            else:
                print(f"\n=== Coefficient row {row_idx} ===")

        ranked = np.argsort(coefs)
        top_pos = ranked[-10:]
        top_neg = ranked[:10]
        print("Top Positive Features:")
        # argsort is ascending, so reverse to list the largest weight first.
        for i in reversed(top_pos):
            print(f"{feature_names[i]}: {coefs[i]:.4f}")
        print("\nTop Negative Features:")
        for i in top_neg:
            print(f"{feature_names[i]}: {coefs[i]:.4f}")
else:
    # Make the skip explicit instead of leaving the cell output empty.
    print(f"{type(model).__name__} does not expose coef_; skipping feature analysis.")