In [7]:
# ============================
# 1. Import Required Libraries
# ============================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from scipy.sparse import csr_matrix

# ============================
# 2. Load and Prepare Dataset
# ============================
# Read the CSV (swap in a full path if the file is not in the working
# directory) and discard the "Email No." identifier column right away.
df = pd.read_csv("emails.csv").drop(columns=["Email No."])

# "Prediction" holds the target label; every other column is a feature.
y = df["Prediction"]
X = df.drop(columns=["Prediction"])

# Store the feature table as a SciPy CSR sparse matrix to save memory.
X_sparse = csr_matrix(X.to_numpy())

# ============================
# 3. Train/Test Split
# ============================
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X_sparse, y, random_state=42, test_size=0.2)

# ============================
# 4. Train Naive Bayes Model
# ============================
# Build a multinomial Naive Bayes classifier with its default settings and
# fit it on the training split; fit() returns the estimator itself, so the
# fitted model can be bound in a single statement.
model = MultinomialNB().fit(X_train, y_train)

# ============================
# 5. Evaluate Model
# ============================
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Report each metric in turn: every entry pairs the heading to print with the
# scikit-learn function that scores the predictions against the true labels.
for heading, metric in (
    ("✅ Accuracy:", accuracy_score),
    ("\n📊 Confusion Matrix:\n", confusion_matrix),
    ("\n📄 Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))


✅ Accuracy: 0.9545893719806763

📊 Confusion Matrix:
 [[704  35]
 [ 12 284]]

📄 Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.95      0.97       739
           1       0.89      0.96      0.92       296

    accuracy                           0.95      1035
   macro avg       0.94      0.96      0.95      1035
weighted avg       0.96      0.95      0.96      1035

