In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [None]:
# Load the raw dataset, requesting latin-1 decoding explicitly.
# When the CSV is absent, report it and continue with an empty frame so this
# cell still completes.
csv_path = 'spam_nb.csv'
try:
    df = pd.read_csv(csv_path, encoding='latin-1')
except FileNotFoundError:
    df = pd.DataFrame()
    print("File not found. Please ensure 'spam_nb.csv' is present.")

df.head()

In [None]:
# Build the modelling frame from the raw data without mutating `df`:
# `drop` returns a new frame, so re-running this cell always starts from the
# same input.
# Drop unnecessary columns
unnamed_cols = [col for col in df.columns if 'Unnamed' in str(col)]
df_p = df.drop(columns=unnamed_cols)

# Give the raw v1/v2 columns descriptive names when that layout is present.
if 'v1' in df_p.columns and 'v2' in df_p.columns:
    df_p = df_p.rename(columns={'v1': 'Label', 'v2': 'Message'})

# Fail early with a readable message instead of a bare KeyError
# (e.g. when the loading cell fell back to an empty frame).
missing_cols = [col for col in ('Label', 'Message') if col not in df_p.columns]
if missing_cols:
    raise KeyError(
        f"Expected column(s) {missing_cols} not found; available columns: {list(df_p.columns)}"
    )

# Encode Target: spam -> 1, ham -> 0. Labels are trimmed and lower-cased first
# so variants such as 'Spam' or ' ham ' are not silently mapped to NaN.
normalized_label = df_p['Label'].astype(str).str.strip().str.lower()
df_p['Target'] = normalized_label.map({'spam': 1, 'ham': 0})

# Rows with an unrecognised label or a missing message cannot be vectorised or
# used as a training target; drop them here and say so explicitly.
unusable = df_p['Target'].isna() | df_p['Message'].isna()
if unusable.any():
    print(f"Dropping {int(unusable.sum())} row(s) with an unknown label or a missing message.")
df_p = df_p.loc[~unusable].astype({'Target': 'int64'})
df_p.head()

In [None]:
# Split the raw messages *before* building the vocabulary, so that no word
# from the held-out messages influences the features the model is trained on.
# Fitting CountVectorizer on the full corpus first would leak test-set
# vocabulary into training.
y = df_p['Target']
messages_train, messages_test, y_train, y_test = train_test_split(
    df_p['Message'], y, test_size=0.2, random_state=42
)

# Learn the vocabulary from the training messages only, then apply that fixed
# vocabulary to the held-out messages (words unseen in training are ignored).
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(messages_train)
X_test = vectorizer.transform(messages_test)

# Document-term matrix for all messages under the training vocabulary; kept so
# the name `X` remains available alongside `y`.
X = vectorizer.transform(df_p['Message'])

In [None]:
# Train a multinomial Naive Bayes classifier on the training split, then
# predict labels for the held-out split.
model = MultinomialNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Compute overall accuracy and the per-class report for the held-out
# predictions, then print both.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)

In [None]:
# Pin the class order explicitly (0 = ham, 1 = spam, as encoded in Target) so
# the matrix is always 2x2 and its rows/columns line up with the tick labels
# below, even if one class happens to be absent from y_test or y_pred.
class_ids = [0, 1]
class_names = ['Ham', 'Spam']
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

# Explicit figure/axes interface; rows are actual classes, columns predicted.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()