In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Step 1: Load the Dataset
# Read the SMS spam CSV (path is relative to the notebook's working directory).
csv_path = 'sms_spam.csv'
df = pd.read_csv(csv_path)

# Show the leading rows as a quick sanity check of the loaded columns.
preview = df.head()
print(preview)

In [None]:
# Step 2: Clean the Data
# Report how many values are missing in each column.
print(df.isnull().sum())

# Replace every non-word character (anything other than a letter, digit or
# underscore) with a space. regex=True must be passed explicitly: since
# pandas 2.0 the default is a literal match, which would search for the
# two-character text "\W" and leave the messages unchanged.
# This step does not tokenize; TfidfVectorizer handles tokenization in the
# feature-extraction step.
df['Message'] = df['Message'].str.replace(r'\W', ' ', regex=True)

In [None]:
# Step 3: Feature Extraction with TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
X_tfidf = vectorizer.fit_transform(df['Message'])
y = df['Label']

In [None]:
# Step 4: Split the Data
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

In [None]:
# Step 5: Train Naive Bayes Classifier
# fit() returns the estimator itself, so construction and training are chained.
nb = MultinomialNB().fit(X_train, y_train)

# Predicted labels for the held-out split, consumed by the evaluation step.
y_pred_nb = nb.predict(X_test)

In [None]:
# Step 6: Evaluate Model Performance
print("Naive Bayes Evaluation:")

# Overall fraction of test messages classified correctly.
print(f"Accuracy: {accuracy_score(y_test, y_pred_nb)}")

# Counts of true labels (rows) against predicted labels (columns).
nb_confusion = confusion_matrix(y_test, y_pred_nb)
print(nb_confusion)

# Per-class precision, recall, F1 and support.
nb_report = classification_report(y_test, y_pred_nb)
print(nb_report)