### Spam Detection Using Naive Bayes

In [9]:

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
import pickle

# Read the labelled message dataset from disk, decoding the file as latin-1
# instead of pandas' default UTF-8.
data = pd.read_csv(
    'spam.csv',
    encoding='latin-1',
)

# Preview the top rows as a quick sanity check on the load
data.head()


Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [10]:

# Remove the placeholder 'Unnamed: N' columns when they are present.
# errors='ignore' skips any of the three that are missing, so a file carrying
# only some of them no longer raises KeyError (the earlier guard tested for
# 'Unnamed: 2' alone and then dropped all three unconditionally).
data = data.drop(
    columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'],
    errors='ignore',
)

# Give the label/text columns descriptive names. rename() silently skips keys
# that are not present, so this is a no-op when the columns are already
# called 'Category' and 'Message'.
data = data.rename(columns={'v1': 'Category', 'v2': 'Message'})


In [11]:

# Encode the labels as integers: ham -> 0, spam -> 1.
# The mapping is applied only while the column still holds the raw string
# labels. Series.map turns every value missing from the dict into NaN, so
# without this guard a second run of the cell would map the already-encoded
# 0/1 values to NaN and the dropna() below would then discard every row.
label_codes = {'ham': 0, 'spam': 1}
if data['Category'].isin(list(label_codes)).any():
    data['Category'] = data['Category'].map(label_codes)

# Drop rows with a missing value in any column. This also removes rows whose
# label was neither 'ham' nor 'spam', since the mapping above left NaN there.
data = data.dropna()

# Extract features (x) and labels (y)
x = data['Message']
y = data['Category']

# Convert the message text into token-count feature vectors.
# NOTE(review): the vocabulary is learned from every message, i.e. before the
# train/test split that happens in a later cell, so held-out messages
# contribute to it. Consider fitting the vectorizer on the training messages
# only and calling transform() on the test messages.
cv = CountVectorizer()
x = cv.fit_transform(x)


In [12]:

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the multinomial Naive Bayes classifier and fit it on the training
# portion (fit() returns the estimator itself, so the two steps chain).
model = MultinomialNB().fit(x_train, y_train)

# Mean accuracy on the held-out portion, reported as a percentage
accuracy = model.score(x_test, y_test)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 98.57%


In [13]:

# Classify a single hand-written message with the trained model
msg = "You Won 500$"

# The vectorizer expects an iterable of documents, hence the one-element list
data_sample = [msg]
vect = cv.transform(data_sample).toarray()
my_prediction = model.predict(vect)

# Label 1 is spam; anything else is reported as ham
if my_prediction[0] == 1:
    label = 'Spam'
else:
    label = 'Ham'
print(f"Prediction: {label}")


Prediction: Spam


In [14]:

# Save the fitted model and the fitted vectorizer to files.
# Each file is opened in a `with` block so its handle is flushed and closed
# as soon as the dump finishes; passing a bare open(...) to pickle.dump left
# closing the handle to garbage collection.
# NOTE: only unpickle these files from a trusted location, because
# pickle.load can execute arbitrary code.
with open('spam.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('transform.pkl', 'wb') as vectorizer_file:
    pickle.dump(cv, vectorizer_file)
