In [1]:
# sms_spam_classifier.ipynb

# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
import pickle

# Load the dataset (read as latin-1 rather than pandas' default UTF-8)
data = pd.read_csv('spam.csv', encoding='latin-1')

# Inspect the dataset
# NOTE: Jupyter only renders a bare expression when it is the last statement
# of a cell, so this preview is not displayed from the middle of the cell.
data.head()

# Preprocess the data
# Keep only the label and text columns. The explicit .copy() makes `data` an
# independent frame, so the mutations below do not write into a slice of the
# frame returned by read_csv (avoids pandas' SettingWithCopyWarning).
data = data[['v1', 'v2']].copy()
data.columns = ['label', 'message']

# Encode labels (ham: 0, spam: 1)
label_codes = {'ham': 0, 'spam': 1}
encoded_labels = data['label'].map(label_codes)

# Series.map yields NaN for any value missing from the mapping; fail here with
# a clear message instead of letting the NaNs reach model.fit.
unmapped = encoded_labels.isna()
if unmapped.any():
    unexpected = sorted(data.loc[unmapped, 'label'].astype(str).unique())
    raise ValueError(
        "Unexpected label value(s) in column 'v1': {}".format(unexpected)
    )
data['label'] = encoded_labels

# Split the data into features and labels
X = data['message']
y = data['label']

# Create a pipeline with TF-IDF and MultinomialNB
model = make_pipeline(TfidfVectorizer(), MultinomialNB())

# Train the model (fits the vectorizer and the classifier on the full dataset;
# no hold-out split is made here)
model.fit(X, y)

# Save the trained model (the whole pipeline: vectorizer + classifier)
with open("spam_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

# Save the TF-IDF vectorizer separately
# make_pipeline names each step after its lowercased class name, so this is
# the same fitted vectorizer object that is already inside spam_model.pkl.
tfidf = model.named_steps['tfidfvectorizer']
with open("tfidf_vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(tfidf, vectorizer_file)

print("Model and vectorizer saved successfully.")


Model and vectorizer saved successfully.
