In [38]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [44]:
# Load dataset
df = pd.read_csv("sms_spam_dataset.csv", encoding="latin-1")

# First look at data
df.head()


Unnamed: 0,message,label
0,"Dear user, your loan of $5000 has been pre-app...",spam
1,Letâs catch up over coffee soon.,ham
2,"Your parcel is waiting. Pay â¹50,000 to relea...",spam
3,Earn up to $999 per day working from home. Joi...,spam
4,Lunch at 1 PM? Let me know.,ham


In [42]:
df = df.rename(columns={'Label': 'label', 'Message': 'message'})
df.head()
df.describe()

Unnamed: 0,message,label
count,400,400
unique,86,2
top,Donât forget the team call at 4 PM.,spam
freq,36,200


In [43]:
df['label_num'] = df['label'].map({'ham':0, 'spam':1})

In [None]:
X = df['message']
y = df['label_num']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
vectorizer = TfidfVectorizer(stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
model = MultinomialNB()
model.fit(X_train_tfidf, y_train)


In [None]:
y_pred = model.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))



In [None]:
sample = ["Win a free iPhone now!!!", "Hey, are we still meeting tomorrow?"]

sample_tfidf = vectorizer.transform(sample)
print(model.predict(sample_tfidf))   # 1 = spam, 0 = ham


In [None]:
pred = model.predict(sample_tfidf)

for msg, label in zip(sample, pred):
    print(f"Message: {msg} --> {'Spam' if label==1 else 'Ham'}")


In [None]:
import joblib

# Save model and vectorizer
joblib.dump(model, "spam_classifier_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")

print("✅ Model and vectorizer saved!")


In [None]:
# Load model and vectorizer
loaded_model = joblib.load("spam_classifier_model.pkl")
loaded_vectorizer = joblib.load("vectorizer.pkl")

# Test with new samples
sample = ["You won $10,000! Claim now!", "Hi, let's catch up tomorrow."]
sample_tfidf = loaded_vectorizer.transform(sample)

print(loaded_model.predict(sample_tfidf))
