In [1]:
pip install --upgrade pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import re
import string
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [3]:
# Read the CSV and keep only the two columns used below: v1 (class name) and v2 (message text).
raw_columns = ["v1", "v2"]
data = pd.read_csv("spam.csv", encoding="latin1")[raw_columns]
data.columns = ["label", "message"]

# Encode the class names as integers: ham -> 0, spam -> 1.
# Series.map leaves NaN for any value that is not a key of the mapping.
label_codes = {"ham": 0, "spam": 1}
data["label"] = data["label"].map(label_codes)

In [4]:
# Discard every row that has a missing value in any column
# (axis=0 / how="any" are the dropna defaults, spelled out for the reader).
data = data.dropna(axis=0, how="any")


In [5]:
def preprocess_text(text):
    """Normalise a message before it is vectorised.

    The text is lower-cased, every character in ``string.punctuation`` is
    removed, and every run of digits is removed. Whitespace is left untouched.

    Args:
        text: The message as a ``str``.

    Returns:
        The cleaned ``str``.
    """
    text = text.lower()
    # string.punctuation contains regex-special characters (backslash, "]", "^", "-").
    # Unescaped, the backslash escapes the following "]" and is itself never matched,
    # so backslashes survived the cleanup; re.escape makes every character a literal.
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    text = re.sub(r"\d+", "", text)
    return text


In [6]:
# Run preprocess_text over every message, then write the cleaned text back into the frame.
cleaned_messages = data["message"].apply(preprocess_text)
data["message"] = cleaned_messages


In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
messages = data["message"]
labels = data["label"]
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    labels,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Pipeline: TF-IDF features feeding a multinomial Naive Bayes classifier, both with default settings.
vectorizer = TfidfVectorizer()
classifier = MultinomialNB()
model = make_pipeline(vectorizer, classifier)

# Left as the cell's final expression so the fitted pipeline is shown as the cell output.
model.fit(X_train, y_train)


Pipeline(steps=[('tfidfvectorizer', TfidfVectorizer()),
                ('multinomialnb', MultinomialNB())])

In [9]:
# Score the fitted pipeline on the held-out messages and report accuracy as a percentage.
y_pred = model.predict(X_test)
accuracy_percent = accuracy_score(y_test, y_pred) * 100
print(f"Accuracy: {accuracy_percent:.2f}%")


Accuracy: 95.16%


In [10]:
# Serialise the fitted pipeline to disk with pickle.
model_path = "spam_detector.pkl"
with open(model_path, "wb") as model_file:
    pickle.dump(model, model_file)

print("Model trained and saved successfully!")


Model trained and saved successfully!


In [11]:
def predict_sms(text, model_path="spam_detector.pkl"):
    """Classify one message as "Spam" or "Not Spam" using a pickled model.

    Args:
        text: The raw message; it is run through ``preprocess_text`` before
            being handed to the model.
        model_path: Path of the pickled model to load. Defaults to
            "spam_detector.pkl", so existing one-argument calls behave as before.

    Returns:
        "Spam" if the model's prediction equals 1, otherwise "Not Spam".
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point model_path at files you created or otherwise trust.
    with open(model_path, "rb") as model_file:
        classifier = pickle.load(model_file)
    cleaned_text = preprocess_text(text)
    # The model is called with a one-element list; take the single prediction back out.
    predicted_label = classifier.predict([cleaned_text])[0]
    return "Spam" if predicted_label == 1 else "Not Spam"



In [14]:
# Prompt for a message, classify it, and echo the message together with the verdict.
user_sms = input("Enter an SMS message: ")
prediction = predict_sms(user_sms)
print(f"Message: {user_sms}\nPrediction: {prediction}")


Enter an SMS message: "Congratulations! You've won a [prize/gift card]!" 
Message: "Congratulations! You've won a [prize/gift card]!" 
Prediction: Spam
