# In [None]:  (Jupyter cell marker, kept as a comment so the file parses as Python)
import pandas as pd
import string
import warnings
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Load the raw CSV (decoded as latin-1) and keep only the two columns used
# below, renaming them to descriptive names.
df = pd.read_csv("/content/spam.csv", encoding="latin-1")[['v1', 'v2']]
df.columns = ['label', 'message']

# Encode the target as integers: spam -> 1, ham -> 0.
# Series.map with a dict turns any value that is not a key into NaN, so a
# stray label (different casing, surrounding whitespace, missing cell) would
# otherwise slip through silently. Fail early with a clear message instead.
label_codes = {'spam': 1, 'ham': 0}
encoded_labels = df['label'].map(label_codes)
unmapped_labels = df.loc[encoded_labels.isna(), 'label']
if not unmapped_labels.empty:
    raise ValueError(
        "Unexpected label value(s) in column 'v1': "
        f"{sorted(unmapped_labels.astype(str).unique())}; "
        f"expected one of {sorted(label_codes)}"
    )
df['label'] = encoded_labels

# Built once at import time: maps every ASCII punctuation character to None so
# that str.translate deletes it (rebuilding the table per call is wasted work).
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Return *text* lower-cased with ASCII punctuation removed.

    Args:
        text: The raw message. Strings are the normal case. ``None`` and
            float NaN yield an empty string; any other non-string value is
            converted with ``str()`` before cleaning.

    Returns:
        The normalised string.
    """
    if not isinstance(text, str):
        # NaN is the only float that does not compare equal to itself.
        if text is None or (isinstance(text, float) and text != text):
            return ""
        text = str(text)
    return text.lower().translate(_PUNCTUATION_TABLE)

# Normalise every message with clean_text before it reaches the vectorizer.
df['message'] = df['message'].apply(clean_text)

# Hold out 20% of the rows for evaluation, with a fixed seed for
# reproducibility. The split is stratified on the label so that both parts
# keep the same spam/ham proportion; without it a skewed label distribution
# can make the reported metrics depend on the luck of the draw.
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Bag-of-words counts -> TF-IDF weighting -> multinomial Naive Bayes.
# Wrapping the steps in a Pipeline means the vocabulary and IDF weights are
# fitted on the training split only.
model = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('classifier', MultinomialNB())
])

model.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

def predict_spam(message):
    """Classify one raw message and return "Spam" or "Not spam".

    The message is normalised with ``clean_text`` and handed to the fitted
    ``model`` as a one-element batch; a predicted label of 1 means spam.
    """
    cleaned = clean_text(message)
    predicted_labels = model.predict([cleaned])
    label = predicted_labels[0]
    if label == 1:
        return "Spam"
    return "Not spam"

# Try the classifier on one sample message. The text is bound once so the
# message that is displayed and the message that is classified cannot drift
# apart.
sample_message = "Congratulations! You won a free prize! Claim now."
print(f"\nTest Message: '{sample_message}'")
print("Prediction:", predict_spam(sample_message))