In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# 1. Read the SMS corpus; the code below uses its 'label' and 'message' columns
df = pd.read_csv("SMSSpamCollection.csv")

# 2. Replace the text labels with integer codes: ham -> 0, spam -> 1
label_codes = {'ham': 0, 'spam': 1}
df['label'] = df['label'].map(label_codes)

# 3. Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# 4. TF-IDF features over unigrams and bigrams with English stop words removed.
#    The vocabulary is fitted on the training messages only and then reused
#    unchanged for the test messages.
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# 5. Fit a multinomial Naive Bayes classifier on the training features
model = MultinomialNB().fit(X_train_tfidf, y_train)

# 6. Predict labels for the held-out messages
y_pred = model.predict(X_test_tfidf)

# 7. Report the scalar metrics, then the per-class breakdown
print("---- Model Performance ----")
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(heading, metric(y_test, y_pred))

report = classification_report(y_test, y_pred, target_names=['Ham', 'Spam'])
print("\nClassification Report:\n", report)

# ---- CUSTOM PREDICTION FUNCTION ----
def predict_message(message):
    """Classify a single message as "Spam" or "Ham".

    The text is converted to features with the module-level fitted
    ``vectorizer`` and then labelled by the module-level trained ``model``.

    Args:
        message: The raw message text to classify.

    Returns:
        "Spam" if the model predicts label 1, otherwise "Ham".
    """
    # The vectorizer works on a collection of documents, so wrap the one message.
    features = vectorizer.transform([message])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Ham"

# ---- INTERACTIVE TESTING ----
# Prompt repeatedly for messages and print the classifier's verdict for each
# one until the user types 'exit' (any letter case, surrounding spaces ignored).
print("\n---- Try Your Own Messages ----")
while True:
    try:
        user_input = input("Enter a message (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted the prompt: end the session
        # cleanly instead of surfacing a traceback.
        print()
        break

    # Judge the command on the trimmed text so "exit " or " EXIT" also quits.
    command = user_input.strip()
    if command.lower() == "exit":
        break
    if not command:
        # A blank line carries nothing to classify; ask again.
        continue
    print("Prediction:", predict_message(user_input))


---- Model Performance ----
Accuracy: 0.9650224215246637
Precision: 1.0
Recall: 0.738255033557047
F1 Score: 0.8494208494208494

Classification Report:
               precision    recall  f1-score   support

         Ham       0.96      1.00      0.98       966
        Spam       1.00      0.74      0.85       149

    accuracy                           0.97      1115
   macro avg       0.98      0.87      0.91      1115
weighted avg       0.97      0.97      0.96      1115


---- Try Your Own Messages ----


Enter a message (or type 'exit' to quit):  WINNER!! As a valued customer you have won a £1000 prize. Text WIN to 80082


Prediction: Spam


Enter a message (or type 'exit' to quit):  I’m running late, will be there in 15 mins.


Prediction: Ham


Enter a message (or type 'exit' to quit):  WINNER!! As a valued customer you have won a £1000 prize. Text WIN to 80082.


Prediction: Spam


Enter a message (or type 'exit' to quit):  Call me once you reach the station


Prediction: Ham


Enter a message (or type 'exit' to quit):  exit
