In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# End-to-end spam/ham text classifier: load a labelled CSV, encode the labels,
# split into train/test sets, vectorize the text with TF-IDF, train a
# Multinomial Naive Bayes model, report metrics, and classify two sample
# messages.

# Load the dataset. The path is machine-specific; point it at the local copy
# of the CSV before running.
file_path = "C:/Users/Angelin mary/Downloads/spam.csv"  # Forward slashes
data = pd.read_csv(file_path)

# The rest of the script addresses the columns by these two names, so the file
# must contain exactly two columns: the label first, the message text second.
data.columns = ['category', 'message']

# Encode the target variable: 'spam' = 1, 'ham' = 0
label_map = {'spam': 1, 'ham': 0}
encoded = data['category'].map(label_map)

# Series.map turns every label that is missing from the mapping into NaN.
# Fail here, naming the offending labels, instead of carrying NaN targets
# into the split and training steps below.
unmapped = encoded.isna()
if unmapped.any():
    bad_labels = sorted(data.loc[unmapped, 'category'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'category' column: {bad_labels}; "
        f"expected one of {sorted(label_map)}"
    )
data['category'] = encoded

# Split the data into training (70%) and testing (30%) sets. random_state pins
# the shuffle so repeated runs produce the same split.
# NOTE(review): the split is not stratified, and the recorded run shows far
# fewer spam rows than ham rows in the test set. Passing
# stratify=data['category'] would keep the class ratio equal in both splits,
# but it would also change the reported metrics - confirm before changing.
X_train, X_test, y_train, y_test = train_test_split(
    data['message'], data['category'], test_size=0.3, random_state=42)

# Convert text to numerical data using TF-IDF Vectorizer.
# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that already-fitted vocabulary so that nothing learned
# from the test set influences the features.
vectorizer = TfidfVectorizer(stop_words='english', max_features=3000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Naive Bayes classifier on the TF-IDF features
classifier = MultinomialNB()
classifier.fit(X_train_tfidf, y_train)

# Make predictions on the held-out test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluate the model: overall accuracy plus per-class precision/recall/F1
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Classify two unseen example messages. They must go through the same fitted
# vectorizer so their features line up with what the classifier was trained on.
new_messages = ["Congratulations! You've won a $1,000 gift card. Click here to claim your prize.",
                "Don't forget our meeting at 10 AM tomorrow."]
new_messages_tfidf = vectorizer.transform(new_messages)
predictions = classifier.predict(new_messages_tfidf)

# Display predictions (1 was encoded as spam above, 0 as ham)
for msg, pred in zip(new_messages, predictions):
    print(f"Message: {msg} | Prediction: {'Spam' if pred == 1 else 'Ham'}")


Accuracy: 0.9808612440191388

Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      1448
           1       0.99      0.87      0.92       224

    accuracy                           0.98      1672
   macro avg       0.98      0.93      0.96      1672
weighted avg       0.98      0.98      0.98      1672

Message: Congratulations! You've won a $1,000 gift card. Click here to claim your prize. | Prediction: Spam
Message: Don't forget our meeting at 10 AM tomorrow. | Prediction: Ham
