In [5]:
import os

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline



In [21]:
# Location of the spam dataset CSV. Set the SPAM_CSV_PATH environment variable
# to point at your own copy; when it is unset, the original author's local
# path is used so existing setups keep working unchanged.
SPAM_CSV_PATH = os.environ.get(
    "SPAM_CSV_PATH", r"C:\Users\jefin\Downloads\spam.csv"
)

# The file is decoded as latin-1 (presumably because it is not valid UTF-8 —
# TODO confirm against the actual file).
data = pd.read_csv(SPAM_CSV_PATH, encoding='latin-1')



In [23]:
# Mapping from the raw string labels to the integer targets used for training.
LABEL_ENCODING = {'spam': 1, 'ham': 0}

# Build the cleaned frame in temporaries so `data` is only rebound once the
# result has been validated (a failed run leaves `data` untouched).
_renamed = data.rename(columns={'v1': 'label', 'v2': 'text'})[['text', 'label']]
_encoded_labels = _renamed['label'].map(LABEL_ENCODING)

# Series.map yields NaN for any value missing from the mapping. That happens
# for unexpected labels in the file, and also when this cell is re-run on an
# already-encoded frame (0/1 are not keys of LABEL_ENCODING). Fail loudly
# instead of silently training on NaN targets.
_unmapped = _encoded_labels.isna()
if _unmapped.any():
    _examples = sorted(_renamed.loc[_unmapped, 'label'].astype(str).unique())[:5]
    raise ValueError(
        "Cannot encode labels: expected only 'spam'/'ham' but found "
        f"{_examples}. If this cell was already executed, re-run the "
        "data-loading cell first."
    )

# .assign returns a new frame, avoiding column assignment on a slice of the
# original (the pattern that triggers SettingWithCopyWarning).
data = _renamed.assign(label=_encoded_labels)


In [25]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
messages, targets = data['text'], data['label']
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    targets,
    test_size=0.2,
    random_state=42,
)

In [27]:
# Text-classification pipeline, applied in order:
#   vect  - turn raw text into Bag-of-Words token counts
#   tfidf - re-weight the counts with TF-IDF
#   clf   - multinomial Naive Bayes classifier on the weighted features
classifier_steps = [
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MultinomialNB()),
]
pipeline = Pipeline(steps=classifier_steps)

In [29]:
# Fit on the training split, then predict labels for the held-out split.
# (Pipeline.fit returns the fitted pipeline, so the calls can be chained.)
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Compute the metrics first, then print them together.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n")
print(report)

Accuracy: 0.96
Classification Report:

              precision    recall  f1-score   support

           0       0.96      1.00      0.98       965
           1       1.00      0.72      0.84       150

    accuracy                           0.96      1115
   macro avg       0.98      0.86      0.91      1115
weighted avg       0.96      0.96      0.96      1115



In [31]:
# Two hand-written messages used as a quick sanity check of the fitted pipeline.
example_emails = [
    "Congratulations! You've won a $1,000 gift card. Click here to claim now!",
    "Hey, are we still on for the meeting tomorrow? Let me know."
]

predictions = pipeline.predict(example_emails)

# Label 1 is reported as Spam; anything else is reported as Ham.
for message, predicted in zip(example_emails, predictions):
    if predicted == 1:
        verdict = 'Spam'
    else:
        verdict = 'Ham'
    print(f"Email: {message}\nPredicted Label: {verdict}\n")


Email: Congratulations! You've won a $1,000 gift card. Click here to claim now!
Predicted Label: Spam

Email: Hey, are we still on for the meeting tomorrow? Let me know.
Predicted Label: Ham

