In [4]:
# Import required libraries
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Toy spam classifier: bag-of-words features + Multinomial Naive Bayes.
#
# The raw emails are split into train/test sets *before* vectorizing, and the
# vectorizer is fitted on the training emails only. Fitting it on the full
# dataset would let the held-out email shape the vocabulary (train/test
# leakage), so the reported metrics would not reflect unseen data.

# Sample email dataset
emails = [
    "Get rich quick! Click here to win a million dollars!",
    "Hello, could you please review this document for me",
    "Discounts on luxury watches and handbags!",
    "Meeting scheduled for tomorrow, please confirm your attendance.",
    "Congratulations, you've won a free gift card.",
]

# Labels: 1 = Spam, 0 = Not Spam (one label per entry in `emails`)
labels = [1, 0, 1, 0, 1]

# Split the raw text first so the test emails stay unseen during fitting.
# The split depends only on the number of samples and random_state, so the
# same emails are held out as when splitting a pre-vectorized matrix.
emails_train, emails_test, y_train, y_test = train_test_split(
    emails, labels, test_size=0.2, random_state=42
)

# Learn the vocabulary from the training emails only, then reuse it to
# encode the test emails. Words that appear only in test emails are ignored
# by transform(), exactly as they would be for any new, unseen email.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(emails_train)
X_test = vectorizer.transform(emails_test)

# Full dataset encoded with the training vocabulary; kept so that code
# outside this cell that refers to `X` still finds it defined.
X = vectorizer.transform(emails)

# Create and train a Multinomial Naive Bayes classifier on the word counts
model = MultinomialNB()
model.fit(X_train, y_train)

# Make predictions on the held-out test data
y_pred = model.predict(X_test)

# Evaluate the model.
# NOTE: with only five emails the test set is tiny, so these numbers are a
# smoke test of the pipeline rather than a meaningful quality estimate.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

# -------------------------------------------------
# Predict whether a new email is spam or not
# -------------------------------------------------

new_email = ["You've won a free cruise vacation"]

# Encode the new email with the already-fitted vocabulary (transform, not
# fit_transform, so the feature columns match what the model was trained on)
new_email_vectorized = vectorizer.transform(new_email)

# Predict; the result is an array with one label per input email
predicted_label = model.predict(new_email_vectorized)

# Print result
if predicted_label[0] == 0:
    print("Predicted as NOT spam.")
else:
    print("Predicted as SPAM.")

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1

Predicted as SPAM.
