In [None]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

# Tiny bag-of-words spam demo: fit a multinomial Naive Bayes model on five
# hand-labelled emails, then classify a handful of unseen ones.

# Each training example is paired with its label (1 = spam, 0 = not spam).
training_examples = [
    ("Free money now", 1),
    ("Win a free lottery", 1),
    ("Hello friend, how are you?", 0),
    ("Meeting at noon", 0),
    ("Win money now", 1),
]
emails = [text for text, _ in training_examples]
labels = [label for _, label in training_examples]

# Learn the vocabulary from the training emails and turn each email into a
# row of per-word counts.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(emails)

print("Training features:", X.toarray(), sep="\n")
print("\nVocabulary:", vectorizer.get_feature_names_out())

# The dataset is far too small to hold out a test split, so every labelled
# example is used for training.
classifier = MultinomialNB().fit(X, labels)

# Unseen emails to classify. Several of them contain words that never occur
# in the training emails; such words are not part of the learned vocabulary
# and contribute nothing to the feature vector, so an email made up entirely
# of unknown words is classified from the class prior alone.
new_emails = [
    "Free trip Jamaica",
    "Win a lottery",
    "Your friend called money",
    "AI take jobs",
    "AI not jobs",
]

# Reuse the already-fitted vocabulary (transform, not fit_transform) so the
# columns line up with what the classifier was trained on.
X_new = vectorizer.transform(new_emails)
predictions = classifier.predict(X_new)

print("\nPredictions:", predictions)
for email, pred in zip(new_emails, predictions):
    if pred == 1:
        verdict = "SPAM"
    else:
        verdict = "NOT SPAM"
    print(f"'{email}' -> {verdict}")

Training features:
[[0 0 1 0 0 0 0 0 1 0 1 0 0]
 [0 0 1 0 0 0 1 0 0 0 0 1 0]
 [1 0 0 1 1 1 0 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 1 0 1 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 1 1 0]]

Vocabulary: ['are' 'at' 'free' 'friend' 'hello' 'how' 'lottery' 'meeting' 'money'
 'noon' 'now' 'win' 'you']

Predictions: [1 1 1 1 1]
'Free trip Jamaica' -> SPAM
'Win a lottery' -> SPAM
'Your friend called money' -> SPAM
'AI take jobs' -> SPAM
'AI not jobs' -> SPAM
