In [16]:
# Dataset
emails = [
    ("win a free lottery ticket", "spam"),
    ("claim your free prize now", "spam"),
    ("let’s meet for lunch", "ham"),
    ("are you coming to the meeting", "ham")
]

totalEmails = len(emails)
classes = set(email[1] for email in emails)

# Vocabulary set (all unique words)
vocab = set()
for text, _ in emails:
    for word in text.split():
        vocab.add(word)


# Count emails in each class
emails_in_class = {}
P_Class = {}

for cls in classes:
    count = 0
    for text, label in emails:
        if label == cls:
            count += 1
    emails_in_class[cls] = count
    P_Class[cls] = count / totalEmails  # Prior probability

print("Emails in each class:", emails_in_class)
print("P(Class):", P_Class)

# Count of each word per class
word_count_per_class = {}
for cls in classes:
    word_count_per_class[cls] = {}
    for word in vocab:
        count = 0
        for text, label in emails:
            if label == cls and word in text:
                count += 1
        word_count_per_class[cls][word] = count

print("Word count per class:", word_count_per_class)

# Calculate P(word|Class) with Laplace 
P_Word_Given_Class = {}
for cls in classes:
    P_Word_Given_Class[cls] = {}
    for word in vocab:
        count = word_count_per_class[cls][word]
        P_Word_Given_Class[cls][word] = (count + 1) / (emails_in_class[cls] + len(vocab))
print("P(word|Class):", P_Word_Given_Class)

new_Email = "let’s win lottery hello"

# classify the email with spam or ham
def classify_email(email):
    scores = {}
    for cls in classes:
        P_Class_Given_Email = P_Class[cls]
        for word in email.split():
            if word in vocab:
                P_Class_Given_Email *= P_Word_Given_Class[cls][word]
        scores[cls] = P_Class_Given_Email
    return max(scores, key=scores.get)

print("Classified email:", classify_email(new_Email))




Emails in each class: {'ham': 2, 'spam': 2}
P(Class): {'ham': 0.5, 'spam': 0.5}
Word count per class: {'ham': {'are': 1, 'to': 1, 'win': 0, 'the': 1, 'your': 0, 'lottery': 0, 'claim': 0, 'now': 0, 'for': 1, 'coming': 1, 'prize': 0, 'lunch': 1, 'meet': 2, 'let’s': 1, 'meeting': 1, 'you': 1, 'ticket': 0, 'a': 1, 'free': 0}, 'spam': {'are': 0, 'to': 0, 'win': 1, 'the': 0, 'your': 1, 'lottery': 1, 'claim': 1, 'now': 1, 'for': 0, 'coming': 0, 'prize': 1, 'lunch': 0, 'meet': 0, 'let’s': 0, 'meeting': 0, 'you': 1, 'ticket': 1, 'a': 2, 'free': 2}}
P(word|Class): {'ham': {'are': 0.09523809523809523, 'to': 0.09523809523809523, 'win': 0.047619047619047616, 'the': 0.09523809523809523, 'your': 0.047619047619047616, 'lottery': 0.047619047619047616, 'claim': 0.047619047619047616, 'now': 0.047619047619047616, 'for': 0.09523809523809523, 'coming': 0.09523809523809523, 'prize': 0.047619047619047616, 'lunch': 0.09523809523809523, 'meet': 0.14285714285714285, 'let’s': 0.09523809523809523, 'meeting': 0.095