In [None]:
# You are building a simple spam detector. From a dataset of 1000 emails, you have the following information:

# 300 emails contain the word "free"
# 400 emails are spam
# 120 emails are both spam and contain the word "free"
# Use this data to compute the probability that an email is spam given that it contains the word "free" — i.e., compute: P(Spam | Free) using Bayes' Theorem.

# P(Spam | Free) = P(Free | Spam) * P(Spam) / P(Free)
# P(Free | Spam) = 120 / 400 = 0.3
# P(Spam) = 400 / 1000 = 0.4
# P(Free) = 300 / 1000 = 0.3
# P(Spam | Free) = (0.3 * 0.4) / 0.3 = 0.4
# Cross-check directly from the counts: P(Spam | Free) = 120 / 300 = 0.4
#
# Answer: P(Spam | Free) = 0.4

In [4]:
# Function to validate inputs
def validate_inputs(total, free, spam, spam_free):
    """Check that the four email counts are mutually consistent.

    Args:
        total: Total number of emails; must be positive.
        free: Number of emails containing the word "free".
        spam: Number of spam emails.
        spam_free: Number of emails that are both spam and contain "free".

    Returns:
        A ``(is_valid, message)`` tuple: ``(True, "")`` when every check
        passes, otherwise ``(False, reason)`` for the first failed check.
    """
    if total <= 0:
        return False, "Total emails must be positive"
    if free < 0 or free > total:
        return False, "Emails with 'free' must be non-negative and not exceed total emails"
    if spam < 0 or spam > total:
        return False, "Spam emails must be non-negative and not exceed total emails"
    if spam_free < 0 or spam_free > spam or spam_free > free:
        return False, "Spam emails with 'free' must be non-negative and not exceed spam or free emails"
    # Inclusion-exclusion: the number of emails that are spam OR contain
    # "free" is spam + free - spam_free, and it cannot exceed the dataset size.
    if spam + free - spam_free > total:
        return False, "Emails that are spam or contain 'free' must not exceed total emails"
    return True, ""

# Function to calculate P(Spam | Free) using Bayes' Theorem
def calculate_spam_probability(total_emails, emails_with_free, spam_emails, spam_and_free):
    """Compute P(Spam | Free) from raw email counts using Bayes' Theorem.

    Args:
        total_emails: Total number of emails in the dataset.
        emails_with_free: Number of emails containing the word "free".
        spam_emails: Number of spam emails.
        spam_and_free: Number of emails that are both spam and contain "free".

    Returns:
        The probability as a float. When the counts fail validation, or when
        no email contains "free" (the conditional probability is then
        undefined), an error-message string is returned instead.
    """
    # Validate inputs
    is_valid, error_message = validate_inputs(total_emails, emails_with_free, spam_emails, spam_and_free)
    if not is_valid:
        return error_message

    # P(Free) would be 0, so conditioning on "free" is undefined; report it
    # the same way as a validation failure rather than raising ZeroDivisionError.
    if emails_with_free == 0:
        return "Cannot compute P(Spam | Free): no emails contain 'free'"

    # With no spam at all, P(Free | Spam) is undefined (division by zero),
    # but validation guarantees spam_and_free is 0 too, so the answer is 0.
    if spam_emails == 0:
        return 0.0

    # Calculate probabilities
    p_spam = spam_emails / total_emails  # P(Spam)
    p_free = emails_with_free / total_emails  # P(Free)
    p_free_given_spam = spam_and_free / spam_emails  # P(Free | Spam)

    # Apply Bayes' Theorem: P(Spam | Free) = P(Free | Spam) * P(Spam) / P(Free)
    p_spam_given_free = (p_free_given_spam * p_spam) / p_free

    return p_spam_given_free

# Given data
total_emails = 1000
emails_with_free = 300
spam_emails = 400
spam_and_free = 120

# Calculate and print result
result = calculate_spam_probability(total_emails, emails_with_free, spam_emails, spam_and_free)
# calculate_spam_probability returns an error-message string (not a float)
# when the counts are invalid; formatting a str with ':.4f' would raise
# ValueError, so report the message as-is in that case.
if isinstance(result, str):
    print(f"Error: {result}")
else:
    print(f"P(Spam | Free) = {result:.4f}")

P(Spam | Free) = 0.4000
