In [2]:
import pandas as pd

# Sample dataset: Email ID, Feature (Word "free" Present), Target (Spam or Not Spam)
data = {
    'Email ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Free (E)': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No'],
    'Spam (H)': ['Spam', 'Not Spam', 'Spam', 'Not Spam', 'Spam', 'Not Spam', 'Spam', 'Not Spam', 'Spam', 'Not Spam']
}

df = pd.DataFrame(data)


def bayes_posterior(frame, evidence_col, evidence_value, target_col, target_value):
    """Estimate P(H|E) from row counts in ``frame`` using Bayes' theorem.

    The hypothesis H is ``frame[target_col] == target_value`` and the evidence
    E is ``frame[evidence_col] == evidence_value``. Every row that does not
    match ``target_value`` is treated as the complementary hypothesis (not H).

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding one row per observation.
    evidence_col, target_col : str
        Column names for the evidence and for the hypothesis label.
    evidence_value, target_value : object
        Values in those columns that count as "E observed" and "H true".

    Returns
    -------
    dict
        Plain-float entries under the keys ``'prior'``, ``'prior_complement'``,
        ``'likelihood'``, ``'likelihood_complement'``, ``'evidence'``,
        ``'posterior'`` and ``'posterior_complement'``.

    Raises
    ------
    ValueError
        If ``frame`` has no rows, or if the evidence never occurs (P(E) == 0),
        in which case the posterior is undefined.
    """
    total = len(frame)
    if total == 0:
        raise ValueError("cannot estimate probabilities from an empty DataFrame")

    # Build each boolean mask once and count with sum() instead of
    # materialising filtered DataFrames just to take their length.
    is_target = frame[target_col] == target_value
    has_evidence = frame[evidence_col] == evidence_value

    n_target = int(is_target.sum())
    n_other = total - n_target

    # Step 1: priors P(H) and P(not H).
    prior = n_target / total
    prior_other = n_other / total

    # Step 2: likelihoods P(E|H) and P(E|not H). A class with no rows has a
    # prior of 0, so its likelihood is reported as 0.0 rather than dividing by 0.
    likelihood = int((has_evidence & is_target).sum()) / n_target if n_target else 0.0
    likelihood_other = int((has_evidence & ~is_target).sum()) / n_other if n_other else 0.0

    # Step 3: evidence P(E) via the law of total probability.
    evidence = likelihood * prior + likelihood_other * prior_other
    if evidence == 0:
        raise ValueError(
            f"{evidence_col!r} never equals {evidence_value!r}; posterior is undefined"
        )

    # Step 4: Bayes' theorem for H and for its complement.
    return {
        'prior': prior,
        'prior_complement': prior_other,
        'likelihood': likelihood,
        'likelihood_complement': likelihood_other,
        'evidence': evidence,
        'posterior': likelihood * prior / evidence,
        'posterior_complement': likelihood_other * prior_other / evidence,
    }


# In this dataset every row is labelled either 'Spam' or 'Not Spam', so the
# complement of 'Spam' is exactly the 'Not Spam' class.
spam_stats = bayes_posterior(df, 'Free (E)', 'Yes', 'Spam (H)', 'Spam')

# Step 1: Calculate Prior Probabilities (P(H))
prior_spam = spam_stats['prior']
prior_not_spam = spam_stats['prior_complement']

# Step 2: Calculate Likelihoods (P(E|H))
# Likelihood of the word "free" given spam (P(free|spam))
likelihood_free_given_spam = spam_stats['likelihood']

# Likelihood of the word "free" given not spam (P(free|not spam))
likelihood_free_given_not_spam = spam_stats['likelihood_complement']

# Step 3: Calculate Evidence (P(E))
evidence = spam_stats['evidence']

# Step 4: Apply Bayes' Theorem (P(H|E)) to calculate posterior probability for Spam
posterior_spam_given_free = spam_stats['posterior']

# Apply Bayes' Theorem (P(H|E)) to calculate posterior probability for Not Spam
posterior_not_spam_given_free = spam_stats['posterior_complement']

# Output the results
print(f"Prior Probability of Spam (P(Spam)): {prior_spam}")
print(f"Prior Probability of Not Spam (P(Not Spam)): {prior_not_spam}")
print(f"Likelihood of 'free' given Spam (P(free|spam)): {likelihood_free_given_spam}")
print(f"Likelihood of 'free' given Not Spam (P(free|not spam)): {likelihood_free_given_not_spam}")
print(f"Evidence (P(E)): {evidence}")
print(f"Posterior Probability of Spam given 'free' (P(Spam|free)): {posterior_spam_given_free}")
print(f"Posterior Probability of Not Spam given 'free' (P(Not Spam|free)): {posterior_not_spam_given_free}")


Prior Probability of Spam (P(Spam)): 0.5
Prior Probability of Not Spam (P(Not Spam)): 0.5
Likelihood of 'free' given Spam (P(free|spam)): 1.0
Likelihood of 'free' given Not Spam (P(free|not spam)): 0.0
Evidence (P(E)): 0.5
Posterior Probability of Spam given 'free' (P(Spam|free)): 1.0
Posterior Probability of Not Spam given 'free' (P(Not Spam|free)): 0.0


In [None]:
# New dataset: Employee Job Satisfaction
# Read "employee.csv" from a path relative to the working directory.
# This rebinds `data`, which previously held the spam example's dict.
data = pd.read_csv("employee.csv")

# Create DataFrame
# NOTE(review): pd.read_csv already returns a DataFrame, so this re-wrap looks
# redundant — confirm no later cell relies on `data` and `df` being distinct
# objects before simplifying. This also rebinds `df` from the spam example.
df = pd.DataFrame(data)
# A bare name as the cell's last expression makes the notebook display the frame.
df