In [5]:
import pandas as pd

# Load Dataset
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists. Consider a path relative to the notebook.
df = pd.read_csv(r"C:\Users\vinay\Downloads\NB_Example.csv")

# Some category labels in the file carry stray whitespace (the likelihood output
# shows 'Very Good ' with a trailing space). Strip every string cell so lookups
# by the clean label match; non-string cells are passed through unchanged.
for col in df.select_dtypes(exclude="number").columns:
    df[col] = df[col].map(lambda cell: cell.strip() if isinstance(cell, str) else cell)

print(df.head())

# Test case to classify: <CGPA = 8; Interactiveness = Yes; Practical Knowledge = Average; Communication Skills = Good>
# The dataset stores CGPA as range buckets ('>=9', '>=8', '<8', '<=8'), not raw
# numbers, so the value must be one of those labels. A literal '8' matches no
# bucket and makes every likelihood lookup fall back to 0 for both classes.
# NOTE(review): a CGPA of exactly 8 satisfies both '>=8' and '<=8'; '>=8' is used
# here because it is the bucket never observed with Job Offer = No, i.e. the
# zero-probability case that Laplace smoothing is meant to handle — confirm.
test_case = {
    'CGPA': '>=8',
    'Interactiveness': 'Yes',
    'Practical Knowledge': 'Average',
    'Communication Skills': 'Good'
}

# Calculate Prior Probabilities
# prior_probabilities[class_label] = fraction of rows with that 'Job Offer' value.
# The stored values keep full precision because they are multiplied into the
# class scores later; rounding is applied only to the copy that is printed.
prior_probabilities = df['Job Offer'].value_counts(normalize=True).to_dict()
print("Rounded Prior Probabilities: ", {label: round(p, 2) for label, p in prior_probabilities.items()})

# Compute Likelihoods
# likelihood_probabilities[feature][class_label][value] is the relative frequency
# of `value` for `feature` among the rows whose 'Job Offer' equals `class_label`.
# A value never observed with a class is simply absent from the inner dict.
# Exact fractions are stored (they are multiplied together later); rounding to
# two decimals happens only in the printed copy.
# NOTE(review): df.columns[:-1] also includes the 'SI No.' serial-number column,
# which is a row id rather than a feature; it is kept here for compatibility.

# Split the rows by class once instead of re-filtering for every feature.
rows_by_class = {
    class_label: df[df['Job Offer'] == class_label]
    for class_label in df['Job Offer'].unique()
}

likelihood_probabilities = {}
for feature in df.columns[:-1]:  # Exclude target column (assumed to be the last one)
    likelihood_probabilities[feature] = {}
    for class_label, class_rows in rows_by_class.items():
        exact = class_rows[feature].value_counts(normalize=True).to_dict()
        likelihood_probabilities[feature][class_label] = exact
        print(f"Likelihood Probabilities for {feature} given {class_label}: ",
              {value: round(p, 2) for value, p in exact.items()})

# Naive Bayes score for each class (unnormalized posterior):
#   score(class) = P(class) * product over features of P(value | class)
# A test value that was never observed with a class contributes a factor of 0,
# because no smoothing is applied in this cell.
class_scores = {}
for class_label in ('Yes', 'No'):
    score = prior_probabilities[class_label]
    for feature, value in test_case.items():
        score *= likelihood_probabilities[feature][class_label].get(value, 0)
    class_scores[class_label] = score
    print(f"Probability of Job Offer = {class_label}: ", round(score, 4))

# Keep the original variable names available for any later use.
prob_yes = class_scores['Yes']
prob_no = class_scores['No']

# Prediction
if prob_yes > prob_no:
    print("The predicted class is: Job Offer = Yes")
elif prob_no > prob_yes:
    print("The predicted class is: Job Offer = No")
else:
    # Equal scores (typically both 0 because a test value was never observed)
    # carry no information, so do not report a class as if it had been predicted.
    print("No prediction: both classes have the same score (zero-probability problem?)")


   SI No. CGPA Interactiveness Practical Knowledge Communication Skills  \
0       1  >=9             Yes          Very Good                  Good   
1       2  >=8              No                Good             Moderate   
2       3  >=9              No             Average                 Poor   
3       4   <8              No             Average                 Good   
4       5  >=8             Yes                Good             Moderate   

  Job Offer  
0       Yes  
1       Yes  
2        No  
3        No  
4       Yes  
Rounded Prior Probabilities:  {'Yes': 0.7, 'No': 0.3}
Likelihood Probabilities for SI No. given Yes:  {1: 0.14, 2: 0.14, 5: 0.14, 6: 0.14, 8: 0.14, 9: 0.14, 10: 0.14}
Likelihood Probabilities for SI No. given No:  {3: 0.33, 4: 0.33, 7: 0.33}
Likelihood Probabilities for CGPA given Yes:  {'>=9': 0.43, '>=8': 0.29, '<=8': 0.29}
Likelihood Probabilities for CGPA given No:  {'<8': 0.67, '>=9': 0.33}
Likelihood Probabilities for Interactiveness given Yes:  {'Yes': 0

In [6]:
# Using Laplace Smoothing to handle Zero Probability Error
def laplace_smoothing(count, total, k, num_classes):
    """Return the additively smoothed estimate (count + k) / (total + k * num_classes).

    Args:
        count: Number of times the value was observed.
        total: Number of observations the count is taken from.
        k: Smoothing constant added to every count.
        num_classes: Number of categories the extra mass is spread over. The
            caller in this notebook passes the number of distinct values of the
            feature, not the number of target classes.

    Returns:
        The smoothed probability as a float.
    """
    smoothed_count = count + k
    smoothed_total = total + k * num_classes
    return smoothed_count / smoothed_total

k = 1  # Smoothing factor

# Unique values for each feature (needed for Laplace)
unique_vals = {col: df[col].nunique() for col in df.columns[:-1]}

# Split the rows by class once instead of re-filtering for every feature.
rows_by_class = {
    class_label: df[df['Job Offer'] == class_label]
    for class_label in df['Job Offer'].unique()
}

# Compute Likelihoods with Laplace Smoothing
# laplace_likelihood[feature][class_label][value] is the smoothed estimate of
# P(value | class_label) for every value of the feature seen anywhere in the
# dataset, so a value missing from one class still gets a non-zero probability.
# Exact values are stored (they are multiplied together later); rounding to
# four decimals happens only in the printed copy.
laplace_likelihood = {}
for feature in df.columns[:-1]:
    laplace_likelihood[feature] = {}
    observed_values = df[feature].unique()
    for class_label, class_df in rows_by_class.items():
        total = len(class_df)
        value_counts = class_df[feature].value_counts().to_dict()
        smoothed = {
            val: laplace_smoothing(value_counts.get(val, 0), total, k, unique_vals[feature])
            for val in observed_values
        }
        laplace_likelihood[feature][class_label] = smoothed
        print(f"Laplace Likelihood for {feature} given {class_label}: ",
              {val: round(p, 4) for val, p in smoothed.items()})

# Recalculate with Laplace Likelihood
#   score(class) = P(class) * product over features of smoothed P(value | class)
# Number of training rows per class, needed to smooth values that are not in
# the likelihood table at all.
class_totals = df['Job Offer'].value_counts().to_dict()

laplace_scores = {}
for class_label in ('Yes', 'No'):
    score = prior_probabilities[class_label]
    for feature, value in test_case.items():
        smoothed_by_value = laplace_likelihood[feature][class_label]
        if value in smoothed_by_value:
            score *= smoothed_by_value[value]
        else:
            # The value never occurs in the dataset for this feature. Falling
            # back to 0 would reintroduce the zero-probability problem, so use
            # the smoothed probability of a zero count instead.
            score *= laplace_smoothing(0, class_totals[class_label], k, unique_vals[feature])
    laplace_scores[class_label] = score
    print(f"Laplace-smoothed Probability of Job Offer = {class_label}: ", round(score, 6))

# Keep the original variable names available for any later use.
prob_yes = laplace_scores['Yes']
prob_no = laplace_scores['No']

if prob_yes > prob_no:
    print("The predicted class is: Job Offer = Yes")
else:
    print("The predicted class is: Job Offer = No")


Laplace Likelihood for SI No. given Yes:  {1: 0.1176, 2: 0.1176, 3: 0.0588, 4: 0.0588, 5: 0.1176, 6: 0.1176, 7: 0.0588, 8: 0.1176, 9: 0.1176, 10: 0.1176}
Laplace Likelihood for SI No. given No:  {1: 0.0769, 2: 0.0769, 3: 0.1538, 4: 0.1538, 5: 0.0769, 6: 0.0769, 7: 0.1538, 8: 0.0769, 9: 0.0769, 10: 0.0769}
Laplace Likelihood for CGPA given Yes:  {'>=9': 0.3636, '>=8': 0.2727, '<8': 0.0909, '<=8': 0.2727}
Laplace Likelihood for CGPA given No:  {'>=9': 0.2857, '>=8': 0.1429, '<8': 0.4286, '<=8': 0.1429}
Laplace Likelihood for Interactiveness given Yes:  {'Yes': 0.6667, 'No': 0.3333}
Laplace Likelihood for Interactiveness given No:  {'Yes': 0.4, 'No': 0.6}
Laplace Likelihood for Practical Knowledge given Yes:  {'Very Good ': 0.3, 'Good': 0.5, 'Average': 0.2}
Laplace Likelihood for Practical Knowledge given No:  {'Very Good ': 0.1667, 'Good': 0.3333, 'Average': 0.5}
Laplace Likelihood for Communication Skills given Yes:  {'Good': 0.5, 'Moderate': 0.4, 'Poor': 0.1}
Laplace Likelihood for Com