In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder

# Load the training table (symptom columns plus the 'Flu' label) from disk.
df = pd.read_csv(filepath_or_buffer='naive.csv')

# Manual Naive Bayes Implementation
class ManualNaiveBayes:
    """Naive Bayes classifier for categorical features with Laplace smoothing.

    Attributes set by ``fit``:
        classes: sorted array of the distinct labels found in ``y``.
        class_probs: ``{class: prior probability}``.
        feature_probs: ``{feature: {class: {value: smoothed P(value | class)}}}``.
    """

    def __init__(self):
        self.class_probs = {}
        self.feature_probs = {}
        # {feature: {class: probability used for values never seen in training}}
        self._unseen_probs = {}

    def fit(self, X, y):
        """Estimate class priors and smoothed per-feature likelihoods.

        Args:
            X: pandas DataFrame of categorical features, one row per sample.
            y: labels for the rows of ``X`` (Series, array or list), matched
                to ``X`` by position.

        Raises:
            ValueError: if the training set is empty or ``X`` and ``y`` have
                different lengths.
        """
        # Work on a plain array so lists are accepted and rows are matched by
        # position rather than by pandas index alignment.
        labels = np.asarray(y)
        n_samples = len(labels)
        if n_samples == 0:
            raise ValueError("Cannot fit Naive Bayes on an empty training set")
        if len(X) != n_samples:
            raise ValueError(
                f"X has {len(X)} rows but y has {n_samples} labels"
            )

        # Start from a clean slate so a re-fit never keeps stale entries.
        self.classes = np.unique(labels)
        self.class_probs = {}
        self.feature_probs = {}
        self._unseen_probs = {}

        # One boolean mask per class, computed once and reused per feature.
        class_masks = [(c, labels == c) for c in self.classes]
        for c, mask in class_masks:
            self.class_probs[c] = np.sum(mask) / n_samples

        for feature in X.columns:
            column = X[feature].to_numpy()
            unique_values = X[feature].unique()
            n_values = len(unique_values)
            likelihoods = {}
            unseen = {}
            for c, mask in class_masks:
                in_class = column[mask]
                # Laplace smoothing: +1 per count, +n_values in the denominator.
                denominator = len(in_class) + n_values
                likelihoods[c] = {
                    val: (np.sum(in_class == val) + 1) / denominator
                    for val in unique_values
                }
                # Same smoothed estimate for a value with a zero count.
                unseen[c] = 1 / denominator
            self.feature_probs[feature] = likelihoods
            self._unseen_probs[feature] = unseen

    def _joint_log_scores(self, X):
        """Yield ``{class: log P(class) + sum(log P(value | class))}`` per row.

        A value that never appeared in training for a feature is scored with
        the zero-count smoothed probability instead of raising ``KeyError``.
        """
        features = list(X.columns)
        for _, row in X.iterrows():
            scores = {}
            for c in self.classes:
                log_score = np.log(self.class_probs[c])
                for feature in features:
                    prob = self.feature_probs[feature][c].get(row[feature])
                    if prob is None:
                        prob = self._unseen_probs[feature][c]
                    log_score += np.log(prob)
                scores[c] = log_score
            yield scores

    def predict(self, X):
        """Return a list with the most probable class for each row of ``X``.

        Ties go to the class that comes first in ``self.classes``.

        Raises:
            KeyError: if ``X`` has a column that was not present during fit.
        """
        return [
            max(scores, key=scores.get)
            for scores in self._joint_log_scores(X)
        ]

    def predict_proba(self, X):
        """Return one ``{class: posterior probability}`` dict per row of ``X``.

        Raises:
            KeyError: if ``X`` has a column that was not present during fit.
        """
        probabilities = []
        for scores in self._joint_log_scores(X):
            log_joint = np.array([scores[c] for c in self.classes], dtype=float)
            # Subtract the max before exponentiating: otherwise every term can
            # underflow to 0 with many features and the division yields NaN.
            weights = np.exp(log_joint - log_joint.max())
            normalized = weights / weights.sum()
            probabilities.append(dict(zip(self.classes, normalized)))
        return probabilities

# Prepare data: every column except 'Flu' is a feature, 'Flu' is the label.
X = df.drop('Flu', axis=1)
y = df['Flu']

# New patient data
new_patient = pd.DataFrame({
    'Chills': ['Y'],
    'RunnyNose': ['N'],
    'Headache': ['Mild'],
    'Fever': ['Y']
})

# Manual implementation
manual_nb = ManualNaiveBayes()
manual_nb.fit(X, y)
manual_prediction = manual_nb.predict(new_patient)[0]
manual_probs = manual_nb.predict_proba(new_patient)[0]

# Scikit-learn implementation
# Fit one encoder per feature on the TRAINING data only and reuse it for the
# new patient. Re-fitting on the single-row frame would map every value to 0,
# so the model would score a different patient than the one described above.
encoders = {column: LabelEncoder().fit(X[column]) for column in X.columns}

X_encoded = X.copy()
# Select columns in training order so positions line up with the fitted model.
new_patient_encoded = new_patient[list(X.columns)].copy()
for column, encoder in encoders.items():
    X_encoded[column] = encoder.transform(X[column])
    # transform() raises ValueError if the patient has a value not seen in training.
    new_patient_encoded[column] = encoder.transform(new_patient[column])

sklearn_nb = CategoricalNB()
sklearn_nb.fit(X_encoded, y)
sklearn_prediction = sklearn_nb.predict(new_patient_encoded)[0]
sklearn_probs = sklearn_nb.predict_proba(new_patient_encoded)[0]
# Look probabilities up by label instead of relying on positional order.
sklearn_probs_by_class = dict(zip(sklearn_nb.classes_, sklearn_probs))

# Print predictions and probabilities
print(f"Manual Naive Bayes Prediction: {'Flu' if manual_prediction == 'Y' else 'No Flu'}")
print("Manual Probabilities:")
print(f"P(No Flu): {manual_probs['N']:.4f}")
print(f"P(Flu): {manual_probs['Y']:.4f}")

print(f"\nSklearn Naive Bayes Prediction: {'Flu' if sklearn_prediction == 'Y' else 'No Flu'}")
print("Sklearn Probabilities:")
print(f"P(No Flu): {sklearn_probs_by_class['N']:.4f}")
print(f"P(Flu): {sklearn_probs_by_class['Y']:.4f}")



Manual Naive Bayes Prediction: No Flu
Manual Probabilities:
P(No Flu): 0.6000
P(Flu): 0.4000

Sklearn Naive Bayes Prediction: No Flu
Sklearn Probabilities:
P(No Flu): 0.6000
P(Flu): 0.4000
