# Project - Naive Bayes Classifier

In [47]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [48]:
# Load the dataset
file_path = 'Social_Network_Ads.csv'
social_data = pd.read_csv(file_path)

# Drop the 'User ID' column and encode 'Gender' to numerical format
social_data = social_data.drop(columns=['User ID'])
le = LabelEncoder()
social_data['Gender'] = le.fit_transform(social_data['Gender'])

# Split data into features (X) and target (y)
X = social_data[['Gender', 'Age', 'EstimatedSalary']]
y = social_data['Purchased']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [54]:
social_data

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0
...,...,...,...,...
395,0,46,41000,1
396,1,51,23000,1
397,0,50,20000,1
398,1,36,33000,0


In [49]:
# Frequentist Naive Bayes Classifier
class FrequentistNaiveBayes:
    def __init__(self):
        self.class_priors = {}
        self.feature_stats = {}
    
    def fit(self, X, y):
        # Calculate class priors
        class_counts = y.value_counts()
        total_samples = len(y)
        self.class_priors = {cls: count / total_samples for cls, count in class_counts.items()}
        
        # Calculate mean and variance for each feature per class
        self.feature_stats = {}
        for cls in class_counts.index:
            class_data = X[y == cls]
            self.feature_stats[cls] = {
                feature: (class_data[feature].mean(), class_data[feature].var())
                for feature in X.columns
            }
    
    def predict(self, X):
        predictions = []
        for _, row in X.iterrows():
            posteriors = {}
            for cls, prior in self.class_priors.items():
                posterior = np.log(prior)
                for feature in X.columns:
                    mean, var = self.feature_stats[cls][feature]
                    likelihood = (1 / np.sqrt(2 * np.pi * var)) * np.exp(-(row[feature] - mean)**2 / (2 * var))
                    posterior += np.log(likelihood)
                posteriors[cls] = posterior
            predictions.append(max(posteriors, key=posteriors.get))
        return predictions

# Naive Bayes Classifier with Uniform Priors
class NaiveBayesUniformPrior:
    def __init__(self):
        self.num_classes = 0
        self.feature_stats = {}
    
    def fit(self, X, y):
        self.num_classes = len(y.unique())
        class_counts = y.value_counts()
        self.feature_stats = {}
        for cls in class_counts.index:
            class_data = X[y == cls]
            self.feature_stats[cls] = {
                feature: (class_data[feature].mean(), class_data[feature].var())
                for feature in X.columns
            }
    
    def predict(self, X):
        predictions = []
        for _, row in X.iterrows():
            posteriors = {}
            for cls in self.feature_stats.keys():
                posterior = np.log(1 / self.num_classes)
                for feature in X.columns:
                    mean, var = self.feature_stats[cls][feature]
                    likelihood = (1 / np.sqrt(2 * np.pi * var)) * np.exp(-(row[feature] - mean)**2 / (2 * var))
                    posterior += np.log(likelihood)
                posteriors[cls] = posterior
            predictions.append(max(posteriors, key=posteriors.get))
        return predictions

In [50]:
# Initialize and train the Frequentist Naive Bayes classifier
freq_nb = FrequentistNaiveBayes()
freq_nb.fit(X_train, y_train)

# Predict on the test set and evaluate the Frequentist model
y_pred_freq = freq_nb.predict(X_test)
freq_accuracy = accuracy_score(y_test, y_pred_freq)
freq_precision = precision_score(y_test, y_pred_freq)
freq_recall = recall_score(y_test, y_pred_freq)
freq_f1 = f1_score(y_test, y_pred_freq)


# Initialize and train the Naive Bayes classifier with Uniform Priors
uniform_nb = NaiveBayesUniformPrior()
uniform_nb.fit(X_train, y_train)

# Predict on the test set and evaluate the Uniform Prior model
y_pred_uniform = uniform_nb.predict(X_test)
uniform_accuracy = accuracy_score(y_test, y_pred_uniform)
uniform_precision = precision_score(y_test, y_pred_uniform)
uniform_recall = recall_score(y_test, y_pred_uniform)
uniform_f1 = f1_score(y_test, y_pred_uniform)

In [51]:
# Print results
print("Frequentist Naive Bayes Performance:")
print(f"Accuracy: {freq_accuracy:.4f}, Precision: {freq_precision:.4f}, Recall: {freq_recall:.4f}, F1 Score: {freq_f1:.4f}")

print("\nNaive Bayes with Uniform Priors Performance:")
print(f"Accuracy: {uniform_accuracy:.4f}, Precision: {uniform_precision:.4f}, Recall: {uniform_recall:.4f}, F1 Score: {uniform_f1:.4f}")


Frequentist Naive Bayes Performance:
Accuracy: 0.9000, Precision: 0.8788, Recall: 0.7838, F1 Score: 0.8286

Naive Bayes with Uniform Priors Performance:
Accuracy: 0.9083, Precision: 0.7955, Recall: 0.9459, F1 Score: 0.8642
