In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from scipy.stats import norm
from numpy import mean
from numpy import std
from dataclasses import dataclass  # Importing the dataclass module


In [None]:
import numpy as np

# Generate random binary values for X (features) and y (target variable)
X, y = np.random.randint(0, 2, size=(1000, 2)), np.random.randint(0, 2, size=1000)

# Print the first 5 samples to verify
print(X.shape, y.shape)
print(X[:5], y[:5])


(1000, 2) (1000,)
[[1 1]
 [1 0]
 [1 0]
 [0 1]
 [0 1]] [0 0 0 1 1]


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

class NaiveBayes:
    def __init__(self, X, y, laplace_param=None):
        self.X = X
        self.y = y
        self.laplace_param = laplace_param
        self.X_train, self.X_test, self.y_train, self.y_test = self.splitToTrainTest()
        self.run_model()

    def splitToTrainTest(self):
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y,
                                                            test_size=0.1,
                                                            random_state=0)
        return X_train, X_test, y_train, y_test

    def get_probability(self, X, likelihood_col1, likelihood_col2, prior):
        probability = prior
        for Pi, likelihood in zip(X, [likelihood_col1, likelihood_col2]):
            probability *= likelihood if Pi == 0 else (1 - likelihood)
        return probability

    def run_model(self):
        self.X0_train = self.X_train[self.y_train == 0]  # Data points for class 0
        self.X1_train = self.X_train[self.y_train == 1]  # Data points for class 1

        # Calculate priors
        self.prior_y0 = len(self.X0_train) / len(self.X_train) # prior for class 0
        self.prior_y1 = len(self.X1_train) / len(self.X_train) # prior for class 1

        # Calculate likelihoods
        self.likelihood_X01 = self.calculate_likelihood(self.X1_train, 0, self.laplace_param) # PROBABILITY OF RECORD BEING 0 GIVEN CLASS IS 1
        self.likelihood_X11 = self.calculate_likelihood(self.X1_train, 1, self.laplace_param)

        self.likelihood_X00 = self.calculate_likelihood(self.X0_train, 0, self.laplace_param)
        self.likelihood_X10 = self.calculate_likelihood(self.X0_train, 1, self.laplace_param)

    def calculate_likelihood(self, data, value, laplace_param):
        if laplace_param is not None:
            count_value = (data == value).sum()
            total_count = data.shape[0]
            return (count_value + laplace_param) / (total_count + 2 * laplace_param)
        else:
            return (data == value).mean()

    def predict(self):
        for i in range(self.X_test.shape[0]):
            X = self.X_test[i]
            prob_1 = self.get_probability(X, self.likelihood_X01, self.likelihood_X11, self.prior_y1)
            prob_0 = self.get_probability(X, self.likelihood_X00, self.likelihood_X10, self.prior_y0)
            predicted_class_label = np.argmax([prob_0, prob_1])
            print('P(y=0| %s) = %.2f' % (X, prob_0*100))
            print('P(y=1| %s) = %.2f' % (X, prob_1*100))
            print("Model predicts {} and class was {}".format(predicted_class_label, self.y_test[i]))
            print("")

    def evaluate(self):
        y_hat = []
        for i in range(self.X_test.shape[0]):
            X = self.X_test[i]
            prob_1 = self.get_probability(X, self.likelihood_X01, self.likelihood_X11, self.prior_y1)
            prob_0 = self.get_probability(X, self.likelihood_X00, self.likelihood_X10, self.prior_y0)
            predicted_class_label = np.argmax([prob_0, prob_1])
            y_hat.append(predicted_class_label)

        y_test = self.y_test
        y = (y_test == 1)
        y_hat = (np.array(y_hat) == 1)

        accuracy = (y == y_hat).sum() / y.size
        precision = (y & y_hat).sum() / y_hat.sum() if y_hat.sum() > 0 else 0
        recall = (y & y_hat).sum() / y.sum() if y.sum() > 0 else 0

        print('Accuracy: ', accuracy)
        print('Precision: ', precision)
        print('Recall: ', recall)


In [None]:
print("Model without Laplace Smoothing")
nb = NaiveBayes(X, y)
nb.run_model()
nb.predict()
nb.evaluate()


Model without Laplace Smoothing
P(y=0| [0 0]) = 12.44
P(y=1| [0 0]) = 12.55
Model predicts 1 and class was 1

P(y=0| [1 1]) = 12.44
P(y=1| [1 1]) = 12.55
Model predicts 1 and class was 1

P(y=0| [0 0]) = 12.44
P(y=1| [0 0]) = 12.55
Model predicts 1 and class was 1

P(y=0| [1 0]) = 12.11
P(y=1| [1 0]) = 11.95
Model predicts 0 and class was 0

P(y=0| [1 0]) = 12.11
P(y=1| [1 0]) = 11.95
Model predicts 0 and class was 0

P(y=0| [0 0]) = 12.44
P(y=1| [0 0]) = 12.55
Model predicts 1 and class was 1

P(y=0| [0 0]) = 12.44
P(y=1| [0 0]) = 12.55
Model predicts 1 and class was 1

P(y=0| [1 0]) = 12.11
P(y=1| [1 0]) = 11.95
Model predicts 0 and class was 1

P(y=0| [1 0]) = 12.11
P(y=1| [1 0]) = 11.95
Model predicts 0 and class was 1

P(y=0| [0 0]) = 12.44
P(y=1| [0 0]) = 12.55
Model predicts 1 and class was 1

P(y=0| [1 0]) = 12.11
P(y=1| [1 0]) = 11.95
Model predicts 0 and class was 1

P(y=0| [0 1]) = 12.78
P(y=1| [0 1]) = 13.17
Model predicts 1 and class was 0

P(y=0| [1 0]) = 12.11
P(y=1| [1 

In [None]:
print("\nModel with Laplace Smoothing")
nb_laplace = NaiveBayes(X, y, laplace_param=1)
nb_laplace.run_model()
nb_laplace.predict()
nb_laplace.evaluate()


Model with Laplace Smoothing
P(y=0| [0 0]) = 49.55
P(y=1| [0 0]) = 49.97
Model predicts 1 and class was 1

P(y=0| [1 1]) = -0.01
P(y=1| [1 1]) = -0.03
Model predicts 0 and class was 1

P(y=0| [0 0]) = 49.55
P(y=1| [0 0]) = 49.97
Model predicts 1 and class was 1

P(y=0| [1 0]) = -0.54
P(y=1| [1 0]) = -1.08
Model predicts 0 and class was 0

P(y=0| [1 0]) = -0.54
P(y=1| [1 0]) = -1.08
Model predicts 0 and class was 0

P(y=0| [0 0]) = 49.55
P(y=1| [0 0]) = 49.97
Model predicts 1 and class was 1

P(y=0| [0 0]) = 49.55
P(y=1| [0 0]) = 49.97
Model predicts 1 and class was 1

P(y=0| [1 0]) = -0.54
P(y=1| [1 0]) = -1.08
Model predicts 0 and class was 1

P(y=0| [1 0]) = -0.54
P(y=1| [1 0]) = -1.08
Model predicts 0 and class was 1

P(y=0| [0 0]) = 49.55
P(y=1| [0 0]) = 49.97
Model predicts 1 and class was 1

P(y=0| [1 0]) = -0.54
P(y=1| [1 0]) = -1.08
Model predicts 0 and class was 1

P(y=0| [0 1]) = 0.78
P(y=1| [0 1]) = 1.36
Model predicts 1 and class was 0

P(y=0| [1 0]) = -0.54
P(y=1| [1 0]) 