In [1]:
import numpy as np

# Seed NumPy's legacy global random state so that anything drawing from
# np.random.* produces the same values on every fresh run of the notebook.
np.random.seed(42)

In [2]:
# Imports
from sklearn.model_selection import train_test_split
from sklearn import datasets

def accuracy(y_true, y_pred):
    """Return the fraction of predictions that exactly match the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels; must have the same shape as ``y_true``.

    Returns
    -------
    numpy.float64
        Share of positions where ``y_true == y_pred``, in ``[0, 1]``
        (``nan`` when the inputs are empty).

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Coerce to arrays before comparing: ``==`` on two plain Python lists
    # yields a single bool rather than an element-wise mask, which would
    # silently produce a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Refuse mismatched shapes instead of letting NumPy broadcast them.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got %s and %s"
            % (y_true.shape, y_pred.shape)
        )

    return np.mean(y_true == y_pred)

# Load the iris dataset as a (features, targets) pair of arrays.
X, y = datasets.load_iris(return_X_y=True)

# Hold out half of the samples for evaluation; the fixed random_state makes
# the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

# Last expression of the cell: shown as its output.
X_train.shape, y_train.shape, y_test.shape

((75, 4), (75,), (75,))

In [3]:
class MyNaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the largest
    ``log P(class) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. Keeps the variance strictly
        positive when a feature is constant within a class.

    Attributes (set by ``fit``)
    ---------------------------
    classes_ : ndarray of shape (n_classes,)
        Sorted unique labels seen during ``fit``.
    n_classes_ : int
        Number of distinct labels.
    mean_ : ndarray of shape (n_classes, n_features)
        Per-class feature means.
    var_ : ndarray of shape (n_classes, n_features)
        Per-class feature variances (population variance plus ``epsilon_``).
    prior_proba_ : ndarray of shape (n_classes,)
        Relative frequency of each class in the training labels.
    epsilon_ : float
        Absolute value that was added to the variances.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training labels.

        Returns
        -------
        self : MyNaiveBayes
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` has a different number
            of entries than ``X`` has rows.
        """
        X = np.asarray(X, dtype=float)
        # Flatten once and use the same 1-D labels for both the class list
        # and the row masks; a column-vector ``y`` would otherwise produce a
        # 2-D mask that cannot index the rows of ``X``.
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional, got %d dimension(s)" % X.ndim)
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                "X and y have inconsistent lengths: %d vs %d" % (n_samples, y.shape[0])
            )

        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)

        self.mean_ = np.empty((self.n_classes_, n_features))
        self.var_ = np.empty((self.n_classes_, n_features))
        self.prior_proba_ = np.empty(self.n_classes_)

        # Variance floor, scaled by the data so it is unit-independent.
        self.epsilon_ = self.var_smoothing * X.var(axis=0).max()

        for i, label in enumerate(self.classes_):
            X_label = X[y == label]
            self.mean_[i] = X_label.mean(axis=0)
            self.var_[i] = X_label.var(axis=0) + self.epsilon_
            self.prior_proba_[i] = X_label.shape[0] / n_samples

        return self

    def predict(self, X):
        """Predict the most probable class label for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, taken from ``classes_``.
        """
        X = np.asarray(X, dtype=float)
        joint_log_likelihood = np.empty((X.shape[0], self.n_classes_))
        for i in range(self.n_classes_):
            log_prior = np.log(self.prior_proba_[i])
            # Work with log-densities directly: taking log(pdf) underflows
            # to log(0) = -inf for samples far away from a class mean.
            log_likelihood = np.sum(self._log_pdf(X, class_idx=i), axis=-1)
            joint_log_likelihood[:, i] = log_prior + log_likelihood

        # Map the winning column back to the actual label; the raw argmax
        # is only correct when labels happen to be 0..n_classes-1.
        return self.classes_[np.argmax(joint_log_likelihood, axis=-1)]

    def _log_pdf(self, X, class_idx):
        """Element-wise log of the normal density of ``X`` under one class."""
        mean = self.mean_[class_idx]
        var = self.var_[class_idx]
        return -0.5 * np.log(2 * np.pi * var) - ((X - mean) ** 2) / (2 * var)

    def _pdf(self, X, class_idx):
        """Element-wise normal density of ``X`` under one class.

        Kept for callers that want the plain density; ``predict`` uses
        ``_log_pdf`` to stay numerically stable.
        """
        return np.exp(self._log_pdf(X, class_idx))

In [4]:
# Train the hand-written classifier on the training split and predict the
# held-out split.
MyNB = MyNaiveBayes()
MyNB.fit(X_train, y_train)
y_pred = MyNB.predict(X_test)

# Size of the test split and how many of its labels were predicted wrongly.
n_test_points = X_test.shape[0]
n_mislabeled = (y_test != y_pred).sum()

print("My Implementation:")
print("Naive Bayes classification accuracy", accuracy(y_test, y_pred))
print("Number of mislabeled points out of a total %d points : %d"
      % (n_test_points, n_mislabeled))

My Implementation:
Naive Bayes classification accuracy 0.9866666666666667
Number of mislabeled points out of a total 75 points : 1


In [5]:
from sklearn.naive_bayes import GaussianNB

# Reference run: scikit-learn's GaussianNB on the same train/test split.
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred = gnb.predict(X_test)

# Size of the test split and how many of its labels were predicted wrongly.
n_test_points = X_test.shape[0]
n_mislabeled = (y_test != y_pred).sum()

print("Sklearn Implementation:")
print("Naive Bayes classification accuracy", accuracy(y_test, y_pred))
print("Number of mislabeled points out of a total %d points : %d"
      % (n_test_points, n_mislabeled))

Sklearn Implementation:
Naive Bayes classification accuracy 0.9866666666666667
Number of mislabeled points out of a total 75 points : 1
