#NAIVE BAYES

# In [1]:
import numpy as np

class BernoulliNaiveBayes:
    """Naive Bayes classifier for binary (0/1) feature vectors.

    Each feature is modelled as an independent Bernoulli variable given the
    class label. Prediction picks the label with the highest log-posterior
    (log prior plus the summed per-feature log-likelihoods).

    Attributes set by ``fit``:
        classes: array of the distinct labels found in ``y`` (as returned by
            ``np.unique``).
        class_priors: dict mapping each label to the fraction of training
            rows carrying that label.
        feature_probs: dict mapping each label to a per-feature array of
            smoothed estimates of P(feature = 1 | label).
    """

    def fit(self, X, y):
        """Estimate class priors and per-class feature probabilities.

        Args:
            X: 2-D array-like of shape (n_samples, n_features) holding 0/1
                values. Lists are accepted and converted to arrays.
            y: 1-D array-like of n_samples class labels.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        # Coerce up front: with plain lists ``y == c`` would be a single
        # bool and ``X[...]`` would index a row instead of masking rows.
        X = np.asarray(X)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.feature_probs = {}
        self.class_priors = {}
        for c in self.classes:
            X_c = X[y == c]
            self.class_priors[c] = len(X_c) / len(X)
            # Add-one smoothing over the two possible outcomes (0 and 1)
            # keeps every probability strictly inside (0, 1), so the logs
            # taken at prediction time are always finite.
            self.feature_probs[c] = (np.sum(X_c, axis=0) + 1) / (X_c.shape[0] + 2)
        return self

    def _bernoulli_prob(self, x, probs):
        """Return the per-feature Bernoulli log-likelihood of ``x``.

        Args:
            x: array of 0/1 feature values for one sample.
            probs: array of P(feature = 1) values, same length as ``x``.

        Returns:
            Array whose i-th entry is log(probs[i]) when x[i] is 1 and
            log(1 - probs[i]) when x[i] is 0.
        """
        return x * np.log(probs) + (1 - x) * np.log(1 - probs)

    def _predict_single(self, x):
        """Return the most probable class label for one sample ``x``."""
        x = np.asarray(x)
        posteriors = {}
        for c in self.classes:
            # Work in log space: summing log-probabilities avoids the
            # underflow that multiplying many small probabilities causes.
            prior = np.log(self.class_priors[c])
            cond_likelihood = np.sum(self._bernoulli_prob(x, self.feature_probs[c]))
            posteriors[c] = prior + cond_likelihood
        # On ties, ``max`` keeps the first label in ``self.classes`` order.
        return max(posteriors, key=posteriors.get)

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features) holding 0/1
                values. Lists are accepted and converted to arrays.

        Returns:
            1-D array of n_samples predicted labels, using the dtype of the
            labels seen during ``fit`` (also for an empty ``X``).
        """
        X = np.asarray(X)
        labels = [self._predict_single(row) for row in X]
        return np.array(labels, dtype=self.classes.dtype)


# In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Binarizer

# Load the breast-cancer dataset and turn every feature into a 0/1 indicator
# so it can be fed to the Bernoulli model.
data = load_breast_cancer()
# NOTE(review): the same threshold (10) is applied to every feature column;
# if the columns are on different scales, per-feature thresholds would keep
# more information -- confirm this is intended.
x = Binarizer(threshold=10).fit_transform(data.data)
y = data.target

# Hold out 30% of the rows for evaluation. A fixed seed makes the reported
# accuracy reproducible, and stratifying keeps the class balance the same in
# both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)

model = BernoulliNaiveBayes()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Fraction of test rows whose predicted label matches the true label,
# printed as a percentage.
accuracy = np.mean(y_pred == y_test)
print("Accuracy:", accuracy * 100)


# Output: Accuracy: 60.23391812865497
