In [1]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score
import numpy as np

In [51]:
class Bernoulli:
    
    def __init__(self, alpha=1):
        self.alpha = alpha
    
    def _get_ccd(self, X, y):
        ccd = np.zeros((self.n_classes, X.shape[1]))
        for i, label in enumerate(self.labels):
            subset = X[y == label]
            ccd[i] = (subset.sum(axis=0) + self.alpha) / (subset.shape[0] + 2 * self.alpha)
        return ccd
    
    def fit(self, X, y):
        self.labels, counts = np.unique(y, return_counts=True)
        self.n_classes = self.labels.shape[0]
        self.label_priors = np.c_[self.labels, (counts + self.alpha) / (y.shape[0] + self.n_classes * self.alpha)]

        # Compute class conditional densities
        self.ccd = self._get_ccd(X, y)
        return self
    
    def predict(self, X):
        likelihood = np.dot(np.log(self.ccd), X.T).T + self.label_priors[:, 1]
        return self.labels[np.argmax(likelihood, axis=1)]

In [57]:
# Benchmark

X = np.random.choice([0, 1], size=(50, 75))
y = np.random.choice([0, 1], size=(50,))
clf = BernoulliNB()
clf.fit(X, y)
c = Bernoulli().fit(X, y)
accuracy_score(y, clf.predict(X)), accuracy_score(y, c.predict(X))

(0.88, 0.84)

In [8]:
x = X[0]
x

array([0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 0])

In [9]:
c.cc_d.shape

(2, 75)

In [44]:
# Logic gates
X = [[0, 0], [0, 1], [1, 0], [1, 1]]
y_and = [0, 0, 0, 1]
y_or = [0, 1, 1, 1]
y_xor = [0, 1, 1, 0]
X, y_and, y_or, y_xor = map(np.array, [X, y_and, y_or, y_xor])

for gate in [y_and, y_or, y_xor]:
    clf = BernoulliNB().fit(X, gate)
    print(accuracy_score(gate, clf.predict(X)))
    c = Bernoulli().fit(X, gate)
    print(accuracy_score(gate, c.predict(X)))

0.75
0.5
0.75
0.5
0.5
0.5




In [58]:
X = np.array([
    [1, 0],
    [0, 1],
    [0, 1],
    [1, 0] 
])
y = np.array([1, 0, 0, 1])
c = Bernoulli().fit(X, y)

In [59]:
c.label_priors

array([[0. , 0.5],
       [1. , 0.5]])

In [60]:
c.ccd

array([[0.25, 0.75],
       [0.75, 0.25]])

In [61]:
clf = BernoulliNB().fit(X, y)

In [62]:
clf.class_log_prior_

array([-0.69314718, -0.69314718])

In [64]:
clf.feature_log_prob_

array([[-1.38629436, -0.28768207],
       [-0.28768207, -1.38629436]])

In [65]:
np.log(c.ccd)

array([[-1.38629436, -0.28768207],
       [-0.28768207, -1.38629436]])