In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the toy dataset from the current working directory and preview its first rows.
# NOTE(review): the 'Unnamed: 0' column in the preview looks like a row index that was
# saved into the CSV; presumably index_col=0 would drop it — confirm before changing.
data = pd.read_csv('bern.csv')
data.head()

Unnamed: 0,f1,f2,class
0,1,0,1
1,0,1,0
2,0,1,0
3,1,0,1


In [13]:
# Separate the two feature columns from the target label column.
X = data.loc[:, ['f1', 'f2']]
y = data['class']

# Number of feature columns and of distinct labels; these size the parameter arrays
# allocated in the next cell.
num_features = len(X.columns)
num_classes = np.unique(y).size
print(num_classes, num_features)

2 2


In [14]:
# Zero-initialised parameter tables, filled in by the estimation cells that follow:
#   prior[k]         -> estimated probability of class k
#   likelihood[k, j] -> per-class estimate for feature j
prior = np.zeros(num_classes)
likelihood = np.zeros(shape=(num_classes, num_features))

In [15]:
# Maximum-likelihood (unsmoothed) estimates of the per-class feature means and class priors.
C = np.unique(y)
# Index the parameter rows by the label's position in the sorted unique labels rather than
# by the label value itself: using the raw label as a row index raises IndexError for
# labels such as {1, 2} and silently overwrites rows for labels such as {-1, 1}.
for i, c in enumerate(C):
    X_c = X[y == c]                                        # rows belonging to class c
    likelihood[i, :] = np.sum(X_c, axis=0) / X_c.shape[0]  # column-wise mean within the class
    prior[i] = X_c.shape[0] / X.shape[0]                   # share of samples in the class

print('Class conditional Density: ', likelihood)
print('Class Priors: ', prior)

Class conditional Density:  [[0. 1.]
 [1. 0.]]
Class Priors:  [0.5 0.5]


In [16]:
# Additively (Laplace) smoothed estimates; overwrites the unsmoothed values in
# `likelihood` and `prior`.
C = np.unique(y)
alpha = 1
# Index the parameter rows by the label's position in the sorted unique labels rather than
# by the label value itself: using the raw label as a row index raises IndexError for
# labels such as {1, 2} and silently overwrites rows for labels such as {-1, 1}.
for i, c in enumerate(C):
    X_c = X[y == c]  # rows belonging to class c
    # The 2 * alpha in the denominator adds alpha pseudo-counts for each of the two
    # possible values (0 and 1) of a binary feature.
    likelihood[i, :] = (np.sum(X_c, axis=0) + alpha) / (X_c.shape[0] + 2 * alpha)
    # One alpha pseudo-count per class keeps the smoothed priors summing to 1.
    prior[i] = (X_c.shape[0] + alpha) / (X.shape[0] + len(C) * alpha)

print('Class conditional Density: ', likelihood)
print('Class Priors: ', prior)

Class conditional Density:  [[0.25 0.75]
 [0.75 0.25]]
Class Priors:  [0.5 0.5]


In [29]:
class BernoulliNB:
    """Bernoulli Naive Bayes classifier for binary (0/1) features.

    Parameters
    ----------
    alpha : float, default=1
        Additive (Laplace) smoothing strength, applied to both the per-class
        feature probabilities and the class priors. With ``alpha=0`` a feature
        that is always 0 or always 1 within a class yields ``log(0)`` at
        prediction time, so a positive value is recommended.
    verbose : bool, default=True
        When True, ``fit`` prints the learned parameters and ``predict_proba``
        prints the unnormalised log-joint scores. Defaults to True so existing
        callers keep seeing the same output; pass False to silence it.

    Attributes set by ``fit``
    -------------------------
    classes_ : ndarray of shape (num_classes,)
        Sorted unique labels seen in ``y``; row ``k`` of the parameter arrays
        belongs to ``classes_[k]``.
    class_conditional : ndarray of shape (num_classes, num_features)
        Smoothed estimate of P(feature = 1 | class).
    class_priors : ndarray of shape (num_classes,)
        Smoothed estimate of P(class).
    """

    def __init__(self, alpha=1, verbose=True):
        self.alpha = alpha
        self.verbose = verbose

    def fit(self, X, y):
        """Estimate class priors and per-class feature probabilities.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features), entries in {0, 1}
        y : array-like of shape (num_samples,), class labels (any sortable values)
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        num_samples, num_features = X.shape

        self.classes_ = np.unique(y)
        num_classes = len(self.classes_)

        self.class_conditional = np.zeros((num_classes, num_features))
        self.class_priors = np.zeros(num_classes)

        # Rows are addressed by position in `classes_`, not by the label value,
        # so labels do not have to be exactly 0..num_classes-1.
        for k, c in enumerate(self.classes_):
            X_c = X[y == c]
            n_c = X_c.shape[0]
            # 2 * alpha: one pseudo-count for each of the two outcomes of a binary feature.
            self.class_conditional[k, :] = (np.sum(X_c, axis=0) + self.alpha) / (n_c + 2 * self.alpha)
            # Use the instance's alpha (not a global) so the priors sum to 1 for any alpha.
            self.class_priors[k] = (n_c + self.alpha) / (num_samples + num_classes * self.alpha)

        if self.verbose:
            print(self.class_conditional)
            print(self.class_priors)

    def class_conditional_prior_product(self, X):
        """Return the unnormalised log-joint log P(x | class) + log P(class).

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features), entries in {0, 1}

        Returns
        -------
        ndarray of shape (num_samples, num_classes)
        """
        X = np.asarray(X, dtype=float)
        left = X @ np.log(self.class_conditional).T           # contribution of features equal to 1
        mid = (1 - X) @ np.log(1 - self.class_conditional).T  # contribution of features equal to 0
        right = np.log(self.class_priors)
        return left + right + mid

    def predict(self, X):
        """Return the most probable class label for each row of ``X``."""
        q = self.class_conditional_prior_product(X)
        # Map the winning row position back to the actual label.
        return self.classes_[np.argmax(q, axis=1)]

    def predict_proba(self, X):
        """Return posterior class probabilities, shape (num_samples, num_classes).

        Columns are ordered like ``classes_``; each row sums to 1.
        """
        q = self.class_conditional_prior_product(X)
        if self.verbose:
            print(q)
        # Subtract the row-wise maximum before exponentiating: with many features
        # the raw log-joints are so negative that exp() underflows to 0 for every
        # class, which would make the normalisation 0 / 0.
        prob = np.exp(q - np.max(q, axis=1, keepdims=True))
        return prob / np.sum(prob, axis=1, keepdims=True)
        
        
        

In [30]:
# Fit the classifier on the full toy dataset and score the same rows it was fitted on.
clf = BernoulliNB()
clf.fit(X, y)
# NOTE: only a cell's last expression is displayed, so these hard predictions are
# computed but not shown.
clf.predict(X)
clf.predict_proba(X)


[[0.25 0.75]
 [0.75 0.25]]
[0.5 0.5]
[[-3.4657359  -1.26851133]
 [-1.26851133 -3.4657359 ]
 [-1.26851133 -3.4657359 ]
 [-3.4657359  -1.26851133]]


array([[0.1, 0.9],
       [0.9, 0.1],
       [0.9, 0.1],
       [0.1, 0.9]])

In [37]:
# Hand-check of predict_proba: exponentiate the smaller of the two log-joint scores,
# copied from the printed output of the previous cell.
x1 = np.exp(-3.4657359)
x1

0.03125000008749146

In [38]:
# Hand-check of predict_proba: exponentiate the larger of the two log-joint scores,
# copied from the printed output of the classifier cell.
x2 = np.exp(-1.26851133)
x2

0.2812499987241114

In [39]:
# Normalising constant for the hand-check: sum of the exponentiated scores of both classes.
z = np.exp(-3.4657359) + np.exp(-1.26851133)
z

0.31249999881160284

In [41]:
# Normalised probability of the higher-scoring class; compare with the 0.9 entries
# returned by predict_proba.
x2/z

0.8999999993397403