In [1]:
import numpy as np
    
class MultinomialNB:
    """Naive Bayes classifier over categorical feature values.

    Despite the name, every distinct value seen in a feature column during
    ``fit`` is treated as its own category, and ``p(x_j = v | y = k)`` is
    estimated as the plain relative frequency of ``v`` among the training
    rows of class ``k``.  No smoothing is applied, so a value that never
    occurred together with a class has probability 0 for that class.

    Attributes set by ``fit``:
        y_classes: sorted array of the distinct labels.
        x_classes: list holding, per feature, the sorted array of distinct
            values seen in training.
        phi_y: array of class priors, aligned with ``y_classes``.
        phi_x: nested list; ``phi_x[k][j][i]`` is the estimate of
            ``p(x_j = x_classes[j][i] | y = y_classes[k])``.
    """

    def fit(self, X, y):
        """Estimate class priors and per-feature value frequencies.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like with one label per row of ``X``.

        Returns:
            ``self``, so calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.y_classes, y_counts = np.unique(y, return_counts=True)
        # Features usually have different numbers of distinct values, so keep
        # a plain list: np.array() on such a ragged sequence raises ValueError
        # on NumPy >= 1.24.
        self.x_classes = [np.unique(column) for column in X.T]
        self.phi_y = y_counts / float(y_counts.sum())
        self.phi_x = self.mean_x(X, y)
        return self

    def mean_x(self, X, y):
        """Return the conditional value frequencies for every class and feature.

        The result is a nested list indexed as ``[class][feature]``; each entry
        is an array aligned with ``self.x_classes[feature]``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        phi = []
        for label in self.y_classes:
            rows = X[y == label]
            # Comparing a column vector against the row of known values gives
            # a one-hot matrix; its column means are the relative frequencies.
            phi.append([(rows[:, j].reshape(-1, 1) == values).mean(axis=0)
                        for j, values in enumerate(self.x_classes)])
        return phi

    def predict(self, X):
        """Return the most probable label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features); may have
                zero rows.

        Returns:
            1-D array of labels taken from ``self.y_classes``.

        Raises:
            ValueError: if ``X`` is not 2-D, has the wrong number of columns,
                or contains a feature value that was not seen during ``fit``.
        """
        best = np.argmax(self._log_joint(np.asarray(X)), axis=1)
        # Indexing y_classes directly keeps the label dtype intact (no
        # truncation of string labels, no failure on empty input).
        return self.y_classes[best]

    def compute_probs(self, x):
        """Return the most probable label for a single sample ``x``."""
        return self.predict(np.asarray(x).reshape(1, -1))[0]

    def compute_prob(self, x, y):
        """Return the joint probability ``p(x, y)`` for one sample.

        Args:
            x: 1-D array-like of feature values.
            y: positional index into ``self.y_classes`` (not the label itself).

        Raises:
            ValueError: if a feature value was not seen during ``fit``.

        Note:
            This is the plain product of probabilities and can underflow to
            0.0 when there are many features; ``predict`` works in log space
            and does not have that limitation.
        """
        x = np.asarray(x)
        Pxy = 1.0
        for j in range(len(x)):
            i = self._value_indices(j, x[j:j + 1])[0]
            Pxy *= self.phi_x[y][j][i]  # p(xj|y)
        return Pxy * self.phi_y[y]

    def evaluate(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return (self.predict(X) == np.asarray(y)).mean()

    def _log_joint(self, X):
        """Return an (n_samples, n_classes) array of ``log p(x, y)``."""
        n_features = len(self.x_classes)
        if X.ndim != 2 or X.shape[1] != n_features:
            raise ValueError(
                "X must be 2-D with %d feature columns, got shape %r"
                % (n_features, X.shape))
        # log(0) = -inf is the intended score for impossible combinations, so
        # silence the divide-by-zero warning NumPy would otherwise emit.
        with np.errstate(divide="ignore"):
            log_joint = np.zeros((X.shape[0], len(self.y_classes)))
            log_joint += np.log(self.phi_y)
            for j in range(n_features):
                idx = self._value_indices(j, X[:, j])
                # Rows: classes; columns: the known values of feature j.
                log_table = np.log(
                    np.array([class_phi[j] for class_phi in self.phi_x]))
                log_joint += log_table[:, idx].T
        return log_joint

    def _value_indices(self, j, column):
        """Map values of feature ``j`` to their positions in ``x_classes[j]``."""
        values = self.x_classes[j]
        # np.unique returns sorted values, so a binary search finds each one.
        idx = np.minimum(np.searchsorted(values, column), len(values) - 1)
        unseen = values[idx] != column
        if unseen.any():
            raise ValueError(
                "feature %d has value %r that was not seen during fit"
                % (j, column[unseen][0]))
        return idx

In [2]:
from sklearn import datasets
from utils import accuracy_score
# Fit on the iris samples and score on those same samples, so the value
# displayed below is the accuracy on the training data itself.
iris = datasets.load_iris()
X, y = iris.data, iris.target
(
    MultinomialNB()
    .fit(X, y)
    .evaluate(X, y)
)

0.9666666666666667

In [3]:
# Six hand-written 2-feature samples: the first three carry label 1, the
# last three label 2.  The classifier is fitted and then asked to predict
# the very same rows.
X = np.array([
    [-1, -1], [-2, -1], [-3, -2],
    [1, 1], [2, 1], [3, 2],
])
Y = np.array([1] * 3 + [2] * 3)
clf = MultinomialNB()
clf.fit(X, Y)
print(clf.predict(X))

[1 1 1 2 2 2]


In [4]:
from sklearn import datasets
# Fit on the digits samples and score on those same samples, so the value
# displayed below is the accuracy on the training data itself.
digits = datasets.load_digits()
X, y = digits.data, digits.target
(
    MultinomialNB()
    .fit(X, y)
    .evaluate(X, y)
)

0.9833055091819699