In [None]:
class NaiveBayes1(object):
    """
    Naive Bayes classifier for binary features and binary classes
    (explicit-loop version).

    ``fit`` estimates two tables by plain counting:

    * ``pY_``   -- array of shape (n_classes,), the class distribution p(Y).
    * ``pXgY_`` -- array of shape (n_features, n_fvalues, n_classes), the
      per-feature conditional distribution p(X_j | Y).

    The number of classes and the number of feature values are both fixed
    to 2. No smoothing is applied, so a (feature value, class) pair that
    never occurs in the training data gets probability 0.
    """
    def __init__(self):
        self.pY_ = None # p(Y)
        self.pXgY_ = None # p(X | Y)

    def fit(self, X, y):
        """
        Estimate ``pY_`` and ``pXgY_`` from training data.

        Parameters
        ----------
        X : array with ``shape`` (n_samples, n_features)
            Feature values. Each entry is used as an index into an axis of
            length 2, so it is expected to be the integer 0 or 1.
        y : sequence of length n_samples
            Class labels. Each entry is used as an index into an axis of
            length 2, so it is expected to be the integer 0 or 1.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of samples.
        """
        n_samples = X.shape[0]
        n_features = X.shape[1]
        n_classes = 2
        n_fvalues = 2

        if n_samples != len(y):
            raise ValueError('Mismatched number of samples.')

        # Class distribution p(y): count samples per class, then normalize.
        nY = np.zeros(n_classes, dtype=int)
        for i in range(n_samples):
            nY[y[i]] += 1

        self.pY_ = np.empty(n_classes, dtype=float)
        for i in range(n_classes):
            self.pY_[i] = nY[i] / n_samples

        # Feature distributions p(X_j | Y): count every
        # (feature, feature value, class) combination.
        nXY = np.zeros((n_features, n_fvalues, n_classes), dtype=int)
        for i in range(n_samples):
            for j in range(n_features):
                nXY[j, X[i, j], y[i]] += 1

        self.pXgY_ = np.empty((n_features, n_fvalues, n_classes),
                              dtype=float)
        for j in range(n_features): # per feature
            for xi in range(n_fvalues): # per feature value
                for yi in range(n_classes): # per class
                    # Normalize by the class count so that, for a fixed
                    # feature and class, the values sum to one.
                    self.pXgY_[j, xi, yi] = nXY[j, xi, yi] / float(nY[yi])

    def predict(self, X):
        """
        Predict the most probable class for every row of ``X``.

        Parameters
        ----------
        X : array with ``shape`` (n_samples, n_features)
            Feature values (integers usable as indices into ``pXgY_``).

        Returns
        -------
        numpy.ndarray of int, shape (n_samples,)
            For each sample, the index of the class that maximizes
            log p(y) + sum_j log p(x_j | y).
        """
        # Input: several samples x at once.
        n_samples = X.shape[0]
        n_features = X.shape[1]

        # The log tables do not depend on the sample, so compute them once.
        log_pY = np.log(self.pY_)
        log_pXgY = np.log(self.pXgY_)
        feature_idx = np.arange(n_features)

        y = np.empty(n_samples, dtype=int)

        # Compute the (unnormalized) log posterior log p(y, x) per sample.
        for i, xi in enumerate(X):
            # Row j of the indexed table is log p(X_j = xi[j] | Y = .).
            logpXY = log_pY + np.sum(log_pXgY[feature_idx, xi, :], axis=0)
            # predict class
            y[i] = np.argmax(logpXY)
        return y

In [3]:
from abc import ABCMeta, abstractmethod

class BaseBinaryNaiveBayes(object, metaclass=ABCMeta):
    """
    Abstract base class for naive Bayes classifiers whose ``predict`` reads
    two fitted tables:

    * ``pY_``   -- class distribution p(Y), indexed by class.
    * ``pXgY_`` -- conditional distribution p(X_j | Y), indexed as
      ``[feature, feature value, class]``.

    Subclasses implement ``fit`` and are responsible for setting both
    attributes; ``predict`` is shared.
    """

    def __init__(self):
        # Start unfitted so that predict() can report a clear error.
        self.pY_ = None # p(Y)
        self.pXgY_ = None # p(X | Y)

    @abstractmethod
    def fit(self, X, y):
        """Estimate ``pY_`` and ``pXgY_`` from training data ``X`` and labels ``y``."""
        pass

    def predict(self, X):
        """
        Predict the most probable class for every row of ``X``.

        Parameters
        ----------
        X : array with ``shape`` (n_samples, n_features)
            Feature values (integers usable as indices into ``pXgY_``).

        Returns
        -------
        numpy.ndarray of int, shape (n_samples,)
            For each sample, the index of the class that maximizes
            log p(y) + sum_j log p(x_j | y).

        Raises
        ------
        AttributeError
            If ``pY_`` or ``pXgY_`` has not been set (model not fitted).
        """
        # getattr() also covers subclasses that never call __init__.
        if (getattr(self, 'pY_', None) is None
                or getattr(self, 'pXgY_', None) is None):
            raise AttributeError(
                'pY_ / pXgY_ are not set; call fit() before predict().')

        # Input: several samples x at once.
        n_samples = X.shape[0]
        n_features = X.shape[1]

        # The log tables do not depend on the sample, so compute them once.
        log_pY = np.log(self.pY_)
        log_pXgY = np.log(self.pXgY_)
        feature_idx = np.arange(n_features)

        y = np.empty(n_samples, dtype=int)

        # Compute the (unnormalized) log posterior log p(y, x) per sample.
        for i, xi in enumerate(X):
            # Row j of the indexed table is log p(X_j = xi[j] | Y = .).
            logpXY = log_pY + np.sum(log_pXgY[feature_idx, xi, :], axis=0)
            # predict class
            y[i] = np.argmax(logpXY)
        return y

In [4]:
class NaiveBayes1(BaseBinaryNaiveBayes):
    """
    Naive Bayes classifier for binary features and binary classes, with a
    ``fit`` that counts through NumPy broadcasting instead of Python loops.

    ``fit`` sets ``pY_`` (shape (n_classes,)) and ``pXgY_``
    (shape (n_features, n_fvalues, n_classes)); prediction is inherited from
    ``BaseBinaryNaiveBayes``. Both the number of classes and the number of
    feature values are fixed to 2, and no smoothing is applied.
    """
    def __init__(self):
        # Zero-argument super(): does not look up the global name
        # ``NaiveBayes1``, which is defined more than once in this notebook.
        super().__init__()

    def fit(self, X, y):
        """
        Estimate ``pY_`` and ``pXgY_`` from training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature values, compared against 0 and 1.
        y : array-like, shape (n_samples,)
            Class labels, compared against 0 and 1.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of samples.
        """
        # Accept plain sequences too: the indexing below needs ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)

        # constants
        n_samples = X.shape[0]
        n_classes = 2
        n_fvalues = 2

        # check the size of y
        if n_samples != len(y):
            raise ValueError('Mismatched number of samples.')

        # count up n[yi=y]: compare y as a column (n_samples, 1) against the
        # class labels as a row (1, n_classes), then sum over samples.
        nY = np.sum(y[:, np.newaxis] == np.arange(n_classes)[np.newaxis, :], axis=0)
        # calc pY_
        self.pY_ = nY / n_samples

        # count up n[x_ij=xj, yi=y]
        # Every operand is given four axes,
        # (sample, feature, feature value, class), so the comparisons
        # broadcast to one boolean array of that shape.
        ary_xi = np.arange(n_fvalues)[np.newaxis, np.newaxis, :, np.newaxis]
        ary_yi = np.arange(n_classes)[np.newaxis, np.newaxis, np.newaxis, :]
        ary_y = y[:, np.newaxis, np.newaxis, np.newaxis]
        ary_X = X[:, :, np.newaxis, np.newaxis]

        # Summing over the sample axis leaves (feature, feature value, class).
        nXY = np.sum(np.logical_and(ary_X == ary_xi, ary_y == ary_yi), axis=0)

        # calc pXgY_: divide by the class counts along the last (class) axis.
        self.pXgY_ = nXY / nY[np.newaxis, np.newaxis, :]

In [34]:
# Toy label vector used to walk through the vectorized class counting.
y = np.array([1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1])
n_samples = y.shape[0]

In [22]:
# Turn the labels into a column of shape (n_samples, 1);
# same result as y[:, np.newaxis].
ary_y = y.reshape(-1, 1)

In [23]:
# Display the labels after the trailing axis was added: one label per row.
ary_y

array([[1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1]])

In [29]:
# Candidate class labels laid out as a single row of shape (1, n_classes);
# same result as np.arange(n_classes)[np.newaxis, :].
n_classes = 2
ary_yi = np.arange(n_classes).reshape(1, -1)
ary_yi

array([[0, 1]])

In [28]:
# Broadcasting the label column against the class row gives a boolean
# table: entry [i, c] is True when sample i belongs to class c.
cmp_y = np.equal(ary_y, ary_yi)
cmp_y

array([[False,  True],
       [ True, False],
       [False,  True],
       [ True, False],
       [ True, False],
       [ True, False],
       [ True, False],
       [False,  True],
       [ True, False],
       [ True, False],
       [False,  True],
       [ True, False],
       [ True, False],
       [False,  True],
       [False,  True]], dtype=bool)

In [32]:
# Summing the boolean table over the sample axis counts samples per class.
cmp_y.sum(axis=0)

array([9, 6])

In [35]:
# Dividing the per-class counts by the number of samples gives p(y).
cmp_y.sum(axis=0) / n_samples

array([ 0.6,  0.4])