# NaiveBayes 朴素贝叶斯

In [1]:
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

import numpy as np
from collections import Counter

In [2]:
iris = load_iris()
x = iris.data
y = iris.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=666666)

In [3]:
class GNB:
    def __init__(self):
        self.mean = None
        self.var = None
        self.class_prior_ = None
        self.class_count_ = None
        self.classes_ = None
    
    def fit(self, x_train, y_train):
        self.class_prior_ = self._get_prior(y_train)
        self.classes_ = np.array(set(y_train))
#         self.class_count_ = len(self.class_prior_)
        self.mean = self._get_mean(x_train, y_train)
        self.var = self._get_var(x_train, y_train)
        return self
    
    def predict(self, x_data):
        likelihood = np.apply_along_axis(self._gaussian_func, axis=1, arr=x_data)
        probs = self.class_prior_ * likelihood
        probs_sum = probs.sum(axis=1)
        p = probs / probs_sum[:, None]
        return p.argmax(axis=1)
    
    def score(self, x_test, y_test):
        y_predict = self.predict(x_test)
        return np.sum(y_predict == y_test) / len(y_test)
    
    def _get_mean(self, x_data, y_data):
        return np.array([x_data[y_data == i].mean(axis=0) \
                         for i in range(self.class_count_)])
    
    def _get_var(self, x_data, y_data):
        return np.array([x_data[y_data == i].var(axis=0) \
                         for i in range(self.class_count_)])
    
    def _get_prior(self, y_label):
        cnt = Counter(y_label)
        total_label = len(y_label)
        self.class_count_ = len(cnt)
        return np.array([cnt[i] / total_label for i in range(self.class_count_)])
    
    def _gaussian_func(self, x_data_row):
        return (1 / np.sqrt(2 * np.pi * self.var)
                * np.exp(-(x_data_row - self.mean)**2 / (2 * self.var))).prod(axis=1)
    
    def __repr__(self):
        return "GNB()"

In [4]:
nb = GNB()
nb.fit(x_train, y_train)
nb.predict(x_test)

array([2, 0, 2, 2, 1, 0, 2, 2, 0, 2, 1, 2, 1, 2, 2, 2, 1, 0, 2, 1, 2, 1,
       2, 1, 1, 0, 2, 0, 1, 2], dtype=int64)

In [5]:
nb.score(x_test, y_test)

0.9

In [6]:
nb.class_count_

3

In [7]:
nb.class_prior_

array([0.36666667, 0.33333333, 0.3       ])

In [8]:
nb.classes_

array({0, 1, 2}, dtype=object)