In [3]:
import numpy as np 
from functools import reduce 
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

import matplotlib.pyplot as plt
%matplotlib inline

In [1]:
class NaiveBayes:
    """Gaussian naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and (population) variance are estimated from the
    training samples of that class. Class priors are the relative class
    frequencies in the training labels.

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance when evaluating likelihoods.
        This keeps a feature that is constant within a class from causing a
        division by zero. If every feature is constant over the whole
        training set the added term is zero and no smoothing takes place.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        self.data = None
        self.labels = None
        self.feat_ids = None
        self.label_ids = None
        self._means = None
        self._variances = None
        self._prior_probs = None
        self._epsilon = 0.0

    def fit(self, data, labels):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        data : array-like of shape (n_samples, n_features)
            Numeric training samples.
        labels : array-like of shape (n_samples,)
            Class label of every training sample.

        Raises
        ------
        ValueError
            If ``data`` is not a non-empty 2-D array or ``labels`` does not
            hold exactly one label per sample.
        """
        data = np.array(data, dtype=float)
        labels = np.array(labels)
        if data.ndim != 2 or data.size == 0:
            raise ValueError("data must be a non-empty 2-D array of shape "
                             "(n_samples, n_features)")
        if labels.ndim != 1 or len(labels) != len(data):
            raise ValueError("labels must be 1-D with one entry per row of data")

        self.data = data
        self.labels = labels
        self.feat_ids = list(range(data.shape[1]))
        # np.unique returns the labels sorted, so the class order (and hence
        # the column order of predict_probs) is deterministic.
        self.label_ids = list(np.unique(labels))
        self._epsilon = self.var_smoothing * data.var(axis=0).max()

        # Drop statistics of any previous fit, then recompute them eagerly.
        self._means = None
        self._variances = None
        self._prior_probs = None
        self.means
        self.variances
        self.prior_probs

    def _require_fitted(self):
        """Raise RuntimeError when fit() has not been called yet."""
        if self.data is None or self.label_ids is None:
            raise RuntimeError("NaiveBayes instance is not fitted yet; call fit() first")

    @property
    def means(self):
        """Array of shape (n_classes, n_features) with per-class feature means."""
        if self._means is None:
            self._require_fitted()
            self._means = np.array([self.data[self.labels == label_id].mean(axis=0)
                                    for label_id in self.label_ids])
        # A copy is returned so callers cannot alter the cached statistics.
        return self._means.copy()

    @property
    def variances(self):
        """Array of shape (n_classes, n_features) with per-class population variances."""
        if self._variances is None:
            self._require_fitted()
            self._variances = np.array([self.data[self.labels == label_id].var(axis=0)
                                        for label_id in self.label_ids])
        return self._variances.copy()

    @property
    def prior_probs(self):
        """Array of shape (n_classes,) with the relative frequency of each class."""
        if self._prior_probs is None:
            self._require_fitted()
            self._prior_probs = np.array([np.mean(self.labels == label_id)
                                          for label_id in self.label_ids])
        return self._prior_probs.copy()

    def gaus(self, x, mean, variance):
        """Normal probability density at ``x``.

        ``variance`` is sigma squared, so it enters the formula directly:
        ``exp(-(x - mean)**2 / (2 * variance)) / sqrt(2 * pi * variance)``.
        """
        exponent = -(x - mean) ** 2 / (2 * variance)
        return np.exp(exponent) / np.sqrt(2 * np.pi * variance)

    def cond_prob(self, x, means, variances):
        """Likelihood of one sample under independent Gaussian features.

        Parameters
        ----------
        x, means, variances : array-like of shape (n_features,)
            Sample values and the matching per-feature means and variances.

        Returns
        -------
        Product of the per-feature densities. Plain products underflow for
        samples far from the mean; predict() and predict_probs() therefore
        work in log-space instead of calling this method.
        """
        densities = self.gaus(np.asarray(x, dtype=float),
                              np.asarray(means, dtype=float),
                              np.asarray(variances, dtype=float))
        return np.prod(densities)

    def _joint_log_likelihood(self, x):
        """Return log(prior * likelihood), shape (n_samples, n_classes)."""
        self._require_fitted()
        x = np.asarray(x, dtype=float)
        n_features = len(self.feat_ids)
        if x.size == 0:
            # An empty batch yields an empty result instead of an error.
            x = x.reshape(0, n_features)
        if x.ndim != 2 or x.shape[1] != n_features:
            raise ValueError("x must have shape (n_samples, {})".format(n_features))

        means = self.means
        variances = self.variances + self._epsilon
        # Broadcast to (n_samples, n_classes, n_features).
        diff = x[:, np.newaxis, :] - means[np.newaxis, :, :]
        per_feature = np.log(2 * np.pi * variances)[np.newaxis] + diff ** 2 / variances[np.newaxis]
        log_likelihood = -0.5 * per_feature.sum(axis=2)
        return np.log(self.prior_probs)[np.newaxis, :] + log_likelihood

    def predict(self, x):
        """Predict the most probable class label for every row of ``x``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy array of shape (n_samples,) holding values taken from the
        training labels (not positional class indices).
        """
        joint = self._joint_log_likelihood(x)
        return np.asarray(self.label_ids)[joint.argmax(axis=1)]

    def predict_probs(self, x):
        """Posterior class probabilities for every row of ``x``.

        Returns
        -------
        numpy array of shape (n_samples, n_classes); columns follow the order
        of ``label_ids`` and every row sums to one.
        """
        joint = self._joint_log_likelihood(x)
        # Subtracting the row maximum before exponentiating keeps at least
        # one term equal to 1, so the normaliser can never underflow to 0.
        shifted = np.exp(joint - joint.max(axis=1, keepdims=True))
        return shifted / shifted.sum(axis=1, keepdims=True)
            

In [5]:
# Fixed seed so the train/test split is identical after every
# "Restart Kernel -> Run All".
RANDOM_STATE = 42

# Load the iris dataset; keep the container under its own name so that
# `data` always refers to the feature matrix.
iris = datasets.load_iris()
data, labels = iris.data, iris.target

# Hold out 20% of the samples for evaluation.
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=RANDOM_STATE
)

In [12]:
# Hand-written classifier: train on the training split, predict the test split.
nb = NaiveBayes()
nb.fit(train_data, train_labels)
prediction = nb.predict(test_data)

# scikit-learn reference model trained and evaluated on the same split.
sklearn_nb = GaussianNB().fit(train_data, train_labels)
sklearn_prediction = sklearn_nb.predict(test_data)

# Accuracy is the fraction of test samples whose prediction equals the label.
accuracy = np.mean(prediction == test_labels)
sklearn_accuracy = np.mean(sklearn_prediction == test_labels)

print('self-made gaussian naive bayes: {}\nsklearn gaussian naive bayes: {}'.format(accuracy, sklearn_accuracy))

self-made gaussian naive bayes: 1.0
sklearn gaussian naive bayes: 1.0
