In [1]:
import numpy as np
from sklearn import datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [26]:
class NaiveBayes():
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. Prediction picks the class with the highest log-posterior.

    Parameters
    ----------
    eps : float, default 1e-4
        Small constant added to the variance terms so that a feature with
        zero variance inside a class does not cause a division by zero or
        a ``log(0)``.
    """

    def __init__(self, eps=1e-4):
        self.eps = eps

    def fit(self, X, y):
        """Estimate the per-class mean, variance and prior.

        Parameters
        ----------
        X : array-like, rows are samples and columns are features.
        y : array-like, one class label per row of ``X``.

        After fitting, ``self.classes`` holds the sorted unique labels and
        ``self.parameters['class' + str(label)]`` holds a dict with the keys
        ``'mean'``, ``'var'`` (both of shape ``(1, n_features)``) and
        ``'prior'`` (fraction of training rows carrying that label).
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.X = X
        self.y = y
        self.classes = np.unique(y)
        self.parameters = {}
        for c in self.classes:
            # Rows of X that belong to class c.
            X_c = X[y == c]
            self.parameters['class' + str(c)] = {
                'mean': np.mean(X_c, axis=0, keepdims=True),
                'var': np.var(X_c, axis=0, keepdims=True),
                'prior': X_c.shape[0] / X.shape[0],
            }

    def likelihood(self, X, classes):
        """Return the log-likelihood of each row of ``X`` under one class.

        Parameters
        ----------
        X : ndarray, rows are samples and columns are features.
        classes : a single class label (one of ``self.classes``), despite
            the plural parameter name.

        Returns
        -------
        ndarray of shape ``(1, n_samples)``.
        """
        params = self.parameters['class' + str(classes)]
        mean = params['mean']
        var = params['var']

        # Log of the Gaussian density, written out directly instead of
        # log(exp(...) / sqrt(...)): the exp() form underflows to 0 for
        # samples far from the class mean and then yields log(0) = -inf
        # for every class, which makes the argmax meaningless.
        log_density = (-(X - mean) ** 2 / (2 * var + self.eps)
                       - 0.5 * np.log(2 * np.pi * var + self.eps))

        # Features are assumed independent, so the joint log-likelihood is
        # the sum of the per-feature log-densities.
        return np.sum(log_density, axis=1, keepdims=True).T

    def posterior(self, X):
        """Return the unnormalised log-posterior of every class.

        Computes ``log P(Y) + sum_i log P(x_i | Y)`` for each class, in the
        order of ``self.classes``.

        Returns
        -------
        list with one ``(1, n_samples)`` ndarray per class.
        """
        output = []
        # Iterate over the actual labels (not 0..K-1) so the lookup keys
        # match the ones created in fit().
        for c in self.classes:
            prior = np.log(self.parameters['class' + str(c)]['prior'])
            output.append(prior + self.likelihood(X, c))
        return output

    def predict(self, X):
        """Return, for each row of ``X``, the label with the highest posterior.

        Returns
        -------
        ndarray of shape ``(n_samples,)`` containing values taken from
        ``self.classes``.
        """
        X = np.asarray(X)
        output = np.reshape(self.posterior(X), (self.classes.shape[0], X.shape[0]))
        # argmax gives a position in self.classes; map it back to the label.
        return self.classes[np.argmax(output, axis=0)]

In [33]:
def main(test_size=0.33, random_state=0):
    """Train and evaluate ``NaiveBayes`` on the Iris dataset.

    Parameters
    ----------
    test_size : float, default 0.33
        Fraction of the samples held out for evaluation; passed to
        ``train_test_split``.
    random_state : int or None, default 0
        Seed for the train/test split so the reported accuracy is
        reproducible across runs. Pass ``None`` for a different random
        split on every call.

    Returns
    -------
    The accuracy on the held-out split, which is also printed.
    """
    data = datasets.load_iris()
    # normalize() rescales each sample (row) on its own, so doing it before
    # the split does not leak information from the test rows.
    X = preprocessing.normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print('The accuracy is:', accuracy)
    return accuracy
    
# Run the demo when this code is executed directly (script or notebook
# kernel), but not when it is imported as a module.
if __name__ == "__main__":
    main()

The accuracy is: 0.98
