In [1]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib widget

In [2]:
class NaiveBayes:
    
    def fit(self,X,y):
        n_samples,n_features = X.shape
        self.unique_cls = np.unique(y)
        tot_distinct_cls = len(self.unique_cls)
        self.cls_mean = np.zeros((tot_distinct_cls,n_features),dtype = np.float64)
        self.cls_var = np.zeros((tot_distinct_cls,n_features),dtype = np.float64)
        self.prior_probs = np.zeros(tot_distinct_cls, dtype = np.float64)
        for cls in self.unique_cls:
            X_cls = X[cls == y]
            self.cls_mean[cls,:] = X_cls.mean(axis=0)
            self.cls_var[cls,:] = X_cls.var(axis=0)
            self.prior_probs[cls] = X_cls.shape[0]/float(n_samples)
        return self
    
    def get_max_prob_cls(self,x):
        prob = []
        for i, cls in enumerate(self.unique_cls):
            prior_prob = np.log(self.prior_probs[i])
            mean = self.cls_mean[i]
            var = self.cls_mean[i]
            pdf_val = (np.exp(-(x-mean)**2/(2*var)))/(np.sqrt(2*np.pi*var))
            cond_prob = np.sum(np.log(pdf_val))
            prob.append(prior_prob+cond_prob)
        max_prob_idx = np.argmax(prob)
        return self.unique_cls[max_prob_idx]
    
    def predict(self,X):
        y_pred = [self.get_max_prob_cls(x) for x in X]
        return y_pred
    
    def accuracy(self,y_pred,y_actual):
        return (y_pred == y_actual).sum()/len(y_actual)
    

In [3]:
iris_dataset = datasets.load_iris()
x = iris_dataset.data
y = iris_dataset.target
feature_names = iris_dataset.feature_names
target_classes = iris_dataset.target_names
train_split,test_split,train_y_split,test_y_split = train_test_split(x,y,test_size=0.1,random_state=123)

In [4]:
nb = NaiveBayes().fit(train_split,train_y_split)
y_pred = nb.predict(test_split)
accuracy = nb.accuracy(y_pred,test_y_split)
print('Accuracy : {}'.format(accuracy))

Accuracy : 0.8666666666666667


In [5]:
y_pred

[1, 2, 2, 1, 0, 1, 1, 0, 0, 1, 2, 0, 1, 1, 2]

In [6]:
test_y_split

array([1, 2, 2, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 2, 2])