In [1]:
import numpy as np 


In [2]:
class NaiveBayes:
    def __init__(self, distr='Gaussian'):
        self.distr=distr
        
    def get_stats(self,x):
        mean=[]
        variance=[]
        x=np.array(x)
        for feature in range(x.shape[1]):
            mean.append(np.mean(x[:,feature]))
            variance.append(np.var(x[:,feature]))
        return mean,variance

    def pdf(self,x,mean,variance):
        if self.distr=='Gaussian':
            return 1/(variance*np.sqrt(2*np.pi))*np.exp((-1/2)*(x-mean)**2/variance**2)
        if self.distr=='Laplace':
            b=np.sqrt(variance/2)
            return 1/(2*b)*np.exp(-np.absolute(x-mean)/b)            
        
    def fit(self, X_train, y_train):
        self.means={}
        self.var={}
        self.apriori={}
        X_train=np.array(X_train)
        y_train=np.array(y_train)
        N=y_train.size
        self.classes=list(set(y_train))
        class_data={}
        
        for cl in self.classes:
            class_data[cl]=[]
            
        for obs_index in range(X_train.shape[0]):
            class_data[y_train[obs_index]].append(X_train[obs_index])
                  
        for cl in self.classes:
            self.means[cl]=self.get_stats(class_data[cl])[0]
            self.var[cl]=self.get_stats(class_data[cl])[1]
              
        for cl in self.classes:
            n=0
            for i in y_train:
                if i==cl:
                    n+=1
            self.apriori[cl]=n/N
        return self.means,self.var,self.apriori          
    
    def predict_proba(self,X):
        X=np.array(X)
        self.probs=[]

        for obs in X:
            prob_classes=[]
            for c in self.classes:
                p=1
                for i in range(obs.size):
                    p=p*self.pdf(obs[i],self.means[c][i],self.var[c][i])
                                    
                prob_classes.append(self.apriori[c]*p)
            s=0    
            for i in prob_classes:
                s=s+i
            prob_classes=np.array(prob_classes)
            prob_classes=prob_classes/s
            self.probs.append(prob_classes)

        return self.probs

    def predict(self,x,threshold=0.6):
        y_pred = self.predict_proba(x)
        y_predicted=[]
        for i in y_pred:
            if max(y)>threshold:
                y_predicted.append(np.random.choice(len(self.classes),p=list(i)))
            else:
                y_predicted.append(np.argmax(i))
        
        return y_predicted

    def score(self,y_predicted,y_test):
        comparison=map(lambda x, y: x==y, y_test, y_predicted)
        n_true=0
        N=0
        for i in comparison:
            N+=1
            if i:
                n_true+=1
        return(n_true/N)
                

In [9]:
x=[[1,2,3],[2,3,4],[5,6,7],[7,8,9]]
y=[0,0,1,1]

In [10]:
nb=NaiveBayes()

In [11]:
nb.fit(x,y)

({0: [1.5, 2.5, 3.5], 1: [6.0, 7.0, 8.0]},
 {0: [0.25, 0.25, 0.25], 1: [1.0, 1.0, 1.0]},
 {0: 0.5, 1: 0.5})

In [12]:
nb.predict_proba(x)

[array([1.00000000e+00, 3.26244986e-16]),
 array([1.00000000e+00, 2.37968433e-10]),
 array([5.95722519e-126, 1.00000000e+000]),
 array([1.44486688e-313, 1.00000000e+000])]

In [13]:
nb.predict(x)

[0, 0, 1, 1]