# Coding our own Naive Bayes classifier
###code written by Uday kiran Bakka

In [161]:
#importing required modules,libraries
import numpy as np
from collections import Counter
from sklearn.metrics import confusion_matrix,classification_report
from sklearn import datasets

In [162]:
#loading dataset
from sklearn import model_selection
iris = datasets.load_iris()
# Keep the feature matrix and the class labels under short names; both are
# rebound to the training portion later in the notebook.
x, y = iris.data, iris.target

In [163]:
#function to convert continuous data into discrete data
def makeLabelled(column):
    """Discretise a numeric column in place into four ordinal labels.

    The cut points are 0.5x, 1x and 1.5x the column mean, all computed
    before any value is overwritten. Each value is replaced by:

    * 0 if it is below 0.5 * mean,
    * 1 if it is below the mean,
    * 2 if it is below 1.5 * mean,
    * 3 otherwise.

    The conditions are tried in that order and the first match wins.

    Args:
        column: array-like exposing ``.mean()`` and supporting element
            assignment (e.g. a NumPy column view). It is overwritten.

    Returns:
        The same ``column`` object, now holding the labels.
    """
    mean_value = column.mean()
    lower_cut = 0.5 * mean_value
    upper_cut = 1.5 * mean_value

    # Nested selection reproduces an if/elif/else chain: an inner branch is
    # only consulted where every outer condition was false.
    labels = np.where(
        column < lower_cut,
        0,
        np.where(column < mean_value, 1, np.where(column < upper_cut, 2, 3)),
    )

    # Write back through the existing buffer so callers holding a view see
    # the labels and the column keeps its dtype.
    column[:] = labels
    return column
# Replace every feature column of x with its discretised labels.
for feature_idx in range(x.shape[-1]):
    x[:, feature_idx] = makeLabelled(x[:, feature_idx])

# Split the data into train and test parts (test_size=0.25, random_state=0).
# From here on, x and y refer to the training portion only.
x, x_test, y, y_test = model_selection.train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)
print(x)

[[2. 1. 2. 2.]
 [1. 1. 2. 2.]
 [2. 1. 2. 3.]
 [1. 2. 0. 0.]
 [2. 2. 2. 3.]
 [1. 2. 0. 1.]
 [1. 2. 0. 0.]
 [1. 1. 1. 1.]
 [2. 1. 2. 3.]
 [2. 2. 3. 3.]
 [2. 1. 2. 2.]
 [2. 1. 3. 3.]
 [1. 1. 2. 1.]
 [2. 1. 3. 3.]
 [2. 2. 2. 2.]
 [1. 1. 2. 1.]
 [2. 1. 2. 3.]
 [2. 1. 2. 2.]
 [1. 1. 2. 2.]
 [2. 1. 2. 3.]
 [2. 2. 2. 2.]
 [2. 1. 2. 3.]
 [2. 2. 2. 2.]
 [1. 2. 0. 0.]
 [1. 1. 2. 3.]
 [1. 1. 2. 2.]
 [1. 1. 1. 2.]
 [1. 1. 2. 2.]
 [2. 1. 2. 2.]
 [2. 2. 3. 3.]
 [1. 2. 0. 0.]
 [1. 1. 0. 0.]
 [2. 1. 2. 3.]
 [1. 1. 2. 2.]
 [1. 2. 0. 0.]
 [1. 2. 0. 0.]
 [2. 2. 2. 2.]
 [1. 2. 0. 0.]
 [2. 1. 2. 3.]
 [1. 1. 1. 1.]
 [1. 2. 0. 0.]
 [2. 1. 2. 2.]
 [2. 2. 3. 3.]
 [1. 1. 2. 2.]
 [1. 1. 0. 0.]
 [2. 1. 3. 3.]
 [2. 2. 3. 3.]
 [2. 1. 3. 3.]
 [2. 1. 2. 3.]
 [1. 2. 0. 0.]
 [1. 2. 0. 0.]
 [2. 2. 2. 3.]
 [2. 2. 3. 3.]
 [1. 1. 0. 0.]
 [2. 2. 2. 3.]
 [1. 1. 0. 0.]
 [1. 1. 2. 3.]
 [2. 2. 2. 3.]
 [1. 2. 0. 0.]
 [1. 2. 0. 0.]
 [2. 1. 3. 2.]
 [1. 2. 0. 0.]
 [1. 1. 0. 0.]
 [1. 2. 0. 0.]
 [1. 1. 2. 2.]
 [2. 2. 3. 3.]
 [2. 1. 3.

In [165]:
#class which implements Naive bayes classifier
class NaiveBayesClassifier:
    """Naive Bayes classifier for discrete (categorical) features.

    Training only counts occurrences. The counts live in ``self.result``:

    * ``result["total"]``                   -> number of training samples
    * ``result[label]["total"]``            -> samples belonging to ``label``
    * ``result[label][feature][value]``     -> samples of ``label`` whose
      ``feature`` column equals ``value``
    * ``result[label][feature]["total"]``   -> same as ``result[label]["total"]``

    NOTE: the string ``"total"`` is a reserved key, so it cannot be used as a
    class label or as a feature value.
    NOTE: no smoothing is applied. A feature value that was never seen
    together with a class gives that class a score of exactly 0.
    """

    def __init__(self):
        # Empty until fit() is called; predict() then has no classes to score.
        self.result = {}

    def fit(self, x, y):
        """Build the count tables in ``self.result`` from training data.

        Args:
            x: 2-D array-like of discrete feature values, one row per sample.
            y: 1-D array-like of class labels, one per row of ``x``.

        Raises:
            ValueError: if ``x`` and ``y`` have different lengths.
        """
        x = np.array(x)
        y = np.array(y)
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same number of samples "
                f"(got {len(x)} and {len(y)})"
            )

        # Counter keeps labels in first-seen order; predict() breaks ties in
        # favour of the earlier label, so this order is part of the behaviour.
        class_counts = Counter(y)
        n_features = len(x[0]) if len(x) else 0

        result = {"total": len(y)}
        for label, count in class_counts.items():
            result[label] = {"total": count}
            for feature in range(n_features):
                result[label][feature] = {}

        # One pass over the rows instead of rescanning every row for each
        # (class, feature) pair.
        for row, label in zip(x, y):
            per_class = result[label]
            for feature, value in enumerate(row):
                value_counts = per_class[feature]
                value_counts[value] = value_counts.get(value, 0) + 1

        # Every sample of a class contributes one value per feature, so the
        # per-feature total equals the class count.
        for label, count in class_counts.items():
            for feature in range(n_features):
                result[label][feature]["total"] = count

        self.result = result

    def predict(self, x):
        """Predict a class label for every row of ``x``.

        Each class is scored as its prior times the product, over features,
        of the fraction of that class's training samples sharing the row's
        feature value. The highest score wins; on a tie the class seen first
        during training is kept.

        Args:
            x: iterable of rows, each an indexable sequence of feature values
                with the same number of features used in ``fit``.

        Returns:
            list with one predicted label per row. A row is labelled ``-1``
            when there are no classes to score (``fit`` not called yet, or
            called with no samples).
        """
        result = self.result
        labels = [label for label in result if label != "total"]
        # The prior depends only on the class, so compute it once per class
        # rather than once per feature of every point.
        priors = {
            label: result[label]["total"] / result["total"] for label in labels
        }

        y_pred = []
        for point in x:
            answer = -1
            max_probability = None
            for label in labels:
                prior = priors[label]
                feature_tables = result[label]
                probability = prior
                for feature, value in enumerate(point):
                    # P(value | class) written as joint / prior; the order of
                    # the arithmetic is kept so scores are unchanged.
                    joint = feature_tables[feature].get(value, 0) / result["total"]
                    probability *= joint / prior
                if max_probability is None or probability > max_probability:
                    max_probability = probability
                    answer = label
            y_pred.append(answer)

        return y_pred
                
# Train the hand-written classifier on the training split, then label the
# held-out test rows.
clf = NaiveBayesClassifier()
clf.fit(x, y)
y_pre = clf.predict(x_test)


In [166]:
#testing the accuracy of the classifier we have written
# Print the confusion matrix first, then the per-class report, both comparing
# the true test labels with the predictions of our classifier.
for metric in (confusion_matrix, classification_report):
    print(metric(y_test, y_pre))

[[11  2  0]
 [ 0 16  0]
 [ 0  1  8]]
              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.84      1.00      0.91        16
           2       1.00      0.89      0.94         9

    accuracy                           0.92        38
   macro avg       0.95      0.91      0.92        38
weighted avg       0.93      0.92      0.92        38



In [167]:
#checking the accuracy of inbuilt classifier 
from sklearn.naive_bayes import GaussianNB

# Fit scikit-learn's GaussianNB on the same training split for comparison.
# NOTE: clf is rebound here and no longer refers to our own classifier.
clf = GaussianNB()
clf.fit(x, y)
pred = clf.predict(x_test)

# Same two reports as for the hand-written classifier, in the same order.
for metric in (confusion_matrix, classification_report):
    print(metric(y_test, pred))

[[11  2  0]
 [ 0 16  0]
 [ 0  3  6]]
              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.76      1.00      0.86        16
           2       1.00      0.67      0.80         9

    accuracy                           0.87        38
   macro avg       0.92      0.84      0.86        38
weighted avg       0.90      0.87      0.87        38



In [168]:
#we have seen that our coded classifier is working fine
#keep coding
#code written by Uday kiran Bakka