# 1) Implementation of Naive Bayes

In [1]:
import numpy as np

In [2]:
def fit(x_train , y_train):
    """Build the count table used by the categorical Naive Bayes classifier.

    Parameters
    ----------
    x_train : 2-D array-like
        One row per sample, one column per (discrete-valued) feature.
        Converted with ``np.asarray`` so nested lists are accepted as
        well as NumPy arrays.
    y_train : 1-D array-like
        Class label of each row of ``x_train``.

    Returns
    -------
    dict
        ``result["total_data"]``      -- number of training rows.
        ``result[c]["total_count"]``  -- number of rows whose label is ``c``.
        ``result[c][j][v]``           -- number of class-``c`` rows whose
        feature ``j`` (1-based) equals ``v``. Every value seen in column
        ``j`` anywhere in the training set gets an entry, possibly 0.
    """
    # Boolean-mask and column indexing below need real arrays; plain lists
    # would otherwise be indexed with a scalar False and fail.
    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)

    result = {}
    result["total_data"] = len(y_train)
    if result["total_data"] == 0:
        # No labels means no classes to tabulate.
        return result

    # These do not depend on the class, so compute them once up front.
    num_features = x_train.shape[1]
    values_per_feature = [set(x_train[: , j]) for j in range(num_features)]

    for current_class in set(y_train):
        current_class_rows = (y_train == current_class)
        x_train_current = x_train[current_class_rows]

        class_counts = {"total_count": int(current_class_rows.sum())}
        # Feature keys are 1-based: 1 .. num_features.
        for i in range(1 , num_features + 1):
            feature_column = x_train_current[: , i - 1]
            class_counts[i] = {
                value: (feature_column == value).sum()
                for value in values_per_feature[i - 1]
            }
        result[current_class] = class_counts

    return result

In [3]:
## Multiplying many probabilities gives numbers too small to store in a
## float, so the score is accumulated as a sum of log-probabilities instead.
def probability(dict , x ,current_class):
    """Return the smoothed log score of ``current_class`` for one sample.

    The score is ``log P(class) + sum_j log P(x_j | class)``, where each
    conditional uses add-one smoothing:
    ``(count(x_j, class) + 1) / (count(class) + number_of_known_values_j)``.

    Parameters
    ----------
    dict : dict
        Count table in the layout produced by ``fit``.
    x : sequence
        Feature values of one sample; ``x[j - 1]`` is feature ``j``.
    current_class : hashable
        Class label to score; must be a key of ``dict``.

    Returns
    -------
    float
        Log score (not normalised across classes).
    """
    class_stats = dict[current_class]
    class_total = class_stats["total_count"]
    output = np.log(class_total) - np.log(dict["total_data"])
    # Every key except "total_count" is a 1-based feature index.
    num_features = len(class_stats.keys()) - 1
    for i in range(1 , num_features + 1):
        value_counts = class_stats[i]
        # A value never seen during training has a raw count of 0; the +1
        # smoothing still gives it a non-zero probability instead of a KeyError.
        count_current_class_with_xi = value_counts.get(x[i - 1] , 0) + 1
        count_current_class = class_total + len(value_counts.keys())
        output += np.log(count_current_class_with_xi) - np.log(count_current_class)
    return output

In [4]:
def predictSinglePoint(dict , x):
    """Return the class with the highest log score for the sample ``x``.

    Every key of ``dict`` other than ``'total_data'`` is treated as a class
    and scored with ``probability``. On ties the class encountered first
    wins. If there are no classes at all, ``-1`` is returned.
    """
    best_class = -1
    best_score = None  # None marks "nothing scored yet"
    for label in dict.keys():
        if(label == 'total_data'):
            # Bookkeeping entry, not a class.
            continue
        score = probability(dict , x , label)
        if best_score is None or score > best_score:
            best_score = score
            best_class = label
    return best_class

In [5]:
def predict(dict , x_test):
    """Classify each row of ``x_test`` and return the labels as a list.

    Rows are passed one by one to ``predictSinglePoint`` together with the
    count table ``dict``; the output order follows the row order.
    """
    return [predictSinglePoint(dict , row) for row in x_test]

In [6]:
def makeLabelled(column):
    """Replace each value of ``column`` by a bin label 0-3, in place.

    The cut points are taken from the mean of the column on entry:
    below 0.5*mean -> 0, below mean -> 1, below 1.5*mean -> 2, otherwise 3.
    The labels are written back into ``column`` itself, and that same
    object is returned.
    """
    mean_value = column.mean()
    low_cut = 0.5 * mean_value
    high_cut = 1.5 * mean_value

    # Snapshot the values first so the write-back cannot affect the comparisons.
    originals = np.array(column , copy=True)
    # np.select takes the first condition that holds, like the if/elif chain.
    labels = np.select(
        [originals < low_cut , originals < mean_value , originals < high_cut],
        [0 , 1 , 2],
        default=3,
    )
    column[:] = labels
    return column

# 2) Testing our Naive Bayes Implementation on the Iris Dataset

In [7]:
## Load the Iris dataset bundled with scikit-learn.
from sklearn import datasets
iris = datasets.load_iris()
## `x` is the feature matrix and `y` the class labels.
## Note: `x` is bound to the same array object as `iris.data`, not a copy.
x = iris.data
y = iris.target

In [8]:
## Change the continuous data into labelled (binned) data.
## makeLabelled writes into the array it receives, so each raw column is
## copied first and `x` is rebuilt from `iris.data`. This keeps the raw
## measurements intact and makes the cell safe to re-run: binning `x` in
## place would re-bin the already-binned 0-3 labels on a second execution.
x = np.column_stack([makeLabelled(iris.data[: , i].copy()) for i in range(0 , iris.data.shape[-1])])

In [9]:
## Hold out 25% of the rows for testing. A fixed random_state keeps the
## split the same on every run.
from sklearn import model_selection
x_train , x_test , y_train , y_test = model_selection.train_test_split(x , y , test_size = 0.25 , random_state = 0)

In [10]:
## Build the count table from the training rows with our own fit().
## NOTE(review): the name `dict` rebinds the Python builtin for every cell
## that runs after this one; a name such as `model` would avoid that, but
## the later cells would have to be renamed at the same time.
dict = fit(x_train , y_train)

In [12]:
## Predict a class for every held-out row using the count table built above.
y_pred = predict(dict , x_test)

In [13]:
## Compare the predictions of our implementation with the true test labels:
## per-class precision / recall / F1, then the confusion matrix.
from sklearn.metrics import classification_report , confusion_matrix
print(classification_report(y_test , y_pred))
print(confusion_matrix(y_test , y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.94      1.00      0.97        16
           2       1.00      0.89      0.94         9

    accuracy                           0.97        38
   macro avg       0.98      0.96      0.97        38
weighted avg       0.98      0.97      0.97        38

[[13  0  0]
 [ 0 16  0]
 [ 0  1  8]]


# 3) Naive Bayes for Continuous Datasets

In [17]:
## Fit scikit-learn's GaussianNB on the same train/test split and report
## the same metrics as for our implementation.
## NOTE(review): x_train / x_test were split from `x` after it had been
## replaced by the 0-3 bin labels, so this model is fitted on the binned
## features, not on the raw continuous measurements.
## This cell reuses classification_report / confusion_matrix imported in
## an earlier cell.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(x_train , y_train)
y_pred = clf.predict(x_test)
print(classification_report(y_test , y_pred))
print(confusion_matrix(y_test , y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.76      1.00      0.86        16
           2       1.00      0.67      0.80         9

    accuracy                           0.87        38
   macro avg       0.92      0.84      0.86        38
weighted avg       0.90      0.87      0.87        38

[[11  2  0]
 [ 0 16  0]
 [ 0  3  6]]


# 4) Multinomial Naive Bayes on the Iris Dataset

In [18]:
## Fit scikit-learn's MultinomialNB on the same split and report the same
## metrics. `clf` and `y_pred` from the previous section are overwritten.
## NOTE(review): as above, the inputs are the 0-3 bin labels produced by
## makeLabelled, which this model receives as plain numeric features.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(x_train , y_train)
y_pred = clf.predict(x_test)
print(classification_report(y_test , y_pred))
print(confusion_matrix(y_test , y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.00      0.00      0.00        16
           2       0.36      1.00      0.53         9

    accuracy                           0.53        38
   macro avg       0.45      0.62      0.48        38
weighted avg       0.43      0.53      0.44        38

[[11  2  0]
 [ 0  0 16]
 [ 0  0  9]]
