# Implementation

In [17]:
import numpy as np

In [67]:
def fit(X_train, Y_train):
    """Build the count tables used by the categorical Naive Bayes classifier.

    Parameters
    ----------
    X_train : array-like, indexed as [row, feature]
        Feature matrix whose entries are discrete (hashable) values.
        Converted with ``np.asarray`` so nested lists are accepted too.
    Y_train : array-like, one entry per row of ``X_train``
        Class label of each training row.

    Returns
    -------
    dict
        ``result['total_data']`` is the number of training rows.
        For every class ``c`` present in ``Y_train``:
        ``result[c]['total_count']`` is the number of rows of class ``c`` and
        ``result[c][j][v]`` is the number of rows of class ``c`` whose
        ``j``-th feature (1-based) equals ``v``.  Every value ``v`` that
        occurs anywhere in column ``j`` gets an entry, possibly zero.
    """
    # Boolean-mask indexing and column slicing below need real arrays.
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)

    result = {'total_data': len(Y_train)}
    class_values = set(Y_train)                    # different classes present in output
    if not class_values:
        return result

    # These depend only on X_train, so compute them once instead of per class.
    num_features = X_train.shape[1]
    values_per_feature = [set(X_train[:, col]) for col in range(num_features)]

    for current_class in class_values:
        class_rows = X_train[Y_train == current_class]
        class_table = {'total_count': len(class_rows)}

        # Features are keyed 1..num_features; column j-1 holds feature j.
        for j, possible_values in enumerate(values_per_feature, start=1):
            column = class_rows[:, j - 1]
            class_table[j] = {
                value: int((column == value).sum()) for value in possible_values
            }

        result[current_class] = class_table

    return result

In [83]:
# Function to calculate probability
def probability(dictionary, x, current_class):
    """Return the unnormalised log-probability of ``current_class`` given ``x``.

    Computes ``log P(y = current_class) + sum_j log P(x_j | y = current_class)``
    with Laplace (add-one) smoothing on each conditional term.

    Parameters
    ----------
    dictionary : dict
        Count tables in the layout produced by ``fit``:
        ``dictionary['total_data']``, ``dictionary[c]['total_count']`` and
        ``dictionary[c][j][value]`` for 1-based feature index ``j``.
    x : sequence
        Feature values of one sample; ``x[j-1]`` is feature ``j``.
    current_class : hashable
        Class label to score; must be a key of ``dictionary``.

    Returns
    -------
    float
        The log-score.  It is comparable across classes for the same ``x``
        but is not normalised to a true posterior.
    """
    class_table = dictionary[current_class]
    class_count = class_table['total_count']

    # log prior: log P(y = current_class)
    output = np.log(class_count) - np.log(dictionary['total_data'])

    # Every key except 'total_count' is a feature index.
    num_features = len(class_table) - 1

    for j in range(1, num_features + 1):
        value_counts = class_table[j]
        # A value never seen in training has no entry; treat it as a zero
        # count so smoothing applies instead of raising KeyError.
        smoothed_count = value_counts.get(x[j - 1], 0) + 1
        # Denominator: class size plus the number of values known for feature j.
        smoothed_total = class_count + len(value_counts)
        output = output + (np.log(smoothed_count) - np.log(smoothed_total))

    return output

In [84]:
# Function to predict value based on maximum value of classes
def predictSinglePoint(dictionary, x):
    """Return the class whose log-score for sample ``x`` is the largest.

    ``dictionary`` holds the count tables; its ``'total_data'`` entry is
    bookkeeping rather than a class and is skipped.  When several classes
    share the top score, the one met first while iterating ``dictionary``
    wins.  If there is no class at all, ``-1`` is returned.
    """
    labels = (key for key in dictionary if key != 'total_data')
    return max(
        labels,
        key=lambda label: probability(dictionary, x, label),
        default=-1,
    )

In [85]:
def predict(dictionary, X_test):
    """Classify every sample of ``X_test`` and return the labels as a list.

    Each element obtained by iterating ``X_test`` is passed, together with
    the count tables in ``dictionary``, to ``predictSinglePoint``; the
    results are collected in the same order.
    """
    return [predictSinglePoint(dictionary, sample) for sample in X_test]

In [86]:
# Function to convert given data into 4 labels(categories)
def MakeLabelled(column):
    """Replace the values of ``column`` by one of four bin labels, in place.

    The cut points are 0.5x, 1x and 1.5x the column mean, all computed
    before anything is overwritten.  A value gets the label of the first
    test it passes:

        value < 0.5 * mean  -> 0
        value < mean        -> 1
        value < 1.5 * mean  -> 2
        otherwise           -> 3

    The same ``column`` object is returned after being modified.
    """
    mean_value = column.mean()
    low_cut, mid_cut, high_cut = 0.5 * mean_value, mean_value, 1.5 * mean_value

    # np.select takes the first matching condition, like the if/elif chain.
    labels = np.select(
        [column < low_cut, column < mid_cut, column < high_cut],
        [0, 1, 2],
        default=3,
    )
    column[:] = labels   # write back into the caller's array

    return column

In [87]:
# Load the Iris dataset through scikit-learn's datasets module
from sklearn import datasets
iris = datasets.load_iris()

In [88]:
# Feature matrix and class labels of the loaded dataset.
# NOTE: X is the very same object as iris.data (no copy is made), so any
# in-place change to X also changes iris.data.
X = iris.data
Y = iris.target

In [89]:
# Convert every feature column into the four bin labels (0-3) of MakeLabelled.
# NOTE: this runs before the train/test split below, so the column means
# that define the bins are computed over all rows, test rows included.
for i in range(0,X.shape[1]):
    X[:,i] = MakeLabelled(X[:,i])

In [90]:
# Splitting data into training and testing data
# (test_size = 0.25 holds out a quarter of the rows; random_state = 0 makes
# the split reproducible)
from sklearn import model_selection
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X,Y,test_size = 0.25, random_state = 0)

In [91]:
# Build the per-class count tables from the training split
dictionary = fit(X_train,Y_train)

In [92]:
# Predict a class label for every row of the test split (returns a list)
Y_pred = predict(dictionary,X_test)

In [94]:
# Print the classification report and confusion matrix, comparing the true
# test labels (Y_test) with the predictions of the hand-written classifier
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(Y_test,Y_pred))
print(confusion_matrix(Y_test,Y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.94      1.00      0.97        16
           2       1.00      0.89      0.94         9

    accuracy                           0.97        38
   macro avg       0.98      0.96      0.97        38
weighted avg       0.98      0.97      0.97        38

[[13  0  0]
 [ 0 16  0]
 [ 0  1  8]]



# GAUSSIAN PROBABILITY

In [96]:
# Fit scikit-learn's Gaussian Naive Bayes on the same train/test split and
# print its classification report and confusion matrix for comparison.
# NOTE: X_train / X_test here are the binned labels (0-3) produced earlier by
# MakeLabelled, not the original measurements, and Y_pred from the
# hand-written classifier is overwritten.
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X_train,Y_train)
Y_pred = clf.predict(X_test)
print(classification_report(Y_test,Y_pred))
print(confusion_matrix(Y_test,Y_pred))

              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.76      1.00      0.86        16
           2       1.00      0.67      0.80         9

    accuracy                           0.87        38
   macro avg       0.92      0.84      0.86        38
weighted avg       0.90      0.87      0.87        38

[[11  2  0]
 [ 0 16  0]
 [ 0  3  6]]
