In [1]:
import numpy as np

In [2]:
def fit(x_train,y_train):
    result={}
    all_classes=set(y_train)
    for current_class in all_classes:
        result[current_class]={}
        result["total_data"]=len(y_train)
        current_class_rows=(y_train==current_class)
        x_train_current=x_train[current_class_rows]
        y_train_current=y_train[current_class_rows]
        num_features=x_train.shape[1]
        result[current_class]["total_count"]=len(y_train_current)
        for j in range(1,num_features+1):
            result[current_class][j]={}
            possible_values=set(x_train[:,j-1])
            for current_value in possible_values:
                result[current_class][j][current_value]=(x_train_current[:,j-1]==current_value).sum()
                
    return result

In [3]:
def probability(dictionary,x,current_class):
    output=np.log(dictionary[current_class]["total_count"])-np.log(dictionary["total_data"])
    num_features=len(dictionary[current_class].keys())-1
    for j in range(1,num_features+1):
        xj=x[j-1]
        count_corresponding_to_xJ_with_current_class=dictionary[current_class][j][xj]+1 #+1 due to laplace correction
        count_corresponding_to_current_class=dictionary[current_class]["total_count"]+len(dictionary[current_class][j].keys())
        current_xj_proba_bility=np.log(count_corresponding_to_xJ_with_current_class)-np.log(count_corresponding_to_current_class)
        output=output+current_xj_proba_bility
    return output

In [4]:
def predictsinglepoint(dictionary,x):
    maxp=-1
    best_class=-1
    first_run=True
    classes=dictionary.keys()
    for current_class in classes:
        if current_class=='total_data':
            continue
        p_current_class=probability(dictionary,x,current_class)
        if first_run or p_current_class>maxp:
            maxp=p_current_class
            best_class=current_class
        first_run=False
            
    return best_class

In [5]:
def predict(dictionary,x_test):
    y_pred=[]
    for x in x_test:
        output=predictsinglepoint(dictionary,x)
        y_pred.append(output)
    return y_pred

In [6]:
def make_labelled(column):
    second_cut=column.mean()
    first_cut=0.5*second_cut
    third_cut=1.5*second_cut
    for x in range(len(column)):
        if(column[x]<first_cut):
            column[x]=0
        elif(column[x]<second_cut):
            column[x]=1
        elif(column[x]<third_cut):
            column[x]=2
        else:
            column[x]=3
    return column

In [7]:
from sklearn import datasets
iris=datasets.load_iris()
x=iris.data
y=iris.target

In [8]:
for i in range(0,x.shape[1]):
    x[:,i]=make_labelled(x[:,i])

In [9]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y)

In [10]:
dictionary=fit(x_train,y_train)

In [11]:
y_pred=predict(dictionary,x_test)

In [12]:
from sklearn.metrics import classification_report,confusion_matrix
print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      0.90      0.95        10
           2       0.94      1.00      0.97        16

    accuracy                           0.97        38
   macro avg       0.98      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38

[[12  0  0]
 [ 0  9  1]
 [ 0  0 16]]
