In [1]:
import numpy as np

In [2]:
def fit(X_train, Y_train):
    """Build the count table used by the categorical Naive Bayes classifier.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Discrete (categorical) feature values. Lists are accepted and
        converted to NumPy arrays.
    Y_train : array-like, shape (n_samples,)
        Class label of every training row.

    Returns
    -------
    dict
        ``result["total_data"]`` is the number of training rows. For every
        class label ``c``:

        * ``result[c]["total_count_current_class"]`` is the number of rows
          labelled ``c``;
        * ``result[c][j][v]`` (``j`` is the 1-based feature index) is the
          number of rows labelled ``c`` whose ``j``-th feature equals ``v``.
          Every value seen for feature ``j`` anywhere in ``X_train`` gets an
          entry, so values absent from a class are stored with count 0.

    Notes
    -----
    Class labels share the top-level dict with the ``"total_data"`` key, so a
    class literally labelled ``"total_data"`` would collide with it.
    """
    # The boolean-mask indexing below only works on ndarrays; without this
    # conversion plain lists fail (``list == label`` is a single bool).
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)

    result = {"total_data": len(Y_train)}
    if len(Y_train) == 0:
        # Nothing to count; still report the (zero) sample total.
        return result

    num_features = X_train.shape[1]
    # The set of values a feature can take does not depend on the class,
    # so compute it once instead of once per class.
    values_per_feature = [set(X_train[:, j]) for j in range(num_features)]

    for current_class in set(Y_train):
        current_class_rows = (Y_train == current_class)
        X_train_current = X_train[current_class_rows]

        class_counts = {"total_count_current_class": len(X_train_current)}
        # Features are keyed 1..num_features (1-based), matching `probability`.
        for j, possible_values in enumerate(values_per_feature, start=1):
            feature_column = X_train_current[:, j - 1]
            class_counts[j] = {
                current_value: int((feature_column == current_value).sum())
                for current_value in possible_values
            }
        result[current_class] = class_counts

    return result
            

In [3]:
def probability(dictionary, x, current_class):
    """Return the unnormalised log-posterior of ``current_class`` for point ``x``.

    Computes ``log P(class) + sum_j log P(x_j | class)`` from the count table
    produced by ``fit``. Each conditional uses add-one (Laplace) smoothing:
    ``(count(x_j, class) + 1) / (count(class) + number of known values of
    feature j)``.

    Parameters
    ----------
    dictionary : dict
        Count table with the layout produced by ``fit``.
    x : sequence
        Feature values of a single sample; ``x[j - 1]`` is feature ``j``.
    current_class : hashable
        Class label to score; must be a key of ``dictionary``.

    Returns
    -------
    float
        The log-probability score (only comparable between classes, not
        normalised).
    """
    class_counts = dictionary[current_class]
    class_total = class_counts["total_count_current_class"]

    # Log prior: fraction of training rows belonging to this class.
    output = np.log(class_total) - np.log(dictionary["total_data"])

    # Every key except "total_count_current_class" is a 1-based feature index.
    num_features = len(class_counts) - 1
    for j in range(1, num_features + 1):
        value_counts = class_counts[j]
        # A value never seen during training has no entry in the table; treat
        # it as a zero count (smoothing keeps the probability non-zero)
        # instead of failing with KeyError.
        count_current_class_with_value_xj = value_counts.get(x[j - 1], 0) + 1
        count_current_class = class_total + len(value_counts)
        output = output + (np.log(count_current_class_with_value_xj) - np.log(count_current_class))
    return output

In [4]:
def predictSinglePoint(dictionary, x):
    """Return the class label with the highest log-probability score for ``x``.

    Every key of ``dictionary`` other than ``"total_data"`` is treated as a
    class label and scored with ``probability``. On ties the class encountered
    first wins. Returns ``-1`` when the table contains no classes.
    """
    winner = -1
    winner_score = None
    for label in dictionary:
        # "total_data" is bookkeeping stored next to the class labels.
        if label == "total_data":
            continue
        score = probability(dictionary, x, label)
        # The first class is always accepted; later ones must be strictly better.
        if winner_score is None or score > winner_score:
            winner, winner_score = label, score
    return winner

In [5]:
def predict(dictionary, X_test):
    """Classify every row of ``X_test`` and return the predicted labels as a list.

    Each row is passed to ``predictSinglePoint`` together with the fitted
    count table ``dictionary``; the results keep the order of ``X_test``.
    """
    return [predictSinglePoint(dictionary, row) for row in X_test]

In [6]:
def makelabelled(column):
    """Discretise a numeric column into four levels around its mean.

    The three cut points are half the mean, the mean, and one and a half
    times the mean, taken in ascending order. Values are labelled:

    * 0 -- below the lowest cut point
    * 1 -- below the mean
    * 2 -- below the highest cut point
    * 3 -- everything else

    Parameters
    ----------
    column : numpy.ndarray (1-D) or array-like
        Values to discretise. An ndarray is overwritten in place (so a view
        such as ``X[:, i]`` updates ``X``); other array-likes are converted
        to a new array first.

    Returns
    -------
    numpy.ndarray
        The labelled column. For ndarray input this is the same object that
        was passed in.
    """
    # np.asarray hands back the very same object for ndarray input, so the
    # in-place write below still updates the caller's array.
    values = np.asarray(column)

    second_limit = values.mean()
    # For a negative mean 0.5*mean is *larger* than 1.5*mean; sort the outer
    # cut points so the bins stay ordered regardless of the mean's sign.
    first_limit, third_limit = sorted((0.5 * second_limit, 1.5 * second_limit))

    # Compute all labels from the untouched values before overwriting them.
    # np.select takes the first matching condition, like the if/elif chain.
    labels = np.select(
        [values < first_limit, values < second_limit, values < third_limit],
        [0, 1, 2],
        default=3,
    )
    values[...] = labels
    return values

In [7]:
from sklearn import datasets

In [8]:
# Load the bundled iris dataset and pull out the feature matrix and labels.
iris = datasets.load_iris()
X, Y = iris.data, iris.target

In [9]:
# Discretise every feature column with makelabelled.
# X is rebuilt from copies of the untouched iris.data columns instead of being
# overwritten in place, so re-running this cell does not re-bin values that
# were already binned (and iris.data keeps the raw measurements).
X = np.column_stack(
    [makelabelled(iris.data[:, i].copy()) for i in range(iris.data.shape[-1])]
)
    

In [10]:
from sklearn import model_selection
# Hold out part of the data for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    random_state=42,
)

In [11]:
# Build the per-class count table from the training split.
dictionary = fit(X_train=X_train, Y_train=Y_train)

In [12]:
# Display the fitted table: class label -> 1-based feature index -> value -> count,
# plus the "total_data" entry holding the number of training rows.
dictionary

{0: {'total_count_current_class': 35,
  1: {1.0: 35, 2.0: 0},
  2: {1.0: 6, 2.0: 29},
  3: {0.0: 33, 1.0: 2, 2.0: 0, 3.0: 0},
  4: {0.0: 34, 1.0: 1, 2.0: 0, 3.0: 0}},
 'total_data': 112,
 1: {'total_count_current_class': 39,
  1: {1.0: 20, 2.0: 19},
  2: {1.0: 34, 2.0: 5},
  3: {0.0: 0, 1.0: 6, 2.0: 33, 3.0: 0},
  4: {0.0: 0, 1.0: 9, 2.0: 29, 3.0: 1}},
 2: {'total_count_current_class': 38,
  1: {1.0: 6, 2.0: 32},
  2: {1.0: 25, 2.0: 13},
  3: {0.0: 0, 1.0: 0, 2.0: 24, 3.0: 14},
  4: {0.0: 0, 1.0: 0, 2.0: 5, 3.0: 33}}}

In [13]:
# Classify every row of the held-out split with the fitted count table.
y_pred = predict(dictionary=dictionary, X_test=X_test)

In [14]:
from sklearn.metrics import classification_report,confusion_matrix
# Print the per-class precision/recall report, then the confusion matrix,
# both comparing the held-out labels with the predictions.
for metric in (classification_report, confusion_matrix):
    print(metric(Y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        11
           2       1.00      1.00      1.00        12

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38

[[15  0  0]
 [ 0 11  0]
 [ 0  0 12]]


In [15]:
# Element-wise check of predictions against the true labels; y_pred is a list,
# so convert it explicitly rather than relying on implicit coercion.
print(np.asarray(y_pred) == Y_test)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True]


In [16]:
# Show the first held-out sample (nothing is printed if the split is empty).
for x in X_test[:1]:
    print(x)

[2. 1. 2. 2.]
