In [1]:
import numpy as np

In [2]:
def fit(X_train, Y_train):
    """Build the count table for a categorical naive Bayes classifier.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Discrete feature values, one row per sample.
    Y_train : array-like of shape (n_samples,)
        Class label of each row of ``X_train``.

    Returns
    -------
    dict
        ``result["total_data"]`` is the number of training rows. For every
        class label ``c``, ``result[c]["total_count"]`` is the number of rows
        of that class and ``result[c][i][v]`` is how many of those rows have
        value ``v`` in feature column ``i``. Every value seen anywhere in
        column ``i`` gets an entry (possibly 0) under every class.

    Notes
    -----
    Class labels share a key namespace with ``"total_data"``, and feature
    indices share one with ``"total_count"``; a class literally named
    ``"total_data"`` would collide.
    """
    # Accept plain lists as well as arrays; boolean masking below needs ndarrays.
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)

    # Neither of these depends on the class, so compute them once up front.
    num_features = X_train.shape[1]
    feature_values = [set(X_train[:, i]) for i in range(num_features)]

    # Set unconditionally so the key exists even when there are no classes.
    result = {"total_data": len(Y_train)}
    for current_class in set(Y_train):
        x_train_current = X_train[Y_train == current_class]
        class_counts = {"total_count": len(x_train_current)}
        for i in range(num_features):
            column = x_train_current[:, i]
            class_counts[i] = {
                label: (column == label).sum() for label in feature_values[i]
            }
        result[current_class] = class_counts
    return result

In [3]:
def prob(dictionary, x, current_class):
    """Return the unnormalised log-probability of ``current_class`` given ``x``.

    Parameters
    ----------
    dictionary : dict
        Count table in the layout produced by ``fit``: a ``"total_data"``
        entry plus, per class, a ``"total_count"`` entry and one
        ``{value: count}`` mapping per integer feature index.
    x : sequence
        Feature values of a single sample, indexed by feature position.
    current_class : hashable
        Class label to score; must be a key of ``dictionary``.

    Returns
    -------
    float
        ``log P(class) + sum_j log P(x[j] | class)`` with add-one (Laplace)
        smoothing applied to each conditional.
    """
    class_counts = dictionary[current_class]
    total_count = class_counts["total_count"]
    # Every key except "total_count" is a feature index.
    num_features = len(class_counts) - 1

    # Log prior of the class.
    class_prob = np.log(total_count) - np.log(dictionary["total_data"])
    for j in range(num_features):
        value_counts = class_counts[j]
        # A value never seen in training counts as zero occurrences, so the
        # +1 smoothing handles it instead of raising KeyError.
        num = value_counts.get(x[j], 0) + 1
        den = total_count + len(value_counts)
        class_prob += np.log(num) - np.log(den)
    return class_prob

In [4]:
def predict_single_point(dictionary, x):
    """Pick the class with the highest ``prob`` score for one sample.

    Every key of ``dictionary`` other than ``"total_data"`` is treated as a
    class label and scored with ``prob(dictionary, x, label)``. The first
    label reaching the maximum score wins ties. Returns ``-1`` when
    ``dictionary`` contains no class labels.
    """
    winner = -1
    winner_score = None  # None means no class has been scored yet
    for label in dictionary:
        if label == "total_data":
            continue
        score = prob(dictionary, x, label)
        if winner_score is None or score > winner_score:
            winner, winner_score = label, score
    return winner

In [5]:
def predict(dictionary, X_test):
    """Classify every row of ``X_test``.

    Returns a list holding, in order, the result of
    ``predict_single_point(dictionary, row)`` for each row.
    """
    return [predict_single_point(dictionary, row) for row in X_test]

In [6]:
def make_discrete(x):
    """Replace each element of ``x`` in place with a bin index from 0 to 3.

    The thresholds are taken from the mean of ``x`` before any element is
    changed: half the mean, the mean, and one and a half times the mean.
    Each element is tested against them in that order and receives the index
    of the first threshold it is strictly below, or 3 if it is below none.

    ``x`` itself is modified and returned.
    """
    mid = x.mean()
    low = 0.5 * mid
    high = 1.5 * mid
    for idx in range(len(x)):
        value = x[idx]
        if value < low:
            bucket = 0
        elif value < mid:
            bucket = 1
        elif value < high:
            bucket = 2
        else:
            bucket = 3
        x[idx] = bucket
    return x

In [7]:
from sklearn import datasets
# Load scikit-learn's bundled iris dataset.
# X is the feature matrix and Y the class labels; both names refer to the
# arrays held by `iris` (no copy is made here).
iris = datasets.load_iris()
X = iris.data
Y = iris.target

In [8]:
# Discretise each feature column into the bin indices produced by make_discrete.
# Start from a fresh copy of iris.data so the loaded dataset is left untouched
# and re-running this cell gives the same result instead of re-binning values
# that were already binned.
X = iris.data.copy()
for i in range(X.shape[-1]):
    X[:, i] = make_discrete(X[:, i])

In [9]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples as a test set; random_state=0 fixes the shuffle
# so the split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.25,random_state=0)

In [10]:
# Build the per-class count table from the training split, then predict a
# class for every row of the test split.
dictionary = fit(X_train, Y_train)
y_pred = predict(dictionary, X_test)

In [11]:
from sklearn.metrics import classification_report, confusion_matrix
# Compare predictions with the held-out labels: per-class precision, recall
# and F1 first, then the confusion matrix.
print(classification_report(Y_test,y_pred))
print(confusion_matrix(Y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.94      1.00      0.97        16
           2       1.00      0.89      0.94         9

    accuracy                           0.97        38
   macro avg       0.98      0.96      0.97        38
weighted avg       0.98      0.97      0.97        38

[[13  0  0]
 [ 0 16  0]
 [ 0  1  8]]
