## Imports

In [1]:
from keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Load dataset

In [2]:
# Load the MNIST digits via Keras; load_data() returns a (train, test) pair
# of (images, labels) tuples, unpacked here into four arrays.
(train_X, train_y), (test_X, test_y) = mnist.load_data()

In [3]:
# Divide both image sets by 255 so pixel intensities become floats
# (presumably raw values are 0..255, giving a [0, 1] range — TODO confirm).
train_X, test_X = train_X / 255, test_X / 255

In [4]:
# Length of one flattened image: product of the two per-sample dimensions.
new_shape = train_X.shape[1] * train_X.shape[2]

In [5]:
# Flatten every image into a single feature vector: one row per sample,
# new_shape columns.
train_X = train_X.reshape((len(train_X), new_shape))
test_X = test_X.reshape((len(test_X), new_shape))

## CBN (Gaussian naive Bayes classifier)

In [6]:
class CBN:
    """Gaussian naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution.  A sample is assigned to the class that maximises
    ``prior * product of per-feature densities``.

    Scoring is carried out in log space and a small amount is added to every
    variance.  Without these two measures a product over hundreds of
    densities leaves the float range, and any feature that is constant
    within a class (zero standard deviation) turns every score into NaN.

    Attributes populated by ``fit``:
        no_instances, no_features, no_classes: dimensions of the training set.
        classes: sorted unique labels found in ``y``; row ``i`` of ``mean``
            and ``stdev`` describes ``classes[i]``.
        mean: per-class feature means, shape (no_classes, no_features).
        stdev: per-class smoothed standard deviations, same shape.
        label_occurences: relative frequency (prior) of each class.

    ``probas`` holds the normalised class posterior of the sample most
    recently passed to ``compute_class_proba``; ``predict`` does not touch it.
    """

    def __init__(self, var_smoothing=1e-9):
        """Create an unfitted classifier.

        Args:
            var_smoothing: fraction of the largest feature variance in the
                training data that is added to every per-class variance.
        """
        self.var_smoothing = var_smoothing

        self.no_instances = None
        self.no_features = None
        self.no_classes = None
        self.classes = None

        self.mean = None
        self.stdev = None
        self.label_occurences = None

        self.probas = None

    def fit(self, X, y):
        """Estimate class priors and per-class feature means / deviations.

        Args:
            X: training samples, shape (n_samples, n_features).
            y: training labels, shape (n_samples,).

        Raises:
            ValueError: if ``X`` is not a non-empty 2-D array or ``y`` does
                not hold exactly one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array")
        if y.shape != (X.shape[0],):
            raise ValueError("y must contain exactly one label per row of X")

        self.no_instances, self.no_features = X.shape
        self.classes = np.unique(y)
        self.no_classes = len(self.classes)

        self.mean = np.zeros((self.no_classes, self.no_features))
        self.stdev = np.zeros((self.no_classes, self.no_features))
        self.label_occurences = np.zeros(self.no_classes)

        # Variance floor, relative to the data scale.  Falls back to an
        # absolute floor when every feature is constant.
        largest_variance = X.var(axis=0).max()
        epsilon = self.var_smoothing * (
            largest_variance if largest_variance > 0 else 1.0
        )

        # Summarize the dataset, one row of statistics per class.
        for index, label in enumerate(self.classes):
            X_label = X[y == label]
            self.mean[index] = X_label.mean(axis=0)
            self.stdev[index] = np.sqrt(X_label.var(axis=0) + epsilon)
            self.label_occurences[index] = X_label.shape[0] / self.no_instances

    def predict(self, X):
        """Return the predicted label for every row of ``X``.

        Args:
            X: samples to classify, shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples,) with entries taken from ``classes``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array")
        best = np.argmax(self._joint_log_likelihood(X), axis=1)
        return self.classes[best]

    def gaussian_proba_dens_func(self, x, mean, stdev):
        """Evaluate the normal density N(mean, stdev**2) at ``x``.

        Works element-wise on arrays.  The result is NaN or inf wherever
        ``stdev`` is zero.
        """
        variance = stdev ** 2
        exponent = np.exp(-((x - mean) ** 2) / (2 * variance))
        return (1 / np.sqrt(2 * np.pi * variance)) * exponent

    def compute_class_proba(self, x):
        """Classify a single sample and record its class posterior.

        Args:
            x: one sample, shape (n_features,).

        Returns:
            The label (an entry of ``classes``) with the highest posterior.
            The normalised posterior over all classes is stored in
            ``self.probas``.
        """
        x = np.asarray(x, dtype=float).reshape(1, -1)
        log_joint = self._joint_log_likelihood(x)[0]

        # Softmax with the maximum subtracted first so exp() cannot overflow.
        shifted = np.exp(log_joint - log_joint.max())
        self.probas = shifted / shifted.sum()

        # return the label with the highest class probability
        return self.classes[np.argmax(log_joint)]

    def _joint_log_likelihood(self, X):
        """Return log(prior * likelihood), shape (n_samples, no_classes)."""
        if self.mean is None:
            raise RuntimeError("CBN instance is not fitted yet; call fit() first")

        scores = np.empty((X.shape[0], self.no_classes))
        for index in range(self.no_classes):
            variance = self.stdev[index] ** 2
            # Sum of log densities == log of the product of densities.
            log_norm = -0.5 * np.sum(np.log(2 * np.pi * variance))
            log_kernel = -0.5 * np.sum(
                (X - self.mean[index]) ** 2 / variance, axis=1
            )
            scores[:, index] = (
                np.log(self.label_occurences[index]) + log_norm + log_kernel
            )
        return scores

In [7]:
# Create an unfitted classifier instance.
cbn = CBN()

In [8]:
# Estimate the per-class statistics from the flattened training images.
cbn.fit(train_X, train_y)

In [None]:
# Inspect the first element of the first (flattened) test sample.
test_X[0][0]

In [11]:
# Display the per-class feature means computed by fit().
cbn.mean

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# Classify the first test sample on its own.
cbn.compute_class_proba(test_X[0])

In [13]:
# Per-feature Gaussian densities of the first test sample, evaluated with the
# mean / standard deviation stored in row 0 of the fitted statistics.
cbn.gaussian_proba_dens_func(test_X[0], cbn.mean[0], cbn.stdev[0])

In [None]:
# Predict a class for every test sample.
pred_y = cbn.predict(test_X)

In [None]:
# Accuracy: fraction of test samples whose prediction equals the true label.
np.mean(test_y == pred_y)