# Model

In [None]:
import numpy as np

def gaussian(x, mu, sigma2):
    """ Evaluates the univariate Gaussian density element-wise.

    The arguments are broadcast against each other following the usual NumPy
    rules, so a whole feature vector can be evaluated against per-feature
    means and variances in a single call.

    @params:
        x: a scalar or array-like of points at which to evaluate the density
        mu: a scalar or array-like of means
        sigma2: a scalar or array-like of variances (NOT standard deviations)
    @return:
        the density value(s), with the broadcast shape of the arguments
    """
    # Coerce to float arrays so that plain Python lists are accepted for any
    # argument (a list for sigma2 would otherwise fail on `float * list`).
    x = np.asarray(x, dtype=float)
    mu = np.asarray(mu, dtype=float)
    sigma2 = np.asarray(sigma2, dtype=float)

    coeff = 1 / np.sqrt(2 * np.pi * sigma2)
    exponent = np.exp(-((x - mu) ** 2) / (2 * sigma2))

    return coeff * exponent

class GaussNaiveBayes:
    """ Gaussian Naive Bayes model for multiclass classification

    Class labels are expected to be the integers 0 .. n_classes - 1: training
    iterates over range(n_classes) and selects examples with
    `y_train == class_label`, and predictions are indices into that range.

    @attrs:
        n_classes:    the number of classes
        feature_dist:    a 3D (n_classes x n_features x 2) NumPy array of the attribute distributions: mean and sigma2 for each features for each class

                        ex : [[[mu, sigma2],   of feature 1
                               [mu, sigma2],   of feature 2
                               [mu, sigma2]],  of feature 3 of class 1

                               [[mu, sigma2],  of feature 1
                               [mu, sigma2],   of feature 2
                               [mu, sigma2]]]  of feature 3 of class 2

        priors: a 1D NumPy array of the priors distribution

                        ex : [p_class1, p_class2, p_class3]

        laplace_smoothing: the additive smoothing constant applied to the
                        class counts when estimating the priors
    """

    # Lower bound applied to the variances at prediction time, so that a
    # feature that is constant within a class does not produce a division by
    # zero (and NaN scores). The stored feature_dist is left untouched.
    _MIN_VARIANCE = 1e-9

    def __init__(self):
        self.n_classes = None  # computed at training
        self.feature_dist = None
        self.priors = None

        self.laplace_smoothing = 1  # Default value for the smoothing parameter

    def train(self, X_train, y_train):
        """ Trains the model, using maximum likelihood estimation.

        @params:
            X_train: a 2D (n_examples x n_features) numpy array
            y_train: a 1D (n_examples) numpy array of the corresponding labels
                     (integers in 0 .. n_classes - 1)
        @return:
            None
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)

        self.n_classes = len(np.unique(y_train))
        n_examples, n_features = X_train.shape

        self.priors = np.zeros(self.n_classes)
        self.feature_dist = np.zeros((self.n_classes, n_features, 2))

        a = self.laplace_smoothing
        for class_label in range(self.n_classes):
            X_class = X_train[y_train == class_label]

            # Computing prior with Laplace smoothing
            total_class = X_class.shape[0]
            self.priors[class_label] = (total_class + a) / (n_examples + a * self.n_classes)

            # Computing the moments (mean mu and variance sigma2) of every
            # feature: the reduction runs over the examples (axis 0), leaving
            # one value per feature.
            # saving the mu and sigma2 for later predictions
            self.feature_dist[class_label, :, 0] = np.mean(X_class, axis=0)
            self.feature_dist[class_label, :, 1] = np.var(X_class, axis=0)

    def predict(self, inputs):
        """ Outputs a predicted label for each input in inputs.

        The score of a class is its log prior plus the sum over features of
        the Gaussian log-densities. The log-density is computed directly
        rather than as log(density), so inputs far away from every class mean
        do not underflow to log(0) = -inf for all classes.

        @params:
            inputs: a 2D NumPy array containing inputs (n_requests * n_features)
        @return:
            a 1D numpy array of predictions (class indices, stored as floats)
        """
        inputs = np.asarray(inputs, dtype=float)
        if inputs.size == 0:
            return np.zeros(0)

        log_priors = np.log(self.priors)
        logprobs = np.empty((inputs.shape[0], self.n_classes))

        for class_label in range(self.n_classes):
            mu = self.feature_dist[class_label, :, 0]  # All mus for all features at a time
            sigma2 = np.maximum(self.feature_dist[class_label, :, 1], self._MIN_VARIANCE)

            # log N(x; mu, sigma2) = -0.5 * (log(2 * pi * sigma2) + (x - mu)^2 / sigma2)
            log_density = -0.5 * (np.log(2 * np.pi * sigma2) + (inputs - mu) ** 2 / sigma2)
            logprobs[:, class_label] = log_priors[class_label] + np.sum(log_density, axis=1)

        # Kept as a float array, as the predictions have always been.
        return np.argmax(logprobs, axis=1).astype(float)

    def accuracy(self, X_test, y_test):
        """ Outputs the accuracy of the trained model on a given dataset (data).

        @params:
            X_test: a 2D numpy array of examples
            y_test: a 1D numpy array of labels
        @return:
            a float number indicating accuracy (between 0 and 1)
        """

        y_pred = self.predict(X_test)
        return np.mean(y_pred == np.asarray(y_test))

In [None]:

# Sanity check of the per-class scoring step on hand-written parameters:
# three classes, two features, each entry being [mu, sigma2].
feature_dist = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
    [[10, 11], [12, 13]],
])

class_label = 2

# Transposing the (n_features x 2) slice puts all the means in the first row
# and all the variances in the second one.
mu, sigma2 = feature_dist[class_label].T

input = [2, 3]
print(mu, sigma2)

# Per-feature densities, their logs, and the summed log-likelihood.
pdf = gaussian(input, mu, sigma2)
log_pdf = np.log(pdf)
print(pdf)
print(log_pdf)
print(np.sum(log_pdf))

[10 12] [11 13]
[0.00655861 0.00490845]
[-5.02697708 -5.31679783]
-10.343774906014824
