# Naive Bayes

In [None]:
#Imports

import numpy as np

## Bernoulli NB: Implementation

In [None]:
class BernoulliNB(object):
    '''Bernoulli Naive Bayes classifier for binary (0/1) feature vectors.

    Args:
        alpha: Additive (Laplace) smoothing strength used for both the
            class-conditional feature probabilities and the class priors.
    '''

    def __init__(self, alpha=1.0):
        self.alpha = alpha

    def fit(self, X, y):
        '''Estimates parameters for Bernoulli NB
        Args:
            X: Feature matrix of shape (n,m)
            y: Label vector of shape (n,)

        Returns:
            None. The estimates are stored on the instance:
            self.w        -- w_{j y_c} = P(x_j = 1 | y = c), shape (n_classes, m)
            self.w_priors -- smoothed class priors, shape (n_classes,)
            self._classes -- sorted unique labels; row i of self.w belongs
                             to self._classes[i]
        '''
        X = np.asarray(X)
        y = np.asarray(y)

        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # Initialise the weight vector
        self.w = np.zeros((n_classes, n_features), dtype=np.float64)
        self.w_priors = np.zeros(n_classes, dtype=np.float64)

        # Process the samples of each class separately. Iterate over the
        # actual label values (not 0..n_classes-1) so that arbitrary labels
        # such as {5, 7} select the right rows.
        for idx, c in enumerate(self._classes):

            # Get examples with label = c
            X_c = X[y == c]
            n_c = X_c.shape[0]

            # Estimating w_{j y_c} = P(x_j | y_c = c); the 2 * alpha accounts
            # for the two possible values of a binary feature.
            self.w[idx, :] = (np.sum(X_c, axis=0) + self.alpha) / (n_c + 2.0 * self.alpha)

            # Estimating prior
            self.w_priors[idx] = (n_c + self.alpha) / (float(n_samples) + n_classes * self.alpha)

        print("Class conditional density: ", self.w)
        print("Prior: ", self.w_priors)

    def log_likelihood_prior_prod(self, X):
        '''Returns log(P(x | y = c) * P(y = c)) for every sample and class.

        Args:
            X: Feature matrix of shape (n,m) with 0/1 entries

        Returns:
            Array of shape (n, n_classes); column i corresponds to
            self._classes[i].
        '''
        X = np.asarray(X)
        return X @ (np.log(self.w).T) + (1 - X) @ (np.log((1 - self.w)).T) + np.log(self.w_priors)

    def predict_proba(self, X):
        '''Returns the posterior probability of each class for each sample.

        Args:
            X: Feature matrix of shape (n,m)

        Returns:
            Array of shape (n, n_classes) whose rows sum to 1.
        '''
        q = self.log_likelihood_prior_prod(X)
        # Subtract the row-wise maximum before exponentiating: the ratio is
        # unchanged, but exp() can no longer underflow to 0 for every class
        # (which would give 0/0 = NaN when there are many features).
        q = q - np.max(q, axis=1, keepdims=True)
        p = np.exp(q)
        return p / np.sum(p, axis=1, keepdims=True)

    def predict(self, X):
        '''Returns the most probable class label for each sample.

        Args:
            X: Feature matrix of shape (n,m)

        Returns:
            Array of shape (n,) holding labels taken from the training y.
        '''
        best = np.argmax(self.log_likelihood_prior_prod(X), axis=1)
        # Map column indices back to the original label values.
        return self._classes[best]


## Demo

### Binary labels

In [None]:
# Toy training set: four samples with two binary features each.
X = np.array([[1,0], [0,1], [0,1], [1,0]])
# One binary label per row of X.
y = np.array([1, 0, 0, 1])

In [None]:
# Fit with the default smoothing (alpha=1.0); fit() prints the estimated
# class-conditional probabilities and priors.
bernoulli_nb = BernoulliNB()
bernoulli_nb.fit(X, y)

In [None]:
# Predicted class for each training sample.
bernoulli_nb.predict(X)

In [None]:
# Per-class log(likelihood * prior) scores, one row per sample.
bernoulli_nb.log_likelihood_prior_prod(X)

In [None]:
# Scores normalised into per-class probabilities, one row per sample.
bernoulli_nb.predict_proba(X)

### Multiclass setup

In [None]:
# Six samples with two binary features each.
X = np.array([[1,0], [0,1], [0,1], [1,0], [1, 1], [1, 1]])
# Three classes (0, 1, 2), two samples per class.
y = np.array([1, 0, 0, 1, 2, 2])

In [None]:
# Re-create the model and fit it on the three-class data; fit() prints the
# estimated parameters.
bernoulli_nb = BernoulliNB()
bernoulli_nb.fit(X, y)

In [None]:
# Predicted class for each training sample.
bernoulli_nb.predict(X)

In [None]:
# Per-class log(likelihood * prior) scores, one row per sample.
bernoulli_nb.log_likelihood_prior_prod(X)

In [None]:
# Scores normalised into per-class probabilities, one row per sample.
bernoulli_nb.predict_proba(X)

## Gaussian NB: Implementation

In [None]:
class GaussianNB(object):
    '''Gaussian Naive Bayes classifier for real-valued features.

    Each feature is modelled, per class, as an independent normal
    distribution (i.e. a diagonal covariance matrix).

    Args:
        var_smoothing: Fraction of the largest overall feature variance that
            is added to every per-class variance. It keeps the variances
            strictly positive so that a feature which is constant inside one
            class does not cause a division by zero.
    '''

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        '''Parameter estimation for Gaussian NB

        Args:
            X: Feature matrix of shape (n,m)
            y: Label vector of shape (n,)

        Returns:
            None. Stores self._classes (sorted unique labels), self._mean and
            self._var (both of shape (n_classes, m)) and self._priors
            (shape (n_classes,)); row i belongs to self._classes[i].
        '''
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)

        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)

        # Initialise mean, var, and prior for each class.
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)

        # Variance floor, scaled by the data so it is negligible for
        # well-behaved features.
        epsilon = self.var_smoothing * X.var(axis=0).max()

        for idx, c in enumerate(self._classes):

            # Get examples with label y=c
            X_c = X[y == c]

            # Estimate mean from the training examples of class c.
            self._mean[idx, :] = X_c.mean(axis=0)

            # Estimate variance from the training examples of class c.
            self._var[idx, :] = X_c.var(axis=0) + epsilon

            # Estimate priors.
            self._priors[idx] = X_c.shape[0] / float(n_samples)

        print("Mean: ", self._mean)
        print("Variance: ", self._var)
        print("Priors: ", self._priors)

    def _log_pdf(self, class_idx, X):
        '''Log density of X under the Gaussian of class index class_idx.

        X may be a single sample of shape (m,) or a matrix of shape (n,m);
        the feature axis is always the last one.
        '''
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        # With a diagonal covariance the log density is a sum over features,
        # so no determinant or matrix inverse is needed.
        return -0.5 * np.sum(np.log(2.0 * np.pi * var) + (X - mean) ** 2 / var, axis=-1)

    def _calc_pdf(self, class_idx, X):
        '''Calculates probability density for samples for class label class_idx'''
        return np.exp(self._log_pdf(class_idx, np.asarray(X, dtype=np.float64)))

    def _calc_prod_likelihood_prior(self, X):
        '''Calculates the log of the product of likelihood and priors.

        The result, of shape (n, n_classes), is stored in
        self._prod_likelihood_prior; column i belongs to self._classes[i].
        '''
        X = np.asarray(X, dtype=np.float64)
        n_classes = len(self._classes)
        joint = np.zeros((X.shape[0], n_classes), dtype=np.float64)

        # Columns are addressed by class *index*, never by label value, so
        # labels that are not 0..n_classes-1 work too. Staying in log space
        # avoids log(0) = -inf for samples far away from every class.
        for idx in range(n_classes):
            joint[:, idx] = self._log_pdf(idx, X) + np.log(self._priors[idx])

        self._prod_likelihood_prior = joint

    def predict(self, X):
        '''Predicts class labels for each example'''

        self._calc_prod_likelihood_prior(X)

        best = np.argmax(self._prod_likelihood_prior, axis=1)
        # Map column indices back to the original label values.
        return self._classes[best]

    def predict_proba(self, X):
        '''Calculates probability of each example belonging to different classes.'''

        self._calc_prod_likelihood_prior(X)

        # Shift by the row-wise maximum so exp() cannot underflow for every
        # class at once; the normalised ratio is unaffected.
        q = self._prod_likelihood_prior
        q = q - np.max(q, axis=1, keepdims=True)
        p = np.exp(q)
        return p / np.sum(p, axis=1, keepdims=True)

## Demo


### Binary classification

In [None]:
from sklearn.datasets import make_classification, make_blobs
from sklearn.model_selection import train_test_split

# Generate data points: 100 two-feature samples drawn around the two
# centres (5, 5) and (10, 10), with a fixed seed.
X, y = make_blobs(n_samples = 100,
                n_features=2,
                centers=[[5,5],[10,10]],
                cluster_std=1.5,
                random_state=2)

# Hold out 20% of the samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [None]:
# Fit on the training split; fit() prints the per-class means, variances
# and priors.
gaussian_nb = GaussianNB()
gaussian_nb.fit(X_train, y_train)

In [None]:
import matplotlib.pyplot as plt
# NOTE(review): seaborn is not referenced anywhere in this cell.
import seaborn as sns

# Scatter all generated points, coloured by their label.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='RdBu')

# Axis limits: the data range padded by 2 on each side.
xlim = (min(X[:, 0]) - 2, max(X[:, 0]) + 2 )
ylim = (min(X[:, 1]) - 2, max(X[:, 1]) + 2 )

# Regular 60 x 40 grid over the plotting area, flattened to (n_points, 2).
xg = np.linspace(xlim[0], xlim[1], 60)
yg = np.linspace(ylim[0], ylim[1], 40)
xx, yy = np.meshgrid(xg, yg)
Xgrid = np.vstack([xx.ravel(), yy.ravel()]).T

for label, color in enumerate(['red', 'blue']):
    mask = (y == label)
    # Per-feature mean/std of this class, computed from the full data set
    # (X, y) rather than from the fitted model.
    mu, std = X[mask].mean(0), X[mask].std(0)
    # Unnormalised product of per-feature Gaussians evaluated on the grid.
    P = np.exp(-0.5 * (Xgrid - mu)**2 / std**2).prod(1)
    # Mask out grid points where the value is below 0.03.
    Pm = np.ma.masked_array(P, P < 0.03)
    # color.title() + 's' yields the colormap names 'Reds' / 'Blues'.
    ax.pcolorfast(xg, yg, Pm.reshape(xx.shape), alpha=0.5, cmap=color.title() + 's')
    ax.contour(xx, yy, P.reshape(xx.shape), levels=[0.01, 0.1, 0.5, 0.9], colors = color, alpha=0.2)

ax.set(xlim=xlim, ylim=ylim)

plt.title('Gaussian distribution of data')
plt.show()

In [None]:
# Predicted class for each held-out sample.
gaussian_nb.predict(X_test)

In [None]:
from sklearn.metrics import classification_report
# Compare the held-out labels with the model's predictions.
print(classification_report(y_test, gaussian_nb.predict(X_test)))

In [None]:
# Per-class probabilities for each held-out sample.
gaussian_nb.predict_proba(X_test)

### Multiclass classification

In [None]:
# 100 two-feature samples drawn around three centres, with a fixed seed.
X, y = make_blobs(n_samples = 100,
                    n_features =2,
                    centers = [[5,5], [10,10], [20,20]],
                    cluster_std = 1.5,
                    random_state=3)

# Hold out 20% of the samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [None]:
# Fit a separate model on the three-class training split; fit() prints the
# per-class means, variances and priors.
gaussian_nb_mc = GaussianNB()
gaussian_nb_mc.fit(X_train, y_train)

In [None]:
# Compare the held-out labels with the three-class model's predictions.
print(classification_report(y_test, gaussian_nb_mc.predict(X_test)))

In [None]:
# Per-class probabilities for each held-out sample.
gaussian_nb_mc.predict_proba(X_test)

## MultinomialNB

In [None]:
class MultinomialNB(object):
    '''Multinomial Naive Bayes classifier for count-valued features.'''

    def fit(self, X, y, alpha = 1.0):
        '''Estimates parameters for Multinomial NB

        Args:
            X: Count matrix of shape (n,m)
            y: Label vector of shape (n,)
            alpha: Additive (Laplace) smoothing strength

        Returns:
            None. Stores self._classes (sorted unique labels), self.w
            (per-class feature probabilities, shape (n_classes, m), each row
            summing to 1) and self.w_prior (smoothed class priors, shape
            (n_classes,)); row i belongs to self._classes[i].
        '''
        X = np.asarray(X)
        y = np.asarray(y)

        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)

        self.w = np.zeros((n_classes, n_features), dtype=np.float64)
        self.w_prior = np.zeros(n_classes, dtype=np.float64)

        # Iterate over the label values themselves (enumerating the integer
        # n_classes is a TypeError).
        for idx, c in enumerate(self._classes):
            X_c = X[y == c]

            # Count of each feature within class c ...
            feature_count = np.sum(X_c, axis=0)
            # ... and the class total over all features and samples: a single
            # scalar, so that the smoothed row of w sums to one.
            total_count = np.sum(feature_count)
            self.w[idx, :] = (feature_count + alpha) / (total_count + alpha * n_features)

            self.w_prior[idx] = (X_c.shape[0] + alpha) / float(n_samples + alpha * n_classes)

    def log_likelihood_prior_prod(self, X):
        '''Returns log(P(x | y = c) * P(y = c)) up to a per-sample constant.

        Args:
            X: Count matrix of shape (n,m)

        Returns:
            Array of shape (n, n_classes); column i belongs to
            self._classes[i].
        '''
        return np.asarray(X) @ (np.log(self.w).T) + np.log(self.w_prior)

    def predict(self, X):
        '''Returns the most probable class label for each sample.'''
        q = self.log_likelihood_prior_prod(X)
        # Map column indices back to the original label values.
        return self._classes[np.argmax(q, axis = 1)]

    def predict_proba(self, X):
        '''Returns the posterior probability of each class for each sample.'''
        q = self.log_likelihood_prior_prod(X)
        # Shift by the row-wise maximum so exp() cannot underflow for every
        # class at once; the normalised ratio is unaffected.
        q = q - np.max(q, axis = 1, keepdims = True)
        p = np.exp(q)
        return p / np.sum(p, axis = 1, keepdims = True)