In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

In [2]:
# Synthetic toy dataset: 500 two-feature samples around 3 centers (seeded, so re-runs match).
X, y = make_blobs(n_samples=500, n_features=2, centers=3, random_state=0)

# Keep 20% of the samples aside as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

# Naive Bayes

In [17]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, per class, as an independent 1-D Gaussian, so
    only per-feature means and variances are stored (no covariance matrix).

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. This keeps the variances
        strictly positive when a feature is constant within a class, which
        would otherwise produce divisions by zero and NaN posteriors.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels seen in ``y``.
    means, vars : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed) variances; row ``i`` belongs
        to ``classes[i]``.
    priors : ndarray of shape (n_classes,)
        Relative class frequencies in the training data.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self,X,y):
        """Estimate class priors and per-class feature means/variances.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Class labels. Any label values that ``np.unique`` can sort are
            accepted (they do not have to be the integers 0..n_classes-1).

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from ``len(y)``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-D with one label per row of X")
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty dataset")

        self.classes = np.unique(y)
        n_samples,n_features = X.shape
        n_classes = len(self.classes)
        self.means = np.zeros((n_classes,n_features))
        # Variances only, not covariances: features are assumed independent given the class.
        self.vars = np.zeros((n_classes,n_features))
        self.priors = np.zeros(n_classes)

        # Variance floor, scaled to the data so it is negligible unless a variance is ~0.
        epsilon = self.var_smoothing * np.var(X,axis=0).max()

        # Index rows by position, not by label value: labels may be
        # non-contiguous integers, floats or strings.
        for idx,c in enumerate(self.classes):
            X_c = X[y==c]
            self.means[idx] = np.mean(X_c,axis=0)
            self.vars[idx] = np.var(X_c,axis=0) + epsilon
            self.priors[idx] = X_c.shape[0]/n_samples

    def predict(self,X):
        """Return the most probable class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        list
            One label (an element of ``self.classes``) per sample. Ties go to
            the class that comes first in ``self.classes``.

        Raises
        ------
        ValueError
            If ``X`` is non-empty and not 2-D.
        """
        X = np.asarray(X, dtype=float)
        if X.size == 0:
            return []
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        scores = self._joint_log_likelihood(X)
        # Map the winning column back to the actual label.
        return list(self.classes[np.argmax(scores,axis=1)])

    def _joint_log_likelihood(self,X):
        """Return log prior + log likelihood, shape (n_samples, n_classes)."""
        scores = np.empty((X.shape[0],len(self.classes)))
        for idx in range(len(self.classes)):
            mean_c,var_c = self.means[idx],self.vars[idx]
            # Work with the log-density directly; taking log(exp(...)) would
            # underflow to log(0) = -inf for points far from the class mean.
            log_norm = -0.5 * np.sum(np.log(2 * np.pi * var_c))
            log_kernel = -0.5 * np.sum((X - mean_c) ** 2 / var_c,axis=1)
            scores[:,idx] = np.log(self.priors[idx]) + log_norm + log_kernel
        return scores

    def gaussian(self,x,mean,var):
        """Element-wise Gaussian density of ``x`` for the given ``mean`` and ``var``.

        Kept as a public helper; ``predict`` itself works in log space.
        """
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

In [18]:
def accuracy(y_true,y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true, y_pred : array-like
        True and predicted labels; both must have the same shape. Plain
        Python lists are accepted.

    Returns
    -------
    float
        Share of matching elements in [0, 1]; ``nan`` when the inputs are empty.

    Raises
    ------
    ValueError
        If the two inputs have different shapes.
    """
    # Convert first: comparing two plain lists with == yields a single bool,
    # not an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"shape mismatch: y_true {y_true.shape} vs y_pred {y_pred.shape}"
        )
    if y_true.size == 0:
        return float("nan")
    return float(np.mean(y_true == y_pred))

In [19]:
# Train the from-scratch classifier on the training split, then label the test split.
nb = NaiveBayes()
nb.fit(X_train, y_train)
y_preds = nb.predict(X_test)

In [20]:
# Share of test samples the from-scratch model labels correctly (shown as the cell output).
accuracy(y_test, y_preds)

0.92

# Sklearn

In [21]:
from sklearn.naive_bayes import GaussianNB

In [22]:
# Reference run: scikit-learn's GaussianNB on the same split, scored with the same metric.
gnb = GaussianNB().fit(X_train, y_train)
sklearn_preds = gnb.predict(X_test)
accuracy(y_test, sklearn_preds)

0.92