Import necessary modules/libraries

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

Define the Gaussian Naive Bayes classifier class with its `fit` and `predict` methods and their helper functions

In [None]:
class NaiveBayes:
  """Gaussian Naive Bayes classifier.

  Each feature is modelled, per class, as an independent Gaussian whose mean
  and variance are estimated from the training rows of that class. Prediction
  picks the class with the largest log-prior plus summed per-feature
  log-likelihood.

  Parameters
  ----------
  var_smoothing : float, default 1e-9
    Fraction of the largest per-feature variance of the training data that is
    added to every per-class variance. This keeps the variances strictly
    positive, so a feature that is constant within a class no longer produces
    a division by zero (and NaN scores) at prediction time.
  """

  def __init__(self, var_smoothing=1e-9):
    self.var_smoothing = var_smoothing

  def fit(self, X, y):
    """Estimate per-class feature means, variances and class priors.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
      Training data; converted to a float64 array.
    y : array-like of shape (n_samples,)
      Class label of each training row.

    Raises
    ------
    ValueError
      If X is not 2-D, is empty, or y does not hold one label per row of X.
    """
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y)
    if X.ndim != 2:
      raise ValueError("X must be 2-D with shape (n_samples, n_features)")
    n_samples, n_features = X.shape
    if n_samples == 0:
      raise ValueError("cannot fit on an empty dataset")
    if y.ndim != 1 or y.shape[0] != n_samples:
      raise ValueError("y must be 1-D with one label per row of X")

    self._classes = np.unique(y)  # sorted unique class labels
    n_classes = len(self._classes)

    # arrays for storing mean, variance and prior probabilities
    self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
    self._var = np.zeros((n_classes, n_features), dtype=np.float64)
    self._priors = np.zeros(n_classes, dtype=np.float64)

    # Variance floor, scaled to the data. If every feature is constant the
    # largest variance is 0, so fall back to a scale of 1 to stay positive.
    largest_var = X.var(axis=0).max()
    epsilon = self.var_smoothing * (largest_var if largest_var > 0 else 1.0)

    for idx, c in enumerate(self._classes):
      X_c = X[y == c]  # rows belonging to class c

      self._mean[idx, :] = X_c.mean(axis=0)
      self._var[idx, :] = X_c.var(axis=0) + epsilon
      self._priors[idx] = X_c.shape[0] / float(n_samples)

  def predict(self, X):
    """Predict a class label for every row of X.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
      Predicted labels, taken from the labels seen in `fit`.

    Raises
    ------
    ValueError
      If X is non-empty and not 2-D, or its feature count differs from the
      training data.
    """
    X = np.asarray(X, dtype=np.float64)
    if X.size == 0:
      return self._classes[:0]  # nothing to predict
    if X.ndim != 2:
      raise ValueError("X must be 2-D with shape (n_samples, n_features)")
    if X.shape[1] != self._mean.shape[1]:
      raise ValueError("X has a different number of features than the training data")

    scores = self._joint_log_likelihood(X)  # (n_samples, n_classes)
    return self._classes[np.argmax(scores, axis=1)]

  def _predict(self, x):
    """Predict the class label of a single sample `x` of shape (n_features,)."""
    x = np.asarray(x, dtype=np.float64)
    return self._classes[np.argmax(self._joint_log_likelihood(x))]

  def _joint_log_likelihood(self, X):
    """Return log-prior + summed feature log-likelihood for every class.

    `X` has the features on its last axis; the result has the classes on its
    last axis, in the order of `self._classes`.
    """
    log_priors = np.log(self._priors)
    per_class = [
      log_priors[idx] + np.sum(self._log_pdf(idx, X), axis=-1)
      for idx in range(len(self._classes))
    ]
    return np.stack(per_class, axis=-1)

  def _log_pdf(self, class_idx, x):
    """Per-feature Gaussian log-density of `x` under class `class_idx`.

    Evaluated directly in log space: taking the log of the density instead
    underflows to log(0) = -inf for points far from the class mean, which
    makes all classes tie.
    """
    mean = self._mean[class_idx]
    var = self._var[class_idx]
    return -0.5 * (np.log(2 * np.pi * var) + ((x - mean) ** 2) / var)

  def _pdf(self, class_idx, x):
    """Per-feature Gaussian density of `x` under class `class_idx`.

    Kept for callers that want the density itself; prediction uses
    `_log_pdf`.
    """
    return np.exp(self._log_pdf(class_idx, x))


Fit an instance of the classifier and evaluate its performance

In [None]:
def accuracy(y_true, y_pred):
  """Return the fraction of predictions that equal the true labels.

  Parameters
  ----------
  y_true : array-like
    Ground-truth labels.
  y_pred : array-like
    Predicted labels; must have the same shape as `y_true`.

  Raises
  ------
  ValueError
    If the two inputs do not have the same shape.
  """
  # Convert first: comparing two plain lists with == yields a single bool,
  # not an element-wise comparison.
  y_true = np.asarray(y_true)
  y_pred = np.asarray(y_pred)
  if y_true.shape != y_pred.shape:
    raise ValueError(
      "y_true and y_pred must have the same shape, got "
      f"{y_true.shape} and {y_pred.shape}"
    )
  acc = np.sum(y_true == y_pred) / y_pred.size
  return acc

In [None]:
# Build a synthetic binary-classification dataset: 1000 samples, 10 features
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=25,
)

# Hold out 20% of the samples as the test set; both calls use a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=25,
)

In [None]:
# Instantiate the classifier and fit it on the training split
nbayes = NaiveBayes()
nbayes.fit(X_train, y_train)

# Predict class labels for the held-out test split
y_preds = nbayes.predict(X_test)

# Report the fraction of test labels that were predicted correctly
print("Classification accuracy: ", accuracy(y_test, y_preds))

Classification accuracy:  0.915
