In [2]:
import numpy as np



In [7]:
class NaiveBayes:
  """Gaussian Naive Bayes classifier.

  Each feature is modelled, per class, as an independent normal
  distribution. Prediction picks the class with the largest log-posterior
  ``log P(class) + sum_j log N(x_j | mean_j, var_j)``.

  Parameters
  ----------
  var_smoothing : float, default=1e-9
      Fraction of the largest per-feature variance of the training data that
      is added to every class variance. This keeps the variance strictly
      positive so that a feature which is constant within a class does not
      produce a division by zero (NaN scores).
  """

  def __init__(self, var_smoothing=1e-9):
    self.var_smoothing = var_smoothing

  def fit(self, X, y):
    """Estimate per-class feature means, variances and class priors.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    y : array-like of shape (n_samples,)

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array or ``y`` does not hold one
        label per row of ``X``.
    """
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y)
    if X.ndim != 2 or X.size == 0:
      raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")
    if y.ndim != 1 or y.shape[0] != X.shape[0]:
      raise ValueError("y must be 1-D with one label per row of X")

    no_samples, no_feature = X.shape
    self._classes = np.unique(y)
    no_classes = len(self._classes)

    # One row per class, one column per feature; filled in the loop below.
    self._mean = np.zeros((no_classes, no_feature), dtype=np.float64)
    self._var = np.zeros((no_classes, no_feature), dtype=np.float64)
    self._prior = np.zeros(no_classes, dtype=np.float64)

    # Small additive term that keeps every variance strictly positive.
    # It is scaled by the largest feature variance so it stays negligible
    # relative to the data; if the whole data set is constant, fall back to
    # the raw smoothing value so the term is still non-zero.
    epsilon = self.var_smoothing * X.var(axis=0).max()
    if epsilon == 0:
      epsilon = self.var_smoothing

    for idx, c in enumerate(self._classes):
      X_c = X[y == c]
      self._mean[idx, :] = X_c.mean(axis=0)
      self._var[idx, :] = X_c.var(axis=0) + epsilon
      self._prior[idx] = X_c.shape[0] / float(no_samples)

  def predict(self, X):
    """Return the predicted class label for every row of ``X``."""
    X = np.asarray(X, dtype=np.float64)
    y_pred = [self._predict(x) for x in X]
    return np.array(y_pred)

  def _predict(self, x):
    """Return the label with the highest log-posterior for one sample."""
    # Priors are strictly positive: every class in self._classes occurred
    # at least once in the training labels.
    log_posteriors = np.log(self._prior) + self._log_likelihood(x)
    return self._classes[np.argmax(log_posteriors)]

  def _log_likelihood(self, x):
    """Log of the Gaussian likelihood of ``x`` under every class.

    The log-density is evaluated analytically instead of as ``log(pdf)``:
    for samples far from a class mean the pdf underflows to 0 and its log
    becomes -inf for every class, which makes the argmax meaningless.

    Returns an array of shape (n_classes,).
    """
    sq_dist = (x - self._mean) ** 2
    per_feature = np.log(2.0 * np.pi * self._var) + sq_dist / self._var
    return -0.5 * np.sum(per_feature, axis=1)

  def _pdf(self, class_idx, x):
    """Per-feature Gaussian density of ``x`` under class ``class_idx``."""
    mean = self._mean[class_idx]
    var = self._var[class_idx]
    nume = np.exp(-((x - mean) ** 2) / (2 * var))
    deno = np.sqrt(2 * np.pi * var)
    return nume / deno







In [8]:
def train_test_split(X, y, test_size=0.2, shuffle=True, random_state=None):
    """Split ``X`` and ``y`` into train and test subsets along the first axis.

    Parameters
    ----------
    X : array-like, first dimension is n_samples
    y : array-like, first dimension is n_samples
    test_size : float or int, default=0.2
        A float is the fraction of samples placed in the test set (the count
        is truncated towards zero); an int is the absolute number of test
        samples.
    shuffle : bool, default=True
        Whether to permute the sample order before splitting.
    random_state : int or None, default=None
        Seed for the permutation. When given, a private generator is used so
        the process-wide NumPy random state is left untouched. When ``None``
        the global NumPy random state is used.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or the resulting test-set size is
        outside ``[0, n_samples]``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        raise ValueError(
            "X and y must have the same number of samples "
            "(got %d and %d)" % (n_samples, y.shape[0])
        )

    # Convert a fractional test_size to a number of samples.
    # np.floating covers NumPy scalars such as np.float32 that do not
    # subclass the built-in float.
    if isinstance(test_size, (float, np.floating)):
        n_test = int(n_samples * test_size)
    else:
        n_test = int(test_size)

    if not 0 <= n_test <= n_samples:
        raise ValueError(
            "test_size=%r yields %d test samples, expected a value in [0, %d]"
            % (test_size, n_test, n_samples)
        )

    if shuffle:
        if random_state is not None:
            # A local RandomState produces the same permutation as seeding
            # the global generator, without the global side effect.
            indices = np.random.RandomState(random_state).permutation(n_samples)
        else:
            indices = np.random.permutation(n_samples)
    else:
        indices = np.arange(n_samples)  # keep original order

    test_indices = indices[:n_test]
    train_indices = indices[n_test:]

    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return X_train, X_test, y_train, y_test


In [9]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_true``.

    Returns
    -------
    numpy.float64
        Value in ``[0, 1]``.

    Raises
    ------
    ValueError
        If the inputs have different shapes or are empty.
    """
    # Coerce to arrays first: comparing two plain lists with ``==`` gives a
    # single bool rather than an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape (got %s and %s)"
            % (y_true.shape, y_pred.shape)
        )
    if y_true.size == 0:
        raise ValueError("accuracy is undefined for empty input")

    return np.mean(y_true == y_pred)

In [36]:
from sklearn import datasets
import matplotlib.pyplot as plt

# Synthetic binary-classification data: 1000 samples with 10 features each.
X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=12)

# Hold out 20% of the shuffled samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=142
)

# Fit the classifier on the training split and predict the held-out split.
nb = NaiveBayes()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Report test accuracy as a percentage.
print(accuracy(y_test, y_pred) * 100, "%")









89.0 %
