In [1]:
import numpy as np

In [21]:
class GNB:
    """Gaussian Naive Bayes classifier for continuous features.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and (population) variance are estimated from
    the training rows of that class.

    Attributes:
        mean: Maps class label -> list of per-feature means.
        var: Maps class label -> list of per-feature variances (with
            ``epsilon`` already added).
        classes: Set of labels seen by the last ``fit`` call, or ``None``
            before fitting.
        priors: Maps class label -> fraction of training rows in that class.
        epsilon: Constant added to every variance so that a feature that is
            constant within a class does not produce a zero variance.
    """

    def __init__(self, epsilon=1e-9):
        self.epsilon = epsilon
        self.mean = {}
        self.var = {}
        self.classes = None
        self.priors = {}

    def fit(self, x, y):
        """Estimate class priors and per-feature Gaussian parameters.

        Args:
            x: 2-D array-like of shape (n_samples, n_features).
            y: Sequence of n_samples hashable class labels.

        Raises:
            ValueError: If ``x`` is not 2-D, is empty, or its row count does
                not match ``len(y)``.
        """
        x = np.asarray(x, dtype=float)
        labels = list(y)

        if x.ndim != 2:
            raise ValueError("x must be 2-D (n_samples, n_features)")
        if x.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(labels) != x.shape[0]:
            raise ValueError("x and y must contain the same number of samples")

        # Start from a clean slate so a refit never keeps parameters of
        # classes that are absent from the new training data.
        self.mean = {}
        self.var = {}
        self.priors = {}
        self.classes = set(labels)

        # dict.fromkeys keeps first-seen order, which makes the insertion
        # order of self.priors (and therefore tie-breaking in predict)
        # deterministic, unlike iterating a set.
        for cls in dict.fromkeys(labels):
            mask = np.array([label == cls for label in labels], dtype=bool)
            rows = x[mask]

            self.priors[cls] = rows.shape[0] / x.shape[0]
            self.mean[cls] = list(rows.mean(axis=0))
            # Population variance (ddof=0); epsilon avoids log(0) and a
            # division by zero in the log-density.
            self.var[cls] = list(rows.var(axis=0) + self.epsilon)

    def gaussian_log_pdf(self, x, mean, var):
        """Return the log-density of N(mean, var) evaluated at ``x``.

        Computes ``-0.5 * log(2 * pi * var) - (x - mean)**2 / (2 * var)``.
        Works element-wise on scalars or broadcastable NumPy arrays.
        """
        return -0.5 * np.log(2 * np.pi * var) - ((x - mean) ** 2) / (2 * var)

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features). An empty
                sequence yields an empty list.

        Returns:
            A list with one label per row. When several classes tie, the
            one that appeared first in the training labels wins.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``X`` is not 2-D or its feature count differs
                from the training data.
        """
        if not self.priors:
            raise RuntimeError("GNB.predict() called before fit()")

        X = np.asarray(X, dtype=float)
        if X.ndim == 1 and X.size == 0:
            return []
        if X.ndim != 2:
            raise ValueError("X must be 2-D (n_samples, n_features)")

        labels = list(self.priors)
        n_features = len(self.mean[labels[0]])
        if X.shape[1] != n_features:
            raise ValueError(
                f"X has {X.shape[1]} features, but the model was fitted "
                f"with {n_features}"
            )

        # scores[k, j] = log P(class k) + sum_i log p(X[j, i] | class k)
        scores = np.empty((len(labels), X.shape[0]))
        for row, cls in enumerate(labels):
            log_likelihood = self.gaussian_log_pdf(
                X,
                mean=np.asarray(self.mean[cls], dtype=float),
                var=np.asarray(self.var[cls], dtype=float),
            )
            scores[row] = np.log(self.priors[cls]) + log_likelihood.sum(axis=1)

        # argmax returns the first maximum, so ties go to the earliest class.
        best = scores.argmax(axis=0)
        return [labels[k] for k in best]


In [22]:
# Toy training set: two well-separated 2-D clusters, three points each.
X_train = np.array(
    [
        # cluster around (1, 2) -> label 0
        [1.0, 2.0], [1.1, 1.8], [0.9, 2.2],
        # cluster around (5.5, 8.5) -> label 1
        [5.0, 8.0], [6.0, 9.0], [5.5, 8.5],
    ]
)

# One label per training row, in the same order as X_train.
y_train = np.repeat([0, 1], 3)

# Query points: one near each cluster and one lying between them.
X_test = np.array(
    [
        [1.0, 2.1],
        [5.8, 8.7],
        [3.0, 4.0],
    ]
)

In [23]:
# Build a Gaussian Naive Bayes model and estimate its per-class priors,
# means and variances from the toy training set.
model = GNB()
model.fit(x=X_train,y=y_train)

In [28]:
# Classify every row of X_test; the result is a list with one label per row.
predict = model.predict(X_test)

In [29]:
# Bare expression: lets the notebook display the predicted labels.
predict

[np.int64(0), np.int64(1), np.int64(1)]

In [None]:
import math
from collections import defaultdict

class MultinomialNaiveBayes:
    """Multinomial Naive Bayes classifier for count features.

    Attributes:
        alpha: Additive (Laplace/Lidstone) smoothing constant.
        class_counts: Maps class label -> number of training samples.
        feature_counts: Maps class label -> {feature index -> summed count}.
        class_priors: Maps class label -> log prior probability.
        feature_probs: Maps class label -> {feature index -> log probability
            of that feature given the class}.
        vocab_size: Number of features seen by the last ``fit`` call.
    """

    def __init__(self, alpha=1.0):
        self.alpha = alpha
        self.class_counts = defaultdict(int)
        self.feature_counts = defaultdict(lambda: defaultdict(int))
        self.class_priors = {}
        self.feature_probs = {}
        self.vocab_size = 0

    def fit(self, X, y):
        """Estimate log priors and smoothed log feature probabilities.

        Any state left over from a previous ``fit`` call is discarded.

        Args:
            X: Sequence of feature count vectors, all of the same length.
            y: Sequence of class labels, one per vector in ``X``.

        Raises:
            ValueError: If the training set is empty, ``X`` and ``y`` differ
                in length, or the vectors in ``X`` differ in length.
        """
        n_samples = len(y)
        if n_samples == 0 or len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X) != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        n_features = len(X[0])

        # Reset all learned state: counts accumulated by an earlier fit
        # would otherwise be mixed with this call's n_samples and produce
        # priors that are not probabilities.
        self.class_counts = defaultdict(int)
        self.feature_counts = defaultdict(lambda: defaultdict(int))
        self.class_priors = {}
        self.feature_probs = {}
        self.vocab_size = n_features

        for x, cls in zip(X, y):
            if len(x) != n_features:
                raise ValueError("all feature vectors must have the same length")
            self.class_counts[cls] += 1
            counts = self.feature_counts[cls]
            for i, value in enumerate(x):
                counts[i] += value

        for cls, n_cls in self.class_counts.items():
            self.class_priors[cls] = math.log(n_cls / n_samples)

            counts = self.feature_counts[cls]
            total_count = sum(counts.values())
            denominator = total_count + self.alpha * self.vocab_size

            log_probs = {}
            for i in range(n_features):
                numerator = counts[i] + self.alpha
                # With alpha == 0 an unseen feature has probability zero;
                # store -inf instead of letting math.log / the division raise.
                prob = numerator / denominator if denominator > 0 else 0.0
                log_probs[i] = math.log(prob) if prob > 0 else float("-inf")
            self.feature_probs[cls] = log_probs

    def predict(self, X):
        """Return the highest-scoring class label for each vector in ``X``.

        The score of a class is its log prior plus the count-weighted sum of
        its log feature probabilities.

        Args:
            X: Iterable of feature count vectors with ``vocab_size`` entries.

        Returns:
            A list with one label per input vector.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If a vector's length differs from ``vocab_size``.
        """
        if not self.class_priors:
            raise RuntimeError(
                "MultinomialNaiveBayes.predict() called before fit()"
            )

        predictions = []
        for x in X:
            if len(x) != self.vocab_size:
                raise ValueError(
                    f"expected {self.vocab_size} features, got {len(x)}"
                )

            scores = {}
            for cls, log_prior in self.class_priors.items():
                log_probs = self.feature_probs[cls]
                score = log_prior
                for i, value in enumerate(x):
                    # A zero count contributes nothing; skipping it also
                    # avoids 0 * -inf = nan when alpha == 0.
                    if value:
                        score += value * log_probs[i]
                scores[cls] = score

            predictions.append(max(scores, key=scores.get))

        return predictions
