In [1]:
import numpy as np
from random import shuffle
from sklearn.datasets import load_iris
from typing import List

In [2]:
class GaussianNB:
    """Gaussian naive Bayes classifier for continuous features.

    For every class, each feature is modelled as an independent normal
    distribution whose mean and standard deviation are estimated in ``fit``.
    ``predict`` returns the class with the largest log-posterior, i.e. the
    log-prior plus the sum of the per-feature log-likelihoods.
    """

    def __init__(self, num_classes: int):
        """Create an unfitted model.

        Args:
            num_classes: Number of classes the caller expects. It is stored
                as-is; the fitted parameter arrays are sized from the labels
                actually present in the data passed to ``fit``.
        """
        self.num_classes = num_classes
        self.log_prior = None
        self.feature_means = None
        self.feature_stds = None
        self.classes = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Estimate class priors and per-class feature means / std-devs.

        Args:
            X: Training samples, shape ``(n_samples, n_features)``.
            y: Class label of each sample, shape ``(n_samples,)``. Labels may
                be any values ``np.unique`` can sort; they do not have to be
                the integers ``0..num_classes-1``.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # np.unique returns the sorted distinct labels; row i of every fitted
        # array below corresponds to self.classes[i].
        self.classes, class_counts = np.unique(y, return_counts=True)
        self.log_prior = np.log(class_counts / len(y))

        # Size by the classes actually observed so the arrays always line up
        # with log_prior, and index by position rather than by label value so
        # arbitrary labels work.
        n_classes, n_features = len(self.classes), X.shape[1]
        self.feature_means = np.empty((n_classes, n_features))
        self.feature_stds = np.empty((n_classes, n_features))
        for row, c in enumerate(self.classes):
            members = X[y == c]
            self.feature_means[row] = members.mean(axis=0)
            # Small offset keeps the std strictly positive so constant
            # features do not cause a division by zero / log(0).
            self.feature_stds[row] = members.std(axis=0) + 1e-9

    def log_feature_probs(self, x: np.ndarray) -> np.ndarray:
        """Return the per-class, per-feature Gaussian log-density of ``x``.

        Args:
            x: A single sample of shape ``(n_features,)``; it is broadcast
                against the ``(n_classes, n_features)`` parameter arrays.

        Returns:
            Array of shape ``(n_classes, n_features)`` holding
            ``log N(x_j | mean_cj, std_cj)``.
        """
        log_norm = np.log(np.sqrt(2*np.pi) * self.feature_stds)
        squared_z = (x - self.feature_means)**2 / (self.feature_stds ** 2)
        return - log_norm - 0.5 * squared_z

    def predict(self, x: List[int]) -> int:
        """Return the most probable class label for one sample.

        Args:
            x: Feature vector of a single sample (length ``n_features``).

        Returns:
            The label from ``self.classes`` with the highest log-posterior,
            converted to a plain Python scalar via ``.item()``.
        """
        log_posterior = self.log_prior + self.log_feature_probs(x).sum(axis=1)
        return self.classes[log_posterior.argmax()].item()

In [3]:
# Load the Iris dataset and pull out the feature matrix and label vector as numpy arrays.
data = load_iris(as_frame=True)
X = data["data"].values
y = data["target"].values

# Shuffle rows with a fixed seed so the train/validation split, and therefore
# the accuracy reported below, is the same on every Restart & Run All.
SEED = 42
idx = np.random.default_rng(SEED).permutation(len(X))
X = X[idx]
y = y[idx]

# Hold out the first 10% of the shuffled rows for validation; train on the rest.
val_size = int(len(X)*0.1)
train_X, val_X = X[val_size:], X[:val_size]
train_y, val_y = y[val_size:], y[:val_size]

In [4]:
# Build the model with one class per distinct label in y, then fit it on the training split only.
classifier = GaussianNB(num_classes=np.unique(y).size)
classifier.fit(train_X, train_y)

In [5]:
# Classify the validation samples one at a time (predict takes a single feature vector).
preds = np.array([classifier.predict(sample) for sample in val_X])

# Validation accuracy: fraction of predictions that match the true label.
(preds == val_y).mean().item()

0.9333333333333333