In [1]:
import time
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, StratifiedKFold

In [2]:
df = pd.read_csv("../Dry_Bean_Dataset.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [3]:
class NaiveBayesClassifier(BaseEstimator):
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.priors = {}
        self.likelihoods = {}
        for c in self.classes:
            X_c = X[y == c]
            self.priors[c] = X_c.shape[0] / X.shape[0]
            self.likelihoods[c] = {
                "mean": X_c.mean(axis=0),
                "var": X_c.var(axis=0) + 1e-9,
            }
        return self

    def _gaussian_prob(self, x, mean, var):
        coeff = 1.0 / np.sqrt(2.0 * np.pi * var)
        exponent = np.exp(-((x - mean) ** 2) / (2 * var))
        return coeff * exponent

    def predict(self, X):
        y_pred = []
        for x in X:
            posteriors = []
            for c in self.classes:
                prior = np.log(self.priors[c])
                mean = self.likelihoods[c]["mean"]
                var = self.likelihoods[c]["var"]
                probs = self._gaussian_prob(x, mean, var)
                probs = np.clip(probs, 1e-9, None)
                likelihood = np.sum(np.log(probs))
                posteriors.append(prior + likelihood)
            y_pred.append(self.classes[np.argmax(posteriors)])
        return np.array(y_pred)

    def score(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)

In [4]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=52)

In [5]:
nb = NaiveBayesClassifier()
start = time.time()
scores = cross_val_score(nb, X, y, cv=skf)
end = time.time()
print(
    f"Наивный Байес: средняя точность = {scores.mean():.4f}, время = {end - start:.2f} с"
)

Наивный Байес: средняя точность = 0.8971, время = 1.78 с


In [6]:
gnb = GaussianNB()
start_skl = time.time()
skl_scores = cross_val_score(gnb, X, y, cv=skf)
end_skl = time.time()
print(
    f"Sklearn GaussianNB: средняя точность = {skl_scores.mean():.4f}, время = {end_skl - start_skl:.2f} с"
)

Sklearn GaussianNB: средняя точность = 0.7640, время = 0.09 с
