In [1]:
import numpy as np
import pandas as pd

In [2]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to local modules take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [3]:
from load_data import load_scaled_data

# Load the dataset, then separate the feature columns from the 'Class' target
# column and convert both to NumPy arrays.
df = load_scaled_data()
X = df.drop(columns='Class').to_numpy()
y = df['Class'].to_numpy()

  from scipy.sparse import csr_matrix, issparse


In [4]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
from naive_bayes import GaussianNaiveBayesClassifier
# Create and train the model
gnb_custom = GaussianNaiveBayesClassifier()
gnb_custom.fit(X_train, y_train)

# Predict on the test data
y_pred = gnb_custom.predict(X_test)

In [6]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 of the custom classifier's test predictions, shown with 5 decimals.
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.99623   0.97243   0.98419      1632
           1    0.97798   0.99701   0.98740      2005

    accuracy                        0.98598      3637
   macro avg    0.98711   0.98472   0.98580      3637
weighted avg    0.98617   0.98598   0.98596      3637



In [7]:
from sklearn.naive_bayes import GaussianNB

# scikit-learn reference model, fitted on the same training split
gnb_sklearn = GaussianNB().fit(X_train, y_train)  # fit() returns the fitted estimator
y_pred_sk = gnb_sklearn.predict(X_test)

In [8]:
# Same report for the scikit-learn model's test predictions, for side-by-side comparison.
print(classification_report(y_test, y_pred_sk, digits=5))

              precision    recall  f1-score   support

           0    0.99623   0.97243   0.98419      1632
           1    0.97798   0.99701   0.98740      2005

    accuracy                        0.98598      3637
   macro avg    0.98711   0.98472   0.98580      3637
weighted avg    0.98617   0.98598   0.98596      3637



In [9]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

def cross_validate_model(X, y, model, k=5, random_state=42):
    """Estimate a classifier's accuracy with shuffled k-fold cross-validation.

    Each fold trains an independent deep copy of ``model``. The estimator
    passed in by the caller is therefore never fitted or otherwise modified,
    and nothing learned in one fold can carry over into the next.

    Parameters
    ----------
    X : array-like
        Feature matrix. Must support indexing with an integer index array
        (e.g. a NumPy array).
    y : array-like
        Target labels, indexable the same way as ``X``.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. It must be
        deep-copyable.
    k : int, default=5
        Number of folds.
    random_state : int or None, default=42
        Seed forwarded to ``KFold`` for shuffling the samples before
        splitting. The default reproduces the previously hard-coded seed.

    Returns
    -------
    tuple
        ``(mean_accuracy, std_accuracy)`` over the ``k`` folds, computed
        with ``np.mean`` and ``np.std`` (population standard deviation).
    """
    # Local import so the function does not depend on another cell's imports.
    from copy import deepcopy

    splitter = KFold(n_splits=k, shuffle=True, random_state=random_state)
    fold_accuracies = []

    for train_index, test_index in splitter.split(X):
        # A fresh copy per fold keeps folds independent and leaves the
        # caller's estimator untouched.
        fold_model = deepcopy(model)
        fold_model.fit(X[train_index], y[train_index])

        y_pred = fold_model.predict(X[test_index])
        fold_accuracies.append(accuracy_score(y[test_index], y_pred))

    return np.mean(fold_accuracies), np.std(fold_accuracies)

In [10]:

# 5-fold cross-validated accuracy of a fresh custom classifier on the full dataset (X, y).
avg_acc_custom, std_acc_custom = cross_validate_model(X, y, GaussianNaiveBayesClassifier(), k=5)
print(f"Средняя точность (Custom): {avg_acc_custom:.4f} ± {std_acc_custom:.4f}")

Средняя точность (Custom): 0.9837 ± 0.0022


In [11]:
# 5-fold cross-validated accuracy of scikit-learn's GaussianNB on the full dataset (X, y).
avg_acc_sk, std_acc_sk = cross_validate_model(X, y, GaussianNB(), k=5)
print(f"Средняя точность (sklearn): {avg_acc_sk:.4f} ± {std_acc_sk:.4f}")

Средняя точность (sklearn): 0.9837 ± 0.0022


In [12]:
%%timeit
# Benchmark: construct and train the custom classifier on the training split.
gnb_custom = GaussianNaiveBayesClassifier()
gnb_custom.fit(X_train, y_train)

5.3 ms ± 391 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [13]:
# Benchmark: prediction on the test split with the custom classifier.
%timeit gnb_custom.predict(X_test)

1.08 ms ± 31.8 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [14]:
%%timeit
# Benchmark: construct and train scikit-learn's GaussianNB on the training split.
gnb_sklearn = GaussianNB()
gnb_sklearn.fit(X_train, y_train)

7.75 ms ± 671 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [15]:
# Benchmark: prediction on the test split with scikit-learn's GaussianNB.
%timeit gnb_sklearn.predict(X_test)

1.16 ms ± 93.7 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
