In [21]:
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.naive_bayes import GaussianNB
from GaussianNaiveBayes import GaussianNaiveBayes

import time, timeit

In [22]:
# Seed passed as random_state to train_test_split so the train/test split is repeatable.
seed = 18

In [23]:
# Column labels for the headerless wine.data file, listed in file order.
# "Class" comes first and is used as the target; the other thirteen are the
# measured attributes that end up in the feature matrix.
feature_names = ["Class"] + [
    "Alcohol",
    "Malic acid",
    "Ash",
    "Alcalinity of ash",
    "Magnesium",
    "Total phenols",
    "Flavanoids",
    "Nonflavanoid phenols",
    "Proanthocyanins",
    "Color intensity",
    "Hue",
    "OD280/OD315 of diluted wines",
    "Proline",
]

In [24]:
# The file is read with header=None, so column labels come from feature_names.
data = pd.read_csv(
    "./data/wine/wine.data",
    names=feature_names,
    header=None,
)

In [25]:
# Sanity check on the loaded frame: (number of rows, number of columns).
data.shape

(178, 14)

In [26]:
# Preview five randomly chosen rows. The draw is seeded with the notebook-wide
# seed so the preview shows the same rows on every Restart & Run All.
data.sample(n=5, random_state=seed)

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
32,1,13.68,1.83,2.36,17.2,104,2.42,2.69,0.42,1.97,3.84,1.23,2.87,990
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
127,2,11.79,2.13,2.78,28.5,92,2.13,2.24,0.58,1.76,3.0,0.97,2.44,466
80,2,12.0,0.92,2.0,19.0,86,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278
149,3,13.08,3.9,2.36,21.5,113,1.41,1.39,0.34,1.14,9.4,0.57,1.33,550


In [27]:
# Separate the target column from the predictors.
y = data["Class"]
X = data.drop(columns=["Class"])

In [28]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): no stratify argument is passed, so class proportions may differ
# between the train and test parts — consider stratify=y if that matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=seed,
    test_size=0.2,
)

## Своя реализация

In [29]:
# Project-local classifier imported from the GaussianNaiveBayes module.
nb_custom = GaussianNaiveBayes()

In [30]:
# %timeit calls fit many times on the same nb_custom object.
# NOTE(review): later cells call nb_custom.predict, so they presumably rely on the
# fitted state left behind by these timing runs — confirm that repeated fit calls
# are safe for this class.
%timeit nb_custom.fit(x=X_train, y=y_train)

388 μs ± 27.4 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [12]:
# Predictions of the custom model for the held-out test rows.
preds_custom = nb_custom.predict(x=X_test)

In [14]:
# Per-class precision / recall / F1 of the custom model on the test split.
print(
    classification_report(
        y_pred=preds_custom,
        y_true=y_test,
    )
)

              precision    recall  f1-score   support

           1       1.00      0.94      0.97        17
           2       0.93      1.00      0.97        14
           3       1.00      1.00      1.00         5

    accuracy                           0.97        36
   macro avg       0.98      0.98      0.98        36
weighted avg       0.97      0.97      0.97        36



In [20]:
# Same report for the custom model on the training split, to compare with the
# test-split numbers.
print(
    classification_report(
        y_pred=nb_custom.predict(x=X_train),
        y_true=y_train,
    )
)

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        42
           2       1.00      0.96      0.98        57
           3       0.96      1.00      0.98        43

    accuracy                           0.99       142
   macro avg       0.99      0.99      0.99       142
weighted avg       0.99      0.99      0.99       142



## Библиотечная реализация

In [31]:
# Reference implementation from scikit-learn, created with default parameters.
nb_sklearn = GaussianNB()

In [32]:
# %timeit calls fit many times on the same nb_sklearn object; the estimator left
# after the timing runs is the one used by the predict cells that follow.
%timeit nb_sklearn.fit(X=X_train, y=y_train)

3.3 ms ± 460 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [17]:
# Predictions of the scikit-learn model for the held-out test rows.
preds_sklearn = nb_sklearn.predict(X=X_test)

In [18]:
# Per-class precision / recall / F1 of the scikit-learn model on the test split.
print(
    classification_report(
        y_pred=preds_sklearn,
        y_true=y_test,
    )
)

              precision    recall  f1-score   support

           1       1.00      0.94      0.97        17
           2       0.93      1.00      0.97        14
           3       1.00      1.00      1.00         5

    accuracy                           0.97        36
   macro avg       0.98      0.98      0.98        36
weighted avg       0.97      0.97      0.97        36



In [19]:
# Same report for the scikit-learn model on the training split, to compare with
# the test-split numbers.
print(
    classification_report(
        y_pred=nb_sklearn.predict(X=X_train),
        y_true=y_train,
    )
)

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        42
           2       1.00      0.96      0.98        57
           3       0.96      1.00      0.98        43

    accuracy                           0.99       142
   macro avg       0.99      0.99      0.99       142
weighted avg       0.99      0.99      0.99       142

