In [2]:
# Environment check: bring in every library up front, then print the
# interpreter version followed by the version of each package.
import sys
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import IPython
import sklearn
import mglearn  # imported only; no version is printed for it

print("Python version: {}".format(sys.version))
print("pandas version: {}".format(pd.__version__))
print("matplotlib version: {}".format(matplotlib.__version__))
print("NumPy version: {}".format(np.__version__))
print("SciPy version: {}".format(sp.__version__))
print("IPython version: {}".format(IPython.__version__))
print("scikit-learn version: {}".format(sklearn.__version__))

Python version: 3.12.4 | packaged by Anaconda, Inc. | (main, Jun 18 2024, 15:03:56) [MSC v.1929 64 bit (AMD64)]
pandas version: 2.2.2
matplotlib version: 3.8.4
NumPy version: 1.26.4
SciPy version: 1.13.1
IPython version: 8.24.0
scikit-learn version: 1.4.2


### Naive Bayes 

- Naive Bayes classifiers tend to be even faster in
training than linear models. The price paid for this efficiency is that naive Bayes models often provide
generalization performance that is slightly worse than that of linear classifiers like
LogisticRegression and LinearSVC.

- They are efficient because they learn parameters by
looking at each feature individually and collecting simple per-class statistics from each
feature.

- scikit-learn implements three kinds of naive Bayes classifiers: GaussianNB, BernoulliNB, and MultinomialNB.

-  GaussianNB : can be applied to
any continuous data

- BernoulliNB :  assumes binary data

- MultinomialNB : assumes count data (that is, that each feature represents an integer count of something,
like how often a word appears in a sentence).

-  BernoulliNB and MultinomialNB
are mostly used in text data classification.


The BernoulliNB classifier counts how often every feature of each class is not zero.
This is most easily understood with an example:

In [3]:
# Toy data set: every row of X is one data point, every column one binary
# feature; y holds the class label (0 or 1) of the matching row of X.
y = np.array([0, 1, 0, 1])
X = np.array(
    [
        [0, 1, 0, 1],  # class 0
        [1, 0, 1, 1],  # class 1
        [0, 0, 0, 1],  # class 0
        [1, 0, 1, 0],  # class 1
    ]
)

Here, we have four data points, with four binary features each. There are two classes,
0 and 1. For class 0 (the first and third data points), the first feature is zero two times
and nonzero zero times, the second feature is zero one time and nonzero one time,
and so on. These same counts are then calculated for the data points in the second
class. Counting the nonzero entries per class in essence looks like this:

In [4]:
# Per-class feature counts: for each distinct label in y, pick that class's
# rows of X with a boolean mask and sum every column. With 0/1 features the
# column sum is the number of entries equal to 1 for that feature.
counts = {label: X[y == label].sum(axis=0) for label in np.unique(y)}

print("Feature counts:\n {}".format(counts))

Feature counts:
 {0: array([0, 1, 0, 2]), 1: array([2, 0, 2, 1])}


In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [4]:
# Load the iris data set, then pull out the feature matrix and the labels.
data = load_iris()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Create a Gaussian naive Bayes classifier with its default settings.
model = GaussianNB()

# Train on the training split. The call stays the cell's last expression, so
# the value it returns is what the notebook displays for this cell.
model.fit(X=X_train, y=y_train)

In [8]:
# Predict a class label for every sample in the held-out test split.
y_pred = model.predict(X=X_test)

In [12]:
# Score the predictions against the true test labels with three metrics.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

# Print the results: accuracy as a percentage with two decimals, then the
# confusion matrix, then the text classification report.
print(f"Accuracy : {accuracy * 100:.2f}%")
print("Confusion Matrix : \n", cm)
print("Classification Report : \n", report)

Accuracy : 97.78%
Confusion Matrix : 
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
Classification Report : 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      0.92      0.96        13
           2       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45

