# Naive Bayes Classifier for iris dataset from Scratch

In [1]:
import numpy as np
import pandas as pd

## Loading data

In [2]:
from sklearn.datasets import load_iris

# Load the iris dataset object; its fields (data, target, names, description)
# are inspected in the following cells.
iris = load_iris()

In [3]:
# List the fields exposed by the loaded dataset object.
iris.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
# Show the dataset's own description text (attributes, classes, summary statistics).
print(iris['DESCR'])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [5]:
# Feature matrix and class-label vector, pulled from the dataset in one step.
X, y = iris['data'], iris['target']

print(X.shape, y.shape)

(150, 4) (150,)


## Dividing data into Training set and Testing set

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 30% of the samples for testing; random_state is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(105, 4) (105,)
(45, 4) (45,)


## Fitting model on Training set

In [8]:
from scipy.stats import norm

### Function to calculate Gaussian Distribution Curve for the given data

In [9]:
def fit_distribution(data, min_sigma=1e-9):
    """Fit a univariate Gaussian to ``data`` and return it as a frozen distribution.

    The mean and (population) standard deviation of ``data`` are computed with
    ``np.mean`` / ``np.std`` and printed, then used to build a ``scipy.stats.norm``
    object whose ``pdf`` / ``logpdf`` can be evaluated later.

    Parameters
    ----------
    data : array-like
        Observed values of a single feature.
    min_sigma : float, optional
        Lower bound applied to the standard deviation before the distribution
        is built. A constant feature has a standard deviation of exactly 0,
        and a normal distribution with scale 0 evaluates to nan everywhere,
        which would corrupt every downstream class score.

    Returns
    -------
    Frozen ``scipy.stats.norm`` distribution with the fitted mean and the
    (floored) standard deviation.
    """
    mu = np.mean(data)
    sigma = np.std(data)
    # Report the measured sigma, not the floored one, so the log reflects the data.
    print("Mean =", mu, "Sigma =", sigma)
    # Floor only affects degenerate (near-)constant features; normal data is untouched.
    return norm(mu, max(sigma, min_sigma))

### Dividing data according to class: setosa, versicolor, virginica

In [10]:
# Class names, in the order of the integer labels used in the target vector.
iris['target_names']

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [11]:
# Training rows grouped by class label (0, 1, 2), selected with boolean masks.
Xy0, Xy1, Xy2 = (X_train[y_train == label] for label in (0, 1, 2))

In [12]:
# Number of training rows (and features) available for each class.
print(*(subset.shape for subset in (Xy0, Xy1, Xy2)))

(37, 4) (30, 4) (38, 4)


### Calculating Prior Probabilities for each class P(yi)

In [13]:
# Prior of each class = its share of the training rows.
prior0, prior1, prior2 = (len(subset) / len(X_train) for subset in (Xy0, Xy1, Xy2))

print(prior0, prior1, prior2)

0.3523809523809524 0.2857142857142857 0.3619047619047619


### Calculating Gaussian Distribution Curve for each feature of each class

This is used to calculate **Conditional probabilities** for each feature given each class  
  
**P(x1 | y0)&nbsp;&nbsp;&nbsp;P(x2 | y0)&nbsp;&nbsp;&nbsp;P(x3 | y0)&nbsp;&nbsp;&nbsp;P(x4 | y0)  
P(x1 | y1)&nbsp;&nbsp;&nbsp;P(x2 | y1)&nbsp;&nbsp;&nbsp;P(x3 | y1)&nbsp;&nbsp;&nbsp;P(x4 | y1)  
P(x1 | y2)&nbsp;&nbsp;&nbsp;P(x2 | y2)&nbsp;&nbsp;&nbsp;P(x3 | y2)&nbsp;&nbsp;&nbsp;P(x4 | y2)**

In [14]:
# One fitted Gaussian per (feature, class) pair. Each line fits the four feature
# columns of one class in column order, so the printed Mean/Sigma lines come out
# class by class, feature by feature.
X1y0, X2y0, X3y0, X4y0 = (fit_distribution(Xy0[:, col]) for col in range(4))

X1y1, X2y1, X3y1, X4y1 = (fit_distribution(Xy1[:, col]) for col in range(4))

X1y2, X2y2, X3y2, X4y2 = (fit_distribution(Xy2[:, col]) for col in range(4))

Mean = 5.008108108108108 Sigma = 0.3490360303389882
Mean = 3.4216216216216213 Sigma = 0.32226825751658467
Mean = 1.4756756756756755 Sigma = 0.17305741182250942
Mean = 0.23513513513513512 Sigma = 0.09069168505709468
Mean = 5.946666666666667 Sigma = 0.5031456603763521
Mean = 2.766666666666667 Sigma = 0.32283466700808666
Mean = 4.28 Sigma = 0.5095095681142799
Mean = 1.3199999999999998 Sigma = 0.2196967607104544
Mean = 6.576315789473685 Sigma = 0.6413120640023101
Mean = 3.0078947368421054 Sigma = 0.34594004572390435
Mean = 5.56578947368421 Sigma = 0.5545393136789425
Mean = 2.0526315789473686 Sigma = 0.265307013918496


## Predicting the classes for Testing data set

### Function to predict class of single row.

In [15]:
def probability(x):
    """Return the index (0, 1 or 2) of the most likely class for one sample.

    Despite its name, this function returns a class index, not a probability.
    Each class is scored with the naive Bayes rule: the class prior combined
    with the per-feature Gaussian likelihoods fitted earlier in the notebook
    (``X<feature>y<class>`` and ``prior<class>``).

    Scores are accumulated in log space (sum of ``logpdf`` plus log prior)
    rather than as a product of densities. The product of four densities
    underflows to 0.0 for samples far from every class mean, which makes all
    classes tie at zero and class 0 win arbitrarily. The logarithm is
    monotonic, so the winning class is unchanged whenever the product did not
    underflow.

    Parameters
    ----------
    x : sequence of 4 numbers
        Feature values of one sample, in the same column order used for fitting.

    Returns
    -------
    int
        Index of the class with the highest score; ties resolve to the lowest index.
    """
    class_models = (
        (prior0, (X1y0, X2y0, X3y0, X4y0)),
        (prior1, (X1y1, X2y1, X3y1, X4y1)),
        (prior2, (X1y2, X2y2, X3y2, X4y2)),
    )
    scores = []
    for prior, dists in class_models:
        # Explicit indexing keeps the original IndexError for samples with too few features.
        log_likelihood = sum(dist.logpdf(x[col]) for col, dist in enumerate(dists))
        scores.append(np.log(prior) + log_likelihood)
    return scores.index(max(scores))

### Function to predict classes of whole Testing set

In [16]:
def predict(X):
    """Classify every row of ``X`` and return the predicted class indices as a list.

    Rows are read as ``X[0]`` … ``X[len(X) - 1]`` and each is passed to
    ``probability``; results are collected in the same order.
    """
    labels = []
    for row_idx in range(len(X)):
        labels.append(probability(X[row_idx]))
    return labels

In [17]:
# Predicted class index for every row of the held-out test set.
predictions = predict(X_test)

## Performance Evaluation

In [18]:
from sklearn.metrics import confusion_matrix, classification_report

In [19]:
# Compare the true test labels with the predicted ones, class by class.
print(confusion_matrix(y_test, predictions))

[[13  0  0]
 [ 0 19  1]
 [ 0  1 11]]


In [20]:
# Per-class precision, recall and F1 for the test-set predictions.
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.95      0.95      0.95        20
           2       0.92      0.92      0.92        12

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

