# Naive Bayes Classifier - Iris Dataset
[Source]
***

In [1]:
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Loading the data

In [2]:
# Fetch the Iris bunch once, then pull out the feature matrix and the labels.
iris = load_iris()
X = iris.data
y = iris.target

In [3]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Model

In [4]:
class NaiveBayesClassifier:
    """Categorical Naive Bayes classifier built on exact feature-value counts.

    Every distinct value observed for a feature is treated as its own
    category: the likelihood P(x_i = v | y) is the relative frequency of the
    value ``v`` among the training samples of class ``y``. Values are matched
    by exact equality, so no density is fitted for continuous inputs.

    Parameters
    ----------
    smoothing : float, default 1e-6
        Probability used for a feature value that was never observed for a
        class during ``fit``. Must be strictly positive so that its logarithm
        is finite.

    Attributes
    ----------
    class_priors : dict
        Maps each class label to its prior probability P(y).
    feature_likelihoods : defaultdict(dict)
        ``feature_likelihoods[cls][feature_index]`` maps an observed feature
        value to P(value | cls).
    classes : numpy.ndarray
        Sorted unique class labels seen by the last call to ``fit``
        (an empty list before the first fit).

    Raises
    ------
    ValueError
        If ``smoothing`` is not strictly positive.
    """

    def __init__(self, smoothing=1e-6):
        # `not smoothing > 0` also rejects NaN, which `smoothing <= 0` would let through.
        if not smoothing > 0:
            raise ValueError("smoothing must be a positive number")
        self.smoothing = smoothing
        self.class_priors = {}
        self.feature_likelihoods = defaultdict(dict)
        self.classes = []

    def fit(self, X, y):
        """Estimate class priors and per-feature value likelihoods.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples. Lists are accepted and converted to arrays.
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Returns
        -------
        None
            The fitted statistics are stored on the instance.
        """
        # Boolean-mask indexing and column slicing below need real arrays.
        X = np.asarray(X)
        y = np.asarray(y)

        # Start from a clean slate so refitting on different data never
        # leaves priors or likelihoods of classes from a previous fit behind.
        self.class_priors = {}
        self.feature_likelihoods = defaultdict(dict)

        self.classes = np.unique(y)
        total_samples = len(y)

        for cls in self.classes:
            class_mask = y == cls

            # Prior probability P(y)
            self.class_priors[cls] = np.sum(class_mask) / total_samples

            # Likelihood P(x|y) for each feature, from value frequencies
            X_class = X[class_mask]
            for feature_index in range(X.shape[1]):
                feature_values, feature_counts = np.unique(
                    X_class[:, feature_index], return_counts=True
                )
                likelihood = feature_counts / feature_counts.sum()
                self.feature_likelihoods[cls][feature_index] = dict(
                    zip(feature_values, likelihood)
                )

    def predict(self, X):
        """Predict the most probable class for every sample in ``X``.

        Scores are accumulated as sums of log-probabilities instead of
        products of probabilities. The ranking of the classes is the same,
        but the score no longer underflows to zero when many features are
        involved (a product of a few dozen ``smoothing`` factors is already
        below the smallest representable float).

        Parameters
        ----------
        X : iterable of samples
            Each sample is an iterable of feature values in the same order
            as the columns used in ``fit``.

        Returns
        -------
        numpy.ndarray
            Predicted class label for each sample. When several classes tie,
            the one that comes first in ``self.classes`` is returned.
        """
        # Loop-invariant terms: log of the unseen-value probability and of each prior.
        log_unseen = np.log(self.smoothing)
        log_priors = {cls: np.log(self.class_priors[cls]) for cls in self.classes}

        predictions = []
        for sample in X:
            class_scores = {}
            for cls in self.classes:
                likelihoods = self.feature_likelihoods[cls]
                score = log_priors[cls]

                for feature_index, feature_value in enumerate(sample):
                    prob = likelihoods[feature_index].get(feature_value)
                    # Values never seen for this class fall back to the smoothing probability.
                    score += log_unseen if prob is None else np.log(prob)

                class_scores[cls] = score

            # Choose the class with the highest log-posterior score
            predictions.append(max(class_scores, key=class_scores.get))

        return np.array(predictions)

    def accuracy(self, y_true, y_pred):
        """Return the fraction of predictions that equal the true labels.

        Parameters
        ----------
        y_true : array-like
            Ground-truth labels.
        y_pred : array-like
            Predicted labels, same shape as ``y_true``.

        Returns
        -------
        float
            Number of matching positions divided by ``len(y_true)``.

        Raises
        ------
        ValueError
            If the two inputs do not have the same shape.
        """
        # Convert first: comparing two plain lists with == yields a single
        # bool instead of an element-wise result.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"y_true and y_pred must have the same shape, "
                f"got {y_true.shape} and {y_pred.shape}"
            )
        return np.sum(y_true == y_pred) / len(y_true)

In [5]:
%%time
# Build the classifier with its default settings.
clf = NaiveBayesClassifier()
# Learn priors and per-feature likelihoods from the training split only.
clf.fit(X=X_train, y=y_train)

CPU times: user 2.7 ms, sys: 116 μs, total: 2.82 ms
Wall time: 2.19 ms


In [6]:
# Predict a label for every held-out sample.
y_pred = clf.predict(X=X_test)

In [7]:
# Fraction of held-out samples whose predicted label equals the true label,
# printed with four decimal places.
print(f"Accuracy: {clf.accuracy(y_test, y_pred):.4f}")

Accuracy: 0.9333


# Summary

The model classifies the Iris species in the held-out test set with an accuracy of **93.33%**.

**End**