In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split,  GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step-by-step Implementation of Gaussian Naive Bayes
class MyGaussianNB:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}
        
        # Calculate mean, variance, and priors for each class
        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0) + 1e-9  # Add small value to avoid division by zero
            self.priors[c] = X_c.shape[0] / X.shape[0]
    
    def gaussian_pdf(self, class_idx, x):
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def predict(self, X):
        y_pred = []
        for x in X:
            posteriors = []
            for c in self.classes:
                prior = np.log(self.priors[c])
                conditional = np.sum(np.log(self.gaussian_pdf(c, x)))
                posterior = prior + conditional
                posteriors.append(posterior)
            y_pred.append(self.classes[np.argmax(posteriors)])
        return np.array(y_pred)

# Custom model
my_gnb = MyGaussianNB()
my_gnb.fit(X_train, y_train)
y_pred_custom = my_gnb.predict(X_test)
acc_custom = accuracy_score(y_test, y_pred_custom)

# In-built sklearn GaussianNB
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_builtin = gnb.predict(X_test)
acc_builtin = accuracy_score(y_test, y_pred_builtin)

# Display results
print("Step-by-Step Implementation Accuracy: {:.2f}%".format(acc_custom * 100))
print("In-built Accuracy: {:.2f}%".format(acc_builtin * 100))
print("\nPredicted labels (Custom):", y_pred_custom)
print("Predicted labels (In-built):", y_pred_builtin)


Step-by-Step Implementation Accuracy: 97.78%
In-built Accuracy: 97.78%

Predicted labels (Custom): [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 2 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]
Predicted labels (In-built): [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 2 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]


In [6]:
param_grid = {'n_neighbors': np.arange(1, 21)}

# Create KNN classifier
knn = KNeighborsClassifier()

# Use GridSearchCV to find the best K
grid = GridSearchCV(knn, param_grid, cv=5)
grid.fit(X, y)

# Best K and accuracy
best_k = grid.best_params_['n_neighbors']
best_score = grid.best_score_

# ----------------------------
# Display Results
# ----------------------------
results = pd.DataFrame({
    "Model": ["Custom GaussianNB", "Built-in GaussianNB", "Best K for KNN"],
    "Accuracy / Best Score": [acc_custom, acc_builtin, best_score],
    "Best Parameter (if any)": ["-", "-", best_k]
})

print("\n=== Model Comparison Results ===")
print(results)


=== Model Comparison Results ===
                 Model  Accuracy / Best Score Best Parameter (if any)
0    Custom GaussianNB               0.977778                       -
1  Built-in GaussianNB               0.977778                       -
2       Best K for KNN               0.980000                       6
