In [None]:
#Q1

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt

# Load the Iris dataset and pull out the feature matrix, the labels,
# and the human-readable names that accompany them.
iris = datasets.load_iris()
x, y = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names

# Hold out 30% of the samples for testing. The split is stratified on the
# labels and uses a fixed seed so it is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier built on NumPy.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    rows of that class. Prediction picks the class with the largest
    log-prior plus summed per-feature Gaussian log-likelihood.

    Attributes set by ``fit``:
        classes: sorted unique labels found in ``y``.
        prior:   dict mapping class label -> fraction of training rows.
        mean:    dict mapping class label -> per-feature mean.
        var:     dict mapping class label -> per-feature variance, with
                 ``eps`` already added.
        eps:     small constant added to every variance so that a feature
                 which is constant within a class cannot produce log(0)
                 or a division by zero.
    """

    def fit(self, x, y):
        """Estimate per-class priors, means and variances.

        Args:
            x: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,) holding the class labels.

        Returns:
            self, so calls can be chained.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        self.classes = np.unique(y)
        self.prior = {}
        self.mean = {}
        self.var = {}
        self.eps = 1e-9
        for cls in self.classes:
            x_c = x[y == cls]
            self.mean[cls] = x_c.mean(axis=0)
            # Without the eps term a zero variance turns every class score
            # into NaN, and predict() would then always return the first class.
            self.var[cls] = x_c.var(axis=0) + self.eps
            self.prior[cls] = x_c.shape[0] / x.shape[0]
        return self

    def _gaussian_log_likelihood(self, cls, z):
        """Return log p(z | cls) under the fitted per-feature Gaussians.

        Args:
            cls: a class label present in ``self.classes``.
            z: one sample, array-like of shape (n_features,).

        Returns:
            The sum over features of the Gaussian log-density of ``z``.
        """
        mean = self.mean[cls]
        var = self.var[cls]
        # log N(z; mean, var) = -0.5*log(2*pi*var) - (z - mean)^2 / (2*var)
        log_norm = -0.5 * np.log(2 * np.pi * var)
        log_exp = -((z - mean) ** 2) / (2 * var)
        return np.sum(log_norm + log_exp)

    def predict(self, x):
        """Predict a class label for every row of ``x``.

        Args:
            x: array-like of shape (n_samples, n_features).

        Returns:
            NumPy array with one predicted label per row of ``x``.
        """
        x = np.asarray(x)
        preds = []
        for sample in x:
            # Unnormalised log-posterior of each class for this sample.
            class_scores = {
                cls: np.log(self.prior[cls])
                + self._gaussian_log_likelihood(cls, sample)
                for cls in self.classes
            }
            preds.append(max(class_scores, key=class_scores.get))
        return np.array(preds)

# Train the hand-written classifier on the training split and score it
# on the held-out test split.
gnb_custom = GaussianNaiveBayes()
gnb_custom.fit(x_train, y_train)
y_pred = gnb_custom.predict(x_test)

print("Manual Gaussian Naïve Bayes")
# Each metric is called as metric(y_true, y_pred) and printed after its heading.
for heading, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))


Manual Gaussian Naïve Bayes
Accuracy: 0.9111111111111111
Confusion Matrix:
 [[15  0  0]
 [ 0 14  1]
 [ 0  3 12]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.82      0.93      0.88        15
           2       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.92      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45



In [None]:
#(ii)
# Use scikit-learn's own GaussianNB here. Re-instantiating the hand-written
# GaussianNaiveBayes class would only compare the manual model with itself.
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(x_train, y_train)
y_pred_builtin = gnb.predict(x_test)

# Same three metrics as for the manual model, so the results can be compared.
print("In-built Gaussian Naive Bayes ")
print("Accuracy:", accuracy_score(y_test, y_pred_builtin))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_builtin))
print("Classification Report:\n", classification_report(y_test, y_pred_builtin))


In-built Gaussian Naïve Bayes (sklearn)
Accuracy: 0.9111111111111111
Confusion Matrix:
 [[15  0  0]
 [ 0 14  1]
 [ 0  3 12]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.82      0.93      0.88        15
           2       0.92      0.80      0.86        15

    accuracy                           0.91        45
   macro avg       0.92      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45



In [None]:
#Q2
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
knn = KNeighborsClassifier()

# Candidate neighbour counts to search over: every k from 1 through 20.
param_grid = {'n_neighbors': np.arange(1, 21)}

# Score every candidate k by accuracy with cv=5 on the training split only.
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(x_train, y_train)

print("GridSearchCV for KNN")
print("Best K value:", grid_search.best_params_['n_neighbors'])
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Score the selected model on the held-out test split.
best_knn = grid_search.best_estimator_
y_pred_knn = best_knn.predict(x_test)

# Each metric is called as metric(y_true, y_pred) and printed after its heading.
for heading, metric in (
    ("Test Accuracy with Best K:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred_knn))

GridSearchCV for KNN
Best K value: 9
Best Cross-Validation Accuracy: 0.980952380952381
Test Accuracy with Best K: 0.9555555555555556
Confusion Matrix:
 [[15  0  0]
 [ 0 15  0]
 [ 0  2 13]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.88      1.00      0.94        15
           2       1.00      0.87      0.93        15

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

