In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets
from sklearn.metrics import accuracy_score


In [2]:
# Load the Iris dataset through sklearn.datasets.
# X is bound to the feature values (iris.data) and y to the class labels
# (iris.target); both names are reused by the cells below.
iris = datasets.load_iris()
X = iris.data
y = iris.target



In [3]:
# Split the dataset into training and testing sets.
# test_size=0.3 holds out 30% of the samples for testing, and
# random_state=42 pins the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [4]:
# (i) Step-by-step implementation: Gaussian Naive Bayes written from scratch with NumPy
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Every feature is modelled, per class, as an independent normal
    distribution.  A sample is assigned to the class with the largest
    log-posterior, i.e. ``log(prior) + sum(log N(x_j | mean_j, var_j))``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance.  This keeps all variances
        strictly positive, so a feature that is constant inside one class
        does not cause a division by zero.

    Notes
    -----
    After ``fit`` the instance holds:

    * ``classes`` - sorted array of the distinct labels seen in ``y``;
    * ``mean`` / ``var`` - dicts mapping each label to a per-feature array
      (``var`` already includes the smoothing term);
    * ``priors`` - dict mapping each label to its relative frequency.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        self.classes = None
        self.mean = {}
        self.var = {}
        self.priors = {}

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Class label of every training sample.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        # Start from a clean slate so that re-fitting on data with fewer
        # classes does not leave stale entries behind.
        self.mean, self.var, self.priors = {}, {}, {}
        self.classes = np.unique(y)

        # Additive variance floor, scaled to the data.  If every feature is
        # constant (largest variance is 0) fall back to var_smoothing itself
        # so the floor is still strictly positive.
        max_var = X.var(axis=0).max() if X.size else 0.0
        epsilon = self.var_smoothing * (max_var if max_var > 0 else 1.0)

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = X_c.mean(axis=0)
            self.var[c] = X_c.var(axis=0) + epsilon
            self.priors[c] = X_c.shape[0] / X.shape[0]

    def gaussian_probability(self, class_idx, x):
        """Return the per-feature normal density of ``x`` under one class.

        Parameters
        ----------
        class_idx : label
            One of the labels stored in ``self.classes``.
        x : ndarray of shape (n_features,)
            A single sample.

        Returns
        -------
        ndarray of shape (n_features,)
            Density of each feature value.  Values may underflow to 0 for
            samples far from the class mean; ``predict`` therefore works
            with log-densities instead of calling this method.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        numerator = np.exp(-(x - mean) ** 2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _joint_log_likelihood(self, X):
        """Return an (n_samples, n_classes) array of unnormalised log-posteriors."""
        columns = []
        for c in self.classes:
            mean = self.mean[c]
            var = self.var[c]
            # log N(x | mean, var), computed directly in the log domain so
            # that far-away samples give a large negative number instead of
            # log(0) = -inf.
            log_pdf = -0.5 * (np.log(2.0 * np.pi * var) + (X - mean) ** 2 / var)
            columns.append(np.log(self.priors[c]) + log_pdf.sum(axis=1))
        return np.column_stack(columns)

    def predict(self, X):
        """Predict the most probable class for every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        list
            Predicted label for each sample, in input order.  An empty
            input yields an empty list.
        """
        X = np.asarray(X, dtype=float)
        if X.size == 0:
            return []
        scores = self._joint_log_likelihood(X)
        # Keep returning a plain list of labels, as callers already expect.
        return list(self.classes[np.argmax(scores, axis=1)])


In [5]:
# Create the model using step-by-step Gaussian Naive Bayes
# and fit it on the training split only (X_train, y_train).
gnb_custom = GaussianNaiveBayes()
gnb_custom.fit(X_train, y_train)

In [6]:
# Make predictions for the held-out test samples with the custom model.
y_pred_custom = gnb_custom.predict(X_test)
# Bare last expression: the notebook displays the predicted labels below.
y_pred_custom


[1,
 0,
 2,
 1,
 1,
 0,
 1,
 2,
 1,
 1,
 2,
 0,
 0,
 0,
 0,
 2,
 2,
 1,
 1,
 2,
 0,
 2,
 0,
 2,
 2,
 2,
 2,
 2,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 2,
 1,
 0,
 0,
 0,
 2,
 1,
 1,
 0,
 0]

In [7]:
# Evaluate accuracy of the custom model by comparing its predictions
# (y_pred_custom) with the true labels of the test split (y_test).
accuracy_custom = accuracy_score(y_test, y_pred_custom)
print("Accuracy (Custom Gaussian Naive Bayes):", accuracy_custom)


Accuracy (Custom Gaussian Naive Bayes): 0.9777777777777777


In [8]:
# (ii) Using the built-in implementation
# Create the model using scikit-learn's GaussianNB with no arguments and
# fit it on the same training split that the custom model was fitted on.
gnb_builtin = GaussianNB()
gnb_builtin.fit(X_train, y_train)


In [9]:
# Make predictions for the same test samples with the built-in model.
y_pred_builtin = gnb_builtin.predict(X_test)
# Bare last expression: the notebook displays the predicted labels below.
y_pred_builtin


array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 2, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0])

In [10]:
# Evaluate accuracy of the built-in model against the true test labels
# (y_test), in the same way as for the custom model.
accuracy_builtin = accuracy_score(y_test, y_pred_builtin)
print("Accuracy (Built-in Gaussian Naive Bayes):", accuracy_builtin)

Accuracy (Built-in Gaussian Naive Bayes): 0.9777777777777777


In [None]:
# QUESTION 2: K-Nearest Neighbours on the Iris dataset, with K chosen by GridSearchCV

In [11]:
# Import necessary libraries
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [12]:
# Load the Iris dataset again.
# NOTE: this rebinds iris, X and y, the same names the earlier
# Naive Bayes cells assigned.
iris = load_iris()
X = iris.data
y = iris.target

In [13]:
# Split the dataset into training and testing sets.
# Uses the same test_size (0.3) and random_state (42) as the earlier split;
# X_train, X_test, y_train and y_test are rebound here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [14]:
# Create the K-NN Classifier model with no arguments; the values of
# n_neighbors to try are supplied through param_grid in the grid search.
knn = KNeighborsClassifier()

In [15]:
# Define the parameter grid (range of K values to test).
# np.arange(1, 50) yields K = 1, 2, ..., 49; the stop value 50 is excluded.
param_grid = {'n_neighbors': np.arange(1, 50)}



In [16]:

# Use GridSearchCV to find the best K value: every n_neighbors candidate in
# param_grid is evaluated on the knn estimator with 5-fold cross-validation (cv=5).
grid_search = GridSearchCV(knn, param_grid, cv=5)


In [17]:
# Fit the model: run the grid search on the training split only.
# X_test / y_test are not passed here.
grid_search.fit(X_train, y_train)


  _data = np.array(data, dtype=dtype, copy=copy,


In [18]:
# Output the best K value and the corresponding accuracy.
best_k = grid_search.best_params_['n_neighbors']
# best_score_ is the mean cross-validated score of the winning K, measured
# on folds of the training split. It is not a test-set score.
best_score = grid_search.best_score_
# The held-out split was not used during the search, so score the refitted
# best model on it to report how the chosen K does on unseen data.
test_score = grid_search.score(X_test, y_test)
print(f"Best K value found: {best_k}")
print(f"Best accuracy score with GridSearchCV: {best_score}")
print(f"Test-set accuracy with K={best_k}: {test_score}")


Best K value found: 1
Best accuracy score with GridSearchCV: 0.9523809523809523
