Q.1


In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

# Load the Iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [2]:
class ManualGaussianNB:
    """Gaussian Naive Bayes classifier written directly on top of NumPy.

    Every feature is modelled, per class, as an independent normal
    distribution. Prediction selects the class with the largest
    log-posterior: the log prior plus the sum of per-feature
    log-likelihoods.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. This keeps the variances
        strictly positive, so a feature that is constant within a class
        does not produce a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray
        Sorted unique class labels seen during ``fit``.
    means, vars, priors : dict
        Per-class feature means, smoothed feature variances and prior
        probabilities, keyed by class label.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Returns
        -------
        self : ManualGaussianNB
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not a non-empty 2-D array or if ``X`` and ``y``
            disagree on the number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2 or X.size == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.classes = np.unique(y)
        self.means = {}
        self.vars = {}
        self.priors = {}

        # Additive variance floor, scaled by the data so it is negligible for
        # well-behaved features but still rescues zero-variance ones.
        epsilon = self.var_smoothing * X.var(axis=0).max()
        if epsilon == 0:
            # Every feature is constant over the whole training set; fall
            # back to the raw smoothing constant so variances stay positive.
            epsilon = self.var_smoothing

        for cls in self.classes:
            X_cls = X[y == cls]
            self.means[cls] = X_cls.mean(axis=0)
            self.vars[cls] = X_cls.var(axis=0) + epsilon
            self.priors[cls] = X_cls.shape[0] / X.shape[0]

        return self

    def gaussian_pdf(self, x, mean, var):
        """Return the normal density of ``x`` for the given mean and variance.

        ``var`` must be strictly positive. ``predict`` does not call this
        method: for samples far from the mean the density underflows to 0
        and its logarithm becomes ``-inf``, so the log-density is computed
        analytically instead.
        """
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

    def _joint_log_likelihood(self, X):
        """Return the (n_samples, n_classes) matrix of unnormalised log-posteriors."""
        scores = np.empty((X.shape[0], len(self.classes)))
        for column, cls in enumerate(self.classes):
            mean = self.means[cls]
            var = self.vars[cls]
            # log N(x | mean, var) summed over features, written out in log
            # space so that extreme samples never hit exp() underflow.
            log_likelihood = -0.5 * np.sum(
                np.log(2 * np.pi * var) + (X - mean) ** 2 / var, axis=1
            )
            scores[:, column] = np.log(self.priors[cls]) + log_likelihood
        return scores

    def predict(self, X):
        """Predict the most probable class for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted class labels; empty when ``X`` contains no samples.

        Raises
        ------
        ValueError
            If ``X`` is non-empty and not 2-D.
        """
        X = np.asarray(X, dtype=float)
        if X.size == 0:
            return np.array([], dtype=self.classes.dtype)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")

        scores = self._joint_log_likelihood(X)
        return self.classes[np.argmax(scores, axis=1)]


In [3]:
# Fit the hand-written classifier on the training split and label the
# held-out samples with it.
manual_nb = ManualGaussianNB()
manual_nb.fit(X_train, y_train)
y_pred_manual = manual_nb.predict(X_test)

# Accuracy is the fraction of test labels that were predicted correctly.
accuracy_manual = (y_pred_manual == y_test).mean()
print("Accuracy (Manual Implementation):", accuracy_manual)


Accuracy (Manual Implementation): 0.9777777777777777


In [4]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Reference model: scikit-learn's GaussianNB trained on the same split.
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Score its predictions on the held-out samples.
y_pred_builtin = gnb.predict(X_test)
accuracy_builtin = accuracy_score(y_true=y_test, y_pred=y_pred_builtin)
print("Accuracy (Built-in Implementation):", accuracy_builtin)


Accuracy (Built-in Implementation): 0.9777777777777777


Q.2


In [5]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score

# Reload Iris so this question does not depend on the split made for Q.1.
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 train-test split with a fixed random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate neighbourhood sizes: k = 1, 2, ..., 20.
param_grid = {"n_neighbors": list(range(1, 21))}

# Select k by 5-fold cross-validation on the training split only.
knn = KNeighborsClassifier()
grid = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

print("Best K value:", grid.best_params_)

# best_estimator_ is the model for the winning k as exposed by the fitted
# search; use it to label the untouched test split.
best_knn = grid.best_estimator_
y_pred = best_knn.predict(X_test)

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test Set Accuracy:", accuracy)


Best K value: {'n_neighbors': 3}
Test Set Accuracy: 1.0
