<a href="https://colab.research.google.com/github/YS-Saini/Machine_learning_lab/blob/main/assgn_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fetch the Iris bunch and unpack its feature matrix and label vector
iris = load_iris()
X = iris.data
y = iris.target

# Reserve 30% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)


In [4]:
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Every feature is modelled as an independent normal distribution per
    class. Prediction picks the class with the largest log-posterior
    ``log P(class) + sum_j log N(x_j | mean_j, var_j)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest feature variance in the training data that is
        added to every per-class variance. This keeps the densities finite
        when a feature is constant within a class.

    Attributes set by ``fit``
    -------------------------
    classes : sorted unique labels seen during training.
    mean    : (n_classes, n_features) per-class feature means.
    var     : (n_classes, n_features) per-class feature variances (smoothed).
    priors  : (n_classes,) relative class frequencies.
    epsilon : absolute value that was added to the variances.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X_train, y_train):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)

        Returns
        -------
        self : the fitted classifier, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or does not match
            ``y_train`` in length.
        """
        # Coerce up front so lists and DataFrames behave like ndarrays below.
        X_train = np.asarray(X_train, dtype=np.float64)
        y_train = np.asarray(y_train)

        if X_train.ndim != 2:
            raise ValueError("X_train must be 2-D: (n_samples, n_features)")
        n_samples, n_features = X_train.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty training set")
        if y_train.ndim != 1 or y_train.shape[0] != n_samples:
            raise ValueError("y_train must be 1-D with one label per row of X_train")

        self.classes = np.unique(y_train)
        n_classes = len(self.classes)

        # Variance floor: a feature that is constant inside a class would
        # otherwise have variance 0 and turn the density into a 0/0 (NaN).
        max_var = X_train.var(axis=0).max() if n_features else 0.0
        self.epsilon = self.var_smoothing * max_var
        if self.epsilon == 0 and self.var_smoothing > 0:
            # Entirely constant data: fall back to the raw smoothing value.
            self.epsilon = self.var_smoothing

        # Mean, variance, and prior probability for each class
        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.var = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)

        for idx, cls in enumerate(self.classes):
            X_c = X_train[y_train == cls]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.var[idx, :] = X_c.var(axis=0) + self.epsilon
            self.priors[idx] = X_c.shape[0] / float(n_samples)

        return self

    def _calculate_log_likelihood(self, class_idx, x):
        """Return the per-feature log Gaussian density of ``x`` for one class.

        Working in log space avoids ``exp`` underflowing to 0 (and the
        resulting ``log(0) = -inf``) for samples far from the class mean.
        """
        mean = self.mean[class_idx]
        var = self.var[class_idx]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def _calculate_likelihood(self, class_idx, x):
        """Return the per-feature Gaussian density of ``x`` for one class."""
        return np.exp(self._calculate_log_likelihood(class_idx, x))

    def _calculate_posterior(self, x):
        """Return the label whose log-posterior is largest for sample ``x``."""
        x = np.asarray(x, dtype=np.float64)
        log_posteriors = [
            np.log(self.priors[idx]) + np.sum(self._calculate_log_likelihood(idx, x))
            for idx in range(len(self.classes))
        ]
        return self.classes[np.argmax(log_posteriors)]

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray with one predicted label per row.
        """
        # asarray makes row iteration well-defined for lists and DataFrames.
        X = np.asarray(X, dtype=np.float64)
        y_pred = [self._calculate_posterior(x) for x in X]
        return np.array(y_pred)

# Build the hand-written classifier and fit it on the training split
gnb = GaussianNaiveBayes()
gnb.fit(X_train, y_train)

# Label every held-out sample
y_pred = gnb.predict(X_test)

# Accuracy = share of held-out labels that were predicted correctly
accuracy = (y_pred == y_test).sum() / len(y_test)
print(f"Accuracy (Step-by-Step): {accuracy * 100:.2f}%")


Accuracy (Step-by-Step): 97.78%


In [5]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Create scikit-learn's Gaussian Naive Bayes estimator and train it in one
# step (fit hands back the estimator itself)
gnb_builtin = GaussianNB().fit(X_train, y_train)

# Label the held-out samples
y_pred_builtin = gnb_builtin.predict(X_test)

# Compare predictions with the true labels
accuracy_builtin = accuracy_score(y_true=y_test, y_pred=y_pred_builtin)
print(f"Accuracy (In-Built): {accuracy_builtin * 100:.2f}%")



Accuracy (In-Built): 97.78%


In [6]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Re-read Iris for the KNN experiment: features in X, labels in y
iris = load_iris()
X = iris.data
y = iris.target

# Same 70/30 split and seed as before, so both experiments share a test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)


In [7]:
# Base estimator whose neighbour count will be tuned
knn = KNeighborsClassifier()

# Candidate K values: every integer from 1 through 30
param_grid = dict(n_neighbors=np.arange(1, 31))

# Exhaustive search over the grid using 5-fold cross-validation
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)


In [8]:
# Run the cross-validated search on the training split (fit returns the
# search object) and read off the winning neighbour count
best_k = grid_search.fit(X_train, y_train).best_params_['n_neighbors']
print(f"Best value of K: {best_k}")


Best value of K: 1


In [9]:
# Fit a fresh KNN classifier that uses the selected neighbour count
best_knn = KNeighborsClassifier(n_neighbors=best_k).fit(X_train, y_train)

# Predict labels for the held-out split
y_pred = best_knn.predict(X_test)

# Fraction of held-out labels that were predicted correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy with best K (K={best_k}): {accuracy * 100:.2f}%")


Accuracy with best K (K=1): 100.00%
