In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset and pull out the feature matrix and the class labels.
iris_data = load_iris()
features, labels = iris_data.data, iris_data.target

# Hold out 30% of the rows for testing; random_state pins the shuffle so the
# split (and every accuracy printed below) is reproducible.
split_arrays = train_test_split(features, labels, test_size=0.3, random_state=42)
train_features, test_features, train_labels, test_labels = split_arrays
class Manual:
    """Gaussian Naive Bayes classifier written by hand with NumPy.

    Each feature is modelled as an independent normal distribution per class.
    ``fit`` estimates the per-class feature means, (population) variances and
    class priors; ``predict`` returns, for every sample, the class with the
    largest log-posterior ``log P(class) + sum_j log N(x_j | mean_j, var_j)``.

    Parameters
    ----------
    variance_smoothing : float, default=1e-6
        Constant added to every variance before it is used, so that a feature
        that is constant within a class does not cause a division by zero.
    """

    # Class-level default so the smoothing term is always available.
    variance_smoothing = 1e-6

    def __init__(self, variance_smoothing=1e-6):
        self.variance_smoothing = variance_smoothing

    def fit(self, train_features, train_labels):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        train_features : array-like of shape (n_samples, n_features)
        train_labels : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If the features are not 2-D, the training set is empty, or the
            number of rows and labels differ.
        """
        # Accept plain lists as well as arrays; boolean-mask indexing below
        # only works on ndarrays.
        train_features = np.asarray(train_features, dtype=float)
        train_labels = np.asarray(train_labels)

        if train_features.ndim != 2:
            raise ValueError("train_features must be 2-D (n_samples, n_features)")
        if len(train_features) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(train_features) != len(train_labels):
            raise ValueError("train_features and train_labels differ in length")

        self.unique_classes = np.unique(train_labels)
        self.class_mean = {}
        self.class_variance = {}
        self.class_prior_probability = {}

        for class_value in self.unique_classes:
            class_features = train_features[train_labels == class_value]
            self.class_mean[class_value] = class_features.mean(axis=0)
            # Population variance (ddof=0), i.e. the maximum-likelihood estimate.
            self.class_variance[class_value] = class_features.var(axis=0)
            self.class_prior_probability[class_value] = (
                len(class_features) / len(train_labels)
            )

    def gaussian_probability(self, value, mean, variance):
        """Return the normal density of ``value`` under N(mean, variance + smoothing).

        Kept for callers that want the raw density. ``predict`` does not use
        it, because the density underflows to 0.0 for values far from the mean.
        """
        smoothed_variance = variance + self.variance_smoothing
        return (1 / np.sqrt(2 * np.pi * smoothed_variance)) * np.exp(
            -(value - mean) ** 2 / (2 * smoothed_variance)
        )

    def _gaussian_log_probability(self, value, mean, variance):
        """Return the log of the normal density, computed without exponentiating."""
        smoothed_variance = variance + self.variance_smoothing
        return -0.5 * np.log(2 * np.pi * smoothed_variance) - (
            (value - mean) ** 2 / (2 * smoothed_variance)
        )

    def predict(self, test_features):
        """Predict the most probable class for every row of ``test_features``.

        Parameters
        ----------
        test_features : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted labels, drawn from the classes seen in ``fit``.

        Raises
        ------
        ValueError
            If ``test_features`` is not 2-D.
        """
        test_features = np.asarray(test_features, dtype=float)
        if test_features.ndim != 2:
            raise ValueError("test_features must be 2-D (n_samples, n_features)")

        # One column of log-posterior scores per class, one row per sample.
        class_log_scores = np.empty(
            (test_features.shape[0], len(self.unique_classes))
        )
        for column, class_value in enumerate(self.unique_classes):
            prior_log = np.log(self.class_prior_probability[class_value])
            # Working in log space keeps the scores finite even when the
            # density itself would underflow; log(0) = -inf for every class
            # would otherwise make argmax always pick the first class.
            gaussian_log = self._gaussian_log_probability(
                test_features,
                self.class_mean[class_value],
                self.class_variance[class_value],
            ).sum(axis=1)
            class_log_scores[:, column] = prior_log + gaussian_log

        return self.unique_classes[np.argmax(class_log_scores, axis=1)]



# Train the hand-written classifier on the training split and score it on
# the held-out test split.
manual_nb_model = Manual()
manual_nb_model.fit(train_features, train_labels)
predicted_labels_manual = manual_nb_model.predict(test_features)

manual_accuracy = accuracy_score(test_labels, predicted_labels_manual)
print("Manual Gaussian Accuracy:", manual_accuracy)


Manual Gaussian Accuracy: 0.9777777777777777


In [3]:
from sklearn.naive_bayes import GaussianNB

# Reference implementation: scikit-learn's GaussianNB, trained and scored on
# the same train/test split as the manual model.
gaussian_nb_model = GaussianNB().fit(train_features, train_labels)
predicted_labels_sklearn = gaussian_nb_model.predict(test_features)

sklearn_accuracy = accuracy_score(test_labels, predicted_labels_sklearn)
print("Sklearn Gaussian NB Accuracy:", sklearn_accuracy)


Sklearn Gaussian NB Accuracy: 0.9777777777777777


In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np


# Candidate neighbourhood sizes: every k from 1 through 30.
k_choices = np.arange(1, 31)
hyperparameter_grid = {"n_neighbors": k_choices}

# Cross-validated (cv=5) search over k. Note that the search runs on the full
# dataset (features, labels), not on the train split used above.
knn_model = KNeighborsClassifier()
grid_search = GridSearchCV(
    estimator=knn_model,
    param_grid=hyperparameter_grid,
    cv=5,
)
grid_search.fit(features, labels)

print("Best K value:", grid_search.best_params_)
print("Best cross validation accuracy:", grid_search.best_score_)


Best K value: {'n_neighbors': np.int64(6)}
Best cross validation accuracy: 0.9800000000000001
