# Installing Libraries (Python version >= 3.8)

In [None]:
import sys
# Show the running interpreter version, then stop unless it is Python 3 with minor version 8 or higher.
version = sys.version_info
print(version)
assert (3, 8) <= version[:2] < (4, 0)

In [None]:
!python -m pip install numpy==1.24.4 pandas==2.0.3 scikit-learn==1.3.2 matplotlib==3.7.4

# Downloading/Visualizing Dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import model_selection


# Load the iris data and wrap the features and the target in pandas containers.
dataset = datasets.load_iris()
X = pd.DataFrame(dataset.data, columns=dataset.feature_names)
y = pd.Series(dataset.target, name="target")

# Split into train and test parts; random_state is pinned so reruns give the same partition.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    random_state=0,
)

# Preview the first rows (features next to the target), then report the sizes.
display(pd.concat([X, y], axis="columns").head())
print("samples: {}; features: {}".format(X.shape[0], X.shape[1]))
print("samples: {}; values: {}".format(len(y), y.unique()))

# Training Model

In [None]:
from typing import List


class KNeighborsClassifier:
    """Nearest-neighbour classifier using the Euclidean distance.

    Each sample is given the target value of the single closest training
    point (k = 1). Fitting only stores the training data; all distance
    computations happen in :meth:`predict`.
    """

    def __init__(self) -> None:
        self._X_train = None  # Training features, stored by fit()
        self._y_train = None  # Training target, stored by fit()

    def fit(self, X: pd.DataFrame, y: pd.Series) -> None:
        """Fit the model from the training dataset.

        :param X: The training features.
        :param y: The training target, one entry per row of ``X``.
        :raises ValueError: If ``X`` and ``y`` do not have the same number of samples.
        """
        # A mismatch would otherwise surface later as an IndexError or, worse,
        # as silently wrong labels in predict().
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}."
            )

        self._X_train = X
        self._y_train = y

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        """Predict the class labels for the provided data.

        :param X: The data to be classified.
        :return: The class labels for the provided data, one per row of ``X``.
        :raises RuntimeError: If the model has not been fitted yet.
        """
        if self._X_train is None or self._y_train is None:
            raise RuntimeError("fit() must be called before predict().")

        # Convert once, outside the loops, instead of on every sample.
        train_points = np.asarray(self._X_train)
        train_labels = np.asarray(self._y_train)

        nearest_indices = []
        for p0 in np.asarray(X):
            distances = [
                self.calculate_euclidean_distance(p0, p1) for p1 in train_points
            ]
            # Only the single closest training point decides the label; on a
            # tie argmin keeps the first one. A k > 1 variant would instead
            # vote among the k smallest distances.
            nearest_indices.append(int(np.argmin(distances)))

        # Index the label array so the result is an ndarray with the dtype of
        # the training target, including when X has no rows.
        return train_labels[np.array(nearest_indices, dtype=np.intp)]

    def calculate_euclidean_distance(self, p0: List[float], p1: List[float]) -> float:
        """Calculate the Euclidean distance between two points.

        :param p0: The first point.
        :param p1: The second point.
        :return: The Euclidean distance between the two points.
        """
        # Convert to float arrays so plain Python lists work as well as NumPy
        # rows, and unsigned integer inputs cannot wrap around on subtraction.
        difference = np.asarray(p0, dtype=float) - np.asarray(p1, dtype=float)
        return np.sqrt(np.sum(difference ** 2, axis=0))

In [None]:
# Create the classifier and hand it the training split.
model = KNeighborsClassifier()
model.fit(X=X_train, y=y_train)

# Evaluating Model

In [None]:
from sklearn.metrics import accuracy_score

# Predict on both splits first, then report the accuracy on each.
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)
print(f"Accuracy score for train data: {accuracy_score(y_train, y_train_pred)}")
print(f"Accuracy score for test data: {accuracy_score(y_test, y_test_pred)}")