In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [2]:
class KNNClassifier:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def euclidean_distance(self, x1, x2):
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def _predict(self, x):
        # calcula as distâncias entre x e todos os registros do conjunto de treinamento.
        distances = [self.euclidean_distance(x, x_train) for x_train in self.X_train]
        
        # ordena pela distância e retorna os índices referentes aos primeiros 'k' vizinhos.
        k_indices = np.argsort(distances)[:self.k]
        
        # extrai os rótulos dos k vizinhos mais próximos.
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        
        # retorna o rótulo da classe mais frequente.
        most_common = np.bincount(k_nearest_labels).argmax()
        return most_common

In [3]:
knn = KNNClassifier()
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[ 8  0  0]
 [ 0 12  0]
 [ 0  1  9]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.92      1.00      0.96        12
           2       1.00      0.90      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

