# KNN

In [10]:
import numpy as np
from collections import Counter

from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris, load_diabetes

## 1. From Scratch

### KNN Classifier

In [3]:
class KNNClassifier:
    """k-nearest-neighbours classifier using a brute-force distance search.

    Parameters
    ----------
    k : int, default=3
        Number of nearest training samples that vote on each prediction.
    distance_metric : {'euclidean', 'manhattan', 'minkowski'}, default='euclidean'
        Distance used to rank the training samples.
    p : float, default=3
        Order of the Minkowski distance. Only read when
        ``distance_metric == 'minkowski'``.
    """

    _SUPPORTED_METRICS = ('euclidean', 'manhattan', 'minkowski')

    def __init__(self, k = 3, distance_metric = 'euclidean', p = 3):
        self.k = k
        self.distance_metric = distance_metric
        self.p = p
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training data; KNN defers all computation to predict time.

        Parameters
        ----------
        X : array-like
            Training samples, one per row. A 1-D input is treated as a column
            of single-feature samples.
        y : array-like
            One label per training sample.

        Returns
        -------
        KNNClassifier
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``k`` is not a positive integer or ``X`` and ``y`` differ in length.
        """
        if not isinstance(self.k, (int, np.integer)) or self.k < 1:
            raise ValueError(f"k must be a positive integer, got {self.k!r}")
        # Convert once so lists (and other array-likes) support vectorised math.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y
        return self

    def _calculate_distance(self, x1, x2):
        """Return the distance between ``x1`` and ``x2`` under the configured metric.

        The reduction runs over the last axis, so two 1-D vectors give a scalar
        and a 2-D array against a 1-D vector gives one distance per row.

        Raises
        ------
        ValueError
            If ``distance_metric`` is not one of the supported names.
        """
        if self.distance_metric not in self._SUPPORTED_METRICS:
            raise ValueError(
                f"Unknown distance_metric {self.distance_metric!r}; "
                f"expected one of {self._SUPPORTED_METRICS}"
            )
        # atleast_1d keeps scalar inputs working with the axis=-1 reduction.
        diff = np.atleast_1d(np.asarray(x1) - np.asarray(x2))
        if self.distance_metric == 'euclidean':
            return np.sqrt(np.sum(diff ** 2, axis=-1))
        if self.distance_metric == 'manhattan':
            return np.sum(np.abs(diff), axis=-1)
        # Minkowski distance of order p.
        return np.power(np.sum(np.abs(diff) ** self.p, axis=-1), 1 / self.p)

    def _predict_single(self, x):
        """Predict the label of one sample by majority vote of its k nearest neighbours.

        If ``k`` exceeds the number of training samples, all of them vote.
        On a tied vote the label seen first among the neighbours (nearest first) wins.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNNClassifier must be fitted before calling predict")
        # One vectorised call yields the distance to every training sample.
        distances = self._calculate_distance(self.X_train, x)
        # Get the k nearest neighbours; a stable sort keeps equal distances in index order.
        k_indices = np.argsort(distances, kind='stable')[:self.k]
        k_nearest_labels = np.asarray(self.y_train)[k_indices]
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

    def predict(self, X):
        """Predict a label for every sample in ``X`` and return them as an array.

        A 1-D ``X`` is treated as a sequence of single-feature samples.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        return np.array([self._predict_single(x) for x in X])

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the true labels ``y``."""
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)

In [6]:
# Load the Iris data and hold out 20% of the samples for evaluation.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the from-scratch classifier with 5 neighbours and Euclidean distance.
knn = KNNClassifier(k=5, distance_metric='euclidean')
knn.fit(X_train, y_train)

# Predict the held-out samples and report the accuracy on them.
y_pred = knn.predict(X_test)
accuracy = knn.score(X_test, y_test)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 1.0000


### KNN Regressor

In [7]:
class KNNRegressor:
    """k-nearest-neighbours regressor using brute-force Euclidean search.

    Parameters
    ----------
    k : int, default=3
        Number of nearest training samples whose targets are averaged.
    """

    def __init__(self, k = 3):
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training data; all computation happens at predict time.

        Parameters
        ----------
        X : array-like
            Training samples, one per row. A 1-D input is treated as a column
            of single-feature samples.
        y : array-like
            One target value per training sample.

        Returns
        -------
        KNNRegressor
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``k`` is not a positive integer or ``X`` and ``y`` differ in length.
        """
        # k < 1 would select no neighbours and produce NaN means, so reject it early.
        if not isinstance(self.k, (int, np.integer)) or self.k < 1:
            raise ValueError(f"k must be a positive integer, got {self.k!r}")
        # Convert once so lists (and other array-likes) support vectorised math.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y
        return self

    def predict(self, X):
        """Predict each sample as the mean target of its k nearest training samples.

        Distances are Euclidean. If ``k`` exceeds the number of training
        samples, all of them are averaged. A 1-D ``X`` is treated as a
        sequence of single-feature samples.

        Returns
        -------
        numpy.ndarray
            One prediction per sample in ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNNRegressor must be fitted before calling predict")
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        X_train = np.asarray(self.X_train)
        y_train = np.asarray(self.y_train)

        y_pred = []
        for x in X:
            # Distance from x to every training sample in one vectorised step.
            distances = np.sqrt(np.sum((X_train - x) ** 2, axis=-1))
            # Get the k nearest neighbours; a stable sort keeps equal distances in index order.
            k_indices = np.argsort(distances, kind='stable')[:self.k]
            y_pred.append(np.mean(y_train[k_indices]))
        return np.array(y_pred)

In [None]:
# Load the diabetes data and hold out 20% of the samples for evaluation.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the from-scratch regressor with 3 neighbours and predict the held-out samples.
knn_reg = KNNRegressor(k=3)
knn_reg.fit(X_train, y_train)
y_pred = knn_reg.predict(X_test)

# Report the test-set error and the coefficient of determination.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")
print(f"R-squared: {r2:.4f}")

Mean Squared Error: 3364.3933
R-squared: 0.3650


## 2. Scikit-Learn

In [14]:
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.datasets import load_iris, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, r2_score

# KNN Classifier --------------------------------------------------------------------------------
# Load Iris and keep 20% of the rows as a held-out test set.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit scikit-learn's classifier with 3 neighbours and predict the held-out rows.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Per-class precision/recall/F1 followed by the raw confusion counts.
print("KNN Classifier")
print("- Classification Report")
print(
    classification_report(
        y_test,
        y_pred,
        target_names=iris.target_names,
    )
)
print("- Confusion Matrix")
print(confusion_matrix(y_test, y_pred))

# KNN Regressor --------------------------------------------------------------------------------
# Changed here: the regression example uses the diabetes dataset.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit scikit-learn's regressor with 3 neighbours and predict the held-out rows.
knn_reg = KNeighborsRegressor(n_neighbors=3)
knn_reg.fit(X_train, y_train)
y_pred = knn_reg.predict(X_test)

# Report the test-set error and the coefficient of determination.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\n\nKNN Regressor")
print(f"Mean Squared Error: {mse:.4f}")
print(f"R-squared: {r2:.4f}")

KNN Classifier
- Classification Report
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

- Confusion Matrix
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


KNN Regressor
Mean Squared Error: 3364.3933
R-squared: 0.3650


****