In [1]:
import pandas as pd
import numpy as np

## Собственная реализация KNN с полным перебором для классификации

In [29]:
from sklearn.base import BaseEstimator, ClassifierMixin

class MyKNN_Regressor(BaseEstimator, ClassifierMixin):
    def __init__(self, n_neighbors = 5, fit_intercept = True):
        self.fit_intercept = fit_intercept
        self.n_neighbors = n_neighbors
        
    def fit(self, X, y):
        self.data = [(x, y) for x, y in zip(X, y)]
        return self

    def predict(self, X):
        def gauss_core(distance, h = 1.0):
            return np.exp(-0.5 * ((distance / h) ** 2.0))
        def minkowski_distance(first, second, p = 2.0):
            return np.sum(abs(first - second) ** p) ** (1.0 / p)
        
        def make_decision(obj, neighbors):
            cores = np.array([neighbor[0] for neighbor in neighbors])
            targets = np.array([neighbor[1] for neighbor in neighbors])
            
            prediction = np.sum(cores*targets) / np.sum(cores)
            return prediction
            
        predictions = []
        for obj in X:
            nearest_neighbors = []
            for features, target in self.data:
                nearest_neighbors.append((gauss_core(minkowski_distance(obj, features)), target))
            k_neighbors = sorted(nearest_neighbors, key = lambda x: x[0])[-self.n_neighbors:]
            predictions.append(make_decision(obj, k_neighbors))
        return predictions
        

## Сравним качество работы алгоритмов

In [30]:
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
data = load_diabetes()
X = data.data
y = data.target

In [31]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### Моя реализация

In [32]:
from sklearn.model_selection import cross_val_score
my_knn = MyKNN_Regressor()
print(f'RMSE on cross_validation, (number of folds is 7): {np.mean(np.sqrt(-cross_val_score(my_knn, X_scaled, y, cv = 7, scoring = 'neg_mean_squared_error')))}')

RMSE on cross_validation, (number of folds is 7): 60.39874438149396


### Готовая реализация в sklearn

In [33]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score
knn = KNeighborsRegressor(algorithm = 'brute')
print(f'RMSE on cross_validation, (number of folds is 7): {np.mean(np.sqrt(-cross_val_score(knn, X_scaled, y, cv = 7, scoring = 'neg_mean_squared_error')))}')

RMSE on cross_validation, (number of folds is 7): 60.678440946008536
