### Регрессия Надарая-Ватсона

In [80]:
import numpy as np
from sklearn.datasets import load_boston
from sklearn.base import BaseEstimator
from scipy.spatial.distance import cdist
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

In [81]:
# Load the Boston housing data as a (features, target) pair.
# NOTE(review): load_boston was removed in scikit-learn 1.2; this cell needs an
# older release or a replacement dataset to run.
X, y = load_boston(return_X_y=True)
# Shuffle and keep half of the samples for training, the rest for testing.
# No random_state is passed, so the split (and every score below) varies per run.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, shuffle=True)

In [82]:
def K(distance, h=1):
    """Evaluate the kernel ``exp(-(distance / h) ** 2)`` element-wise.

    Args:
        distance: Scalar or array-like of distances; converted to a NumPy array.
        h: Bandwidth the distances are divided by before squaring.

    Returns:
        Kernel values with the same shape as ``distance``.
    """
    scaled = np.asarray(distance) / h
    squared = scaled ** 2
    return np.exp(-squared)

In [83]:
class NV(BaseEstimator):
    """Nadaraya-Watson kernel regression.

    A prediction is the kernel-weighted average of the stored training
    targets, with weights computed by ``K`` from the pairwise distances
    between query rows and training rows.

    Args:
        h: Kernel bandwidth forwarded to ``K``.
    """

    def __init__(self, h=1):
        self.X, self.Y = None, None
        self.h = h

    def kernel(self, x):
        """Return kernel weights for distances ``x`` using the current ``self.h``.

        Defined as a method rather than a lambda stored on the instance so the
        estimator stays picklable; ``self.h`` is read at call time, so a
        bandwidth changed after construction is picked up.
        """
        return K(x, self.h)

    def fit(self, X, Y):
        """Store the training samples ``X`` and targets ``Y`` as arrays.

        Returns:
            The estimator itself, so calls can be chained.
        """
        self.X, self.Y = np.array(X), np.array(Y)
        return self

    def predict(self, X):
        """Predict targets for the rows of ``X``.

        Returns:
            One value per row of ``X``: the sum of training targets weighted
            by the kernel, divided by the sum of the weights.
        """
        # Compute the (n_queries, n_train) weight matrix once and reuse it for
        # both the numerator and the denominator.
        weights = self.kernel(cdist(X, self.X))
        weighted_targets = np.sum(self.Y * weights, axis=-1)
        # The small constant keeps the division finite when every weight
        # underflows to zero.
        return weighted_targets / (np.sum(weights, axis=-1) + 1e-10)

In [84]:
# Baseline: fixed bandwidth h=100, scored by mean absolute error on the test split.
model = NV(h=100)
model.fit(X_train, y_train)
# mean_absolute_error takes (y_true, y_pred); pass the ground truth first.
mean_absolute_error(y_test, model.predict(X_test))

5.541783504716953

In [85]:
# Try every integer bandwidth from 10 to 999 and rank candidates by R^2.
grid_search = GridSearchCV(
    estimator=model,
    param_grid={'h': list(range(10, 1000))},
    scoring='r2',
)
grid_search.fit(X_train, y_train)
grid_search.best_params_

{'h': 21}

In [86]:
# Test-split MAE of the tuned model; mean_absolute_error takes (y_true, y_pred).
mean_absolute_error(y_test, grid_search.predict(X_test))

4.751955316647246