In [136]:
import numpy as np
from sklearn.datasets import load_boston
import pandas as pd

In [8]:
X, y = load_boston(return_X_y=True)

In [None]:
np.linalg.norm()

In [40]:
from sklearn.model_selection import train_test_split

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [177]:
class KNeighborsRegressionML:
    def __init__(self, k=5):
        self.k = k
        self.X_train = None
        self.y_train = None
        
    def fit(self, X, y):
        self.X_train = X
        self.y_train = y
    
    def predict(self, Xnew):
        #for every new observation, measure the distance to each known point
        all_dists = []
        
        for x in Xnew:
            dist = np.apply_along_axis(lambda x_tmp: np.linalg.norm(x_tmp - x), 1, self.X_train)
            all_dists.append(dist)
        all_dists = np.array(all_dists)
        
        nearest_k = []
        for i in range(len(all_dists)):
            top_k = y_train[all_dists[i].argsort()][:self.k]
            nearest_k.append(top_k)
        
        preds = [np.mean(votes) for votes in nearest_k]
        
        return(preds)
    
    #     Scoring
    def score(self, X, y):
        n = len(y)
        y_pred = self.predict(X)
        
        y_bar = np.mean(y)
        total_sum_square = np.sum((y - y_bar)**2)
        resid_sum_square = np.sum((y - y_pred)**2)
        
        r_square = 1 - (resid_sum_square / total_sum_square)
        return r_square

In [178]:
knr = KNeighborsRegressionML()

In [179]:
knr.fit(X_train, y_train)

In [180]:
pred = knr.predict(X_test)

In [188]:
%%timeit
knr.score(X_test, y_test)

411 ms ± 9.18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [182]:
from sklearn.neighbors import KNeighborsRegressor

In [183]:
skl_knn = KNeighborsRegressor(n_neighbors=5)

In [184]:
skl_knn.fit(X_train, y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform')

In [187]:
%%timeit
skl_knn.score(X_test, y_test)

969 µs ± 29.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
