In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor

# KNN Regressor

In [36]:
class KNN:
    """Brute-force k-nearest-neighbours regressor using the Minkowski metric.

    Parameters
    ----------
    k : int
        Number of nearest training samples combined for each prediction.
    p : int or float
        Exponent of the Minkowski distance (p=1 Manhattan, p=2 Euclidean).
    weight : {"uniform", "distance"}, default "uniform"
        "uniform" returns the plain mean of the k nearest targets.
        "distance" returns their mean weighted by inverse distance.
    """

    def __init__(self, k, p, weight="uniform"):
        self.k = k
        self.p = p
        self.weight = weight

    def fit(self, X, y):
        """Store the training data; all computation is deferred to ``predict``.

        Parameters
        ----------
        X : array-like, shape (n_train, n_features)
        y : array-like, shape (n_train,)
        """
        # np.asarray lets lists / DataFrames work too; ndarrays pass through as-is.
        self.X = np.asarray(X)
        self.y = np.asarray(y)

    def predict(self, X_test):
        """Predict a target for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like, shape (n_test, n_features)

        Returns
        -------
        numpy.ndarray
            One prediction per test row (shape (n_test,) for 1-D targets).

        Raises
        ------
        ValueError
            If ``weight`` is neither "uniform" nor "distance".

        Notes
        -----
        The full (n_train, n_features, n_test) difference array is built in
        memory, so cost grows with the product of those three sizes.
        """
        X_test = np.asarray(X_test)

        # Broadcast (n_train, n_features, 1) against (1, n_features, n_test)
        # to get every train/test difference, then reduce over the features.
        train = self.X[..., np.newaxis]
        queries = X_test.T[np.newaxis]
        distance = (np.abs(queries - train) ** self.p).sum(axis=1) ** (1 / self.p)

        # Row i of `nearest` holds, per test column, the index of the i-th
        # closest training sample; only the first k rows are needed.
        nearest = np.argsort(distance, axis=0)[:self.k]
        neighbour_y = self.y[nearest]

        if self.weight == "uniform":
            return neighbour_y.mean(axis=0)

        elif self.weight == "distance":
            # Reuse the argsort result instead of sorting the distances again.
            neighbour_dist = np.take_along_axis(distance, nearest, axis=0)

            # A zero distance would make 1/d infinite and the result NaN.
            # For such queries only the exact matches get weight (1 each),
            # all other neighbours get weight 0.
            exact = neighbour_dist == 0
            inverse = 1.0 / np.where(exact, 1.0, neighbour_dist)
            weights = np.where(exact.any(axis=0), exact.astype(float), inverse)

            # Weighted mean of the k nearest targets.
            return (weights * neighbour_y).sum(axis=0) / weights.sum(axis=0)

        else:
            raise ValueError("Geçersiz ağırlık seçimi")

In [33]:
# Uniform-weight KNN with k=3 and p=1 (Manhattan distance).
# X, y_train and X_test are created in cells further down (In [10]-In [14]).
model1 = KNN(3,1)
model1.fit(X, y_train)
model1.predict(X_test) # same values as the manual step-by-step computation further down

array([ 0.14164375,  0.32495674,  0.4133476 , -0.3815586 ])

In [34]:
# Inverse-distance-weighted KNN with k=3 and p=1 (Manhattan distance).
# X, y_train and X_test are created in cells further down (In [10]-In [14]).
model2 = KNN(3,1,weight="distance")
model2.fit(X, y_train)
model2.predict(X_test) # same values as the manual step-by-step computation further down

array([-0.12817393,  0.29421101,  0.35976981, -0.4116225 ])

# Example

In [35]:
%%time
X2 = np.random.randn(900,15)
y2 = np.random.randn(900 = KNN(4,2,weight="distance")
model1.fit(X2, y2)
_= model1.predict(X2)

SyntaxError: invalid syntax (<unknown>, line 3)

In [None]:
%%time
# Reference timing: scikit-learn's regressor on the same X2 / y2, using
# brute-force search, 4 neighbours and distance weighting.
model3 = KNeighborsRegressor(n_neighbors=4,weights="distance",algorithm="brute")
model3.fit(X2, y2)
_= model3.predict(X2)

# matris broadcast metodu

In [7]:
# Column vector of shape (3, 1).
a = np.array([[1,2,3]]).reshape(3,1)
a

array([[1],
       [2],
       [3]])

In [8]:
# Row vector of shape (1, 3).
b = np.array([[2,4,6]]).reshape(1,3)
b

array([[2, 4, 6]])

In [9]:
# (3, 1) - (1, 3) broadcasts to (3, 3): entry [i, j] is a[i] - b[j].
a-b

array([[-1, -3, -5],
       [ 0, -2, -4],
       [ 1, -1, -3]])

# Kenar Çözüm (yukarıya taşıyacağımız şekli)

In [10]:
# Training features: 9 samples with 2 features each (random, unseeded).
X = np.random.randn(9,2)

In [11]:
# Test features: 4 samples with 2 features each (random, unseeded).
X_test = np.random.randn(4,2)

In [12]:
# Add a trailing axis: (9, 2) -> (9, 2, 1), so training samples lie along axis 0.
X1 = X.reshape(9,2,1)

In [13]:
# Transpose to (2, 4), then add a leading axis -> (1, 2, 4), so test samples lie along axis 2.
X_test1 = X_test.T.reshape(1,2,4)

In [14]:
# One random target value per training sample.
y_train = np.random.randn(9)

In [15]:
# (1, 2, 4) - (9, 2, 1) broadcasts to (9, 2, 4): one difference vector per (train, test) pair.
(X_test1 - X1).shape

(9, 2, 4)

In [16]:
# Difference vector for training sample 4 and test sample 2, read from the broadcast array.
(X_test1 - X1)[4,:,2]

array([-2.14048727,  0.75478711])

In [17]:
# The same difference computed directly, confirming the broadcast layout.
X_test[2] - X[4]

array([-2.14048727,  0.75478711])

In [18]:
# Euclidean distance: sum the squared differences over the feature axis (axis=1), then take the square root.
((X_test1 - X1)**2).sum(axis=1)**0.5

array([[2.63039677, 0.81699737, 2.55113485, 2.09985312],
       [2.24864189, 1.07654873, 2.53838501, 1.27647288],
       [1.96527614, 0.59389414, 2.09618679, 1.45790535],
       [1.1208196 , 1.07965443, 1.60034198, 1.06102347],
       [1.78067428, 1.20538862, 2.26966723, 0.77898519],
       [1.00014375, 1.76779118, 1.90382541, 0.77585302],
       [0.29969117, 1.62376014, 0.98152211, 1.68577245],
       [3.69821189, 2.22304948, 3.90328462, 2.43986658],
       [1.25063112, 0.72808983, 1.05734346, 1.85719483]])

In [19]:
# How the Minkowski metric works:
# p = 2 gives the Euclidean distance
# p = 1 gives the Manhattan distance
p = 1 
distance = ((np.abs(X_test1 - X1))**p).sum(axis=1)**(1/p)
distance

array([[3.67944028, 0.96291702, 3.02353382, 2.4696546 ],
       [2.84233958, 1.5172622 , 2.84715773, 1.63255389],
       [2.67681939, 0.83895417, 2.16884969, 1.46703371],
       [1.41696379, 1.29955947, 2.1314269 , 1.48614227],
       [2.04160662, 1.56537885, 2.89527438, 0.83182093],
       [1.27116582, 2.09627129, 2.69217962, 0.92538955],
       [0.42049539, 2.29602787, 1.36797402, 2.24959516],
       [4.65519114, 2.47496765, 3.99928468, 3.44540546],
       [1.7277195 , 0.98880376, 1.07181304, 2.57489608]])

In [20]:
sıra_no = np.argsort(distance, axis=0) # per test column: training-row indices ordered by increasing distance

In [21]:
# Index the targets with the sort order: each column lists training targets from nearest to farthest.
y_train[sıra_no]

array([[-0.46868298, -0.26435916,  0.66486031, -0.7300654 ],
       [-0.15025125,  0.57436907, -0.46868298, -0.15025125],
       [ 1.04386547,  0.66486031,  1.04386547, -0.26435916],
       [ 0.66486031,  1.04386547, -0.26435916,  1.04386547],
       [-0.7300654 , -1.80460881, -0.15025125, -1.80460881],
       [-0.26435916, -0.7300654 , -1.80460881, -0.46868298],
       [-1.80460881, -0.15025125, -0.7300654 ,  0.57436907],
       [ 0.57436907, -0.46868298,  0.57436907,  0.66486031],
       [-1.23003569, -1.23003569, -1.23003569, -1.23003569]])

In [22]:
# Put the training features and their targets in one table for inspection.
X_df = pd.DataFrame(X)
X_df["y_train"] = y_train

In [23]:
# Display the table: feature columns 0 and 1 plus the y_train column.
X_df

Unnamed: 0,0,1,y_train
0,1.061172,1.194306,0.574369
1,1.081972,0.336405,-1.804609
2,0.660058,0.592799,-0.264359
3,0.011419,-0.018418,1.043865
4,0.705664,-0.08802,-0.730065
5,-0.10656,-0.69715,-0.150251
6,-0.868542,-0.134925,-0.468683
7,2.467251,0.763978,-1.230036
8,-0.37758,0.681337,0.66486


In [24]:
# Display the sort order: column j lists training-row indices from nearest to farthest for test sample j.
sıra_no

array([[6, 2, 8, 4],
       [5, 0, 6, 5],
       [3, 8, 3, 2],
       [8, 3, 2, 3],
       [4, 1, 5, 1],
       [2, 4, 1, 6],
       [1, 5, 4, 0],
       [0, 6, 0, 8],
       [7, 7, 7, 7]], dtype=int64)

In [25]:
# Targets rearranged by that order, shown again next to the index table for cross-checking.
y_train[sıra_no]

array([[-0.46868298, -0.26435916,  0.66486031, -0.7300654 ],
       [-0.15025125,  0.57436907, -0.46868298, -0.15025125],
       [ 1.04386547,  0.66486031,  1.04386547, -0.26435916],
       [ 0.66486031,  1.04386547, -0.26435916,  1.04386547],
       [-0.7300654 , -1.80460881, -0.15025125, -1.80460881],
       [-0.26435916, -0.7300654 , -1.80460881, -0.46868298],
       [-1.80460881, -0.15025125, -0.7300654 ,  0.57436907],
       [ 0.57436907, -0.46868298,  0.57436907,  0.66486031],
       [-1.23003569, -1.23003569, -1.23003569, -1.23003569]])

In [26]:
# Distance matrix as a table: rows are training samples, columns are test samples.
pd.DataFrame(distance)

Unnamed: 0,0,1,2,3
0,3.67944,0.962917,3.023534,2.469655
1,2.84234,1.517262,2.847158,1.632554
2,2.676819,0.838954,2.16885,1.467034
3,1.416964,1.299559,2.131427,1.486142
4,2.041607,1.565379,2.895274,0.831821
5,1.271166,2.096271,2.69218,0.92539
6,0.420495,2.296028,1.367974,2.249595
7,4.655191,2.474968,3.999285,3.445405
8,1.72772,0.988804,1.071813,2.574896


In [27]:
# Uniform prediction by hand: mean of the targets of the k nearest training samples, per test column.
k=3
y_train[sıra_no][:k,:].mean(axis=0)

array([ 0.14164375,  0.32495674,  0.4133476 , -0.3815586 ])

In [28]:
# Sort a copy column-wise (in place) so `distance` stays untouched; the first k rows are the k smallest distances.
d2 = distance.copy()
d2.sort(axis=0)
d2[:k,:]

array([[0.42049539, 0.83895417, 1.07181304, 0.83182093],
       [1.27116582, 0.96291702, 1.36797402, 0.92538955],
       [1.41696379, 0.98880376, 2.1314269 , 1.46703371]])

In [29]:
# Targets of the k nearest training samples, in the same order as the sorted distances above.
y_train[sıra_no][:k,:]

array([[-0.46868298, -0.26435916,  0.66486031, -0.7300654 ],
       [-0.15025125,  0.57436907, -0.46868298, -0.15025125],
       [ 1.04386547,  0.66486031,  1.04386547, -0.26435916]])

In [30]:
(1/d2[:k,:] * y_train[sıra_no][:k,:]).sum(axis=0) / (1/d2[:k,:]).sum(axis=0) # inverse-distance weighted mean of the k nearest targets

array([-0.12817393,  0.29421101,  0.35976981, -0.4116225 ])

# KNN ile sağlaması

In [31]:
# Cross-check against scikit-learn: k=3, p=1, weights left at the default.
model = KNeighborsRegressor(n_neighbors=3, p=1)
model.fit(X,y_train)
model.predict(X_test)

array([ 0.14164375,  0.32495674,  0.4133476 , -0.3815586 ])

In [32]:
# Cross-check against scikit-learn with inverse-distance weighting: k=3, p=1.
model = KNeighborsRegressor(n_neighbors=3, p=1, weights="distance")
model.fit(X,y_train)
model.predict(X_test)

array([-0.12817393,  0.29421101,  0.35976981, -0.4116225 ])