In [1]:
from sklearn.datasets import load_iris, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
import pandas as pd
import numpy as np
from scipy import stats 

In [11]:
# Iris features and class labels, returned as an (X, y) pair.
X, y = load_iris(return_X_y=True)

# Hold out 25% of the samples for testing. The split is stratified on the
# labels and seeded, so re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

In [None]:
# Baseline: a 5-neighbour classifier trained on the current training split.
# `fit` returns the estimator itself, so construction and training can be chained.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predicted labels for the held-out samples.
y_pred = knn.predict(X_test)

In [12]:
# Candidate neighbourhood sizes: the odd numbers 1, 3, ..., 49.
neighbors = [*range(1, 50, 2)]

# Collected (k, mean cross-validated accuracy) pairs, one per candidate.
accuracy = []

for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    # 10-fold cross-validation, run on the training split only.
    score = cross_val_score(
        estimator=knn,
        X=X_train,
        y=y_train,
        cv=10,
        scoring='accuracy',
    )
    accuracy += [(k, np.mean(score))]

# Persist the per-k results as a two-column CSV, without the index column.
cv_results = pd.DataFrame(accuracy, columns=['k', 'accuracy'])
cv_results.to_csv('cv.csv', index=False)

In [7]:
# California housing regression baseline.
# NOTE: this rebinds X, y, X_train, X_test, y_train and y_test, which
# previously held the iris data.
X, y = fetch_california_housing(return_X_y=True)

# Seed the split so the reported metrics are reproducible across runs
# (same seed as the iris split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# 5-nearest-neighbour regressor fitted on the raw (unscaled) features.
knn_r = KNeighborsRegressor(n_neighbors=5)
knn_r.fit(X_train, y_train)
y_pred = knn_r.predict(X_test)

print(f'R2: {r2_score(y_test, y_pred):.2f}, błąd średniokwadratowy: {mean_squared_error(y_test, y_pred):.2f}')

R2: 0.15, błąd średniokwadratowy: 1.15


In [5]:
class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Parameters
    ----------
    n_neighbors : int
        Number of closest training samples that vote on each prediction.
    """

    def __init__(self, n_neighbors):
        self.n_neighbors = n_neighbors

    def fit(self, X_train, y_train):
        """Store the training set.

        Both inputs are converted to NumPy arrays, so nested lists and
        pandas objects are accepted as well as arrays. Features are cast to
        float so that integer (in particular unsigned) inputs cannot wrap
        around when rows are subtracted.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Numeric training features.
        y_train : array-like of shape (n_samples,)
            Class labels (numeric or strings).

        Raises
        ------
        ValueError
            If `X_train` and `y_train` contain different numbers of samples.
        """
        features = np.asarray(X_train, dtype=float)
        labels = np.asarray(y_train)
        if len(features) != len(labels):
            raise ValueError(
                f"X_train has {len(features)} samples but y_train has {len(labels)}"
            )
        self.X_train = features
        self.y_train = labels

    def predict(self, X_test):
        """Predict a class label for every row of `X_test`.

        Parameters
        ----------
        X_test : array-like of shape (n_queries, n_features)
            Numeric query points.

        Returns
        -------
        list
            One predicted label per query row. When several labels are
            equally frequent among the neighbours, the smallest one wins.
        """
        queries = np.asarray(X_test, dtype=float)
        y_pred = []

        for test_row in queries:
            # Euclidean distance from this query to every training row at once.
            distances = np.sqrt(np.sum(np.square(self.X_train - test_row), axis=1))

            # Ascending sort; the stable kind keeps equidistant samples in
            # training order.
            nearest = np.argsort(distances, kind="stable")[:self.n_neighbors]

            # Majority vote. np.unique returns sorted labels and argmax takes
            # the first maximum, so ties resolve to the smallest label.
            candidates, votes = np.unique(self.y_train[nearest], return_counts=True)
            y_pred.append(candidates[np.argmax(votes)])

        return y_pred

In [6]:
N_NEIGHBORS = 13

# This comparison is a classification task on iris. Build the split here under
# dedicated names instead of reading the shared X_train/X_test/y_train/y_test
# globals, which the California housing cell rebinds to regression data.
# Parameters match the notebook's iris split (25% test, stratified, seed 42).
X_iris, y_iris = load_iris(return_X_y=True)
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris, y_iris, test_size=0.25, stratify=y_iris, random_state=42
)

# Our implementation.
knn = KNNClassifier(n_neighbors=N_NEIGHBORS)
knn.fit(X_train_iris, y_train_iris)
y_pred = knn.predict(X_test_iris)

# Reference implementation from scikit-learn with the same k.
knn_sklearn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn_sklearn.fit(X_train_iris, y_train_iris)
y_pred_sklearn = knn_sklearn.predict(X_test_iris)

print(f"Dokładność naszego k-NN to {accuracy_score(y_test_iris, y_pred)*100:.1f}%") 
print(f"Dokładność k-NN z sci-kit learn to {accuracy_score(y_test_iris, y_pred_sklearn)*100:.1f}%") 

Dokładność naszego k-NN to 94.7%
Dokładność k-NN z sci-kit learn to 94.7%


In [3]:
class KNNRegressor:
    """Brute-force k-nearest-neighbours regressor using Euclidean distance.

    Parameters
    ----------
    n_neighbors : int
        Number of closest training samples averaged for each prediction.
    """

    def __init__(self, n_neighbors):
        self.n_neighbors = n_neighbors

    def fit(self, X_train, y_train):
        """Store the training set.

        Both inputs are converted to float NumPy arrays, so nested lists and
        pandas objects are accepted as well as arrays, and integer features
        cannot wrap around when rows are subtracted.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Numeric training features.
        y_train : array-like of shape (n_samples,)
            Numeric regression targets.

        Raises
        ------
        ValueError
            If `X_train` and `y_train` contain different numbers of samples.
        """
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float)
        if len(features) != len(targets):
            raise ValueError(
                f"X_train has {len(features)} samples but y_train has {len(targets)}"
            )
        self.X_train = features
        self.y_train = targets

    def predict(self, X_test):
        """Predict a target value for every row of `X_test`.

        Parameters
        ----------
        X_test : array-like of shape (n_queries, n_features)
            Numeric query points.

        Returns
        -------
        list
            One prediction per query row: the mean target of the nearest
            training samples, rounded to 4 decimal places.
        """
        queries = np.asarray(X_test, dtype=float)
        y_pred = []

        for test_row in queries:
            # Euclidean distance from this query to every training row at once.
            distances = np.sqrt(np.sum(np.square(self.X_train - test_row), axis=1))

            # Ascending sort; the stable kind keeps equidistant samples in
            # training order.
            nearest = np.argsort(distances, kind="stable")[:self.n_neighbors]

            # Unweighted average of the neighbours' targets.
            y_pred.append(np.round(np.mean(self.y_train[nearest]), 4))

        return y_pred
    