### Import Packages

In [152]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Import Datasets

In [119]:
iris = datasets.load_iris() # Classification
wine = datasets.load_wine() # Classification
breast_cancer = datasets.load_breast_cancer() # Classification
boston = datasets.load_boston() # Regression
diabetes = datasets.load_diabetes() # Regression

### Create distance functions

In [118]:
def minkowski_dist(x_1, x_2, p):
    return np.sum(np.abs(x_1 - x_2) ** p) ** (1 / p)

def taxicab_dist(x_1, x_2):
    return minkowski_dist(x_1, x_2, 1)

def euclidian_dist(x_1, x_2):
    return minkowski_dist(x_1, x_2, 2)

def chebychev_dist(x_1, x_2):
    return np.max(np.abs(x_1 - x_2))

def canberra_dist(x_1, x_2):
    return np.sum(np.abs(x_1 - x_2) / (np.abs(x_1) + np.abs(x_2)))

### Find Nearest Neighbors

In [117]:
def get_kNN_index(X, x_pred, k, distance_fn):
    dists = np.apply_along_axis(distance_fn, axis = 1, arr = X, x_2 = x_pred)
    return np.argpartition(dists, k)[:k]

### Create Prediction Functions

In [116]:
def predict_mode(y):
    return np.bincount(y).argmax()

def predict_mean(y):
    return np.mean(y)

### Predict Response

In [115]:
def predict(y, idx, predict_fn):
    return predict_fn(y[idx])

### Return Predictions

In [144]:
def get_preds(train_X, train_y, test_X, k, distance_fn, prediction_fn):
    return np.array([predict(train_y,
                   get_kNN_index(train_X, test_X[i,:], k, distance_fn),
                   prediction_fn) for i in range(test_X.shape[0])])     

In [184]:
train_X, test_X, train_y, test_y = train_test_split(breast_cancer['data'], breast_cancer['target'],
                                                    test_size = .3, random_state = 0)

scaler_X = StandardScaler().fit(train_X)
train_X = scaler_X.transform(train_X)
test_X = scaler_X.transform(test_X)


np.mean(get_preds(train_X, train_y, test_X, 5, euclidian_dist, predict_mean) == test_y)
# np.sqrt(np.mean((get_preds(train_X, train_y, test_X, 5, taxicab_dist, predict_mean) - test_y) ** 2))


0.7953216374269005