In [2]:
import numpy as np


In [6]:
# Problem size: every point has d coordinates and the training set holds N points.
d = 10
N = 100

# Training set (one point per row) followed by a single query point,
# both sampled from the standard normal distribution.
X = np.random.randn(N, d)
z = np.random.randn(d)


In [8]:
# Display the training matrix: N rows, one d-dimensional point per row.
X

array([[-6.46094880e-01,  4.98882964e-01,  4.78914291e-01,
         1.77304275e-01,  1.61646059e+00,  3.03064667e-01,
        -1.96227703e-01,  1.97325717e-01, -2.70947935e-01,
        -1.64918735e-04],
       [-2.49274661e+00,  5.53699106e-01, -1.22136926e+00,
         8.96535514e-01,  3.82936726e-01, -1.24918593e-01,
         3.14631082e-01, -1.38527593e+00,  3.61830912e-02,
         1.18946891e+00],
       [ 7.70619566e-01,  2.99903758e+00, -7.71395664e-01,
         1.05559730e+00, -9.08625147e-01, -4.86272577e-02,
        -1.45205099e+00, -5.02765303e-01, -2.59564360e-03,
         1.13094757e-01],
       [ 2.50738874e-01,  1.46019232e-01,  1.69054172e+00,
         9.19501501e-01,  4.79310185e-01,  5.14975184e-01,
        -7.18575394e-01, -1.57942360e+00,  5.07353190e-01,
        -2.62945501e-01],
       [ 7.91131828e-01, -9.57178207e-01,  5.60532724e-01,
        -1.37920065e+00, -6.53333555e-01, -1.75712927e+00,
        -6.18226107e-01, -1.19831874e+00, -1.29905752e+00,
         4.

In [9]:
# Display the d-dimensional query point.
z

array([-0.52140606,  0.39562151,  1.51351426,  0.33794569,  1.32655529,
       -0.05499047, -1.03693498,  0.12364097, -1.52864269,  0.97645629])

In [10]:
def dist_pp(z, x):
    """Return the squared Euclidean distance between two points.

    Note that no square root is taken: the result is the sum of squared
    coordinate differences.

    Parameters
    ----------
    z : numpy.ndarray
        First point; its shape is the reference layout.
    x : numpy.ndarray
        Second point. It must hold as many elements as ``z`` and is reshaped
        to ``z.shape`` before subtracting, so e.g. a row vector and a column
        vector can be compared without accidental broadcasting.

    Returns
    -------
    numpy scalar
        Sum of the squared element-wise differences.
    """
    # Bring x into z's layout so the subtraction is strictly element-wise.
    diff = z - x.reshape(z.shape)
    return np.sum(np.square(diff))

In [12]:
def dist_ps_naive(z, X):
    """Squared distances from point ``z`` to every row of ``X``, one row at a time.

    This is the straightforward reference version: it loops over the rows of
    ``X`` and calls ``dist_pp`` for each of them.

    Parameters
    ----------
    z : numpy.ndarray
        Query point.
    X : numpy.ndarray
        Points to compare against, one per row (``X.shape[0]`` rows).

    Returns
    -------
    numpy.ndarray of shape (1, N)
        Float array whose entry ``[0, i]`` is ``dist_pp(z, X[i])``.
    """
    num_points = X.shape[0]
    out = np.zeros((1, num_points))
    for idx in range(num_points):
        out[0, idx] = dist_pp(z, X[idx])
    return out


In [36]:
def dist_ps_fast(z, X):
    """Squared Euclidean distances from one point to every row of ``X``, vectorized.

    Uses the expansion ``||x - z||^2 = ||x||^2 + ||z||^2 - 2 * x.z`` so the
    whole computation is a row-wise sum plus a single matrix-vector product,
    with no Python-level loop over the rows.

    Parameters
    ----------
    z : array_like
        Query point with ``d`` entries. Any layout holding exactly ``d``
        values -- ``(d,)``, ``(1, d)`` or ``(d, 1)`` -- is accepted; it is
        flattened before use.
    X : numpy.ndarray of shape (N, d)
        Points to compare against, one per row.

    Returns
    -------
    numpy.ndarray of shape (N,)
        Entry ``i`` is the squared distance between ``z`` and ``X[i]``.
    """
    # Flatten the query first. Without this a (d, 1) column vector makes
    # X.dot(z) come out as (N, 1), which then broadcasts against the (N,)
    # row norms into a meaningless (N, N) matrix, and a (1, d) row vector
    # makes the dot product fail outright.
    z = np.ravel(z)
    X2 = np.sum(X*X, axis=1)  # squared l2 norm of each ROW of X
    z2 = np.sum(z*z)          # squared l2 norm of z
    # z2 is the same for every row, so it can be dropped when only the
    # ranking of the distances matters; it is kept here so the values are
    # true squared distances.
    return X2 + z2 - 2*X.dot(z)

In [48]:
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split # for splitting data
from sklearn.metrics import accuracy_score # for evaluating results


In [56]:
# Seed NumPy's global generator before splitting (train_test_split is called
# without an explicit random_state below).
np.random.seed(10)

# Load the Iris data set and pull out the feature matrix and the class labels.
iris = datasets.load_iris()
iris_X, iris_y = iris.data, iris.target
print('Labels:', np.unique(iris_y))

# Hold out 130 samples for testing; whatever remains is the training set.
X_train, X_test, y_train, y_test = train_test_split(
    iris_X, iris_y, test_size=130
)
print('Train size:', X_train.shape[0], ', test size:', X_test.shape[0])

Labels: [0 1 2]
Train size: 20 , test size: 130


In [57]:
# 1-nearest-neighbour classifier; p=2 makes the Minkowski metric Euclidean.
# fit() returns the estimator itself, so construction and training are chained.
model = neighbors.KNeighborsClassifier(n_neighbors=1, p=2).fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy of 1NN: %.2f %%" % (100 * accuracy_score(y_test, y_pred)))


Accuracy of 1NN: 96.92 %


In [58]:
# 7 nearest neighbours with the default uniform vote; p=2 is the Euclidean metric.
# fit() returns the estimator itself, so construction and training are chained.
model = neighbors.KNeighborsClassifier(n_neighbors=7, p=2).fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy of 7NN: %.2f %%" % (100 * accuracy_score(y_test, y_pred)))

Accuracy of 7NN: 90.77 %


In [59]:
# 7 nearest neighbours again, but weights='distance' makes each neighbour's
# vote count in proportion to the inverse of its distance to the query.
# fit() returns the estimator itself, so construction and training are chained.
model = neighbors.KNeighborsClassifier(
    n_neighbors=7, p=2, weights='distance'
).fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy of 7NN, distance: %.2f %%" % (100 * accuracy_score(y_test, y_pred)))

Accuracy of 7NN, distance: 93.08 %
