### KNN - Linear Algebra with NumPy

$v$ = target vector

$X$ = data points matrix with the vectors as row vectors (not column)

$y$ = the predicted labeled output

Mathematically this implementation can be represented as:

$D_m = X_m - v^T$ | Difference matrix: row $m$ of $D$ is row $m$ of $X$ minus the vector $v$, for every row (vector) $m$ of $X$

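The squared length of each difference vector ends up on the diagonal of the following matrix product:

$P = A^TA$, where $A = D^T$ holds the difference vectors as its columns

$l_n = \sqrt{P_{n,n}}$ is then the length of the $n$-th difference vector.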

Now we have the lengths of all difference vectors (the rows of $D$). The rest is done with `np.argsort`, which finds the $k$ nearest points, followed by a majority vote over their labels.

Note that the matrix the function receives already stores the vectors as rows, i.e. it is already the transpose $A^T$, so what the function below computes is essentially: $A^{T}(A^T)^{T} = A^{T}A$


In [33]:
import pandas as pd
import numpy as np
from statistics import mode 

# Load the training and test splits of the Iris data set.
df = pd.read_csv('../datasets/iris_train.csv')
df_test = pd.read_csv('../datasets/iris_test.csv')

# Class names; the position of a name in this list is its integer label.
targets = ['Iris-virginica', 'Iris-setosa', 'Iris-versicolor']

# Replace every class name by its integer label, in both frames.
for label, name in enumerate(targets):
    for frame in (df, df_test):
        frame.loc[frame['class'] == name, 'class'] = label

# Columns 0-3 are the features, column 4 is the (now integer) class label.
data, y = df.values[:, :4], df.values[:, 4]
data_test, y_test = df_test.values[:, :4], df_test.values[:, 4]

# A single test sample used to try out the classifiers below.
point = df_test.values[10, :4]



In [34]:
def knn_numpy(k, X, y, v, labels=None):
    """Classify ``v`` by majority vote among its ``k`` nearest rows of ``X``.

    Distances are Euclidean and are computed for all rows at once.

    Args:
        k: Number of nearest neighbours that vote; must be at least 1.
        X: 2-D array-like with one data point per row.
        y: 1-D array-like of integer class labels, one per row of ``X``.
        v: 1-D array-like target point with one entry per column of ``X``.
        labels: Optional sequence mapping an integer label to its class
            name.  Defaults to the module-level ``targets`` list.

    Returns:
        The class name of the most common label among the ``k`` nearest
        points.  A tied vote is resolved by ``statistics.mode``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A negative k would silently slice off the *farthest* points instead.
        raise ValueError("k must be at least 1")
    if labels is None:
        labels = targets

    # Cast to float: arrays sliced out of a mixed-type DataFrame may have
    # dtype object, which np.linalg.norm cannot take a square root of.
    differences = np.asarray(X, dtype=float) - np.asarray(v, dtype=float)
    distances = np.linalg.norm(differences, axis=1)

    # Stable sort so that equally distant points keep their row order.
    nearest = np.argsort(distances, kind="stable")[:k]

    prediction = mode(np.asarray(y)[nearest])
    return labels[int(prediction)]

# Classify the sample test point from its 10 nearest training points.
knn_numpy(10, data, y, point)

'Iris-setosa'

In [31]:
def knn_linear_algebra(k, X, y, v, labels=None):
    """Classify ``v`` with k-nearest-neighbours, phrased as linear algebra.

    With ``D = X - v`` (one difference vector per row), the squared length
    of row ``n`` is entry ``(n, n)`` of ``D @ D.T``.  Only that diagonal is
    computed here.

    Args:
        k: Number of nearest neighbours that vote; must be at least 1.
        X: 2-D array-like with one data point per row.
        y: 1-D array-like of integer class labels, one per row of ``X``.
        v: 1-D array-like target point with one entry per column of ``X``.
        labels: Optional sequence mapping an integer label to its class
            name.  Defaults to the module-level ``targets`` list.

    Returns:
        The class name of the most common label among the ``k`` nearest
        points.  A tied vote is resolved by ``statistics.mode``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        # A negative k would silently slice off the *farthest* points instead.
        raise ValueError("k must be at least 1")
    if labels is None:
        labels = targets

    # Cast to float: arrays sliced out of a mixed-type DataFrame may have
    # dtype object, on which np.sqrt does not work.
    difference_vectors = np.asarray(X, dtype=float) - np.asarray(v, dtype=float)

    # einsum yields exactly the diagonal of D @ D.T.  Building the full
    # n-by-n product costs O(n^2) time and memory, and its off-diagonal
    # entries can be negative, so rooting the whole matrix produces NaNs.
    squared_lengths = np.einsum("ij,ij->i", difference_vectors, difference_vectors)
    vector_lengths = np.sqrt(squared_lengths)

    # Stable sort so that equally distant points keep their row order.
    closest_vectors = np.argsort(vector_lengths, kind="stable")[:k]

    prediction = mode(np.asarray(y)[closest_vectors])
    return labels[int(prediction)]


# Classify the sample test point from its 13 nearest training points.
knn_linear_algebra(13, data, y, point)

'Iris-setosa'