# Classification: K Nearest Neighbours

#### Classification with Iris Dataset

The KNN classifier labels a data point by measuring its distance to the points already in the dataset and assigning it the most common label among its k nearest neighbours.

In [4]:
import tensorflow as tf
import numpy as np
from sklearn import datasets

In [52]:
k = 3 # Number of nearest neighbours that vote on each prediction

#### Load and preprocess dataset


In [48]:
# Load the bundled Iris dataset; x receives the feature matrix and
# y the integer class id of each row.
iris = datasets.load_iris()
x, y = iris.data, iris.target

In [44]:
x.shape  # one row per flower, one column per measured feature

(150, 4)

In [45]:
# Human-readable class names, indexed by the integer target id.
# The order must match the dataset's target encoding:
# 0 -> setosa, 1 -> versicolor, 2 -> virginica.
flower_labels = ['iris setosa', 'iris versicolor', 'iris virginica']


In [49]:
# One-hot encode the labels: row i of the identity matrix encodes class i
y_unique = np.unique(y)
y  = np.eye(y_unique.size)[y]


# Normalize features (min-max scale each column to the range [0, 1])

x_min = x.min(axis=0)
x = (x - x_min) / (x.max(axis=0) - x_min)

# Split dataset

split = .90
n_samples = x.shape[0]
n_features = n_samples  # legacy alias: despite the name, this is the sample count

# Shuffle row *indices*, never x itself: shuffling x in place reorders the
# features without reordering y, which breaks the feature/label pairing.
# A fixed seed keeps the split reproducible across kernel restarts.
split_rng = np.random.RandomState(0)
shuffled_idx = split_rng.permutation(n_samples)
n_train = int(n_samples * split)
train_idx = shuffled_idx[:n_train]
test_idx = shuffled_idx[n_train:]

x_train = x[train_idx]
x_test = x[test_idx]

y_train = y[train_idx]
y_test = y[test_idx]

In [51]:
y_test.shape  # held-out labels, one-hot encoded: one row per test sample, one column per class

(15, 3)

In [63]:
def get_distances(x, y, x_test, k):
    """
        Gets the Manhattan (L1) distances between every query point and
        every reference point: the sum over features of |x1 - x2|.

        Args:
            x: reference (training) points, one row per point.
            y: labels of the reference points. Unused here; kept so the
               signature stays unchanged.
            x_test: query points, one row per point, with the same
               number of columns as x.
            k: number of neighbours. Unused here; kept so the signature
               stays unchanged.

        Returns:
            A tensor whose entry [i, j] is the distance between
            x_test[i] and x[j].
    """
    # Add a middle axis so every query row broadcasts against all rows of x
    d0 = tf.expand_dims(x_test, axis=1)
    # Distances are taken against the x argument, not a notebook-level variable
    d1 = tf.abs(tf.subtract(x, d0))
    # Collapse the feature axis to get one distance per (query, reference) pair
    return tf.reduce_sum(input_tensor=d1, axis=2)


# Pairwise distances from every test point to every training point.
# This must call get_distances: predict() takes no arguments and reads
# `distances` itself.
distances = get_distances(x_train, y_train, x_test, k)

In [97]:
def predict():
    """
        Predicts a class index for every row of the notebook-level
        `distances` tensor by majority vote among its `k` closest
        training points (reads `distances`, `k` and `y_train` from the
        notebook namespace).

        Returns:
            Tensor of predicted class indices, one per row of `distances`.
    """
    # top_k picks the largest values, so negate the distances to get the
    # k smallest ones; only the indices are needed.
    _, neighbour_idx = tf.nn.top_k(tf.negative(distances), k=k)

    # One-hot labels of the selected neighbours
    neighbour_labels = tf.gather(y_train, neighbour_idx)
    print('k_lables: ', neighbour_labels.shape)

    # Summing one-hot rows over the neighbour axis yields per-class vote
    # counts; the class with the most votes is the prediction.
    votes = tf.reduce_sum(input_tensor=neighbour_labels, axis=1)
    return tf.argmax(votes, axis=1)
    
    