In [0]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
import tensorflow as tf

In [0]:
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# Feature matrix: the first four columns of every sample, copied into a fresh array.
x_vals = np.array(iris.data[:, :4])
# Class label of every sample, in the same row order as x_vals.
y_vals = np.array(iris.target)

In [0]:
# Create a one-hot encoding for the labels, since the target is a categorical class.
# Background on one-hot encoding: https://machinelearningmastery.com/why-one-hot-encode-data-in-machine-learning/
# Row i of an identity matrix is the one-hot vector for class i, so indexing the
# identity matrix with the integer labels yields one one-hot row per sample.
one_hot_rows = np.eye(len({label for label in y_vals}))
y_vals = one_hot_rows[y_vals]

In [0]:
# Min-max scale every feature column: subtract the column minimum, divide by the column range.
# Axis 0 runs down the rows, so each reduction yields one value per column.
# NOTE: the min and range are computed over ALL rows, before the train/test split below,
# so the test rows contribute to the scaling statistics.
# NOTE: a column whose values are all equal has range 0 and would divide by zero here.
# np.ptp (peak-to-peak, i.e. max - min) is called as a function because the
# ndarray.ptp() method was removed in NumPy 2.0.
x_vals = (x_vals - x_vals.min(0)) / np.ptp(x_vals, axis=0)

In [0]:
# Train/test split: 80% of the row indices are drawn for training, the rest form the test set.
# Seeding NumPy's global RNG makes the draw reproducible; 42 is just a conventional seed.
np.random.seed(42)
# replace=False samples without replacement, so no row index is drawn twice.
train_indices = np.random.choice(len(x_vals), round(len(x_vals) * 0.8), replace=False)
# Every row index that was not drawn for training. np.setdiff1d returns the sorted unique
# values, so the test order no longer depends on set iteration order, and the result keeps
# an integer dtype (usable as an index array) even when it is empty.
test_indices = np.setdiff1d(np.arange(len(x_vals)), train_indices)

# Select the matching feature rows and one-hot label rows for each subset.
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]

In [0]:
# Sizes read off the first row of each array: number of entries in one training
# feature row, and number of entries in one label row.
feature_number = x_vals_train[0].shape[0]
class_number = y_vals[0].shape[0]

# Number of nearest neighbours taken into account for each prediction.
k = 5

In [0]:
# TensorFlow construction phase: the statements below define placeholders and the
# operations on them; the values are supplied later through a feed_dict.
# Read more about how graphs work here: https://www.tensorflow.org/guide/graphs

# Inputs fed at run time; the leading None leaves the number of rows unspecified.
x_data_train = tf.placeholder(dtype=tf.float32, shape=[None, feature_number])
y_data_train = tf.placeholder(dtype=tf.float32, shape=[None, class_number])
x_data_test = tf.placeholder(dtype=tf.float32, shape=[None, feature_number])

# Manhattan (L1) distance between every test point and every training point.
# expand_dims turns the test batch into [n_test, 1, features], so the subtraction
# broadcasts against the [n_train, features] training batch; summing the absolute
# differences over the feature axis (axis 2) leaves a [n_test, n_train] matrix.
distance = tf.reduce_sum(tf.abs(x_data_train - tf.expand_dims(x_data_test, axis=1)), axis=2)

# Nearest k points.
# tf.nn.top_k() returns the k LARGEST values of each row together with their indices,
# so the distances are negated to make the k smallest distances come out on top.
# tf.gather() then extracts the label rows that belong to those indices.
_, top_k_indices = tf.nn.top_k(-distance, k=k)
top_k_label = tf.gather(y_data_train, top_k_indices)

# top_k_label has shape [n_test, k, class_number]: one label row per neighbour.
# Summing over the neighbour axis (axis 1) counts the "votes" each class received,
# and argmax over the class axis picks the index of the class with the most votes.
sum_up_predictions = tf.reduce_sum(top_k_label, axis=1)
prediction = tf.argmax(sum_up_predictions, axis=1)

In [0]:
# Execution phase: evaluate the prediction node, feeding the training split as the
# neighbour pool and the test split as the points to classify.
sess = tf.Session()
prediction_outcome = sess.run(
    prediction,
    feed_dict={
        x_data_train: x_vals_train,
        y_data_train: y_vals_train,
        x_data_test: x_vals_test,
    },
)

# Evaluation: `accuracy` holds the NUMBER of test points whose predicted class index
# equals the argmax of their label row; the printed value is that count divided by
# the number of predictions.
accuracy = sum(
    1
    for pred, actual in zip(prediction_outcome, y_vals_test)
    if pred == np.argmax(actual)
)

print(accuracy / len(prediction_outcome))

0.9666666666666667


In [0]:
# try to make improvements!