In [1]:
#Importing relevant libraries

import numpy as np
from sklearn import datasets

In [2]:
# Load the iris dataset and unpack its feature matrix and label vector

iris = datasets.load_iris()
X, y = iris.data, iris.target

In [3]:
# Report the dimensions of the features and of the labels

print('X shape:', np.shape(X))
print('y shape:', np.shape(y))

X shape: (150, 4)
y shape: (150,)


In [4]:
# Reshape y into a single column to avoid shape inconsistencies later on.
# -1 lets NumPy infer the number of rows, so the sample count is not hard-coded.

y = np.reshape(y, (-1, 1))
print('y shape:', y.shape)

y shape: (150, 1)


In [5]:
# Append the labels to the features as an extra column so every row keeps its
# own label while the rows are shuffled.
# The shuffle uses a locally seeded generator so that the row order (and with it
# the train/test split and the accuracy) is the same on every Restart & Run All.
# Outputs recorded from an unseeded run will differ after re-execution.

Xy = np.concatenate((X, y), axis=1)
shuffle_rng = np.random.default_rng(0)
shuffle_rng.shuffle(Xy)  # in place, along the first axis (rows)

In [6]:
# Recover the shuffled features (first four columns) and labels (remaining column)

X, y = Xy[:, :4], Xy[:, 4:]

In [7]:
# Peek at the first five rows of X and the matching labels in y

print('First 5 examples in X:\n', X[:5])
print('\nFirst 5 labels in y:\n', y[:5])

First 5 examples in X:
 [[5.2 3.5 1.5 0.2]
 [6.9 3.1 5.1 2.3]
 [4.8 3.4 1.9 0.2]
 [5.1 3.8 1.6 0.2]
 [5.  3.4 1.5 0.2]]

First 5 labels in y:
 [[0.]
 [2.]
 [0.]
 [0.]
 [0.]]


In [8]:
# Number of examples used for training and for testing

num_train, num_test = 120, 30

In [9]:
# The first num_train rows become the training set, the next num_test rows the test set

train_rows = slice(0, num_train)
test_rows = slice(num_train, num_train + num_test)

X_train, y_train = X[train_rows], y[train_rows]
X_test, y_test = X[test_rows], y[test_rows]

In [10]:
# Euclidean distance between every test row and every training row, expanded as
# ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b so that it is a single matrix product.
# dists[i, j] is the distance from test row i to training row j.

test_sq = np.sum(X_test**2, axis=1, keepdims=True)  # column vector: one squared norm per test row
train_sq = np.sum(X_train**2, axis=1)               # 1-D: one squared norm per training row
dists = test_sq + train_sq - 2 * np.matmul(X_test, X_train.T)
# Floating-point rounding can leave a (near-)zero squared distance slightly negative;
# its square root would be NaN, and argsort places NaN last, so an identical training
# point would be ranked as the farthest neighbour. Clamp at zero before the root.
dists = np.sqrt(np.maximum(dists, 0))

In [11]:
# Number of nearest neighbours that take part in each vote

k = 5

In [12]:
# Predict a label for every test row by majority vote among its k nearest training rows

y_pred = np.zeros((num_test, 1))
for i in range(num_test):
    # Indices of the k training rows with the smallest distance to test row i
    nearest = np.argsort(dists[i, :])[:k]
    # Plain Python scalars, so list.count compares numbers instead of 1-element arrays
    closest_y = y_train[nearest, 0].tolist()
    # Most frequent label; on a tie max() keeps the tied label that occurs first,
    # i.e. the one belonging to the closer neighbour
    y_pred[i] = max(closest_y, key=closest_y.count)

In [13]:
# Compare the first few predictions to the actual values (at most 10, fewer if the
# test set is smaller)

for i in range(min(10, num_test)):
    # Index the single column explicitly: formatting a 1-element array with %d relies
    # on NumPy's deprecated implicit array-to-scalar conversion.
    print('Prediction:%d Actual value:%d\n' % (y_pred[i, 0], y_test[i, 0]))

Prediction:0 Actual value:0

Prediction:0 Actual value:0

Prediction:2 Actual value:2

Prediction:1 Actual value:1

Prediction:1 Actual value:2

Prediction:0 Actual value:0

Prediction:1 Actual value:1

Prediction:2 Actual value:2

Prediction:1 Actual value:1

Prediction:2 Actual value:2



In [14]:
# Accuracy of the K Nearest Neighbour classifier: fraction of test rows whose
# predicted label equals the true label

num_correct = np.sum(y_pred == y_test)
accuracy = float(num_correct) / num_test
# Print the percentage as a whole number, rounded down. It is computed with integer
# arithmetic because truncating a float product can land one point low
# (e.g. 0.29 * 100 == 28.999999999999996, which %d would print as 28).
print ('Accuracy:%d%%' % (100 * int(num_correct) // num_test))

Accuracy:96%
