In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as knc
from sklearn import metrics
import numpy as np

In [None]:
# Load the iris dataset bundled with sklearn.
# The returned object exposes the attributes used below: .data, .target and .target_names.
iris = datasets.load_iris()

In [None]:
# Pull the features (x) and the labels (y) out of the dataset; both are numpy arrays.
x, y = iris.data, iris.target

In [None]:
# x is the data/features of the dataset - all sepal and petal measurements in cm.
# Represented here as a 2-D numpy array: one row per sample, one column per measurement.
# Only the first five rows are displayed so the output stays short; x.shape gives the full size.
x[:5]

In [None]:
# y is the labels - the iris species of each sample in x.
# Represented here as a numpy array of integer codes: 0 = setosa; 1 = versicolor; 2 = virginica.
# The bare name on the last line makes the notebook display the array.
y

In [None]:
# The dataset is split to provide a portion to train the model and a portion to test the model.
# 80% training and 20% testing (determined to be marginally more accurate than a 70/30 split - see section 5.4).
# random_state pins the shuffle so that Restart & Run All reproduces the same split, and
# therefore the same accuracy figure further down; without it every run scores differently.
# stratify=y keeps the class proportions of y the same in the training and testing portions.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# K Nearest Neighbour classifier.
# 13 closest neighbours are examined for each prediction (determined to be most accurate in testing - see section 5.4).
N_NEIGHBORS = 13
knn = knc(n_neighbors=N_NEIGHBORS)

In [None]:
# Fit the KNN model on the training features and their labels.
# The fitted model is what the following cells use to evaluate the test data.
knn.fit(X=x_train, y=y_train)

In [None]:
# Run the fitted model over the held-out test features.
test_prediction = knn.predict(X=x_test)
# Display the predicted labels (array of integer class codes).
test_prediction

In [None]:
# Accuracy of the test predictions: compares the predicted labels against the true test labels.
metrics.accuracy_score(y_true=y_test, y_pred=test_prediction)

In [None]:
# New data to further test the model: three hypothetical iris plants, one row per plant.
# Each row holds the 4 measurements in the same column order as the features in x.
new_data = np.array(
    [
        [4, 3, 2, 1],
        [6, 3, 4, 1],
        [7, 4, 6, 2],
    ]
)

In [None]:
# Classify the hypothetical plants with the trained model.
new_data_prediction = knn.predict(X=new_data)
# Displayed as an array of numeric class labels.
new_data_prediction

In [None]:
# Show species names instead of numeric labels: the predicted labels are used
# as indices into iris.target_names.
predicted_labels = knn.predict(new_data)
new_data_prediction = iris.target_names[predicted_labels]
new_data_prediction