# K-Nearest Neighbors algorithm
This algorithm is used for classification and regression.

The input consists of the k closest training examples of a dataset. The output depends on whether the algorithm is used for classification or regression.
- Classification: the output is a class membership. An object is classified by a plurality vote of its neighbors, with the object being assigned to the class most common among its k nearest neighbors.
- Regression: the output is the property value for the object. The value is the average of the values of the k nearest neighbors.

In [None]:
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.3-py3-none-any.whl (7.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.3


In [None]:
#@title Imports
from ucimlrepo import fetch_ucirepo
import sklearn
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
from sklearn import linear_model, preprocessing

## Dataset
The dataset comes from https://archive.ics.uci.edu/dataset/19/car+evaluation.
The Car Evaluation Database contains examples with the structural information removed, i.e., directly relates CAR to the six input attributes:
- buying - buying price - vhigh, high, med, low
- maint - price for maintenance - vhigh, high, med, low
- doors - number of doors - 2, 3, 4, 5more
- persons - capacity in terms of persons to carry - 2, 4, more
- lug_boot - size of luggage boot - small, med, big
- safety - estimated safety of the car - low, med, high

The labels we use to categorize the cars:
- class - evaluation level - unacc (unacceptable), acc (acceptable), good, vgood (very good)

In [None]:
#@title Fetch dataset
# Import the submodule explicitly: a bare `import sklearn` does not guarantee
# that `sklearn.model_selection` is available as an attribute.
import sklearn.model_selection

car_evaluation = fetch_ucirepo(id=19)

# data (as pandas dataframes)
X = car_evaluation.data.features
y = car_evaluation.data.targets
data = X.join(y)

# LabelEncoder turns the text categories into integer codes so the
# distance-based model can work with them.
# NOTE(review): codes are assigned in sorted order of the label strings, so the
# natural ordering of ordinal columns (e.g. low < med < high) is not preserved.
# Consider an explicit ordinal mapping if that ordering should influence distances.
le = preprocessing.LabelEncoder()

# Encode each feature column; the results are numpy arrays of integer codes.
feature_columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']
buying, maint, doors, persons, lug_boot, safety = (
    le.fit_transform(list(X[column])) for column in feature_columns
)

# The encoder is fitted on the target last, so the class codes follow the
# sorted label order: 0 = acc, 1 = good, 2 = unacc, 3 = vgood.
class_ = le.fit_transform(list(y['class']))

# Name of the target column.
pred = 'class'

# From here on X holds one tuple of six integer codes per car and y the class codes.
X = list(zip(buying, maint, doors, persons, lug_boot, safety))
y = list(class_)

# Hold out 10% of the samples for testing (the split is random on every run).
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.1)
print(len(x_train), len(y_train))

1555 1555


## Implementing the KNN algorithm using Sklearn

In [None]:

# An odd n_neighbors only rules out tied votes when there are two classes;
# with the four classes used here ties remain possible.
model = KNeighborsClassifier(n_neighbors=7)

model.fit(x_train, y_train)
acc = model.score(x_test, y_test)
print(acc)

predicted = model.predict(x_test)

# The class codes were produced by LabelEncoder, which numbers the labels in
# sorted order, so index i of this list must be the i-th label alphabetically.
names = ["acc", "good", "unacc", "vgood"]

# Show the first ten test samples (fewer if the test set is smaller).
for predicted_code, sample, actual_code in zip(predicted[:10], x_test[:10], y_test[:10]):
  print("Predicted: ", names[predicted_code], "\tData: ", sample, "\tActual: " , names[actual_code])

0.9132947976878613
Predicted:  good 	Data:  (3, 0, 2, 1, 0, 2) 	Actual:  good
Predicted:  acc 	Data:  (2, 1, 3, 1, 1, 2) 	Actual:  acc
Predicted:  good 	Data:  (1, 0, 1, 2, 2, 1) 	Actual:  good
Predicted:  good 	Data:  (2, 3, 3, 0, 2, 1) 	Actual:  good
Predicted:  unacc 	Data:  (0, 2, 1, 1, 2, 0) 	Actual:  unacc
Predicted:  good 	Data:  (3, 3, 2, 2, 1, 0) 	Actual:  good
Predicted:  vgood 	Data:  (1, 1, 3, 1, 1, 0) 	Actual:  vgood
Predicted:  unacc 	Data:  (3, 1, 1, 1, 0, 0) 	Actual:  unacc
Predicted:  good 	Data:  (2, 1, 1, 0, 0, 0) 	Actual:  good
Predicted:  good 	Data:  (3, 0, 3, 1, 0, 0) 	Actual:  good


## Implementing the KNN algorithm using Tensorflow

In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

In [None]:
#@title Define KNNModel class

class KNNModel(keras.models.Model):
  """Brute-force k-nearest-neighbours classifier wrapped in a Keras model.

  There are no trainable weights: the training set is stored as-is and every
  prediction compares the query against all stored samples.

  Attributes:
    k: number of neighbours that take part in the vote.
    train_data: stored training samples; may be passed to the constructor or
      assigned afterwards. In this notebook it is a 2-D array with one row
      per sample.
    train_labels: label for each row of train_data.
  """

  def __init__(self, k, train_data=None, train_labels=None):
    super(KNNModel, self).__init__()
    self.k = k
    # Default to None so that call() can report a missing training set
    # clearly instead of failing with an AttributeError.
    self.train_data = train_data
    self.train_labels = train_labels

  # define call function to perform prediction based on the k-nearest neighbor
  def call(self, x):
    """Return the majority label among the k training samples closest to x.

    Args:
      x: the query sample. In this notebook it is a single 1-D float32
        feature vector, broadcast against every row of train_data.

    Returns:
      The most frequent label among the k nearest neighbours. On a tied
      vote the smallest tied label wins, because np.unique returns labels
      in sorted order and np.argmax picks the first maximum.

    Raises:
      ValueError: if train_data or train_labels has not been set.
    """
    if self.train_data is None or self.train_labels is None:
      raise ValueError(
          "KNNModel has no training data: set train_data and train_labels "
          "before calling the model.")

    # calculate L2 (Euclidean) distance
    dists = tf.norm(x - self.train_data, axis=1)

    # find indices of k smallest distances
    idxs = tf.math.top_k(-dists, k=self.k).indices  # top_k looks for maximum, that's why -dists

    # gather labels corresponding to the indices
    k_nearest_labels = tf.gather(self.train_labels, idxs)

    # make prediction
    # NOTE(review): NumPy is applied to a tensor here, which presumably only
    # works under eager execution - confirm before tracing with tf.function.
    unique_labels, counts = np.unique(k_nearest_labels, return_counts=True)
    predicted_label = unique_labels[np.argmax(counts)]
    return predicted_label



In [None]:
# Turn the encoded feature rows and class codes into NumPy arrays.
X_np, y_np = np.array(X), np.array(y)

# Fresh random split: 90% of the samples for training, the rest for testing.
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X_np,
    y_np,
    train_size=0.9,
)

# The model has nothing to fit; it only needs the training set attached.
knn_model = KNNModel(k=11)
knn_model.train_data, knn_model.train_labels = x_train, y_train

In [None]:
# Classify every test sample once and keep the results, so the accuracy and
# the printed examples below share the same predictions.
predictions = [
    knn_model(tf.convert_to_tensor(sample, dtype=tf.float32))
    for sample in x_test
]

correct_predictions = sum(
    1 for prediction, actual in zip(predictions, y_test) if prediction == actual
)

accuracy = correct_predictions / len(x_test)
print("Accuracy:", accuracy)

# Print some predictions
# The class codes were produced by LabelEncoder, which numbers the labels in
# sorted order, so index i of this list must be the i-th label alphabetically.
# Shorter names are padded so the printed columns line up.
names = ["acc  ", "good ", "unacc", "vgood"]
for prediction, sample, actual in zip(predictions[:20], x_test[:20], y_test[:20]):
    print("Predicted:", names[prediction],  "\tData:", sample, "\tActual:", names[actual])



Accuracy: 0.930635838150289
Predicted: good  	Data: [2 2 1 1 0 1] 	Actual: good 
Predicted: unacc 	Data: [2 1 2 1 1 2] 	Actual: acc  
Predicted: good  	Data: [3 1 2 0 1 1] 	Actual: good 
Predicted: unacc 	Data: [2 3 3 1 0 0] 	Actual: unacc
Predicted: good  	Data: [2 3 0 0 2 1] 	Actual: good 
Predicted: good  	Data: [1 2 3 0 0 0] 	Actual: good 
Predicted: unacc 	Data: [2 2 1 1 1 2] 	Actual: unacc
Predicted: good  	Data: [0 3 2 2 0 1] 	Actual: good 
Predicted: good  	Data: [1 2 2 2 1 1] 	Actual: good 
Predicted: good  	Data: [0 1 2 0 1 1] 	Actual: good 
Predicted: good  	Data: [3 2 1 0 1 2] 	Actual: good 
Predicted: unacc 	Data: [2 0 0 2 0 2] 	Actual: unacc
Predicted: good  	Data: [2 2 0 1 2 1] 	Actual: good 
Predicted: good  	Data: [1 2 1 2 1 1] 	Actual: good 
Predicted: unacc 	Data: [0 1 3 1 0 0] 	Actual: unacc
Predicted: good  	Data: [1 2 3 2 0 1] 	Actual: good 
Predicted: good  	Data: [0 1 3 0 2 0] 	Actual: good 
Predicted: unacc 	Data: [1 3 3 1 1 0] 	Actual: unacc
Predicted: unacc 	