In [None]:
# Importing libraries
import math
import sys
import threading
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix ,classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

warnings.filterwarnings('ignore')

In [None]:
# Toy 2-D dataset: six grid points, each tagged with a colour class
data = dict(
    x=[2, 4, 4, 4, 6, 6],
    y=[4, 2, 4, 6, 2, 4],
    label=["orange", "orange", "blue", "orange", "blue", "orange"],
)
df = pd.DataFrame(data)
# Name of the target column; every other column is treated as a feature
label = "label"

In [None]:
# Preview the first rows of the dataset (two coordinates plus the class label)
df.head()

Unnamed: 0,x,y,label
0,2,4,orange
1,4,2,orange
2,4,4,blue
3,4,6,orange
4,6,2,blue


In [None]:
# Summary statistics of the numeric columns, transposed so each feature is a row
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
x,6.0,4.333333,1.505545,2.0,4.0,4.0,5.5,6.0
y,6.0,3.666667,1.505545,2.0,2.5,4.0,4.0,6.0


In [None]:
# Class balance: number of points carrying each label
df['label'].value_counts()

orange    4
blue      2
Name: label, dtype: int64

In [None]:
# (number of rows, number of columns) of the dataset
df.shape

(6, 3)

In [None]:
# Function to validate the provided data to k-NN algorithm
def check_data(df, k, input_val, weight, metric):
    '''
    Validate the arguments of a k-NN query; prints the reason when a check fails.

    params: {df, k, input_val, weight, metric}
    - df : { dataset; all columns except one (the label) are features }
    - type : pandas.DataFrame

    - k : { number of neighbors, must satisfy 1 <= k <= len(df) }
    - type : int

    - input_val : { point to classify, one value per feature column }
    - type : list

    - weight : { 'distance' or 'uniform' }
    - type : string

    - metric : { 'manhattan' or 'euclidean' }
    - type : string

    - returns: {is_valid}
    - is_valid : { True when every check passes, otherwise False }
    - type: bool
    '''
    # One column of df is the label, so the query must have one value fewer
    if len(input_val) != df.shape[1]-1:
        print("Provided input points are invalid")
        return False
    # k <= 0 selects no neighbours, which makes the vote fail later on
    if k < 1:
        print("k value must be at least 1")
        return False
    if k > len(df):
        print("k value cannot be greater than size of the dataset")
        return False
    if weight not in ['distance', 'uniform']:
        print("Param weight takes values distance or uniform")
        return False
    if metric not in ['manhattan','euclidean']:
        print("Param metric takes values manhattan or euclidean")
        return False
    return True

In [None]:
# Function to calculate euclidean distance between two points
def euclidean_distance(point_a, point_b):
    '''
    Straight-line (L2) distance between two points.

    params: {point_a, point_b}
    - point_a, point_b : { coordinates of a point in the dataset }
    - type : list

    - returns: {distance}
    - distance : { Euclidean distance between point_a and point_b }
    - type: float

    Only the first len(point_a) coordinates of point_b are read.
    '''
    squared_sum = 0
    for axis, coord_a in enumerate(point_a):
        delta = coord_a - point_b[axis]
        squared_sum = squared_sum + delta ** 2
    # Square root of the accumulated squared differences
    return squared_sum ** 0.5

In [None]:
# Function to calculate manhattan distance between two points
def manhattan_distance(point_a, point_b):
    '''
    City-block (L1) distance between two points.

    params: {point_a, point_b}
    - point_a, point_b : { coordinates of a point in the dataset }
    - type : list

    - returns: {distance}
    - distance : { Manhattan distance between point_a and point_b }
    - type: int or float, following the coordinate types

    Only the first len(point_a) coordinates of point_b are read.
    '''
    total = 0
    for axis, coord_a in enumerate(point_a):
        # Absolute difference along this axis
        total = total + abs(coord_a - point_b[axis])
    return total

In [None]:
# Method to predict class based on the weighted frequency of nearest neighbors
def weighted_prediction(nearest_neighbors):
    '''
    Vote among the neighbors, each one weighted by the inverse of its distance.

    params: {nearest_neighbors}
    - nearest_neighbors : { distance and class of k points from the dataset, nearest to input value }
    - type : list - [(distance, class),(distance, class), ...]

    - returns: {prediction}
    - prediction : { class with the largest summed 1/distance weight;
                     the class of an exact match (distance 0) wins outright }
    - type: string

    - raises: ValueError when nearest_neighbors is empty
    '''
    label_weights = {}
    for distance, label in nearest_neighbors:
        # Compare the real value: int(distance) would truncate e.g. 0.9 to 0
        # and wrongly treat any neighbor closer than 1.0 as an exact match.
        if distance == 0:
            return label
        label_weights[label] = label_weights.get(label, 0) + 1/distance
    if not label_weights:
        raise ValueError("nearest_neighbors must contain at least one (distance, class) pair")
    # Return label having maximum weight (first one seen wins a tie)
    return max(label_weights, key=label_weights.get)

In [None]:
def chooseK(arr):
    '''
    Suggest a number of neighbors for k-NN from the size of the dataset.

    params: {arr}
    - arr : { dataset; its row count is arr.shape[0] when a shape attribute
              exists, otherwise len(arr) }
    - type : numpy array, DataFrame or list

    - returns: {k}
    - k : { square root of the row count, rounded, then raised to the next
            odd number when even }
    - type: int
    '''
    shape = getattr(arr, "shape", None)
    n_samples = shape[0] if shape is not None else len(arr)
    print("Size of array :", n_samples)
    k = round(math.sqrt(n_samples))
    # k should be odd so that a two-class vote cannot end in a 50%-50% split
    if k % 2 == 0:
        k += 1
    print("Choosen value of K : ", k)
    return k

In [None]:
# Method to implement k nearest neighbors algorithm
def KNearestNeighbors(X, y, k, input_val, weight, metric):
    '''
    Classify input_val from the labels of its k closest points in X.

    params: {X, y, k, input_val, weight, metric}
    - X : { Input data, one coordinate list per point }
    - type : list

    - y : { Output label of each point in X }
    - type : list

    - k : { number of neighbors }
    - type : int

    - input_val : { value for which output label is to be determined }
    - type : list - [x-coordinate, y-coordinate]

    - weight : { 'distance' for inverse-distance weighting; any other value
                 gives a plain majority vote (uniform) }
    - type : string

    - metric : { 'manhattan'; any other value uses euclidean distance }
    - type : string

    - returns: {prediction}
    - prediction : { Output label i.e blue or orange }
    - type: string

    - raises: ValueError when X is empty
    '''
    # Pick the distance function once instead of duplicating the loop per metric
    distance_fn = manhattan_distance if metric == 'manhattan' else euclidean_distance
    distances = [(distance_fn(X[i], input_val), y[i]) for i in range(len(X))]
    if not distances:
        raise ValueError("X must contain at least one point")
    # Sort points according to the calculated distance (stable: ties keep dataset order)
    distances.sort(key=lambda pair: pair[0])
    nearest_neighbors = distances[:k]
    if weight == 'distance':  # Perform prediction using distance as weight
        return weighted_prediction(nearest_neighbors)
    # Uniform voting. An exact match decides the label directly; compare the
    # real value, since int(distance) would truncate e.g. 0.5 down to 0.
    if distances[0][0] == 0:
        return distances[0][1]
    neighbor_classes = [neighbor_label for _, neighbor_label in nearest_neighbors]
    # max() over the nearest-first list resolves ties in favour of the closest
    # class; iterating a set made the result depend on string hash order.
    return max(neighbor_classes, key=neighbor_classes.count)

In [None]:
# Demo query: classify the point (4, 4) with 3 uniformly weighted neighbours
k, metric, weight = 3, 'euclidean', 'uniform'
input_val = [4, 4]
# X and y stay None when the query is rejected by check_data
X = y = None
if check_data(df, k, input_val, weight, metric):
    # Feature rows as plain lists, labels as a parallel list
    X = df.drop(label, axis=1).values.tolist()
    y = df[label].values.tolist()
    print("Predicted value: ",KNearestNeighbors(X, y, k, input_val, weight, metric))

Predicted value:  blue


In [None]:
def _read_float(prompt):
    '''Keep prompting until the user types a valid number; return it as a float.'''
    while True:
        try:
            return float(input(prompt))
        except ValueError:
            print("Please select provide a valid input!")


def _read_binary_choice(prompt):
    '''Keep prompting until the user answers 0 or 1; return the answer as an int.'''
    while True:
        try:
            choice = int(input(prompt))
        except ValueError:
            # Non-numeric text is handled like any other invalid answer
            choice = None
        if choice in (0, 1):
            return choice
        print("Please select provide a valid input!")


# Function to take input values from the user
def predict(X,y,k=3):
    '''
    Ask the user for a point and the k-NN options, then print the predicted label.

    params: {X, y, k}
    - X : { Input data }
    - type : list

    - y : { Output label }
    - type : list

    - k : { number of neighbors, defaults to 3 }
    - type : int

    - returns: {query}
    - query : { the parameters used for the prediction, with keys
                'k', 'input_val', 'weight' and 'metric' }
    - type: dict
    '''
    x_c = _read_float('Enter x-coordinate: ')
    y_c = _read_float('Enter y-coordinate: ')

    weight_inp = _read_binary_choice('For distance weighted K-NN enter 1 otherwise enter 0: ')
    weight = 'distance' if weight_inp == 1 else 'uniform'

    metric_inp = _read_binary_choice('Metric - For manhatten distance enter 0 for euclidean distance enter 1: ')
    # Follow the prompt: 0 selects manhattan, 1 selects euclidean
    # (the mapping used to be inverted relative to the prompt text).
    metric = 'manhattan' if metric_inp == 0 else 'euclidean'

    print("\nPredicted value: ",KNearestNeighbors(X, y,  k, [x_c,y_c], weight=weight, metric=metric))
    return {'k':k, 'input_val': [x_c,y_c], 'weight':weight, 'metric':metric}

In [None]:
input_dict = predict(X, y)

Enter x-coordinate: 5
Enter y-coordinate: 3
For distance weighted K-NN enter 1 otherwise enter 0: 1
Metric - For manhatten distance enter 0 for euclidean distance enter 1: 0

Predicted value:  blue
