---
# **K-Nearest Neighbours Algorithm**
---

# Importing the needed libraries

In [1]:
import pandas as pd
import math

# Function to calculate the Euclidean distance between two points (each represented as a list), rounded to three decimal places

In [2]:
def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two points, rounded to 3 decimals.

    Args:
        point1: Sequence of coordinates; each value is converted with
            ``float()`` before use.
        point2: Sequence of coordinates with the same length as ``point1``.

    Returns:
        The distance as a float rounded to three decimal places.

    Raises:
        ValueError: If the two points do not have the same number of
            coordinates.
    """
    # Fail loudly on a dimension mismatch instead of silently ignoring the
    # extra coordinates of the longer point.
    if len(point1) != len(point2):
        raise ValueError(
            "Points must have the same number of coordinates, "
            f"got {len(point1)} and {len(point2)}"
        )

    squared_sum = 0
    for coord1, coord2 in zip(point1, point2):
        squared_sum += (float(coord1) - float(coord2)) ** 2
    return round(math.sqrt(squared_sum), 3)

# KNN Function

In [3]:
def knn(train_points, test_point, k):
    """Classify ``test_point`` by majority vote among its k nearest training points.

    Prints a table of the k nearest neighbours and the per-class vote counts,
    then returns the winning class.

    Args:
        train_points: Iterable of rows. Every element of a row except the last
            is a feature value; the last element is the class label.
        test_point: Feature values of the point to classify, in the same
            order as the features of each training row.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the k nearest points. On a tie,
        the label whose first vote came from the closest neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training points.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Each entry: (train features, test point, distance, train label).
    table = []
    for row in train_points:
        dist = euclidean_distance(row[:-1], test_point)  # row[:-1] drops the target column
        table.append((row[:-1], test_point, dist, row[-1]))

    # Validate before printing anything so a bad k never leaves a half-drawn table.
    if k > len(table):
        raise ValueError(
            f"k ({k}) cannot exceed the number of training points ({len(table)})"
        )

    table.sort(key=lambda entry: entry[2])  # Ascending by distance (stable for equal distances)
    nearest = table[:k]

    class_count = {}  # label -> number of votes among the k nearest points

    # Display the table
    print("=========================K-Nearest Points=========================")
    print("Point\t\tTest Point\t\tDistance\tClass")
    for train_features, queried_point, dist, label in nearest:
        print(f"{train_features}\t{queried_point}\t{dist}\t\t{label}")
        class_count[label] = class_count.get(label, 0) + 1
    print("==================================================================")

    # Displaying the class count
    print("======================")
    for class_name, count in class_count.items():
        print(f"Class: {class_name} | Count: {count}")
    print("======================")

    # The most frequently occurring class among the k nearest points is the prediction
    predicted_label = max(class_count, key=class_count.get)
    return predicted_label

# Reading the Dataset

In [4]:
# Location of the diabetes CSV that is loaded into the dataframe.
DATASET_URL = "https://raw.githubusercontent.com/harrishragavendar/Datasets/main/diabetes.csv"
df = pd.read_csv(DATASET_URL)

# Selection of features

In [5]:
# Every column except the last one is offered as a selectable feature.
candidate_columns = df.columns[:-1]

print("======Available features======")
for position in range(len(candidate_columns)):
    print(f"{position} -> {candidate_columns[position]}")
print("==============================")

# The reply is a comma-separated list of indices; it is kept as raw strings here.
raw_selection = input("Enter selected features indices: ")
feature_indices = raw_selection.split(',')

0 -> Pregnancies
1 -> Glucose
2 -> BloodPressure
3 -> SkinThickness
4 -> Insulin
5 -> BMI
6 -> DiabetesPedigreeFunction
7 -> Age
Enter selected features indices: 0,1,2


# Retrieving the column names from indices

In [6]:
# The last dataframe column is the target, so only indices
# 0 .. len(df.columns) - 2 refer to selectable features. Rejecting anything
# else prevents the target (e.g. via -1) from being picked as a feature.
feature_count = len(df.columns) - 1

selected_features = []
for raw_index in feature_indices:
    raw_index = raw_index.strip()
    if not raw_index:
        continue  # tolerate stray commas such as "0,1,"
    index = int(raw_index)
    if not 0 <= index < feature_count:
        raise ValueError(
            f"Feature index {index} is out of range; expected 0 to {feature_count - 1}"
        )
    feature_name = df.columns[index]
    if feature_name not in selected_features:  # a repeated index selects the feature once
        selected_features.append(feature_name)

if not selected_features:
    raise ValueError("Select at least one feature index")

print(f"Selected features are {selected_features}")

Selected features are ['Pregnancies', 'Glucose', 'BloodPressure']


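# Also select the target column, so that each training point carries its class label
# The target is used only to count classes among the nearest neighbours; it is never part of the distance calculation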

In [7]:
# Keep the target as the final column exactly once. Rebuilding the list
# (instead of appending in place) makes this cell safe to re-run and
# guarantees the target never also appears among the features.
target_column = df.columns[-1]
selected_features = [name for name in selected_features if name != target_column]
selected_features.append(target_column)

# Extract the selected columns (all rows) from the dataframe and convert each row to a list of values, giving a list of points: [[...], [...], ...]

In [8]:
# All rows, restricted to the chosen feature columns plus the target.
selected_frame = df.loc[:, selected_features]
# One plain Python list per row.
train_points = selected_frame.values.tolist()

In [9]:
# Interactive menu: keep classifying new points until the user chooses to exit.
# Invalid input is reported and the menu is shown again rather than crashing
# the cell.
while True:
    print("========MENU========")
    print("1 > Test new point")
    print("2 > Exit")
    print("====================")
    try:
        ch = int(input("Enter your choice: "))
    except ValueError:
        # Non-numeric choice: treat it like any other invalid option.
        print("Enter a valid choice.")
        continue

    if ch == 1:
        try:
            # Ask for one value per selected feature; the last entry of
            # selected_features is the target and is not prompted for.
            test_point = []
            for feature in selected_features[:-1]:
                value = float(input(f"Enter value for {feature}: "))
                test_point.append(value)
            print(f"Test point: {test_point}")

            k = int(input("Enter the value of K: "))
        except ValueError:
            print("Invalid input: feature values and K must be numeric.")
            continue

        # K must select at least one and at most all of the training points.
        if not 1 <= k <= len(train_points):
            print(f"K must be between 1 and {len(train_points)}.")
            continue

        predicted_target = knn(train_points, test_point, k)
        print(f"\nPredicted Target: {predicted_target}")
    elif ch == 2:
        break
    else:
        print("Enter a valid choice.")

1 > Test new point
2 > Exit
Enter your choice: 1
Enter value for Pregnancies: 3
Enter value for Glucose: 124
Enter value for BloodPressure: 76
Test point: [3.0, 124.0, 76.0]
Enter the value of K: 5
Point		Test Point		Distance	Class
[2, 122, 76]	[3.0, 124.0, 76.0]	2.236		0
[5, 124, 74]	[3.0, 124.0, 76.0]	2.828		1
[3, 122, 78]	[3.0, 124.0, 76.0]	2.828		0
[1, 124, 74]	[3.0, 124.0, 76.0]	2.828		0
[5, 123, 74]	[3.0, 124.0, 76.0]	3.0		0
Class: 0 | Count: 4
Class: 1 | Count: 1

Predicted Target: 0
1 > Test new point
2 > Exit
Enter your choice: 2


---