In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import math
import operator

# Read the CSV file
data = pd.read_csv("/content/drive/MyDrive/ML LAB/LAB-7/kmeans_1BM20CS059.csv")

# Print the first few rows of the dataset
print(data.head())

# Define a function that calculates the Euclidean distance between two data points
def euclideanDistance(data1, data2, length):
    distance = 0
    for x in range(length):
        distance += np.square(data1[x] - data2[x])
    return np.sqrt(distance)

# Define our KNN model
def knn(trainingSet, testSet, k):
    distances = {}
    length = len(testSet)

    # Calculating Euclidean distance between each row of training data and test data
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testSet, trainingSet.iloc[x], length)
        distances[x] = dist

    # Sorting them based on distance
    sorted_d = sorted(distances.items(), key=operator.itemgetter(1))

    neighbors = []

    # Extracting top k neighbors
    for x in range(k):
        neighbors.append(sorted_d[x][0])

    classVotes = {}

    # Determining the most frequent class in the neighbors array
    for x in range(len(neighbors)):
        response = trainingSet.iloc[neighbors[x]]['cluster']

        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1

    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0], neighbors

# Define the test set
testSet = [24.412, 32]

# Make predictions
print('\n\nWith 5 Nearest Neighbors\n')
k = 5
result, neighbors = knn(data, testSet, k)
print('Predicted class:', result)
print('Nearest Neighbors:', neighbors)


   ID       x       y  cluster
0   0  24.412  32.932        2
1   1  35.190  12.189        1
2   2  26.288  41.718        2
3   3   0.376  15.506        0
4   4  26.116   3.963        1


With 5 Nearest Neighbors

Predicted class: 2.0
Nearest Neighbors: [27, 29, 30, 19, 25]


In [4]:
# Define the actual classes
actualClasses = data['cluster']

# Calculate the predicted classes using the KNN model
predictedClasses = []

for idx in range(len(data)):
    testSet = [data.iloc[idx]['x'], data.iloc[idx]['y']]
    result, _ = knn(data, testSet, k)
    predictedClasses.append(result)

# Compute accuracy
accuracy = (predictedClasses == actualClasses).sum() / len(data) * 100

# Compute precision, recall, and F1-score
from sklearn.metrics import classification_report

print('Accuracy:', accuracy)
print(classification_report(actualClasses, predictedClasses))


Accuracy: 28.333333333333332
              precision    recall  f1-score   support

           0       0.04      0.07      0.05        15
           1       0.00      0.00      0.00        22
           2       0.55      0.70      0.62        23

    accuracy                           0.28        60
   macro avg       0.20      0.25      0.22        60
weighted avg       0.22      0.28      0.25        60

