Name: Jeremy Marcus Tan <br>
ID No: 204947 <br>
Date: August 21, 2024

In [2]:
import pandas as pd
import sklearn

## Weighted KNN
- We assign weights to each of the nearest neighbors, where the weight is the reciprocal of the distance.
- The algorithm assigns the test data point to the class whose nearest neighbors have the greater total (summed) weight.
- This differs from regular KNN, where all nearest neighbors are given equal weight.

In [3]:
# Read the case-study CSV file into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer="Case Study 1.csv")

In [4]:
# Display the loaded DataFrame to inspect its columns and values
df

Unnamed: 0,x1,x2,x3,y,Distance Function
0,0.2,0.48,0.46,0,0.374166
1,0.45,0.04,0.09,0,0.286705
2,0.08,0.41,0.17,0,0.339706
3,0.02,0.05,0.04,0,0.482183
4,0.02,0.35,0.28,0,0.391535
5,0.29,0.21,0.35,0,0.20664
6,0.43,0.27,0.08,0,0.127279
7,0.03,0.49,0.5,0,0.512835
8,0.32,0.18,0.08,0,0.187617
9,0.05,0.03,0.23,0,0.443058


In [52]:
from sklearn.neighbors import KNeighborsClassifier

# Define the train and test sets (positional split).
# Rows 0-20 (the first 21 rows) are the labelled sample points (training set), while
# the row at position 21 (the 22nd row) is the data point we are going to classify (test set).
# Columns 0-2 are the features (x1, x2, x3) and column 3 is the class label (y).
X_train = df.iloc[0:21, 0:3]
X_test = df.iloc[21, 0:3]
Y_train = df.iloc[0:21, 3]
Y_test = df.iloc[21, 3]

# We consider the k = 3 nearest neighbors.
# weights='distance' weights each neighbor by the inverse of its distance (Weighted KNN).
knn = KNeighborsClassifier(n_neighbors=3, weights='distance')

knn.fit(X_train, Y_train)

# We use the kneighbors function to get the K-neighbors of the test point.
# It returns the distances to the neighbors and their row positions within the training set.
# The test point is passed as a one-row DataFrame so it carries the same feature names as
# X_train; passing a bare NumPy array to a model fitted on a DataFrame makes scikit-learn
# emit an "X does not have valid feature names" warning.
distances, indices = knn.kneighbors(X_test.to_frame().T)

# Inverse-distance weights for the k nearest neighbors.
# If the test point coincides with a training point (distance 0), 1/distance would
# divide by zero. In that case the exact matches get weight 1 and every other neighbor
# gets weight 0, which is the convention scikit-learn uses for weights='distance'.
exact_match = distances == 0
if exact_match.any():
    weights = exact_match.astype(float)
else:
    weights = 1 / distances

# Class label of each nearest neighbor, in the same order as indices[0].
# `indices` holds row positions within the training set, so the labels have to be
# looked up at those positions (not at positions 0..k-1 of the dataset).
labels = Y_train.iloc[indices[0]].tolist()

print(f"Test point: {X_test.to_dict()}")
print(f"Nearest neighbors' indices: {indices[0]}")
print(f"Distances: {distances[0]}")
print(f"Weights: {weights[0]}")

# Weighted vote: accumulate the neighbor weights per class, then pick the larger total.

wsum0 = 0  # total weight of the neighbors labelled 0
wsum1 = 0  # total weight of the neighbors labelled 1

for i, w in enumerate(weights[0]):
    neighbor_label = labels[i]
    if neighbor_label == 0:
        wsum0 += w
    elif neighbor_label == 1:
        wsum1 += w

print(f'The weighted sum of class 0 is {wsum0}')
print(f'The weighted sum of class 1 is {wsum1}')

# Class 0 wins only with a strictly larger total; otherwise (including a tie) class 1 is reported.
print("The label for the test point is 0." if wsum0 > wsum1 else "The label for the test point is 1.")


Test point: {'x1': 0.4, 'x2': 0.3, 'x3': 0.2}
Nearest neighbors' indices: [6 8 5]
Distances: [0.12727922 0.18761663 0.20663978]
Weights: [7.85674201 5.33001791 4.83933918]
The weighted sum of class 0 is 18.026099107032394
The weighted sum of class 1 is 0
The label for the test point is 0.


