# Mini Project 7: KNN with NumPy
### 1. Simple nearest neighbor:
#### - Generate a list of n-dimensional points (the train set) and a single test point.
#### - Calculate the distance between the test point and each point in the train set.
#### - Determine the closest point in the train set.
### 2. Labelling
#### - In part 1, assign a label to each point in the train set. Choose a suitable number of labels.
#### - Return the label of the closest point, rather than the closest point.
### 3. K nearest neighbors:
#### - Determine the K nearest points. 
#### - Find out the majority label.
#### - Run for different values of K.


In [None]:
import numpy as np

In [15]:
# Part 1: Finding the Nearest Neighbor

def generate_data(n, dim, num_labels):
    """Create a random labelled train set.

    Args:
        n: Number of points to generate.
        dim: Number of coordinates per point.
        num_labels: Number of distinct labels; labels are drawn from
            ``0 .. num_labels - 1``.

    Returns:
        A ``(points, point_labels)`` tuple: ``points`` has shape ``(n, dim)``
        with coordinates drawn uniformly from ``[0, 10)``, and
        ``point_labels`` has shape ``(n,)`` with one integer label per point.
    """
    # Coordinates first, labels second: the draw order determines the
    # values produced under a fixed np.random seed.
    points = 10 * np.random.rand(n, dim)
    point_labels = np.random.randint(0, num_labels, size=n)
    return points, point_labels

def euclidean_distance(p1, p2):
    """Return the Euclidean (L2) distance between points ``p1`` and ``p2``.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays
    of the same shape). The squared differences are summed over all
    elements before the square root is taken.
    """
    delta = p1 - p2
    squared_sum = np.sum(delta * delta)
    return np.sqrt(squared_sum)

def find_nearest_neighbor(train_data, test_point):
    """Return the index of the train point closest to ``test_point``.

    Euclidean distances to every train point are computed in one
    broadcasted NumPy expression instead of a Python-level loop.

    Args:
        train_data: Array-like of shape ``(n, dim)``, one point per row.
        test_point: Array-like of shape ``(dim,)``.

    Returns:
        The row index (a NumPy integer) of the nearest train point. When
        several points are equally close, the lowest index is returned.

    Raises:
        ValueError: If ``train_data`` contains no points.
    """
    train_data = np.asarray(train_data)
    if train_data.size == 0:
        raise ValueError("train_data must contain at least one point")

    # Broadcasting subtracts test_point from every row at once.
    diffs = train_data - np.asarray(test_point)
    distances = np.sqrt(np.sum(diffs ** 2, axis=1))
    return np.argmin(distances)

In [17]:
# Build a small random train set and a random query point
n = 10          # number of train points
dim = 2         # dimensionality of each point
num_labels = 3  # number of distinct labels
train_data, labels = generate_data(n, dim, num_labels)
test_point = 10 * np.random.rand(dim)  # same [0, 10) range as the train set

# Part 1 result: position and coordinates of the closest train point
nearest_index = find_nearest_neighbor(train_data, test_point)
print("Nearest neighbor is at index:", nearest_index)
print("Nearest neighbor coordinates:", train_data[nearest_index])

Nearest neighbor is at index: 5
Nearest neighbor coordinates: [0.34743066 8.33607922]


In [19]:
# Part 2: Getting the Label

def get_label(nearest_index, labels):
    """Look up the label stored at position ``nearest_index`` in ``labels``."""
    label = labels[nearest_index]
    return label

# Part 2 result: report the label of the closest train point found in Part 1
nearest_label = get_label(nearest_index, labels)
print("Label of the nearest neighbor:", nearest_label)

Label of the nearest neighbor: 2


In [21]:
# Part 3: K-Nearest Neighbors

def find_k_nearest_neighbors(train_data, test_point, k):
    """Return the indices of the ``k`` train points closest to ``test_point``.

    Euclidean distances to every train point are computed in one
    broadcasted NumPy expression instead of a Python-level loop.

    Args:
        train_data: Array-like of shape ``(n, dim)``, one point per row.
        test_point: Array-like of shape ``(dim,)``.
        k: Number of neighbours to return. If ``k`` exceeds the number of
            train points, all indices are returned.

    Returns:
        A 1-D integer array of at most ``k`` row indices, ordered from
        nearest to farthest. Equidistant points keep their original
        relative order.

    Raises:
        ValueError: If ``k`` is negative (a negative slice bound would
            silently drop the farthest points instead of selecting
            neighbours).
    """
    if k < 0:
        raise ValueError("k must be non-negative")

    train_data = np.asarray(train_data)
    if train_data.size == 0:
        # No points to rank; mirror np.argsort on an empty sequence.
        return np.empty(0, dtype=np.intp)

    # Broadcasting subtracts test_point from every row at once.
    diffs = train_data - np.asarray(test_point)
    distances = np.sqrt(np.sum(diffs ** 2, axis=1))
    # A stable sort makes tie-breaking deterministic (lower index first).
    return np.argsort(distances, kind="stable")[:k]

def majority_label(nearest_indices, labels):
    """Return the most frequent label among the selected neighbours.

    Args:
        nearest_indices: Integer indices into ``labels``.
        labels: NumPy array holding one label per train point.

    Returns:
        The label that occurs most often among ``labels[nearest_indices]``.
        On a tie, the smallest label wins, because ``np.unique`` returns
        sorted values and ``argmax`` picks the first maximum.
    """
    neighbor_labels = labels[nearest_indices]
    unique_labels, frequencies = np.unique(neighbor_labels, return_counts=True)
    winner = frequencies.argmax()
    return unique_labels[winner]


In [23]:
# Check different K values: classify the same test point with K = 1, 3 and 5
for k in [1, 3, 5]:
    # Indices of the k train points closest to the test point
    k_nearest_indices = find_k_nearest_neighbors(train_data, test_point, k)
    # Most frequent label among those k neighbours
    k_nearest_label = majority_label(k_nearest_indices, labels)
    print("For K=", k, "most common label:", k_nearest_label)


For K= 1 most common label: 2
For K= 3 most common label: 2
For K= 5 most common label: 2
