## k-Nearest Neighbors Classification Algorithm

### Distance Metrics

In [1]:
import numpy as np

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : operands supporting elementwise subtraction (e.g. NumPy arrays
        of equal shape).

    Returns
    -------
    The square root of the summed squared coordinate differences.
    """
    delta = x1 - x2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

def manhattan_distance(x1, x2):
    """Return the Manhattan (L1, city-block) distance between two points.

    Parameters
    ----------
    x1, x2 : operands supporting elementwise subtraction (e.g. NumPy arrays
        of equal shape).

    Returns
    -------
    The sum of the absolute coordinate differences.
    """
    abs_delta = np.abs(x1 - x2)
    return np.sum(abs_delta)

def minkowski_distance(x1, x2, p=3):
    """Return the Minkowski distance of order ``p`` between two points.

    Parameters
    ----------
    x1, x2 : operands supporting elementwise subtraction (e.g. NumPy arrays
        of equal shape).
    p : int or float, default 3
        Order of the distance. ``p=1`` gives the Manhattan distance, ``p=2``
        the Euclidean distance, and ``p=inf`` the Chebyshev distance.
        Must be strictly positive.

    Returns
    -------
    ``(sum(|x1 - x2| ** p)) ** (1 / p)``, or ``max(|x1 - x2|)`` when ``p`` is
    infinite.

    Raises
    ------
    ValueError
        If ``p`` is zero or negative.
    """
    # p == 0 would divide by zero in the root below and negative orders do not
    # describe a distance, so reject both with a clear message.
    if p <= 0:
        raise ValueError("p must be positive")

    abs_delta = np.abs(x1 - x2)

    # For infinite p the generic formula degenerates to ``inf ** 0 == 1``;
    # the mathematical limit is the largest absolute coordinate difference.
    if np.isinf(p):
        return np.max(abs_delta)

    return np.sum(abs_delta ** p) ** (1 / p)

def cosine_distance(x1, x2):
    """Return the cosine distance (``1 - cosine similarity``) between two vectors.

    Parameters
    ----------
    x1, x2 : 1-D vectors of equal length accepted by ``np.dot`` and
        ``np.linalg.norm``.

    Returns
    -------
    ``1 - dot(x1, x2) / (||x1|| * ||x2||)``. When either vector has zero
    length the angle is undefined; in that case the similarity is treated as
    0 and the function returns ``1.0`` instead of dividing by zero.
    """
    dot_product = np.dot(x1, x2)
    norm_product = np.linalg.norm(x1) * np.linalg.norm(x2)

    # A zero-length vector would otherwise produce 0 / 0 == NaN (plus a
    # RuntimeWarning), and NaN distances cannot be ordered reliably by callers
    # that sort on the result.
    if norm_product == 0:
        return 1.0

    return 1 - dot_product / norm_product

def chebyshev_distance(x1, x2):
    """Return the Chebyshev (L-infinity) distance between two points.

    Parameters
    ----------
    x1, x2 : operands supporting elementwise subtraction (e.g. NumPy arrays
        of equal shape).

    Returns
    -------
    The largest absolute coordinate difference.
    """
    abs_delta = np.abs(x1 - x2)
    return np.max(abs_delta)

# Two sample points in 3-D space, compared under every metric defined above
x1 = np.array([2, 4, 6])
x2 = np.array([3, 5, 7])

# Evaluate each metric on the same pair of points
euclidean_dist = euclidean_distance(x1, x2)
manhattan_dist = manhattan_distance(x1, x2)
minkowski_dist = minkowski_distance(x1, x2, p=3)  # order 3, passed explicitly
cosine_dist = cosine_distance(x1, x2)
chebyshev_dist = chebyshev_distance(x1, x2)

# Build the report line by line, then write it out in a single call
report_lines = [
    "Distances between points x1 and x2:",
    f"Euclidean Distance: {euclidean_dist:.3f}",
    f"Manhattan Distance: {manhattan_dist:.3f}",
    f"Minkowski Distance (p=3): {minkowski_dist:.3f}",
    f"Cosine Distance: {cosine_dist:.3f}",
    f"Chebyshev Distance: {chebyshev_dist:.3f}",
]
print("\n".join(report_lines))

Distances between points x1 and x2:
Euclidean Distance: 1.732
Manhattan Distance: 3.000
Minkowski Distance (p=3): 1.442
Cosine Distance: 0.003
Chebyshev Distance: 1.000


### Algorithm Implementation

In [2]:
from collections import Counter

def k_nearest_neighbors(training_data, test_data, k, metric='euclidean', p=3, verbose=True):
    """Classify each test point by majority vote among its k nearest training points.

    Every row of ``training_data`` and ``test_data`` holds the feature values
    followed by one trailing element. For training rows that trailing element
    is the class label; for test rows it is ignored (only ``row[:-1]`` is
    used for the distance computation).

    Parameters
    ----------
    training_data : sequence of rows
        Labelled points: features followed by the class label.
    test_data : sequence of rows
        Points to classify, laid out like the training rows.
    k : int
        Number of nearest neighbors that vote. Must be at least 1. When it
        exceeds the number of training rows, all training rows vote.
    metric : str, default 'euclidean'
        One of 'euclidean', 'manhattan', 'minkowski', 'cosine', 'chebyshev'.
    p : int or float, default 3
        Order forwarded to ``minkowski_distance``; only used when
        ``metric='minkowski'``.
    verbose : bool, default True
        When true, print the selected neighbors and the predicted class for
        every test point. Pass ``False`` to classify silently.

    Returns
    -------
    list
        Predicted labels, one per row of ``test_data``, in the same order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, ``metric`` is not supported, or there are
        no training rows to vote.
    """
    # k == 0 used to surface as an IndexError on the empty vote below, and a
    # negative k silently dropped the farthest points via slicing.
    if k < 1:
        raise ValueError("k must be at least 1")

    distance_func = {
        'euclidean': euclidean_distance,
        'manhattan': manhattan_distance,
        'minkowski': lambda x1, x2: minkowski_distance(x1, x2, p),
        'cosine': cosine_distance,
        'chebyshev': chebyshev_distance
    }

    if metric not in distance_func:
        raise ValueError("Unsupported metric")

    # Resolve the metric once instead of on every pairwise comparison.
    compute_distance = distance_func[metric]

    predictions = []
    for test_point in test_data:
        test_features = test_point[:-1]
        distances = [
            (train_point, compute_distance(test_features, train_point[:-1]))
            for train_point in training_data
        ]
        if not distances:
            raise ValueError("training_data must contain at least one row")

        # list.sort is stable, so equidistant training rows keep their
        # original relative order.
        distances.sort(key=lambda pair: pair[1])
        k_nearest = distances[:k]

        # Labels are listed nearest-first; Counter.most_common resolves equal
        # counts by first occurrence, so a tie goes to the closer neighbor.
        k_labels = [neighbor[-1] for neighbor, _ in k_nearest]
        predicted_label = Counter(k_labels).most_common(1)[0][0]
        predictions.append(predicted_label)

        if verbose:
            print(f"Test Point: {test_features}")
            print("Top k neighbors:")
            for neighbor, dist in k_nearest:
                print(f"Neighbor: {neighbor[:-1]}, Distance: {dist:.2f}, Class: {neighbor[-1]}")
            print(f"Predicted class: {predicted_label}\n")

    return predictions

# Example usage
# Toy training set: each row is three feature values followed by its class label
training_data = np.array(
    [
        [1, 1, 1, 'Blue'],
        [1, 3, 2, 'Blue'],
        [4, 2, 3, 'Green'],
        [5, 3, 2, 'Green'],
        [6, 1, 1, 'Red'],
    ],
    dtype=object,
)

# Points to classify; the trailing '?' is a placeholder the classifier ignores
test_data = np.array(
    [
        [3, 2, 2, '?'],
        [2, 1, 3, '?'],
    ],
    dtype=object,
)

# Parameters of the run
k = 3
metric = 'euclidean'  # Can be 'euclidean', 'manhattan', 'minkowski', 'cosine', 'chebyshev'

# Run the classifier; it prints a per-point report and returns the predicted labels
results = k_nearest_neighbors(training_data, test_data, k=k, metric=metric)

Test Point: [3 2 2]
Top k neighbors:
Neighbor: [4 2 3], Distance: 1.41, Class: Green
Neighbor: [1 3 2], Distance: 2.24, Class: Blue
Neighbor: [5 3 2], Distance: 2.24, Class: Green
Predicted class: Green

Test Point: [2 1 3]
Top k neighbors:
Neighbor: [1 1 1], Distance: 2.24, Class: Blue
Neighbor: [4 2 3], Distance: 2.24, Class: Green
Neighbor: [1 3 2], Distance: 2.45, Class: Blue
Predicted class: Blue

