Euclidean Distance Function

In [1]:
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        point1: Iterable of numeric coordinates.
        point2: Iterable of numeric coordinates with the same number of
            entries as ``point1``.

    Returns:
        The square root of the sum of squared coordinate differences.
        Two empty points are at distance 0.0.

    Raises:
        ValueError: If the two points have a different number of coordinates.
    """
    # Materialise both inputs so their lengths can be compared even when the
    # caller passes a generator or other one-shot iterable.
    coords1 = tuple(point1)
    coords2 = tuple(point2)

    # zip() stops at the shorter input, which would silently drop the extra
    # coordinates and return a wrong distance, so reject mismatches up front.
    if len(coords1) != len(coords2):
        raise ValueError(
            "points must have the same number of coordinates, got {} and {}".format(
                len(coords1), len(coords2)
            )
        )

    return sum((p1 - p2) ** 2 for p1, p2 in zip(coords1, coords2)) ** 0.5

KNN Classifier Class

In [2]:
class KNNClassifier:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    A query point receives the label that occurs most often among its ``k``
    closest training samples. When several labels share the highest count,
    the label of the closest neighbour among those tied labels is returned.

    Attributes:
        k: Number of neighbours that take part in the vote.
        X_train: Training samples as passed to ``fit`` (stored by reference).
        y_train: Training labels as passed to ``fit`` (stored by reference).
    """

    def __init__(self, k=3):
        """Create an unfitted classifier.

        Args:
            k: Number of neighbours to consult; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        # A zero k leaves no voters and a negative k would turn the slice
        # ``[:k]`` into "all but the last |k|" neighbours, so reject both.
        if k < 1:
            raise ValueError("k must be a positive integer, got {!r}".format(k))
        self.k = k
        self.X_train = []
        self.y_train = []

    def fit(self, X, y):
        """Store the training data; no model is computed at this stage.

        Args:
            X: Sized collection of training points, each an iterable of
                numeric coordinates.
            y: Indexable collection of hashable labels, one per point in ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same length, got {} and {}".format(len(X), len(y))
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a label for every point in ``X``.

        Args:
            X: Iterable of query points.

        Returns:
            A list with one predicted label per query point, in input order.

        Raises:
            ValueError: If ``X`` is non-empty and no training data is stored.
        """
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Predict the label of a single query point ``x``.

        Raises:
            ValueError: If no training data is stored.
        """
        if len(self.X_train) == 0:
            raise ValueError(
                "No training data: call fit() with at least one sample before predict()"
            )

        # Distance from x to every training sample, in training order.
        distances = [euclidean_distance(x, x_train) for x_train in self.X_train]

        # Indices of the k closest samples, nearest first. sorted() is stable,
        # so equidistant samples keep their training order. If k exceeds the
        # number of samples, the slice simply keeps all of them.
        k_indices = sorted(range(len(distances)), key=lambda i: distances[i])[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Count the votes for each label.
        label_counts = {}
        for label in k_nearest_labels:
            label_counts[label] = label_counts.get(label, 0) + 1
        max_count = max(label_counts.values())

        # Tie-breaking: the labels are ordered nearest first, so the first one
        # holding the top count belongs to the closest neighbour among the
        # tied labels. A winner always exists because max_count is taken from
        # these same labels.
        for label in k_nearest_labels:
            if label_counts[label] == max_count:
                return label

Example Usage

In [3]:
# Example training data: the four corners of the unit square.
# Points with x == 0 carry label 0; points with x == 1 carry label 1.
X_train = [[0, 0], [1, 1], [1, 0], [0, 1]]
y_train = [0, 1, 1, 0]

# Example test data.
# [0.5, 0.5] is equidistant from all four training points, so the stable sort
# keeps the first three in training order (labels 0, 1, 1) and label 1 wins.
# [1.5, 1.5] is closest to [1, 1], then [1, 0] and [0, 1] (labels 1, 1, 0),
# so label 1 wins again.
X_test = [[0.5, 0.5], [1.5, 1.5]]

# Create and train the classifier (each prediction is a vote among 3 neighbours)
classifier = KNNClassifier(k=3)
classifier.fit(X_train, y_train)

# Make predictions: one label per test point, in the same order as X_test
predictions = classifier.predict(X_test)
print("Predictions:", predictions)

Predictions: [1, 1]
