In [1]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

def euclidean_distance(p,q):
  """Return the Euclidean (L2) distance between two points.

  Both inputs are converted to floating-point arrays before they are
  subtracted, so unsigned or narrow integer inputs (for example uint8
  arrays) cannot wrap around when differenced and squared.

  Args:
    p: Coordinates of the first point (sequence or NumPy array of numbers).
    q: Coordinates of the second point, with a shape compatible with ``p``.

  Returns:
    The distance as a NumPy floating-point scalar.
  """
  # Subtract in float space: integer dtypes would silently wrap here.
  diff = np.asarray(p, dtype=float) - np.asarray(q, dtype=float)
  return np.sqrt(np.sum(diff**2))

In [2]:
class KNN:
  """Minimal k-nearest-neighbours classifier based on Euclidean distance.

  Training data is a mapping from a category label to the collection of
  points belonging to that category. A prediction is the majority category
  among the ``k`` stored points closest to the query point.
  """

  def __init__(self,neighbors=3):
    """Create an unfitted classifier.

    Args:
      neighbors: Number of nearest points that take part in the vote;
        must be at least 1.

    Raises:
      ValueError: If ``neighbors`` is smaller than 1.
    """
    # Zero would leave nobody to vote and a negative value would make the
    # slice in predict() drop the *farthest* points instead of keeping the
    # nearest ones, so reject both up front.
    if neighbors < 1:
      raise ValueError(f"neighbors must be at least 1, got {neighbors}")
    self.k = neighbors
    self.points = None

  def fit(self,points):
    """Store the training data; no computation happens here.

    Args:
      points: Mapping of category label -> iterable of points.
    """
    self.points = points

  def predict(self,new_point):
    """Return the majority category among the k nearest stored points.

    If fewer than ``k`` points are stored, all of them vote. When several
    categories receive the same number of votes, the one whose vote was
    encountered first (i.e. the one with the nearest voter) wins.

    Args:
      new_point: Coordinates of the point to classify.

    Returns:
      The winning category label (a key of the fitted mapping).

    Raises:
      ValueError: If ``fit`` has not been called or the training data
        contains no points.
    """
    if self.points is None:
      raise ValueError("KNN.predict() called before fit()")

    distances = [
      (euclidean_distance(point,new_point),category)
      for category in self.points
      for point in self.points[category]
    ]
    if not distances:
      raise ValueError("cannot predict: the training data contains no points")

    # Sort on the distance alone. Sorting the (distance, category) pairs
    # directly would fall back to comparing category labels on an exact
    # distance tie, which raises TypeError for labels that cannot be
    # ordered and lets label ordering decide which neighbour is "nearer".
    # sorted() is stable, so tied points keep their stored order.
    nearest = sorted(distances,key=lambda pair: pair[0])[:self.k]
    votes = Counter(category for _, category in nearest)

    return votes.most_common(1)[0][0]


In [3]:
# Classifier with the default neighbour count. `clf` is not referenced again
# in the visible cells; each experiment below builds its own KNN instance.
clf = KNN()

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()

# Feature matrix, integer targets, and the class name for each target index.
X, y = iris.data, iris.target
class_names = iris.target_names

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Converting Format
def convert_to_category_format(X, y, class_names):
    """Group samples by class name in the mapping layout that KNN.fit takes.

    Args:
        X: Iterable of samples; each sample must provide ``.tolist()``.
        y: Iterable of integer labels, used as indices into ``class_names``.
        class_names: Indexable collection of class names.

    Returns:
        A dict with one key per entry of ``class_names`` (in that order),
        each mapped to the list of samples carrying that label, with every
        sample converted to a plain Python list. Classes without samples
        map to an empty list.
    """
    # Create every bucket up front so classes with no samples still appear.
    grouped = {name: [] for name in class_names}

    for sample, target in zip(X, y):
        grouped[class_names[target]].append(sample.tolist())

    return grouped

train_points = convert_to_category_format(X_train, y_train, class_names)

# Banner plus split sizes; the trailing empty string produces the blank line.
print(
    "=== KNN IRIS CLASSIFIER ===",
    f"Training samples: {len(X_train)}",
    f"Test samples: {len(X_test)}",
    "",
    sep="\n",
)

# Score the classifier for several neighbourhood sizes.
for k in (1, 3, 5, 7):
    knn = KNN(neighbors=k)
    knn.fit(train_points)

    # One predicted class name per held-out sample.
    predictions = [knn.predict(sample.tolist()) for sample in X_test]

    # predictions hold class names while y_test holds integer labels, so each
    # label is mapped through class_names before the comparison.
    correct = len([
        guess
        for label, guess in zip(y_test, predictions)
        if class_names[label] == guess
    ])
    accuracy = correct / len(y_test)

    print(f"K={k}: {accuracy:.3f} accuracy ({correct}/{len(y_test)})")

print("\nDone!")


=== KNN IRIS CLASSIFIER ===
Training samples: 105
Test samples: 45

K=1: 1.000 accuracy (45/45)
K=3: 1.000 accuracy (45/45)
K=5: 1.000 accuracy (45/45)
K=7: 1.000 accuracy (45/45)

Done!


# Harder Dataset: Wine


In [11]:
from sklearn.datasets import load_wine

wine = load_wine()

# Feature matrix, integer targets, and the class name for each target index.
X, y = wine.data, wine.target
class_names = wine.target_names

# Same 70/30 split and seed as the iris cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

train_points = convert_to_category_format(X_train, y_train, class_names)

# Banner plus split sizes; the trailing empty string produces the blank line.
print(
    "=== KNN WINE CLASSIFIER ===",
    f"Training samples: {len(X_train)}",
    f"Test samples: {len(X_test)}",
    "",
    sep="\n",
)

# Score the classifier for several neighbourhood sizes.
for k in (1, 3, 5, 7, 11, 13, 15):
    knn = KNN(neighbors=k)
    knn.fit(train_points)

    # One predicted class name per held-out sample.
    predictions = [knn.predict(sample.tolist()) for sample in X_test]

    # predictions hold class names while y_test holds integer labels, so each
    # label is mapped through class_names before the comparison.
    correct = len([
        guess
        for label, guess in zip(y_test, predictions)
        if class_names[label] == guess
    ])
    accuracy = correct / len(y_test)

    print(f"K={k}: {accuracy:.3f} accuracy ({correct}/{len(y_test)})")

print("\nDone!")


=== KNN WINE CLASSIFIER ===
Training samples: 124
Test samples: 54

K=1: 0.796 accuracy (43/54)
K=3: 0.741 accuracy (40/54)
K=5: 0.741 accuracy (40/54)
K=7: 0.741 accuracy (40/54)
K=11: 0.741 accuracy (40/54)
K=13: 0.759 accuracy (41/54)
K=15: 0.759 accuracy (41/54)

Done!


# Another Dataset: Breast Cancer

In [13]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

# Feature matrix, integer targets, and the class name for each target index.
X, y = cancer.data, cancer.target
class_names = cancer.target_names

# Same 70/30 split and seed as the earlier cells.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

train_points = convert_to_category_format(X_train, y_train, class_names)

# Banner plus split sizes; the trailing empty string produces the blank line.
print(
    "=== KNN Breast Cancer CLASSIFIER ===",
    f"Training samples: {len(X_train)}",
    f"Test samples: {len(X_test)}",
    "",
    sep="\n",
)

# Score the classifier for several neighbourhood sizes.
for k in (1, 3, 5, 7, 11, 13, 15):
    knn = KNN(neighbors=k)
    knn.fit(train_points)

    # One predicted class name per held-out sample.
    predictions = [knn.predict(sample.tolist()) for sample in X_test]

    # predictions hold class names while y_test holds integer labels, so each
    # label is mapped through class_names before the comparison.
    correct = len([
        guess
        for label, guess in zip(y_test, predictions)
        if class_names[label] == guess
    ])
    accuracy = correct / len(y_test)

    print(f"K={k}: {accuracy:.3f} accuracy ({correct}/{len(y_test)})")

print("\nDone!")


=== KNN Breast Cancer CLASSIFIER ===
Training samples: 398
Test samples: 171

K=1: 0.936 accuracy (160/171)
K=3: 0.942 accuracy (161/171)
K=5: 0.959 accuracy (164/171)
K=7: 0.965 accuracy (165/171)
K=11: 0.977 accuracy (167/171)
K=13: 0.965 accuracy (165/171)
K=15: 0.965 accuracy (165/171)

Done!
