In [None]:
# Fetch the dataset from Google Drive by file ID with the gdown CLI
# (IPython shell escape; the captured output shows it saved as multiclass.csv).
!gdown 1ZdhRqYv-JizWV6DxO6C4R_k1kxPhmlF2

Downloading...
From: https://drive.google.com/uc?id=1ZdhRqYv-JizWV6DxO6C4R_k1kxPhmlF2
To: /content/multiclass.csv
  0% 0.00/14.6k [00:00<?, ?B/s]100% 14.6k/14.6k [00:00<00:00, 43.4MB/s]


**Step 1:** Import Libraries & Load Data

In [None]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split

# Read the CSV fetched by the gdown cell into a DataFrame.
df = pd.read_csv("multiclass.csv")

# Preview the first five rows as plain text, heading on its own line.
print("First 5 rows:", df.head(), sep="\n")

First 5 rows:
   Region  Fresh  Milk  Grocery  Frozen  Detergents_Paper  Delicassen  class
0       3  12669  9656     7561     214              2674        1338      2
1       3   7057  9810     9568    1762              3293        1776      2
2       3   6353  8808     7684    2405              3516        7844      2
3       3  13265  1196     4221    6404               507        1788      1
4       3  22615  5410     7198    3915              1777        5185      1


**Step 2:** Preprocessing (Split Features & Labels)

In [None]:
# Feature matrix: every column except the "class" label.
X = df.drop(columns='class').to_numpy()
# Target vector: the "class" column.
y = df['class'].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training set size:", X_train.shape)
print("Test set size:", X_test.shape)

Training set size: (352, 7)
Test set size: (88, 7)


**Step 3:** Define Distance Function

In [None]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like of real numbers
        Coordinates of the two points (NumPy arrays, lists or tuples) with
        broadcast-compatible shapes.

    Returns
    -------
    numpy.float64
        sqrt(sum((x1 - x2) ** 2)) taken over all elements.
    """
    # Cast to float64 before subtracting: with small or unsigned integer
    # dtypes (e.g. uint8) the difference and its square would wrap around,
    # and plain Python lists do not support `-` at all.
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

**Step 4:** Build KNN Classifier (from Scratch)

In [None]:
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    A lazy learner: `fit` only stores the training data, and all the work
    happens in `predict`.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        If k exceeds the number of training samples, all of them vote.

    Raises
    ------
    ValueError
        If k is not a positive integer.
    """

    def __init__(self, k=3):
        # Fail early: k == 0 would otherwise only surface as an IndexError
        # deep inside the vote at prediction time.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k   # number of neighbors

    def fit(self, X, y):
        """Store the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric training features.
        y : array-like of shape (n_samples,)
            Class label for each row of X.

        Raises
        ------
        ValueError
            If X is not 2-D, is empty, or y does not have one label per row.
        """
        # Convert to plain arrays so later indexing is positional. A pandas
        # Series with a shuffled index would otherwise be looked up by label
        # and return the wrong neighbour labels. Float features also avoid
        # integer overflow when differences are squared.
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-D with shape (n_samples, n_features)")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one training sample")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-D with one label per row of X")
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a class label for every row of X.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)
            Samples to classify; must have the same feature count as the
            training data. An empty input yields an empty array.

        Returns
        -------
        numpy.ndarray of shape (n_queries,)
            Predicted labels.

        Raises
        ------
        ValueError
            If X is not 2-D or its feature count differs from the training data.
        """
        # asarray makes iteration row-wise even for a DataFrame, which
        # would otherwise iterate over its column names.
        X = np.asarray(X, dtype=np.float64)
        if X.size == 0:
            return np.array([])
        if X.ndim != 2 or X.shape[1] != self.X_train.shape[1]:
            # A 1-D or mismatched input would silently broadcast against
            # the training matrix and produce meaningless predictions.
            raise ValueError(
                "X must be 2-D with the same number of features as the training data"
            )
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        """Return the majority label among the k training samples nearest to x."""
        # Step 1: Euclidean distance from x to every training sample in one
        # vectorised operation (one distance per training row).
        distances = np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))

        # Step 2: Sort by distance and get indices of k nearest neighbors
        k_indices = np.argsort(distances)[:self.k]

        # Step 3: Collect the labels of the k nearest neighbors (nearest first)
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Step 4: Majority vote -> most common class. Counter breaks count
        # ties by first occurrence, i.e. in favour of the label seen
        # earliest in nearest-first order.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

**Step 5:** Train and Test Scratch KNN

In [None]:
# Build the from-scratch classifier with 3 neighbours and hand it the training split
knn_scratch = KNN(k=3)
knn_scratch.fit(X_train, y_train)

# Classify every held-out sample
predictions = knn_scratch.predict(X_test)

# Accuracy = fraction of test labels predicted correctly
accuracy = (predictions == y_test).mean()
print("Scratch KNN Accuracy:", round(accuracy, 2))

Scratch KNN Accuracy: 0.92
