<a href="https://colab.research.google.com/github/ishanjabade/BML_ISHAN/blob/main/Experiment_7_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Experiment 7: Build the K-Nearest Neighbours model on suitable dataset.**

**Dataset Used: Iris Dataset**

Step 1: Load and prepare the dataset.

In [2]:
import pandas as pd

file_path = '/content/IRIS.csv'

column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

df = pd.read_csv(file_path, header = None, names = column_names)

print(df.head())

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['class'] = le.fit_transform(df['class'])
df

x = df[['sepal_length','sepal_width','petal_length','petal_width']]
y = df['class']

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

print("Training size:", x_train.shape)
print("Testing size:", x_test.shape)

   sepal_length  sepal_width  petal_length  petal_width        class
0  sepal_length  sepal_width  petal_length  petal_width      species
1           5.1          3.5           1.4          0.2  Iris-setosa
2           4.9            3           1.4          0.2  Iris-setosa
3           4.7          3.2           1.3          0.2  Iris-setosa
4           4.6          3.1           1.5          0.2  Iris-setosa
Training size: (120, 4)
Testing size: (31, 4)


**Step 2: Manual Implementation**

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# ----------------------------------------------------------
# 1. Load dataset
# ----------------------------------------------------------
df = pd.read_csv("heart.csv")

# ----------------------------------------------------------
# 2. Convert categorical columns â†’ numeric
# ----------------------------------------------------------
le = LabelEncoder()
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = le.fit_transform(df[col])

# ----------------------------------------------------------
# 3. Split features & target
# ----------------------------------------------------------
X = df.drop("target", axis=1)
y = df["target"]

# ----------------------------------------------------------
# 4. Train-test split
# ----------------------------------------------------------
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ----------------------------------------------------------
# 5. Manual KNN Function
# ----------------------------------------------------------
def knn_predict(x_train, y_train, X_test, k):
    y_pred = []

    for test_sample in X_test.values:
        # distance calculation
        distances = np.sqrt(np.sum((x_train.values - test_sample)**2, axis=1))

        # find k smallest distances
        k_neighbors = np.argsort(distances)[:k]

        # get labels of k neighbors
        k_labels = [y_train.iloc[i] for i in k_neighbors]

        # majority vote
        y_pred.append(max(set(k_labels), key=k_labels.count))

    return np.array(y_pred)

# ----------------------------------------------------------
# 6. Predict (Manual KNN)
# ----------------------------------------------------------
k = 5
y_pred_manual = knn_predict(x_train, y_train, x_test, k)

# Manual accuracy
accuracy_manual = np.mean(y_pred_manual == y_test.values)
print("Manual KNN Accuracy:", accuracy_manual)

# ----------------------------------------------------------
# 7. Sklearn KNN (for comparison)
# ----------------------------------------------------------
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=5)
model.fit(x_train, y_train)
y_pred_sklearn = model.predict(x_test)

print("Sklearn KNN Accuracy:", accuracy_score(y_test, y_pred_sklearn))


Manual KNN Accuracy: 0.7317073170731707
Sklearn KNN Accuracy: 0.7317073170731707


**Step 3: Built-in method**

In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Initialize KNN model
model = KNeighborsClassifier(n_neighbors=20)

# Fit the model
model.fit(x_train, y_train)

# Predictions
y_pred = model.predict(x_test)

# Evaluation
print("Testing Score:", model.score(x_test, y_test))
print("Training Score:", model.score(x_train, y_train))
print("Accuracy Score:", accuracy_score(y_test, y_pred))


Testing Score: 0.7170731707317073
Training Score: 0.7682926829268293
Accuracy Score: 0.7170731707317073
