# **K Nearest Neighbours - Practical Implementation**

In [1]:
# importing libraries
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# load the iris example dataset through seaborn and preview the first five rows
df = sns.load_dataset(name='iris')
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


**Label Encoding**

In [3]:
# distinct class names present in the label column
pd.unique(df['species'])

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [4]:
# Encode the species names as integer class labels.
# Series.map turns every value that is missing from the dict into NaN, so running
# this cell a second time (when the column already holds 0/1/2) would wipe out
# the labels. The dtype guard makes the cell safe to re-run.
species_codes = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
if not pd.api.types.is_numeric_dtype(df['species']):
    df['species'] = df['species'].map(species_codes)
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


**Train Test Split**

In [6]:
# importing library
from sklearn.model_selection import train_test_split

In [7]:
# features: every column except the label; target: the label column kept as a
# one-column DataFrame
X = df.drop(columns='species')
y = df.loc[:, ['species']]

In [33]:
# hold out 30% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

**Standardizing**

In [9]:
# importing library
from sklearn.preprocessing import StandardScaler

In [34]:
# learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both splits so nothing from the test set leaks into it
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## **K Nearest Neighbour Implementation**

In [13]:
# importing k nearest neighbour
from sklearn.neighbors import KNeighborsClassifier

# importing GridSearchCV
from sklearn.model_selection import GridSearchCV

**Training**

In [26]:
# base estimator left at its defaults; n_neighbors and metric are supplied by
# the grid search that wraps it
knn = KNeighborsClassifier()

In [35]:
# Hyper-parameter grid: every pairing of neighbourhood size and distance metric.
# NOTE: KNeighborsClassifier defaults to p=2, for which 'minkowski' is the same
# distance as 'euclidean', so those two metric settings score identically.
parameters = {
    'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}
# 5-fold cross-validation on the training split, ranked by accuracy
knn_cv = GridSearchCV(knn, param_grid=parameters, scoring='accuracy', cv=5)
# y_train is a one-column DataFrame (it was selected with df[['species']]).
# scikit-learn expects a 1-D target and emits a DataConversionWarning for a
# column vector, so flatten it to shape (n_samples,) before fitting.
knn_cv.fit(X_train, np.ravel(y_train))

In [36]:
# best parameters: the grid combination with the highest mean cross-validated accuracy
knn_cv.best_params_

{'metric': 'euclidean', 'n_neighbors': 3}

In [37]:
# best score: mean cross-validated accuracy of the winning combination
# (measured on the training folds, not on the held-out test set)
knn_cv.best_score_

0.9428571428571428

**Testing**

In [20]:
# importing metrics
from sklearn.metrics import accuracy_score, classification_report

In [38]:
# predict labels for the scaled test features; with GridSearchCV's default
# refit=True this uses the best estimator refitted on the whole training set
knn_cv_pred = knn_cv.predict(X_test)

In [39]:
# accuracy score
# scikit-learn metrics take the ground truth first: accuracy_score(y_true, y_pred)
accuracy_score(y_test, knn_cv_pred)

1.0

In [40]:
# classification report
# Argument order is (y_true, y_pred). With the arguments swapped, precision and
# recall trade places and the support column counts predicted labels instead of
# true ones.
print(classification_report(y_test, knn_cv_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [43]:
# number of test rows per encoded class label
y_test.value_counts()

species
0          19
1          13
2          13
dtype: int64