Q1. Write a Python code to implement the KNN classifier algorithm on load_iris dataset in
sklearn.datasets.<Br>
Q2. Write a Python code to implement the KNN regressor algorithm on load_boston dataset in
sklearn.datasets.<Br>
Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using
cross-validation on load_iris dataset in sklearn.datasets.<Br>
Q4. Implement the KNN regressor algorithm with feature scaling on load_boston dataset in
sklearn.datasets.<Br>
Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on
load_iris dataset in sklearn.datasets.<Br>
Q6. Implement a function to standardise the features before applying KNN classifier.<Br>
Q7. Write a Python function to calculate the euclidean distance between two points.<Br>
Q8. Write a Python function to calculate the manhattan distance between two points.<Br>

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import warnings

### SOLUTION 1

In [None]:
# Install a blanket "ignore" filter so library warnings do not clutter the cell output.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.filterwarnings(action='ignore')

In [None]:
# return_X_y=True yields the (features, target) pair directly instead of a Bunch object.
X,y = load_iris(return_X_y=True)

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a 3-nearest-neighbour classifier on the training split
# (fit returns the estimator itself, so construction and fitting can be chained).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict the held-out samples and measure the share of correctly labelled ones.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

In [None]:
#classification report
# Text summary of the per-class metrics for the predictions currently held in y_pred.

print(classification_report(y_test, y_pred))

In [None]:
from sklearn.metrics import confusion_matrix

#confusion matrix
# Shown as a DataFrame so the notebook renders it as a table; the three columns are
# labelled 0..2, one per iris class.
pd.DataFrame(data=confusion_matrix(y_true=y_test, y_pred=y_pred), columns=range(3))

### SOLUTION 2

In [None]:
# scikit-learn removed load_boston in version 1.2, so importing it fails on current
# releases. Use it when available; otherwise rebuild the same (X, y) arrays from the
# original StatLib file (this fallback needs network access).
try:
    from sklearn.datasets import load_boston
    X,y = load_boston(return_X_y=True)
except ImportError:
    # Raw file layout: 22 header lines, then every record spans two physical lines
    # (11 values, then 3 more). The last of those 3 values is the regression target.
    raw_df = pd.read_csv("http://lib.stat.cmu.edu/datasets/boston", sep=r"\s+", skiprows=22, header=None)
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]

In [None]:
#train test split
# 80/20 split of the regression data; random_state=42 makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [None]:
# Learn the scaling statistics from the training rows only, then apply that same
# fitted transformation to both splits so the test rows never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# Fit a 3-nearest-neighbour regressor on the scaled training split and predict the
# held-out rows (fit returns the estimator, so it can be chained onto the constructor).
knn_regressor = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)
y_pred = knn_regressor.predict(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
# Report mean squared error and R2 on the test split for the predictions held in y_pred.
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'R2_Score: {r2_score(y_test, y_pred)}')

In [None]:
#heuristic
# Rule of thumb: k is roughly the square root of the number of samples. n_neighbors
# must be an integer, so round up; for the 506-sample Boston data this yields 23,
# the value that used to be hard-coded here after the computed one was discarded.
kh = int(np.ceil(np.sqrt(X.shape[0])))

In [None]:
# Refit the regressor with the heuristic neighbour count and score it on the test split.
knn_rh = KNeighborsRegressor(n_neighbors=kh).fit(X_train, y_train)
y_pred = knn_rh.predict(X_test)

# Same two metrics as for the k=3 model, so the numbers can be compared directly.
print(f'MSE: {mean_squared_error(y_test, y_pred)}')
print(f'R2_Score: {r2_score(y_test, y_pred)}')

* According to the heuristic approach, k=23, but this gives worse performance metrics than k=3. So we'll now use a validation-based search over k (scoring each candidate on the held-out test split) to determine an optimal value of k.

In [None]:
#search over k, scoring every candidate on the held-out test split
# NOTE(review): each k gets a single train/test evaluation here; this is not k-fold
# cross-validation, and picking k on the test split gives an optimistic estimate.
r2 = []

for k in range(1,23):
    knn = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    r2.append(r2_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Line plot of the R2 score collected in r2 against the neighbour count k (1 to 22).
import matplotlib.pyplot as plt
plt.plot(range(1,23), r2)
plt.title('k vs. r2_score')
plt.xlabel('k')
# The trailing semicolon suppresses the notebook's text echo of the last expression.
plt.ylabel('r2_score');

* This shows that we get the best value of r2 score with k=3.

### SOLUTION 3

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

X, y = load_iris(return_X_y=True)

#train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

#cross validation approach
# 5-fold cross-validation on the (still unscaled) training split. The scaler sits inside
# the pipeline so it is refitted on each fold's training part and the validation part
# never influences the scaling statistics.
k_values = range(1, 15)
cv_scores = []

for k in k_values:
    model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=k))
    fold_accuracy = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    cv_scores.append(fold_accuracy.mean())

# argmax returns the first maximum, so ties resolve to the smallest k.
best_k = k_values[int(np.argmax(cv_scores))]
print(f'best k by 5-fold CV: {best_k} (mean accuracy {max(cv_scores):.3f})')

#feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#hold-out check: accuracy of every k on the untouched test split
# (kept in `scores` because the plot in the next cell reads it)
scores = []

for i in range(1,15):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))
    

In [None]:
# Plot the values collected in scores against k = 1..14; the trailing semicolon
# suppresses the notebook's text echo of the returned plot object.
plt.plot(range(1,15), scores);

We get an accuracy of 1 for all values of k on the held-out test split.

### SOLUTION 4
This task has already been done in Solution 2

### SOLUTION 5

In [None]:
# Reload iris and hold out 20% of it for testing (seeded for reproducibility).
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

# Standardise both splits with statistics learned from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# weights='distance' lets closer neighbours count more in the vote than distant ones.
weighted_knn = KNeighborsClassifier(n_neighbors=3, weights='distance').fit(X_train, y_train)
y_pred = weighted_knn.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

### SOLUTION 6

* Feature scaling can be done using standard scaler from sklearn's preprocessing module.
* Feature scaling has been applied to KNN classifier built for iris dataset in solution 3.

### SOLUTION 7

In [None]:
#euclidean distance between 2 points
def euclidean_dist(a, b):
    """Return the Euclidean (L2) distance between two points.

    Args:
        a: Coordinates of the first point (array-like of numbers).
        b: Coordinates of the second point, with a shape compatible with ``a``.

    Returns:
        The L2 norm of ``a - b``.
    """
    # asarray lets plain lists/tuples be passed as well as numpy arrays.
    return np.linalg.norm(np.asarray(a) - np.asarray(b))


# Sample points; later cells reuse a and b.
a = np.array([0,0])
b = np.array([1,1])
euclidean_dist(a, b)

In [None]:
#calculating euclidean distance without numpy

# Pair matching coordinates directly instead of scanning every (i, j) combination and
# keeping only i == j. zip stops at the shorter input, which is exactly the set of
# indices the old index test accepted.
distances = [(p - q)**2 for p, q in zip(a, b)]

# Square root of the summed squared differences.
euclidean_distance = (sum(distances))**0.5
euclidean_distance

### SOLUTION 8

In [None]:
#calculating manhattan distance
def manhattan_dist(a,b):
    """Return the Manhattan (L1) distance between two points.

    Args:
        a: Sequence of numeric coordinates of the first point.
        b: Sequence of numeric coordinates of the second point.

    Returns:
        The sum of the absolute coordinate-wise differences. If the inputs differ
        in length, only the leading coordinates they share are compared.
    """
    # zip pairs equal positions in one pass; abs replaces the manual sign flip.
    return sum(abs(p - q) for p, q in zip(a, b))

# Evaluate on the sample points a and b created in the Solution 7 cell.
manhattan_dist(a,b)