## KNN Classifier

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
from sklearn.datasets import make_classification

# Synthetic two-class dataset: 1000 samples with 3 features, one of which is
# redundant. The fixed random_state keeps the generated data reproducible.
X, y = make_classification(n_samples=1000, n_features=3, n_redundant=1,
                           n_classes=2, random_state=999)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [5]:
from sklearn.neighbors import KNeighborsClassifier

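`KNeighborsClassifier` measures distance with the Minkowski metric, controlled by the power parameter `p`: `p=1` gives the Manhattan (L1) distance and `p=2` (the default) gives the Euclidean (L2) distance.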

In [6]:
# Baseline k-NN classifier: 5 neighbours, with scikit-learn left to pick the
# neighbour-search algorithm ('auto').
classifier = KNeighborsClassifier(
    n_neighbors=5,
    algorithm='auto',
)
classifier.fit(X_train, y_train)

In [7]:
# Predict class labels for the held-out test features.
y_pred = classifier.predict(X_test)

In [8]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [9]:
# Evaluate the test-set predictions: confusion matrix, overall accuracy,
# then the per-class precision/recall/F1 report.
for metric in (confusion_matrix, accuracy_score, classification_report):
    print(metric(y_test, y_pred))

[[158  11]
 [ 20 141]]
0.906060606060606
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       169
           1       0.93      0.88      0.90       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



#### Hyperparameter Tuning

In [14]:
from sklearn.model_selection import GridSearchCV

# Search space: neighbourhood sizes 1-10, three neighbour-search algorithms,
# and the Minkowski power parameter p (1 or 2).
param_grid = {
    'n_neighbors': list(range(1, 11)),
    'algorithm': ['ball_tree', 'kd_tree', 'brute'],
    'p': [1, 2],
}

# refit=True retrains the best configuration after the search, so `grid`
# itself can be used for prediction.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    refit=True,
)

In [15]:
# Run the cross-validated search over every combination in the parameter grid,
# using only the training split.
grid.fit(X_train, y_train)

In [16]:
# Hyperparameter combination selected by the grid search.
grid.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 9, 'p': 2}

In [17]:
# `grid` was already fitted on the training split in an earlier cell, and it
# was built with refit=True, so the best configuration has already been
# retrained and is ready for prediction. Calling fit again would only repeat
# the entire cross-validated search, so inspect the best mean
# cross-validation score instead.
grid.best_score_

In [18]:
# Predict test-set labels with the fitted search object; this overwrites the
# earlier `y_pred` from the baseline classifier.
y_pred = grid.predict(X_test)

In [19]:
# Report the tuned model's test-set performance: confusion matrix, accuracy,
# and the per-class classification report.
for report in (confusion_matrix, accuracy_score, classification_report):
    print(report(y_test, y_pred))

[[156  13]
 [ 16 145]]
0.9121212121212121
              precision    recall  f1-score   support

           0       0.91      0.92      0.91       169
           1       0.92      0.90      0.91       161

    accuracy                           0.91       330
   macro avg       0.91      0.91      0.91       330
weighted avg       0.91      0.91      0.91       330



## KNN Regressor

In [20]:
from sklearn.datasets import make_regression

# Synthetic regression dataset: 1000 samples, 2 features, noisy targets
# (noise=10), generated with a fixed seed. Rebinds X and y from the
# classification section above.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=42,
)

In [21]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the regression samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [23]:
from sklearn.neighbors import KNeighborsRegressor

In [24]:
# k-NN regressor using 6 neighbours, with scikit-learn left to pick the
# neighbour-search algorithm ('auto').
regressor = KNeighborsRegressor(
    n_neighbors=6,
    algorithm='auto',
)
regressor.fit(X_train, y_train)

In [25]:
# Predict target values for the held-out test features.
y_pred = regressor.predict(X_test)

In [27]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [28]:
# Regression quality on the test split, printed in this order:
# R^2, mean absolute error, mean squared error, and RMSE (square root of the MSE).
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

for value in (r2, mae, mse, rmse):
    print(value)

0.9189275159979495
9.009462452972217
127.45860414317289
11.289756602476995
