## KNN Classifier

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [6]:
from sklearn.datasets import make_classification

# Synthetic two-class dataset: 100 samples with 3 features, one of which is
# requested as redundant. The fixed random_state keeps the data reproducible.
X, y = make_classification(
    n_samples=100,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=999,
)

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out a 0.33 fraction of the samples as the test split; random_state
# pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

In [11]:
from sklearn.neighbors import KNeighborsClassifier

In [14]:
# KNN classifier using 5 neighbours; algorithm='auto' lets scikit-learn pick
# the neighbour-search method. Fitted on the training split only.
classifier = KNeighborsClassifier(
    algorithm='auto',
    n_neighbors=5,
)
classifier.fit(X_train, y_train)

In [15]:
# Predicted labels for the held-out test samples
y_pred = classifier.predict(X=X_test)

In [16]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
# scikit-learn metrics expect the ground truth first and the predictions
# second: metric(y_true, y_pred). Accuracy is symmetric in its arguments, but
# passing them the other way round transposes the confusion matrix and
# exchanges precision with recall (and the per-class support) in the report.
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[15  1]
 [ 1 16]]
0.9393939393939394
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        16
           1       0.94      0.94      0.94        17

    accuracy                           0.94        33
   macro avg       0.94      0.94      0.94        33
weighted avg       0.94      0.94      0.94        33



In [17]:
# hyperparameter tuning 
from sklearn.model_selection import GridSearchCV
param_grid = {
    # Number of neighbors: every k from 1 through 10
    'n_neighbors': list(range(1, 11)),
    # Weight function used in prediction
    'weights': ['uniform', 'distance'],
    # Algorithm used to compute the nearest neighbors
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}

In [21]:
# Grid search over every combination in param_grid, ranked by accuracy
# using cv=5 cross-validation on the training split.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

In [22]:
# Winning parameter combination, then its cross-validation score
print(
    "Best Parameters:",
    grid_search.best_params_,
)
print(
    "Best Score:",
    grid_search.best_score_,
)

Best Parameters: {'algorithm': 'auto', 'n_neighbors': 1, 'weights': 'uniform'}
Best Score: 0.9857142857142858


In [23]:
# Score the estimator selected by the grid search on the held-out test split
best_knn = grid_search.best_estimator_
test_score = best_knn.score(X=X_test, y=y_test)
print("Test Score:", test_score)

Test Score: 0.8787878787878788


#### Full Code Example on the Iris Dataset

In [24]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the Iris dataset and pull out the feature matrix and the targets
data = load_iris()
X, y = data.data, data.target

# Keep a 0.3 fraction of the samples aside for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# Base KNN model; its hyperparameters are supplied by the grid search
knn = KNeighborsClassifier()

# Candidate hyperparameter values explored by the grid search
param_grid = {
    'n_neighbors': [3, 5, 7, 10],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
}

# Grid search over param_grid with cv=5 cross-validation, ranked by accuracy
grid_search = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)

# Run the search on the training data only
grid_search.fit(X_train, y_train)

# Report the best parameter combination and its cross-validation score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Score the selected model on the held-out test data
best_knn = grid_search.best_estimator_
test_score = best_knn.score(X=X_test, y=y_test)
print("Test Score:", test_score)


Best Parameters: {'algorithm': 'auto', 'n_neighbors': 7, 'weights': 'uniform'}
Best Score: 0.9523809523809523
Test Score: 1.0


## KNN Regressor

In [25]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [26]:
from sklearn.datasets import make_regression
# Synthetic regression dataset: 1000 samples, 2 features, noise=10,
# generated with a fixed random_state for reproducibility.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=42,
)

In [27]:
from sklearn.model_selection import train_test_split
# Hold out a 0.33 fraction of the regression samples as the test split
# (seeded so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

In [28]:
from sklearn.neighbors import KNeighborsRegressor

In [29]:
# KNN regressor using 6 neighbours; algorithm='auto' lets scikit-learn pick
# the neighbour-search method. Fitted on the training split only.
regressor = KNeighborsRegressor(
    algorithm='auto',
    n_neighbors=6,
)
regressor.fit(X_train, y_train)

In [30]:
# Predicted target values for the held-out test samples
y_pred = regressor.predict(X=X_test)

In [31]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
# Test-split regression metrics, in order: R^2, mean absolute error,
# mean squared error. Ground truth is passed as y_true, predictions as y_pred.
print(r2_score(y_true=y_test, y_pred=y_pred))
print(mean_absolute_error(y_true=y_test, y_pred=y_pred))
print(mean_squared_error(y_true=y_test, y_pred=y_pred))

0.9189275159979495
9.009462452972217
127.45860414317289
