# Chapter 15

## K-nearest neighbors

### 15.1 Finding an observation's nearest neighbors

In [None]:
from sklearn import datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the iris data; only the feature matrix is needed for a neighbor search.
iris = datasets.load_iris()
features = iris.data

# Learn the per-feature scaling from the data, then apply it to the same data.
standardizer = StandardScaler()
standardizer.fit(features)
standardized_features = standardizer.transform(features)

# Build the neighbor index over the standardized observations (k = 2).
nearest_neighbors = NearestNeighbors(n_neighbors=2)
nearest_neighbors.fit(standardized_features)

In [None]:
# The query point is compared directly against the standardized features;
# it is not passed through the scaler first.
new_observation = [1, 1, 1, 1]

# kneighbors takes a batch of observations, hence the wrapping list.
distances, indices = nearest_neighbors.kneighbors([new_observation])

# Report the matched (standardized) rows and their distances, one per line.
report_lines = [
    f'Nearest neighbors: {standardized_features[indices]}',
    f'Distance: {distances}',
]
print('\n'.join(report_lines))

### 15.2 Creating a k-nearest neighbors classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.preprocessing import StandardScaler 
from sklearn import datasets

In [None]:
# Load iris and unpack the feature matrix (X) and the class labels (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [None]:
# Standardize the training features before fitting the classifier.
standardizer = StandardScaler()
X_std = standardizer.fit(X).transform(X)

# 5-nearest-neighbors classifier; n_jobs=-1 asks for all available cores.
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
knn.fit(X_std, y)

# NOTE: these observations are given to the model as-is (not run through
# `standardizer`), so they are interpreted on the standardized scale.
new_observations = [[0.75, 0.75, 0.75, 0.75], [1, 1, 1, 1]]
knn.predict(new_observations)

In [None]:
# Show the predicted class probabilities for each of the new observations.
knn.predict_proba(new_observations)

### 15.3 Identifying the best neighbor size

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier 
from sklearn import datasets
from sklearn.preprocessing import StandardScaler 
from sklearn.pipeline import Pipeline, FeatureUnion 
from sklearn.model_selection import GridSearchCV

In [None]:
# Load iris and unpack the feature matrix and the class labels.
iris = datasets.load_iris()
features, target = iris.data, iris.target

In [None]:
# The scaler and classifier are left unfitted here: they are fitted only as
# steps of the pipeline, which GridSearchCV clones and fits per fold.
standardizer = StandardScaler()

# n_neighbors=5 is only a placeholder; the grid search overrides it below.
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)

pipe = Pipeline([("standardizer", standardizer), ("knn", knn)])

# Candidate neighborhood sizes k = 1..10, addressed as <step name>__<param>.
search_space = [{"knn__n_neighbors": np.arange(1, 11, 1)}]

classifier = GridSearchCV(pipe, search_space, cv=5, verbose=0)

# Fit on the RAW features. The pipeline already standardizes, so scaling the
# data beforehand made the refit pipeline's scaler learn from already
# standardized values, leaving it unable to transform raw observations.
fitted = classifier.fit(features, target)


In [None]:
# Show the n_neighbors value of the best pipeline found by the grid search.
fitted.best_estimator_.get_params()["knn__n_neighbors"]

### 15.4 Creating a radius-based nearest neighbors classifier

In [None]:
from sklearn.neighbors import RadiusNeighborsClassifier 
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

In [None]:
# Load iris and unpack the feature matrix and the class labels.
iris = datasets.load_iris()
features, target = iris.data, iris.target

In [None]:
# Standardize the features before fitting the radius-based classifier.
standardizer = StandardScaler()
features_standardized = standardizer.fit(features).transform(features)

# Classifier built with a neighborhood radius of 0.5, fitted on the
# standardized features; n_jobs=-1 asks for all available cores.
rnn = RadiusNeighborsClassifier(radius=0.5, n_jobs=-1)
rnn.fit(features_standardized, target)

In [None]:
# A single observation, passed to the model as-is (not run through the scaler).
new_observations = [[1, 1, 1, 1]]

# Predict its class with the radius-based classifier.
rnn.predict(new_observations)