## K-Nearest Neighbors Classifier (KNN)

The Iris dataset is used.

- Nearest Neighbors
- K-Nearest Neighbors
- Radius Neighbors Classifier

In [1]:
# loading the libraries
from sklearn import datasets
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the Iris dataset bundled with scikit-learn
iris = datasets.load_iris()
# Split it into the feature matrix and the class labels
features, target = iris.data, iris.target

In [3]:
# Standardizer used to put every feature on a comparable scale
scaler = StandardScaler()
# Learn the scaling statistics from the features, then apply them to the same data
features_standardized = scaler.fit(features).transform(features)


In [4]:
# Unsupervised nearest-neighbor index that returns the 2 closest samples
nearest_neighbors = NearestNeighbors(n_neighbors=2)
# Fit on the standardized features (trailing ';' keeps the cell output empty)
nearest_neighbors.fit(features_standardized);

In [5]:
# A single query point with all four feature values set to 1
new_observation = [1, 1, 1, 1]

# Look up its nearest neighbors: distances to them and their row indices
distances, indices = nearest_neighbors.kneighbors([new_observation], return_distance=True)

# Show the standardized feature rows of those neighbors
features_standardized[indices]

array([[[1.03800476, 0.55861082, 1.10378283, 1.18556721],
        [0.79566902, 0.32841405, 0.76275827, 1.05393502]]])

### Using Euclidean distance

In [6]:
# Same 2-neighbor index, with the Euclidean metric requested explicitly
nearestneighbors_euclidean = NearestNeighbors(n_neighbors=2, metric='euclidean')
# Fit on the standardized features (trailing ';' keeps the cell output empty)
nearestneighbors_euclidean.fit(features_standardized);

In [7]:
# Query the Euclidean model fitted in the previous cell for the same observation
# and view the distances to its nearest neighbors. Separate names are used so the
# results of the earlier default-metric lookup are not overwritten. The values
# match that earlier lookup because NearestNeighbors defaults to the Minkowski
# metric with p=2, which is the Euclidean distance.
distances_euclidean, indices_euclidean = nearestneighbors_euclidean.kneighbors([new_observation])
distances_euclidean

array([[0.49140089, 0.74294782]])

In [8]:
# Build the sparse neighbor graph over the training samples themselves
neighbors_graph = nearestneighbors_euclidean.kneighbors_graph(features_standardized)
# Convert it to a dense array so individual rows can be inspected
nearest_neighbors_self = neighbors_graph.toarray()

In [9]:
# Graph row for the first sample: a 1 marks each sample that is one of its
# two nearest neighbors (the sample itself is included)
nearest_neighbors_self[0]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
# KNN classifier that votes among the 5 nearest neighbors, using all CPU cores
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
# Train on the standardized features (trailing ';' keeps the cell output empty)
knn.fit(features_standardized, target);

In [11]:
# Two query points: one with every feature at 0.75, one with every feature at 1
new_observations1 = [[0.75] * 4, [1] * 4]

# Predicted class label for each of the two query points
knn.predict(new_observations1)

array([1, 2])

In [12]:
# Per-class probability estimates for the same two observations
# (one row per observation, one column per class)
knn.predict_proba(new_observations1)

array([[0. , 0.6, 0.4],
       [0. , 0. , 1. ]])

In [13]:
# Hard class labels for the same two observations, for comparison with the probabilities
knn.predict(new_observations1)

array([1, 2])

### Identifying the neighbor size

In [14]:
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV

In [15]:
# Create the pipeline: standardization happens inside it, so during
# cross-validation each fold is scaled using only its own training split
pipe = Pipeline([("standardizer", scaler), ("knn", knn)])

# Candidate neighborhood sizes k = 1..10 for the "knn" step
search_space = [{"knn__n_neighbors": list(range(1, 11))}]

# Run a 5-fold grid search. The pipeline is fitted on the RAW features because
# it already contains the standardizer; passing the pre-standardized features
# would apply the scaling a second time.
classifier = GridSearchCV(pipe, search_space, cv=5, verbose=0).fit(features, target)

In [16]:
# Neighborhood size (k) selected by the cross-validated grid search
classifier.best_params_["knn__n_neighbors"]

6

## Radius-Based Nearest Neighbor Classifier

In [17]:
from sklearn.neighbors import RadiusNeighborsClassifier

In [18]:
# Radius-based classifier: neighbors are the training samples within radius 0.5
rnn = RadiusNeighborsClassifier(radius=.5, n_jobs=-1)
rnn.fit(features_standardized, target)

# A single query point with all four feature values set to 1
new_observations = [[1, 1, 1, 1]]

# Predicted class label for that one query point
rnn.predict(new_observations)

array([2])