The k-nearest neighbors (k-NN) algorithm is arguably the simplest machine learning algorithm: building the model consists only of storing the training dataset.

To make a prediction for a new data point, the algorithm finds the closest data points in the training dataset—its “nearest neighbors.”

**k-Nearest Neighbors Classifier**

In [None]:
#importing the modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import sklearn

In [None]:
#Making a dataset

from sklearn.datasets import make_blobs
def make_forge():
    """Build the small two-class "forge" toy dataset.

    Starts from 30 points drawn by ``make_blobs`` around two centers
    (``random_state=4`` keeps the draw reproducible), then tweaks the result
    by hand: the labels at indices 7 and 27 are forced to class 0, and the
    points at indices 0, 1, 5 and 26 are dropped.

    Returns:
        A ``(X, y)`` tuple with the 26 remaining samples and their labels.
    """
    features, labels = make_blobs(centers=2, random_state=4, n_samples=30)

    # Relabel two hand-picked points as class 0.
    labels[[7, 27]] = 0

    # Boolean mask that keeps everything except four hand-picked points.
    keep = np.ones(len(features), dtype=bool)
    keep[[0, 1, 5, 26]] = False

    return features[keep], labels[keep]

In [None]:
#First, we split our data into a training and a test set 
#so we can evaluate generalization performance


from sklearn.model_selection import train_test_split
# Build the forge data, then hold out part of it as a test set.
# random_state=0 fixes the shuffle so the split is the same on every run.
X, y = make_forge()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
#Next, we import and instantiate the class, this is when we can set parameters
from sklearn.neighbors import KNeighborsClassifier
# Each prediction will be a vote among the 3 closest training points.
knn = KNeighborsClassifier(n_neighbors=3)

In [None]:
#Now, we fit the classifier using the training set. 
#For KNeighborsClassifier this means storing the dataset

# Left as a bare expression so the notebook echoes the fitted estimator.
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

In [None]:
#To make predictions on the test data, we call the predict method.

test_predictions = knn.predict(X_test)
print("The predictions on the test set are :\n{}".format(test_predictions))

The predictions on the test set are :
[1 0 1 0 1 0 0]


In [None]:
#To evaluate how well our model generalizes, 
#we can call the score method with the test data together with the test labels:

# For a classifier, score() is the fraction of test points labelled correctly.
test_accuracy = knn.score(X_test, y_test)
print('Test set accuracy : {:.2f}'.format(test_accuracy))

Test set accuracy : 0.86


**k-Neighbors Regression**

In [None]:
# With a single neighbor, the prediction is simply the target value
# of the nearest training point.
# With multiple nearest neighbors, the prediction is the average (mean)
# of the target values of those neighbors.
# The k-nearest neighbors algorithm for regression is implemented in the
# KNeighborsRegressor class in scikit-learn.

from sklearn.neighbors import KNeighborsRegressor


#creating dataset 

def make_wave(n_samples=100):
    """Generate the one-feature "wave" regression toy dataset.

    Inputs are drawn uniformly from [-3, 3). The target is
    ``(sin(4 * x) + x + noise) / 2`` with standard-normal noise. The random
    generator is seeded with 42, so repeated calls return identical data.

    Args:
        n_samples: Number of points to generate.

    Returns:
        A ``(X, y)`` tuple where ``X`` has shape ``(n_samples, 1)`` and ``y``
        has shape ``(n_samples,)``.
    """
    rng = np.random.RandomState(42)

    # Draw order matters for reproducibility: inputs first, then the noise.
    inputs = rng.uniform(-3, 3, size=n_samples)
    noise = rng.normal(size=len(inputs))

    targets = (np.sin(4 * inputs) + inputs + noise) / 2

    # scikit-learn estimators expect a 2-D feature matrix, hence the column shape.
    return inputs.reshape(-1, 1), targets

#Creating the training set and the test set
# 60 wave samples; random_state=0 keeps the train/test split reproducible.
X, y = make_wave(n_samples=60)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)


In [None]:
#instantiate the model and set the number of neighbors to consider to 3

# Each prediction will be based on the 3 nearest training points.
reg = KNeighborsRegressor(n_neighbors=3)

# Fit on the training inputs and training targets. Left as a bare
# expression so the notebook echoes the fitted estimator.
reg.fit(X_train, y_train)

KNeighborsRegressor(n_neighbors=3)

In [None]:
#Predicting the outcome for test data

# Left as a bare expression so the notebook displays the predicted values.
reg.predict(X_test)

array([-1.48111563,  0.70792211, -0.18289804,  0.70624175,  0.74912844,
        0.50442638,  0.74912844, -0.40989144, -1.13013686, -0.42316952,
       -1.40093968, -0.40989144,  0.80969902,  0.67223847,  0.74912844])

In [None]:
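#We can evaluate the model using the score method, which returns the R2 value.
#The R2 score, also known as the coefficient of determination,
#is a measure of goodness of a prediction for a regression model.
#It is at most 1 (a perfect prediction); a constant model that always predicts
#the mean of y scores 0, and a model that does worse than that scores below 0.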

r2_test = reg.score(X_test, y_test)
print("Score of the model :\n{:.2f}".format(r2_test))

Score of the model :
0.56


The KNN algorithm assumes that similar things exist in close proximity. In other words, similar things are near to each other.

“Birds of a feather flock together.”