In [61]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [62]:
def euclidian_distance(a, b):
    return np.sqrt(np.sum((a-b)**2, axis=1))

In [64]:
def Kneighbours(X_test,return_distance=False,k=5):
    dist=[]
    neigh_ind=[]
    point_dist=[euclidian_distance(x_test,X_train) for x_test in X_test]
    for row in point_dist:
        enum_neigh=enumerate(row)
        sorted_neigh=sorted(enum_neigh,key=lambda x :x[1])[:k]
        ind_list=[tup[0] for tup in sorted_neigh]
        dist_list=[tup[1] for tup in sorted_neigh]
        
        
        dist.append(dist_list)
        neigh_ind.append(ind_list)
        
        if return_distance:
            return np.array(dist), np.array(neigh_ind)
        
        return np.array(neigh_ind)
        
    
    

In [None]:
X, y = make_classification(n_samples = 1000, n_features=2, n_redundant=0, n_informative=2,
                             n_clusters_per_class=1, n_classes=3, random_state=21)
mu=np.mean(X,0)
sigma=np.std(X,0)
X=(X-mu)/sigma
X

array([[ 1.79511018,  0.34137747],
       [-0.31340475,  0.38363642],
       [-0.37620692, -1.3831852 ],
       ...,
       [-1.75629977,  1.75656304],
       [ 1.68736291, -0.61352284],
       [ 0.15599353,  0.84178659]])

In [None]:
data=np.hstack((X,y[:,np.newaxis]))
np.random.shuffle(data)


In [None]:
train,test=train_test_split(data)
X_train=train[:,:-1]
y_train=train[:,-1]

X_test=test[:,:-1]
y_test=test[:,-1]
y_train=y_train.astype(int)
y_test=y_test.astype(int)


In [73]:
def predict(X_test,weights='uniform'):
    class_num=3
    if weights=='uniform':
        neighbours=Kneighbours(X_test)
        y_pred=np.array([np.argmax(np.bincount(y_train[neighbor])) for neighbor in neighbours])
        
    if weights=='distance':
            

In [66]:
neighbours=Kneighbours(X_test)
neighbours

array([[530, 595, 232, 522, 206]])

In [71]:
[np.argmax(np.bincount(y_train[neighbour]))for neighbour in neighbours]

[0]

In [None]:
import numpy as np


class KNearestNeighbors():
    def __init__(self, X_train, y_train, n_neighbors=5, weights='uniform'):

        self.X_train = X_train
        self.y_train = y_train

        self.n_neighbors = n_neighbors
        self.weights = weights

        self.n_classes = 3

    def euclidian_distance(self, a, b):
        return np.sqrt(np.sum((a - b)**2, axis=1))

    def kneighbors(self, X_test, return_distance=False):

        dist = []
        neigh_ind = []

        point_dist = [self.euclidian_distance(x_test, self.X_train) for x_test in X_test]

        for row in point_dist:
            enum_neigh = enumerate(row)
            sorted_neigh = sorted(enum_neigh,
                                  key=lambda x: x[1])[:self.n_neighbors]

            ind_list = [tup[0] for tup in sorted_neigh]
            dist_list = [tup[1] for tup in sorted_neigh]

            dist.append(dist_list)
            neigh_ind.append(ind_list)

        if return_distance:
            return np.array(dist), np.array(neigh_ind)

        return np.array(neigh_ind)

    def predict(self, X_test):

        if self.weights == 'uniform':
            neighbors = self.kneighbors(X_test)
            y_pred = np.array([
                np.argmax(np.bincount(self.y_train[neighbor]))
                for neighbor in neighbors
            ])

            return y_pred

        if self.weights == 'distance':

            dist, neigh_ind = self.kneighbors(X_test, return_distance=True)

            inv_dist = 1 / dist

            mean_inv_dist = inv_dist / np.sum(inv_dist, axis=1)[:, np.newaxis]

            proba = []

            for i, row in enumerate(mean_inv_dist):

                row_pred = self.y_train[neigh_ind[i]]

                for k in range(self.n_classes):
                    indices = np.where(row_pred == k)
                    prob_ind = np.sum(row[indices])
                    proba.append(np.array(prob_ind))

            predict_proba = np.array(proba).reshape(X_test.shape[0],
                                                    self.n_classes)

            y_pred = np.array([np.argmax(item) for item in predict_proba])

            return y_pred

    def score(self, X_test, y_test):
        y_pred = self.predict(X_test)

        return float(sum(y_pred == y_test)) / float(len(y_test))