Import required libraries

In [1]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin

Weighted KNN implementation

In [2]:
def dist(a, b):
    d = 0.0
    for i in range(len(a)):
        d += (a[i] - b[i])**2
    return d**0.5

class PotentialKnn(BaseEstimator, ClassifierMixin):
    def __init__(self, k, N):
        print(k, N)
        self.weights = np.zeros((N))
        self.k = k
        self.N = N

    def fit(self, X_train, y_train):
        iter = 0
        self.x = X_train
        self.y = y_train
        #initialize predictions; after it we will make a prediction for every point while it is the real one
        predictions = self.predict(X_train) 
        while self.score(X_train, y_train) < 0.9:
            iter += 1
            for _ in range(100): #process 100 points in one time; it is too long to do it one by one
                i = np.random.randint(self.N) #choose random point from dataset 
                if predictions[i] != y_train[i]: #and check if its class coinside with real one
                    self.weights[i] += 1 #if itsn't recalculate weights
            predictions = self.predict(X_train)
        return self.weights

    def predict(self, test_data):
        listofpred = []
        k = self.k
        for test_point in (test_data):
            j = 0
            d = [[dist(test_point, point), self.y[ind]]
                 for ind, point in enumerate(self.x)]
            stat = [0 for _ in range(10)]
            for z in sorted(d)[0:k]:
                j += 1
                stat[z[1]] += self.weights[j] * 1 / (z[0] + 1) #weighted KNN kernel
                #choose 10 classes sort values of weighted KNN function and get a class - number from 0 to 9
            listofpred.append(sorted(zip(stat, range(10)), reverse=True)[0][1]) 
        return listofpred

Read initial data and split into train and test

In [3]:
digits = load_digits()
(X_train, X_test, y_train, y_test) = train_test_split(digits.data, digits.target, test_size=0.25)

Sklearn KNN 

In [8]:
%%time
knn = KNeighborsClassifier()
knn.fit(X_train, y_train) 
y_predicted = knn.predict(X_test)
print(accuracy_score(y_predicted, y_test))
print(confusion_matrix(y_predicted, y_test))

0.9888888888888889
[[41  0  0  0  0  0  0  0  0  0]
 [ 0 47  0  0  0  0  0  0  0  1]
 [ 0  0 42  0  0  0  0  0  0  0]
 [ 0  0  0 43  0  0  0  0  0  1]
 [ 0  0  0  0 48  0  0  0  0  0]
 [ 0  0  0  0  0 51  0  0  0  0]
 [ 0  0  0  0  0  1 49  0  0  0]
 [ 0  0  0  1  0  0  0 46  0  0]
 [ 0  0  0  0  0  0  0  0 45  1]
 [ 0  0  0  0  0  0  0  0  0 33]]
Wall time: 146 ms


Check the solution

In [10]:
%%time
model = PotentialKnn(3, X_train.shape[0])
model.fit(X_train, y_train)
y_predicted = model.predict(X_test)
print(accuracy_score(y_predicted, y_test))
print(confusion_matrix(y_predicted, y_test))

3 1347
0.9888888888888889
[[41  0  0  0  0  0  0  0  0  0]
 [ 0 47  0  0  0  0  0  0  0  0]
 [ 0  0 42  0  0  0  0  0  0  0]
 [ 0  0  0 42  0  0  0  0  0  1]
 [ 0  0  0  0 48  0  0  0  0  0]
 [ 0  0  0  0  0 51  0  0  0  0]
 [ 0  0  0  0  0  1 49  0  0  0]
 [ 0  0  0  1  0  0  0 46  0  0]
 [ 0  0  0  0  0  0  0  0 45  1]
 [ 0  0  0  1  0  0  0  0  0 34]]
Wall time: 20min 3s
