In [None]:
from __future__ import print_function

from collections import defaultdict

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

In [None]:
class MyKNeighborsClassifier(object):
    def __init__(self, n_neighbors=1):
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        """
        Fixar dados de treino
        """
        self.X = X
        self.y = y

    def _distance(self, data1, data2):
        return np.sqrt(sum((data1 - data2)**2))

    def _compute_weights(self, distances):
        """
        distances -> lista de tuplas do tipo (distancia, label)
        """
        return [(1, y) for d, y in distances]

    def _predict_one(self, test):
        distances = sorted((self._distance(x, test), y) for x, y in zip(self.X, self.y))
        weights = self._compute_weights(distances[:self.n_neighbors])
        weights_by_class = defaultdict(list)
        
        for d, c in weights:
            weights_by_class[c].append(d)
        return max((sum(val), key) for key, val in weights_by_class.items())[1]

    def predict(self, X):
        """
        Para cada vetor em x, a predicao sera feita
        """
        return [self._predict_one(x) for x in X]

    def score(self, X, y):
        """
        Para fazer o score (valor de acerto), é preciso chamar o predict para as features e labes e,
        para cada valor predito corretamente, temos o retorno de 1 na lista que sera somada e
        dividida pela quantidade de vetores usados
        """
        return sum(1 for p, t in zip(self.predict(X), y) if p == t) / len(y)

In [None]:
# Load the comma-separated dataset, shuffle its rows and split them into a
# training part and a test part. Columns 0-2 are used as features and
# column 3 as the label.
SEED = 42             # fixed seed so the shuffle, and thus the scores, are reproducible
TRAIN_FRACTION = 0.7  # share of rows used for training; the rest is the test set
N_FEATURES = 3        # leading feature columns; the label is the column right after

data = np.loadtxt("haberman.data", delimiter=",")
# A private RandomState leaves NumPy's global random state untouched.
ndata = np.random.RandomState(SEED).permutation(data)

size = len(ndata)
nt = int(size * TRAIN_FRACTION)
trfeatures = ndata[:nt, :N_FEATURES]
ttfeatures = ndata[nt:, :N_FEATURES]
trlabels = ndata[:nt, N_FEATURES]
ttlabels = ndata[nt:, N_FEATURES]

In [None]:
# For k = 1..10, fit the hand-written classifier and scikit-learn's on the
# training split and print the accuracy of each on both splits.
for i in range(1, 11):
    print("Com k = {}".format(i))

    clf1 = MyKNeighborsClassifier(i)
    clf1.fit(trfeatures, trlabels)

    clf2 = KNeighborsClassifier(n_neighbors=i)
    clf2.fit(trfeatures, trlabels)

    # Same report layout for both models: name, train score, test score, blank line.
    for title, model in (("MyKNeighborsClassifier", clf1),
                         ("KNeighborsClassifier", clf2)):
        print(title)
        print("Treino: ", model.score(trfeatures, trlabels))
        print("Teste: ", model.score(ttfeatures, ttlabels))
        print()
