In [29]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [3]:
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

In [7]:
X, y = data['data'], data['target']

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=True)

In [10]:
from sklearn.neighbors import KNeighborsClassifier

In [30]:
class MyKNN:
    def __init__(self, k = 3, metric = 'euclidean'):
        self.k = k
        self.metric = metric

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def calculate_euc_distance(self, x, y):
        return np.sqrt(((x - y)**2).sum())

    def calculate_man_distance(self, x, y):
        return np.abs(x-y).sum()

    def calculate_cos_distance(self, x, y):
        s = x @ y
        dlina_x = np.sqrt((x ** 2).sum())
        dlina_y = np.sqrt((y ** 2).sum())

        if dlina_x == 0 or dlina_y == 0:
          return 1
        return 1 - (s / (dlina_x * dlina_y))

    def calculate_distance(self, x, y):
        if self.metric == 'euclidean':
            return self.calculate_euc_distance(x, y)
        elif self.metric == 'manhattan':
            return self.calculate_man_distance(x, y)
        elif self.metric == 'cosine':
            return self.calculate_cos_distance(x, y)

    def calculate_matrix(self, X_test):
        distances = np.zeros((X_test.shape[0], self.X_train.shape[0]))
        for i in range(X_test.shape[0]):
            for j in range(self.X_train.shape[0]):
                distances[i, j] = self.calculate_distance(X_test[i], self.X_train[j])
        return distances

    def predict(self, X_test):
        matrix = self.calculate_matrix(X_test)
        matrix_idx = np.argsort(matrix, axis=1,)[:, :self.k]
        res_matrix = np.array([self.y_train[x] for x in matrix_idx]).mean(axis=1)
        return res_matrix


In [31]:
knn = MyKNN(3, metric = 'cosine')

In [32]:
knn.fit(X_train, y_train)

In [33]:
my_preds = knn.predict(X_test)

In [34]:
metrics.f1_score(y_test, my_preds.astype(int))

0.9113924050632911