In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset once; later cells read its `.data` (feature rows)
# and `.target` (labels) attributes and slice or split them as needed.
data_before_split = load_breast_cancer()

### KNN Implementation

In [None]:
class Knn:
    """Binary k-nearest-neighbours classifier using squared Euclidean distance.

    The predicted probability of class 1 for a query is the mean label of its
    ``k`` closest training samples, so labels are assumed to be encoded as 0/1.

    Attributes:
        value: number of neighbours (``k``) consulted for every query.
        threshold: class-1 probability that must be exceeded for ``predict``
            to output 1.
        data: training samples stored by ``fit``, shape (n_samples, n_features).
        target: training labels stored by ``fit``, shape (n_samples,).
    """

    def __init__(self, k, threshold=0.5):
        """Store the neighbour count ``k`` and the decision ``threshold``."""
        self.value = k
        self.threshold = threshold

    def fit(self, xtrain, ytrain):
        """Memorise the training set; no other work happens at fit time.

        Args:
            xtrain: array-like of shape (n_samples, n_features).
            ytrain: array-like of shape (n_samples,) holding 0/1 labels.

        Returns:
            The estimator itself, so calls can be chained.
        """
        # Coerce to ndarrays so plain Python lists work as well: the distance
        # arithmetic and the fancy indexing in predict_proba both need arrays.
        self.data = np.asarray(xtrain)
        self.target = np.asarray(ytrain)
        return self

    def predict(self, xtest):
        """Return hard 0/1 predictions as a float32 array of shape (n_queries,).

        A query is labelled 1 only when its class-1 probability is strictly
        greater than ``self.threshold``.
        """
        proba = self.predict_proba(xtest)
        return (proba[:, 1] > self.threshold).astype(np.float32)

    def predict_proba(self, xtest):
        """Return class probabilities for every query row.

        Args:
            xtest: array-like of shape (n_queries, n_features).

        Returns:
            Array of shape (n_queries, 2); column 0 is P(class 0) and
            column 1 is P(class 1).
        """
        neighbour_means = []
        for x in np.asarray(xtest):
            # Squared distances are enough: dropping the square root does not
            # change the neighbour ranking.
            distances = ((self.data - x) ** 2).sum(axis=1)
            indices = np.argsort(distances)[:self.value]
            # With 0/1 labels, the mean of the neighbours' labels is the
            # fraction of neighbours that belong to class 1.
            neighbour_means.append(self.target[indices].mean())

        return_array = np.array(neighbour_means)
        return np.stack([1 - return_array, return_array], axis=1)
            
# Smoke test: fit on rows 290 onward and then score those same rows. Every query
# row is therefore also present in the training set, so this output only shows
# that the class runs; it is not a held-out evaluation.
first_try = Knn(10)
first_try.fit(data_before_split.data[290:], data_before_split.target[290:])
print(first_try.predict_proba(data_before_split.data[290:]))
# Uncomment to print the true labels of the same rows for a side-by-side check:
#print(data_before_split.target[290:])
# NOTE(review): the reminder that used to sit here read "use argsort to speed up",
# but predict_proba already ranks neighbours with np.argsort; confirm whether a
# further speed-up was intended.

### KNN Testing

In [None]:
def data_splitter(data, trainamt, seed=None):
    """Randomly split a dataset into training and test subsets.

    Args:
        data: object exposing ``data`` (samples along axis 0) and ``target``
            (one label per sample) arrays, such as the value returned by
            ``load_breast_cancer()``.
        trainamt: fraction (not percentage) of the samples assigned to the
            training set, e.g. ``0.8``. The resulting count is truncated
            with ``int``.
        seed: optional seed for a private ``np.random.RandomState``. When
            ``None`` (the default) the global NumPy RNG is used, so a prior
            ``np.random.seed(...)`` call still controls the shuffle.

    Returns:
        Tuple ``(xtrain, ytrain, xtest, ytest)``. Within each subset the rows
        keep their original relative order because selection is done with a
        boolean mask.
    """
    num_total = data.data.shape[0]
    num_train = int(num_total * trainamt)

    indices = np.arange(num_total)
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(indices)

    # Start with every row marked as test, then flip the sampled rows to train.
    is_test = np.ones(num_total, dtype=bool)
    is_test[indices[:num_train]] = False
    is_train = ~is_test

    # Features and labels must be selected with the SAME boolean mask. Applying
    # `~` to the integer index array instead is a bitwise NOT that produces
    # negative indices (-(i + 1)), which picks labels from unrelated rows.
    xtrain = data.data[is_train]
    ytrain = data.target[is_train]
    xtest = data.data[is_test]
    ytest = data.target[is_test]
    return xtrain, ytrain, xtest, ytest

In [None]:
def cross_validation(data, trainamt, validamt, testamt, folds=5):
    """Hold out a test set and cut the remaining samples into CV folds.

    Args:
        data: dataset object accepted by ``data_splitter`` (exposes ``data``
            and ``target``).
        trainamt: fraction of samples intended for training.
        validamt: fraction of samples intended for validation. Only the sum
            ``trainamt + validamt`` is used: it is the share of samples kept
            out of the held-out test set.
        testamt: accepted for interface compatibility but not read; the test
            set is whatever ``data_splitter`` leaves after taking
            ``trainamt + validamt``.
        folds: number of folds to cut the train+validation samples into.

    Returns:
        Tuple ``(x_folds, y_folds, xtest, ytest)`` where ``x_folds`` and
        ``y_folds`` are lists of ``folds`` arrays (matching positions belong
        together) and ``xtest`` / ``ytest`` are the held-out samples.

    Raises:
        ValueError: if ``folds`` is not a positive number of sections
            (raised by ``np.array_split``).
    """
    # Use dedicated names for the held-out arrays so the `testamt` argument is
    # not silently overwritten by the unpacking.
    xtrain_valid, ytrain_valid, xtest, ytest = data_splitter(data, trainamt + validamt)

    # The second argument of np.split / np.array_split is the NUMBER of
    # sections, not the section size. array_split also tolerates lengths that
    # are not an exact multiple of `folds`, where np.split would raise.
    x_folds = np.array_split(xtrain_valid, folds)
    y_folds = np.array_split(ytrain_valid, folds)
    return x_folds, y_folds, xtest, ytest

In [None]:
def accuracy_check(prediction, ytest):
    """Return the fraction of predictions that equal the true labels.

    Args:
        prediction: array-like of predicted labels.
        ytest: array-like of true labels, aligned element-wise with
            ``prediction``.

    Returns:
        Number of matching positions divided by the length of the first axis
        of the comparison result.
    """
    # Coerce first: comparing two plain lists with `==` yields a single bool
    # rather than an element-wise result.
    matches = np.asarray(prediction) == np.asarray(ytest)
    return matches.sum() / matches.shape[0]