# In [6]:

import numpy as np
from scipy.spatial.distance import cdist

class KNN:
    '''k-nearest-neighbours classifier for binary labels.

    Training samples are memorised by ``fit``. ``predict`` looks up the
    ``k`` training samples closest to each query (Euclidean distance, the
    ``cdist`` default) and takes a majority vote among their labels.
    Label ``0`` counts as background, every other label as signal; a tie
    is resolved in favour of background (``0``).

    Attributes
    ----------
    k : int
        Number of neighbors to consider.
    train_data : numpy.array, shape=(n_samples, n_attributes)
        Training data, set by ``fit``.
    train_labels : numpy.array, shape=(n_samples)
        Training labels, set by ``fit``.
    test_data : numpy.array
        The data most recently passed to ``predict``.
    '''

    # Number of query samples whose distances are computed at once. This
    # bounds the temporary distance matrix to
    # (_CHUNK_SIZE, n_training_samples) entries regardless of how many
    # samples are passed to predict().
    _CHUNK_SIZE = 512

    def __init__(self, k):
        '''Initialization.
        Parameters are stored as member variables/attributes.

        Parameters
        ----------
        k : int
            Number of neighbors to consider.
        '''
        self.k = k

    def fit(self, X, y):
        '''Fit routine.
        Training data is stored within object.

        Parameters
        ----------
        X : numpy.array, shape=(n_samples, n_attributes)
            Training data.
        y : numpy.array shape=(n_samples)
            Training labels.

        Raises
        ------
        ValueError
            If X and y do not contain the same number of samples.
        '''
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                'X and y must contain the same number of samples, '
                'got {} and {}.'.format(len(X), len(y))
            )
        self.train_data = X
        self.train_labels = y

    def predict(self, X):
        '''Prediction routine.
        Predict class association of each sample of X.

        Parameters
        ----------
        X : numpy.array, shape=(n_samples, n_attributes)
            Data to classify.

        Returns
        -------
        prediction : numpy.array, shape=(n_samples)
            Predictions, containing the predicted label (0 or 1) of each
            sample.

        Raises
        ------
        ValueError
            If X is not two-dimensional, or if k is smaller than 1 or
            larger than the number of training samples.
        '''
        # Kept as an attribute because earlier versions exposed it.
        self.test_data = X

        queries = np.asarray(X)
        if queries.ndim != 2:
            raise ValueError(
                'X must have shape (n_samples, n_attributes), '
                'got an array with {} dimension(s).'.format(queries.ndim)
            )

        train_data = np.asarray(self.train_data)
        n_train = len(train_data)
        if not 1 <= self.k <= n_train:
            raise ValueError(
                'k must be between 1 and the number of training samples '
                '({}), got {}.'.format(n_train, self.k)
            )

        if queries.shape[0] == 0:
            return np.array([], dtype=int)

        # Any non-zero label is treated as signal, zero as background.
        is_signal = np.asarray(self.train_labels) != 0

        votes = []
        for start in range(0, queries.shape[0], self._CHUNK_SIZE):
            chunk = queries[start:start + self._CHUNK_SIZE]
            # cdist needs 2-D input on both sides; slicing (rather than
            # indexing a single row) keeps the chunk two-dimensional.
            distances = cdist(chunk, train_data)
            # Indices of the k closest training samples for every query.
            # A stable sort makes the choice among equidistant training
            # samples deterministic (lowest training index first).
            nearest = np.argsort(distances, axis=1, kind='stable')[:, :self.k]
            signal_votes = is_signal[nearest].sum(axis=1)
            # signal > background  <=>  signal > k - signal; ties -> 0.
            votes.append(2 * signal_votes > self.k)

        return np.concatenate(votes).astype(int)