In [2]:
# Step 1 - Calculate Euclidean Distance
# Step 2 - Get Nearest Neighbors
# Step 3 - Make Predictions

# Import the necessary modules and libraries.

from math import sqrt
import numpy as np

In [3]:
class KNearestNeighborsClassifier:
    """
    A simple attempt at creating a K-Nearest Neighbors algorithm.

    n_neighbors: int, default=5
        Number of neighbors to use by default in classification.
    """

    def __init__(self, n_neighbors=5):
        """Initialize the classifier."""
        self.neighbors = n_neighbors
        self.X = None
        self.y = None

    def fit(self, X_train, y_train):
        """
        Fit the train data. X_train can be multidimensional array.
        y_train can be one dimensional array that matches the length
        of the X_train.

        X: numpy array, training data
        y: numpy array, target values
        """
        self.X = X_train
        self.y = y_train

    def predict(self, X_test):
        """
        Predict the class labels for provided data.
        X_test: numpy array
        """
        predictions = []

        for row in X_test:
            prediction = self._make_prediction(
                row, self.X, self.y, self.neighbors)
            predictions.append(prediction)

        return np.array(predictions)

    def _eucl_dist(self, test_v, train_v):
        """
        Helper function to calculate the Euclidean distances of
        each test vector to each train vector.
        """

        dist = sum([(test_v[i] - train_v[i])**2 for i in range(len(test_v))])
        return sqrt(dist)

    def _get_neighbors(self, test_v, train_v, y_train, n_neighbors):
        """
        Helper function to calculate the nearest neighbors.
        """

        distances = []
        # Once the distance is calculated for each vector,
        # we attach the train vector, its associated y value
        # and actual distance to a list.
        for i in range(len(train_v)):
            dist = self._eucl_dist(test_v, train_v[i])
            distances.append((train_v[i], y_train[i], dist))
        # Sort the list based on the distance value.
        distances.sort(key=lambda item: item[2])

        # Get the number of neighbors from the distance list.
        # And return them.
        neighbors = []
        for i in range(n_neighbors):
            neighbors.append(distances[i])
        return neighbors

    def _make_prediction(self, test_v, train_v, y_train, n_neighbors):
        """
        Helper function to make prediction based on the number of neighbors.
        """

        neighbors = self._get_neighbors(
            test_v, train_v, y_train, n_neighbors
        )
        output_class = [row[-2] for row in neighbors]
        # Make the prediction based on the most voted class member.
        pred = max(set(output_class), key=output_class.count)
        return pred

In [5]:
### Testing the code with dummy data.

X_train = np.random.rand(30, 2)
y_train = np.random.randint(0, 2, 30)
X_test = np.random.rand(5, 2)

print(X_train)
print('----------')
print(y_train)
print('----------')
print(X_test)

[[0.88056825 0.43803249]
 [0.36249415 0.2627769 ]
 [0.814737   0.8533251 ]
 [0.34070768 0.44930606]
 [0.9265359  0.09071336]
 [0.09151409 0.29862093]
 [0.80975248 0.17831496]
 [0.36671694 0.17705965]
 [0.33113829 0.88323793]
 [0.0714098  0.34945408]
 [0.57076975 0.50807954]
 [0.78138549 0.48379512]
 [0.19047819 0.29995754]
 [0.7258243  0.55711282]
 [0.83557905 0.17139172]
 [0.61716107 0.57919871]
 [0.90633259 0.85387053]
 [0.83430923 0.70518756]
 [0.32691902 0.18348916]
 [0.60518977 0.24830267]
 [0.79002628 0.23099387]
 [0.21420421 0.3324604 ]
 [0.48859638 0.1609039 ]
 [0.00438245 0.48596902]
 [0.25485178 0.61224045]
 [0.82218707 0.05591586]
 [0.66030309 0.49211148]
 [0.56334091 0.98052004]
 [0.40553282 0.0047375 ]
 [0.8992913  0.09396375]]
----------
[0 0 0 1 1 1 0 0 1 1 1 0 0 1 1 0 1 0 0 1 0 1 1 0 0 1 0 0 0 1]
----------
[[0.14272049 0.66840598]
 [0.49501985 0.80810383]
 [0.00575174 0.10832311]
 [0.07810268 0.43417147]
 [0.8915953  0.34110017]]


In [6]:
test_classifier = KNearestNeighborsClassifier(n_neighbors=5)

In [7]:
test_classifier.fit(X_train, y_train)

In [8]:
test_classifier.predict(X_test)

array([1, 0, 1, 1, 0])