# K-Nearest Neighbors

The following code trains a K-Nearest Neighbors classifier to predict the distance range (category) between the Raspberry Pis.

In [15]:
import pandas as pd
from pathlib import Path
from pi_pact_sort import categorize
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler

# Columns removed from each CSV file right after it is read, when present.
DROP_COLUMNS = [
    'ADDRESS',
    'TIMESTAMP',
    'UUID',
    'MAJOR',
    'MINOR',
    'TX POWER',
    'TEMPERATURE',
    'PITCH',
    'ROLL',
    'YAW',
    'SCAN',
]

# Number of rows drawn from each distance category when building the training set.
SAMPLE_SIZE = 30_000



"""Trains a K-Nearest Neighbors classifier to predict a distance range given RSSI values and other variables.
"""

# Initialize DataFrame
# `data` stays empty here; it is filled further down with the per-category sample.
data: pd.DataFrame = pd.DataFrame(columns=['RSSI', 'DISTANCE', 'HUMIDITY', 'PRESSURE'])
data_copy: pd.DataFrame = data.copy()

# Read every matching CSV and discard the columns listed in DROP_COLUMNS.
# errors='ignore' skips names that a given file does not contain.
frames = []
csv_file: Path
for csv_file in Path('.').glob('indoor-noObstruct-SenseHat*/*.csv'):
    datapart: pd.DataFrame = pd.read_csv(csv_file)
    datapart = datapart.drop(columns=DROP_COLUMNS, errors='ignore')
    frames.append(datapart)

# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the accumulated frame on every file. Concatenate once instead.
# When no file matched, data_copy remains the empty frame created above.
if frames:
    combined = pd.concat(frames)
    # Keep the four expected columns first (created as NaN if no file had
    # them), followed by any other columns the files carried.
    extra_columns = [name for name in combined.columns if name not in data.columns]
    data_copy = combined.reindex(columns=[*data.columns, *extra_columns])

# Categorize distance
# `categorize` comes from pi_pact_sort; presumably it bins a raw distance into
# a class label (verify against that module).
data_copy['DISTANCE'] = data_copy['DISTANCE'].map(categorize)

# Sample data from each distance category
# Every category contributes exactly SAMPLE_SIZE rows; random_state is fixed so
# the draw is reproducible.
# NOTE: sample() raises ValueError when a category holds fewer than SAMPLE_SIZE rows.
samples = []
for value in data_copy['DISTANCE'].unique():
    datapart = data_copy[data_copy.DISTANCE == value]
    datapart = datapart.sample(SAMPLE_SIZE, random_state=1)
    samples.append(datapart)

# DataFrame.append was removed in pandas 2.0; build the result with a single
# concat. `data` is the empty frame initialised earlier, so it is simply
# replaced (and left untouched when there was nothing to sample).
if samples:
    data = pd.concat(samples)

# Assign features and labels
# Features are every column except DISTANCE, min-max scaled so that no single
# feature dominates the neighbour distance computation.
# NOTE(review): the scaler is fit on all rows before the cross-validated grid
# search runs, so held-out folds influence the scaling. Consider a Pipeline.
min_max_scaler = MinMaxScaler()
# `columns=` replaces the positional axis argument removed in pandas 2.0.
X: np.ndarray = min_max_scaler.fit_transform(data.drop(columns=['DISTANCE']).to_numpy())
# Labels are the distance categories, converted to integers.
y: np.ndarray = data['DISTANCE'].to_numpy(dtype=int)

# Hyperparameter tuning
# Candidate neighbourhood sizes are the odd values 9, 11, 13, 15 and 17.
n_neighbors: np.array = np.arange(4, 9) * 2 + 1
param_grid = {
    'n_neighbors': n_neighbors,
    'metric': ['minkowski', 'manhattan', 'chebyshev'],
}
# Search every (n_neighbors, metric) combination, using a single job.
grid = GridSearchCV(KNeighborsClassifier(), param_grid, n_jobs=1)
grid.fit(X, y)

GridSearchCV(estimator=KNeighborsClassifier(), n_jobs=1,
             param_grid={'metric': ['minkowski', 'manhattan', 'chebyshev'],
                         'n_neighbors': array([ 9, 11, 13, 15, 17])})

In [16]:
print(grid.best_params_)
print('accuracy =', grid.best_score_)

{'metric': 'manhattan', 'n_neighbors': 15}
accuracy = 0.9303333333333332
