# K-Nearest Neighbors

The following code trains a K-Nearest Neighbors Classifier to predict the distance between the Raspberrry Pis.

In [5]:
import pandas as pd
from pathlib import Path
from pi_pact_sort import categorize
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

DROP_COLUMNS = ['ADDRESS', 'TIMESTAMP', 'UUID', 'MAJOR', 'MINOR', 'TX POWER', 'TEMPERATURE',
                'PITCH', 'ROLL', 'YAW', 'SCAN']
SAMPLE_SIZE = 30000



"""Trains a K-Nearest Neighbors classifier to predict a distance range given RSSI values and other variables.
"""

# Initialize DataFrame
data: pd.DataFrame = pd.DataFrame(columns=['RSSI', 'DISTANCE', 'HUMIDITY', 'PRESSURE'])
data_copy: pd.DataFrame = data.copy()
csv_file: Path
for csv_file in Path('.').glob('indoor-noObstruct-SenseHat*/*.csv'):
    datapart: pd.DataFrame = pd.read_csv(csv_file)
    for column in DROP_COLUMNS:
        if column in datapart.columns:
            datapart = datapart.drop([column], 1)
    data_copy = data_copy.append(datapart)

# Categorize distance
data_copy['DISTANCE'] = data_copy['DISTANCE'].map(categorize)

# Sample data from each distance category
for value in data_copy['DISTANCE'].unique():
    datapart = data_copy[data_copy.DISTANCE == value]
    datapart = datapart.sample(SAMPLE_SIZE, random_state=1)
    data = data.append(datapart)

# Assign features and labels
X: np.array = data.drop(['DISTANCE'], 1).to_numpy()
y: np.array = data['DISTANCE'].to_numpy(dtype=int)

pipe = Pipeline(steps=[('min_max', MinMaxScaler()),
                       ('interactions', PolynomialFeatures(interaction_only=True,
                                                           include_bias=False)),
                       ('model', KNeighborsClassifier())])

# Hyperparameter tuning
n_neighbors: np.array = 2 * np.arange(0, 5000, 500) + 1
grid = GridSearchCV(pipe, {'n_neighbors': n_neighbors,
                           'metric': ['minkowski', 'manhattan', 'chebyshev'],
                           'p': [2, 3, 4],
                           'leaf_size': [20, 30, 40]}, n_jobs=1)
grid.fit(X, y)

ValueError: Invalid parameter leaf_size for estimator Pipeline(steps=[('min_max', MinMaxScaler()),
                ('interactions',
                 PolynomialFeatures(include_bias=False, interaction_only=True)),
                ('model', KNeighborsClassifier())]). Check the list of available parameters with `estimator.get_params().keys()`.

In [None]:
print(grid.best_params_)
print('accuracy =', grid.best_score_)

In [None]:
# Pickle model
import pickle
with open("knn-models/4var-b3-polyFeatures-knn-model.pickle", "wb") as f:
    pickle.dump(grid.best_estimator_, f)