### Import dependencies

In [4]:
import numpy as np 
import pandas as pd

from sklearn.datasets import make_blobs
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

### Read dataset

In [5]:
data = pd.read_csv('../reduced.csv')
data.drop('Index', axis=1, inplace=True)
data.head()

x = data.iloc[:, :-1]
y = data.iloc[:, -1]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state=1)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=1) 

### Models

In [6]:
model = KNeighborsClassifier()
n_neighbors = range(1, 21, 2)
weights = ['uniform', 'distance']
metric = ['euclidean', 'manhattan', 'minkowski']

### Define Grid

In [7]:
grid = dict(n_neighbors=n_neighbors,weights=weights,metric=metric)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
grid_result = grid_search.fit(x, y)

In [8]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.462665 using {'metric': 'euclidean', 'n_neighbors': 19, 'weights': 'distance'}
0.418928 (0.009813) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}
0.418928 (0.009813) with: {'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'distance'}
0.386259 (0.010101) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
0.432806 (0.009552) with: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'distance'}
0.403112 (0.006455) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'uniform'}
0.444284 (0.008850) with: {'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}
0.405245 (0.008803) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'uniform'}
0.449956 (0.009981) with: {'metric': 'euclidean', 'n_neighbors': 7, 'weights': 'distance'}
0.408805 (0.007243) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'uniform'}
0.453207 (0.009742) with: {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'distance'}
0.411297

In [9]:
import pickle
with open('./models/knn.pickle', 'wb') as f:
    pickle.dump(grid_result, f)