In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the small MNIST training CSV into one dense array, then separate it:
# column 0 is used as the integer class label, all remaining columns as features.
D_train = np.loadtxt(
    'sample_data/mnist_train_small.csv',
    delimiter=',',
)
y_train = D_train[:, 0].astype(int)
X_train = D_train[:, 1:]

In [3]:
# Read the MNIST test CSV with the same layout as the training file:
# first column -> integer label, every other column -> feature vector.
D_test = np.loadtxt(
    'sample_data/mnist_test.csv',
    delimiter=',',
)
y_test = D_test[:, 0].astype(int)
X_test = D_test[:, 1:]

## k-Nearest Neighbor Classification

In [4]:
from sklearn.neighbors import KNeighborsClassifier

# 7-nearest-neighbour classifier with Euclidean distance; fit() returns the
# estimator itself, so construction and fitting are chained.
model = KNeighborsClassifier(
    n_neighbors=7,
    metric='euclidean',
).fit(X_train, y_train)

# Predict on the training samples themselves (these are in-sample predictions).
y_predicted = model.predict(X_train)

In [5]:
from sklearn.metrics import accuracy_score

# Fraction of training samples whose predicted label matches the true label.
# y_predicted currently holds the predictions made on X_train.
acc_train = accuracy_score(
    y_true=y_train,
    y_pred=y_predicted,
)
print(f"{acc_train:.4f}")

0.9703


In [6]:
# Re-use the fitted model on the held-out test set.
# NOTE: this overwrites y_predicted, which previously held training predictions.
y_predicted = model.predict(X_test)

# Fraction of test samples classified correctly, printed to four decimals.
acc_test = accuracy_score(
    y_true=y_test,
    y_pred=y_predicted,
)
print(f"{acc_test:.4f}")

0.9572


## Tuning hyperparameters

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive grid search: 3 neighbour counts x 2 distance metrics = 6 candidates,
# each scored with 5-fold cross-validation on the training data.
parameters = {
    'n_neighbors': [3, 5, 7],
    'metric': ['manhattan', 'euclidean'],
}
knn = KNeighborsClassifier()
gcv = GridSearchCV(
    estimator=knn,
    param_grid=parameters,
    cv=5,
    verbose=2,  # log every individual fit
)
gcv.fit(X_train, y_train)


In [12]:
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV

# Successive-halving search over the same 6-candidate kNN grid. Instead of
# fitting every candidate on all training rows, the search starts on a subset
# of the samples and, with factor=2, keeps roughly the best half of the
# candidates for the next, larger round.
parameters = {'n_neighbors': [3, 5, 7], 'metric': ['manhattan', 'euclidean']}
knn = KNeighborsClassifier()
# random_state pins the row subsampling performed in the early rounds, so that
# re-running the notebook selects the same candidates and the same best model.
g = HalvingGridSearchCV(knn, parameters, cv=5, factor=2,
                        aggressive_elimination=False, verbose=2,
                        random_state=0)
g.fit(X_train, y_train)


n_iterations: 3
n_required_iterations: 3
n_possible_iterations: 3
min_resources_: 5000
max_resources_: 20000
aggressive_elimination: False
factor: 2
----------
iter: 0
n_candidates: 6
n_resources: 5000
Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END ....................metric=manhattan, n_neighbors=3; total time=   3.8s
[CV] END ....................metric=manhattan, n_neighbors=3; total time=   3.8s
[CV] END ....................metric=manhattan, n_neighbors=3; total time=   4.0s
[CV] END ....................metric=manhattan, n_neighbors=3; total time=   3.8s
[CV] END ....................metric=manhattan, n_neighbors=3; total time=   5.3s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   3.7s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   4.2s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   3.7s
[CV] END ....................metric=manhattan, n_neighbors=5; total time=   3.7s
[CV] END 

In [13]:
# Display the estimator chosen by the halving search fitted above.
g.best_estimator_