In [1]:
from sklearn.datasets import fetch_openml
# pin the OpenML dataset version so every re-run loads the same data
# (without it scikit-learn resolves whichever version is currently "active")
dataset = fetch_openml("mnist_784", version=1)

In [2]:
# split the fetched bunch into the feature matrix and the label vector
X, y = dataset.data, dataset.target

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
import numpy as np

In [5]:
# turn the target into integers (uint8 is wide enough for the MNIST digit labels)
# assumes the labels are loaded as non-integer values (e.g. strings) - TODO confirm
# NOTE: rebinds `y`; re-running this cell is harmless because the cast is idempotent
y = y.astype(np.uint8)

In [6]:
# hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle (and every score derived from it) reproducible across kernel restarts
# NOTE: outputs recorded from an earlier unseeded run may differ slightly
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [7]:
# sanity check on the split: (training samples, features per sample)
X_train.shape

(56000, 784)

In [8]:
# the label vector should have one entry per training row
y_train.shape

(56000,)

In [9]:
from sklearn.neighbors import KNeighborsClassifier

In [10]:
from sklearn.model_selection import cross_val_score

In [11]:
# baseline model: 3-nearest-neighbours classifier; n_jobs=-1 asks
# scikit-learn to use all available CPU cores
kneighbors = KNeighborsClassifier(n_neighbors=3, n_jobs=-1)

In [12]:
# fit the baseline on the training split only
kneighbors.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=-1, n_neighbors=3, p=2,
                     weights='uniform')

In [13]:
# predict labels for the held-out test split
y_pred = kneighbors.predict(X_test)

In [14]:
def get_accuracy(y_pred, y_test):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels.
    y_test : array-like
        True labels; must have the same shape as ``y_pred``.

    Returns
    -------
    numpy.float64
        Share of positions where both inputs agree, between 0.0 and 1.0
        (``nan`` when the inputs are empty).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Convert first: on plain Python lists ``==`` is one whole-list comparison
    # (a single bool), which would silently yield a wrong score. np.asarray
    # also strips any pandas index so the comparison is purely positional.
    y_pred = np.asarray(y_pred)
    y_test = np.asarray(y_test)
    if y_pred.shape != y_test.shape:
        raise ValueError(
            "y_pred and y_test must have the same shape, got "
            f"{y_pred.shape} and {y_test.shape}"
        )
    # the mean of the boolean match mask is the accuracy
    return np.mean(y_pred == y_test)

In [15]:
# test-set accuracy of the baseline 3-NN model
get_accuracy(y_pred, y_test)

0.9726428571428571

In [16]:
from sklearn.model_selection import GridSearchCV

In [17]:
# search grid: neighbour counts 3 through 6, each tried with both
# weighting schemes (8 candidate combinations in total)
params = {
    "n_neighbors": list(range(3, 7)),
    "weights": ("uniform", "distance"),
}

In [18]:
# fresh, unfitted classifier for the grid search; n_neighbors and weights
# are supplied by the search grid
# NOTE: rebinds `kneighbors`, so the baseline model fitted earlier is no
# longer reachable under this name
kneighbors = KNeighborsClassifier(n_jobs=-1)

In [19]:
# cross-validated search over every combination in `params`;
# cv and scoring are left at their defaults
clf = GridSearchCV(kneighbors, params)

In [20]:
# run the search on the training split only, so the test split stays unseen
# slow cell: the recorded cv_results_ show roughly two minutes of scoring per fit
clf.fit(X_train, y_train)

GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=-1,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'n_neighbors': [3, 4, 5, 6],
                         'weights': ('uniform', 'distance')},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [21]:
# estimator configured with the best-scoring parameter combination
clf.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=-1, n_neighbors=4, p=2,
                     weights='distance')

In [22]:
# raw per-candidate search results (fit/score timings, parameter values, ...)
# NOTE(review): this dumps the whole dict; a tabular summary would be easier to read
clf.cv_results_

{'mean_fit_time': array([9.50207305, 9.14835234, 9.26637325, 9.44394536, 8.51849666,
        8.5427083 , 8.52654743, 8.47185588]),
 'std_fit_time': array([0.89663481, 0.0433753 , 0.12875915, 1.26176369, 0.10457276,
        0.06945564, 0.03629137, 0.03327225]),
 'mean_score_time': array([129.47155313, 126.95115914, 126.40787759, 127.01169105,
        115.61957517, 115.23867865, 115.27471385, 115.10547099]),
 'std_score_time': array([ 2.75713777,  0.88101306,  0.4021508 , 11.26354535,  0.34875094,
         0.53575893,  0.42539618,  0.67013313]),
 'param_n_neighbors': masked_array(data=[3, 3, 4, 4, 5, 5, 6, 6],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_weights': masked_array(data=['uniform', 'distance', 'uniform', 'distance',
                    'uniform', 'distance', 'uniform', 'distance'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
    