In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np


In [2]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML. as_frame=False asks
# for array-based containers rather than pandas objects.
mnist = fetch_openml(name='mnist_784', version=1, as_frame=False)

# Feature matrix and label vector; the labels are cast to unsigned 8-bit ints.
X = mnist.data
y = mnist.target.astype(np.uint8)


In [3]:
# Hold out 10,000 samples for the final evaluation; the fixed random_state
# keeps the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=10000,
    random_state=42,
)


In [4]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences the fit.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Base estimator whose hyperparameters are tuned by the search below.
knn_clf = KNeighborsClassifier()

# Search space: 3 neighbor counts x 2 weighting schemes = 6 candidates.
param_grid = dict(
    n_neighbors=[3, 4, 5],
    weights=["uniform", "distance"],
)

# Exhaustive search with 3 cross-validation folds per candidate (18 fits),
# with n_jobs=-1 for parallel execution and verbose=2 for progress logging.
# NOTE(review): the input was standardized before the folds are formed, so the
# validation folds contributed to the scaling statistics -- consider wrapping
# the scaler and classifier in a Pipeline if unbiased CV scores matter.
grid_search = GridSearchCV(
    estimator=knn_clf,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train_scaled, y_train)


Fitting 3 folds for each of 6 candidates, totalling 18 fits


In [None]:
# Report the hyperparameter combination selected by the grid search.
print("Best Params:", grid_search.best_params_)

# Use the search's best estimator to predict labels for the scaled test split.
best_knn = grid_search.best_estimator_
y_test_pred = best_knn.predict(X_test_scaled)

# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)
print("Test Accuracy:", accuracy)
