In [1]:
import numpy as np


np.random.seed(42)
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#### Helper

In [18]:
def print_grid_cv_results(grid_result: GridSearchCV) -> None:
    print(
        f"Best model score: {grid_result.best_score_} "
        f"Best model params: {grid_result.best_params_} "
    )
    means = grid_result.cv_results_["mean_test_score"]
    stds = grid_result.cv_results_["std_test_score"]
    params = grid_result.cv_results_["params"]

    zipped = sorted(zip(means, stds, params), key=lambda x: x[0], reverse=True)

    for mean, std, param in zipped[:10]:
        mean = round(mean, 4)
        std = round(std, 4)
        print(f"{mean} (+/- {2 * std}) with: {param}")

#### LOAD DATASET

In [3]:
mnist = load_digits()
x: np.ndarray = mnist.data
y: np.ndarray = mnist.target

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

#### NORMALIZE DATASET

In [4]:
scaler = StandardScaler()
scaler.fit(x_train)

x_train: np.ndarray = scaler.transform(x_train)
x_test: np.ndarray = scaler.transform(x_test)

#### KNN CLASSIFICATION

In [15]:
from sklearn.neighbors import KNeighborsClassifier


params = {
    "n_neighbors": [i for i in range(2, 22, 2)],
    "weights": ["uniform", "distance"],
}

clf = KNeighborsClassifier()
grid = GridSearchCV(clf, params, cv=3)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

Best model score: 0.9697692919649961 Best model params: {'n_neighbors': 2, 'weights': 'distance'} 
0.9698 (+/- 0.0022) with: {'n_neighbors': 2, 'weights': 'distance'}
0.9682 (+/- 0.0082) with: {'n_neighbors': 4, 'weights': 'distance'}
0.9682 (+/- 0.0022) with: {'n_neighbors': 6, 'weights': 'distance'}
0.9682 (+/- 0.0082) with: {'n_neighbors': 8, 'weights': 'distance'}
0.9674 (+/- 0.0126) with: {'n_neighbors': 10, 'weights': 'distance'}
0.9626 (+/- 0.0022) with: {'n_neighbors': 6, 'weights': 'uniform'}
0.9602 (+/- 0.0098) with: {'n_neighbors': 12, 'weights': 'distance'}
0.9586 (+/- 0.0098) with: {'n_neighbors': 8, 'weights': 'uniform'}
0.9578 (+/- 0.009) with: {'n_neighbors': 4, 'weights': 'uniform'}
0.9547 (+/- 0.0156) with: {'n_neighbors': 2, 'weights': 'uniform'}


#### RANDOM FOREST CLASSIFICATION

In [19]:
from sklearn.ensemble import RandomForestClassifier


params = {
    "n_estimators": [50 * i for i in range(4, 10)],
    "max_depth": [i for i in range(20, 51, 10)] + [None],
}

clf = RandomForestClassifier()

grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

Best model score: 0.9705648369132857 Best model params: {'max_depth': 30, 'n_estimators': 300} 
0.9706 (+/- 0.0046) with: {'max_depth': 30, 'n_estimators': 300}
0.9706 (+/- 0.0046) with: {'max_depth': 30, 'n_estimators': 350}
0.9706 (+/- 0.0022) with: {'max_depth': 40, 'n_estimators': 450}
0.9706 (+/- 0.0082) with: {'max_depth': 40, 'n_estimators': 400}
0.9698 (+/- 0.009) with: {'max_depth': 50, 'n_estimators': 250}
0.9698 (+/- 0.006) with: {'max_depth': None, 'n_estimators': 250}
0.969 (+/- 0.0104) with: {'max_depth': 20, 'n_estimators': 450}
0.9682 (+/- 0.0136) with: {'max_depth': 20, 'n_estimators': 350}
0.9682 (+/- 0.0046) with: {'max_depth': 20, 'n_estimators': 400}
0.9682 (+/- 0.0098) with: {'max_depth': 40, 'n_estimators': 350}


#### GRADIENT BOOSTING CLASSIFICATION

In [20]:
from sklearn.ensemble import GradientBoostingClassifier


params = {
    "n_estimators": [50 * i for i in range(4, 10)],
    "max_depth": [i for i in range(20, 51, 10)] + [None],
}

clf = GradientBoostingClassifier()

grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

Best model score: 0.8838504375497216 Best model params: {'max_depth': 30, 'n_estimators': 450} 
0.8839 (+/- 0.0228) with: {'max_depth': 30, 'n_estimators': 450}
0.8831 (+/- 0.017) with: {'max_depth': 20, 'n_estimators': 350}
0.8823 (+/- 0.0214) with: {'max_depth': 30, 'n_estimators': 300}
0.8823 (+/- 0.0184) with: {'max_depth': 50, 'n_estimators': 250}
0.8815 (+/- 0.0238) with: {'max_depth': 20, 'n_estimators': 300}
0.8815 (+/- 0.0228) with: {'max_depth': 20, 'n_estimators': 450}
0.8815 (+/- 0.0176) with: {'max_depth': 40, 'n_estimators': 250}
0.8815 (+/- 0.0162) with: {'max_depth': 50, 'n_estimators': 200}
0.8815 (+/- 0.02) with: {'max_depth': None, 'n_estimators': 350}
0.8807 (+/- 0.014) with: {'max_depth': 20, 'n_estimators': 250}


#### SVM CLASSIFICATION:

In [None]:
from sklearn.svm import SVC


params = {"kernel": ["linear", "sigmoid", "rbf", "poly"]}

clf = SVC()

grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

Best model score: 0.9737470167064438 Best model params: {'kernel': 'rbf'} 
0.9674 (+/- 0.0148) with: {'kernel': 'linear'}
0.9475 (+/- 0.0104) with: {'kernel': 'sigmoid'}
0.9737 (+/- 0.0104) with: {'kernel': 'rbf'}
0.9109 (+/- 0.0158) with: {'kernel': 'poly'}


#### BEST MODEL:

In [None]:
from sklearn.svm import SVC


best_params = {"kernel": "rbf"}
best_classifier = SVC

regr = best_classifier(**best_params)
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)

cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)

print(f"Accuracy: {acc}")
print(f"Confusion matrix:\n{cm}")

Accuracy: 0.9796296296296296
Confusion matrix:
[[53  0  0  0  0  0  0  0  0  0]
 [ 0 50  0  0  0  0  0  0  0  0]
 [ 0  0 47  0  0  0  0  0  0  0]
 [ 0  0  2 51  0  1  0  0  0  0]
 [ 0  0  0  0 60  0  0  0  0  0]
 [ 0  0  0  0  0 66  0  0  0  0]
 [ 0  0  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  1  0  0 53  0  1]
 [ 0  0  1  1  0  0  0  0 41  0]
 [ 0  0  0  0  0  1  1  0  2 55]]
