In [7]:
import numpy as np
np.random.seed(42)
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

#### Helper

In [None]:
def print_grid_cv_results(grid_result):
    print(
        f"Best model score: {grid_result.best_score_} "
        f"Best model params: {grid_result.best_params_} "
    )
    means = grid_result.cv_results_["mean_test_score"]
    stds = grid_result.cv_results_["std_test_score"]
    params = grid_result.cv_results_["params"]

    for mean, std, param in zip(means, stds, params):
        mean = round(mean, 4)
        std = round(std, 4)
        print(f"{mean} (+/- {2 * std}) with: {param}")

#### LOAD DATASET

In [3]:
mnist = load_digits()
x = mnist.data
y = mnist.target

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

#### NORMALIZE DATASET

In [4]:
scaler = StandardScaler()
scaler.fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

#### KNN CLASSIFICATION

In [5]:
from sklearn.neighbors import KNeighborsClassifier

params = {
    "n_neighbors": [i for i in range(2, 22, 2)]
}

clf = KNeighborsClassifier()

grid = GridSearchCV(clf, params, cv=3)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

Best model params: 0.9626093874303899 Best model score: {'n_neighbors': 6} 
0.954653937947494 (+/- 0.01558943352606642) with: {'n_neighbors': 2}
0.9578361177406522 (+/- 0.009000563642788277) with: {'n_neighbors': 4}
0.9626093874303899 (+/- 0.0022501409106970953) with: {'n_neighbors': 6}
0.958631662688942 (+/- 0.009808136838455038) with: {'n_neighbors': 8}
0.954653937947494 (+/- 0.010311441047586103) with: {'n_neighbors': 10}
0.9498806682577565 (+/- 0.0038973583815165823) with: {'n_neighbors': 12}
0.9498806682577565 (+/- 0.016988191331792) with: {'n_neighbors': 14}
0.9474940334128878 (+/- 0.01698819133179204) with: {'n_neighbors': 16}
0.9411296738265712 (+/- 0.014755160692912809) with: {'n_neighbors': 18}
0.9395385839299921 (+/- 0.019999689880630885) with: {'n_neighbors': 20}


#### RANDOM FOREST CLASSIFICATION

In [6]:
from sklearn.ensemble import RandomForestClassifier

params = {
    "n_estimators": [340],
    "max_depth": [i for i in range(20, 52, 2)]
}

clf = RandomForestClassifier()

grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

Best model params: 0.971360381861575 Best model score: {'max_depth': 32, 'n_estimators': 340} 
0.9689737470167064 (+/- 0.007794716763033165) with: {'max_depth': 20, 'n_estimators': 340}
0.9681782020684168 (+/- 0.00811299843053744) with: {'max_depth': 22, 'n_estimators': 340}
0.9673826571201273 (+/- 0.011250704553485162) with: {'max_depth': 24, 'n_estimators': 340}
0.9689737470167064 (+/- 0.0038973583815165823) with: {'max_depth': 26, 'n_estimators': 340}
0.9681782020684168 (+/- 0.00811299843053744) with: {'max_depth': 28, 'n_estimators': 340}
0.9689737470167064 (+/- 0.014052125483417333) with: {'max_depth': 30, 'n_estimators': 340}
0.971360381861575 (+/- 0.007794716763033165) with: {'max_depth': 32, 'n_estimators': 340}
0.9673826571201273 (+/- 0.009000563642788173) with: {'max_depth': 34, 'n_estimators': 340}
0.969769291964996 (+/- 0.011906626529113546) with: {'max_depth': 36, 'n_estimators': 340}
0.9673826571201273 (+/- 0.014755160692912809) with: {'max_depth': 38, 'n_estimators': 340

#### GRADIENT BOOSTING CLASSIFICATION

In [8]:
from sklearn.ensemble import GradientBoostingClassifier

params = {
    "n_estimators": [340],
    "max_depth": [i for i in range(20, 52, 2)],
    "learning_rate": [0.07]
}

clf = GradientBoostingClassifier()

grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

#### SVM CLASSIFICATION:

In [18]:
from sklearn.svm import SVC

params = {
    "kernel": ["linear", "sigmoid", "rbf", "poly"]
}

clf = SVC()

grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)
grid_result = grid.fit(x_train, y_train)

print_grid_cv_results(grid_result)

Best: 0.972156 using {'kernel': 'rbf'}
0.968178 (0.008364) with: {'kernel': 'linear'}
0.945903 (0.004744) with: {'kernel': 'sigmoid'}
0.972156 (0.006230) with: {'kernel': 'rbf'}
0.893397 (0.018509) with: {'kernel': 'poly'}


#### BEST MODEL:

In [20]:
regr = SVC(kernel="rbf")
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)

cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(x_test, y_test)

print(f"Accuracy: {acc}")
print(f"Confusion matrix:\n{cm}")

[[53  0  0  0  0  0  0  0  0  0]
 [ 0 50  0  0  0  0  0  0  0  0]
 [ 0  0 47  0  0  0  0  0  0  0]
 [ 0  0  2 51  0  1  0  0  0  0]
 [ 0  0  0  0 60  0  0  0  0  0]
 [ 0  0  0  0  0 66  0  0  0  0]
 [ 0  0  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  1  0  0 53  0  1]
 [ 0  0  1  1  0  0  0  0 41  0]
 [ 0  0  0  0  0  1  1  0  2 55]]
