In [1]:
import numpy as np
np.random.seed(42)
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.metrics import *
from sklearn.model_selection import *
from sklearn.preprocessing import *

#### LOAD DATASET

In [2]:
mnist = load_digits()
x, y = mnist.data, mnist.target
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.3)

#### NORMALIZE DATASET

In [3]:
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

#### KNN CLASSIFICATION

In [4]:
from sklearn.neighbors import KNeighborsClassifier

params = {"n_neighbors": [i for i in range(2, 22, 2)]}

clf = KNeighborsClassifier()
grid = GridSearchCV(clf, params, cv=3)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.962609 using {'n_neighbors': 6}
0.954654 (0.007795) with: {'n_neighbors': 2}
0.957836 (0.004500) with: {'n_neighbors': 4}
0.962609 (0.001125) with: {'n_neighbors': 6}
0.958632 (0.004904) with: {'n_neighbors': 8}
0.954654 (0.005156) with: {'n_neighbors': 10}
0.949881 (0.001949) with: {'n_neighbors': 12}
0.949881 (0.008494) with: {'n_neighbors': 14}
0.947494 (0.008494) with: {'n_neighbors': 16}
0.941130 (0.007378) with: {'n_neighbors': 18}
0.939539 (0.010000) with: {'n_neighbors': 20}


In [5]:
regr = KNeighborsClassifier(n_neighbors=6)
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)

acc = accuracy_score(y_test, y_pred)

print("ACC: ", acc)

ACC:  0.9722222222222222


#### RANDOM FOREST CLASSIFICATION

In [6]:
from sklearn.ensemble import RandomForestClassifier

params = {"n_estimators": [340],
         "max_depth": [i for i in range(20, 52, 2)]}

clf = RandomForestClassifier()
grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.971360 using {'max_depth': 42, 'n_estimators': 340}
0.968974 (0.005156) with: {'max_depth': 20, 'n_estimators': 340}
0.967383 (0.004056) with: {'max_depth': 22, 'n_estimators': 340}
0.968178 (0.002977) with: {'max_depth': 24, 'n_estimators': 340}
0.967383 (0.007875) with: {'max_depth': 26, 'n_estimators': 340}
0.966587 (0.003897) with: {'max_depth': 28, 'n_estimators': 340}
0.968178 (0.004904) with: {'max_depth': 30, 'n_estimators': 340}
0.968178 (0.007875) with: {'max_depth': 32, 'n_estimators': 340}
0.970565 (0.008787) with: {'max_depth': 34, 'n_estimators': 340}
0.964996 (0.004056) with: {'max_depth': 36, 'n_estimators': 340}
0.968178 (0.002977) with: {'max_depth': 38, 'n_estimators': 340}
0.968178 (0.001125) with: {'max_depth': 40, 'n_estimators': 340}
0.971360 (0.005156) with: {'max_depth': 42, 'n_estimators': 340}
0.964996 (0.005953) with: {'max_depth': 44, 'n_estimators': 340}
0.967383 (0.006264) with: {'max_depth': 46, 'n_estimators': 340}
0.970565 (0.001125) with: {'ma

In [7]:
regr = RandomForestClassifier(max_depth=30, n_estimators=340)
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)

acc = accuracy_score(y_test, y_pred)

print("ACC: ", acc)

ACC:  0.9740740740740741


#### GRADIENT BOOSTING CLASSIFICATION

In [15]:
from sklearn.ensemble import GradientBoostingClassifier

params = {"n_estimators": [340],
         "max_depth": [i for i in range(20, 52, 2)],
         "learning_rate": [0.07]}

clf = GradientBoostingClassifier()
grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.893397 using {'learning_rate': 0.07, 'max_depth': 38, 'n_estimators': 340}
0.891806 (0.009045) with: {'learning_rate': 0.07, 'max_depth': 20, 'n_estimators': 340}
0.891806 (0.010196) with: {'learning_rate': 0.07, 'max_depth': 22, 'n_estimators': 340}
0.891010 (0.014176) with: {'learning_rate': 0.07, 'max_depth': 24, 'n_estimators': 340}
0.890215 (0.012423) with: {'learning_rate': 0.07, 'max_depth': 26, 'n_estimators': 340}
0.890215 (0.011002) with: {'learning_rate': 0.07, 'max_depth': 28, 'n_estimators': 340}
0.890215 (0.013157) with: {'learning_rate': 0.07, 'max_depth': 30, 'n_estimators': 340}
0.891010 (0.016120) with: {'learning_rate': 0.07, 'max_depth': 32, 'n_estimators': 340}
0.889419 (0.014110) with: {'learning_rate': 0.07, 'max_depth': 34, 'n_estimators': 340}
0.890215 (0.011439) with: {'learning_rate': 0.07, 'max_depth': 36, 'n_estimators': 340}
0.893397 (0.011162) with: {'learning_rate': 0.07, 'max_depth': 38, 'n_estimators': 340}
0.891806 (0.009306) with: {'learning_

In [17]:
regr = GradientBoostingClassifier(max_depth=50, n_estimators=240, learning_rate=0.07)
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)

acc = accuracy_score(y_test, y_pred)

print("ACC: ", acc)

ACC:  0.9333333333333333


#### SVM CLASSIFICATION:

In [18]:
from sklearn.svm import SVC

params = {"kernel": ["linear", "sigmoid", "rbf", "poly"]}

clf = SVC()
grid = GridSearchCV(clf, params, cv=3, n_jobs=-1)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.972156 using {'kernel': 'rbf'}
0.968178 (0.008364) with: {'kernel': 'linear'}
0.945903 (0.004744) with: {'kernel': 'sigmoid'}
0.972156 (0.006230) with: {'kernel': 'rbf'}
0.893397 (0.018509) with: {'kernel': 'poly'}


In [19]:
regr = SVC(kernel="rbf")
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)

acc = accuracy_score(y_test, y_pred)

print("ACC: ", acc)

ACC:  0.9796296296296296


#### BEST MODEL:

In [20]:
regr = SVC(kernel="rbf")
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)

cm = confusion_matrix(y_test, y_pred)
print(cm)

[[53  0  0  0  0  0  0  0  0  0]
 [ 0 50  0  0  0  0  0  0  0  0]
 [ 0  0 47  0  0  0  0  0  0  0]
 [ 0  0  2 51  0  1  0  0  0  0]
 [ 0  0  0  0 60  0  0  0  0  0]
 [ 0  0  0  0  0 66  0  0  0  0]
 [ 0  0  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  1  0  0 53  0  1]
 [ 0  0  1  1  0  0  0  0 41  0]
 [ 0  0  0  0  0  1  1  0  2 55]]
