In [1]:
import numpy as np
np.random.seed(42)
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.metrics import *
from sklearn.model_selection import *
from sklearn.preprocessing import *

In [2]:
mnist = load_digits()
x, y = mnist.data, mnist.target
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.3)

In [3]:
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

#### KNN CLASSIFICATION

In [4]:
from sklearn.neighbors import KNeighborsClassifier

params = {"n_neighbors": [i for i in range(2, 22, 2)]}

clf = KNeighborsClassifier()
grid = GridSearchCV(clf, params, cv=3)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.961814 using {'n_neighbors': 6}
0.954654 (0.008149) with: {'n_neighbors': 2}
0.957836 (0.004777) with: {'n_neighbors': 4}
0.961814 (0.001815) with: {'n_neighbors': 6}
0.958632 (0.004794) with: {'n_neighbors': 8}
0.954654 (0.005104) with: {'n_neighbors': 10}
0.949881 (0.001558) with: {'n_neighbors': 12}
0.949085 (0.009269) with: {'n_neighbors': 14}
0.946698 (0.009216) with: {'n_neighbors': 16}
0.940334 (0.008104) with: {'n_neighbors': 18}
0.939539 (0.009658) with: {'n_neighbors': 20}


In [5]:
# FINAL MODEL
clf = KNeighborsClassifier(n_neighbors=6)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
acc = accuracy_score(y_test, y_pred)

print("Acc: ", acc)

Acc:  0.9722222222222222


#### RANDOM FOREST CLASSIFICATION

In [6]:
from sklearn.ensemble import RandomForestClassifier

params = {"n_estimators": [240, 300, 340],
         "max_depth": [i for i in range(50, 65, 2)]}

clf = RandomForestClassifier()
grid = GridSearchCV(clf, params, cv=3)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))


Best: 0.972951 using {'max_depth': 56, 'n_estimators': 240}
0.964996 (0.002712) with: {'max_depth': 50, 'n_estimators': 240}
0.968974 (0.006558) with: {'max_depth': 50, 'n_estimators': 300}
0.970565 (0.003843) with: {'max_depth': 50, 'n_estimators': 340}
0.968178 (0.000916) with: {'max_depth': 52, 'n_estimators': 240}
0.969769 (0.000931) with: {'max_depth': 52, 'n_estimators': 300}
0.968974 (0.003656) with: {'max_depth': 52, 'n_estimators': 340}
0.968178 (0.001151) with: {'max_depth': 54, 'n_estimators': 240}
0.969769 (0.007899) with: {'max_depth': 54, 'n_estimators': 300}
0.964996 (0.004636) with: {'max_depth': 54, 'n_estimators': 340}
0.972951 (0.002767) with: {'max_depth': 56, 'n_estimators': 240}
0.962609 (0.004748) with: {'max_depth': 56, 'n_estimators': 300}
0.969769 (0.003825) with: {'max_depth': 56, 'n_estimators': 340}
0.966587 (0.001836) with: {'max_depth': 58, 'n_estimators': 240}
0.970565 (0.001332) with: {'max_depth': 58, 'n_estimators': 300}
0.966587 (0.003637) with: {'ma

In [7]:
clf = RandomForestClassifier(n_estimators=240, max_depth=56)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
acc = accuracy_score(y_test, y_pred)

print("Acc: ", acc)

Acc:  0.9740740740740741


#### GRADIENT BOOSTING CLASSIFICATION

In [8]:
from sklearn.ensemble import GradientBoostingClassifier

params = {"learning_rate": [0.06, 0.07],
         "n_estimators": [180, 220, 240],
         "max_depth": [i for i in range(50, 65, 2)]}

clf = GradientBoostingClassifier()
grid = GridSearchCV(clf, params, cv=3)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.894988 using {'learning_rate': 0.07, 'max_depth': 60, 'n_estimators': 240}
0.884646 (0.011371) with: {'learning_rate': 0.06, 'max_depth': 50, 'n_estimators': 180}
0.887033 (0.013291) with: {'learning_rate': 0.06, 'max_depth': 50, 'n_estimators': 220}
0.891010 (0.010300) with: {'learning_rate': 0.06, 'max_depth': 50, 'n_estimators': 240}
0.885442 (0.014351) with: {'learning_rate': 0.06, 'max_depth': 52, 'n_estimators': 180}
0.890215 (0.014338) with: {'learning_rate': 0.06, 'max_depth': 52, 'n_estimators': 220}
0.886237 (0.010314) with: {'learning_rate': 0.06, 'max_depth': 52, 'n_estimators': 240}
0.887828 (0.012429) with: {'learning_rate': 0.06, 'max_depth': 54, 'n_estimators': 180}
0.887033 (0.010858) with: {'learning_rate': 0.06, 'max_depth': 54, 'n_estimators': 220}
0.888624 (0.011289) with: {'learning_rate': 0.06, 'max_depth': 54, 'n_estimators': 240}
0.890215 (0.013804) with: {'learning_rate': 0.06, 'max_depth': 56, 'n_estimators': 180}
0.889419 (0.010205) with: {'learning_

In [9]:
clf = GradientBoostingClassifier(n_estimators=240, max_depth=60, learning_rate=0.06)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
acc = accuracy_score(y_test, y_pred)

print("Acc: ", acc)

Acc:  0.9407407407407408


#### SVM CLASSIFICATION:

In [10]:
from sklearn.svm import SVC

params = {"kernel": ["linear", "rbf", "sigmoid"]}

clf = SVC()
grid = GridSearchCV(clf, params, cv=3)

grid_result = grid.fit(x_train, y_train)

# Summary
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_["mean_test_score"]
stds = grid_result.cv_results_["std_test_score"]
params = grid_result.cv_results_["params"]

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.972156 using {'kernel': 'rbf'}
0.968178 (0.008364) with: {'kernel': 'linear'}
0.972156 (0.006230) with: {'kernel': 'rbf'}
0.945903 (0.004744) with: {'kernel': 'sigmoid'}


In [11]:
clf = SVC(kernel="rbf")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
acc = accuracy_score(y_test, y_pred)

print("Acc: ", acc)

Acc:  0.9796296296296296


#### BEST MODEL: