In [1]:
import time
import numpy as np
import tensorflow as tf
from keras.datasets import fashion_mnist #needed to use keras 2.3.0!
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
K.set_image_data_format("channels_last")

import matplotlib.pyplot as plt
#from livelossplot import PlotLossesKeras
from PIL import Image
import datetime
import cv2

from scipy import stats
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, Normalizer
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SelectPercentile, chi2, SelectFromModel
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC, LinearSVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# Load Fashion-MNIST through Keras; the loader returns a (train, test) pair,
# each of which is an (images, labels) pair.
train_split, test_split = fashion_mnist.load_data()
X_train_orig, y_train_orig = train_split
X_test_orig, y_test_orig = test_split

In [3]:
# Feature set "PIL": one intensity histogram per image, computed with PIL.
# The same extraction is applied first to the training set, then to the test set.
def _pil_histograms(images, set_name):
    """Return a list holding one PIL histogram (list of counts) per image.

    Parameters
    ----------
    images : iterable of 2-D arrays
        The images to describe; each one is wrapped with ``Image.fromarray``.
    set_name : str
        Label used in the sanity-check message ("training set" / "test set").

    Returns
    -------
    list of list
        ``histogram()`` output for every image, in input order.
    """
    histograms = []
    for i, image in enumerate(images):
        # All Fashion-MNIST images are greyscale, so no colour conversion is needed.
        featureVector = Image.fromarray(image).histogram()

        if len(featureVector) != 256:  # just a sanity check
            print("Unexpected length of feature vector for " + set_name + ": " + str(len(featureVector)) + " at index: " + str(i))

        histograms.append(featureVector)
    return histograms


# Iterate over the arrays themselves instead of hard-coding 60000 / 10000.
dataTrain = _pil_histograms(X_train_orig, "training set")
dataTest = _pil_histograms(X_test_orig, "test set")
    

    
def flatten(l):
    """Flatten one nesting level: a sequence of sequences becomes a single list."""
    return [item for sublist in l for item in sublist]


print("Extracting features using OpenCV" + " (" + str(datetime.datetime.now()) + ")")

# OpenCV feature sets for the training images: the images have a single
# (greyscale) channel, so instead of per-channel histograms we build three
# histograms that differ only in their number of bins (128, 64 and 32).
dataOpenCVTrain_128 = []
dataOpenCVTrain_64 = []
dataOpenCVTrain_32 = []

# Pair every bin count with the list that collects its feature vectors.
trainHistogramTargets = (
    (128, dataOpenCVTrain_128),
    (64, dataOpenCVTrain_64),
    (32, dataOpenCVTrain_32),
)

for i in range(len(X_train_orig)):
    # The dataset already holds arrays, so no PIL round-trip is required.
    imageOpenCVTrain = np.array(X_train_orig[i])

    for bins, featureStore in trainHistogramTargets:
        histOpenCV = cv2.calcHist([imageOpenCVTrain], [0], None, [bins], [0, 256])
        # calcHist returns one row per bin; flatten turns it into a flat feature vector.
        featureVectorOpenCV = flatten(histOpenCV)

        if len(featureVectorOpenCV) != bins:  # sanity check, in case we had a wrong number of bins...
            # Report the length of the vector that was actually built (the
            # previous message referenced an undefined variable).
            print("Unexpected length of feature vector for training set (" + str(bins) + "): " + str(len(featureVectorOpenCV)) + " at index: " + str(i))

        featureStore.append(featureVectorOpenCV)

# OpenCV feature sets for the test images: same extraction as for the training
# images, i.e. three single-channel histograms with 128, 64 and 32 bins.
dataOpenCVTest_128 = []
dataOpenCVTest_64 = []
dataOpenCVTest_32 = []

# Pair every bin count with the list that collects its feature vectors.
testHistogramTargets = (
    (128, dataOpenCVTest_128),
    (64, dataOpenCVTest_64),
    (32, dataOpenCVTest_32),
)

for i in range(len(X_test_orig)):
    # The dataset already holds arrays, so no PIL round-trip is required.
    imageOpenCVTest = np.array(X_test_orig[i])

    for bins, featureStore in testHistogramTargets:
        histOpenCV = cv2.calcHist([imageOpenCVTest], [0], None, [bins], [0, 256])
        # calcHist returns one row per bin; flatten turns it into a flat feature vector.
        featureVectorOpenCV = flatten(histOpenCV)

        if len(featureVectorOpenCV) != bins:  # sanity check, in case we had a wrong number of bins...
            # Report the length of the vector that was actually built (the
            # previous message referenced an undefined variable).
            print("Unexpected length of feature vector for test set (" + str(bins) + "): " + str(len(featureVectorOpenCV)) + " at index: " + str(i))

        featureStore.append(featureVectorOpenCV)


print( ".... done" + " (" + str(datetime.datetime.now()) + ")")

Extracting features using OpenCV (2021-01-14 22:44:26.865411)
.... done (2021-01-14 22:44:40.147417)


## Prepare Training and Test Data

In [4]:
# Feature sets used one at a time to train the classifiers. Position k of
# trainingSets belongs with position k of testSets:
# OpenCV 128-bin, OpenCV 64-bin, OpenCV 32-bin, then the PIL histogram.
trainingSets = [
    dataOpenCVTrain_128,
    dataOpenCVTrain_64,
    dataOpenCVTrain_32,
    dataTrain,
]
testSets = [
    dataOpenCVTest_128,
    dataOpenCVTest_64,
    dataOpenCVTest_32,
    dataTest,
]

## Model Predictions

**Types:**
- Nearest Neighbor
- Support Vector Machines
- Decision Trees
- Neural Networks

In [5]:
NN = 1  # number of neighbours used by the k-NN baseline
RANDOM_STATE = 42  # fixed seed so the stochastic baselines give repeatable results

# Baseline classifiers keyed by the display name used in the reports below.
# Insertion order is the order in which they are trained and evaluated.
d = {
    "KNearest Neighbors (" + str(NN) + ")": KNeighborsClassifier(n_neighbors=NN),
    "SVM rbf": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Multi-layer Perceptron Classifier": MLPClassifier(max_iter=1000, random_state=RANDOM_STATE),
}

# flatten() guarantees 1-D label arrays for scikit-learn; it is harmless when
# the labels are already 1-D.
y_train = y_train_orig.flatten()
y_test = y_test_orig.flatten()

### Find best classifier and best training set

In [6]:


# Fit every baseline classifier on every feature set, report accuracy, timing,
# confusion matrix and per-class metrics, then rank the classifiers per feature set.
for index, X_train in enumerate(trainingSets):
    print('==================================')
    print('index = ', index)
    X_test = testSets[index]  # test features matching the current training features
    scoreList = []
    nameList = []

    for name, clf in d.items():
        start = time.time()
        print("\n--------------",name,"---------------\n")
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        # Compute the accuracy once and reuse it for both the report and the ranking.
        score = accuracy_score(y_test, y_pred)
        scoreList.append(score)
        nameList.append(name)

        end = time.time()

        print("- Accuracy: %0.3f" % score, "- Time: %0.4f" % (end - start), "seconds")
        print(confusion_matrix(y_test, y_pred))
        print(classification_report(y_test, y_pred))

    print("###############")
    # Highest accuracy first; ties are broken by classifier name (descending).
    ranking = sorted(zip(scoreList, nameList), reverse=True)
    print("\nClassifiers from best to worst:")
    for position, (rankedScore, rankedName) in enumerate(ranking, start=1):
        print(position, ') {0:35} score: {1}'.format(rankedName, rankedScore))

index =  0

-------------- KNearest Neighbors (1) ---------------

- Accuracy: 0.414 - Time: 64.0081 seconds
[[319  14 121  62 140  10 155   5 104  70]
 [  8 672  12 120  21  20  10  82  12  43]
 [ 93  10 294  27 211   1 242   1  89  32]
 [ 50 118  30 310  75  57  60  78  64 158]
 [ 68  12 117  51 373   6 201   4  89  79]
 [  8  12   1  53   5 685   8 149  26  53]
 [123  10 162  42 234   6 277   3  95  48]
 [  4  90   0  79   5 195   3 498  13 113]
 [ 97  13 101  67 149  32 122  21 210 188]
 [ 45  15  11 114  79  49  34  77  72 504]]
              precision    recall  f1-score   support

           0       0.39      0.32      0.35      1000
           1       0.70      0.67      0.68      1000
           2       0.35      0.29      0.32      1000
           3       0.34      0.31      0.32      1000
           4       0.29      0.37      0.33      1000
           5       0.65      0.69      0.66      1000
           6       0.25      0.28      0.26      1000
           7       0.54    

- Accuracy: 0.535 - Time: 220.2711 seconds
[[469  19  73  56 103   9 120   6 110  35]
 [  6 775   4  56  15  12  12  90  20  10]
 [ 74   4 405  17 163   2 212   2 106  15]
 [ 42 169  24 346  76  37  24 106  63 113]
 [ 55   4  94  38 548   3 130   6  67  55]
 [  2   4   0  12   3 797   2 143  16  21]
 [158  12 188  41 220   5 254   3  95  24]
 [  0  68   0  28   0 116   0 742   4  42]
 [ 86  12  82  45  92  53  77  24 432  97]
 [ 31  15   7  91  36  30   2 100 101 587]]
              precision    recall  f1-score   support

           0       0.51      0.47      0.49      1000
           1       0.72      0.78      0.74      1000
           2       0.46      0.41      0.43      1000
           3       0.47      0.35      0.40      1000
           4       0.44      0.55      0.49      1000
           5       0.75      0.80      0.77      1000
           6       0.30      0.25      0.28      1000
           7       0.61      0.74      0.67      1000
           8       0.43      0.43      

- Accuracy: 0.437 - Time: 358.8528 seconds
[[478   5  67  55 161   3 105   9  28  89]
 [ 18 715   4  54  10  58   8  39   3  91]
 [128   1 365  10 196   0 251   0  27  22]
 [ 69 214  11 256  40  52  28  50   7 273]
 [108   3 116  33 466   1 135   5  23 110]
 [  4 114   1  35   0 554   1 221   4  66]
 [206   5 177  24 196   4 295   3  24  66]
 [  1 228   0  20   1 133   0 544   2  71]
 [129  38  98  49 224  12  95  18  94 243]
 [ 43 111  10  91  91  17   5  18  15 599]]
              precision    recall  f1-score   support

           0       0.40      0.48      0.44      1000
           1       0.50      0.71      0.59      1000
           2       0.43      0.36      0.39      1000
           3       0.41      0.26      0.31      1000
           4       0.34      0.47      0.39      1000
           5       0.66      0.55      0.60      1000
           6       0.32      0.29      0.31      1000
           7       0.60      0.54      0.57      1000
           8       0.41      0.09      

### Best training set:

In [7]:
# Position of the chosen feature set inside trainingSets / testSets.
# The descriptors were ordered earlier so that the best one sits at index 3.
bestIndex = 3
X_train, X_test = trainingSets[bestIndex], testSets[bestIndex]

## Hyperparameter Optimization

### SVM

In [17]:
# Grid search over the SVC hyper-parameters C and gamma on the chosen feature set;
# the wall-clock time covers the search, the prediction and the scoring.
param_grid = dict(C=[1, 10], gamma=[0.001, 0.01, 1])

start = time.time()
clf_gridsearch = GridSearchCV(estimator=SVC(), param_grid=param_grid, verbose=0)
clf_gridsearch.fit(X_train, y_train)
print(clf_gridsearch.best_params_)

# predict() uses the estimator refitted with the best parameter combination.
predictions = clf_gridsearch.predict(X_test)
score_gridsearch = accuracy_score(y_test, predictions)
end = time.time()

print("GridSearchCV - Accuracy: {0:.3f}".format(score_gridsearch),
      "- Time: {0:.2f}".format(end - start),
      "seconds")

{'C': 1, 'gamma': 0.001}
GridSearchCV - Accuracy: 0.565 - Time: 114183.15 seconds
Previous Accuracy:  0.4964

[[340   5 107  78 117   7 198   5 109  34]
 [  8 673   8 114  13  56  10  40  14  64]
 [ 61   2 482   9 139   5 200   1  86  15]
 [ 45  97  26 425  59  55  45  35  48 165]
 [ 57   2 109  45 475   1 187   2  66  56]
 [  3  61   2  47   2 678   1 142  14  50]
 [100   3 204  54 201   1 341   1  68  27]
 [  0 154   0  36   0 191   2 533   6  78]
 [ 70  23  94  60  88  22  88  17 375 163]
 [ 20  57  13  93  39  22  10  25  79 642]]
              precision    recall  f1-score   support

           0       0.48      0.34      0.40      1000
           1       0.62      0.67      0.65      1000
           2       0.46      0.48      0.47      1000
           3       0.44      0.42      0.43      1000
           4       0.42      0.47      0.45      1000
           5       0.65      0.68      0.67      1000
           6       0.32      0.34      0.33      1000
           7       0.67   

### Decision Tree

In [14]:
# Grid search over the decision-tree hyper-parameters on the chosen feature set.
# 'auto' was dropped from max_features: for classifiers it is an alias of 'sqrt'
# (so it only doubled the work) and newer scikit-learn releases no longer accept it.
param_grid = {'criterion' : ['gini', 'entropy'],
              'max_depth': [10, 50, 100, None],
              'max_features' : ['sqrt', 'log2', None],
              'splitter' : ['best', 'random'],
              'min_samples_leaf': [1, 2, 4]}

start = time.time()
# Fixed seed: tree construction is randomised, so without it the selected
# parameters and the reported accuracy change from run to run.
clf_gridsearch = GridSearchCV(DecisionTreeClassifier(random_state=42), param_grid, verbose = 0)
clf_gridsearch.fit(X_train, y_train)
print(clf_gridsearch.best_params_)
# predict() uses the estimator refitted with the best parameter combination.
predictions = clf_gridsearch.predict(X_test)
score_gridsearch = accuracy_score(y_test, predictions)
end = time.time()
print("GridSearchCV - Accuracy: %0.3f" % score_gridsearch, "- Time: %0.2f" % (end - start), "seconds")

{'criterion': 'gini', 'max_depth': 10, 'max_features': None, 'min_samples_leaf': 4, 'splitter': 'best'}
GridSearchCV - Accuracy: 0.464 - Time: 3132.95 seconds
Previous Accuracy:  0.3863

[[340   5 107  78 117   7 198   5 109  34]
 [  8 673   8 114  13  56  10  40  14  64]
 [ 61   2 482   9 139   5 200   1  86  15]
 [ 45  97  26 425  59  55  45  35  48 165]
 [ 57   2 109  45 475   1 187   2  66  56]
 [  3  61   2  47   2 678   1 142  14  50]
 [100   3 204  54 201   1 341   1  68  27]
 [  0 154   0  36   0 191   2 533   6  78]
 [ 70  23  94  60  88  22  88  17 375 163]
 [ 20  57  13  93  39  22  10  25  79 642]]
              precision    recall  f1-score   support

           0       0.48      0.34      0.40      1000
           1       0.62      0.67      0.65      1000
           2       0.46      0.48      0.47      1000
           3       0.44      0.42      0.43      1000
           4       0.42      0.47      0.45      1000
           5       0.65      0.68      0.67      1000
   

### KNN

In [15]:
# Grid search over the k-NN hyper-parameters (neighbour count, vote weighting and
# Minkowski exponent p) on the chosen feature set; the wall-clock time covers
# the search, the prediction and the scoring.
param_grid = dict(
    n_neighbors=[1, 5, 10, 50, 100],
    weights=['uniform', 'distance'],
    p=[1, 2],
)

start = time.time()
clf_gridsearch = GridSearchCV(estimator=KNeighborsClassifier(), param_grid=param_grid, verbose=0)
clf_gridsearch.fit(X_train, y_train)
print(clf_gridsearch.best_params_)

# predict() uses the estimator refitted with the best parameter combination.
predictions = clf_gridsearch.predict(X_test)
score_gridsearch = accuracy_score(y_test, predictions)
end = time.time()

print("GridSearchCV - Accuracy: {0:.3f}".format(score_gridsearch),
      "- Time: {0:.2f}".format(end - start),
      "seconds")

{'n_neighbors': 50, 'p': 1, 'weights': 'distance'}
GridSearchCV - Accuracy: 0.513 - Time: 21774.02 seconds
Previous Accuracy:  0.4366

[[340   5 107  78 117   7 198   5 109  34]
 [  8 673   8 114  13  56  10  40  14  64]
 [ 61   2 482   9 139   5 200   1  86  15]
 [ 45  97  26 425  59  55  45  35  48 165]
 [ 57   2 109  45 475   1 187   2  66  56]
 [  3  61   2  47   2 678   1 142  14  50]
 [100   3 204  54 201   1 341   1  68  27]
 [  0 154   0  36   0 191   2 533   6  78]
 [ 70  23  94  60  88  22  88  17 375 163]
 [ 20  57  13  93  39  22  10  25  79 642]]
              precision    recall  f1-score   support

           0       0.48      0.34      0.40      1000
           1       0.62      0.67      0.65      1000
           2       0.46      0.48      0.47      1000
           3       0.44      0.42      0.43      1000
           4       0.42      0.47      0.45      1000
           5       0.65      0.68      0.67      1000
           6       0.32      0.34      0.33      1000
 

### Multi-Layer-Perceptron

In [16]:
# Grid search over the multi-layer-perceptron hyper-parameters on the chosen feature set.
param_grid = {'hidden_layer_sizes': [(10,30,10),(20,)],
              'activation': ['tanh', 'relu'],
              'solver': ['sgd', 'adam'],
              'alpha': [0.0001, 0.05],
              'learning_rate': ['constant','adaptive']}

start = time.time()
# Fixed seed: weight initialisation and batch shuffling are random, so without
# it the selected parameters and the reported accuracy change from run to run.
clf_gridsearch = GridSearchCV(MLPClassifier(random_state=42), param_grid, verbose = 0)
clf_gridsearch.fit(X_train, y_train)
print(clf_gridsearch.best_params_)
# predict() uses the estimator refitted with the best parameter combination.
predictions = clf_gridsearch.predict(X_test)
score_gridsearch = accuracy_score(y_test, predictions)
end = time.time()
print("GridSearchCV - Accuracy: %0.3f" % score_gridsearch, "- Time: %0.2f" % (end - start), "seconds")





{'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (10, 30, 10), 'learning_rate': 'adaptive', 'solver': 'adam'}
GridSearchCV - Accuracy: 0.557 - Time: 21834.19 seconds
Previous Accuracy:  0.4366

[[340   5 107  78 117   7 198   5 109  34]
 [  8 673   8 114  13  56  10  40  14  64]
 [ 61   2 482   9 139   5 200   1  86  15]
 [ 45  97  26 425  59  55  45  35  48 165]
 [ 57   2 109  45 475   1 187   2  66  56]
 [  3  61   2  47   2 678   1 142  14  50]
 [100   3 204  54 201   1 341   1  68  27]
 [  0 154   0  36   0 191   2 533   6  78]
 [ 70  23  94  60  88  22  88  17 375 163]
 [ 20  57  13  93  39  22  10  25  79 642]]
              precision    recall  f1-score   support

           0       0.48      0.34      0.40      1000
           1       0.62      0.67      0.65      1000
           2       0.46      0.48      0.47      1000
           3       0.44      0.42      0.43      1000
           4       0.42      0.47      0.45      1000
           5       0.65      0.68      

### Runtime of each classifier with its best parameters

### SVM

In [19]:
# Time one fit + predict cycle of the SVC configured with the parameters
# selected by the grid search, then report its quality on the test set.
best_svc_params = {"C": 1, "gamma": 0.001}

start = time.time()
clf = SVC(**best_svc_params)
y_pred = clf.fit(X_train, y_train).predict(X_test)  # fit() returns the estimator itself
end = time.time()

elapsed = end - start
print("total runtime = ", elapsed)
for report in (confusion_matrix, classification_report):
    print(report(y_test, y_pred))

total runtime =  3053.6037588119507
[[603   5  74  60  69   5  78   2  59  45]
 [ 22 792  14  87   7   3   5  49  12   9]
 [123   0 526  15 137   0 162   0  26  11]
 [ 65 113  34 444  49  21  34  78  31 131]
 [ 52   2 130  64 517   2 117   3  54  59]
 [  6   3   1  21   1 777   3 133  19  36]
 [212   3 227  43 171   3 242   4  57  38]
 [  1  49   0  50   0  66   2 747   0  85]
 [135   7 116  68  89  27  87  16 320 135]
 [ 36   8   4  94  29  21  12  54  57 685]]
              precision    recall  f1-score   support

           0       0.48      0.60      0.53      1000
           1       0.81      0.79      0.80      1000
           2       0.47      0.53      0.49      1000
           3       0.47      0.44      0.46      1000
           4       0.48      0.52      0.50      1000
           5       0.84      0.78      0.81      1000
           6       0.33      0.24      0.28      1000
           7       0.69      0.75      0.72      1000
           8       0.50      0.32      0.39   

### Decision Tree

In [20]:
# Time one fit + predict cycle of the decision tree configured with the
# parameters selected by the grid search, then report its test-set quality.
start = time.time()
# Fixed seed: tree construction is randomised, so without it the confusion
# matrix and the report change from run to run.
clf = DecisionTreeClassifier(criterion='gini', max_depth=10, max_features=None, min_samples_leaf=4, splitter='best', random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
end = time.time()
print("total runtime = ", end - start)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

total runtime =  5.649099826812744
[[487  21  81  65  81   5  90   1  70  99]
 [ 16 744   8 107   5   3  18  75   9  15]
 [ 82   1 437  14 160   0 218   0  66  22]
 [ 59 164  38 341  52  27  17  95  30 177]
 [106   5 245  56 311   5 130   3  71  68]
 [ 16   7   0  42   0 675   1 178  14  67]
 [193  10 237  58 124   5 275   2  58  38]
 [  2  56   0  95   1  81   0 652   4 109]
 [138  13 137  80 111  37  97  19 200 168]
 [ 94   2  16 140  46  57   5  59  60 521]]
              precision    recall  f1-score   support

           0       0.41      0.49      0.44      1000
           1       0.73      0.74      0.74      1000
           2       0.36      0.44      0.40      1000
           3       0.34      0.34      0.34      1000
           4       0.35      0.31      0.33      1000
           5       0.75      0.68      0.71      1000
           6       0.32      0.28      0.30      1000
           7       0.60      0.65      0.63      1000
           8       0.34      0.20      0.25    

### KNN

In [21]:
# Time one fit + predict cycle of the k-NN classifier configured with the
# parameters selected by the grid search, then report its test-set quality.
best_knn_params = {"n_neighbors": 50, "p": 1, "weights": 'distance'}

start = time.time()
clf = KNeighborsClassifier(**best_knn_params)
y_pred = clf.fit(X_train, y_train).predict(X_test)  # fit() returns the estimator itself
end = time.time()

elapsed = end - start
print("total runtime = ", elapsed)
for report in (confusion_matrix, classification_report):
    print(report(y_test, y_pred))

total runtime =  347.9531605243683
[[421   3  44  86 202   9 123   8  36  68]
 [  7 786   3  79  19  21  10  38   3  34]
 [ 50   0 292  14 323   2 279   0  15  25]
 [ 25 132  11 370  89  59  24  78   8 204]
 [ 32   2  43  35 670   5 111   4  11  87]
 [  3  14   2  14   1 763   3 125   0  75]
 [125   7 115  50 306   4 316   6  18  53]
 [  0  88   0  23   0 140   1 645   1 102]
 [ 59   6  60  63 233  30 124  19 137 269]
 [ 13  21   1  72  75  22  11  48  10 727]]
              precision    recall  f1-score   support

           0       0.57      0.42      0.49      1000
           1       0.74      0.79      0.76      1000
           2       0.51      0.29      0.37      1000
           3       0.46      0.37      0.41      1000
           4       0.35      0.67      0.46      1000
           5       0.72      0.76      0.74      1000
           6       0.32      0.32      0.32      1000
           7       0.66      0.65      0.65      1000
           8       0.57      0.14      0.22    

### Multi-Layer-Perceptron

In [23]:
# Time one fit + predict cycle of the multi-layer perceptron configured with the
# parameters selected by the grid search, then report its test-set quality.
start = time.time()
# Fixed seed: weight initialisation and batch shuffling are random, so without
# it the confusion matrix and the report change from run to run.
clf = MLPClassifier(activation='relu', alpha=0.05, hidden_layer_sizes=(10, 30, 10), learning_rate='adaptive', solver='adam', random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
end = time.time()
print("total runtime = ", end - start)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))



total runtime =  306.36153626441956
[[591  17  61  59  69   9  50   3  99  42]
 [ 10 803  12  57   6   0   3  64  38   7]
 [ 61   0 581  10 136   0 117   1  76  18]
 [ 38 148  41 373  61  17  12 100  65 145]
 [ 80   4 209  45 454   3  86   2  60  57]
 [  7   2   0  12   0 750   0 170  21  38]
 [209   3 346  44 155   0 114   3  87  39]
 [  1  55   0  36   0  48   0 786   1  73]
 [103  10 125  54  67  33  41  21 415 131]
 [ 49   6   4  76  33  14   1  79  51 687]]
              precision    recall  f1-score   support

           0       0.51      0.59      0.55      1000
           1       0.77      0.80      0.78      1000
           2       0.42      0.58      0.49      1000
           3       0.49      0.37      0.42      1000
           4       0.46      0.45      0.46      1000
           5       0.86      0.75      0.80      1000
           6       0.27      0.11      0.16      1000
           7       0.64      0.79      0.71      1000
           8       0.45      0.41      0.43   