In [3]:
# Import all libraries needed

import collections
from sklearn import preprocessing
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
import time
from sklearn import metrics
from IPython.display import Markdown, display
import random
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist

In [4]:
# Show Shapes

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
print("Shape of x_train: {}".format(x_train.shape))
print("Shape of y_train: {}".format(y_train.shape))
print()
print("Shape of x_test: {}".format(x_test.shape))
print("Shape of y_test: {}".format(y_test.shape))

Shape of x_train: (60000, 28, 28)
Shape of y_train: (60000,)

Shape of x_test: (10000, 28, 28)
Shape of y_test: (10000,)


In [5]:
# Change the Shapes and show it
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255.0
x_test /= 255.0
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1] * x_train.shape[2])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1] * x_test.shape[2])

print(x_train.shape)
print(x_test.shape)

(60000, 784)
(10000, 784)


In [6]:
# Import specified libraries

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report

In [16]:
# Define parameter grid (ranges)

clf = RandomForestClassifier(random_state=42)


param_grid = {
    'criterion' : ['gini', 'entropy'],
    'n_estimators': [400],
    'max_depth': [200],
    'min_samples_leaf': [1],
    'min_samples_split': [10],
    'max_features': ['auto', 'sqrt'],
}

In [17]:
# Define Randomized Search

rand_search = RandomizedSearchCV(
    clf,
    param_grid,
    cv=2,
    verbose=1,   
    scoring='accuracy',
    random_state=42,
    n_jobs=-1,
)

rand_search.fit(x_train, y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.


Fitting 2 folds for each of 4 candidates, totalling 8 fits


[Parallel(n_jobs=-1)]: Done   3 out of   8 | elapsed:  3.9min remaining:  6.5min
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed:  4.7min finished


RandomizedSearchCV(cv=2, estimator=RandomForestClassifier(random_state=42),
                   n_jobs=-1,
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'max_depth': [200],
                                        'max_features': ['auto', 'sqrt'],
                                        'min_samples_leaf': [1],
                                        'min_samples_split': [10],
                                        'n_estimators': [400]},
                   random_state=42, scoring='accuracy', verbose=1)

In [18]:
# Results

clf = RandomForestClassifier(**rand_search.best_params_, random_state=42)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(classification_report(y_test, y_pred, digits=4))
print('Best Results:\n', rand_search.best_params_)

              precision    recall  f1-score   support

           0     0.8217    0.8620    0.8414      1000
           1     0.9938    0.9600    0.9766      1000
           2     0.7654    0.7930    0.7790      1000
           3     0.8779    0.9130    0.8951      1000
           4     0.7692    0.8230    0.7952      1000
           5     0.9796    0.9600    0.9697      1000
           6     0.7206    0.5880    0.6476      1000
           7     0.9271    0.9540    0.9404      1000
           8     0.9549    0.9740    0.9644      1000
           9     0.9517    0.9460    0.9488      1000

    accuracy                         0.8773     10000
   macro avg     0.8762    0.8773    0.8758     10000
weighted avg     0.8762    0.8773    0.8758     10000

Best Results:
 {'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'auto', 'max_depth': 200, 'criterion': 'entropy'}
