In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the training and test splits from CSV files in the working directory.
# Paths are relative, so the notebook must be run next to the data files.
train_csv, test_csv = 'fashion-mnist_train.csv', 'fashion-mnist_test.csv'
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [3]:
def _split_xy(frame):
    """Split a frame into (features, labels) NumPy arrays.

    Features are every column except 'label'; labels are the 'label' column.
    """
    features = frame.drop(columns='label').to_numpy()
    labels = frame['label'].to_numpy()
    return features, labels


# Same split applied to both data sets.
X_train, y_train = _split_xy(df_train)
X_test, y_test = _split_xy(df_test)

In [4]:
# Display names passed as `target_names` to the classification reports.
# NOTE(review): assumes the integer labels are 0-9 in exactly this order —
# confirm against the dataset description.
classes = ['T-shirt/top', 'Trouser', 'Pullover',
           'Dress', 'Coat', 'Sandal', 'Shirt',
           'Sneaker', 'Bag', 'Ankle boot']

# Base estimator for the search; the seed is fixed so candidates are comparable.
forest_test = RandomForestClassifier(random_state=0)

# Hyper-parameter candidates (2 x 2 = 4 combinations, each fitted on 5 folds).
# 'auto' is intentionally NOT listed under max_features: for
# RandomForestClassifier it was only an alias of 'sqrt' (deprecated in
# scikit-learn 1.1, removed in 1.3). Keeping it either duplicated the 'sqrt'
# candidates or made those fits fail parameter validation on current versions.
param_grid = {
    'n_estimators': [200],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [8],
    'criterion': ['gini', 'entropy']
}

# Exhaustive 5-fold search using all available cores; verbose=2 logs each fit.
# NOTE(review): the search runs on the raw features (no scaler), and the final
# model cell uses n_estimators/max_depth values that are outside this grid.
test = GridSearchCV(forest_test, param_grid, cv=5, n_jobs=-1, verbose=2)
test.fit(X_train, y_train)

# Last expression of the cell: display the winning parameter combination.
test.best_params_

In [5]:
# Final model. max_features='sqrt' replaces the former 'auto', which meant the
# same thing for classifiers but was removed in scikit-learn 1.3 and now fails
# parameter validation.
forest = RandomForestClassifier(n_estimators=300, max_features='sqrt', criterion='entropy', max_depth=15, random_state=0)

# The scaler lives inside the pipeline so it is re-fitted on the training part
# of every fold during cross-validation.
pipe = make_pipeline(StandardScaler(), forest)

# Out-of-fold predictions: each training sample is predicted by a model that
# did not see it during fitting (5 folds, all cores, progress logged).
y_pred_model = cross_val_predict(pipe, X_train, y_train,
                                 cv=5, n_jobs=-1, verbose=2)

# Per-class precision / recall / F1 on the training set, from those predictions.
print(classification_report(y_train, y_pred_model, digits=4, target_names=classes))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:  2.7min remaining:  4.1min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  2.7min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:  2.7min finished


              precision    recall  f1-score   support

 T-shirt/top     0.8151    0.8697    0.8415      6000
     Trouser     0.9940    0.9637    0.9786      6000
    Pullover     0.7807    0.8068    0.7935      6000
       Dress     0.8757    0.9182    0.8964      6000
        Coat     0.7609    0.8348    0.7962      6000
      Sandal     0.9746    0.9520    0.9632      6000
       Shirt     0.7505    0.5770    0.6524      6000
     Sneaker     0.9292    0.9390    0.9341      6000
         Bag     0.9573    0.9725    0.9649      6000
  Ankle boot     0.9411    0.9527    0.9468      6000

    accuracy                         0.8786     60000
   macro avg     0.8779    0.8786    0.8768     60000
weighted avg     0.8779    0.8786    0.8768     60000



# Refit the pipeline on the complete training set, then predict the test set.
# Pipeline.fit returns the pipeline itself, so the two calls can be chained.
y_pred = pipe.fit(X_train, y_train).predict(X_test)

# Per-class precision / recall / F1 on the test set.
test_report = classification_report(y_test, y_pred, digits=4, target_names=classes)
print(test_report)