In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the Fashion-MNIST train and test splits from CSV files located
# relative to the notebook's working directory.
train_csv, test_csv = 'fashion-mnist_train.csv', 'fashion-mnist_test.csv'
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [3]:
def _features_and_labels(frame):
    """Split a frame into (features, labels) NumPy arrays.

    The 'label' column becomes the label vector; every other column is
    kept, in order, as the feature matrix.
    """
    labels = frame['label'].to_numpy()
    features = frame.drop(columns='label').to_numpy()
    return features, labels


X_train, y_train = _features_and_labels(df_train)
X_test, y_test = _features_and_labels(df_test)

# Hyper-parameter search for the gradient-boosting classifier using 3-fold
# cross-validation on the training arrays. The last expression displays the
# best parameter combination found.
gbc_test = GradientBoostingClassifier(random_state=0)

# Only values accepted by current scikit-learn releases are searched:
#   * 'loss' is left at its default, which is the multinomial log-loss that
#     the removed spelling 'deviance' selected.
#   * 'auto' is dropped from max_features: for a classifier it was an alias
#     of 'sqrt', so it only refitted an identical configuration (and the
#     value is rejected since scikit-learn 1.3).
#   * 'mae' is replaced by 'squared_error'; 'mae' is no longer a valid
#     criterion for gradient boosting (requires scikit-learn >= 1.0).
param_grid = {
    'n_estimators': [200],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [8],
    'criterion': ['friedman_mse', 'squared_error'],
    'learning_rate': [1.0],
}
test = GridSearchCV(gbc_test, param_grid, cv=3, n_jobs=-1, verbose=2)
test.fit(X_train, y_train)
test.best_params_

In [6]:
# Display names handed to classification_report as target_names.
# Order matters: presumably position i names integer label i of the
# 'label' column -- confirm against the dataset description.
classes = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]
# Cross-validated evaluation of the selected configuration on the training
# set: every sample is predicted by a model fitted on the other folds.
#
# loss is left at its default (log-loss, formerly spelled 'deviance') and
# max_features is 'sqrt', which is what 'auto' meant for a classifier; the
# old spellings are rejected by current scikit-learn, the fitted model is
# the same.
# NOTE(review): the grid search earlier in the notebook only tried
# n_estimators=200 -- confirm that 100 is intentional here.
gbc = GradientBoostingClassifier(
    n_estimators=100,
    max_features='sqrt',
    max_depth=8,
    criterion='friedman_mse',
    learning_rate=1.0,
    random_state=0,
)
pipe = make_pipeline(StandardScaler(), gbc)
# Expensive: the recorded run of this cell took roughly 433 minutes on
# 8 workers.
y_pred_model = cross_val_predict(pipe, X_train, y_train,
                                 cv=8, n_jobs=-1, verbose=2)
# print() renders the report as an aligned table; a bare expression would
# display the escaped string repr with literal '\n' sequences.
print(classification_report(y_train, y_pred_model, digits=4, target_names=classes))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   8 | elapsed: 431.8min remaining: 719.6min
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed: 433.3min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   8 out of   8 | elapsed: 433.3min finished


'              precision    recall  f1-score   support\n\n T-shirt/top     0.7490    0.7543    0.7516      6000\n     Trouser     0.9569    0.9277    0.9420      6000\n    Pullover     0.6961    0.7113    0.7037      6000\n       Dress     0.8145    0.8117    0.8131      6000\n        Coat     0.7018    0.7112    0.7065      6000\n      Sandal     0.8820    0.9053    0.8935      6000\n       Shirt     0.5767    0.5950    0.5857      6000\n     Sneaker     0.8879    0.8868    0.8874      6000\n         Bag     0.9038    0.8550    0.8787      6000\n  Ankle boot     0.8923    0.8822    0.8872      6000\n\n    accuracy                         0.8041     60000\n   macro avg     0.8061    0.8040    0.8049     60000\nweighted avg     0.8061    0.8041    0.8049     60000\n'

In [None]:
# Refit the pipeline on the complete training set, then report per-class
# metrics on the separate test split.
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
test_report = classification_report(
    y_test,
    y_pred,
    digits=4,
    target_names=classes,
)
print(test_report)