Math 5750/6880: Mathematics of Data Science \
Project 3

# 1. Fashion-MNIST image classification using sklearn

In [1]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Fashion-MNIST: grayscale clothing images with integer labels 0-9, in order:
# T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten each image into one feature row per sample: (N, H, W) -> (N, H*W)
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Standardize features. The scaler is fitted on the training set only and the
# same fitted statistics are then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import time

# For this command I asked ChatGPT to give me an explanation of what each parameter
# of MLPClassifier does, to better understand how to modify them.

# Baseline model. Every setting is spelled out explicitly (they are not all
# scikit-learn's defaults) so that later experiments can vary one at a time.
mlp_baseline = MLPClassifier(hidden_layer_sizes=(128,),
                             activation='relu',
                             solver='adam',
                             learning_rate_init=0.001,
                             early_stopping=False,
                             alpha=0.0001,
                             max_iter=20,
                             random_state=42,
                             verbose=False)

# Time only the fit. perf_counter() is a monotonic high-resolution clock, so the
# measured interval cannot be distorted by system clock adjustments.
start_time = time.perf_counter()
mlp_baseline.fit(X_train, y_train)
train_time = time.perf_counter() - start_time

# Evaluate on the held-out test split
y_pred = mlp_baseline.predict(X_test)

print("Baseline Accuracy:", accuracy_score(y_test, y_pred))
print(f"Baseline Training Time: {train_time:.2f} s")
print(confusion_matrix(y_test, y_pred))


Baseline Accuracy: 0.886
Baseline Training Time: 66.09 s
[[823   2  22  21   4   2 120   0   6   0]
 [  2 980   2   9   4   0   2   0   1   0]
 [ 19   0 847   9  63   1  59   0   2   0]
 [ 18   8  22 879  48   0  23   0   2   0]
 [  0   1 119  21 807   0  50   0   2   0]
 [  0   0   0   1   0 961   0  16   3  19]
 [102   2 106  20  70   0 691   0   9   0]
 [  0   0   0   0   0  19   0 954   0  27]
 [  7   0   7   4   5   6   8   3 959   1]
 [  1   0   0   0   0   9   1  30   0 959]]




In [5]:
#Function to train and evaluate one MLP configuration, with the baseline settings as defaults
def train_mlp(X_train, y_train, X_test, y_test,
              layers=(128,),           # hidden_layer_sizes
              activation='relu',           # activation
              solver='adam',           # solver
              learning=0.001,            # learning_rate_init
              estop=False,            # early_stopping
              alpha=0.0001,       # regularization strength
              max_iter=20,        # max iterations
              random_state=42,    # random seed
              verbose=False):     # training verbosity
    """Fit an MLPClassifier with the given settings and score it on the test data.

    Parameters
    ----------
    X_train, y_train : training features and labels passed to ``fit``.
    X_test, y_test : features passed to ``predict`` and the labels the
        predictions are compared against.
    layers : forwarded as ``hidden_layer_sizes``.
    activation : forwarded as ``activation``.
    solver : forwarded as ``solver``.
    learning : forwarded as ``learning_rate_init``.
    estop : forwarded as ``early_stopping``.
    alpha, max_iter, random_state, verbose : forwarded under the same names.

    Returns
    -------
    tuple
        ``(model, acc, conf_mat)``: the fitted classifier, its accuracy on
        the test data, and the confusion matrix of its test predictions.

    Side effects
    ------------
    Prints one line with the accuracy and the time spent in ``fit``.
    """
    #Define Model
    model = MLPClassifier(hidden_layer_sizes=layers,
                          activation=activation,
                          solver=solver,
                          learning_rate_init=learning,
                          early_stopping=estop,
                          alpha=alpha,
                          max_iter=max_iter,
                          random_state=random_state,
                          verbose=verbose)

    #Train (timed with a monotonic clock so clock adjustments cannot skew the interval)
    start_time = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - start_time

    #Evaluation Metrics
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    conf_mat = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {acc:.4f} | Training Time: {train_time:.2f} s")

    return model, acc, conf_mat

In [6]:
# Configurations to compare. Each entry is
# (hidden layer sizes, activation, solver, initial learning rate, early stopping).
params = [
    ((128,), 'relu', 'adam', 0.001, False),
    ((256,128), 'relu', 'adam', 0.001, False),
    ((256,128,64), 'tanh', 'adam', 0.001, False),
    ((128,), 'relu', 'sgd', 0.01, False),
    ((128,), 'relu', 'adam', 0.001, True),
]

# Train one model per configuration and record (model number, test accuracy).
results = []
for run_id, (layers, activation, solver, learning, estop) in enumerate(params, start=1):
    print(f"\n--- Training Model {run_id} ---")
    _, acc, _ = train_mlp(
        X_train, y_train, X_test, y_test,
        layers=layers,
        activation=activation,
        solver=solver,
        learning=learning,
        estop=estop,
    )
    results.append((run_id, acc))


--- Training Model 1 ---




Accuracy: 0.8860 | Training Time: 61.59 s

--- Training Model 2 ---




Accuracy: 0.8901 | Training Time: 142.37 s

--- Training Model 3 ---




Accuracy: 0.8812 | Training Time: 186.63 s

--- Training Model 4 ---




Accuracy: 0.8850 | Training Time: 53.54 s

--- Training Model 5 ---
Accuracy: 0.8860 | Training Time: 54.34 s




# 3. Fashion-MNIST image classification using PyTorch

In [None]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
import torch
from torch.utils.data import TensorDataset, DataLoader

# Fashion-MNIST label ids 0-9 correspond, in order, to:
# T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Convert pixels to float32 in [0, 1] and insert a channel axis -> (N, 1, 28, 28)
X_train = np.expand_dims(X_train.astype(np.float32) / 255.0, axis=1)
X_test = np.expand_dims(X_test.astype(np.float32) / 255.0, axis=1)

# Labels are cast to int64 before being wrapped as tensors
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)

# Train/validation split: the first 50,000 training samples are used for
# fitting, the remainder (the last 10k) for validation.
n_fit = 50000
X_tr, y_tr = X_train[:n_fit], y_train[:n_fit]
X_val, y_val = X_train[n_fit:], y_train[n_fit:]


def _as_dataset(features, labels):
    """Wrap a pair of NumPy arrays as a TensorDataset."""
    return TensorDataset(torch.from_numpy(features), torch.from_numpy(labels))


# Datasets for each split
train_ds = _as_dataset(X_tr, y_tr)
val_ds = _as_dataset(X_val, y_val)
test_ds = _as_dataset(X_test, y_test)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=256, shuffle=False)

In [None]:
import torch.nn as nn
import torch.optim as optim

# In Colab, use "Change runtime type" to select a GPU.
# Pick CUDA when it is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# your code here