Math 5750/6880: Mathematics of Data Science \
Project 3

# 1. Fashion-MNIST image classification using sklearn

In [1]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Fashion-MNIST: images plus integer labels 0-9, which stand for
# T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten every image into a single row of features (one row per sample).
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Standardize the features: the statistics are fitted on the training split
# only and then reused unchanged for the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [2]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train and evaluate two scikit-learn MLP classifiers on the standardized data.
import time


def fit_and_time(model, X, y):
    """Fit ``model`` on ``(X, y)`` in place and return the elapsed fit time in seconds.

    Durations are measured with ``time.perf_counter``, a monotonic
    high-resolution clock, rather than ``time.time``, which can jump when the
    system clock is adjusted.
    """
    t0 = time.perf_counter()
    model.fit(X, y)
    return time.perf_counter() - t0


# Baseline MLP: a single hidden layer of 100 ReLU units, trained with Adam.
mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='relu',
                    solver='adam', max_iter=20, random_state=42)
train_seconds = fit_and_time(mlp, X_train, y_train)

# Predictions
y_pred = mlp.predict(X_test)

# Evaluation
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Training time: {train_seconds:.2f} seconds")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Larger MLP: two hidden layers, a smaller initial learning rate, and early
# stopping enabled.
mlp2 = MLPClassifier(hidden_layer_sizes=(256, 128),
                     activation='relu',
                     solver='adam',
                     learning_rate_init=0.0005,
                     early_stopping=True,
                     max_iter=30,
                     random_state=42)
train_seconds2 = fit_and_time(mlp2, X_train, y_train)

y_pred2 = mlp2.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred2):.4f}")
print(f"Training time: {train_seconds2:.2f} seconds")



Accuracy: 0.8804
Training time: 50.94 seconds
Confusion Matrix:
 [[768   3  17  28   7   0 168   0   9   0]
 [  2 973   1  16   4   0   3   0   1   0]
 [ 13   1 781  14 116   1  74   0   0   0]
 [ 17  10  12 884  38   0  36   0   3   0]
 [  1   1  61  23 862   1  49   0   2   0]
 [  0   0   0   1   0 955   0  27   1  16]
 [ 79   2  80  27  74   0 729   0   9   0]
 [  0   0   0   0   0  16   0 972   0  12]
 [  9   0   6   3   7   5   8   4 957   1]
 [  0   0   1   0   0  10   1  63   2 923]]
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.77      0.81      1000
           1       0.98      0.97      0.98      1000
           2       0.81      0.78      0.80      1000
           3       0.89      0.88      0.89      1000
           4       0.78      0.86      0.82      1000
           5       0.97      0.95      0.96      1000
           6       0.68      0.73      0.71      1000
           7       0.91      0.97      0.94    

# 3. Fashion-MNIST image classification using PyTorch

In [3]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
import torch
from torch.utils.data import TensorDataset, DataLoader

# Reload the raw Fashion-MNIST arrays. Labels 0-9 stand for:
# T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Rescale pixels to [0, 1] as float32 and insert a channel axis -> (N, 1, 28, 28)
X_train = np.expand_dims(X_train.astype(np.float32) / 255.0, axis=1)
X_test = np.expand_dims(X_test.astype(np.float32) / 255.0, axis=1)

# Labels as int64
y_train, y_test = y_train.astype("int64"), y_test.astype("int64")

# Train/validation split: the first 50,000 training samples are used for
# fitting, everything after them for validation.
X_tr, X_val = np.split(X_train, [50000])
y_tr, y_val = np.split(y_train, [50000])

# Wrap each split in a TensorDataset ...
train_ds = TensorDataset(torch.as_tensor(X_tr), torch.as_tensor(y_tr))
val_ds = TensorDataset(torch.as_tensor(X_val), torch.as_tensor(y_val))
test_ds = TensorDataset(torch.as_tensor(X_test), torch.as_tensor(y_test))

# ... and expose it through a DataLoader; only the training loader shuffles.
train_loader = DataLoader(dataset=train_ds, batch_size=128, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=256, shuffle=False)
test_loader = DataLoader(dataset=test_ds, batch_size=256, shuffle=False)

In [5]:
import torch.nn as nn
import torch.optim as optim

# On Colab, switch the runtime type to GPU so that CUDA is picked up here;
# otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# your code here
# ----- 1. Define a simple CNN model -----
class SimpleCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution, one 2x2 max-pool, one linear classifier.

    The linear layer is sized for inputs of shape (N, 1, 28, 28): the padded
    convolution keeps 28x28, pooling halves it to 14x14, which gives
    16 * 14 * 14 features per sample. The output is 10 raw class logits.
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 16, 3, padding=1)  # 1 input channel -> 16 filters
        self.pool = nn.MaxPool2d(2, 2)              # reduce image size by half
        self.fc = nn.Linear(16 * 14 * 14, 10)       # fully connected layer (10 classes)

    def forward(self, x):
        """Return the logits of shape (N, 10) for a batch ``x`` of shape (N, 1, 28, 28)."""
        x = torch.relu(self.conv(x))
        x = self.pool(x)
        # Flatten everything except the batch axis. A `view(-1, 16 * 14 * 14)`
        # would instead let the batch axis absorb a size mismatch (e.g. four
        # 14x14 images would be merged into one sample); with a per-sample
        # flatten, a wrong input size raises at the linear layer.
        x = torch.flatten(x, start_dim=1)
        return self.fc(x)

# ----- 2. Create model, loss function, and optimizer -----
# Seed PyTorch's global RNG before the model is built so that the weight
# initialization and the shuffled batch order are repeatable on re-runs
# (GPU kernels may still introduce small nondeterminism).
torch.manual_seed(42)
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ----- 3. Train the model -----
for epoch in range(5):  # small number of epochs for simplicity
    model.train()
    total_loss = 0.0  # sum of per-sample losses over the epoch
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        # `loss` is the mean over the batch; weight it by the batch size so the
        # shorter final batch does not count as much as a full one.
        total_loss += loss.item() * y_batch.size(0)
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader.dataset):.4f}")

# ----- 4. Evaluate on test set -----
model.eval()  # switch the module to evaluation mode
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for scoring
    # Batch-local names on purpose: iterating as `X_test, y_test` would
    # overwrite the notebook-level test arrays with the last mini-batch and
    # break any later (or re-run) cell that uses them.
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)
        predicted = outputs.argmax(dim=1)  # class with the highest logit
        total += y_batch.size(0)
        correct += (predicted == y_batch).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Using device: cpu
Epoch 1, Loss: 0.6025
Epoch 2, Loss: 0.3716
Epoch 3, Loss: 0.3308
Epoch 4, Loss: 0.3098
Epoch 5, Loss: 0.2923
Test Accuracy: 88.25%
