In [61]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from dotenv import load_dotenv
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import WandbLogger

import wandb
import optuna
from optuna.integration.wandb import WeightsAndBiasesCallback
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

wandb.login(key=os.getenv("WANDB_API_KEY"))

True

# Dataset

In [62]:
class FashionMNISTDataset(Dataset):
    def __init__(self, X, y, transform=None):
        self.X = X
        self.y = y
        self.transform = transform
    
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        image = self.X.iloc[idx].values.reshape(28, 28).astype(np.float32) / 255.0
        image = torch.from_numpy(image)
        
        label = torch.tensor(self.y.iloc[idx], dtype=torch.long)

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Data Module

In [63]:
class FashionMNISTDataModule(pl.LightningDataModule):
    def __init__(self, data_dir='data/fashion_mnist', batch_size=64, val_split=0.2, transform=None):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.val_split = val_split
        self.transform = transform

    def setup(self, stage=None):
        train_df = pd.read_csv('data/fashion_mnist/fashion-mnist_train.csv')
        test_df = pd.read_csv('data/fashion_mnist/fashion-mnist_test.csv')

        train_df, val_df = train_test_split(train_df, test_size=self.val_split, random_state=42, stratify=train_df['label'])

        X_train = train_df.drop(columns=['label'])
        y_train = train_df['label']

        X_val = val_df.drop(columns=['label'])
        y_val = val_df['label']

        X_test = test_df.drop(columns=['label'])
        y_test = test_df['label']

        # Create datasets based on stage
        if stage == 'fit' or stage is None:
            self.train_dataset = FashionMNISTDataset(X_train, y_train, transform=self.transform)
            self.val_dataset = FashionMNISTDataset(X_val, y_val, transform=self.transform)
        
        if stage == 'test' or stage is None:
            self.test_dataset = FashionMNISTDataset(X_test, y_test, transform=self.transform)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, shuffle=False)

# Lightning Module

In [64]:
class FashionMNISTLightningModule(pl.LightningModule):
    def __init__(self, dropout_rate=0.25, learning_rate=0.001):
        super().__init__()
        
        self.model = CNNModel(dropout_rate=dropout_rate)
        self.learning_rate = learning_rate
        self.loss = nn.CrossEntropyLoss()
        
        self.save_hyperparameters()

    def forward(self, x):
        if x.dim() == 3:
            x = x.unsqueeze(1)  # Add channel dimension -> (batch, 1, height, width)
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = self.loss(y_pred, y)
        preds = torch.argmax(y_pred, dim=1)
        acc = (preds == y).float().mean()
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = self.loss(y_pred, y)
        preds = torch.argmax(y_pred, dim=1)
        acc = (preds == y).float().mean()
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = self.loss(y_pred, y)
        preds = torch.argmax(y_pred, dim=1)
        acc = (preds == y).float().mean()
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

# Model

In [65]:
class CNNModel(nn.Module):
    def __init__(self, dropout_rate=0.25):
        super().__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 7 * 7, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x

# Baseline Training Pipeline

In [None]:
# 1. Create model
print("=" * 60)
print("STEP 1: Creating model...")
print("=" * 60)
model = FashionMNISTLightningModule(dropout_rate=0.25, learning_rate=0.001)
print(f"✓ Model created with {sum(p.numel() for p in model.parameters()):,} parameters")

# 2. Set up data module
print("\n" + "=" * 60)
print("STEP 2: Setting up data module...")
print("=" * 60)
data_module = FashionMNISTDataModule()
# Note: setup() will be called automatically by PyTorch Lightning
# But we can call it manually to see the data info
data_module.setup()
print(f"✓ Training samples: {len(data_module.train_dataset):,}")
print(f"✓ Validation samples: {len(data_module.val_dataset):,}")
print(f"✓ Test samples: {len(data_module.test_dataset):,}")

# 3. Set up WandB logger
print("\n" + "=" * 60)
print("STEP 3: Initializing WandB...")
print("=" * 60)
wandb_logger = WandbLogger(
    project='Fashion_MNIST', 
    name='baseline-cnn',
    log_model=True  # Log model checkpoints to WandB
)
print("✓ WandB logger initialized")

# 4. Set up callbacks
print("\n" + "=" * 60)
print("STEP 4: Setting up callbacks...")
print("=" * 60)
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath='checkpoints/',
    filename='fashion-mnist-{epoch:02d}-{val_loss:.2f}',
    save_top_k=3,
    mode='min',
    verbose=True  # Print when checkpoints are saved
)

early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min',
    verbose=True
)
print("✓ ModelCheckpoint callback configured")
print("✓ EarlyStopping callback configured")

# 5. Create trainer
print("\n" + "=" * 60)
print("STEP 5: Creating trainer...")
print("=" * 60)
trainer = pl.Trainer(
    max_epochs=10,
    logger=wandb_logger,
    callbacks=[checkpoint_callback, early_stop_callback],
    accelerator='auto',
    devices='auto',
    enable_progress_bar=True,  # Show progress bar
    enable_model_summary=True,  # Show model summary
    log_every_n_steps=50,  # Log every 50 steps
    val_check_interval=1.0,  # Validate after each epoch
    check_val_every_n_epoch=1  # Check validation every epoch
)
print("✓ Trainer created")
print(f"✓ Max epochs: {trainer.max_epochs}")
print(f"✓ Batch size: {data_module.batch_size}")

# 6. Train the model
print("\n" + "=" * 60)
print("STEP 6: Starting training...")
print("=" * 60)
print("You will see progress bars showing:")
print("  - Current epoch / Total epochs")
print("  - Training loss and accuracy")
print("  - Validation loss and accuracy")
print("  - Progress through batches")
print("=" * 60 + "\n")

trainer.fit(model, data_module)

print("\n" + "=" * 60)
print("✓ Training completed!")
print("=" * 60)

# Show best model info
if checkpoint_callback.best_model_path:
    print(f"\nBest model saved at: {checkpoint_callback.best_model_path}")
    print(f"Best validation loss: {checkpoint_callback.best_model_score:.4f}")

# 7. Test the model
print("\n" + "=" * 60)
print("STEP 7: Evaluating on test set...")
print("=" * 60)
test_results = trainer.test(model, data_module)
print("=" * 60)
print("✓ Testing completed!")

# 8. Finish WandB run
print("\n" + "=" * 60)
print("STEP 8: Finalizing WandB...")
print("=" * 60)
wandb.finish()
print("✓ WandB run finished!")
print("\nView your results at: https://wandb.ai")
print("=" * 60)


# Optuna
- Optuna callback logs trial summaries to WandB during hyperparameter optimisation.
- WandB logger logs train/val loss curves during final training of the best model

In [None]:
def objective(trial):
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5, step=0.1)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)  # log as in logarithmic scale
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    
    model = FashionMNISTLightningModule(
        dropout_rate=dropout_rate,
        learning_rate=learning_rate
    )
    
    data_module = FashionMNISTDataModule(batch_size=batch_size)
    data_module.setup()
    
    # No WandB logger here - Optuna callback will handle trial logging
    trainer = pl.Trainer(
        max_epochs=5,  # Fewer epochs for faster hyperparameter search
        logger=False,  # Disable detailed logging - Optuna callback will log trial summaries
        enable_progress_bar=True,
        enable_model_summary=False,
        callbacks=[EarlyStopping(monitor='val_loss', patience=3, mode='min')]
    )
    
    trainer.fit(model, data_module)
    
    return trainer.callback_metrics['val_loss'].item()

print("=" * 60)
print("Starting Optuna Hyperparameter Optimization...")
print("=" * 60)
print("Tuning: dropout_rate, learning_rate, batch_size")
print("=" * 60)

study = optuna.create_study(
    direction='minimize',  # Minimize validation loss
    study_name='fashion_mnist_hyperopt',
    pruner=optuna.pruners.MedianPruner()
)

wandb_kwargs = {
    "project": "Fashion_MNIST",
    "group": "hyperparameter-optimization",
    "job_type": "optuna-search"
}

wandb_callback = WeightsAndBiasesCallback(
    metric_name="val_loss",
    wandb_kwargs=wandb_kwargs,
    as_multirun=True  # Creates separate WandB run for each trial
)

print("\nRunning optimization with WandB logging...")
study.optimize(objective, n_trials=20, callbacks=[wandb_callback])

print("\n" + "=" * 60)
print("Best Hyperparameters Found:")
print("=" * 60)
for key, value in study.best_params.items():
    print(f"  {key}: {value}")
print(f"\nBest validation loss: {study.best_value:.4f}")
print("=" * 60)


In [69]:
study.best_params

{'dropout_rate': 0.2, 'learning_rate': 0.0027002487286870982, 'batch_size': 64}

In [72]:
study

<optuna.study.study.Study at 0x230d77e7350>

# Train best model

In [None]:
best_params = study.best_params
print("=" * 60)
print("Training final model with best hyperparameters:")
print("=" * 60)
for key, value in best_params.items():
    print(f"  {key}: {value}")
print("=" * 60)

# Create final model with best hyperparameters
final_model = FashionMNISTLightningModule(
    dropout_rate=best_params['dropout_rate'],
    learning_rate=best_params['learning_rate']
)

# Create data module with best batch size
final_data_module = FashionMNISTDataModule(batch_size=best_params['batch_size'])
final_data_module.setup()

print(f"\n✓ Training samples: {len(final_data_module.train_dataset):,}")
print(f"✓ Validation samples: {len(final_data_module.val_dataset):,}")
print(f"✓ Test samples: {len(final_data_module.test_dataset):,}")

# Set up WandB logger for final training (full curve logging)
wandb_logger_final = WandbLogger(
    project='Fashion_MNIST', 
    name='optimized-model-v2',
    group='final-training',
    log_model=True
)

# Set up callbacks
checkpoint_callback_final = ModelCheckpoint(
    monitor='val_loss',
    dirpath='checkpoints/',
    filename='fashion-mnist-optimized-{epoch:02d}-{val_loss:.2f}',
    save_top_k=1,
    mode='min',
    verbose=True
)

early_stop_callback_final = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min',
    verbose=True
)

# Create trainer with full epochs and WandB logging
trainer_final = pl.Trainer(
    max_epochs=10,
    logger=wandb_logger_final,  # Full WandB logging for training/validation curves
    callbacks=[checkpoint_callback_final, early_stop_callback_final],
    accelerator='auto',
    devices='auto',
    enable_progress_bar=True,
    enable_model_summary=True,
    log_every_n_steps=50
)

# Train final model
print("\n" + "=" * 60)
print("Training final optimized model with full WandB logging...")
print("=" * 60)
trainer_final.fit(final_model, final_data_module)

print("\n" + "=" * 60)
print("✓ Training completed!")
print("=" * 60)

# Test final model
print("\n" + "=" * 60)
print("Evaluating optimized model on test set...")
print("=" * 60)
trainer_final.test(final_model, final_data_module)
print("=" * 60)
print("✓ Testing completed!")

# Finish WandB
wandb.finish()
print("\n✓ WandB run finished. Check your dashboard at https://wandb.ai")


Training final model with best hyperparameters:
  dropout_rate: 0.2
  learning_rate: 0.0027002487286870982
  batch_size: 64

✓ Training samples: 48,000
✓ Validation samples: 12,000
✓ Test samples: 10,000


GPU available: False, used: False
TPU available: False, using: 0 TPU cores



Training final optimized model with full WandB logging...


c:\Users\Bruno\.pyenv\pyenv-win\versions\3.11.9\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:751: Checkpoint directory C:\Users\Bruno\Desktop\MLOps\homework1\checkpoints exists and is not empty.

  | Name  | Type             | Params | Mode 
---------------------------------------------------
0 | model | CNNModel         | 105 K  | train
1 | loss  | CrossEntropyLoss | 0      | train
---------------------------------------------------
105 K     Trainable params
0         Non-trainable params
105 K     Total params
0.423     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


                                                                           

c:\Users\Bruno\.pyenv\pyenv-win\versions\3.11.9\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\Bruno\.pyenv\pyenv-win\versions\3.11.9\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 750/750 [00:40<00:00, 18.63it/s, v_num=ussr, train_loss_step=0.597, train_acc_step=0.812, val_loss=0.358, val_acc=0.868, train_loss_epoch=0.517, train_acc_epoch=0.812]

Metric val_loss improved. New best score: 0.358
Epoch 0, global step 750: 'val_loss' reached 0.35781 (best 0.35781), saving model to 'C:\\Users\\Bruno\\Desktop\\MLOps\\homework1\\checkpoints\\fashion-mnist-optimized-epoch=00-val_loss=0.36.ckpt' as top 1


Epoch 1: 100%|██████████| 750/750 [00:38<00:00, 19.50it/s, v_num=ussr, train_loss_step=0.299, train_acc_step=0.844, val_loss=0.309, val_acc=0.890, train_loss_epoch=0.349, train_acc_epoch=0.874]

Metric val_loss improved by 0.049 >= min_delta = 0.0. New best score: 0.309
Epoch 1, global step 1500: 'val_loss' reached 0.30857 (best 0.30857), saving model to 'C:\\Users\\Bruno\\Desktop\\MLOps\\homework1\\checkpoints\\fashion-mnist-optimized-epoch=01-val_loss=0.31.ckpt' as top 1


Epoch 2: 100%|██████████| 750/750 [00:49<00:00, 15.22it/s, v_num=ussr, train_loss_step=0.151, train_acc_step=0.938, val_loss=0.313, val_acc=0.886, train_loss_epoch=0.303, train_acc_epoch=0.891]

Epoch 2, global step 2250: 'val_loss' was not in top 1


Epoch 3: 100%|██████████| 750/750 [00:43<00:00, 17.33it/s, v_num=ussr, train_loss_step=0.461, train_acc_step=0.859, val_loss=0.267, val_acc=0.902, train_loss_epoch=0.274, train_acc_epoch=0.899] 

Metric val_loss improved by 0.042 >= min_delta = 0.0. New best score: 0.267
Epoch 3, global step 3000: 'val_loss' reached 0.26682 (best 0.26682), saving model to 'C:\\Users\\Bruno\\Desktop\\MLOps\\homework1\\checkpoints\\fashion-mnist-optimized-epoch=03-val_loss=0.27.ckpt' as top 1


Epoch 4: 100%|██████████| 750/750 [00:40<00:00, 18.34it/s, v_num=ussr, train_loss_step=0.235, train_acc_step=0.922, val_loss=0.251, val_acc=0.907, train_loss_epoch=0.251, train_acc_epoch=0.908] 

Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 0.251
Epoch 4, global step 3750: 'val_loss' reached 0.25123 (best 0.25123), saving model to 'C:\\Users\\Bruno\\Desktop\\MLOps\\homework1\\checkpoints\\fashion-mnist-optimized-epoch=04-val_loss=0.25-v1.ckpt' as top 1


Epoch 5: 100%|██████████| 750/750 [00:44<00:00, 16.91it/s, v_num=ussr, train_loss_step=0.265, train_acc_step=0.875, val_loss=0.249, val_acc=0.912, train_loss_epoch=0.233, train_acc_epoch=0.913] 

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.249
Epoch 5, global step 4500: 'val_loss' reached 0.24907 (best 0.24907), saving model to 'C:\\Users\\Bruno\\Desktop\\MLOps\\homework1\\checkpoints\\fashion-mnist-optimized-epoch=05-val_loss=0.25.ckpt' as top 1


Epoch 6: 100%|██████████| 750/750 [00:39<00:00, 18.80it/s, v_num=ussr, train_loss_step=0.375, train_acc_step=0.891, val_loss=0.262, val_acc=0.906, train_loss_epoch=0.219, train_acc_epoch=0.920] 

Epoch 6, global step 5250: 'val_loss' was not in top 1


Epoch 7: 100%|██████████| 750/750 [00:40<00:00, 18.42it/s, v_num=ussr, train_loss_step=0.243, train_acc_step=0.922, val_loss=0.258, val_acc=0.907, train_loss_epoch=0.208, train_acc_epoch=0.923] 

Epoch 7, global step 6000: 'val_loss' was not in top 1


Epoch 8: 100%|██████████| 750/750 [00:40<00:00, 18.34it/s, v_num=ussr, train_loss_step=0.142, train_acc_step=0.953, val_loss=0.250, val_acc=0.911, train_loss_epoch=0.191, train_acc_epoch=0.928] 

Epoch 8, global step 6750: 'val_loss' was not in top 1


Epoch 9: 100%|██████████| 750/750 [00:41<00:00, 18.21it/s, v_num=ussr, train_loss_step=0.255, train_acc_step=0.906, val_loss=0.251, val_acc=0.913, train_loss_epoch=0.179, train_acc_epoch=0.932] 

Epoch 9, global step 7500: 'val_loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 750/750 [00:41<00:00, 18.20it/s, v_num=ussr, train_loss_step=0.255, train_acc_step=0.906, val_loss=0.251, val_acc=0.913, train_loss_epoch=0.179, train_acc_epoch=0.932]

✓ Training completed!

Evaluating optimized model on test set...


c:\Users\Bruno\.pyenv\pyenv-win\versions\3.11.9\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 37.35it/s]


✓ Testing completed!


0,1
epoch,▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆█████
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▆▇▇▇▇██
train_acc_step,▁▇▃▃▁▃▅▃▂▂▅▇▅▅▅▂▅▇▅▂▇▅▅▇▄▅▅▆█▇▅▇▆▇█▆▇▇▇▅
train_loss_epoch,█▅▄▃▂▂▂▂▁▁
train_loss_step,▆▇▇▆▄█▄▆▇▅▆▄▅█▃▂▆▅▅▄▃▃▁▃▃▂▂▂▃▃▄▂▂▂▁▃▂▃▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇█
val_acc,▁▄▄▆▇█▇▇██
val_loss,█▅▅▂▁▁▂▂▁▁

0,1
epoch,10.0
test_acc,0.9189
test_loss,0.2334
train_acc_epoch,0.93238
train_acc_step,0.90625
train_loss_epoch,0.17933
train_loss_step,0.25466
trainer/global_step,7500.0
val_acc,0.91342
val_loss,0.25134



✓ WandB run finished. Check your dashboard at https://wandb.ai


# Plots
### Baseline model:
Robust steady and slow decrese in loss.
- Test loss: 0.2314
- Test accuracy: 91.3%

<img src="images/baseline_model-train_val_loss.png" width=60%>

### Hyperparameter finetuning:
```
dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5, step=0.1)
learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
```

<img src="images/tuning-val_loss.png" width=60%>

### Tuned model:
Faster initial drop in loss, but overfitting in later stages.
- Test loss: 0.2334
- Test accuracy: 91.9%

<img src="images/optimized_model-train_val_loss.png" width=60%>