In [1]:
import os
from glob import glob
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import random_split, Subset
from torch.utils.data import Dataset, DataLoader


import pytorch_lightning as pl
from pytorch_lightning.callbacks import TQDMProgressBar, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

import torchvision
from torchvision import transforms
from torchvision.datasets import FashionMNIST

from sklearn.model_selection import train_test_split

from torchmetrics import Accuracy, F1Score, AUROC, MetricCollection

In [2]:
# Hyperparameters and reproducibility settings shared by the cells below.
batch_size = 32    # mini-batch size handed to the data module
random_seed = 42   # value passed to pl.seed_everything
lr = 1e-3          # NOTE(review): no later cell visible here reads `lr` - confirm it is meant to reach the optimizer

In [3]:
# Fix the global seed through Lightning so a fresh-kernel run is repeatable.
# The call returns the seed it applied, which is what the cell displays.
pl.seed_everything(random_seed)

Seed set to 42


42

In [4]:
class FashionMNISTDataModule(pl.LightningDataModule):
    """LightningDataModule serving FashionMNIST train/validation/test loaders.

    The official training set is divided into a training and a validation
    subset; the official test set is used unchanged. Images are converted to
    tensors with ``transforms.ToTensor()``.

    Args:
        batch_size: Number of samples per batch for every dataloader.
        data_dir: Directory the dataset is downloaded to and read from.
        train_fraction: Share of the official training set kept for training;
            the remainder becomes the validation subset.
        num_workers: Worker processes used by each dataloader.
        split_seed: Seed of the private generator that draws the
            train/validation split.
    """

    def __init__(self, batch_size=64, data_dir='data', train_fraction=0.8,
                 num_workers=0, split_seed=42):
        super().__init__()
        self.batch_size = batch_size
        self.data_dir = data_dir
        self.train_fraction = train_fraction
        self.num_workers = num_workers
        self.split_seed = split_seed

        # Populated by setup().
        self.train_dataset = None
        self.val_dataset = None
        self.test_dataset = None

    def prepare_data(self):
        """Download both FashionMNIST splits into ``data_dir`` if missing."""
        FashionMNIST(root=self.data_dir, train=True, download=True)
        FashionMNIST(root=self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Create the train, validation and test datasets.

        ``stage`` is accepted for Lightning compatibility but not inspected:
        all three datasets are (re)built on every call, so any dataloader can
        be requested afterwards.
        """
        transform = transforms.ToTensor()
        full_train = FashionMNIST(root=self.data_dir, train=True, transform=transform)

        train_size = int(self.train_fraction * len(full_train))
        val_size = len(full_train) - train_size

        # setup() runs again for trainer.test(); a dedicated, freshly seeded
        # generator makes every call produce the same split instead of one
        # that depends on the current state of the global RNG.
        split_rng = torch.Generator().manual_seed(self.split_seed)
        self.train_dataset, self.val_dataset = random_split(
            full_train, [train_size, val_size], generator=split_rng
        )

        self.test_dataset = FashionMNIST(root=self.data_dir, train=False, transform=transform)

    def train_dataloader(self):
        """Return the shuffled loader over the training subset."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the loader over the validation subset (not shuffled)."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """Return the loader over the official test set (not shuffled)."""
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )


In [5]:
class FashionMNISTModel(pl.LightningModule):
    """Small convolutional classifier for 1x28x28 images.

    Two size-preserving 3x3 convolutions are followed by a single 2x2 max
    pool (28x28 -> 14x14) and a three-layer fully connected head that emits
    raw class logits.

    Args:
        lr: Initial learning rate handed to AdamW.
        num_classes: Number of output classes; sizes the final layer and the
            metrics.
    """

    def __init__(self, lr=0.001, num_classes=10):
        super().__init__()
        self.lr = lr
        self.num_classes = num_classes

        # Convolutional layers (padding=1 with a 3x3 kernel keeps H and W)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # Reduces feature map size by half

        # Fully connected layers
        self.fc1 = nn.Linear(64 * 14 * 14, 128)  # Flattened input after pooling
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)  # Output layer, one logit per class

        # Metrics: a template collection cloned per stage so validation and
        # test statistics are accumulated independently.
        self.metrics = MetricCollection({
            'accuracy': Accuracy(task='multiclass', num_classes=num_classes),
            'f1': F1Score(task='multiclass', average='macro', num_classes=num_classes),
            'auroc': AUROC(task='multiclass', num_classes=num_classes),
        })

        self.val_metrics = self.metrics.clone(prefix='val_')
        self.test_metrics = self.metrics.clone(prefix='test_')

    def forward(self, x):
        """Map a batch of images to unnormalised class logits."""
        # Convolutional layers with ReLU activation; pooling only after conv2
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten the feature maps, keeping the batch dimension
        x = torch.flatten(x, start_dim=1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # No activation for logits
        return x

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one training batch."""
        x, y = batch
        # Call the module rather than .forward() so registered hooks run.
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Accumulate validation metrics and log the epoch-level loss."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.val_metrics.update(logits, y)
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_validation_epoch_end(self):
        """Log the metrics accumulated over the validation epoch, then reset."""
        self.log_dict(self.val_metrics.compute(), prog_bar=True, on_epoch=True)
        self.val_metrics.reset()

    def test_step(self, batch, batch_idx):
        """Accumulate test metrics and log the epoch-level loss."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.test_metrics.update(logits, y)
        self.log('test_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_test_epoch_end(self):
        """Log the metrics accumulated over the test epoch, then reset."""
        self.log_dict(self.test_metrics.compute(), prog_bar=True, on_epoch=True)
        self.test_metrics.reset()

    def configure_optimizers(self):
        """Return AdamW plus a plateau scheduler driven by ``val_loss``."""
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr)
        # Cut the learning rate tenfold after 3 epochs without val_loss improvement.
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.1, patience=3
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss'
            }
        }

In [6]:
# Build the network with its default settings, then the data pipeline
# using the batch size chosen in the configuration cell.
base_model = FashionMNISTModel()
data_module = FashionMNISTDataModule(batch_size=batch_size)

In [7]:
# Keep the checkpoint with the lowest validation loss. Without an explicit
# monitor the default checkpoint callback only tracks the most recent epoch,
# which is `patience` epochs past the optimum once early stopping triggers.
best_checkpoint = pl.callbacks.ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1)

trainer = pl.Trainer(
    max_epochs=100,  # upper bound; early stopping normally ends training sooner
    log_every_n_steps=10,
    callbacks=[
        # Stop after 5 epochs without improvement of the validation loss.
        EarlyStopping(monitor="val_loss", mode="min", patience=5),
        best_checkpoint,
        TQDMProgressBar(refresh_rate=10),
    ],
    logger=TensorBoardLogger(save_dir='lightning_logs', name='fashion_mnist_conv_model'),
)

trainer.fit(model=base_model, datamodule=data_module)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type             | Params | Mode 
----------------------------------------------------------
0 | conv1        | Conv2d           | 320    | train
1 | conv2        | Conv2d           | 18.5 K | train
2 | pool         | MaxPool2d        | 0      | train
3 | fc1          | Linear           | 1.6 M  | train
4 | fc2          | Linear           | 8.3 K  | train
5 | fc3          | Linear           | 650    | train
6 | metrics      | MetricCollection | 0      | train
7 | val_metrics  | MetricCollection | 0      | train
8 | test_metrics | MetricCollection | 0      | train
----------------------------------------------------------
1.6 M     Trainable params
0         Non-trainable params
1.6 M     Total params
6.534     Total estimated model params size (MB)
18        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/ekaterina.baru/miniconda3/envs/myenv/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.
/Users/ekaterina.baru/miniconda3/envs/myenv/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=10` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

In [None]:
# Evaluate on the held-out test set. No model is passed, so state explicitly
# that the best checkpoint recorded during `trainer.fit` is the one to load
# instead of relying on Lightning's implicit fallback.
trainer.test(datamodule=data_module, ckpt_path="best")

In [None]:
# (Re)load the TensorBoard notebook extension and open it on the directory
# the TensorBoardLogger above writes to.
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/