In [1]:
from image_classification.models.mlp import MLPClassifier
from image_classification.models.resnet import ResNetClassifier
from layers import ResNetBlock
import lightning.pytorch as pl
import lightning.pytorch.loggers as pl_loggers
import lightning.pytorch.callbacks as pl_callbacks

from util import set_seed
from weight_init import init_for_relu

In [2]:
from datasets import FashionMNIST

In [4]:
pl.seed_everything(42, True)

train_dl, valid_dl, test_dl = FashionMNIST.get_dataloaders(batch_size=16,
                                                           pin_memory=True,
                                                           num_workers=4,
                                                           persistent_workers=True)

n_classes = len(train_dl.dataset.classes)

model = ResNetClassifier(
    n_classes=n_classes,
    opt='AdamW',
    lr=1e-1,
    wd=1e-4, ).apply(init_for_relu)

# model = MLPClassifier(
#     input_sz=28 * 28,
#     n_classes=n_classes,
#     n_features=(16, 32, 64, 128,)
# ).apply(init_for_relu)

checkpoint_callback = pl.callbacks.ModelCheckpoint(
    save_top_k=1,
    monitor="accuracy/val",
    mode="max",
    filename="best-{epoch:02d}",
    save_last=True,
)

tb_logger = pl_loggers.TensorBoardLogger(save_dir='./results', name='fashion_mnist',
                                         sub_dir=model.__class__.__name__)

trainer = pl.Trainer(max_epochs=1,
                     limit_train_batches=10,
                     limit_val_batches=10,
                     callbacks=[
                         pl.callbacks.LearningRateFinder(),
                         checkpoint_callback,
                     ],
                     logger=tb_logger,
                     )

trainer.fit(model=model,
            train_dataloaders=train_dl,
            val_dataloaders=valid_dl,
            # ckpt_path='./results/fashion_mnist/version_5/checkpoints/best-epoch=04.ckpt'
            )

Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.


Finding best initial lr:   0%|          | 0/100 [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
LR finder stopped early after 10 steps due to diverging loss.
Failed to compute suggestion for learning rate because there are not enough points. Increase the loop iteration limits or the size of your dataset/dataloader.
Restoring states from the checkpoint path at C:\Users\George Tzoupis\source\ailab\image_classification\.lr_find_e8d6779f-ec3c-4765-8542-ee6cbd109c5b.ckpt
Restored all states from the checkpoint at C:\Users\George Tzoupis\source\ailab\image_classification\.lr_find_e8d6779f-ec3c-4765-8542-ee6cbd109c5b.ckpt

  | Name       | Type       | Params
------------------------------------------
0 | classifier | Sequential | 1.2 M 
------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.920     Total estimated model params size (MB)
Restored all states from the checkpoint at C:\Users\George Tzoupis\source\ailab\image_classification\.lr_find_e8d6779f-ec3c-4765-8542-e

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
