In [1]:
import torch
import sys
import os
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.loggers import MLFlowLogger

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))


if project_root not in sys.path:
    sys.path.append(project_root)
    
from model_utils import PretrainedModelMobileNet, ClassificationData, no_augmentation

torch.cuda.empty_cache()
torch.set_float32_matmul_precision("medium")
L.seed_everything(111)

Seed set to 111


111

### With Data Augmentation

In [2]:
model = PretrainedModelMobileNet(batch_size=64)
data = ClassificationData(batch_size=64)
logger = MLFlowLogger(experiment_name="MobileNetV3", save_dir="mlruns")
trainer = L.Trainer(max_epochs=20, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename="augmentation-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | model         | MobileNetV3      | 4.2 M  | train
1 | loss_fn       | CrossEntropyLoss | 0      | train
2 | train_metrics | MetricCollection | 0      | train
3 | val_metrics   | MetricCollection | 0      | train
4 | test_metrics  | MetricCollection | 0      | train
-----------------------------------------------------------
4.2 M     Trainable params
0         Non-trainable params
4.2 M     Total params
16.859    Total estimated model params size (MB)
271       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 1407/1407 [00:59<00:00, 23.55it/s, v_num=b7bf]

Metric train_loss improved. New best score: 1.502


Epoch 1: 100%|██████████| 1407/1407 [00:39<00:00, 35.65it/s, v_num=b7bf]

Metric train_loss improved by 0.248 >= min_delta = 0.01. New best score: 1.254


Epoch 2: 100%|██████████| 1407/1407 [00:39<00:00, 35.70it/s, v_num=b7bf]

Metric train_loss improved by 0.082 >= min_delta = 0.01. New best score: 1.172


Epoch 3: 100%|██████████| 1407/1407 [00:39<00:00, 35.33it/s, v_num=b7bf]

Metric train_loss improved by 0.020 >= min_delta = 0.01. New best score: 1.152


Epoch 6: 100%|██████████| 1407/1407 [00:38<00:00, 36.26it/s, v_num=b7bf]

Metric train_loss improved by 0.030 >= min_delta = 0.01. New best score: 1.122


Epoch 7: 100%|██████████| 1407/1407 [00:38<00:00, 36.30it/s, v_num=b7bf]

Metric train_loss improved by 0.028 >= min_delta = 0.01. New best score: 1.094


Epoch 9: 100%|██████████| 1407/1407 [00:39<00:00, 35.76it/s, v_num=b7bf]

Metric train_loss improved by 0.036 >= min_delta = 0.01. New best score: 1.058


Epoch 11: 100%|██████████| 1407/1407 [00:39<00:00, 35.90it/s, v_num=b7bf]

Metric train_loss improved by 0.014 >= min_delta = 0.01. New best score: 1.044


Epoch 14: 100%|██████████| 1407/1407 [00:39<00:00, 36.04it/s, v_num=b7bf]

Monitored metric train_loss did not improve in the last 3 records. Best score: 1.044. Signaling Trainer to stop.


Epoch 14: 100%|██████████| 1407/1407 [00:39<00:00, 35.89it/s, v_num=b7bf]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\SPCX\Desktop\github-repositories\dl-cnn\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing DataLoader 0: 100%|██████████| 1407/1407 [00:10<00:00, 138.78it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.9280180335044861
      test_f1_macro         0.5942773818969727
     test_precision         0.6012427806854248
       test_recall          0.6001111268997192
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_auroc': 0.9280180335044861,
  'test_f1_macro': 0.5942773818969727,
  'test_precision': 0.6012427806854248,
  'test_recall': 0.6001111268997192}]

### No Data Augmentation

In [2]:
bs = 64
model = PretrainedModelMobileNet(batch_size=bs)
data = ClassificationData(batch_size=bs, transform=no_augmentation)
logger = MLFlowLogger(experiment_name="MobileNetV3-no-augmentation", save_dir="mlruns")
trainer = L.Trainer(max_epochs=20, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename=f"no-augmentation_bs_{bs}" + "-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Experiment with name MobileNetV3-no-augmentation not found. Creating it.
c:\Users\SPCX\Desktop\github-repositories\dl-cnn\.venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\SPCX\Desktop\github-repositories\dl-cnn\model\checkpoints\mobile_net exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | model         | MobileNetV3      | 4.2 M  | train
1 | loss_fn       | CrossEntropyLoss | 0      | train
2 | train_metrics | MetricCollection | 0      | train
3 | val_metrics   | MetricCollection | 0      | train
4 | test_metrics  | MetricCollection | 0      | train
-----------------------------------------------------------
4.2 M     Trainable params
0  

Epoch 0: 100%|██████████| 1407/1407 [00:54<00:00, 25.83it/s, v_num=d56e]

Metric train_loss improved. New best score: 1.243


Epoch 1: 100%|██████████| 1407/1407 [00:36<00:00, 38.21it/s, v_num=d56e]

Metric train_loss improved by 0.262 >= min_delta = 0.01. New best score: 0.981


Epoch 2: 100%|██████████| 1407/1407 [00:37<00:00, 37.62it/s, v_num=d56e]

Metric train_loss improved by 0.011 >= min_delta = 0.01. New best score: 0.969


Epoch 3: 100%|██████████| 1407/1407 [00:37<00:00, 37.90it/s, v_num=d56e]

Metric train_loss improved by 0.083 >= min_delta = 0.01. New best score: 0.887


Epoch 6: 100%|██████████| 1407/1407 [00:37<00:00, 37.98it/s, v_num=d56e]

Metric train_loss improved by 0.101 >= min_delta = 0.01. New best score: 0.786


Epoch 8: 100%|██████████| 1407/1407 [00:37<00:00, 37.28it/s, v_num=d56e]

Metric train_loss improved by 0.029 >= min_delta = 0.01. New best score: 0.757


Epoch 9: 100%|██████████| 1407/1407 [00:36<00:00, 38.14it/s, v_num=d56e]

Metric train_loss improved by 0.045 >= min_delta = 0.01. New best score: 0.712


Epoch 12: 100%|██████████| 1407/1407 [00:36<00:00, 38.06it/s, v_num=d56e]

Metric train_loss improved by 0.011 >= min_delta = 0.01. New best score: 0.700


Epoch 13: 100%|██████████| 1407/1407 [00:37<00:00, 37.56it/s, v_num=d56e]

Metric train_loss improved by 0.037 >= min_delta = 0.01. New best score: 0.663


Epoch 14: 100%|██████████| 1407/1407 [00:37<00:00, 37.31it/s, v_num=d56e]

Metric train_loss improved by 0.016 >= min_delta = 0.01. New best score: 0.648


Epoch 15: 100%|██████████| 1407/1407 [00:37<00:00, 37.91it/s, v_num=d56e]

Metric train_loss improved by 0.023 >= min_delta = 0.01. New best score: 0.625


Epoch 16: 100%|██████████| 1407/1407 [00:36<00:00, 38.18it/s, v_num=d56e]

Metric train_loss improved by 0.023 >= min_delta = 0.01. New best score: 0.602


Epoch 19: 100%|██████████| 1407/1407 [00:36<00:00, 38.33it/s, v_num=d56e]

Monitored metric train_loss did not improve in the last 3 records. Best score: 0.602. Signaling Trainer to stop.
`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 1407/1407 [00:36<00:00, 38.14it/s, v_num=d56e]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\SPCX\Desktop\github-repositories\dl-cnn\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing DataLoader 0: 100%|██████████| 1407/1407 [00:08<00:00, 159.66it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.8889406323432922
      test_f1_macro         0.5015732049942017
     test_precision         0.5226379632949829
       test_recall          0.5098222494125366
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_auroc': 0.8889406323432922,
  'test_f1_macro': 0.5015732049942017,
  'test_precision': 0.5226379632949829,
  'test_recall': 0.5098222494125366}]

## Check model behaviour on higher batch size and more epochs

### With Data Augmentation

In [3]:
bs = 256
model = PretrainedModelMobileNet(batch_size=bs)
data = ClassificationData(batch_size=bs)
logger = MLFlowLogger(experiment_name="MobileNetV3", save_dir="mlruns")
trainer = L.Trainer(max_epochs=50, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename=f"augmentation_bs_{bs}" + "-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\SPCX\Desktop\github-repositories\dl-cnn\.venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\SPCX\Desktop\github-repositories\dl-cnn\model\checkpoints\mobile_net exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | model         | MobileNetV3      | 4.2 M  | train
1 | loss_fn       | CrossEntropyLoss | 0      | train
2 | train_metrics | MetricCollection | 0      | train
3 | val_metrics   | MetricCollection | 0      | train
4 | test_metrics  | MetricCollection | 0      | train
-----------------------------------------------------------
4.2 M     Trainable params
0         Non-trainable params
4.2 M     Total params
16.859    Total estima

Epoch 0: 100%|██████████| 352/352 [00:32<00:00, 10.80it/s, v_num=164d]

Metric train_loss improved. New best score: 1.473


Epoch 1: 100%|██████████| 352/352 [00:16<00:00, 21.71it/s, v_num=164d]

Metric train_loss improved by 0.302 >= min_delta = 0.01. New best score: 1.171


Epoch 2: 100%|██████████| 352/352 [00:16<00:00, 21.83it/s, v_num=164d]

Metric train_loss improved by 0.068 >= min_delta = 0.01. New best score: 1.103


Epoch 3: 100%|██████████| 352/352 [00:16<00:00, 21.82it/s, v_num=164d]

Metric train_loss improved by 0.062 >= min_delta = 0.01. New best score: 1.041


Epoch 4: 100%|██████████| 352/352 [00:16<00:00, 21.88it/s, v_num=164d]

Metric train_loss improved by 0.028 >= min_delta = 0.01. New best score: 1.013


Epoch 5: 100%|██████████| 352/352 [00:16<00:00, 21.84it/s, v_num=164d]

Metric train_loss improved by 0.032 >= min_delta = 0.01. New best score: 0.981


Epoch 6: 100%|██████████| 352/352 [00:16<00:00, 21.94it/s, v_num=164d]

Metric train_loss improved by 0.021 >= min_delta = 0.01. New best score: 0.961


Epoch 8: 100%|██████████| 352/352 [00:16<00:00, 21.72it/s, v_num=164d]

Metric train_loss improved by 0.032 >= min_delta = 0.01. New best score: 0.928


Epoch 9: 100%|██████████| 352/352 [00:16<00:00, 21.54it/s, v_num=164d]

Metric train_loss improved by 0.026 >= min_delta = 0.01. New best score: 0.903


Epoch 10: 100%|██████████| 352/352 [00:16<00:00, 21.40it/s, v_num=164d]

Metric train_loss improved by 0.019 >= min_delta = 0.01. New best score: 0.883


Epoch 11: 100%|██████████| 352/352 [00:16<00:00, 21.68it/s, v_num=164d]

Metric train_loss improved by 0.010 >= min_delta = 0.01. New best score: 0.873


Epoch 13: 100%|██████████| 352/352 [00:16<00:00, 21.97it/s, v_num=164d]

Metric train_loss improved by 0.025 >= min_delta = 0.01. New best score: 0.848


Epoch 16: 100%|██████████| 352/352 [00:16<00:00, 21.88it/s, v_num=164d]

Metric train_loss improved by 0.019 >= min_delta = 0.01. New best score: 0.828


Epoch 17: 100%|██████████| 352/352 [00:16<00:00, 21.84it/s, v_num=164d]

Metric train_loss improved by 0.020 >= min_delta = 0.01. New best score: 0.808


Epoch 19: 100%|██████████| 352/352 [00:16<00:00, 21.92it/s, v_num=164d]

Metric train_loss improved by 0.013 >= min_delta = 0.01. New best score: 0.795


Epoch 20: 100%|██████████| 352/352 [00:16<00:00, 21.84it/s, v_num=164d]

Metric train_loss improved by 0.012 >= min_delta = 0.01. New best score: 0.783


Epoch 22: 100%|██████████| 352/352 [00:16<00:00, 21.92it/s, v_num=164d]

Metric train_loss improved by 0.023 >= min_delta = 0.01. New best score: 0.761


Epoch 24: 100%|██████████| 352/352 [00:16<00:00, 21.81it/s, v_num=164d]

Metric train_loss improved by 0.014 >= min_delta = 0.01. New best score: 0.746


Epoch 26: 100%|██████████| 352/352 [00:16<00:00, 21.85it/s, v_num=164d]

Metric train_loss improved by 0.014 >= min_delta = 0.01. New best score: 0.733


Epoch 28: 100%|██████████| 352/352 [00:16<00:00, 21.82it/s, v_num=164d]

Metric train_loss improved by 0.017 >= min_delta = 0.01. New best score: 0.716


Epoch 31: 100%|██████████| 352/352 [00:16<00:00, 21.93it/s, v_num=164d]

Metric train_loss improved by 0.019 >= min_delta = 0.01. New best score: 0.696


Epoch 34: 100%|██████████| 352/352 [00:15<00:00, 22.02it/s, v_num=164d]

Monitored metric train_loss did not improve in the last 3 records. Best score: 0.696. Signaling Trainer to stop.


Epoch 34: 100%|██████████| 352/352 [00:16<00:00, 21.95it/s, v_num=164d]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\SPCX\Desktop\github-repositories\dl-cnn\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing DataLoader 0: 100%|██████████| 352/352 [00:07<00:00, 49.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.9349632263183594
      test_f1_macro          0.628425121307373
     test_precision         0.6415094137191772
       test_recall          0.6286333203315735
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_auroc': 0.9349632263183594,
  'test_f1_macro': 0.628425121307373,
  'test_precision': 0.6415094137191772,
  'test_recall': 0.6286333203315735}]

### No data augmentation

In [4]:
bs = 256
model = PretrainedModelMobileNet(batch_size=bs)
data = ClassificationData(batch_size=bs, transform=no_augmentation)
logger = MLFlowLogger(experiment_name="MobileNetV3-no-augmentation", save_dir="mlruns")
trainer = L.Trainer(max_epochs=50, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename=f"no-augmentation_bs_{bs}" + "-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | model         | MobileNetV3      | 4.2 M  | train
1 | loss_fn       | CrossEntropyLoss | 0      | train
2 | train_metrics | MetricCollection | 0      | train
3 | val_metrics   | MetricCollection | 0      | train
4 | test_metrics  | MetricCollection | 0      | train
-----------------------------------------------------------
4.2 M     Trainable params
0         Non-trainable params
4.2 M     Total params
16.859    Total estimated model params size (MB)
271       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 352/352 [00:28<00:00, 12.24it/s, v_num=7044]

Metric train_loss improved. New best score: 1.282


Epoch 1: 100%|██████████| 352/352 [00:11<00:00, 29.55it/s, v_num=7044]

Metric train_loss improved by 0.370 >= min_delta = 0.01. New best score: 0.913


Epoch 2: 100%|██████████| 352/352 [00:11<00:00, 29.55it/s, v_num=7044]

Metric train_loss improved by 0.128 >= min_delta = 0.01. New best score: 0.784


Epoch 3: 100%|██████████| 352/352 [00:11<00:00, 29.81it/s, v_num=7044]

Metric train_loss improved by 0.083 >= min_delta = 0.01. New best score: 0.702


Epoch 4: 100%|██████████| 352/352 [00:11<00:00, 29.57it/s, v_num=7044]

Metric train_loss improved by 0.062 >= min_delta = 0.01. New best score: 0.639


Epoch 5: 100%|██████████| 352/352 [00:11<00:00, 29.45it/s, v_num=7044]

Metric train_loss improved by 0.057 >= min_delta = 0.01. New best score: 0.582


Epoch 6: 100%|██████████| 352/352 [00:12<00:00, 28.65it/s, v_num=7044]

Metric train_loss improved by 0.045 >= min_delta = 0.01. New best score: 0.538


Epoch 7: 100%|██████████| 352/352 [00:11<00:00, 29.69it/s, v_num=7044]

Metric train_loss improved by 0.042 >= min_delta = 0.01. New best score: 0.496


Epoch 8: 100%|██████████| 352/352 [00:11<00:00, 29.46it/s, v_num=7044]

Metric train_loss improved by 0.038 >= min_delta = 0.01. New best score: 0.458


Epoch 9: 100%|██████████| 352/352 [00:11<00:00, 29.65it/s, v_num=7044]

Metric train_loss improved by 0.026 >= min_delta = 0.01. New best score: 0.431


Epoch 10: 100%|██████████| 352/352 [00:12<00:00, 29.12it/s, v_num=7044]

Metric train_loss improved by 0.032 >= min_delta = 0.01. New best score: 0.399


Epoch 11: 100%|██████████| 352/352 [00:11<00:00, 29.63it/s, v_num=7044]

Metric train_loss improved by 0.017 >= min_delta = 0.01. New best score: 0.383


Epoch 12: 100%|██████████| 352/352 [00:11<00:00, 29.62it/s, v_num=7044]

Metric train_loss improved by 0.019 >= min_delta = 0.01. New best score: 0.363


Epoch 14: 100%|██████████| 352/352 [00:11<00:00, 29.39it/s, v_num=7044]

Metric train_loss improved by 0.032 >= min_delta = 0.01. New best score: 0.332


Epoch 16: 100%|██████████| 352/352 [00:11<00:00, 29.64it/s, v_num=7044]

Metric train_loss improved by 0.020 >= min_delta = 0.01. New best score: 0.311


Epoch 18: 100%|██████████| 352/352 [00:11<00:00, 29.46it/s, v_num=7044]

Metric train_loss improved by 0.019 >= min_delta = 0.01. New best score: 0.292


Epoch 20: 100%|██████████| 352/352 [00:11<00:00, 29.71it/s, v_num=7044]

Metric train_loss improved by 0.011 >= min_delta = 0.01. New best score: 0.282


Epoch 23: 100%|██████████| 352/352 [00:11<00:00, 29.74it/s, v_num=7044]

Metric train_loss improved by 0.015 >= min_delta = 0.01. New best score: 0.267


Epoch 26: 100%|██████████| 352/352 [00:11<00:00, 29.61it/s, v_num=7044]

Monitored metric train_loss did not improve in the last 3 records. Best score: 0.267. Signaling Trainer to stop.


Epoch 26: 100%|██████████| 352/352 [00:11<00:00, 29.50it/s, v_num=7044]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 352/352 [00:03<00:00, 88.71it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.9404104351997375
      test_f1_macro         0.6755831837654114
     test_precision         0.6842420697212219
       test_recall          0.6728554964065552
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_auroc': 0.9404104351997375,
  'test_f1_macro': 0.6755831837654114,
  'test_precision': 0.6842420697212219,
  'test_recall': 0.6728554964065552}]