In [1]:
import torch
import sys
import os
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.loggers import MLFlowLogger

project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))


if project_root not in sys.path:
    sys.path.append(project_root)
    
from model_utils import PretrainedModelMobileNet, ClassificationData, no_augmentation

torch.cuda.empty_cache()
torch.set_float32_matmul_precision("medium")
L.seed_everything(111)

Seed set to 111


111

### With Data Augmentation

In [2]:
model = PretrainedModelMobileNet(batch_size=64)
data = ClassificationData(batch_size=64)
logger = MLFlowLogger(experiment_name="MobileNetV3", save_dir="mlruns")
trainer = L.Trainer(max_epochs=20, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename="augmentation-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | model         | MobileNetV3      | 4.2 M  | train
1 | loss_fn       | CrossEntropyLoss | 0      | train
2 | train_metrics | MetricCollection | 0      | train
3 | val_metrics   | MetricCollection | 0      | train
4 | test_metrics  | MetricCollection | 0      | train
-----------------------------------------------------------
4.2 M     Trainable params
0         Non-trainable params
4.2 M     Total params
16.859    Total estimated model params size (MB)
271       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 1407/1407 [00:59<00:00, 23.55it/s, v_num=b7bf]

Metric train_loss improved. New best score: 1.502


Epoch 1: 100%|██████████| 1407/1407 [00:39<00:00, 35.65it/s, v_num=b7bf]

Metric train_loss improved by 0.248 >= min_delta = 0.01. New best score: 1.254


Epoch 2: 100%|██████████| 1407/1407 [00:39<00:00, 35.70it/s, v_num=b7bf]

Metric train_loss improved by 0.082 >= min_delta = 0.01. New best score: 1.172


Epoch 3: 100%|██████████| 1407/1407 [00:39<00:00, 35.33it/s, v_num=b7bf]

Metric train_loss improved by 0.020 >= min_delta = 0.01. New best score: 1.152


Epoch 6: 100%|██████████| 1407/1407 [00:38<00:00, 36.26it/s, v_num=b7bf]

Metric train_loss improved by 0.030 >= min_delta = 0.01. New best score: 1.122


Epoch 7: 100%|██████████| 1407/1407 [00:38<00:00, 36.30it/s, v_num=b7bf]

Metric train_loss improved by 0.028 >= min_delta = 0.01. New best score: 1.094


Epoch 9: 100%|██████████| 1407/1407 [00:39<00:00, 35.76it/s, v_num=b7bf]

Metric train_loss improved by 0.036 >= min_delta = 0.01. New best score: 1.058


Epoch 11: 100%|██████████| 1407/1407 [00:39<00:00, 35.90it/s, v_num=b7bf]

Metric train_loss improved by 0.014 >= min_delta = 0.01. New best score: 1.044


Epoch 14: 100%|██████████| 1407/1407 [00:39<00:00, 36.04it/s, v_num=b7bf]

Monitored metric train_loss did not improve in the last 3 records. Best score: 1.044. Signaling Trainer to stop.


Epoch 14: 100%|██████████| 1407/1407 [00:39<00:00, 35.89it/s, v_num=b7bf]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\SPCX\Desktop\github-repositories\dl-cnn\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:420: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


Testing DataLoader 0: 100%|██████████| 1407/1407 [00:10<00:00, 138.78it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_auroc           0.9280180335044861
      test_f1_macro         0.5942773818969727
     test_precision         0.6012427806854248
       test_recall          0.6001111268997192
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_auroc': 0.9280180335044861,
  'test_f1_macro': 0.5942773818969727,
  'test_precision': 0.6012427806854248,
  'test_recall': 0.6001111268997192}]

### No Data Augmentation

In [None]:
bs = 64
model = PretrainedModelMobileNet(batch_size=bs)
data = ClassificationData(batch_size=bs, transform=no_augmentation)
logger = MLFlowLogger(experiment_name="MobileNetV3-no-augmentation", save_dir="mlruns")
trainer = L.Trainer(max_epochs=20, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename=f"no-augmentation_bs_{bs}" + "-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Experiment with name MobileNetV3-no-augmentation not found. Creating it.
c:\Users\SPCX\Desktop\github-repositories\dl-cnn\.venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\SPCX\Desktop\github-repositories\dl-cnn\model\checkpoints\mobile_net exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | model         | MobileNetV3      | 4.2 M  | train
1 | loss_fn       | CrossEntropyLoss | 0      | train
2 | train_metrics | MetricCollection | 0      | train
3 | val_metrics   | MetricCollection | 0      | train
4 | test_metrics  | MetricCollection | 0      | train
-----------------------------------------------------------
4.2 M     Trainable params
0  

Epoch 0: 100%|██████████| 1407/1407 [00:54<00:00, 25.83it/s, v_num=d56e]

Metric train_loss improved. New best score: 1.243


Epoch 1: 100%|██████████| 1407/1407 [00:36<00:00, 38.21it/s, v_num=d56e]

Metric train_loss improved by 0.262 >= min_delta = 0.01. New best score: 0.981


Epoch 2: 100%|██████████| 1407/1407 [00:37<00:00, 37.62it/s, v_num=d56e]

Metric train_loss improved by 0.011 >= min_delta = 0.01. New best score: 0.969


Epoch 3: 100%|██████████| 1407/1407 [00:37<00:00, 37.90it/s, v_num=d56e]

Metric train_loss improved by 0.083 >= min_delta = 0.01. New best score: 0.887


Epoch 4: 100%|██████████| 1407/1407 [00:26<00:00, 53.10it/s, v_num=d56e]

## Check model behaviour on higher batch size and more epochs

### With Data Augmentation

In [None]:
bs = 256
model = PretrainedModelMobileNet(batch_size=bs)
data = ClassificationData(batch_size=bs)
logger = MLFlowLogger(experiment_name="MobileNetV3", save_dir="mlruns")
trainer = L.Trainer(max_epochs=50, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename=f"augmentation_bs_{bs}" + "-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)

### No data augmentation

In [None]:
bs = 256
model = PretrainedModelMobileNet(batch_size=bs)
data = ClassificationData(batch_size=bs, transform=no_augmentation)
logger = MLFlowLogger(experiment_name="MobileNetV3-no-augmentation", save_dir="mlruns")
trainer = L.Trainer(max_epochs=50, 
                    logger=logger, 
                    callbacks=[ModelCheckpoint(monitor="val_f1_macro", mode="max", dirpath="checkpoints/mobile_net", filename=f"no-augmentation_bs_{bs}" + "-{epoch:02d}-{val_f1_macro:.2f}"), 
                               EarlyStopping(monitor="train_loss", min_delta=0.01, patience=3, mode="min", verbose=True)], 
                    precision="16-mixed", 
                    num_sanity_val_steps=0)
trainer.fit(model, datamodule=data)
trainer.test(model, datamodule=data)