Direct PyTorch Lightning training, using the FashionMNISTLightningDataModule (dataloaders with augmentations) and the custom FashionMNISTLightningModule classifier

In [8]:
# Load IPython's autoreload extension and switch it to mode 2, so that imported
# modules are reloaded automatically before each cell runs. Reference:
# https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [20]:
import os
import logging
import math
from filelock import FileLock
import random
import sys
import time

# __import_lightning_begin__
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torch.nn import functional as F
from torchvision.datasets import FashionMNIST
from torchvision import transforms
# __import_lightning_end__

from helpers import utils
from FashionMNISTLightningDataModule import FashionMNISTLightningDataModule
from augmentation.augmentation import TRANSFORM_NAMES
from FashionMNISTLightningModule import FashionMNISTLightningModule

# Configure the root logger first so INFO-and-above records are emitted ...
logging.basicConfig(level=logging.INFO)
# ... then grab the named logger this notebook writes its own messages to.
log = logging.getLogger("App")

In [21]:
# Root logging is already configured right after the imports, and
# logging.basicConfig() does nothing once the root logger has handlers, so the
# second basicConfig() call that used to open this cell was a no-op and has
# been dropped.
log.info("Starting...")

# Fixed seed for the run. utils.set_seed is a project helper; presumably it
# seeds the Python / NumPy / torch RNGs -- confirm in helpers.utils.
SEED = 1234
utils.set_seed(SEED)

# Set up the augmentations: a list of (augmentation name, magnitude) tuples,
# one entry per name in TRANSFORM_NAMES.
#
# FIXED_AUGMENTATION_LEVEL pins every magnitude to one value. The earlier code
# drew random.random() and then unconditionally overwrote it with 0, so the
# draw was dead code; the choice is now an explicit switch. Set it to None to
# draw a random magnitude in [0.0, 1.0) per augmentation instead.
# Note: with a fixed level nothing is drawn from `random`, so the Python RNG
# stream after this point differs from the old draw-and-discard version.
FIXED_AUGMENTATION_LEVEL = 0

augmentations = []

for tfn_name in TRANSFORM_NAMES:
    level = random.random() if FIXED_AUGMENTATION_LEVEL is None else FIXED_AUGMENTATION_LEVEL
    augmentations.append((tfn_name, level))

# Single knob that scales both the learning rate and the batch size.
# NOTE(review): the learning rate grows linearly with it (x5 here) while the
# batch size grows exponentially (x2**5 = x32 here) -- confirm this is intended.
batch_and_lr_multiplier = 5

# Settings shared by the data module, the model and the trainer.
conf = dict(
    progress_bar_refresh_rate=25,
    layer_1_size=512,
    layer_2_size=512,
    lr=batch_and_lr_multiplier * 0.0001,
    batch_size=(2 ** batch_and_lr_multiplier) * 32,
    data_dir="./data",
    # Fashion mnist mean and std
    data_mean=0.28604063391685486,
    data_std=0.35302430391311646,
    augmentations=augmentations,
)

# Record the effective configuration next to the training output.
log.info(f"Conf {conf}")

# Project data module and model, both configured from the same `conf` dict.
data = FashionMNISTLightningDataModule(conf=conf)
model = FashionMNISTLightningModule(conf=conf)

# Early-stopping callback watching the 'avg_val_loss' metric.
early_stopping = pl.callbacks.EarlyStopping(monitor='avg_val_loss', verbose=True)

trainer = pl.Trainer(
    callbacks=[early_stopping],
    # Skip the validation sanity check that would otherwise run before training.
    num_sanity_val_steps=0,
    progress_bar_refresh_rate=conf["progress_bar_refresh_rate"],
    max_epochs=30,
    # 0 when no CUDA device is present, otherwise -1 (Lightning: all available GPUs).
    gpus=0 if torch.cuda.device_count() == 0 else -1,
    default_root_dir="./data",
)

# Train and report how long the fit took. A monotonic clock is used for the
# interval: time.time() follows the system wall clock and can jump if that
# clock is adjusted while training runs.
start_time = time.perf_counter()
# The data module is passed by keyword so it is bound to Trainer's `datamodule`
# parameter rather than to the positional dataloader slot.
trainer.fit(model, datamodule=data)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

# Run the test loop; as the last expression of the cell, the returned list of
# metric dicts is also displayed.
trainer.test(model, datamodule=data)


INFO:App:Starting...
INFO:App:Conf {'progress_bar_refresh_rate': 25, 'layer_1_size': 512, 'layer_2_size': 512, 'lr': 0.0005, 'batch_size': 1024, 'data_dir': './data', 'data_mean': 0.28604063391685486, 'data_std': 0.35302430391311646, 'augmentations': [('blur', 0), ('rotate_left', 0), ('rotate_right', 0)]}
INFO:FashionMNISTLightningDataModule:batch_size: 1024. num_workers: 10. data_dir: ./data.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type   | Params
-----------------------------------
0 | layer_1 | Linear | 401 K 
1 | layer_2 | Linear | 262 K 
2 | layer_3 | Linear | 5.1 K 
-----------------------------------
669 K     Trainable params
0         Non-trainable params
669 K     Total params
2.679     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved. New best score: 0.452


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.062 >= min_delta = 0.0. New best score: 0.390


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.370


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.362


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.038 >= min_delta = 0.0. New best score: 0.323


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.314


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.308


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.299


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.295


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.293


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.290


Validating: 0it [00:00, ?it/s]

Metric avg_val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.288


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.288. Signaling Trainer to stop.


--- 180.2996962070465 seconds ---


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'avg_test_acc': 0.7705576419830322,
 'avg_test_loss': 1.0802701711654663,
 'test_acc': 0.7705000042915344,
 'test_acc_epoch': 0.7705000042915344,
 'test_loss': 1.0809059143066406,
 'test_loss_epoch': 1.0809059143066406}
--------------------------------------------------------------------------------


[{'test_loss': 1.0809059143066406,
  'test_loss_epoch': 1.0809059143066406,
  'test_acc': 0.7705000042915344,
  'test_acc_epoch': 0.7705000042915344,
  'avg_test_loss': 1.0802701711654663,
  'avg_test_acc': 0.7705576419830322}]

In [16]:
# Run the test loop again on `model`, passing the data module by keyword.
# NOTE(review): this cell's execution count (16) is lower than the training
# cell's (21), so the output recorded below comes from an earlier run and does
# not match the training output above -- restart the kernel and run all cells
# top to bottom before trusting these numbers.
trainer.test(model, datamodule=data)

  rank_zero_deprecation(


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'avg_test_acc': 0.7025011777877808,
 'avg_test_loss': 1.3828788995742798,
 'test_acc': 0.7027999758720398,
 'test_acc_epoch': 0.7027999758720398,
 'test_loss': 1.3831374645233154,
 'test_loss_epoch': 1.3831374645233154}
--------------------------------------------------------------------------------


  rank_zero_deprecation(


[{'test_loss': 1.3831374645233154,
  'test_loss_epoch': 1.3831374645233154,
  'test_acc': 0.7027999758720398,
  'test_acc_epoch': 0.7027999758720398,
  'avg_test_loss': 1.3828788995742798,
  'avg_test_acc': 0.7025011777877808}]