In [1]:
# Development convenience: automatically re-import edited project modules so
# changes to the local .py files are picked up without restarting the kernel.
# https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2

In [4]:
import logging
import os
import random
import sys
from filelock import FileLock
import utils

# __import_lightning_begin__
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from torch.nn import functional as F
from torchvision.datasets import FashionMNIST
from torchvision import transforms
from pytorch_lightning.callbacks import EarlyStopping
# __import_lightning_end__

import torchvision.transforms.functional as TF

from MNISTDataModule import MNISTDataModule
from augmentation import TRANSFORM_NAMES

from LightningFashionMNIST import LightningFashionMNIST

# Notebook-level logger; named after the executing module (the recorded output
# shows it as "__main__"). Handlers are configured later via logging.basicConfig.
log = logging.getLogger(__name__)


In [6]:
# ---------------------------------------------------------------------------
# Train a LightningFashionMNIST model with fixed-magnitude augmentations and
# evaluate the best checkpoint on the test set.
# ---------------------------------------------------------------------------

# Tunable constants for this run.
SEED = 1234
AUGMENTATION_LEVEL = 0.1  # magnitude applied to every augmentation
MAX_EPOCHS = 15
BASE_LR = 0.00001
BASE_BATCH_SIZE = 32

logging.basicConfig(stream=sys.stderr, level=logging.INFO)
log.info("Starting...")

# Seed through the project helper before anything stochastic is created.
utils.set_seed(SEED)

# Each augmentation is a (transform name, magnitude) tuple. The magnitude is
# deliberately fixed: no random value is drawn here, so building this list does
# not consume any numbers from the seeded `random` stream.
augmentations = [(tfn_name, AUGMENTATION_LEVEL) for tfn_name in TRANSFORM_NAMES]

# One knob scales both hyper-parameters: the learning rate grows linearly with
# it, the batch size grows as a power of two.
batch_and_lr_multiplier = 5

conf = {
    "progress_bar_refresh_rate": 25,
    "layer_1_size": 512,
    "layer_2_size": 512,
    "lr": BASE_LR * batch_and_lr_multiplier,
    "batch_size": BASE_BATCH_SIZE * (2 ** batch_and_lr_multiplier),
    "data_dir": "./data",
    # Fashion mnist mean and std
    "data_mean": 0.28604063391685486,
    "data_std": 0.35302430391311646,
    "augmentations": augmentations,
}

log.info(f"Conf {conf}")

# Data module and model are both configured from the same dict.
data = MNISTDataModule(conf=conf)

model = LightningFashionMNIST(conf=conf)

# NOTE: this callback is constructed but NOT attached to the trainer (the
# `callbacks` argument below is commented out), so training always runs for
# the full MAX_EPOCHS. Uncomment that argument to enable early stopping on
# 'avg_val_loss'.
early_stopping = EarlyStopping('avg_val_loss', verbose=True)

trainer = pl.Trainer(
    default_root_dir="./data",
    # Use every visible GPU when CUDA devices exist, otherwise run on CPU.
    gpus=-1 if torch.cuda.device_count() > 0 else 0,
    max_epochs=MAX_EPOCHS,
    progress_bar_refresh_rate=conf["progress_bar_refresh_rate"],
    # Skip the validation sanity check that normally runs before training.
    num_sanity_val_steps=0,
    # callbacks=[early_stopping]
)

trainer.fit(model, data)

# Evaluate the best checkpoint saved during fit; as the cell's last expression,
# the returned list of metric dicts is displayed.
trainer.test(ckpt_path="best")


INFO:__main__:Starting...
INFO:__main__:Conf {'progress_bar_refresh_rate': 25, 'layer_1_size': 512, 'layer_2_size': 512, 'lr': 5e-05, 'batch_size': 1024, 'data_dir': './data', 'data_mean': 0.28604063391685486, 'data_std': 0.35302430391311646, 'augmentations': [('blur', 0.1), ('rotate_left', 0.1), ('rotate_right', 0.1)]}
INFO:MNISTDataModule:batch_size: 1024. num_workers: 8. data_dir: ./data.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
INFO:filelock:Lock 140456953863280 acquired on /home/akaver/.data.lock
INFO:filelock:Lock 140456953863280 released on /home/akaver/.data.lock
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type   | Params
-----------------------------------
0 | layer_1 | Linear | 401 K 
1 | layer_2 | Linear | 262 K 
2 | layer_3 | Linear | 5.1 K 
-----------------------------------
669 K     Trainable params
0         Non-trainable params
669 K     Total params
2.679     Total estimated model params

Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'avg_test_acc': 0.7405053973197937,
 'avg_test_loss': 1.2792999744415283,
 'test_acc': 0.7404000163078308,
 'test_acc_epoch': 0.7404000163078308,
 'test_loss': 1.2797513008117676,
 'test_loss_epoch': 1.2797513008117676}
--------------------------------------------------------------------------------


[{'test_loss': 1.2797513008117676,
  'test_loss_epoch': 1.2797513008117676,
  'test_acc': 0.7404000163078308,
  'test_acc_epoch': 0.7404000163078308,
  'avg_test_loss': 1.2792999744415283,
  'avg_test_acc': 0.7405053973197937}]

In [22]:
# Re-evaluates the best checkpoint using the `trainer` created in the training
# cell, so that cell must have been run first in this kernel (this cell's
# execution count is out of sequence with the cells above).
# NOTE(review): the recorded metrics differ slightly from the first evaluation
# (test_acc 0.7404 vs 0.7418), so evaluation is not fully deterministic --
# confirm whether test-time augmentation or another random source is the cause.
trainer.test(ckpt_path="best")

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'avg_test_acc': 0.7418726086616516,
 'avg_test_loss': 1.2794551849365234,
 'test_acc': 0.7418000102043152,
 'test_acc_epoch': 0.7418000102043152,
 'test_loss': 1.279908299446106,
 'test_loss_epoch': 1.279908299446106}
--------------------------------------------------------------------------------


[{'test_loss': 1.279908299446106,
  'test_loss_epoch': 1.279908299446106,
  'test_acc': 0.7418000102043152,
  'test_acc_epoch': 0.7418000102043152,
  'avg_test_loss': 1.2794551849365234,
  'avg_test_acc': 0.7418726086616516}]

Reusing TensorBoard on port 6007 (pid 87603), started 0:01:45 ago. (Use '!kill 87603' to kill it.)