# Imports

In [1]:
%%time
import pytorch_lightning as pl
from torch.utils.data import random_split, DataLoader

from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, GPUStatsMonitor
from pytorch_lightning.loggers import TensorBoardLogger
# Note - you must have torchvision installed for this example
import torch as th
import torch.nn.functional as F
import os

# data module
from src.dataset import DataModule

# model
from src.model import Model

# config file
from src.config import Config

CPU times: user 582 ms, sys: 169 ms, total: 751 ms
Wall time: 770 ms


# Data module and setup

In [2]:
# Show the raw attribute namespace of the Config class; the view also
# contains the interpreter-added dunder entries next to the settings.
vars(Config)

mappingproxy({'__module__': 'src.config',
              'data_dir': '/home/zeusdric/Dric/Zindi2020/Coding-Room/data',
              'models_dir': '/home/zeusdric/Dric/Zindi2020/Coding-Room/models',
              'logs_dir': '/home/zeusdric/Dric/Zindi2020/Coding-Room/logs',
              'num_epochs': 2,
              'lr': 0.01,
              'weight_decay': 0.01,
              'eps': 1e-08,
              'train_batch_size': 1024,
              'test_batch_size': 512,
              'base_model': None,
              '__dict__': <attribute '__dict__' of 'Config' objects>,
              '__weakref__': <attribute '__weakref__' of 'Config' objects>,
              '__doc__': None})

In [3]:
# Plain-dict snapshot of the settings declared on Config: every attribute
# whose name contains a double underscore (e.g. __module__, __doc__) is
# left out, everything else is copied in declaration order.
config_dict = {
    name: value
    for name, value in vars(Config).items()
    if '__' not in name
}
config_dict

{'data_dir': '/home/zeusdric/Dric/Zindi2020/Coding-Room/data',
 'models_dir': '/home/zeusdric/Dric/Zindi2020/Coding-Room/models',
 'logs_dir': '/home/zeusdric/Dric/Zindi2020/Coding-Room/logs',
 'num_epochs': 2,
 'lr': 0.01,
 'weight_decay': 0.01,
 'eps': 1e-08,
 'train_batch_size': 1024,
 'test_batch_size': 512,
 'base_model': None}

In [4]:
# Data directory as configured on Config; the bare name on the last line
# echoes it as the cell output.
PATH = Config.data_dir
PATH

'/home/zeusdric/Dric/Zindi2020/Coding-Room/data'

In [5]:
%%time
dm = DataModule(config=Config)
dm.setup()

[INFO] Training on 60000
[INFO] Validating on 10000
CPU times: user 0 ns, sys: 48 ms, total: 48 ms
Wall time: 47.6 ms


In [6]:
%%time
# Instantiate the network from the plain-dict view of the settings.
# NOTE(review): DataModule is handed the Config class itself while Model
# receives config_dict — confirm both constructors expect these forms.
model = Model(config=config_dict)

CPU times: user 5.69 ms, sys: 427 µs, total: 6.12 ms
Wall time: 5.47 ms


In [7]:
# Display the module repr to review the layer stack.
model

Model(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (classifier): Linear(in_features=3200, out_features=10, bias=True)
)

In [8]:
%%time
ckpt_cb = ModelCheckpoint(
    monitor='val_loss', 
    mode='min', 
    dirpath=Config.models_dir, 
    filename='digit_classifier-{val_acc:.5f}-{val_loss:.5f}'
)

gpu_stats = GPUStatsMonitor(
    memory_utilization=True, 
    gpu_utilization=True, 
    fan_speed=True, 
    temperature=True
)
es = EarlyStopping(
    monitor='val_loss', 
    patience=2, 
    mode='min'
)

Logger = TensorBoardLogger(
    save_dir=Config.logs_dir, 
    name='mnist'
)

Callbacks = [es, ckpt_cb, gpu_stats]

trainer = pl.Trainer(
    gpus=-1, 
    max_epochs=5, 
    precision=16,
    callbacks=Callbacks,
    logger=Logger,
    # fast_dev_run=True
)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.


CPU times: user 6.14 ms, sys: 9.91 ms, total: 16 ms
Wall time: 23.9 ms


# Training phase

In [9]:
%%time
trainer.fit(model=model, datamodule=dm)


  | Name       | Type        | Params
-------------------------------------------
0 | conv1      | Conv2d      | 320   
1 | conv2      | Conv2d      | 18.5 K
2 | pool1      | MaxPool2d   | 0     
3 | bn1        | BatchNorm2d | 128   
4 | conv3      | Conv2d      | 73.9 K
5 | pool2      | MaxPool2d   | 0     
6 | bn2        | BatchNorm2d | 256   
7 | classifier | Linear      | 32.0 K
-------------------------------------------
125 K     Trainable params
0         Non-trainable params
125 K     Total params


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

CPU times: user 33.8 s, sys: 2.02 s, total: 35.8 s
Wall time: 40.9 s


1

In [10]:
# Register the TensorBoard notebook extension so the %tensorboard magic
# becomes available in this kernel.
%load_ext tensorboard

In [11]:
%tensorboard --logdir ../logs