In [1]:
import torch
from torch.nn import functional as F
import pytorch_lightning as pl
from model import MNISTClassifier, LightningMNISTClassifier
from data import mnist_dataloaders, MNISTDataModule
import time

## Models

In [2]:
# Plain-PyTorch baseline: build the classifier and an Adam optimizer
# (learning rate 1e-3) over its parameters.
pytorch_model = MNISTClassifier()
optimizer = torch.optim.Adam(pytorch_model.parameters(), lr=1e-3)
# Bare last expression so the notebook displays the module's repr.
pytorch_model

MNISTClassifier(
  (layer_1): Linear(in_features=784, out_features=128, bias=True)
  (layer_2): Linear(in_features=128, out_features=256, bias=True)
  (layer_3): Linear(in_features=256, out_features=10, bias=True)
)

In [3]:
def cross_entropy_loss(logits, labels):
    """Return the negative log-likelihood loss of ``logits`` against ``labels``.

    This is a thin wrapper around ``F.nll_loss`` called with its default
    arguments.

    NOTE(review): ``F.nll_loss`` expects log-probabilities as input, so this
    presumably relies on the model ending in ``log_softmax`` -- confirm against
    the model definition.

    Args:
        logits: Per-class model outputs for a batch.
        labels: Target class indices for the same batch.

    Returns:
        The loss tensor produced by ``F.nll_loss``.
    """
    batch_loss = F.nll_loss(input=logits, target=labels)
    return batch_loss

In [4]:
# Lightning counterpart of the model above, built with its default arguments.
lightning_model = LightningMNISTClassifier()
# Bare last expression so the notebook displays the module's repr.
lightning_model

LightningMNISTClassifier(
  (backbone): MNISTClassifier(
    (layer_1): Linear(in_features=784, out_features=128, bias=True)
    (layer_2): Linear(in_features=128, out_features=256, bias=True)
    (layer_3): Linear(in_features=256, out_features=10, bias=True)
  )
)

## Data

In [5]:
# Train/validation/test iterables for the plain-PyTorch loop; each is later
# iterated as batches of (x, y). Presumably DataLoaders -- see data.py.
mnist_train, mnist_val, mnist_test = mnist_dataloaders()

In [6]:
# Data module handed to `trainer.fit(...)` in the Lightning cells below.
mnist_data = MNISTDataModule()

## Training Loop

In [7]:
def training_loop(model, optimizer, epochs, train_data, val_data):
    """Train ``model`` for ``epochs`` epochs and validate after every epoch.

    Each epoch runs one optimisation step per batch of ``train_data``, then
    evaluates ``val_data`` under ``torch.no_grad()`` and prints one summary
    line containing the epoch number, wall-clock duration, the loss of the
    last training batch and the mean of the per-batch validation losses.

    This function does not call ``model.train()`` or ``model.eval()``; the
    model runs in whatever mode the caller left it in.

    Args:
        model: Module to train. Batches are moved to the device of its first
            parameter.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of passes over ``train_data``.
        train_data: Iterable yielding ``(x, y)`` training batches.
        val_data: Iterable yielding ``(x, y)`` validation batches.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Take the device from the model that was passed in, not from the
    # notebook-global ``pytorch_model``, so any model can be trained.
    device = next(model.parameters()).device
    for epoch in range(1, epochs + 1):
        start_time = time.time()

        # TRAINING LOOP
        loss = None  # stays None when train_data yields no batches
        for train_batch in train_data:
            x, y = train_batch
            x, y = x.to(device), y.to(device)

            # Clear gradients first so values left on the parameters before
            # this call cannot leak into the first update.
            optimizer.zero_grad()

            logits = model(x)
            loss = cross_entropy_loss(logits, y)

            loss.backward()
            optimizer.step()

        # Report NaN instead of raising NameError on an empty training set.
        train_loss = loss.item() if loss is not None else float('nan')

        # VALIDATION LOOP
        with torch.no_grad():
            val_losses = []
            for val_batch in val_data:
                x, y = val_batch
                x, y = x.to(device), y.to(device)
                logits = model(x)
                val_losses.append(cross_entropy_loss(logits, y).item())
            # Mean of per-batch losses.
            val_loss = torch.mean(torch.tensor(val_losses))

        end_time = time.time()
        print(
            'Epoch {} in {:.03f} secs: train loss: {:.05f}, val loss: {:.05f}'.format(
                epoch, end_time - start_time, train_loss, val_loss.item()
            )
        )

In [8]:
def test_model(model, test_data):
    """Evaluate ``model`` on ``test_data`` and print accuracy and loss.

    Runs under ``torch.no_grad()``. Predictions are the argmax over dim 1 of
    the model output.

    NOTE: both reported numbers are means of per-batch values, so they differ
    slightly from per-sample means when the batches are not all the same size.

    Args:
        model: Module to evaluate. Batches are moved to the device of its
            first parameter.
        test_data: Iterable yielding ``(x, y)`` test batches.

    Returns:
        None. The result is reported through ``print``.
    """
    # Take the device from the model that was passed in, not from the
    # notebook-global ``pytorch_model``.
    device = next(model.parameters()).device
    with torch.no_grad():
        batch_losses = []
        batch_accs = []
        for test_batch in test_data:
            x, y = test_batch
            x, y = x.to(device), y.to(device)
            logits = model(x)
            labels_hat = torch.argmax(logits, dim=1)
            batch_losses.append(cross_entropy_loss(logits, y).item())
            # Fraction of correct predictions in this batch.
            batch_accs.append(torch.sum(y == labels_hat).item() / len(y))
        test_loss = torch.mean(torch.tensor(batch_losses))
        test_acc = torch.mean(torch.tensor(batch_accs))

    print('Acc: {}, Loss: {}'.format(test_acc, test_loss))

In [9]:
# Baseline run: 5 epochs with the model created above (never moved off its
# default device), followed by evaluation on the test split.
training_loop(pytorch_model, optimizer, 5, mnist_train, mnist_val)
test_model(pytorch_model, mnist_test)

Epoch 1 in 27.706 secs: train loss: 0.18244, val loss: 0.14465
Epoch 2 in 27.193 secs: train loss: 0.17212, val loss: 0.11673
Epoch 3 in 25.817 secs: train loss: 0.07339, val loss: 0.11028
Epoch 4 in 27.108 secs: train loss: 0.05643, val loss: 0.12163
Epoch 5 in 27.289 secs: train loss: 0.07815, val loss: 0.12740
Acc: 0.9722332954406738, Loss: 0.1009707972407341


In [10]:
# Same experiment on the GPU: rebuild the model on 'cuda' and create a new
# optimizer bound to the new model's parameters. This rebinds the global
# names `pytorch_model` and `optimizer`.
pytorch_model = MNISTClassifier().to('cuda')
optimizer = torch.optim.Adam(pytorch_model.parameters(), lr=1e-3)

# 5 epochs of training, then evaluation on the test split.
training_loop(pytorch_model, optimizer, 5, mnist_train, mnist_val)
test_model(pytorch_model, mnist_test)

Epoch 1 in 26.141 secs: train loss: 0.15747, val loss: 0.14148
Epoch 2 in 28.221 secs: train loss: 0.14932, val loss: 0.11948
Epoch 3 in 24.991 secs: train loss: 0.09565, val loss: 0.11738
Epoch 4 in 24.814 secs: train loss: 0.03464, val loss: 0.12288
Epoch 5 in 38.835 secs: train loss: 0.17945, val loss: 0.13479
Acc: 0.9663614630699158, Loss: 0.12964561581611633


In [11]:
# Lightning equivalent of the manual loop: 5 epochs, no `gpus` argument,
# with 'runs' as the trainer's default root directory.
trainer = pl.Trainer(max_epochs=5, default_root_dir='runs')

# Trains the `lightning_model` instance created earlier in the notebook.
trainer.fit(lightning_model, mnist_data)

GPU available: True, used: False
TPU available: None, using: 0 TPU cores

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | MNISTClassifier | 136 K 
---------------------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.544     Total estimated model params size (MB)


Epoch 5: 100%|██████████| 939/939 [00:38<00:00, 24.29it/s, loss=0.0363, v_num=5]


1

In [12]:
# Evaluate the fitted model. No datamodule is passed here; presumably the one
# attached during `fit` is reused -- confirm for the installed Lightning version.
# The trailing `;` suppresses display of the call's return value.
trainer.test(lightning_model);

Testing: 100%|██████████| 157/157 [00:06<00:00, 22.74it/s]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.9740999937057495, 'test_loss': 0.10101911425590515}
--------------------------------------------------------------------------------


In [13]:
# Fresh Lightning model with an explicitly supplied backbone, trained for
# 5 epochs with `gpus=1`.
lightning_model = LightningMNISTClassifier(backbone=MNISTClassifier())
trainer = pl.Trainer(max_epochs=5, default_root_dir='runs', gpus=1)

# Fit, then evaluate; the trailing `;` suppresses display of the return value.
trainer.fit(lightning_model, mnist_data)
trainer.test(lightning_model);

GPU available: True, used: True
TPU available: None, using: 0 TPU cores

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | MNISTClassifier | 136 K 
---------------------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.544     Total estimated model params size (MB)


Epoch 5: 100%|██████████| 939/939 [00:41<00:00, 22.46it/s, loss=0.0277, v_num=6]
Testing: 100%|██████████| 157/157 [00:06<00:00, 24.58it/s]
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.9690999984741211, 'test_loss': 0.1238018050789833}
--------------------------------------------------------------------------------


In [14]:
class MyPrintingCallback(pl.Callback):
    """Demo callback that prints a fixed message from three trainer hooks."""

    @staticmethod
    def _announce(message):
        """Write ``message`` to standard output via ``print``."""
        print(message)

    def on_init_start(self, trainer):
        """Print a notice from the trainer's init-start hook."""
        self._announce('Starting to init trainer!')

    def on_init_end(self, trainer):
        """Print a notice from the trainer's init-end hook."""
        self._announce('trainer is init now')

    def on_train_end(self, trainer, pl_module):
        """Print a notice from the train-end hook."""
        self._announce('do something when training ends')

In [15]:
# Fresh model, single epoch with `gpus=1`, with the printing callback
# registered on the trainer.
lightning_model = LightningMNISTClassifier()
trainer = pl.Trainer(max_epochs=1, default_root_dir='runs', gpus=1, callbacks=[MyPrintingCallback()])

trainer.fit(lightning_model, mnist_data)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | MNISTClassifier | 136 K 
---------------------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.544     Total estimated model params size (MB)


Starting to init trainer!
trainer is init now
Epoch 1: 100%|██████████| 939/939 [00:40<00:00, 23.07it/s, loss=0.0959, v_num=7]do something when training ends
Epoch 1: 100%|██████████| 939/939 [00:40<00:00, 23.05it/s, loss=0.0959, v_num=7]


1

In [16]:
# Fresh model, single epoch with `gpus=1` and Lightning's built-in simple
# profiler enabled. The string form 'simple' replaces the deprecated boolean
# `profiler=True` and selects the same profiler.
lightning_model = LightningMNISTClassifier()
trainer = pl.Trainer(max_epochs=1, default_root_dir='runs', gpus=1, profiler='simple')

trainer.fit(lightning_model, mnist_data)

GPU available: True, used: True
TPU available: None, using: 0 TPU cores

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | MNISTClassifier | 136 K 
---------------------------------------------
136 K     Trainable params
0         Non-trainable params
136 K     Total params
0.544     Total estimated model params size (MB)


Epoch 1: 100%|██████████| 939/939 [00:36<00:00, 25.75it/s, loss=0.0988, v_num=8]



Profiler Report

Action                             	|  Mean duration (s)	|Num calls      	|  Total time (s) 	|  Percentage %   	|
------------------------------------------------------------------------------------------------------------------------------------
Total                              	|  -              	|_              	|  93.694         	|  100 %          	|
------------------------------------------------------------------------------------------------------------------------------------
run_training_epoch                 	|  46.723         	|2              	|  93.445         	|  99.734         	|
get_train_batch                    	|  0.037963       	|1720           	|  65.297         	|  69.691         	|
run_training_batch                 	|  0.0077699      	|1720           	|  13.364         	|  14.264         	|
optimizer_step_and_closure_0       	|  0.0063745      	|1720           	|  10.964         	|  11.702         	|
training_step_and_backward         	|  0.




1