In [1]:
import os
import torch
import torch.utils.data as data
from torchvision import datasets
import torchvision.transforms as transforms
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import lightning.pytorch as pl
from lightning.pytorch.callbacks.early_stopping import EarlyStopping


In [2]:
# Load data sets
transform = transforms.ToTensor()

DATASETS_PATH = os.path.join("..", "..", "datasets")
print('DATASETS_PATH: ', DATASETS_PATH)

train_set = datasets.MNIST(root=DATASETS_PATH, download=True, train=True, transform=transform)
test_set = datasets.MNIST(root=DATASETS_PATH, download=True, train=False, transform=transform)

# use 20% of training data for validation
train_set_size = int(len(train_set) * 0.8)
valid_set_size = len(train_set) - train_set_size

# split the train set into two
seed = torch.Generator().manual_seed(42)
train_set, valid_set = data.random_split(train_set, [train_set_size, valid_set_size], generator=seed)


DATASETS_PATH:  ..\..\datasets


In [3]:
class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Sequential(nn.Linear(28 * 28, 64), nn.ReLU(), nn.Linear(64, 3))

    def forward(self, x):
        return self.l1(x)

class Decoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 28 * 28))

    def forward(self, x):
        return self.l1(x)

In [4]:
class LitAutoEncoder(pl.LightningModule):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.save_hyperparameters()

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        return loss
    
    def test_step(self, batch, batch_idx):
        # this is the test loop
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        test_loss = F.mse_loss(x_hat, x)
        self.log("test_loss", test_loss)
    
    def validation_step(self, batch, batch_idx):
        # this is the validation loop
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        val_loss = F.mse_loss(x_hat, x)
        self.log("val_loss", val_loss)
        return {"val_loss": val_loss}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
    
    def forward(self, x):
#         x = torch.Tensor(x)
#         x = [torch.Tensor(x_) for x_ in x]
        
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        return x_hat
    
    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        for b in batch:
            return self(b)

In [5]:
# model
autoencoder = LitAutoEncoder(Encoder(), Decoder())

# initialize the Trainer
trainer = pl.Trainer(limit_train_batches=100, limit_test_batches=10, limit_val_batches=10,
                     max_epochs=1, profiler="simple")
print('trainer.default_root_dir: ', trainer.default_root_dir)

train_loader = DataLoader(train_set)
valid_loader = DataLoader(valid_set)

# train model
trainer.fit(model=autoencoder, train_dataloaders=train_loader)


  rank_zero_warn(
  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


trainer.default_root_dir:  C:\Users\GyanT\Documents\GitHub\Artificial-Neural-Network\PyTorchLightning


  rank_zero_warn(
Missing logger folder: C:\Users\GyanT\Documents\GitHub\Artificial-Neural-Network\PyTorchLightning\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 50.4 K
1 | decoder | Decoder | 51.2 K
------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
FIT Profiler Report

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                        	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                                  

In [6]:
# train with both splits
trainer.fit(autoencoder, train_loader, valid_loader)


  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 50.4 K
1 | decoder | Decoder | 51.2 K
------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
`Trainer.fit` stopped: `max_epochs=1` reached.
FIT Profiler Report

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                         	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                               

In [7]:
# test the model
trainer.test(model=autoencoder, dataloaders=DataLoader(test_set))

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

TEST Profiler Report

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                         	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                                          	|  -              	|  3890     

[{'test_loss': 0.06349547952413559}]

# Saving and loading checkpoints (basic)

In [8]:
# load checkpoint
checkpoint = "./lightning_logs/version_0/checkpoints/epoch=0-step=100.ckpt"
model = LitAutoEncoder.load_from_checkpoint(checkpoint, encoder=Encoder(), decoder=Decoder())
# print('learning_rate: ', model.learning_rate)

checkpoint_ = torch.load(checkpoint, map_location=lambda storage, loc: storage)
print('checkpoint:', checkpoint_.keys())
print('hyper_parameters:', checkpoint_["hyper_parameters"])


checkpoint: dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers', 'hparams_name', 'hyper_parameters'])
hyper_parameters: {'encoder': Encoder(
  (l1): Sequential(
    (0): Linear(in_features=784, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=3, bias=True)
  )
), 'decoder': Decoder(
  (l1): Sequential(
    (0): Linear(in_features=3, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=784, bias=True)
  )
)}


  rank_zero_warn(
  rank_zero_warn(


In [9]:
trainer = pl.Trainer(limit_train_batches=100, limit_test_batches=10, limit_val_batches=10, 
                     max_epochs=1, enable_checkpointing=False)

# automatically restores model, epoch, step, LR schedulers, etc...
trainer.fit(model=model, train_dataloaders=train_loader, ckpt_path=checkpoint)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at ./lightning_logs/version_0/checkpoints/epoch=0-step=100.ckpt
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 50.4 K
1 | decoder | Decoder | 51.2 K
------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)
Restored all states from the checkpoint at ./lightning_logs/version_0/checkpoints/epoch=0-step=100.ckpt
`Trainer.fit` stopped: `max_epochs=1` reached.


# Early Stopping

In [10]:
early_stop_callback = EarlyStopping(monitor="val_loss", mode="min", patience=1)
# early_stop_callback = EarlyStopping(monitor="val_accuracy", min_delta=0.00, patience=3, verbose=False, mode="max")

trainer = pl.Trainer(limit_train_batches=100, limit_test_batches=10, limit_val_batches=10,
                     max_epochs=1, enable_checkpointing=False, callbacks=[early_stop_callback])

# train with both splits
trainer.fit(model, train_loader, valid_loader)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 50.4 K
1 | decoder | Decoder | 51.2 K
------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.407     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


# Deploy models into production (basic)

In [11]:
# disable randomness, dropout, etc...
model.eval()
x = torch.randn(1, 28, 28)

# predict with the model
with torch.no_grad():
    y_hat = model(x)

print('y_hat: ', y_hat.shape)

y_hat:  torch.Size([1, 784])


In [12]:
predictions = trainer.predict(model, valid_loader)
print('predictions: ', predictions.shape)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Predicting: 0it [00:00, ?it/s]