# Introduction to deep learning with PyTorch and PyTorch Lightning

Example code for a simple multilayer perceptron. Its main purpose is to familiarize you with the typical structure of a deep learning workflow:

* data preparation
* model definition
* initialize model and trainer
* set up progress/performance logging (optional)
* training
* model evaluation 

In [1]:
import os

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision.datasets import MNIST
from torchmetrics import Accuracy

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything


In [2]:
#data preparation

#image preprocessing pipeline that is applied to every sample
transform = transforms.Compose([
    transforms.ToTensor(),                     # converts a PIL Image or numpy.ndarray to a tensor
    transforms.Normalize(mean=0.5, std=0.5),   # normalizes the tensor image with mean and standard deviation
])

#settings shared by the training and the test dataset:
#store the files under "data", download them if necessary, apply the transformations above
mnist_kwargs = dict(root="data", download=True, transform=transform)
full_training_set = MNIST(train=True, **mnist_kwargs)
test_set = MNIST(train=False, **mnist_kwargs)

#hold out 5,000 samples of the training set as validation data, keep 55,000 for training
training_subset, validation_subset = random_split(full_training_set, [55_000, 5_000])

#dataloaders handle shuffling, creating batches, distribution over cores etc.
#only the training data is shuffled
loader_kwargs = dict(batch_size=64, num_workers=4)
training_data = DataLoader(training_subset, shuffle=True, **loader_kwargs)
validation_data = DataLoader(validation_subset, shuffle=False, **loader_kwargs)
test_data = DataLoader(test_set, shuffle=False, **loader_kwargs)

In [3]:
#model definition
class MNISTModel(pl.LightningModule):
    """Simple multilayer perceptron for classifying MNIST digits.

    Structure: flatten -> linear (1*28*28 -> 512) -> ReLU -> dropout -> linear (512 -> 10).
    ``forward`` returns the raw output of the last linear layer; the cross-entropy
    loss is computed from it in the training/validation/test steps.
    """

    def __init__(self):
        super().__init__()

        #defining the layers
        self.layer_1 = nn.Linear(1 * 28 * 28, 512)   #input 1*28*28: channel * height * width; output: 512
        self.layer_2 = nn.Linear(512, 10)            #input 512 and output: 10
        self.dropout = nn.Dropout(0.15)              #defines a dropout layer with dropout probability 0.15
        self.flatten = nn.Flatten()

        #defining metric loggers: one object per stage so their accumulated states stay separate
        #NOTE(review): newer torchmetrics releases appear to require a `task` argument
        #for Accuracy -- confirm against the installed version before upgrading
        self.train_acc = Accuracy()
        self.val_acc = Accuracy()
        self.test_acc = Accuracy()

    def forward(self, x):
        """Apply the complete model to a batch of images and return the class scores."""
        x = self.flatten(x)            #flatten every image into one vector
        x = F.relu(self.layer_1(x))
        x = self.dropout(x)
        x = self.layer_2(x)
        return x

    def training_step(self, batch, batch_nb):
        """Compute the loss for one training batch and log loss and accuracy.

        Returns the loss, which is used for backpropagation.
        """
        x, y = batch
        y_pred = self(x)   #this is the forward step: compute the prediction
        loss = F.cross_entropy(y_pred, y)  #compute the loss

        #the rest is reporting: size of loss and accuracy of the current batch
        accuracy = self.train_acc(y_pred, y)
        self.log('train_loss', loss)
        self.log('train_acc', accuracy)
        return loss

    def validation_step(self, batch, batch_nb):
        """Same as training, but without backpropagation and using the validation data.

        If used, usually done during the training.
        """
        x, y = batch
        y_pred = self(x)
        loss = F.cross_entropy(y_pred, y)
        #accumulate the metric state and log the metric object itself
        self.val_acc.update(y_pred, y)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", self.val_acc, prog_bar=True)
        return loss

    def test_step(self, batch, batch_nb):
        """Same as validation, but only done once after the model is trained."""
        x, y = batch
        y_pred = self(x)
        loss = F.cross_entropy(y_pred, y)
        #same pattern as in validation_step: accumulate over all test batches and log the
        #metric object, so the reported accuracy is based on the accumulated state
        #instead of on an average of per-batch accuracies
        self.test_acc.update(y_pred, y)
        self.log('test_loss', loss)
        self.log("test_acc", self.test_acc)
        return loss

    def configure_optimizers(self):
        """Return the optimizer used for the backpropagation: Adam with learning rate 0.0005."""
        return torch.optim.Adam(self.parameters(), lr=0.0005)
    


In [None]:
#Initialize model and trainer

# Initialize the model
mnist_model = MNISTModel()

# Select the hardware: use the GPU if CUDA is available, otherwise fall back to the CPU,
# so that the notebook also runs on computers without an NVIDIA gpu.
accelerator = 'gpu' if torch.cuda.is_available() else 'cpu'

# Initialize a trainer
trainer = Trainer(max_epochs=10,            #number of epochs: in one epoch all training data is processed once.
                  accelerator=accelerator)  #'gpu' accelerates the training, 'cpu' is the fallback
                  

In [4]:
#reporting / logging

#To inspect the training results we use TensorBoard. We load the extension here and
#point it at the lightning_logs/ directory; use the refresh button in TensorBoard
#during training to see the current run.
%reload_ext tensorboard
%tensorboard --logdir=lightning_logs/

In [5]:
#training

#Fit the model: the trainer is linked to the model and runs the training
#with our training data and our validation data.
#The progress can be watched by going back to the tensorboard.
trainer.fit(
    mnist_model,
    train_dataloaders=training_data,
    val_dataloaders=validation_data,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type     | Params
---------------------------------------
0 | layer_1   | Linear   | 401 K 
1 | layer_2   | Linear   | 5.1 K 
2 | dropout   | Dropout  | 0     
3 | flatten   | Flatten  | 0     
4 | train_acc | Accuracy | 0     
5 | val_acc   | Accuracy | 0     
6 | test_acc  | Accuracy | 0     
---------------------------------------
407 K     Trainable params
0         Non-trainable params
407 K     Total params
1.628     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [9]:
#post training: evaluate the model on the test data
# The checkpoint selected by ckpt_path='best' is evaluated on the test dataloader.
# This is the real test of a model's performance, and these are the numbers
# researchers report in papers.
# NOTE(review): no checkpoint monitor is configured in this notebook, so 'best' presumably
# resolves to the most recently saved checkpoint -- confirm.
trainer.test(ckpt_path='best', dataloaders=test_data)

Restoring states from the checkpoint path at D:\mydata\Dropbox\uni\Würzburg\lehre\seminare\einführung in deep learning\notebooks\lightning_logs\version_4\checkpoints\epoch=9-step=8600.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at D:\mydata\Dropbox\uni\Würzburg\lehre\seminare\einführung in deep learning\notebooks\lightning_logs\version_4\checkpoints\epoch=9-step=8600.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.9785000085830688
        test_loss           0.06816816329956055
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.06816816329956055, 'test_acc': 0.9785000085830688}]