In [1]:
import os
import torch
from torch import nn
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision import transforms

It is also possible to perform **Elastic Net Regularization** with PyTorch. This type of regularization essentially computes a weighted combination of L1 and L2 loss, with the weights of both summing to 1.0. In other words, we add $\lambda_{L1} \times \sum_{i=1}^{n} | w_i | + \lambda_{L2} \times \sum_{i=1}^{n} w_i^2$ to the loss component:

$\mathrm{full\_loss} = \mathrm{original\_loss} + \lambda_{L1} \times \sum_{i=1}^{n} | w_i | + \lambda_{L2} \times \sum_{i=1}^{n} w_i^2$

In this example, Elastic Net (L1 + L2) Regularization is implemented with PyTorch:

- You can see that the `MLP` class representing the neural network defines two methods, `compute_l1_loss` and `compute_l2_loss`, which are used to compute the L1 and L2 loss, respectively.
- In the training loop, these are applied, in a weighted fashion (with weights of 0.3 and 0.7, respectively).
- The loss components are also printed on-screen when the statistics are printed.

In [2]:
class MLP(nn.Module):
    '''
    Multilayer Perceptron.

    A three-layer fully connected network: the input is flattened to
    28 * 28 * 1 features, passed through hidden layers of 64 and 32 units
    (each followed by ReLU), and mapped to 10 outputs. Two helper methods
    compute the L1 and L2 penalty of an arbitrary tensor.
    '''
    def __init__(self):
        super().__init__()
        # Modules are created in the same order they are applied, so the
        # indices inside the Sequential container match the forward order.
        stack = [
            nn.Flatten(),
            nn.Linear(28 * 28 * 1, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 10),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        '''Run the input through the layer stack and return its output.'''
        result = self.layers(x)
        return result

    def compute_l1_loss(self, w):
        '''Return the sum of the absolute values of the elements of w.'''
        magnitudes = w.abs()
        return magnitudes.sum()

    def compute_l2_loss(self, w):
        '''Return the sum of the squared elements of w.'''
        squares = w.square()
        return squares.sum()

In [None]:
if __name__ == '__main__':

    # Set fixed random number seed
    torch.manual_seed(42)

    # Prepare MNIST dataset (stored in / downloaded to the current working directory)
    dataset = MNIST(os.getcwd(), download=True, transform=transforms.ToTensor())
    trainloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=6)

    # Initialize the MLP
    mlp = MLP()

    # Define the loss function and optimizer
    loss_function = nn.CrossEntropyLoss()
    # NOTE(review): AdamW applies its own decoupled weight decay (the library
    # default is used here, it is not overridden) on top of the explicit
    # L1/L2 penalties added below. Confirm this is intended; pass
    # weight_decay=0 if only the Elastic Net penalty should regularize.
    optimizer = torch.optim.AdamW(mlp.parameters(), lr=1e-4)

    # Specify L1 and L2 weights. They are constant for the whole run, so they
    # are defined once here instead of on every mini-batch.
    l1_weight = 0.3
    l2_weight = 0.7

    # Run the training loop
    for epoch in range(0, 5): # 5 epochs at maximum

        # Print epoch
        print(f'Starting epoch {epoch+1}')

        # Iterate over the DataLoader for training data
        for i, data in enumerate(trainloader, 0):

            # Get inputs
            inputs, targets = data

            # Zero the gradients
            optimizer.zero_grad()

            # Perform forward pass
            outputs = mlp(inputs)

            # Compute loss
            loss = loss_function(outputs, targets)

            # Flatten every parameter tensor and concatenate them once; the
            # same vector feeds both penalties. mlp.parameters() yields all
            # trainable tensors, so the Linear biases are penalised as well.
            flat_parameters = torch.cat(
                [parameter.view(-1) for parameter in mlp.parameters()]
            )

            # Compute L1 and L2 loss component
            l1 = l1_weight * mlp.compute_l1_loss(flat_parameters)
            l2 = l2_weight * mlp.compute_l2_loss(flat_parameters)

            # Add L1 and L2 loss components
            loss = loss + l1 + l2

            # Perform backward pass
            loss.backward()

            # Perform optimization
            optimizer.step()

            # Print statistics. Scalars are only extracted from the tensors
            # on the iterations that actually report them.
            if i % 500 == 499:
                print('Loss after mini-batch %5d: %.5f (of which %.5f L1 loss; %0.5f L2 loss)' % (i + 1, loss.item(), l1.item(), l2.item()))

    # Process is complete.
    print('Training process has finished.')

Starting epoch 1
Loss after mini-batch   500: 24.79191 (of which 19.75580 L1 loss; 2.73684 L2 loss)
Starting epoch 2
Loss after mini-batch   500: 3.17805 (of which 0.82837 L1 loss; 0.04483 L2 loss)
Starting epoch 3
