![logo](../images/logo-poster.png)

In [None]:
%run supportvectors-common.ipynb

# LR Schedulers and Optimizers - focus on 32 batch size and AdamW

We will revisit the first lab of this course, on the California Housing Dataset, in light of the theory on LR Schedulers and Optimizers that we have learnt.

First the imports

In [2]:
import numpy as np
import torch

from torch.nn import MSELoss
from torch.optim import Adam, SGD, AdamW, RMSprop, Adagrad
from torch.optim.lr_scheduler import StepLR, ExponentialLR, ReduceLROnPlateau, CosineAnnealingLR
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

from svlearn.california_housing.pre_process import preprocess_data
from svlearn.california_housing.ingest_data import ingest_cal_housing_data
from svlearn.approximator.regression_network import SimpleFeedForwardNet, \
                                                    SimpleNumpyDataset, \
                                                    create_plots

Now bring in the data

In [3]:
# Ingest the raw California Housing data and run it through the shared preprocessing step.
data = ingest_cal_housing_data()
preprocessed_data = preprocess_data(data)

# Features: every column except the target. Target: the 'y_target' column,
# selected with a list so it stays two-dimensional. Both are cast to float32.
x = preprocessed_data.drop(columns=['y_target']).to_numpy(dtype=np.float32)
y = preprocessed_data[['y_target']].to_numpy(dtype=np.float32)

# Hold out 20% of the rows for validation; the split itself is seeded.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Wrap the numpy arrays so they can be fed to a DataLoader.
dataset = SimpleNumpyDataset(x_train, y_train)
val_dataset = SimpleNumpyDataset(x_val, y_val)

Set the hyperparameters, along with the batch sizes, optimizers and schedulers to iterate through

In [4]:
# Seed the torch RNG so that weight initialisation, DataLoader shuffling and
# dropout -- and therefore every loss curve below -- are reproducible under
# Restart Kernel -> Run All. (The train/validation split is already seeded.)
SEED = 42
torch.manual_seed(SEED)

# Problem size, read off the training matrix.
dim_x = x_train.shape[1]  # number of input features
N = x_train.shape[0]      # number of training samples

# Experiment grid: every (batch size, optimizer, scheduler) combination is trained.
batch_sizes = [32]
optimizers = ["adamw"]
schedulers = ["steplr", "exponentiallr", "reducelronplateau", "cosineannealinglr"]

# Training settings shared by all runs.
epochs = 50                  # number of epochs per run
max_steps_per_epoch = 1000   # cap on mini-batch steps within one epoch
drop_out = 0.1               # passed to the network's forward call during training
loss_function = MSELoss()    # used for both the training and the validation loss
lr = 0.01                    # initial learning rate given to every optimizer


Define the function to calculate validation loss

In [5]:

def validate(model, val_loader):
    """Return the mean per-sample loss of `model` over `val_loader`.

    The model is switched to evaluation mode and gradients are disabled while
    the loader is traversed. Each batch loss (from the notebook-level
    `loss_function`) is weighted by its batch size, and the total is divided
    by the number of samples in `val_loader.dataset`.

    Args:
        model: network to evaluate; called as `model(inputs)`.
        val_loader: loader yielding `(inputs, targets)` batches and exposing
            a `dataset` attribute with a length.

    Returns:
        The average validation loss as a Python float.
    """
    model.eval()  # evaluation mode for the whole pass
    weighted_total = 0.0
    with torch.no_grad():  # no gradients are needed for validation
        for batch_inputs, batch_targets in val_loader:
            batch_loss = loss_function(model(batch_inputs), batch_targets)
            # Weight by batch size so a smaller final batch is not over-counted.
            weighted_total += batch_loss.item() * batch_inputs.size(0)

    return weighted_total / len(val_loader.dataset)


Define the dictionaries of losses, learning rates and step counts that will be used to plot the various graphs for each combination

In [6]:
# Per-run results, keyed by "{batch_size}_{optimizer}_{scheduler}":
# training losses, total step count, and learning rates.
losses_dict, steps_dict, lrs_dict = {}, {}, {}

Define the training function: a nested loop over epochs and mini-batches that uses the optimizer and the scheduler

In [7]:

def train_loop(loader, val_loader, network, optimizer, scheduler):
    """Train `network`, validating and stepping the scheduler once per epoch.

    Uses the notebook-level settings `epochs`, `max_steps_per_epoch`,
    `drop_out` and `loss_function`.

    Args:
        loader: iterable of `(data, labels)` training mini-batches.
        val_loader: validation loader, passed to `validate` after every epoch.
        network: model to train; called as `network(data, drop_out)`.
        optimizer: optimizer built over the network's parameters.
        scheduler: learning-rate scheduler, or None to keep the rate constant.
            A `ReduceLROnPlateau` scheduler is stepped with the validation
            loss; any other scheduler is stepped without arguments.

    Returns:
        A tuple `(losses, lrs, steps)`:
            losses: training loss of every optimizer step, as Python floats.
            lrs: learning rate of the optimizer's first parameter group,
                recorded once per epoch after the scheduler update.
            steps: total number of optimizer steps taken.
    """
    losses = []
    lrs = []
    steps = 0
    for _epoch in range(epochs):
        # validate() leaves the network in eval mode at the end of every
        # epoch, so switch back to training mode before the next one.
        network.train()
        steps_in_epoch = 0
        for data, labels in loader:
            optimizer.zero_grad()  # reset the parameter gradients
            results = network(data, drop_out)  # get predictions
            loss = loss_function(results, labels)  # estimate loss
            loss.backward()  # back-propagate gradients
            optimizer.step()  # update the parameter values (gradient-descent)
            # Store a plain float (as validate does) rather than a tensor.
            losses.append(loss.item())
            steps += 1
            steps_in_epoch += 1
            # Stop after exactly max_steps_per_epoch steps in this epoch.
            if steps_in_epoch >= max_steps_per_epoch:
                break

        val_loss = validate(network, val_loader)

        if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(val_loss)  # plateau detection needs the monitored metric
        elif scheduler:
            scheduler.step()

        # Read the rate from the optimizer itself: this works with or without a
        # scheduler and does not depend on the scheduler offering get_last_lr().
        lrs.append(optimizer.param_groups[0]["lr"])

    return losses, lrs, steps


```pseudocode
For each of the choices of batch size   
    For each of the choices of optimizers 
        For each of the choices of schedulers ( Step, Exponential, Plateau on validation loss, Cosine annealing)  

            Compute the losses as a function of steps
```

In [None]:

# Train one freshly initialised network for every combination of
# batch size, optimizer and scheduler, and record its results.
for batch_size in batch_sizes:
    # Shuffled training batches; validation batches in fixed order.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    for optim in optimizers:
        for schedule in schedulers:
            print(f"Training with Batch size: {batch_size}, Optimizer: {optim}, Scheduler: {schedule}...")

            # Initialize network for every iteration (so we restart with random weights and biases)
            network = SimpleFeedForwardNet(input_dimension=dim_x, output_dimension=1)

            # Initialize optimizer
            if optim == "sgd_without_momentum":
                optimizer = SGD(network.parameters(), lr=lr)
            elif optim == "sgd":
                optimizer = SGD(network.parameters(), lr=lr, momentum=0.9)
            elif optim == "adam":
                optimizer = Adam(network.parameters(), lr=lr)
            elif optim == "adamw":
                optimizer = AdamW(network.parameters(), lr=lr)
            elif optim == "rmsprop":
                optimizer = RMSprop(network.parameters(), lr=lr)
            elif optim == "adagrad":
                optimizer = Adagrad(network.parameters(), lr=lr)
            else:
                # Fail loudly: a misspelled name must not silently train with
                # a different optimizer and be stored under the misspelled key.
                raise ValueError(f"Unknown optimizer: {optim!r}")

            # Initialize scheduler (an unrecognised name means "no scheduler")
            scheduler = None
            if schedule == "steplr":
                scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
            elif schedule == "exponentiallr":
                scheduler = ExponentialLR(optimizer, gamma=0.9)
            elif schedule == "reducelronplateau":
                scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
            elif schedule == "cosineannealinglr":
                scheduler = CosineAnnealingLR(optimizer, T_max=25)

            # Call the train_loop method
            losses, lrs, steps = train_loop(loader, val_loader, network, optimizer, scheduler)

            # Store copies of the results under a key that identifies the combination.
            losses_dict_key = f"{batch_size}_{optim}_{schedule}"
            losses_dict[losses_dict_key] = list(losses)
            steps_dict[losses_dict_key] = steps
            lrs_dict[losses_dict_key] = list(lrs)
            print("...finished training")

In [None]:
import matplotlib.pyplot as plt

num_plots = len(losses_dict)

# Create subplots with one row per plot. squeeze=False makes subplots always
# return a 2-D array of Axes, so indexing also works when there is only one run.
fig, axes_grid = plt.subplots(num_plots, 1, figsize=(8, 2 * num_plots), sharex=True, squeeze=False)
axes = axes_grid[:, 0]

# Base palette; it is cycled so that more than four runs can still be plotted
colors = ['red', 'blue', 'green', 'purple']

# Plot each list of training losses in its own subplot
for i, (label, y_values) in enumerate(losses_dict.items()):
    axes[i].plot(y_values, label=label, color=colors[i % len(colors)])
    axes[i].set_ylim(0, 3)  # Set y-axis limits
    axes[i].set_ylabel("Loss")
    axes[i].legend(loc="upper right")

# Set the x-axis label for the last subplot
axes[-1].set_xlabel("Iterations")

# Add a title to the figure
fig.suptitle("Schedulers Loss plots", fontsize=16)

# Adjust layout for better spacing (leave room at the top for the title)
plt.tight_layout(rect=[0, 0, 1, 0.96])

# Show the plot
plt.show()