# Choosing and Switching Optimizers in PyTorch

## Step 1: Setting Up the PyTorch Environment



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
# For creating a dummy DataLoader
from torch.utils.data import DataLoader, TensorDataset

## Step 2: Defining a Simple Neural Network


In [None]:
class SimpleNet(nn.Module):
    """Small fully connected classifier that returns raw (unnormalised) logits.

    The network is three linear layers, input_size -> 128 -> 64 -> num_classes,
    with a ReLU applied after each of the first two layers.

    Args:
        input_size: Number of features in each input vector.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size=784, num_classes=10):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the layers are applied.
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map a batch of feature vectors to per-class logits."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        # No softmax here: the final layer's output is returned as raw logits.
        return self.fc3(hidden)

# Seed torch's global RNG before any parameters are created. Weight
# initialisation, the random dummy data and DataLoader shuffling all draw from
# this generator, so seeding once here makes a top-to-bottom run repeatable
# (on the same machine and PyTorch build).
SEED = 42
torch.manual_seed(SEED)

# Define model parameters based on common use-case (e.g., MNIST-like)
INPUT_SIZE = 784
NUM_CLASSES = 10
model = SimpleNet(input_size=INPUT_SIZE, num_classes=NUM_CLASSES)

## Step 3: Choosing and Implementing Optimizers


In [None]:
# Fallback for non-sequential cell execution: build the model here if the
# model-definition cell has not been run yet.
if 'model' not in locals():
    INPUT_SIZE = 784
    NUM_CLASSES = 10
    model = SimpleNet(input_size=INPUT_SIZE, num_classes=NUM_CLASSES)

# Configure optimizers.
# All three are built over the same model.parameters(); each optimizer object
# keeps its own internal state, so they can be swapped during training.
optimizer_sgd = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # SGD with momentum
optimizer_adam = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

# RMSprop uses lr=0.001 rather than 0.01. RMSprop scales each update by the
# root of a running average of squared gradients that starts at zero, so the
# first steps of a never-used RMSprop are much larger than lr alone suggests.
# At lr=0.01 the average loss spiked by more than an order of magnitude when
# training switched from Adam to RMSprop; the smaller rate keeps those first
# updates proportionally smaller.
optimizer_rmsprop = optim.RMSprop(model.parameters(), lr=0.001)  # RMSprop optimizer

print("Optimizers initialized.")

Optimizers initialized.


## Step 4: Training Loops: Switching Optimizers


In [None]:
# Fallbacks so this cell also works when run standalone.
# The model and each optimizer are checked separately: previously only `model`
# was checked, so a defined model with undefined optimizers (optimizer cell
# skipped) reached the training loop and failed with a NameError.
_model_was_missing = 'model' not in locals()
if _model_was_missing:
    INPUT_SIZE = 784
    NUM_CLASSES = 10
    model = SimpleNet(input_size=INPUT_SIZE, num_classes=NUM_CLASSES)

# An optimizer is (re)built when it is undefined, or when the model was just
# re-created: an optimizer left over from an earlier model would keep updating
# that old model's parameters rather than the new ones.
if _model_was_missing or 'optimizer_adam' not in locals():
    optimizer_adam = optim.Adam(model.parameters(), lr=0.001)
if _model_was_missing or 'optimizer_rmsprop' not in locals():
    # lr=0.001 rather than 0.01: RMSprop's running average of squared gradients
    # starts at zero, so its first steps are much larger than lr alone suggests
    # and 0.01 makes the loss spike when training switches to it.
    optimizer_rmsprop = optim.RMSprop(model.parameters(), lr=0.001)


# Loss function: cross-entropy computed from the model's raw logits and
# integer class labels.
criterion = nn.CrossEntropyLoss()

# Training configuration, kept small so the demonstration runs quickly.
NUM_EPOCHS = 6
BATCH_SIZE = 32
NUM_SAMPLES_PER_EPOCH = 128  # total number of samples in the dummy dataset

# Synthetic data: standard-normal feature vectors paired with labels drawn
# uniformly from [0, NUM_CLASSES).
dummy_inputs_tensor = torch.randn((NUM_SAMPLES_PER_EPOCH, INPUT_SIZE))
dummy_labels_tensor = torch.randint(
    low=0,
    high=NUM_CLASSES,
    size=(NUM_SAMPLES_PER_EPOCH,),
)

# Wrap the tensors in a dataset and serve them in shuffled mini-batches.
dummy_dataset = TensorDataset(dummy_inputs_tensor, dummy_labels_tensor)
train_loader = DataLoader(dummy_dataset, batch_size=BATCH_SIZE, shuffle=True)

print(f"Starting training for {NUM_EPOCHS} epochs.")

# Epochs before this index train with Adam; the remaining epochs use RMSprop.
switch_epoch = NUM_EPOCHS // 2

for epoch in range(NUM_EPOCHS):
    # Both optimizers were created once over model.parameters() and each keeps
    # its own internal state. Selecting one here resumes from whatever state it
    # already holds (an optimizer that has never stepped starts fresh).
    if epoch >= switch_epoch:
        optimizer, optimizer_name = optimizer_rmsprop, "RMSprop"
    else:
        optimizer, optimizer_name = optimizer_adam, "Adam"

    model.train()  # put the model in training mode

    batch_losses = []
    for inputs, labels in train_loader:
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss.
        loss = criterion(model(inputs), labels)

        # Backward pass, then parameter update with the active optimizer.
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Mean of the per-batch losses for this epoch.
    avg_loss = sum(batch_losses) / len(batch_losses)
    print(f"Epoch {epoch + 1}/{NUM_EPOCHS} completed with {optimizer_name}. Average Loss: {avg_loss:.4f}")

print("Training finished.")

Starting training for 6 epochs.
Epoch 1/6 completed with Adam. Average Loss: 2.3116
Epoch 2/6 completed with Adam. Average Loss: 2.0963
Epoch 3/6 completed with Adam. Average Loss: 1.9239
Epoch 4/6 completed with RMSprop. Average Loss: 42.0097
Epoch 5/6 completed with RMSprop. Average Loss: 36.8680
Epoch 6/6 completed with RMSprop. Average Loss: 1.4027
Training finished.
