# Defining a Multi-Layer Perceptron with nn.Module and nn.Sequential

## Step 1: Setting Up the Environment

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Optional: seed PyTorch's global random number generator so that random
# values drawn afterwards (e.g. the torch.randn demo data) are repeatable.
torch.manual_seed(42)

<torch._C.Generator at 0x7a45f4214d30>

## Step 2: Defining an MLP with `nn.Module`


In [None]:
class MLPUsingModule(nn.Module):
    """
    Single-hidden-layer perceptron written as an explicit nn.Module subclass.

    Each layer is stored as a named attribute and the data flow is spelled out
    in forward(), which is the pattern to use when a network needs more
    freedom than a plain layer-after-layer pipeline.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of values produced per sample.
    """
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names double as the labels shown when the module is
        # printed, in the order they are assigned here.
        self.hidden_layer = nn.Linear(input_size, hidden_size)
        self.activation_fn = nn.ReLU()
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply hidden layer, ReLU, then output layer, and return the result."""
        hidden = self.activation_fn(self.hidden_layer(x))
        return self.output_layer(hidden)

## Step 3: Building an MLP with `nn.Sequential`


In [None]:
def create_mlp_via_sequential(input_size, hidden_size, output_size):
    """
    Build a single-hidden-layer MLP as an nn.Sequential container.

    nn.Sequential fits models like this one, where each layer simply feeds
    the next and no custom forward logic is required.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of values produced per sample.

    Returns:
        An nn.Sequential holding Linear -> ReLU -> Linear, in that order.
    """
    layers = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, output_size),
    ]
    return nn.Sequential(*layers)

# Build a small 4 -> 5 -> 3 instance and print it to show what an
# nn.Sequential model looks like when printed.
mlp_sequential_example = create_mlp_via_sequential(
    input_size=4, hidden_size=5, output_size=3
)
print(
    "MLP Sequential Example Architecture (from Step 3):",
    mlp_sequential_example,
    sep="\n",
)

MLP Sequential Example Architecture (from Step 3):
Sequential(
  (0): Linear(in_features=4, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=3, bias=True)
)


## Step 4: Practical Application: Training the MLP


In [None]:
def train_mlp(model, data, targets, learning_rate=0.001, epochs=100):
    """
    Fit `model` to (`data`, `targets`) with a basic gradient-descent loop.

    Every epoch runs one forward and backward pass over all of `data` and
    applies a single SGD update against the mean-squared-error loss.
    Progress is printed to stdout; nothing is returned, and the parameters of
    `model` are updated in place by the optimizer.

    Args:
        model: Module whose parameters are optimized.
        data: Input tensor passed to `model` on every epoch.
        targets: Tensor compared against the model output by nn.MSELoss.
        learning_rate: Step size handed to optim.SGD.
        epochs: Number of update steps to perform.
    """
    sgd = optim.SGD(model.parameters(), lr=learning_rate)
    mse = nn.MSELoss()

    # An nn.Sequential has no custom class name that reflects the MLP, so it
    # gets a fixed label plus an extra line saying how it was built.
    is_sequential = isinstance(model, nn.Sequential)
    model_label = 'SequentialMLP' if is_sequential else model.__class__.__name__
    print(f"\nTraining model: {model_label}")
    if is_sequential:
        print("Model type: nn.Sequential based")

    # Report about every 10% of the epochs; `or 1` keeps the interval
    # non-zero when there are fewer than ten epochs.
    report_interval = epochs // 10 or 1

    for epoch_number in range(1, epochs + 1):
        # Clear gradients left over from the previous update.
        sgd.zero_grad()

        # Forward pass and loss, then backpropagate and update the parameters.
        loss = mse(model(data), targets)
        loss.backward()
        sgd.step()

        # The final epoch is always reported, even off the regular interval.
        if epoch_number % report_interval == 0 or epoch_number == epochs:
            print(f'Epoch {epoch_number}/{epochs}, Loss: {loss.item():.4f}')

# -- Shared settings for both models and the synthetic dataset --
input_size, hidden_size, output_size = 4, 5, 3
num_samples = 10
num_epochs_demo = 50

# Random stand-in data for the demo; a real project would use its actual
# dataset here. The values depend on the global RNG state, so call
# torch.manual_seed() before this point if the data must be reproducible.
input_data = torch.randn(num_samples, input_size)
# One target row per sample, with as many columns as the MLP has outputs.
target_data = torch.randn(num_samples, output_size)

# --- Train MLP defined with nn.Module ---
print("--- Training MLP defined with nn.Module ---")
mlp_module_instance = MLPUsingModule(input_size, hidden_size, output_size)
print("MLPUsingModule Architecture:", mlp_module_instance, sep="\n")
train_mlp(
    mlp_module_instance,
    input_data,
    target_data,
    epochs=num_epochs_demo,
)

# --- Train MLP defined with nn.Sequential ---
print("\n--- Training MLP defined with nn.Sequential ---")
mlp_sequential_instance = create_mlp_via_sequential(
    input_size, hidden_size, output_size
)
# Printing an nn.Sequential lists its layers, so no extra description is needed.
print("Sequential MLP Architecture:", mlp_sequential_instance, sep="\n")
train_mlp(
    mlp_sequential_instance,
    input_data,
    target_data,
    epochs=num_epochs_demo,
)

--- Training MLP defined with nn.Module ---
MLPUsingModule Architecture:
MLPUsingModule(
  (hidden_layer): Linear(in_features=4, out_features=5, bias=True)
  (activation_fn): ReLU()
  (output_layer): Linear(in_features=5, out_features=3, bias=True)
)

Training model: MLPUsingModule
Epoch 5/50, Loss: 0.7200
Epoch 10/50, Loss: 0.7184
Epoch 15/50, Loss: 0.7168
Epoch 20/50, Loss: 0.7152
Epoch 25/50, Loss: 0.7136
Epoch 30/50, Loss: 0.7120
Epoch 35/50, Loss: 0.7105
Epoch 40/50, Loss: 0.7090
Epoch 45/50, Loss: 0.7075
Epoch 50/50, Loss: 0.7060

--- Training MLP defined with nn.Sequential ---
Sequential MLP Architecture:
Sequential(
  (0): Linear(in_features=4, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=3, bias=True)
)

Training model: SequentialMLP
Model type: nn.Sequential based
Epoch 5/50, Loss: 0.7927
Epoch 10/50, Loss: 0.7908
Epoch 15/50, Loss: 0.7888
Epoch 20/50, Loss: 0.7869
Epoch 25/50, Loss: 0.7850
Epoch 30/50, Loss: 0.7832
Epoch 35/50, Loss: 0.7