In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time

# Define the 3-layer neural network
class ThreeLayerNN(nn.Module):
    """Fully connected classifier: two hidden layers followed by a linear output.

    Args:
        activation_fn: Callable applied to the output of each hidden layer
            (for example ``nn.ReLU()``). It is not applied to the output layer.
        input_dim: Number of features per sample once the input is flattened.
        hidden_dims: Pair ``(first, second)`` giving the hidden layer widths.
        num_classes: Number of values produced per sample by the output layer.

    The defaults reproduce the original fixed 784 -> 128 -> 64 -> 10 layout.
    """

    def __init__(self, activation_fn, *, input_dim=784, hidden_dims=(128, 64),
                 num_classes=10):
        super().__init__()
        first_hidden, second_hidden = hidden_dims
        # Layers are created in forward order so seeded initialisation is
        # unchanged for the default sizes.
        self.fc1 = nn.Linear(input_dim, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, num_classes)
        self.activation_fn = activation_fn

    def forward(self, x):
        """Flatten ``x`` to rows of ``input_dim`` features and return raw scores."""
        # reshape() rather than view(): view() raises on inputs whose memory
        # layout is not compatible (e.g. a transposed tensor), reshape() copies
        # only when it has to.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.activation_fn(self.fc1(x))
        x = self.activation_fn(self.fc2(x))
        # No activation on the output layer: the raw scores are returned.
        return self.fc3(x)

# Load MNIST dataset
# Preprocessing: convert each image to a tensor, then normalise with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Build the training split first, then the held-out split; both are stored
# under ./data and downloaded when missing.
train_dataset, test_dataset = [
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
]

# Mini-batches of 64; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)

# Activation modules to compare, keyed by display name.
# Insertion order is the order in which they are iterated.
activation_fns = dict([
    ('Sigmoid', nn.Sigmoid()),
    ('ReLU', nn.ReLU()),
    ('Leaky ReLU', nn.LeakyReLU(negative_slope=0.01)),
])

# Function to compute accuracy
def compute_accuracy(outputs, labels):
    """Return the fraction of samples whose highest-scoring class equals the label.

    Args:
        outputs: Tensor of per-class scores; the predicted class is the arg-max
            along dimension 1.
        labels: Tensor of target class indices, one per row of ``outputs``.

    Returns:
        A float in ``[0, 1]``. An empty batch yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    num_samples = len(labels)
    if num_samples == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    predicted = outputs.argmax(dim=1)
    correct = (predicted == labels).sum().item()
    return correct / num_samples

# One pass over a data loader, shared by the training and validation phases
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_accuracy)``.

    When ``optimizer`` is given, each batch is also back-propagated and the
    weights are updated; otherwise the pass runs with gradients disabled.
    Both metrics are weighted by batch size so a short final batch does not
    count as much as a full one. This assumes ``criterion`` returns the mean
    loss over the batch, as ``nn.CrossEntropyLoss()`` does by default.
    """
    is_training = optimizer is not None
    loss_sum, acc_sum, num_samples = 0.0, 0.0, 0

    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            batch_size = len(labels)
            if is_training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item() * batch_size
            acc_sum += compute_accuracy(outputs, labels) * batch_size
            num_samples += batch_size

    if num_samples == 0:
        # Empty loader: report zeros rather than dividing by zero.
        return 0.0, 0.0
    return loss_sum / num_samples, acc_sum / num_samples


# Train the network
def train_model(activation_fn_name, epochs=10):
    """Train a fresh ``ThreeLayerNN`` and report its final validation accuracy.

    The model is built with ``activation_fns[activation_fn_name]``, optimised
    with Adam (lr=0.001) on cross-entropy loss over the module-level
    ``train_loader``, and evaluated on ``test_loader`` after every epoch.
    Per-epoch metrics are printed.

    Args:
        activation_fn_name: Key into the module-level ``activation_fns`` dict.
        epochs: Number of passes over the training data.

    Returns:
        ``(val_acc, training_time)``: the validation accuracy of the last epoch
        (``0.0`` when ``epochs`` is 0) and the elapsed seconds for the whole
        loop, which includes the validation passes and printing.

    Raises:
        KeyError: If ``activation_fn_name`` is not a key of ``activation_fns``.
    """
    model = ThreeLayerNN(activation_fns[activation_fn_name])
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Defined up front so the return below is valid even when epochs == 0.
    val_acc = 0.0
    # perf_counter() is monotonic, so wall-clock adjustments cannot skew the result.
    start_time = time.perf_counter()

    for epoch in range(epochs):
        model.train()  # Set model to training mode
        train_loss, train_acc = _run_epoch(model, train_loader, criterion, optimizer)

        model.eval()  # Set model to evaluation mode
        val_loss, val_acc = _run_epoch(model, test_loader, criterion)

        # Print metrics
        print(f'Epoch {epoch+1}/{epochs}, Activation: {activation_fn_name}')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')
        print('-' * 50)

    training_time = time.perf_counter() - start_time
    return val_acc, training_time

# Compare activation functions
# Train one model per entry of activation_fns and keep what train_model
# returns for it: (final validation accuracy, elapsed seconds).
results = {}
for activation_fn_name in activation_fns:
    print(f"Training with {activation_fn_name} activation function...")
    val_acc, train_time = train_model(activation_fn_name)
    results.update({activation_fn_name: (val_acc, train_time)})

# Print final comparison results
print("\n--- Final Comparison ---")
for activation in results:
    acc, time_taken = results[activation]
    print(f"Activation: {activation} | Final Accuracy: {acc:.4f} | Training Time: {time_taken:.2f} sec")


Training with Sigmoid activation function...
Epoch 1/10, Activation: Sigmoid
Train Loss: 0.6464, Train Acc: 0.8381
Val Loss: 0.2606, Val Acc: 0.9275
--------------------------------------------------
Epoch 2/10, Activation: Sigmoid
Train Loss: 0.2208, Train Acc: 0.9369
Val Loss: 0.1775, Val Acc: 0.9486
--------------------------------------------------
Epoch 3/10, Activation: Sigmoid
Train Loss: 0.1540, Train Acc: 0.9551
Val Loss: 0.1400, Val Acc: 0.9590
--------------------------------------------------
Epoch 4/10, Activation: Sigmoid
Train Loss: 0.1220, Train Acc: 0.9634
Val Loss: 0.1200, Val Acc: 0.9642
--------------------------------------------------
Epoch 5/10, Activation: Sigmoid
Train Loss: 0.0995, Train Acc: 0.9703
Val Loss: 0.1068, Val Acc: 0.9677
--------------------------------------------------
Epoch 6/10, Activation: Sigmoid
Train Loss: 0.0848, Train Acc: 0.9750
Val Loss: 0.1022, Val Acc: 0.9689
--------------------------------------------------
Epoch 7/10, Activation: S