In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from utils import save_plot

In [2]:
# Simple Feedforward Net with configurable activation
class FFN(nn.Module):
    """Feedforward classifier with two hidden layers and a selectable activation.

    Each input is flattened and passed through
    ``Linear(784, 256) -> act -> Linear(256, 128) -> act -> Linear(128, 10)``.
    The output is the raw result of the last linear layer; no softmax is
    applied here.

    Args:
        activation: Name of the hidden-layer non-linearity, either ``"relu"``
            (default) or ``"sigmoid"``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
            Unknown names used to fall back to Sigmoid silently, which hid
            typos such as ``"ReLU"``.
    """

    # Supported activation names mapped to the module class to instantiate.
    _ACTIVATIONS = {"relu": nn.ReLU, "sigmoid": nn.Sigmoid}

    def __init__(self, activation="relu"):
        super().__init__()
        if activation not in self._ACTIVATIONS:
            supported = ", ".join(repr(name) for name in self._ACTIVATIONS)
            raise ValueError(
                f"Unsupported activation {activation!r}; expected one of: {supported}"
            )
        make_activation = self._ACTIVATIONS[activation]

        self.layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28 * 28, 256),
            # A fresh module per position: sharing one instance would make
            # hooks and module traversal treat both slots as the same layer.
            make_activation(),
            nn.Linear(256, 128),
            make_activation(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the 10 outputs per sample.

        ``x`` must flatten to 784 features per sample (the first linear layer
        is ``Linear(28*28, 256)``).
        """
        return self.layers(x)

In [3]:
def train(activation="relu", epochs=3, lr=0.01, seed=None):
    """Train an ``FFN`` on the MNIST training split and return per-epoch losses.

    The MNIST training set is loaded from ``./data`` (downloaded if missing),
    batched in shuffled mini-batches of 64, and optimised with Adam against a
    cross-entropy loss. Training runs on CUDA when available, otherwise CPU.
    One progress line is printed per epoch.

    Args:
        activation: Activation name forwarded to ``FFN``.
        epochs: Number of full passes over the training set.
        lr: Learning rate passed to Adam.
        seed: Optional integer. When given, torch's global RNG is seeded
            before the model and data loader are created, so weight
            initialisation and shuffle order are repeatable between calls.
            GPU kernels may still introduce small run-to-run differences.
            ``None`` (default) leaves the RNG state untouched.

    Returns:
        list[float]: Mean training loss per sample for each epoch, in order.
    """
    if seed is not None:
        torch.manual_seed(seed)

    transform = transforms.Compose([transforms.ToTensor()])
    trainset = torchvision.datasets.MNIST(root="./data", train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = FFN(activation=activation).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()
    losses = []
    for epoch in range(epochs):
        loss_sum = 0.0
        samples_seen = 0
        for X, y in trainloader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            out = model(X)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            # The criterion returns a batch mean; weight it by the batch size
            # so a smaller final batch is not over-represented in the epoch
            # average.
            batch_size = y.size(0)
            loss_sum += loss.item() * batch_size
            samples_seen += batch_size
        losses.append(loss_sum / samples_seen)
        print(f"{activation} Epoch {epoch+1}/{epochs}, Loss: {losses[-1]:.4f}")
    return losses

In [4]:
# Reseed before each run so both models should start from the same initial
# weights and see the same shuffle order; the activation is then the only
# intended difference between the two loss curves.
SEED = 0

torch.manual_seed(SEED)
loss_relu = train("relu")
torch.manual_seed(SEED)
loss_sigmoid = train("sigmoid")

fig, ax = plt.subplots()
# Plot against 1-based epoch numbers so the x-axis matches the
# "Epoch k/n" lines printed during training (a bare list would start at 0).
for label, losses in (("ReLU", loss_relu), ("Sigmoid", loss_sigmoid)):
    ax.plot(range(1, len(losses) + 1), losses, label=label)
# Epochs are whole numbers; avoid fractional tick labels such as 1.5.
ax.set_xticks(list(range(1, max(len(loss_relu), len(loss_sigmoid)) + 1)))
ax.set_title("Activation Function Comparison (MNIST)")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.legend()
save_plot(fig, "activation_relu_vs_sigmoid.png")

relu Epoch 1/3, Loss: 0.2465
relu Epoch 2/3, Loss: 0.1488
relu Epoch 3/3, Loss: 0.1325
sigmoid Epoch 1/3, Loss: 0.2514
sigmoid Epoch 2/3, Loss: 0.1035
sigmoid Epoch 3/3, Loss: 0.0818
[INFO] Saved plot to results\activation_relu_vs_sigmoid.png
