In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

In [None]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [3]:
def sigmoid_activation(x):
    return x * torch.sigmoid(x)

**Modified Lanet Architecture**

We used 3×3 filters instead of the original 5×5 filters to reduce the number of parameters while maintaining feature extraction power.

Using MaxPooling instead of average pooling to retain important features.

Softmax layer at the end to convert outputs into probabilities to make interpretation easier.

In [4]:
class ModifiedLeNet(nn.Module):
    def __init__(self):
        super(ModifiedLeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=3)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=3)
        #using maxpool instead of average pool
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(120 * 4 * 4, 84)
        self.fc2 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(sigmoid_activation(self.conv1(x)))
        x = self.pool(sigmoid_activation(self.conv2(x)))
        x = sigmoid_activation(self.conv3(x))
        x = torch.flatten(x, start_dim=1)
        x = sigmoid_activation(self.fc1(x))
        x = self.fc2(x)
        #returning after applying softmax
        return F.softmax(x, dim=1)

We are using device **cuda**, Loss function **CrossEntropyLoss** and optimizer as **Adam**

In [5]:
device = torch.device("cuda")
model = ModifiedLeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [6]:
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader):.4f}")

Epoch 1, Loss: 1.5543
Epoch 2, Loss: 1.4882
Epoch 3, Loss: 1.4799
Epoch 4, Loss: 1.4764
Epoch 5, Loss: 1.4744
Epoch 6, Loss: 1.4732
Epoch 7, Loss: 1.4728
Epoch 8, Loss: 1.4712
Epoch 9, Loss: 1.4708
Epoch 10, Loss: 1.4707


Checking the accuracy on the test data

In [7]:
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total:.2f}%')

Accuracy: 98.77%
