In [4]:
import torch
import torch.nn as nn

In [64]:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(nn.Conv2d(1, 6, 5, 1, padding='same'),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2),
                                    nn.Conv2d(6, 16, 5, 1, padding='same'),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2),
                                    nn.Conv2d(16, 126, 5, 1, padding='same'),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2))
        
        self.flatten = nn.Flatten()
        
        self.classifier = nn.Sequential(nn.Linear(1134 ,128),
                                        nn.ReLU(),
                                        nn.Linear(128, 64),
                                        nn.ReLU(),
                                        nn.Linear(64, 10),
                                       nn.Softmax(dim=1))
    def forward(self, x):
        x = self.features(x)  # [batch_size, 126, 3, 3]
        print(f'Features output shape: {x.shape}')  # Debug: check features output shape
        x = self.flatten(x)   # [batch_size, 1134]
        print(f'Flattened shape: {x.shape}')        # Debug: check flattened shape
        x = self.classifier(x) # [batch_size, 10]
        print(f'Classifier output shape: {x.shape}')  # Debug: check classifier output shape
        return x
                                

In [65]:
model = LeNet5()
model

LeNet5(
  (features): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(16, 126, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (classifier): Sequential(
    (0): Linear(in_features=1134, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
    (5): Softmax(dim=1)
  )
)

In [66]:
import torch.optim as optim

loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [67]:
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import v2

In [68]:
dataset = datasets.MNIST('data', download=True, transform=v2.ToTensor())

In [69]:
data_loader=DataLoader(dataset, batch_size=32)

In [70]:
for X_train, y_label in data_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([32, 1, 28, 28]) torch.Size([32])


In [71]:
model(X_train)

Features output shape: torch.Size([32, 126, 3, 3])
Flattened shape: torch.Size([32, 1134])
Classifier output shape: torch.Size([32, 10])


tensor([[0.1098, 0.0880, 0.1033, 0.1004, 0.0989, 0.1123, 0.1081, 0.0937, 0.0913,
         0.0942],
        [0.1098, 0.0881, 0.1034, 0.1004, 0.0989, 0.1122, 0.1081, 0.0938, 0.0912,
         0.0942],
        [0.1100, 0.0882, 0.1032, 0.1001, 0.0989, 0.1123, 0.1084, 0.0937, 0.0911,
         0.0942],
        [0.1100, 0.0880, 0.1033, 0.1004, 0.0988, 0.1122, 0.1082, 0.0937, 0.0911,
         0.0942],
        [0.1098, 0.0881, 0.1033, 0.1002, 0.0988, 0.1124, 0.1083, 0.0936, 0.0912,
         0.0942],
        [0.1098, 0.0881, 0.1032, 0.1002, 0.0988, 0.1122, 0.1084, 0.0937, 0.0913,
         0.0943],
        [0.1098, 0.0881, 0.1032, 0.1003, 0.0988, 0.1121, 0.1084, 0.0937, 0.0913,
         0.0942],
        [0.1098, 0.0881, 0.1034, 0.1003, 0.0989, 0.1124, 0.1082, 0.0936, 0.0911,
         0.0942],
        [0.1098, 0.0882, 0.1032, 0.1003, 0.0989, 0.1121, 0.1084, 0.0938, 0.0912,
         0.0941],
        [0.1099, 0.0880, 0.1034, 0.1003, 0.0989, 0.1124, 0.1082, 0.0936, 0.0910,
         0.0942],
        [0

In [72]:
for X_train, y_label in data_loader:
        optimizer.zero_grad()
        outputs=model(X_train)
        loss_fn = loss(outputs, y_label)
        loss_fn.backward()
        optimizer.step()
        break

Features output shape: torch.Size([32, 126, 3, 3])
Flattened shape: torch.Size([32, 1134])
Classifier output shape: torch.Size([32, 10])


In [63]:
model = LeNet5()
input_tensor = torch.randn(32, 1, 28, 28)  # [batch_size, channels, height, width]
output = model(input_tensor)
print(output.shape) 
 # Should be [batch_size, 10]

torch.Size([32, 1134])
torch.Size([32, 10])
