In [None]:
import torch  
import torch.nn.functional as F

`torch.nn.functional.conv2d(img, kernel)`
is different from
`nn.Convd(in_channels, out_channels, kernel_size)`

In [None]:
image = torch.rand(1, 6, 6) # count, image_height, image_width
kernel = torch.ones(3, 1, 3, 3) # channels, 1, kernel_height, kernel_width

outimage = F.conv2d(image, kernel)
print("outimage=", outimage.shape)

outimage= torch.Size([3, 4, 4])


In [45]:
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.convolution_layers = nn.Sequential(
            nn.Conv2d(1, 32, 3), # notice the signature
            nn.ReLU(),  
            nn.Conv2d(32, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.5)
        )

        self.classification_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(12 * 12 * 64, 10),
            nn.Sigmoid(),
            nn.Linear(10, 10),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        features = self.convolution_layers(x)
        return self.classification_layers(features)

Create the dataloaders

In [46]:
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

train_data = datasets.MNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.MNIST(root="data", train=False, download=True, transform=ToTensor())

train_loader = DataLoader(train_data, batch_size=64)
test_loader = DataLoader(test_data, batch_size=64)

Define helper methods for training and testing

In [47]:
device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else "cpu"

def train(dataloader, model, loss_fn, optimizer):
    model.train()

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        pred = model(X)
        loss = loss_fn(pred, y)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()


def test(dataloader, model):
    model.eval()

    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            
            pred = model(X).argmax(dim=1)
            correct += (pred == y).sum().item()
    
    accuracy = correct / len(dataloader.dataset)
    print(f"accuracy {accuracy * 100: .2f}%")


Train the model

In [48]:
lr = 0.1
epochs = 10

model = Net().to(device)
optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(1, epochs + 1):
    train(train_loader, model, loss_fn, optimizer)
    
    print(f"Epoch {epoch}")
    test(test_loader, model)

Epoch 1
accuracy  73.24%
Epoch 2
accuracy  82.51%
Epoch 3
accuracy  85.50%
Epoch 4
accuracy  88.00%
Epoch 5
accuracy  90.69%
Epoch 6
accuracy  92.91%
Epoch 7
accuracy  94.00%
Epoch 8
accuracy  94.74%
Epoch 9
accuracy  95.30%
Epoch 10
accuracy  95.62%


Count the number of parameters

In [53]:
n = sum(p.numel() for p in model.parameters())
print(f"Number of parameters {n}")

Number of parameters 111096
