In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
from collections import OrderedDict

# Allow up to 100 characters per printed line so the 10-value output
# tensors shown later fit on a single row.
torch.set_printoptions(linewidth=100)

In [3]:
# Load the FashionMNIST training split as tensors.
# download=True makes the notebook runnable on a fresh checkout: the files are
# fetched into ./data only when they are not already there, so an existing
# local copy is reused unchanged.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

## Method 1: Build using the functional API

In [9]:
# This is harder, but could turn out to be more flexible in the long run (depending on your use case)
class functional_net(nn.Module):
    """Small CNN classifier that applies activations and pooling functionally.

    Only the layers that own parameters (two convolutions and three linear
    layers) are registered as attributes; ReLU, max-pooling and flattening
    are plain function calls inside ``forward``.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stage: 1 input channel -> 6 -> 12 feature maps, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Fully connected stage. For a 28x28 input each of the 12 feature maps
        # ends up 4x4 by the time it reaches fc1; following forward():
        #   28 -(conv 5x5)-> 24 -(pool)-> 12 -(conv 5x5)-> 8 -(pool)-> 4
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, x):
        """Run a batch through conv/pool twice, flatten, then three linear layers.

        Returns the raw output of the last linear layer (10 values per sample);
        no activation is applied to it.
        """
        pooled1 = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2, stride=2)
        pooled2 = F.max_pool2d(F.relu(self.conv2(pooled1)), kernel_size=2, stride=2)

        # The linear layers need one flat feature vector per sample, so collapse
        # every dimension except the batch dimension.
        flat = pooled2.flatten(start_dim=1)

        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.out(hidden)

In [10]:
# Seed the RNG right before construction. The Sequential model below is built
# after the same seed and creates its layers in the same order, so both models
# start from identical initial weights and can be compared directly.
torch.manual_seed(50)
# Instantiate the class-based network; the bare name on the last line displays
# its layer summary.
func_net = functional_net()
func_net

functional_net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

## Method 2: Build using the Sequential API

In [12]:
# Same seed as for the class-based model, so the initial weights match.
torch.manual_seed(50)

# With nn.Sequential the whole network is one ordered list of modules. That is
# compact for a simple stack like this one; for more complex networks the
# class-based (functional) style may be the better choice.
#
# Note: the nn modules used here (e.g. nn.ReLU) are essentially wrappers around
#       their functional counterparts (e.g. F.relu).
feature_layers = [
    nn.Conv2d(1, 6, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 12, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),
]
classifier_layers = [
    nn.Flatten(start_dim=1),
    nn.Linear(12 * 4 * 4, 120),
    nn.ReLU(),
    nn.Linear(120, 60),
    nn.ReLU(),
    nn.Linear(60, 10),
]
seq_net = nn.Sequential(*feature_layers, *classifier_layers)

seq_net

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Flatten()
  (7): Linear(in_features=192, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=60, bias=True)
  (10): ReLU()
  (11): Linear(in_features=60, out_features=10, bias=True)
)

## Forward pass through both networks

In [18]:
# Take the second training example and add a leading batch dimension, because
# the networks are called on batches rather than on single images.
image, label = train_set[1]
image = image[None]  # equivalent to unsqueeze(0): prepends a size-1 dimension
image.shape

torch.Size([1, 1, 28, 28])

In [19]:
# Forward the sample through the class-based network: one row of 10 raw,
# untrained output values.
func_net(image)

tensor([[-0.0949,  0.1093, -0.1085,  0.1509, -0.0382, -0.0133,  0.0744, -0.1075,  0.0386,  0.0666]],
       grad_fn=<AddmmBackward>)

In [20]:
# Forward the same sample through the Sequential network.
seq_out = seq_net(image)
# Check the equivalence in code rather than by comparing 4-decimal printouts
# by eye: both models were seeded identically, so their outputs must agree.
assert torch.allclose(seq_out, func_net(image)), \
    "functional and sequential networks produced different outputs"
seq_out

tensor([[-0.0949,  0.1093, -0.1085,  0.1509, -0.0382, -0.0133,  0.0744, -0.1075,  0.0386,  0.0666]],
       grad_fn=<AddmmBackward>)

### The untrained outputs of the two networks are identical: both were built right after `torch.manual_seed(50)` and create their layers in the same order, so they start from the same initial weights