## Imports, Config and Seeding

In [16]:
import timm
import torch
import torchvision
from typing import Dict, Union, Callable, OrderedDict, Tuple
import os, random
import numpy as np
import torch.nn as nn

In [2]:
def seed_all(seed: int = 1992) -> None:
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and CUDA), then configures cuDNN for deterministic execution.

    Args:
        seed (int): Seed value shared by all generators.
    """
    print(f"Using Seed Number {seed}")

    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this only
    # influences processes launched after this point, not the running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)  # set PYTHONHASHSEED env var at fixed value
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)  # pytorch (both CPU and CUDA)
    np.random.seed(seed)  # for numpy pseudo-random generator
    # set fixed value for python built-in pseudo-random generator
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick possibly different algorithms
    # from run to run, so it must stay off when reproducibility is the goal.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = True


def seed_worker(_worker_id) -> None:
    """Re-seed NumPy and Python's ``random`` from the current PyTorch seed.

    The PyTorch seed is reduced to its low 32 bits before being handed to the
    other two generators. ``_worker_id`` is accepted but not used.
    """
    seed_32bit = torch.initial_seed() & 0xFFFFFFFF
    random.seed(seed_32bit)
    np.random.seed(seed_32bit)


# Seed every RNG once, before any random tensors or model weights are created.
seed_all(seed=1992)

Using Seed Number 1992


In [3]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Convolutional Neural Networks

### Terminologies

- Kernel
- Filter
- Receptive Field

In [10]:
# 3x3 input (X) and 2x2 kernel (K) used to exercise the hand-written convolution below.
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])

In [11]:
def my_conv2d(x: torch.Tensor, kernel: torch.Tensor) -> torch.Tensor:
    """Slide a 2-D kernel over a 2-D input with stride 1 and no padding.

    Strictly speaking this is a cross-correlation: the kernel is applied as-is,
    without being flipped.

    Args:
        x (torch.Tensor): 2-D input of shape (input_height, input_width).
        kernel (torch.Tensor): 2-D kernel of shape (kernel_height, kernel_width).

    Returns:
        torch.Tensor: Feature map of shape
            (input_height - kernel_height + 1, input_width - kernel_width + 1),
            allocated on ``x``'s device. Floating-point (or complex) inputs keep
            their promoted dtype; integer/bool inputs yield the default float dtype.
    """
    kernel_height, kernel_width = kernel.shape
    input_height, input_width = x.shape

    feature_map_height, feature_map_width = (
        input_height - kernel_height + 1,
        input_width - kernel_width + 1,
    )

    # Allocate the output next to the input: a bare torch.zeros(...) would always
    # be a default-dtype CPU tensor, silently downcasting float64 inputs and
    # leaving the result on the wrong device for CUDA inputs.
    out_dtype = torch.result_type(x, kernel)
    if not (out_dtype.is_floating_point or out_dtype.is_complex):
        out_dtype = torch.get_default_dtype()
    feature_map = torch.zeros(
        size=(feature_map_height, feature_map_width),
        dtype=out_dtype,
        device=x.device,
    )

    for height_index in range(feature_map_height):
        for width_index in range(feature_map_width):
            # 1st iter: height_index = 0, width_index = 0
            # 2nd iter: height_index = 0, width_index = 1
            receptive_field = x[
                height_index : height_index + kernel_height,
                width_index : width_index + kernel_width,
            ]
            # Element-wise product of the window with the kernel, summed to a scalar.
            feature_map[height_index, width_index] = (
                receptive_field * kernel
            ).sum()

    return feature_map

In [12]:
# Expected result for the X and K defined above: [[19, 25], [37, 43]].
my_conv2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [17]:
class MyConv2D(nn.Module):
    """Learnable single-channel 2-D layer built on top of ``my_conv2d``.

    Holds a ``kernel`` parameter of the requested size, initialised with
    uniform samples from ``torch.rand``, and a one-element ``bias`` parameter
    initialised to zero.
    """

    def __init__(self, kernel_size: Tuple[int, int]):
        super().__init__()
        kernel_rows, kernel_cols = kernel_size[0], kernel_size[1]
        initial_kernel = torch.rand(kernel_rows, kernel_cols)
        self.kernel = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Apply the kernel to ``x`` and add the bias to every output element."""
        feature_map = my_conv2d(x, self.kernel)
        return feature_map + self.bias

In [18]:
# Randomly initialised 2x2 kernel plus zero bias, applied to the 3x3 input X.
conv2d = MyConv2D((2, 2))
conv2d(X)

tensor([[ 7.3671, 10.2563],
        [16.0346, 18.9238]], grad_fn=<AddBackward0>)

### CNN Conv2d Layer Output Dimensions Calculation

Given:

- n: Input image's height/width
- f: Filter/Kernel Size
- p: Padding Size
- s: Stride

Given an image of size $n \times n$ and a kernel of $f \times f$, our output shape is 

$$
o = n - f + 1
$$

If we pad the image with one extra layer of pixels on every side, the input effectively becomes $(n + 2 \cdot 1) \times (n + 2 \cdot 1)$: padding adds $p$ pixels at both ends of each dimension, so the height and the width each grow by $2p$.

hence our new output shape is:

$$
o = (n+2) - f + 1
$$

and the general formula for a padding of size $p$ is

$$
o = (n + 2p) - f + 1
$$

Note that:

- If $p=0$, then this is **valid padding** where the output shape is $n - f + 1$;
- If $p=\frac{f-1}{2}$, then this is **same padding**, where the output shape equals the original input shape $n$.

Same padding can be deduced by setting

$$
(n+2p) - f + 1 = n \implies p = \frac{f-1}{2}
$$

where by construction $f$ must be odd in order to get a whole number for the padding $p$, and that's why most kernels/filters are of odd shape.

With stride into action our final shape is:

$$
o = \dfrac{n - f + 2p}{s} + 1
$$

where $o$ is rounded down, i.e. $\text{floor}(o)$ is used, whenever $\frac{n - f + 2p}{s}$ is not an integer.


### CNN Pooling Layer Output Dimensions Calculation

For general pooling operations:

Given:

- n: Input image's height/width
- f: Filter/Kernel Size
- s: Stride

Given an image of size $n \times n$ and a kernel of $f \times f$, our output shape after pooling is:

$$
o = \text{floor}\left(\dfrac{n - f}{s} + 1\right)
$$

with floor applied to $o$.

## LeNet

In [23]:
import torch
from torch import nn


# LeNet-style network. The shapes in the comments are for an input of shape
# (N, 1, 28, 28), matching the shape trace printed further below.
lenet = nn.Sequential(
    # Convolutional block 1: (N, 1, 28, 28) -> (N, 6, 28, 28) -> (N, 6, 14, 14)
    nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(2, stride=2),
    # Convolutional block 2: (N, 6, 14, 14) -> (N, 16, 10, 10) -> (N, 16, 5, 5)
    nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
    nn.Sigmoid(),
    nn.AvgPool2d(2, stride=2),
    # Classifier: flatten to (N, 400), then 400 -> 120 -> 84 -> 10
    nn.Flatten(),
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(in_features=120, out_features=84),
    nn.Sigmoid(),
    nn.Linear(in_features=84, out_features=10),
)

- X: (1, 1, 28, 28)
- 1st conv2d layer: a conv2d layer that applies a filter containing 6 kernels, where each kernel is of size $5 \times 5$ with a padding of 2 and stride of 1
    - n = 28
    - f = 5
    - p = 2 ; **Note in particular the padding of 2 is derived from the formula $p = (f-1)/2 = 4/2=2$ to get same padding!**
    - s = 1
    - shape: $o = \frac{28-5+4}{1} + 1 = 28$
    - The final output shape is (1, 6, 28, 28) where
        - 1 is the batch size
        - 6 is the number of kernels applied, for each kernel our output shape is 28 by 28
        - 28, 28 is the output shape by the kernels
        
- 1st avgpool2d layer: 
    - n = 28
    - f = 2
    - s = 2
    - o = 14
    - The final output shape is (1, 6, 14, 14), where the height and width of each feature map are halved.
 
- 2nd conv2d layer:
    - n = 14
    - f = 5
    - p = 0
    - s = 1
    - o = 10
    - The final output shape is (1, 16, 10, 10)
    
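- 2nd avgpool2d layer:
    - n = 10
    - f = 2
    - s = 2
    - o = 5
    - The final output shape is (1, 16, 5, 5), which `nn.Flatten` turns into (1, 400), matching the `16 * 5 * 5` input size of the first linear layer.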

In [49]:
# Push one random single-channel 28x28 image through the network, layer by layer,
# and report the shape after each layer.
X = torch.rand(1, 1, 28, 28, dtype=torch.float32)

for layer in lenet:
    X = layer(X)
    print(type(layer).__name__, "output shape: \t", X.shape)

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
Sigmoid output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
Sigmoid output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])


In [50]:
# Same layer-by-layer pass as above, but only layers exposing a `weight`
# attribute are reported, together with the shape of that weight.
X = torch.rand(1, 1, 28, 28, dtype=torch.float32)
for layer in lenet:
    X = layer(X)
    if not hasattr(layer, "weight"):
        continue
    print(layer, "layer weight shape: \t", layer.weight.shape)

Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) layer weight shape: 	 torch.Size([6, 1, 5, 5])
Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1)) layer weight shape: 	 torch.Size([16, 6, 5, 5])
Linear(in_features=400, out_features=120, bias=True) layer weight shape: 	 torch.Size([120, 400])
Linear(in_features=120, out_features=84, bias=True) layer weight shape: 	 torch.Size([84, 120])
Linear(in_features=84, out_features=10, bias=True) layer weight shape: 	 torch.Size([10, 84])


## Toy Models

I created two versions of the same model. The `Sequential` version is more compact, but it is often more difficult to extract individual layers from it.

In [128]:
class ToyModel(torch.nn.Module):
    """Toy fully connected network: 25 -> 60 -> 16 -> 120 -> 84 -> 10.

    Each of the first four linear layers is followed by a ReLU; the last one
    is followed by a log-softmax over dimension 1.
    """

    def __init__(self):
        super().__init__()
        self.cl1 = torch.nn.Linear(in_features=25, out_features=60)
        self.cl2 = torch.nn.Linear(in_features=60, out_features=16)
        self.fc1 = torch.nn.Linear(in_features=16, out_features=120)
        self.fc2 = torch.nn.Linear(in_features=120, out_features=84)
        self.fc3 = torch.nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Forward pass of the model.

        Args:
            x (torch.Tensor): Input whose last dimension is 25; the log-softmax
                is taken over dim 1, so a (batch, 25) tensor is expected.

        Returns:
            torch.Tensor: Log-probabilities over the 10 outputs.
        """
        hidden = x
        for linear in (self.cl1, self.cl2, self.fc1, self.fc2):
            hidden = torch.relu(linear(hidden))
        return torch.log_softmax(self.fc3(hidden), dim=1)


class ToySequentialModel(torch.nn.Module):
    """The ``ToyModel`` architecture expressed as two named ``Sequential`` stacks.

    ``backbone`` holds cl1/cl2 with their ReLUs; ``head`` holds fc1..fc3, their
    ReLUs and the final log-softmax over dimension 1.
    """

    def __init__(self) -> None:
        super().__init__()

        # The keys below become the sub-module names, e.g. "backbone.cl1".
        backbone_layers = OrderedDict()
        backbone_layers["cl1"] = torch.nn.Linear(25, 60)
        backbone_layers["cl_relu1"] = torch.nn.ReLU()
        backbone_layers["cl2"] = torch.nn.Linear(60, 16)
        backbone_layers["cl_relu2"] = torch.nn.ReLU()
        self.backbone = torch.nn.Sequential(backbone_layers)

        head_layers = OrderedDict()
        head_layers["fc1"] = torch.nn.Linear(16, 120)
        head_layers["fc_relu_1"] = torch.nn.ReLU()
        head_layers["fc2"] = torch.nn.Linear(120, 84)
        head_layers["fc_relu_2"] = torch.nn.ReLU()
        head_layers["fc3"] = torch.nn.Linear(84, 10)
        head_layers["fc_log_softmax"] = torch.nn.LogSoftmax(dim=1)
        self.head = torch.nn.Sequential(head_layers)

    def forward(self, x):
        """Forward pass of the model.

        Args:
            x (torch.Tensor): Input whose last dimension is 25; the log-softmax
                is taken over dim 1, so a (batch, 25) tensor is expected.

        Returns:
            torch.Tensor: Log-probabilities over the 10 outputs.
        """
        return self.head(self.backbone(x))

## Named Modules

`named_modules()` returns an iterator over all modules in the network, yielding both the name of each module and the module itself.

In [129]:
# The first name printed is the empty string: named_modules() yields the root
# module itself before its children.
for name, layer in ToySequentialModel().named_modules():
    print(name)


backbone
backbone.cl1
backbone.cl_relu1
backbone.cl2
backbone.cl_relu2
head
head.fc1
head.fc_relu_1
head.fc2
head.fc_relu_2
head.fc3
head.fc_log_softmax


## Get Convolutional Layers

In [130]:
def get_conv_layers(
    model: Callable, layer_type: str = "Conv2d"
) -> Dict[str, str]:
    """Collect the names of all layers of a given type in a PyTorch model.

    This function is created to be used in conjunction with Visualization of Feature Maps.

    Args:
        model (Union[torchvision.models, timm.models]): A PyTorch model; only its
            ``named_modules()`` method is used.
        layer_type (str): Name of a layer class in ``torch.nn``, e.g. "Conv2d"
            or "Conv1d".

    Returns:
        conv_layers (Dict[str, str]): Maps each matching layer's qualified name
            to itself, e.g. {"layer1.0.conv1": "layer1.0.conv1", ...}, in the
            order the layers are yielded by ``named_modules()``.

    Raises:
        ValueError: If ``layer_type`` does not name a ``torch.nn`` module class.

    Example:
        >>> resnet34_pretrained_true = timm.create_model(model_name="resnet34", pretrained=True, num_classes=10).to(DEVICE)
        >>> conv_layers = get_conv_layers(resnet34_pretrained_true, layer_type="Conv2d")
    """
    # Resolve the class by name rather than through an if/elif chain, so an
    # unsupported name fails with a clear message instead of leaving the local
    # variable unbound.
    _layer_type = getattr(torch.nn, layer_type, None)
    if not (
        isinstance(_layer_type, type)
        and issubclass(_layer_type, torch.nn.Module)
    ):
        raise ValueError(
            f"layer_type must name a torch.nn layer class, got {layer_type!r}"
        )

    return {
        name: name
        for name, layer in model.named_modules()
        if isinstance(layer, _layer_type)
    }

In [None]:
# NOTE(review): the variable is named resnet18, but the model created is "resnet34".
resnet18_pretrained_true = timm.create_model(model_name = "resnet34", pretrained=True, num_classes=10).to(DEVICE)

In [131]:
# Doctest ">>>" prompts removed: they are not valid Python and only ran because
# IPython strips them from cell input.
# NOTE(review): the variable is named resnet18, but the model created is "resnet34".
resnet18_pretrained_true = timm.create_model(model_name="resnet34", pretrained=True, num_classes=10).to(DEVICE)
conv_layers = get_conv_layers(resnet18_pretrained_true, layer_type="Conv2d")
print(conv_layers)

{'conv1': 'conv1', 'layer1.0.conv1': 'layer1.0.conv1', 'layer1.0.conv2': 'layer1.0.conv2', 'layer1.1.conv1': 'layer1.1.conv1', 'layer1.1.conv2': 'layer1.1.conv2', 'layer1.2.conv1': 'layer1.2.conv1', 'layer1.2.conv2': 'layer1.2.conv2', 'layer2.0.conv1': 'layer2.0.conv1', 'layer2.0.conv2': 'layer2.0.conv2', 'layer2.0.downsample.0': 'layer2.0.downsample.0', 'layer2.1.conv1': 'layer2.1.conv1', 'layer2.1.conv2': 'layer2.1.conv2', 'layer2.2.conv1': 'layer2.2.conv1', 'layer2.2.conv2': 'layer2.2.conv2', 'layer2.3.conv1': 'layer2.3.conv1', 'layer2.3.conv2': 'layer2.3.conv2', 'layer3.0.conv1': 'layer3.0.conv1', 'layer3.0.conv2': 'layer3.0.conv2', 'layer3.0.downsample.0': 'layer3.0.downsample.0', 'layer3.1.conv1': 'layer3.1.conv1', 'layer3.1.conv2': 'layer3.1.conv2', 'layer3.2.conv1': 'layer3.2.conv1', 'layer3.2.conv2': 'layer3.2.conv2', 'layer3.3.conv1': 'layer3.3.conv1', 'layer3.3.conv2': 'layer3.3.conv2', 'layer3.4.conv1': 'layer3.4.conv1', 'layer3.4.conv2': 'layer3.4.conv2', 'layer3.5.conv1':

In [132]:
# Module-level store filled by the forward hooks: layer name -> detached output.
activation = {}


def get_intermediate_features(name: str) -> Callable:
    """Build a forward hook that records a layer's output under ``name``.

    This is using forward hook with reference https://discuss.pytorch.org/t/how-can-l-load-my-best-model-as-a-feature-extractor-evaluator/17254/5

    Args:
        name (str): Key under which the output is stored in ``activation``.

    Returns:
        Callable: A ``hook(module, inputs, output)`` function suitable for
            ``register_forward_hook``; it stores ``output.detach()`` in the
            module-level ``activation`` dict and returns nothing.
    """

    def _store_output(_module, _inputs, output):
        # Detach so the stored tensor is cut off from the autograd graph.
        activation[name] = output.detach()

    return _store_output


# Demonstrates the forward hooks by capturing the outputs of the last two linear layers.
# ToyModel and ToySequentialModel share one architecture but are built differently.
# Each model is freshly (randomly) initialised here, so to compare their outputs run
# the two branches separately.
# Either branch asserts that the model output equals LogSoftmax(dim=1) applied to the
# captured fc3 activation.

use_sequential_model = True
x = torch.randn(1, 25)

if use_sequential_model:
    sequential_model = ToySequentialModel()

    # To hook every layer instead of just these two:
    # for name, layer in sequential_model.named_modules():
    #     layer.register_forward_hook(get_intermediate_features(name))
    sequential_model.head.fc2.register_forward_hook(
        get_intermediate_features("head.fc2")
    )
    sequential_model.head.fc3.register_forward_hook(
        get_intermediate_features("head.fc3")
    )
    sequential_model_output = sequential_model(x)
    print(activation)
    fc2_output = activation["head.fc2"]
    fc3_output = activation["head.fc3"]
    # The model output must equal log-softmax of the captured fc3 output.
    assert torch.allclose(
        sequential_model_output, torch.log_softmax(fc3_output, dim=1)
    )
else:
    model = ToyModel()

    model.fc2.register_forward_hook(get_intermediate_features("fc2"))
    model.fc3.register_forward_hook(get_intermediate_features("fc3"))
    output = model(x)
    print(activation)
    fc2_output = activation["fc2"]
    fc3_output = activation["fc3"]
    # The model output must equal log-softmax of the captured fc3 output.
    assert torch.allclose(output, torch.log_softmax(fc3_output, dim=1))


{'head.fc2': tensor([[ 0.0697,  0.0544, -0.0157, -0.1059, -0.0464, -0.0090,  0.0532, -0.1273,
         -0.0286, -0.0151,  0.0963,  0.2205,  0.0745, -0.0110, -0.1127, -0.0367,
         -0.0681,  0.0463, -0.0833,  0.1288,  0.1058,  0.0976, -0.0251,  0.0980,
         -0.0110,  0.1170, -0.0650,  0.2091, -0.1773,  0.0363, -0.1452,  0.0036,
          0.0112, -0.0304, -0.0620, -0.0658, -0.0543,  0.0072,  0.0436,  0.0703,
          0.0254, -0.0614,  0.0164, -0.1003, -0.0396,  0.0349,  0.0089, -0.1243,
         -0.1037, -0.0491,  0.0627, -0.1347,  0.0010, -0.1290, -0.0280, -0.0344,
          0.1487, -0.1764, -0.0233,  0.0082,  0.1270,  0.0368,  0.0103, -0.0929,
          0.0038,  0.1346, -0.0688, -0.0437, -0.1205, -0.1596, -0.0240, -0.1001,
         -0.0300, -0.1119,  0.0344, -0.1587,  0.0329, -0.0424,  0.0999,  0.0732,
          0.1116,  0.0220, -0.0570,  0.0232]]), 'head.fc3': tensor([[ 0.0256, -0.0924,  0.0456,  0.0972,  0.0107,  0.0527,  0.0208,  0.0373,
          0.0451,  0.0712]])}


## How to freeze layers

In [53]:
# resnet18_pretrained_true = timm.create_model(model_name = "resnet34", pretrained=True, num_classes=10).to(DEVICE)

In [54]:
# With track_running_stats=True the layer keeps running_mean / running_var buffers,
# which start at 0 and 1 respectively (see the printed values).
norm = torch.nn.InstanceNorm2d(num_features=3, track_running_stats=True)
print(norm.running_mean, norm.running_var)

tensor([0., 0., 0.]) tensor([1., 1., 1.])


In [55]:
x = torch.randn(2, 3, 24, 24)

# Three forward passes on the same batch; the running statistics printed after
# each pass keep changing.
for _pass_index in range(3):
    out = norm(x)
    print(norm.running_mean, norm.running_var)

tensor([-1.3414e-03, -4.7338e-05,  1.1239e-03]) tensor([1.0010, 0.9984, 0.9989])
tensor([-2.5486e-03, -8.9943e-05,  2.1355e-03]) tensor([1.0018, 0.9969, 0.9979])
tensor([-0.0036, -0.0001,  0.0030]) tensor([1.0026, 0.9956, 0.9970])


In [20]:
# Switch to eval mode, run one more forward pass and print the running statistics.
# NOTE(review): the recorded output below carries execution count 20 and does not
# continue the values printed by the previous cell, so it appears to be stale.
norm.eval()
out = norm(x)
print(norm.running_mean, norm.running_var)

tensor([-0.0160, -0.0018,  0.0068]) tensor([1.0002, 1.0082, 0.9904])


In [56]:
def freeze_batchnorm_layers(model: Callable) -> None:
    """Freeze the normalization layers of a PyTorch model.

    Every ``InstanceNorm2d`` and ``BatchNorm2d`` found in ``model.modules()``
    is put in eval mode. For ``BatchNorm2d`` the affine ``weight`` and ``bias``
    (when present) additionally stop requiring gradients.

    Args:
        model (CustomNeuralNet): model to be frozen; modified in place.

    Note:
        Calling ``model.train()`` afterwards puts these layers back into
        training mode, so re-apply this function after each such call.

    Example:
        >>> model = timm.create_model("efficientnet_b0", pretrained=True)
        >>> model.apply(freeze_batchnorm_layers) # to freeze during training
    """
    # https://discuss.pytorch.org/t/how-to-freeze-bn-layers-while-training-the-rest-of-network-mean-and-var-wont-freeze/89736/19
    # https://discuss.pytorch.org/t/should-i-use-model-eval-when-i-freeze-batchnorm-layers-to-finetune/39495/3
    for module in model.modules():
        if isinstance(module, torch.nn.InstanceNorm2d):
            module.eval()
        if isinstance(module, torch.nn.BatchNorm2d):
            # With affine=False the weight/bias attributes exist but are None,
            # so test the value itself rather than using hasattr.
            for affine_param in (
                getattr(module, "weight", None),
                getattr(module, "bias", None),
            ):
                if affine_param is not None:
                    affine_param.requires_grad_(False)
            module.eval()

In [57]:
# Freeze `norm` in place; `apply` returns the module, hence the repr shown below.
norm.apply(freeze_batchnorm_layers)

InstanceNorm2d(3, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)

In [58]:
# One more forward pass, now on the frozen layer.
out = norm(x)

In [59]:
# The values shown match the last statistics printed in training mode above:
# the frozen layer did not update them during the extra forward pass.
norm.running_mean, norm.running_var

(tensor([-0.0036, -0.0001,  0.0030]), tensor([1.0026, 0.9956, 0.9970]))