In [None]:
# default_exp classification.modelling.backbones

In [None]:
# hide
import warnings

warnings.filterwarnings("ignore")

%load_ext nb_black

<IPython.core.display.Javascript object>

In [None]:
# hide
from nbdev.showdoc import *
from nbdev.export import *
from nbdev.imports import Config as NbdevConfig

nbdev_path = str(NbdevConfig().path("nbs_path") / "data")
nbdev_path

'/Users/ayushman/Desktop/lightning_cv/nbs/data'

<IPython.core.display.Javascript object>

# Model Body for Image Classification
> Convenience functions to prepare a pretrained model for vision applications

In [None]:
# export
from typing import *
import importlib

import timm
import torch
from torch import nn

import re
from omegaconf import DictConfig
from fastcore.all import use_kwargs_dict

from torchvision import models

from lightning_cv.core.layers import *
from lightning_cv.core import Registry, ACTIVATION_REGISTERY

<IPython.core.display.Javascript object>

In [None]:
# hide
from fastcore.all import *
from omegaconf import OmegaConf
from lightning_cv.core.layers import Mish

<IPython.core.display.Javascript object>

## Cut a pretrained model

In [None]:
# export
def _is_pool_type(l):
    return re.search(r"Pool[123]d$", l.__class__.__name__)

<IPython.core.display.Javascript object>

In [None]:
# hide
# only the two pooling layers (first and last child) should be matched by `_is_pool_type`
m = nn.Sequential(
    nn.AdaptiveAvgPool2d(5), nn.Linear(2, 3), nn.Conv2d(2, 3, 1), nn.MaxPool3d(5)
)
test_eq([bool(_is_pool_type(m_)) for m_ in m.children()], [True, False, False, True])

<IPython.core.display.Javascript object>

By default, the `LightningCV` library cuts a pretrained model at the pooling layer (similar to the fastai library). This function helps to detect it.

In [None]:
# export
def has_pool_type(m):
    "Return `True` if `m` is a pooling layer or contains one anywhere below it"
    if _is_pool_type(m):
        return True
    # depth-first search through the children; stops at the first pooling layer found
    return any(has_pool_type(child) for child in m.children())

<IPython.core.display.Javascript object>

In [None]:
# the container as a whole has a pooling layer; child by child only the two pooling layers do
m = nn.Sequential(
    nn.AdaptiveAvgPool2d(5), nn.Linear(2, 3), nn.Conv2d(2, 3, 1), nn.MaxPool3d(5)
)
assert has_pool_type(m)
test_eq([has_pool_type(m_) for m_ in m.children()], [True, False, False, True])

<IPython.core.display.Javascript object>

In [None]:
# export
def create_body(model: nn.Module, cut: Optional[Union[int, Callable]] = None):
    """Cut off the body of a `model` as determined by `cut`.

    - `cut=None`: walk the direct children of `model` from last to first and cut right
      before the last child that is, or contains, a pooling layer (see `has_pool_type`).
    - `cut` is an `int`: keep `list(model.children())[:cut]` wrapped in `nn.Sequential`.
    - `cut` is a callable: return `cut(model)` as is.

    Raises `ValueError` if `cut` is `None` and no child has a pooling layer, and
    `NameError` if `cut` is neither an integer nor a callable.
    """
    if cut is None:
        indexed_children = list(enumerate(model.children()))
        # a default of `None` keeps a bare `StopIteration` from escaping when nothing matches
        cut = next(
            (i for i, child in reversed(indexed_children) if has_pool_type(child)),
            None,
        )
        if cut is None:
            raise ValueError(
                "could not find a pooling layer to cut at, pass `cut` explicitly"
            )
    if isinstance(cut, int):
        return nn.Sequential(*list(model.children())[:cut])
    if callable(cut):
        return cut(model)
    # the builtin `NameError` is what the previous (undefined) `NamedError` ended up raising,
    # so the exception type is unchanged but the message is now the intended one
    raise NameError("cut must be either integer or a function")

<IPython.core.display.Javascript object>

In [None]:
# default cut: the last pooling child is `AvgPool2d` at index 2, so the two layers before it are kept
tst = nn.Sequential(
    nn.Conv2d(3, 5, 3), nn.BatchNorm2d(5), nn.AvgPool2d(1), nn.Linear(3, 4)
)
m = create_body(tst)
test_eq(len(m), 2)

# integer cut: keep the first 3 children
m = create_body(tst, cut=3)
test_eq(len(m), 3)

<IPython.core.display.Javascript object>

In [None]:
# hide
# timm resnet18: the automatic cut and an explicit `cut=-2` should both keep 8 children
tst = timm.create_model("resnet18", pretrained=False, num_classes=0, global_pool="")
m = create_body(tst)
test_eq(len(m), 8)

m = create_body(tst, cut=-2)
test_eq(len(m), 8)

<IPython.core.display.Javascript object>

In [None]:
# export
class TimmCnnBody(nn.Module):
    "default `nn.Module` to create a body for vision applications from `timm`"

    # NOTE(review): `use_kwargs_dict` presumably only rewrites the displayed signature;
    # confirm whether the defaults listed here are actually forwarded to `timm.create_model`
    @use_kwargs_dict(keep=True, pretrained=False, num_classes=0, global_pool="")
    def __init__(self, model_name: str, cut=None, act_layer: str = None, **kwargs):
        super().__init__()
        # `act_layer` is a key into `ACTIVATION_REGISTERY`; `None` is passed through untouched
        # (presumably timm then keeps the architecture's own activation)
        activation = None if act_layer is None else ACTIVATION_REGISTERY.get(act_layer)

        # build the full timm model, then keep only the part selected by `cut`
        full_model = timm.create_model(model_name, act_layer=activation, **kwargs)
        self.net = create_body(full_model, cut)

    def forward(self, xb):
        "Run `xb` through the cut model"
        return self.net(xb)

    @classmethod
    def from_config(cls, config: DictConfig):
        "Instantiate from an `OmegaConf`/`Hydra` config whose keys are the `__init__` arguments"
        return cls(**config)

<IPython.core.display.Javascript object>

In [None]:
# references: `m1` is the plain timm model (compared through `forward_features`),
# `m2` is the same model created with `global_pool=""` and `num_classes=0`
m1 = timm.create_model("resnet18", pretrained=True, act_layer=None)
m2 = timm.create_model(
    "resnet18", pretrained=True, act_layer=None, global_pool="", num_classes=0
)
tst = TimmCnnBody(model_name="resnet18", cut=-2, act_layer=None, pretrained=True)


with torch.no_grad():
    i = torch.randn(2, 3, 299, 299)
    o1 = m1.forward_features(i)
    o2 = m2(i)
    o3 = tst(i)

# the cut body must reproduce both reference outputs
test_eq(o1, o3)
test_eq(o2, o3)

<IPython.core.display.Javascript object>

> Note: You can use the `act_layer` argument to change the activation layer of the `TimmCnnBody`. `act_layer` is a string that is looked up in the `ACTIVATION_REGISTERY`. If you are using an activation function that is not in the `ACTIVATION_REGISTERY`, be sure to register it first. Also, timm requires the activation function to accept an `inplace` argument.

In [None]:
# same check with a different activation: the references get the `Mish` class directly,
# `TimmCnnBody` gets the string key "Mish"
m1 = timm.create_model("resnet18", pretrained=True, act_layer=Mish)
m2 = timm.create_model(
    "resnet18", pretrained=True, act_layer=Mish, global_pool="", num_classes=0
)
tst = TimmCnnBody(model_name="resnet18", cut=-2, act_layer="Mish", pretrained=True)


with torch.no_grad():
    i = torch.randn(2, 3, 299, 299)
    o1 = m1.forward_features(i)
    o2 = m2(i)
    o3 = tst(i)

test_eq(o1, o3)
test_eq(o2, o3)

<IPython.core.display.Javascript object>

In [None]:
# same check with single-channel input: `in_chans=1` is forwarded to timm through `**kwargs`
m1 = timm.create_model("resnet18", pretrained=True, act_layer=Mish, in_chans=1)
m2 = timm.create_model(
    "resnet18",
    pretrained=True,
    act_layer=Mish,
    global_pool="",
    num_classes=0,
    in_chans=1,
)
tst = TimmCnnBody(
    model_name="resnet18", cut=-2, act_layer="Mish", pretrained=True, in_chans=1
)


with torch.no_grad():
    i = torch.randn(2, 1, 299, 299)
    o1 = m1.forward_features(i)
    o2 = m2(i)
    o3 = tst(i)

test_eq(o1, o3)
test_eq(o2, o3)

<IPython.core.display.Javascript object>

In [None]:
# export
def _get_first_layer(m):
    "Access first layer of a model"
    c, p, n = m, None, None  # child, parent, name
    for n in next(m.named_parameters())[0].split(".")[:-1]:
        p, c = c, getattr(c, n)
    return c, p, n

<IPython.core.display.Javascript object>

In [None]:
# export
def _update_first_layer(model, n_in):
    "Change first layer based on number of input channels used for torchvisions.models"
    if n_in == 3:
        return
    first_layer, parent, name = _get_first_layer(model)
    assert isinstance(
        first_layer, nn.Conv2d
    ), f"Change of input channels only supported with Conv2d, found {first_layer.__class__.__name__}"
    assert (
        getattr(first_layer, "in_channels") == 3
    ), f'Unexpected number of input channels, found {getattr(first_layer, "in_channels")} while expecting 3'
    params = {
        attr: getattr(first_layer, attr)
        for attr in "out_channels kernel_size stride padding dilation groups padding_mode".split()
    }
    params["bias"] = getattr(first_layer, "bias") is not None
    params["in_channels"] = n_in
    new_layer = nn.Conv2d(**params)
    setattr(parent, name, new_layer)

<IPython.core.display.Javascript object>

In [None]:
# export
class TorchvisionCnnBody(nn.Module):
    "default `nn.Module` to create a body for vision applications from `torchvision.models`"

    def __init__(
        self, model_name: str, in_chans: int = 3, pretrained: bool = True, cut=None
    ):
        super().__init__()

        # look the model constructor up by name in `torchvision.models`
        tv_models = importlib.import_module("torchvision.models")
        build_model = getattr(tv_models, model_name)
        full_model = build_model(pretrained=pretrained)

        # swap the first conv if `in_chans` differs from 3, then keep the part selected by `cut`
        _update_first_layer(full_model, n_in=in_chans)
        self.net = create_body(full_model, cut)

    def forward(self, xb):
        "Run `xb` through the cut model"
        return self.net(xb)

    @classmethod
    def from_config(cls, config: DictConfig):
        "Instantiate from an `OmegaConf`/`Hydra` config whose keys are the `__init__` arguments"
        return cls(**config)

<IPython.core.display.Javascript object>

In [None]:
# hide
# an explicit `cut=-2` and the automatic cut (`cut=None`) should give the same output for resnet18
m1 = TorchvisionCnnBody("resnet18", cut=-2)
m2 = TorchvisionCnnBody("resnet18", cut=None, in_chans=3)

with torch.no_grad():
    i = torch.randn(2, 3, 299, 299)
    o1 = m1(i)
    o2 = m2(i)

test_eq(o1, o2)

<IPython.core.display.Javascript object>

## CNN_BODY_REGISTRY
> Registry of model bodies for image classification from `timm` & `torchvision`

In [None]:
# export
# registry used by `create_cnn_body` to look a body class up by name
CNN_BODY_REGISTRY = Registry("CNN_Body")
# the two built-in bodies are made available to configs here
CNN_BODY_REGISTRY.register(TimmCnnBody)
CNN_BODY_REGISTRY.register(TorchvisionCnnBody)

<IPython.core.display.Javascript object>

In [None]:
# hide-input
# display the names currently registered and the objects they map to
CNN_BODY_REGISTRY

Registry of CNN_Body:
╒════════════════════╤═══════════════════════════════════════╕
│ Names              │ Objects                               │
╞════════════════════╪═══════════════════════════════════════╡
│ TimmCnnBody        │ <class '__main__.TimmCnnBody'>        │
├────────────────────┼───────────────────────────────────────┤
│ TorchvisionCnnBody │ <class '__main__.TorchvisionCnnBody'> │
╘════════════════════╧═══════════════════════════════════════╛

<IPython.core.display.Javascript object>

In [None]:
# export
def create_cnn_body(cfg: DictConfig) -> nn.Module:
    "Instantiate the body named by `cfg.MODEL.BODY.NAME` from `CNN_BODY_REGISTRY`, passing `cfg.MODEL.BODY.ARGUMENTS` to its `from_config`"
    body_cls = CNN_BODY_REGISTRY.get(cfg.MODEL.BODY.NAME)
    return body_cls.from_config(cfg.MODEL.BODY.ARGUMENTS)

<IPython.core.display.Javascript object>

Creating a `cnn_body` from `lightning_cv.config` -

In [None]:
from lightning_cv.config import get_cfg

cfg = get_cfg()

# `NAME` selects the class in `CNN_BODY_REGISTRY`; `ARGUMENTS` are handed to its `from_config`
cfg.MODEL.BODY.NAME = "TimmCnnBody"
cfg.MODEL.BODY.ARGUMENTS = dict(
    model_name="efficientnet_b0", pretrained=True, cut=None, in_chans=3
)
print(OmegaConf.to_yaml(cfg.MODEL.BODY))

NAME: TimmCnnBody
ARGUMENTS:
  model_name: efficientnet_b0
  pretrained: true
  cut: null
  in_chans: 3



<IPython.core.display.Javascript object>

The config given above will load a pretrained `efficientnet_b0` model from `timm` with 3 input channels and cut the model at the pooling layer.

In [None]:
# body built from the config vs. the same architecture built with timm directly
tst = create_cnn_body(cfg)
m1 = timm.create_model(
    cfg.MODEL.BODY.ARGUMENTS.model_name, pretrained=True, act_layer=None, in_chans=3
)

m2 = timm.create_model(
    cfg.MODEL.BODY.ARGUMENTS.model_name,
    pretrained=True,
    act_layer=None,
    global_pool="",
    num_classes=0,
    in_chans=3,
)


with torch.no_grad():
    i = torch.randn(2, 3, 299, 299)
    o1 = m1.forward_features(i)
    o2 = m2(i)
    o3 = tst(i)

# the config-built body must reproduce both reference outputs
test_eq(o1, o3)
test_eq(o2, o3)

<IPython.core.display.Javascript object>

The resulting model will look like this -

In [None]:
# collapse-output
print(tst)

TimmCnnBody(
  (net): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): SiLU(inplace=True)
    (3): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): SiLU(inplace=True)
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          )
          (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act2): Identi

<IPython.core.display.Javascript object>

This is the original model -

In [None]:
# collapse-output
print(m1)

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
      )
    )
    (1): Sequenti

<IPython.core.display.Javascript object>

Notice how the modified model created from `TimmCnnBody` does not have the `global_pool` layer of `m1` or any of the layers after it. This is what we mean by cutting the model: the layers of `tst` from the `global_pool` layer onwards were removed.

In [None]:
# for a different activation
# for a different activation: the config carries the string key, the references get the class
cfg.MODEL.BODY.ARGUMENTS.act_layer = "Mish"

tst = create_cnn_body(cfg)
m1 = timm.create_model(
    cfg.MODEL.BODY.ARGUMENTS.model_name, pretrained=True, act_layer=Mish
)
m2 = timm.create_model(
    cfg.MODEL.BODY.ARGUMENTS.model_name,
    pretrained=True,
    act_layer=Mish,
    global_pool="",
    num_classes=0,
)


with torch.no_grad():
    i = torch.randn(2, 3, 299, 299)
    o1 = m1.forward_features(i)
    o2 = m2(i)
    o3 = tst(i)

test_eq(o1, o3)
test_eq(o2, o3)

<IPython.core.display.Javascript object>

In [None]:
# collapse-output
print(tst)

TimmCnnBody(
  (net): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Mish()
    (3): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act1): Mish()
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): Mish()
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          )
          (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act2): Identity()
        )
      )
      (1): Se

<IPython.core.display.Javascript object>

To create models from `torchvision` we need to use the `TorchvisionCnnBody` class. We will modify the config as shown below. `model_name` and `pretrained` are the arguments used for loading a model from `torchvision.models`.

In [None]:
# for torchvision models

# first we need to update the config: switch the registry name and pass
# the arguments that `TorchvisionCnnBody` accepts
cfg.MODEL.BODY.NAME = "TorchvisionCnnBody"
cfg.MODEL.BODY.ARGUMENTS = dict(
    model_name="resnet18", pretrained=True, in_chans=3, cut=None
)

<IPython.core.display.Javascript object>

In [None]:
# the config-built body should match a `TorchvisionCnnBody` constructed with the same arguments
tst = create_cnn_body(cfg)
m1 = TorchvisionCnnBody("resnet18", pretrained=True, in_chans=3, cut=None)

with torch.no_grad():
    i = torch.randn(2, 3, 299, 299)
    o1 = m1(i)
    o2 = tst(i)

test_eq(o1, o2)

<IPython.core.display.Javascript object>

> Note: For `create_cnn_body` to work, your `obj` must be registered in the `CNN_BODY_REGISTRY` registry and must have a `from_config` `classmethod`.

You can also create your own custom body like so -

In [None]:
@CNN_BODY_REGISTRY.register()
class CustomBody(nn.Module):
    "Example of a user-defined body: torchvision's `googlenet` (with `pretrained=False`) cut by `create_body`"

    def __init__(self):
        super().__init__()
        googlenet = models.googlenet(pretrained=False)
        # `n_in=3` is the early-return case of `_update_first_layer`, so the first layer is kept
        _update_first_layer(googlenet, n_in=3)
        self.net = create_body(googlenet)

    def forward(self, xb):
        "Run `xb` through the cut model"
        return self.net(xb)

    @classmethod
    def from_config(cls, cfg: Dict):
        "Instantiate from a mapping of `__init__` arguments (none are accepted here, so it must be empty)"
        return cls(**cfg)


# NOTE(review): `strict=False` is presumably needed to allow the custom name below; confirm against `get_cfg`
cfg = get_cfg(strict=False)
cfg.MODEL.BODY.NAME = "CustomBody"
# `CustomBody.__init__` takes no arguments, so nothing is passed
cfg.MODEL.BODY.ARGUMENTS = dict()

mb = create_cnn_body(cfg)

<IPython.core.display.Javascript object>

In [None]:
# collapse-output
print(mb)

CustomBody(
  (net): Sequential(
    (0): BasicConv2d(
      (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (2): BasicConv2d(
      (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): BasicConv2d(
      (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (4): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (5): Inception(
      (branch1): BasicConv2d(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0

<IPython.core.display.Javascript object>

In [None]:
# hide
# run the nbdev export; it reports one `Converted ...` line per notebook
notebook2script()

Converted 00_config.ipynb.
Converted 00a_core.common.ipynb.
Converted 00b_core.data.ipynb.
Converted 00c_core.optim.ipynb.
Converted 00d_core.schedules.ipynb.
Converted 00e_core.layers.ipynb.
Converted 01a_classification.data.transforms.ipynb.
Converted 01b_classification.data.datasets.ipynb.
Converted 01c_classification.modelling.backbones.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>