In [None]:
# hide
# skip
!git clone https://github.com/benihime91/gale # install gale on colab
!pip install -e "gale[dev]"

In [None]:
# default_exp classification.model.meta_arch.common

In [None]:
# hide
%load_ext nb_black
%load_ext autoreload
%autoreload 2
%matplotlib inline

<IPython.core.display.Javascript object>

In [None]:
# hide
import warnings

from nbdev.export import *
from nbdev.showdoc import *
from timm.utils import *

# Suppress every warning category so the rendered notebook output stays clean.
warnings.filterwarnings("ignore")

# 20 is the numeric value of the standard library's `logging.INFO` level.
setup_default_logging(default_level=20)

<IPython.core.display.Javascript object>

# Meta Architectures : Generalized Image Classifier 
> Default Model Architecture for Image Classification

In [None]:
# export
import logging
from collections import namedtuple
from typing import *

import torch
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning.core.memory import get_human_readable_count
from torch.nn import Module

from gale.classification.model.backbones import ImageClassificationBackbone
from gale.classification.model.build import build_backbone, build_head
from gale.classification.model.heads import ImageClassificationHead
from gale.core_classes import BasicModule
from gale.utils.shape_spec import ShapeSpec

_logger = logging.getLogger(__name__)

<IPython.core.display.Javascript object>

In [None]:
# export
class GeneralizedImageClassifier(BasicModule):
    """
    A General Image Classifier. Any model that contains the following 2 components:
    1. Feature extractor (aka backbone)
    2. Image Classification head (Pooling + Classifier)
    """

    # Return type of `hypers`: `lr` and `wd` each hold one entry per parameter group.
    _hypers = namedtuple("hypers", field_names=["lr", "wd"])

    def __init__(
        self,
        backbone: ImageClassificationBackbone,
        head: ImageClassificationHead,
    ):
        """
        Arguments:
        1. `backbone`: a `ImageClassificationBackbone` module, must follow gale's backbone interface
        2. `head`: a head containing the classifier and the pooling layer, must be an instance of
        `ImageClassificationHead`.
        """
        super(GeneralizedImageClassifier, self).__init__()
        # Validate both components before registering them as sub-modules.
        assert isinstance(backbone, ImageClassificationBackbone)
        assert isinstance(head, ImageClassificationHead)
        self.backbone = backbone
        self.head = head

    def forward(self, batched_inputs: torch.Tensor) -> torch.Tensor:
        """
        Runs the batched_inputs through `backbone` followed by the `head`.
        Returns a Tensor which contains the logits for the batched_inputs.
        """
        # backbone extracts the features, head turns them into logits
        features = self.backbone(batched_inputs)
        return self.head(features)

    @staticmethod
    def _readable_param_count(module: Module) -> str:
        """
        Human readable count of all parameters (trainable or not) held by `module`.
        """
        total = sum(p.numel() for p in module.parameters())
        return get_human_readable_count(total)

    @classmethod
    def from_config_dict(cls, cfg: DictConfig):
        """
        Instantiate the Meta Architecture from gale config

        Reads `cfg.input.{channels, height, width}` to describe the input and
        `cfg.model.{backbone, head}` to build the two components. The input
        `ShapeSpec` is stored on the returned instance as `input_shape`.

        Raises `ValueError` if `cfg.model` has no `backbone` or no `head` entry.
        """
        model_cfg = cfg.model
        # Membership test instead of `hasattr`: attribute access on a DictConfig
        # does not raise for absent keys on every OmegaConf release, which makes
        # `hasattr` report keys that are not there. A `None` model node keeps
        # producing the same `ValueError` as before.
        if model_cfg is None or "backbone" not in model_cfg:
            raise ValueError("Configuration for model backbone not found")

        if "head" not in model_cfg:
            raise ValueError("Configuration for model head not found")

        input_shape = ShapeSpec(cfg.input.channels, cfg.input.height, cfg.input.width)
        _logger.debug("Inputs: %s", input_shape)

        backbone = build_backbone(cfg, input_shape=input_shape)
        _logger.debug(
            "Backbone %s created, param count: %s.",
            cfg.model.backbone.name,
            cls._readable_param_count(backbone),
        )

        # the head is sized from whatever the backbone reports as its output shape
        head = build_head(cfg, backbone.output_shape())
        _logger.debug(
            "Head %s created, param count: %s.",
            cfg.model.head.name,
            cls._readable_param_count(head),
        )

        instance = cls(backbone=backbone, head=head)
        instance.input_shape = input_shape

        _logger.info(
            "Model created, param count: %s.", cls._readable_param_count(instance)
        )

        return instance

    def build_param_dicts(self):
        """
        Builds up the Parameters dicts for optimization

        Concatenates the param dicts of the backbone with those of the head
        (backbone groups first).
        """
        backbone_params = self.backbone.build_param_dicts()
        head_params = self.head.build_param_dicts()
        return backbone_params + head_params

    @property
    def hypers(self) -> Tuple:
        """
        Returns list of parameters like `lr` and `wd`
        for each param group

        The values are read from the `lr` and `weight_decay` keys of every
        dict returned by `build_param_dicts`.
        """
        groups = self.build_param_dicts()
        lrs = [group["lr"] for group in groups]
        wds = [group["weight_decay"] for group in groups]
        return self._hypers(lrs, wds)

<IPython.core.display.Javascript object>

This model architecture will work for most common computer vision fine-tuning use cases. We take a `backbone` and a `classifier`. We run the input through the backbone to extract the feature maps, which are then used by the classifier to give predictions on the input. The parameter dicts for optimization are generated by the `backbone` and the `head` themselves.

> Note: For advanced use cases you might want to create your own model. A model must inherit from `GaleModule` and be registered in `META_ARCH_REGISTRY`. Your model should also have the following methods to work in the Gale ecosystem.

In [None]:
show_doc(GeneralizedImageClassifier.from_config_dict)

<h4 id="GeneralizedImageClassifier.from_config_dict" class="doc_header"><code>GeneralizedImageClassifier.from_config_dict</code><a href="__main__.py#L39" class="source_link" style="float:right">[source]</a></h4>

> <code>GeneralizedImageClassifier.from_config_dict</code>(**`cfg`**:`DictConfig`)

Instantiate the Meta Architecture from gale config

<IPython.core.display.Javascript object>

In [None]:
show_doc(GeneralizedImageClassifier.forward)

<h4 id="GeneralizedImageClassifier.forward" class="doc_header"><code>GeneralizedImageClassifier.forward</code><a href="__main__.py#L28" class="source_link" style="float:right">[source]</a></h4>

> <code>GeneralizedImageClassifier.forward</code>(**`batched_inputs`**:`Tensor`)

Runs the batched_inputs through `backbone` followed by the `head`.
Returns a Tensor which contains the logits for the batched_inputs.

<IPython.core.display.Javascript object>

In [None]:
show_doc(GeneralizedImageClassifier.build_param_dicts)

<h4 id="GeneralizedImageClassifier.build_param_dicts" class="doc_header"><code>GeneralizedImageClassifier.build_param_dicts</code><a href="__main__.py#L83" class="source_link" style="float:right">[source]</a></h4>

> <code>GeneralizedImageClassifier.build_param_dicts</code>()

Builds up the Paramters dicts for optimization

<IPython.core.display.Javascript object>

`Meta_Arch`'s can also be instantiated via an appropriate config file. Let's see how.

In [None]:
from dataclasses import dataclass, field
from omegaconf import MISSING, OmegaConf
from gale.classification.model.backbones import ResNetBackbone, ResNetBackboneDataClass
from gale.classification.model.heads import FastaiHead, FastaiHeadDataClass

<IPython.core.display.Javascript object>

For a meta_arch we first need to create the configurations for the `Backbone` and the `Head` of the model. These 
must be registered in the `IMAGE_CLASSIFICATION_BACKBONES` and `IMAGE_CLASSIFICATION_HEADS` registries respectively. The instances are automatically instantiated by the `GeneralizedImageClassifier` meta_arch.

In [None]:
# Backbone section: the registry name plus the structured init arguments.
init_args = OmegaConf.structured(
    ResNetBackboneDataClass(model_name="resnet18", freeze_at=2)
)

backbone = OmegaConf.create()
backbone.name = "ResNetBackbone"
backbone.init_args = init_args

<IPython.core.display.Javascript object>

In [None]:
# Head section: the registry name plus the structured init arguments.
head = OmegaConf.create()
head["name"] = "FastaiHead"
head["init_args"] = OmegaConf.structured(FastaiHeadDataClass(num_classes=2))

<IPython.core.display.Javascript object>

We also need a few more things and the config must be composed in a gale config style. We need the definitions of the input like channels, height and width. So let's compose these -

In [None]:
# Input config
INPUTS = OmegaConf.create()
INPUTS.channels = 3
INPUTS.height = 224
INPUTS.width = 224

# Configuration for Model
MODEL = OmegaConf.create()
MODEL.meta_architecture = OmegaConf.create()
MODEL.meta_architecture.name = "GeneralizedImageClassifier"
MODEL.meta_architecture.init_args = None
MODEL.backbone = backbone
MODEL.head = head

<IPython.core.display.Javascript object>

So, these are the bare minimums that one would need to instantiate `GeneralizedImageClassifier`

In [None]:
# collapse-output
# Top-level config with the `input` and `model` sections, dumped as YAML.
conf = OmegaConf.create({"input": INPUTS, "model": MODEL})
print(OmegaConf.to_yaml(conf))

input:
  channels: 3
  height: 224
  width: 224
model:
  meta_architecture:
    name: GeneralizedImageClassifier
    init_args: null
  backbone:
    name: ResNetBackbone
    init_args:
      model_name: resnet18
      act: null
      lr: 0.001
      lr_div: 10
      wd: 0.0
      freeze_at: 2
      pretrained: true
      drop_block_rate: null
      drop_path_rate: null
      bn_tf: false
  head:
    name: FastaiHead
    init_args:
      num_classes: 2
      act: ReLU
      lin_ftrs: null
      ps: 0.5
      concat_pool: true
      first_bn: true
      bn_final: false
      lr: 0.002
      wd: 0.0
      filter_wd: false



<IPython.core.display.Javascript object>

In [None]:
m = GeneralizedImageClassifier.from_config_dict(conf)

Inputs: ShapeSpec(channels=3, height=224, width=224)
Loading pretrained weights from url (https://download.pytorch.org/models/resnet18-5c106cde.pth)
Backbone ResNetBackbone created, param count: 11.2 M.
Head FastaiHead created, param count: 528 K.
Model created, param count: 11.7 M.


<IPython.core.display.Javascript object>

In [None]:
# hide
# Smoke test: push a random batch shaped like the configured input through the model.
shape = (m.input_shape.channels, m.input_shape.height, m.input_shape.width)
inp = torch.randn(2, *shape)
o = m(inp)
# The forward pass must give one row of logits per image and one column per
# class configured on the head; without this check the cell verifies nothing.
assert o.shape == (inp.shape[0], conf.model.head.init_args.num_classes)
o

tensor([[-1.2598, -0.0377],
        [ 1.5199,  1.1705]], grad_fn=<MmBackward>)

<IPython.core.display.Javascript object>

In [None]:
# hide
# cuda
import pytorch_lightning as pl
import torchmetrics
import torchvision.transforms as T
from fastcore.all import Path
from nbdev.export import Config
from torch import optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

from gale.collections.callbacks.notebook import NotebookProgressCallback
from gale.collections.download import download_and_extract_archive
from gale.schedules import WarmupStepLR
from gale.utils.display import show_images

# Archive holding the toy dataset used by the training cells below.
URL = "https://download.pytorch.org/tutorial/hymenoptera_data.zip"
# Datasets are stored in a `data` folder under the project's notebooks path.
data_path = Path(Config().path("nbs_path")).joinpath("data")

# download a toy dataset
download_and_extract_archive(download_root=data_path, url=URL)

Using downloaded and verified file: /Users/ayushman/Desktop/gale/nbs/data/hymenoptera_data.zip
Extracting /Users/ayushman/Desktop/gale/nbs/data/hymenoptera_data.zip to /Users/ayushman/Desktop/gale/nbs/data


<IPython.core.display.Javascript object>

In [None]:
# hide
# cuda
# Per-channel normalization statistics shared by both pipelines
# (presumably the ImageNet statistics the pretrained backbone expects - TODO confirm).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 32

# Training: random crop + horizontal flip augmentation before normalization.
train_tfms = T.Compose(
    [
        T.RandomResizedCrop(224),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(list(NORM_MEAN), list(NORM_STD)),
    ]
)
# Validation: deterministic resize + center crop before normalization.
val_tfms = T.Compose(
    [
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(list(NORM_MEAN), list(NORM_STD)),
    ]
)
data_transforms = {"train": train_tfms, "val": val_tfms}

# One ImageFolder per split of the extracted archive.
training_data = ImageFolder(
    data_path / "hymenoptera_data" / "train", transform=data_transforms["train"]
)
validation_data = ImageFolder(
    data_path / "hymenoptera_data" / "val", transform=data_transforms["val"]
)

# Only the training loader shuffles its samples.
train_dl = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True)
valid_dl = DataLoader(validation_data, batch_size=BATCH_SIZE, shuffle=False)

<IPython.core.display.Javascript object>

In [None]:
# hide
# cuda
class Learner(pl.LightningModule):
    """
    Minimal LightningModule that trains a `GeneralizedImageClassifier` with
    cross entropy loss and tracks accuracy separately for train and validation.
    """

    def __init__(self, model: GeneralizedImageClassifier):
        super().__init__()
        self.model = model
        # separate metric objects so train and validation state never mix
        self.train_metric = torchmetrics.Accuracy()
        self.valid_metric = torchmetrics.Accuracy()
        self.loss_fn = torch.nn.CrossEntropyLoss()

    def forward(self, xb):
        "Forwards `xb` through the wrapped model and returns its output."
        return self.model(xb)

    def _loss_and_acc(self, batch: Any, metric):
        "Computes the loss and updates/returns `metric` for one `(x, y)` batch."
        x, y = batch
        y_hat = self(x)
        loss = self.loss_fn(y_hat, y)
        # `dim` is given explicitly: relying on softmax's implicit dimension is
        # deprecated. For the 2D logits produced here dim=1 is the dimension
        # that was picked implicitly.
        acc = metric(torch.nn.functional.softmax(y_hat, dim=1), y)
        return loss, acc

    def training_step(self, batch: Any, batch_idx: int):
        "Logs `loss` and `acc` for the batch and returns the loss."
        loss, acc = self._loss_and_acc(batch, self.train_metric)
        self.log_dict(dict(loss=loss, acc=acc))
        return loss

    def validation_step(self, batch: Any, batch_idx: int):
        "Logs `val_loss` and `val_acc` for the batch."
        loss, acc = self._loss_and_acc(batch, self.valid_metric)
        self.log_dict(dict(val_loss=loss, val_acc=acc))

    def configure_optimizers(self):
        "AdamW over the model's own param dicts, scheduled by `WarmupStepLR`."
        param_groups = self.model.build_param_dicts()
        opt = optim.AdamW(param_groups)
        sch = WarmupStepLR(
            opt,
            num_decays=2,
            warmup_epochs=1,
            decay_rate=0.1,
            epochs=self.trainer.max_epochs,
        )
        return [opt], [sch]

<IPython.core.display.Javascript object>

In [None]:
# hide
# cuda
# Callbacks: notebook progress display and per-epoch lr / momentum logging.
cbs = []
cbs.append(NotebookProgressCallback())
cbs.append(
    pl.callbacks.LearningRateMonitor(logging_interval="epoch", log_momentum=True)
)

logger = pl.loggers.TensorBoardLogger(name="my_model", save_dir="lightning_logs/")

trainer = pl.Trainer(logger=logger, callbacks=cbs, max_epochs=7, log_every_n_steps=1)

# A fresh model is built from the config so training starts from a new instance.
model = GeneralizedImageClassifier.from_config_dict(conf)
learn = Learner(model)

# Train and validation loaders are passed positionally, in that order.
trainer.fit(learn, train_dl, valid_dl)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
Inputs: ShapeSpec(channels=3, height=224, width=224)
Loading pretrained weights from url (https://download.pytorch.org/models/resnet18-5c106cde.pth)
Backbone ResNetBackbone created, param count: 11.2 M.
Head FastaiHead created, param count: 528 K.
Model created, param count: 11.7 M.

  | Name         | Type                       | Params
------------------------------------------------------------
0 | model        | GeneralizedImageClassifier | 11.7 M
1 | train_metric | Accuracy                   | 0     
2 | valid_metric | Accuracy                   | 0     
3 | loss_fn      | CrossEntropyLoss           | 0     
------------------------------------------------------------
11.5 M    Trainable params
157 K     Non-trainable params
11.7 M    Total params
46.820    Total estimated model params size (MB)


epoch,val_loss,val_acc,loss,acc,time,samples/s
0,0.731146,0.431373,0.743832,0.6,45.0572,0.2885


1

<IPython.core.display.Javascript object>

## Export-

In [None]:
# hide
notebook2script("04b_classification.model.meta_arch.common.ipynb")

Converted 04b_classification.model.meta_arch.common.ipynb.


<IPython.core.display.Javascript object>