## Introduction
This notebook runs all the processing steps one by one for several models and renders the output. Each section is self-contained and can be run on its own after a kernel restart.

## DenseNet

In [1]:
import sys, importlib
from pathlib import Path
# Pick the directory to put on sys.path ahead of the `idlmav` import below:
# the working directory itself when it is named "idlmav", otherwise its parent.
nb_dir = Path.cwd()
project_root = nb_dir if nb_dir.name == "idlmav" else nb_dir.parent
# Guarded so that re-running this cell does not keep appending duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

import torch
import torchvision
from idlmav import MAV, plotly_renderer

def reload_imports():
    """Reload the already-imported idlmav modules in a fixed order, then rebind
    the module-level names `MAV` and `plotly_renderer` from the reloaded package.

    Raises:
        KeyError: if one of the listed modules is not present in `sys.modules`.
    """
    global MAV, plotly_renderer

    # Order matters: submodules are reloaded before the top-level package.
    module_names = (
        'idlmav.mavtypes',
        'idlmav.mavutils',
        'idlmav.tracing',
        'idlmav.merging',
        'idlmav.coloring',
        'idlmav.layout',
        'idlmav.renderers.renderer_utils',
        'idlmav.renderers.figure_renderer',
        'idlmav.renderers.widget_renderer',
        'idlmav',
    )
    for module_name in module_names:
        importlib.reload(sys.modules[module_name])

    from idlmav import MAV, plotly_renderer

In [3]:
# Reload the idlmav modules and rebind MAV / plotly_renderer (see reload_imports above).
reload_imports()

In [4]:
# Build a MAV object for a DenseNet-121 (torchvision default arguments) and a
# dummy batch of ten 3x32x32 inputs, then display its figure without the table.
model = torchvision.models.densenet121()
inputs = torch.randn(10, 3, 32, 32)
device = 'cpu'
mav = MAV(model, inputs, device)
with plotly_renderer('notebook_connected'):
    mav.show_figure(add_table=False)

## DETR demo
* Based on [detr_demo.ipynb](https://github.com/facebookresearch/detr/blob/colab/notebooks/detr_demo.ipynb)

In [8]:
import sys, importlib
from pathlib import Path
# Pick the directory to put on sys.path ahead of the `idlmav` import below:
# the working directory itself when it is named "idlmav", otherwise its parent.
nb_dir = Path.cwd()
project_root = nb_dir if nb_dir.name == "idlmav" else nb_dir.parent
# Guarded so that re-running this cell does not keep appending duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

import torch
from torch import nn
import torchvision
from idlmav import MAV, plotly_renderer

def reload_imports():
    """Reload the already-imported idlmav modules in a fixed order, then rebind
    the module-level names `MAV` and `plotly_renderer` from the reloaded package.

    Raises:
        KeyError: if one of the listed modules is not present in `sys.modules`.
    """
    global MAV, plotly_renderer

    # Order matters: submodules are reloaded before the top-level package.
    module_names = (
        'idlmav.mavtypes',
        'idlmav.mavutils',
        'idlmav.tracing',
        'idlmav.merging',
        'idlmav.coloring',
        'idlmav.layout',
        'idlmav.renderers.renderer_utils',
        'idlmav.renderers.figure_renderer',
        'idlmav.renderers.widget_renderer',
        'idlmav',
    )
    for module_name in module_names:
        importlib.reload(sys.modules[module_name])

    from idlmav import MAV, plotly_renderer

In [9]:
# Reload the idlmav modules and rebind MAV / plotly_renderer (see reload_imports above).
reload_imports()

In [10]:
class DETRdemo(nn.Module):
    """Minimal DETR-style model: ResNet-50 backbone, 1x1 conversion conv,
    a default `nn.Transformer`, and two linear prediction heads.

    Args:
        num_classes: number of object classes; the class head emits
            ``num_classes + 1`` logits (one extra slot, see the head comment).
        hidden_dim: width of the transformer and of the learned embeddings.
        nheads: number of attention heads passed to `nn.Transformer`.
        num_encoder_layers: number of transformer encoder layers.
        num_decoder_layers: number of transformer decoder layers.
    """

    def __init__(self, num_classes, hidden_dim=256, nheads=8,
                 num_encoder_layers=6, num_decoder_layers=6):
        super().__init__()

        # create ResNet-50 backbone
        self.backbone = torchvision.models.resnet50()
        # the classification head is never called in forward(), so drop it
        del self.backbone.fc

        # create conversion layer
        self.conv = nn.Conv2d(2048, hidden_dim, 1)

        # create a default PyTorch transformer
        self.transformer = nn.Transformer(
            hidden_dim, nheads, num_encoder_layers, num_decoder_layers)

        # prediction heads, one extra class for predicting non-empty slots
        # note that in baseline DETR linear_bbox layer is 3-layer MLP
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)

        # output positional encodings (object queries)
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))

        # spatial positional encodings
        # note that in baseline DETR we use sine positional encodings
        # (50 rows each: feature maps taller or wider than 50 are not supported)
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))

    def forward(self, inputs):
        """Run the backbone, transformer and prediction heads.

        Args:
            inputs: batched image tensor with the batch on dim 0 (any batch
                size); presumably (B, 3, H, W) as expected by ResNet-50's
                first conv -- confirm against caller.

        Returns:
            dict with 'pred_logits' of shape (B, 100, num_classes + 1) and
            'pred_boxes' of shape (B, 100, 4), the latter passed through a
            sigmoid.
        """
        # propagate inputs through ResNet-50 up to avg-pool layer
        x = self.backbone.conv1(inputs)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # convert from 2048 to 256 feature planes for the transformer
        h = self.conv(x)

        # construct positional encodings
        H, W = h.shape[-2:]
        pos = torch.cat([
            self.col_embed[:W].unsqueeze(0).repeat(H, 1, 1),
            self.row_embed[:H].unsqueeze(1).repeat(1, W, 1),
        ], dim=-1).flatten(0, 1).unsqueeze(1)

        # source sequence: (H*W, B, hidden_dim); pos broadcasts over the batch
        src = pos + 0.1 * h.flatten(2).permute(2, 0, 1)

        # nn.Transformer requires src and tgt to share the batch size, so the
        # object queries are repeated per sample (a no-op when B == 1)
        batch_size = h.shape[0]
        tgt = self.query_pos.unsqueeze(1).repeat(1, batch_size, 1)

        # propagate through the transformer
        h = self.transformer(src, tgt).transpose(0, 1)

        # finally project transformer outputs to class labels and bounding boxes
        return {'pred_logits': self.linear_class(h),
                'pred_boxes': self.linear_bbox(h).sigmoid()}



In [11]:
# Build a MAV object for the DETR demo model (4 classes) and a single dummy
# 3x224x224 input, then display its figure.
model = DETRdemo(num_classes=4)
inputs = torch.randn(1, 3, 224, 224)
device = 'cpu'
mav = MAV(model, inputs, device)
with plotly_renderer('notebook_connected'):
    mav.show_figure()


enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.self_attn.batch_first was not True(use batch_first for better inference performance)


No handlers found: "aten::unflatten". Skipped.


No handlers found: "aten::unsqueeze". Skipped.


No handlers found: "aten::scaled_dot_product_attention". Skipped.


No handlers found: "aten::permute". Skipped.


No handlers found: "aten::split_with_sizes". Skipped.

