# First version. Net.

> First version of constructor.

In [1]:
#hide

In [2]:
#hide
import torch
import torch.nn as nn

from nbdev.showdoc import show_doc
from IPython.display import Markdown, display

# from fastcore.test import *
from dataclasses import dataclass, asdict

In [3]:
# hide
from collections import OrderedDict
from functools import partial


from model_constructor.layers import ConvLayer, Flatten, SEBlock, SimpleSelfAttention, noop

In [4]:
# hide
def print_doc(func_name):
    doc = show_doc(func_name, title_level=4, disp=False)
    display(Markdown(doc))

# Utils

In [5]:
act_fn = nn.ReLU(inplace=True)


def init_cnn(module: nn.Module):
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, 0)
    if isinstance(module, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(module.weight)
    for layer in module.children():
        init_cnn(layer)

# ResBlock

In [6]:


class ResBlock(nn.Module):
    '''Resnet block'''
    se_block = SEBlock

    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,
                 groups=1, dw=False):
        super().__init__()
        nf, ni = nh * expansion, ni * expansion
        if groups != 1:
            groups = int(nh / groups)
        if expansion == 1:
            layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,
                                            groups=nh if dw else groups)),
                      ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))
                      ]
        else:
            layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                      ("conv_1", conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,
                                            groups=nh if dw else groups)),
                      ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))
                      ]
        if se:
            layers.append(('se', self.se_block(nf, se_reduction)))
        if sa:
            layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(layers))
        self.pool = noop if stride == 1 else pool
        self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)
        self.act_fn = act_fn

    def forward(self, x):
        return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))

In [7]:
ResBlock(1,64,64,sa=True)

ResBlock(
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (sa): SimpleSelfAttention(
      (conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,), bias=False)
    )
  )
  (act_fn): ReLU(inplace=True)
)

In [8]:
ResBlock(4,64,64,sa=True, dw=True)

ResBlock(
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (sa): SimpleSelfAttention(
      (conv): Conv1d(256, 256, kernel_size=(1,), stride=(1,), bias=False)
    )
  )
  (act_fn): ReLU(inplace=True)
)

In [9]:
ResBlock(4,64,64,sa=True, groups=4)

ResBlock(
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (sa): SimpleSelfAttention(
      (conv): Conv1d(256, 256, kernel_size=(1,), stride=(1,), bias=False)
    )
  )
  (act_fn): ReLU(inplace=True)
)

In [10]:
ResBlock(2,64,64,act_fn=nn.LeakyReLU(), bn_1st=False)

ResBlock(
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (act_fn): LeakyReLU(negative_slope=0.01)
)

In [11]:
ResBlock(2, 64, 64, sa=True, se=True)

ResBlock(
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (se): SEBlock(
      (squeeze): AdaptiveAvgPool2d(output_size=1)
      (excitation): Sequential(
        (fc_reduce): Linear(in_features=128, out_features=8, bias=True)
        (se_act): ReLU(inplace=True)
        (fc_expand): Linear(in_features=8, out_features=128, bias=True)

# NewResBlock

NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now  at model_constructor.yaresnet.
Here i left old name for compatibility with existing Notebooks.

In [12]:
# NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet.
class NewResBlock(nn.Module):
    '''YaResnet block'''
    se_block = SEBlock

    def __init__(self, expansion, ni, nh, stride=1,
                 conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,
                 pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,
                 groups=1, dw=False):
        super().__init__()
        nf, ni = nh * expansion, ni * expansion
        if groups != 1:
            groups = int(nh / groups)
        self.reduce = noop if stride == 1 else pool
        if expansion == 1:
            layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,
                                            groups=nh if dw else groups)),
                      ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))
                      ]
        else:
            layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),
                      ("conv_1", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,
                                            groups=nh if dw else groups)),
                      ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))
                      ]
        if se:
            layers.append(('se', self.se_block(nf, se_reduction)))
        if sa:
            layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))
        self.convs = nn.Sequential(OrderedDict(layers))
        self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)
        self.merge = act_fn

    def forward(self, x):
        o = self.reduce(x)
        return self.merge(self.convs(o) + self.idconv(o))

In [13]:
NewResBlock(4, 64, 128, dw=1)

NewResBlock(
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (idconv): ConvLayer(
    (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (merge): ReLU(inplace=True)
)

In [14]:
#hide
# from model_constructor.yaresnet import YaResBlock
# NewResBlock = YaResBlock

In [15]:
#hide
bl = NewResBlock(1,64,64,sa=True)
bl

NewResBlock(
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act_fn): ReLU(inplace=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (sa): SimpleSelfAttention(
      (conv): Conv1d(64, 64, kernel_size=(1,), stride=(1,), bias=False)
    )
  )
  (merge): ReLU(inplace=True)
)

In [16]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 64, 32, 32)
y = bl(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 64, 32, 32]), f"size"

torch.Size([16, 64, 32, 32])


In [17]:
#hide
bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)
bl

NewResBlock(
  (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (idconv): ConvLayer(
    (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tr

In [18]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 256, 32, 32)
y = bl(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 512, 16, 16]), f"size"

torch.Size([16, 512, 16, 16])


In [19]:
#hide
bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, groups=4)
bl

NewResBlock(
  (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (idconv): ConvLayer(
    (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_runni

In [20]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 256, 32, 32)
y = bl(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 512, 16, 16]), f"size"

torch.Size([16, 512, 16, 16])


# Stem, Body, Head

In [21]:
def _make_stem(self):
    stem = [(f"conv_{i}", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i + 1],
                                          stride=2 if i == self.stem_stride_on else 1,
                                          bn_layer=(not self.stem_bn_end) if i == (len(self.stem_sizes) - 2) else True,
                                          act_fn=self.act_fn, bn_1st=self.bn_1st))
            for i in range(len(self.stem_sizes) - 1)]
    stem.append(('stem_pool', self.stem_pool))
    if self.stem_bn_end:
        stem.append(('norm', self.norm(self.stem_sizes[-1])))
    return nn.Sequential(OrderedDict(stem))

In [22]:
def _make_layer(self, expansion, ni, nf, blocks, stride, sa):
    layers = [(f"bl_{i}", self.block(expansion, ni if i == 0 else nf, nf,
                                     stride if i == 0 else 1, sa=sa if i == blocks - 1 else False,
                                     conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,
                                     zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups,
                                     dw=self.dw, se=self.se))
              for i in range(blocks)]
    return nn.Sequential(OrderedDict(layers))

In [23]:
def _make_body(self):
    blocks = [(f"l_{i}", self._make_layer(self, self.expansion,
                                          ni=self.block_sizes[i], nf=self.block_sizes[i + 1],
                                          blocks=l, stride=1 if i == 0 else 2,
                                          sa=self.sa if i == 0 else False))
              for i, l in enumerate(self.layers)]
    return nn.Sequential(OrderedDict(blocks))

In [24]:
def _make_head(self):
    head = [('pool', nn.AdaptiveAvgPool2d(1)),
            ('flat', Flatten()),
            ('fc', nn.Linear(self.block_sizes[-1] * self.expansion, self.c_out))]
    return nn.Sequential(OrderedDict(head))

# Net - Model Constructor.

In [25]:
class Net():
    """Model constructor. As default - xresnet18"""
    def __init__(self, name='Net', c_in=3, c_out=1000,
                 block=ResBlock, conv_layer=ConvLayer,
                 block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2],
                 norm=nn.BatchNorm2d,
                 act_fn=nn.ReLU(inplace=True),
                 pool=nn.AvgPool2d(2, ceil_mode=True),
                 expansion=1, groups=1, dw=False,
                 sa=False, se=False, se_reduction=16,
                 bn_1st=True,
                 zero_bn=True,
                 stem_stride_on=0,
                 stem_sizes=[32, 32, 64],
                 stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                 stem_bn_end=False,
                 _init_cnn=init_cnn,
                 _make_stem=_make_stem,
                 _make_layer=_make_layer,
                 _make_body=_make_body,
                 _make_head=_make_head,
                 ):
        super().__init__()

        params = locals()
        del params['self']
        self.__dict__ = params
        self._block_sizes = params['block_sizes']
        if self.stem_sizes[0] != self.c_in:
            self.stem_sizes = [self.c_in] + self.stem_sizes

    @property
    def block_sizes(self):
        return [self.stem_sizes[-1] // self.expansion] + self._block_sizes + [256] * (len(self.layers) - 4)

    @property
    def stem(self):
        return self._make_stem(self)

    @property
    def head(self):
        return self._make_head(self)

    @property
    def body(self):
        return self._make_body(self)

    def __call__(self):
        model = nn.Sequential(OrderedDict([
            ('stem', self.stem),
            ('body', self.body),
            ('head', self.head)]))
        self._init_cnn(model)
        model.extra_repr = lambda: f"model {self.name}"
        return model

    def __repr__(self):
        return (f"{self.name} constructor\n"
                f"  c_in: {self.c_in}, c_out: {self.c_out}\n"
                f"  expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\n"
                f"  sa: {self.sa}, se: {self.se}\n"
                f"  stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\n"
                f"  body sizes {self._block_sizes}\n"
                f"  layers: {self.layers}")

In [26]:
model  = Net()
model

Net constructor
  c_in: 3, c_out: 1000
  expansion: 1, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [2, 2, 2, 2]

In [27]:
model._block_sizes

[64, 128, 256, 512]

In [28]:
model.block_sizes

[64, 64, 128, 256, 512]

In [29]:
model._block_sizes = [128, 256, 512, 1024]
model

Net constructor
  c_in: 3, c_out: 1000
  expansion: 1, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [128, 256, 512, 1024]
  layers: [2, 2, 2, 2]

In [30]:
model.block_sizes

[64, 128, 256, 512, 1024]

In [31]:
#hide
model  = Net(stem_sizes=[3,32,32,64])
model

Net constructor
  c_in: 3, c_out: 1000
  expansion: 1, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [2, 2, 2, 2]

In [32]:
model.block_sizes

[64, 64, 128, 256, 512]

In [33]:
model = Net()

In [34]:
#hide
# model.block_sizes = [64, 128, 256, 512] # wrong way --> use _block_sizes
# model

In [35]:
#collapse_output
model.stem

Sequential(
  (conv_0): ConvLayer(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act_fn): ReLU(inplace=True)
  )
  (conv_1): ConvLayer(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act_fn): ReLU(inplace=True)
  )
  (conv_2): ConvLayer(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act_fn): ReLU(inplace=True)
  )
  (stem_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)

In [36]:
#collapse_output
model.stem_stride_on = 1
model.stem

Sequential(
  (conv_0): ConvLayer(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act_fn): ReLU(inplace=True)
  )
  (conv_1): ConvLayer(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act_fn): ReLU(inplace=True)
  )
  (conv_2): ConvLayer(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act_fn): ReLU(inplace=True)
  )
  (stem_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
)

In [37]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 3, 128, 128)
y = model.stem(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 64, 32, 32]), f"size"

torch.Size([16, 64, 32, 32])


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [38]:
model.bn_1st = False

In [39]:
model.act_fn =nn.LeakyReLU(inplace=True)

In [40]:
model.sa = True
model.se = True

In [41]:
#collapse_output
model.body.l_1

Sequential(
  (bl_0): ResBlock(
    (convs): Sequential(
      (conv_0): ConvLayer(
        (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
        (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_1): ConvLayer(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SEBlock(
        (squeeze): AdaptiveAvgPool2d(output_size=1)
        (excitation): Sequential(
          (fc_reduce): Linear(in_features=128, out_features=8, bias=True)
          (se_act): ReLU(inplace=True)
          (fc_expand): Linear(in_features=8, out_features=128, bias=True)
          (sigmoid): Sigmoid()
        )
      )
    )
    (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (idconv): ConvLaye

In [42]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 64, 32, 32)
y = model.body.l_0(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 64, 32, 32]), f"size"

torch.Size([16, 64, 32, 32])


In [43]:
#hide
model.body.l_0

Sequential(
  (bl_0): ResBlock(
    (convs): Sequential(
      (conv_0): ConvLayer(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_1): ConvLayer(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SEBlock(
        (squeeze): AdaptiveAvgPool2d(output_size=1)
        (excitation): Sequential(
          (fc_reduce): Linear(in_features=64, out_features=4, bias=True)
          (se_act): ReLU(inplace=True)
          (fc_expand): Linear(in_features=4, out_features=64, bias=True)
          (sigmoid): Sigmoid()
        )
      )
    )
    (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
  )
  (bl_1): ResBlock(
    

In [44]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 64, 32, 32)
y = model.body.l_0(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 64, 32, 32]), f"size"

torch.Size([16, 64, 32, 32])


In [45]:
#hide
model.groups = 4
model.expansion = 4
model.body.l_0

Sequential(
  (bl_0): ResBlock(
    (convs): Sequential(
      (conv_0): ConvLayer(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_1): ConvLayer(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
        (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_2): ConvLayer(
        (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SEBlock(
        (squeeze): AdaptiveAvgPool2d(output_size=1)
        (excitation): Sequential(
          (fc_reduce): Linear(in_features=256, out_features=

In [46]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 64, 32, 32)
y = model.body.l_0(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 256, 32, 32]), f"size"

torch.Size([16, 256, 32, 32])


In [47]:
#hide
model.groups = 1
model.dw = True
model.expansion = 4
model.body.l_0

Sequential(
  (bl_0): ResBlock(
    (convs): Sequential(
      (conv_0): ConvLayer(
        (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_1): ConvLayer(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv_2): ConvLayer(
        (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (se): SEBlock(
        (squeeze): AdaptiveAvgPool2d(output_size=1)
        (excitation): Sequential(
          (fc_reduce): Linear(in_features=256, out_features=

In [48]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 64, 32, 32)
y = model.body.l_0(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 256, 32, 32]), f"size"

torch.Size([16, 256, 32, 32])


In [49]:
#hide
model.groups = 1
model.dw = 0
model.expansion = 1

In [50]:
model.block = NewResBlock

In [51]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 64, 32, 32)
y = model.body.l_1.bl_0(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 128, 16, 16]), f"size"

torch.Size([16, 128, 16, 16])


In [52]:
#hide
model.body.l_1.bl_0

NewResBlock(
  (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (se): SEBlock(
      (squeeze): AdaptiveAvgPool2d(output_size=1)
      (excitation): Sequential(
        (fc_reduce): Linear(in_features=128, out_features=8, bias=True)
        (se_act): ReLU(inplace=True)
        (fc_expand): Linear(in_features=8, out_features=128, bias=True)
        (sigmoid): Sigmoid()
      )
    )
  )
  (idconv): ConvLayer(
    (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1,

In [53]:
# model = Net(expansion=4)
model.expansion = 4

In [54]:
#hide
model.stem_bn_end = True

In [55]:
#hide
model.stem

Sequential(
  (conv_0): ConvLayer(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_1): ConvLayer(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
    (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_2): ConvLayer(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
  )
  (stem_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [56]:
#hide
model.body.l_1.bl_0

NewResBlock(
  (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (convs): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (se): SEBlock(
      (squeeze): AdaptiveAvgPool2d(output_size=1)
      (excitation): Sequential(
        (fc_reduce): Linear(in_features=512, out_features=32,

In [57]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 256, 32, 32)
y = model.body.l_1.bl_0(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 512, 16, 16]), f"size"

torch.Size([16, 512, 16, 16])


In [58]:
#hide
model.stem_bn_end = False

In [59]:
#collapse_input
m = model()

In [60]:
#hide
m

Sequential(
  model Net
  (stem): Sequential(
    (conv_0): ConvLayer(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_1): ConvLayer(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv_2): ConvLayer(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (stem_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (body): Sequential(
    (l_0)

In [61]:
#collapse_output
m.body

Sequential(
  (l_0): Sequential(
    (bl_0): NewResBlock(
      (convs): Sequential(
        (conv_0): ConvLayer(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (conv_1): ConvLayer(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (conv_2): ConvLayer(
          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (se): SEBlock(
          (squeeze): AdaptiveAvgPool2d(output_size=1)
          (excitation): Sequential(
         

In [62]:
#hide
bs_test = 16
xb = torch.randn(bs_test, 3, 128, 128)
y = m(xb)
print(y.shape)
assert y.shape == torch.Size([bs_test, 1000]), f"size expected {bs_test}, 1000"

torch.Size([16, 1000])


In [63]:
## xresnet constructor

# # me = sys.modules[__name__]
# # for n,e,l in [[ 18 , 1, [2,2,2 ,2] ],
# #     [ 34 , 1, [3,4,6 ,3] ],
# #     [ 50 , 4, [3,4,6 ,3] ],
# #     [ 101, 4, [3,4,23,3] ],
# #     [ 152, 4, [3,8,36,3] ],]:
# #     name = f'net{n}'
# #     setattr(me, name, partial(Net, expansion=e, layers=l, name=name))
# net34  = partial(Net, expansion=1, layers=[3, 4,  6, 3], name='xresnet34')
# net50  = partial(Net, expansion=4, layers=[3, 4,  6, 3], name='xresnet50')

In [64]:
# m = net50(c_out=10)

In [65]:
# m, m.c_out

In [66]:
#hide
xresnet34_parameters = {
    'name': 'xresnet34',
    'expansion': 1,
    'layers': [3, 4, 6, 3],
    }
xresnet50_parameters = {
    'name': 'xresnet50',
    'expansion': 4,
    'layers': [3, 4, 6, 3],
    }
xresnet34 = partial(Net, **xresnet34_parameters)

In [67]:
#hide
model34 = Net(**xresnet34_parameters)
model34

xresnet34 constructor
  c_in: 3, c_out: 1000
  expansion: 1, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [3, 4, 6, 3]

In [68]:
#hide
model50 = Net(**xresnet50_parameters)
model50

xresnet50 constructor
  c_in: 3, c_out: 1000
  expansion: 4, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [3, 4, 6, 3]

In [69]:
xresnet34 = partial(Net, name='xresnet34', expansion=1, layers=[3, 4, 6, 3])
xresnet50 = partial(Net, name='xresnet34', expansion=4, layers=[3, 4, 6, 3])

In [70]:
#hide
model34 = xresnet34()
model34

xresnet34 constructor
  c_in: 3, c_out: 1000
  expansion: 1, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [3, 4, 6, 3]

In [71]:
#hide
model50 = xresnet50()
model50

xresnet34 constructor
  c_in: 3, c_out: 1000
  expansion: 4, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [3, 4, 6, 3]

In [72]:
class XResNet34(Net):
    def __init__(self):
        super().__init__()
        self.layers = [3,4,6,3]

In [73]:
model = XResNet34()
model

Net constructor
  c_in: 3, c_out: 1000
  expansion: 1, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [3, 4, 6, 3]

In [74]:
from dataclasses import field

In [75]:
@dataclass
class ConfXresnet34:
    name: str = 'xresnet34'
    layers: list = field(default_factory=lambda : [3,4,6,3])

In [76]:
asdict(ConfXresnet34())

{'name': 'xresnet34', 'layers': [3, 4, 6, 3]}

In [77]:
@dataclass
class Xres50(ConfXresnet34):
    name = 'xresnet50'
    expansion: int = 4

In [78]:
Xres50()

Xres50(name='xresnet34', layers=[3, 4, 6, 3], expansion=4)

In [79]:
asdict(Xres50())

{'name': 'xresnet34', 'layers': [3, 4, 6, 3], 'expansion': 4}

In [80]:
model = Net(asdict(Xres50()))
model

{'name': 'xresnet34', 'layers': [3, 4, 6, 3], 'expansion': 4} constructor
  c_in: 3, c_out: 1000
  expansion: 1, groups: 1, dw: False
  sa: False, se: False
  stem sizes: [3, 32, 32, 64], stide on 0
  body sizes [64, 128, 256, 512]
  layers: [2, 2, 2, 2]

# end
model_constructor
by ayasyrev