diff --git a/Nbs/00_ModelConstructor.ipynb b/Nbs/01_ModelConstructor.ipynb similarity index 97% rename from Nbs/00_ModelConstructor.ipynb rename to Nbs/01_ModelConstructor.ipynb index ee1665f..b00359d 100644 --- a/Nbs/00_ModelConstructor.ipynb +++ b/Nbs/01_ModelConstructor.ipynb @@ -84,13 +84,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "ModelCfg\n", - " in_chans: 3, num_classes: 1000\n", - " expansion: 1, groups: 1, dw: False, div_groups: None\n", - " act_fn: ReLU, sa: False, se: False\n", - " stem sizes: [64], stride on 0\n", - " body sizes [64, 128, 256, 512]\n", - " layers: [2, 2, 2, 2]\n" + "ModelCfg(\n", + " in_chans=3\n", + " num_classes=1000\n", + " block='BasicBlock'\n", + " conv_layer='ConvBnAct'\n", + " block_sizes=[64, 128, 256, 512]\n", + " layers=[2, 2, 2, 2]\n", + " norm='BatchNorm2d'\n", + " act_fn='ReLU'\n", + " expansion=1\n", + " groups=1\n", + " bn_1st=True\n", + " zero_bn=True\n", + " stem_sizes=[64]\n", + " stem_pool=\"MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}\")\n" ] } ], diff --git a/Nbs/001_Blocks.ipynb b/Nbs/02_1_Blocks.ipynb similarity index 99% rename from Nbs/001_Blocks.ipynb rename to Nbs/02_1_Blocks.ipynb index 07c5997..9ec8e25 100644 --- a/Nbs/001_Blocks.ipynb +++ b/Nbs/02_1_Blocks.ipynb @@ -27,7 +27,6 @@ "outputs": [], "source": [ "#hide\n", - "import torch\n", "import torch.nn as nn\n", "from functools import partial" ] diff --git a/Nbs/01_layers.ipynb b/Nbs/02_2_layers.ipynb similarity index 100% rename from Nbs/01_layers.ipynb rename to Nbs/02_2_layers.ipynb diff --git a/Nbs/02_XResNet.ipynb b/Nbs/02_XResNet.ipynb index b8ce612..72107de 100644 --- a/Nbs/02_XResNet.ipynb +++ b/Nbs/02_XResNet.ipynb @@ -17,7 +17,6 @@ "source": [ "#hide\n", "import torch\n", - "from typing import List\n", "\n", "from functools import partial\n", "\n", @@ -98,6 +97,14 @@ "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/home/aya/mambaforge/envs/mc/lib/python3.10/site-packages/torch/nn/modules/conv.py:137: UserWarning: Failed to initialize NumPy: No module named 'numpy' (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:84.)\n", + " self.weight = Parameter(torch.empty(\n" + ] + }, { "data": { "text/plain": [ diff --git a/Nbs/03_MXResNet.ipynb b/Nbs/03_MXResNet.ipynb index 7993219..b2bd56b 100644 --- a/Nbs/03_MXResNet.ipynb +++ b/Nbs/03_MXResNet.ipynb @@ -23,7 +23,6 @@ "outputs": [], "source": [ "#hide\n", - "from typing import List\n", "from functools import partial\n", "\n", "import torch\n", @@ -106,6 +105,14 @@ "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/aya/mambaforge/envs/mc/lib/python3.10/site-packages/torch/nn/modules/conv.py:137: UserWarning: Failed to initialize NumPy: No module named 'numpy' (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:84.)\n", + " self.weight = Parameter(torch.empty(\n" + ] + }, { "data": { "text/plain": [ @@ -664,6 +671,28 @@ "model = MxResNet34.create_model()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([16, 1000])\n" + ] + } + ], + "source": [ + "#hide\n", + "bs_test = 16\n", + "xb = torch.randn(bs_test, 3, 128, 128)\n", + "y = model(xb)\n", + "print(y.shape)\n", + "assert y.shape == torch.Size([bs_test, 1000]), f\"size\"" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/Nbs/04_YaResNet.ipynb b/Nbs/04_YaResNet.ipynb index d5f1e4c..23c2945 100644 --- a/Nbs/04_YaResNet.ipynb +++ b/Nbs/04_YaResNet.ipynb @@ -443,39 +443,39 @@ { "data": { "text/plain": [ - "YaResBlock(\n", + "YaBottleneckBlock(\n", " (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)\n", " (convs): Sequential(\n", " (conv_0): ConvBnAct(\n", - " (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): 
BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)\n", " )\n", " (conv_1): ConvBnAct(\n", - " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)\n", - " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)\n", + " (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)\n", " )\n", " (conv_2): ConvBnAct(\n", - " (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " (se): SEModule(\n", " (squeeze): AdaptiveAvgPool2d(output_size=1)\n", " (excitation): Sequential(\n", - " (reduce): Linear(in_features=512, out_features=32, bias=True)\n", + " (reduce): Linear(in_features=128, out_features=8, bias=True)\n", " (se_act): ReLU(inplace=True)\n", - " (expand): Linear(in_features=32, out_features=512, bias=True)\n", + " (expand): Linear(in_features=8, out_features=128, bias=True)\n", " (se_gate): Sigmoid()\n", " )\n", " )\n", " (sa): SimpleSelfAttention(\n", - " (conv): Conv1d(512, 512, kernel_size=(1,), stride=(1,), bias=False)\n", + " (conv): Conv1d(128, 128, kernel_size=(1,), stride=(1,), bias=False)\n", " )\n", " )\n", " (id_conv): ConvBnAct(\n", - " (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " (bn): 
BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", " (merge): LeakyReLU(negative_slope=0.01, inplace=True)\n", ")" @@ -488,8 +488,10 @@ ], "source": [ "#collapse_output\n", - "bl = YaResBlock(\n", - " 4, 64, 128,\n", + "bl = YaBottleneckBlock(\n", + " 64,\n", + " 128,\n", + " expansion=4,\n", " stride=2,\n", " pool=pool,\n", " act_fn=nn.LeakyReLU,\n", diff --git a/Nbs/06_ConvMixer.ipynb b/Nbs/06_ConvMixer.ipynb index 34916fe..b1c0e3d 100644 --- a/Nbs/06_ConvMixer.ipynb +++ b/Nbs/06_ConvMixer.ipynb @@ -653,6 +653,14 @@ "#collapse_output\n", "convmixer_1024_20[1]" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dacb4659", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/Nbs/README.ipynb b/Nbs/README.ipynb index 3c2148c..b5c55e9 100644 --- a/Nbs/README.ipynb +++ b/Nbs/README.ipynb @@ -18,7 +18,6 @@ "outputs": [], "source": [ "#hide\n", - "import torch\n", "from torch import nn" ] }, @@ -143,13 +142,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "ModelConstructor\n", - " in_chans: 3, num_classes: 1000\n", - " expansion: 1, groups: 1, dw: False, div_groups: None\n", - " act_fn: ReLU, sa: False, se: False\n", - " stem sizes: [64], stride on 0\n", - " body sizes [64, 128, 256, 512]\n", - " layers: [2, 2, 2, 2]\n" + "ModelConstructor(\n", + " in_chans=3\n", + " num_classes=1000\n", + " block='BasicBlock'\n", + " conv_layer='ConvBnAct'\n", + " block_sizes=[64, 128, 256, 512]\n", + " layers=[2, 2, 2, 2]\n", + " norm='BatchNorm2d'\n", + " act_fn='ReLU'\n", + " expansion=1\n", + " groups=1\n", + " bn_1st=True\n", + " zero_bn=True\n", + " stem_sizes=[64]\n", + " stem_pool=\"MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}\"\n", + " init_cnn='init_cnn'\n", + " make_stem='make_stem'\n", + " 
make_layer='make_layer'\n", + " make_body='make_body'\n", + " make_head='make_head')\n" ] } ], @@ -404,6 +416,32 @@ "mc.print_changed_fields()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can compare changed with defaults." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Changed fields:\n", + "layers: [3, 4, 6, 3] | [2, 2, 2, 2]\n", + "expansion: 4 | 1\n" + ] + } + ], + "source": [ + "mc.print_changed_fields(show_default=True)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -715,7 +753,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "in_chans=3 num_classes=10 block='BasicBlock' conv_layer='ConvBnAct' block_sizes=[64, 128, 256, 512] layers=[2, 2, 2, 2] norm='BatchNorm2d' act_fn='Mish' expansion=1 groups=1 bn_1st=True zero_bn=True stem_sizes=[64] stem_pool=\"MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}\"\n" + "ModelCfg(\n", + " in_chans=3\n", + " num_classes=10\n", + " block='BasicBlock'\n", + " conv_layer='ConvBnAct'\n", + " block_sizes=[64, 128, 256, 512]\n", + " layers=[2, 2, 2, 2]\n", + " norm='BatchNorm2d'\n", + " act_fn='Mish'\n", + " expansion=1\n", + " groups=1\n", + " bn_1st=True\n", + " zero_bn=True\n", + " stem_sizes=[64]\n", + " stem_pool=\"MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}\")\n" ] } ], @@ -767,21 +819,13 @@ "execution_count": null, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Deprecated. Pass se_module as se argument, se_reduction as arg to se.\n", - "Deprecated. 
Pass se_module as se argument, se_reduction as arg to se.\n" - ] - }, { "data": { "text/plain": [ "ModelConstructor\n", " in_chans: 3, num_classes: 10\n", " expansion: 1, groups: 1, dw: False, div_groups: None\n", - " act_fn: ReLU, sa: , se: SEModule\n", + " act_fn: SELU, sa: , se: SEModule\n", " stem sizes: [64], stride on 0\n", " body sizes [64, 128, 256, 512]\n", " layers: [2, 2, 2, 2]" @@ -1383,13 +1427,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "YaResNet\n", - " in_chans: 3, num_classes: 1000\n", - " expansion: 1, groups: 1, dw: False, div_groups: None\n", - " act_fn: ReLU, sa: False, se: False\n", - " stem sizes: [64], stride on 0\n", - " body sizes [64, 128, 256, 512]\n", - " layers: [2, 2, 2, 2]\n" + "ModelConstructor(\n", + " name='YaResNet'\n", + " in_chans=3\n", + " num_classes=1000\n", + " block='YaBasicBlock'\n", + " conv_layer='ConvBnAct'\n", + " block_sizes=[64, 128, 256, 512]\n", + " layers=[2, 2, 2, 2]\n", + " norm='BatchNorm2d'\n", + " act_fn='ReLU'\n", + " expansion=1\n", + " groups=1\n", + " bn_1st=True\n", + " zero_bn=True\n", + " stem_sizes=[64]\n", + " stem_pool=\"MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}\"\n", + " init_cnn='init_cnn'\n", + " make_stem='make_stem'\n", + " make_layer='make_layer'\n", + " make_body='make_body'\n", + " make_head='make_head')\n" ] } ], @@ -1482,13 +1540,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "YaResnet34\n", - " in_chans: 3, num_classes: 1000\n", - " expansion: 1, groups: 1, dw: False, div_groups: None\n", - " act_fn: ReLU, sa: False, se: False\n", - " stem sizes: [64], stride on 0\n", - " body sizes [64, 128, 256, 512]\n", - " layers: [3, 4, 6, 3]\n" + "YaResnet34(\n", + " in_chans=3\n", + " num_classes=1000\n", + " block='YaBasicBlock'\n", + " conv_layer='ConvBnAct'\n", + " block_sizes=[64, 128, 256, 512]\n", + " layers=[3, 4, 6, 3]\n", + " norm='BatchNorm2d'\n", + " act_fn='ReLU'\n", + " expansion=1\n", + " groups=1\n", + " bn_1st=True\n", + " 
zero_bn=True\n", + " stem_sizes=[64]\n", + " stem_pool=\"MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}\"\n", + " init_cnn='init_cnn'\n", + " make_stem='xresnet_stem'\n", + " make_layer='make_layer'\n", + " make_body='make_body'\n", + " make_head='make_head')\n" ] } ], @@ -1541,6 +1612,13 @@ "mc = YaResnet50()\n", "mc" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/README.md b/README.md index c5e1425..922e2dd 100644 --- a/README.md +++ b/README.md @@ -51,13 +51,26 @@ Check all parameters with `print_cfg` method: mc.print_cfg() ```
output -
ModelConstructor
-      in_chans: 3, num_classes: 1000
-      expansion: 1, groups: 1, dw: False, div_groups: None
-      act_fn: ReLU, sa: False, se: False
-      stem sizes: [64], stride on 0
-      body sizes [64, 128, 256, 512]
-      layers: [2, 2, 2, 2]
+    
ModelConstructor(
+      in_chans=3
+      num_classes=1000
+      block='BasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[2, 2, 2, 2]
+      norm='BatchNorm2d'
+      act_fn='ReLU'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+      init_cnn='init_cnn'
+      make_stem='make_stem'
+      make_layer='make_layer'
+      make_body='make_body'
+      make_head='make_head')
     
@@ -70,7 +83,7 @@ model = mc() model ```
output - ModelConstructor( +
ModelConstructor(
       (stem): Sequential(
         (conv_1): ConvBnAct(
           (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
@@ -261,6 +274,19 @@ mc.print_changed_fields()
     
+We can compare changed with defaults. + + +```python +mc.print_changed_fields(show_default=True) +``` +
output +
Changed fields:
+    layers: [3, 4, 6, 3] | [2, 2, 2, 2]
+    expansion: 4 | 1
+    
+
+ Now we can look at model parts - stem, body, head. @@ -269,7 +295,7 @@ Now we can look at model parts - stem, body, head. mc.body ```
output - Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): BasicBlock(
           (convs): Sequential(
@@ -543,7 +569,21 @@ cfg = ModelCfg(
 print(cfg)
 ```
 
output -
in_chans=3 num_classes=10 block='BasicBlock' conv_layer='ConvBnAct' block_sizes=[64, 128, 256, 512] layers=[2, 2, 2, 2] norm='BatchNorm2d' act_fn='Mish' expansion=1 groups=1 bn_1st=True zero_bn=True stem_sizes=[64] stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+    
ModelCfg(
+      in_chans=3
+      num_classes=10
+      block='BasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[2, 2, 2, 2]
+      norm='BatchNorm2d'
+      act_fn='Mish'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}")
     
@@ -558,8 +598,7 @@ cfg = ModelCfg( print(cfg.act_fn) ```
output -
class 'torch.nn.modules.activation.SELU'
-    
+
class 'torch.nn.modules.activation.SELU'
Now we can create constructor from config: @@ -569,16 +608,11 @@ Now we can create constructor from config: mc = ModelConstructor.from_cfg(cfg) mc ``` -
output -
Deprecated. Pass se_module as se argument, se_reduction as arg to se.
-    Deprecated. Pass se_module as se argument, se_reduction as arg to se.
-    
-
output
ModelConstructor
       in_chans: 3, num_classes: 10
       expansion: 1, groups: 1, dw: False, div_groups: None
-      act_fn: ReLU, sa: , se: SEModule
+      act_fn: SELU, sa: , se: SEModule
       stem sizes: [64], stride on 0
       body sizes [64, 128, 256, 512]
       layers: [2, 2, 2, 2]
@@ -665,7 +699,7 @@ Here is model: mc() ```
output -
MxResNet( +
MxResNet(
       act_fn: Mish, stem_sizes: [3, 32, 64, 64], make_stem: xresnet_stem
       (stem): Sequential(
         (conv_0): ConvBnAct(
@@ -892,7 +926,7 @@ mc
 mc.stem.conv_1
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): Mish(inplace=True)
@@ -907,7 +941,7 @@ mc.stem.conv_1
 mc.body.l_0.bl_0
 ```
 
output -
BasicBlock( +
BasicBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
@@ -984,13 +1018,27 @@ That all. Now we have YaResNet constructor
 mc.print_cfg()
 ```
 
output -
YaResNet - in_chans: 3, num_classes: 1000 - expansion: 1, groups: 1, dw: False, div_groups: None - act_fn: ReLU, sa: False, se: False - stem sizes: [64], stride on 0 - body sizes [64, 128, 256, 512] - layers: [2, 2, 2, 2] +
ModelConstructor(
+      name='YaResNet'
+      in_chans=3
+      num_classes=1000
+      block='YaBasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[2, 2, 2, 2]
+      norm='BatchNorm2d'
+      act_fn='ReLU'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+      init_cnn='init_cnn'
+      make_stem='make_stem'
+      make_layer='make_layer'
+      make_body='make_body'
+      make_head='make_head')
     
@@ -1002,7 +1050,7 @@ Let see what we have. mc.body.l_1.bl_0 ```
output - YaBasicBlock( +
YaBasicBlock(
       (reduce): ConvBnAct(
         (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -1050,13 +1098,26 @@ mc = YaResnet34()
 mc.print_cfg()
 ```
 
output -
YaResnet34
-      in_chans: 3, num_classes: 1000
-      expansion: 1, groups: 1, dw: False, div_groups: None
-      act_fn: ReLU, sa: False, se: False
-      stem sizes: [64], stride on 0
-      body sizes [64, 128, 256, 512]
-      layers: [3, 4, 6, 3]
+    
YaResnet34(
+      in_chans=3
+      num_classes=1000
+      block='YaBasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[3, 4, 6, 3]
+      norm='BatchNorm2d'
+      act_fn='ReLU'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+      init_cnn='init_cnn'
+      make_stem='xresnet_stem'
+      make_layer='make_layer'
+      make_body='make_body'
+      make_head='make_head')
     
diff --git a/docs/00_ModelConstructor.md b/docs/01_ModelConstructor.md similarity index 95% rename from docs/00_ModelConstructor.md rename to docs/01_ModelConstructor.md index 9659779..aef2afc 100644 --- a/docs/00_ModelConstructor.md +++ b/docs/01_ModelConstructor.md @@ -24,13 +24,21 @@ cfg = ModelCfg() cfg.print_cfg() ```
output -
ModelCfg
-      in_chans: 3, num_classes: 1000
-      expansion: 1, groups: 1, dw: False, div_groups: None
-      act_fn: ReLU, sa: False, se: False
-      stem sizes: [64], stride on 0
-      body sizes [64, 128, 256, 512]
-      layers: [2, 2, 2, 2]
+    
ModelCfg(
+      in_chans=3
+      num_classes=1000
+      block='BasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[2, 2, 2, 2]
+      norm='BatchNorm2d'
+      act_fn='ReLU'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}")
     
@@ -57,8 +65,7 @@ cfg = ModelCfg(act_fn="torch.nn.Mish") print(cfg.act_fn) ```
output -
class 'torch.nn.modules.activation.Mish'>
-    
+
class 'torch.nn.modules.activation.Mish'
@@ -67,9 +74,7 @@ cfg = ModelCfg(act_fn="nn.SELU") print(cfg.act_fn) ```
output -
-    class 'torch.nn.modules.activation.SELU'
-    
+
class 'torch.nn.modules.activation.SELU'
@@ -82,15 +87,13 @@ print(cfg.act_fn) print(cfg.block) ```
output -
-    class 'torch.nn.modules.activation.Mish'
-    class 'model_constructor.yaresnet.YaBasicBlock'
-    
+
class 'torch.nn.modules.activation.Mish'>
+    
 
# Stem, Body, Head. -By default constructor create `nn.Sequential` model with `stem`, `body` and `head`. We can check it at constructor stage +By default constructor create `nn.Sequential` model with `stem`, `body` and `head`. We can check it at constructor stage. ```python @@ -132,7 +135,7 @@ stem = make_stem(cfg) stem ```
output -
Sequential( +
Sequential(
       (conv_1): ConvBnAct(
         (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -156,7 +159,7 @@ layer = make_layer(cfg, layer_num=0)
 layer
 ```
 
output -
Sequential( +
Sequential(
       (bl_0): YaBasicBlock(
         (convs): Sequential(
           (conv_0): ConvBnAct(
@@ -199,7 +202,7 @@ body = make_body(cfg)
 body
 ```
 
output -
Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): YaBasicBlock(
           (convs): Sequential(
@@ -361,7 +364,7 @@ head = make_head(cfg)
 head
 ```
 
output -
Sequential( +
Sequential(
       (pool): AdaptiveAvgPool2d(output_size=1)
       (flat): Flatten(start_dim=1, end_dim=-1)
       (fc): Linear(in_features=512, out_features=1000, bias=True)
@@ -395,7 +398,7 @@ mc
 mc.stem
 ```
 
output -
Sequential( +
Sequential(
       (conv_1): ConvBnAct(
         (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -429,7 +432,7 @@ mc.se = SEModule
 mc.body.l_0
 ```
 
output -
Sequential( +
Sequential(
       (bl_0): BasicBlock(
         (convs): Sequential(
           (conv_0): ConvBnAct(
diff --git a/docs/001_Blocks.md b/docs/02_1_Blocks.md
similarity index 98%
rename from docs/001_Blocks.md
rename to docs/02_1_Blocks.md
index e57f317..3a3b283 100644
--- a/docs/001_Blocks.md
+++ b/docs/02_1_Blocks.md
@@ -95,7 +95,7 @@ block = BottleneckBlock(64, 64, expansion=2, act_fn=nn.LeakyReLU, bn_1st=False)
 block
 ```
 
output -
BottleneckBlock( +
BottleneckBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -125,7 +125,7 @@ lock = BottleneckBlock(32, 64, expansion=2, dw=True)
 block
 ```
 
output -
BottleneckBlock( +
BottleneckBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -160,7 +160,7 @@ block = BottleneckBlock(32, 64, stride=2, dw=True, pool=pool)
 block
 ```
 
output -
BottleneckBlock( +
BottleneckBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -202,7 +202,7 @@ block = BottleneckBlock(32, 64, stride=2, dw=True, pool=pool, se=SEModule)
 block
 ```
 
output -
BottleneckBlock( +
BottleneckBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -248,7 +248,7 @@ block = BottleneckBlock(32, 64, stride=2, dw=True, pool=pool, se=SEModule, sa=Si
 block
 ```
 
output -
BottleneckBlock( +
BottleneckBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
diff --git a/docs/01_layers.md b/docs/02_2_layers.md
similarity index 96%
rename from docs/01_layers.md
rename to docs/02_2_layers.md
index 3675f8e..f2d8002 100644
--- a/docs/01_layers.md
+++ b/docs/02_2_layers.md
@@ -11,7 +11,7 @@ conv_layer = ConvBnAct(32, 32)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): ReLU(inplace=True)
@@ -27,7 +27,7 @@ conv_layer = ConvBnAct(32, 32, kernel_size=1)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
       (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): ReLU(inplace=True)
@@ -43,7 +43,7 @@ conv_layer = ConvBnAct(32, 32, stride=2)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): ReLU(inplace=True)
@@ -59,7 +59,7 @@ conv_layer = ConvBnAct(32, 32, groups=32)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
       (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): ReLU(inplace=True)
@@ -75,7 +75,7 @@ conv_layer = ConvBnAct(32, 64, act_fn=False)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     )
@@ -90,7 +90,7 @@ conv_layer = ConvBnAct(32, 64, bn_layer=False) conv_layer ```
output - ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (act_fn): ReLU(inplace=True)
     )
@@ -105,7 +105,7 @@ conv_layer = ConvBnAct(32, 64, pre_act=True) conv_layer ```
output - ConvBnAct( +
ConvBnAct(
       (act_fn): ReLU(inplace=True)
       (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -131,7 +131,7 @@ conv_layer = ConvBnAct(32, 64, bn_1st=True, pre_act=True)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (act_fn): ReLU(inplace=True)
       (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -147,7 +147,7 @@ conv_layer = ConvBnAct(32, 64, bn_1st=True)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): ReLU(inplace=True)
@@ -163,7 +163,7 @@ conv_layer = ConvBnAct(32, 64, bn_1st=True, act_fn=nn.LeakyReLU)
 conv_layer
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
diff --git a/docs/02_XResNet.md b/docs/02_XResNet.md
index e5573ce..a9bd13d 100644
--- a/docs/02_XResNet.md
+++ b/docs/02_XResNet.md
@@ -48,8 +48,11 @@ xresnet.print_changed_fields()
 
 xresnet.stem
 ```
+
+    /home/aya/mambaforge/envs/mc/lib/python3.10/site-packages/torch/nn/modules/conv.py:137: UserWarning: Failed to initialize NumPy: No module named 'numpy' (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:84.)
+      self.weight = Parameter(torch.empty(
 
output -
Sequential( +
Sequential(
       (conv_0): ConvBnAct(
         (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
         (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -82,7 +85,7 @@ xresnet.stem
 xresnet.body
 ```
 
output -
Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): BasicBlock(
           (convs): Sequential(
@@ -232,7 +235,7 @@ xresnet.body
 xresnet.head
 ```
 
output -
Sequential( +
Sequential(
       (pool): AdaptiveAvgPool2d(output_size=1)
       (flat): Flatten(start_dim=1, end_dim=-1)
       (fc): Linear(in_features=512, out_features=1000, bias=True)
diff --git a/docs/03_MXResNet.md b/docs/03_MXResNet.md
index 5ad5434..04c2f78 100644
--- a/docs/03_MXResNet.md
+++ b/docs/03_MXResNet.md
@@ -50,8 +50,11 @@ mxresnet.print_changed_fields()
 
 mxresnet.stem
 ```
+
+    /home/aya/mambaforge/envs/mc/lib/python3.10/site-packages/torch/nn/modules/conv.py:137: UserWarning: Failed to initialize NumPy: No module named 'numpy' (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:84.)
+      self.weight = Parameter(torch.empty(
 
output -
Sequential( +
Sequential(
       (conv_0): ConvBnAct(
         (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
         (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -84,7 +87,7 @@ mxresnet.stem
 mxresnet.body
 ```
 
output -
Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): BasicBlock(
           (convs): Sequential(
@@ -234,7 +237,7 @@ mxresnet.body
 mxresnet.body
 ```
 
output -
Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): BasicBlock(
           (convs): Sequential(
@@ -384,7 +387,7 @@ mxresnet.body
 mxresnet.head
 ```
 
output -
Sequential( +
Sequential(
       (pool): AdaptiveAvgPool2d(output_size=1)
       (flat): Flatten(start_dim=1, end_dim=-1)
       (fc): Linear(in_features=512, out_features=1000, bias=True)
diff --git a/docs/04_YaResNet.md b/docs/04_YaResNet.md
index 2a1b4d3..766cfcb 100644
--- a/docs/04_YaResNet.md
+++ b/docs/04_YaResNet.md
@@ -16,7 +16,7 @@ bl = YaBasicBlock(64, 64)
 bl
 ```
 
output -
YaBasicBlock( +
YaBasicBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
@@ -46,7 +46,7 @@ bl = YaBottleneckBlock(64, 128, stride=2, pool=pool, act_fn=nn.LeakyReLU, bn_1st
 bl
 ```
 
output -
YaBottleneckBlock( +
YaBottleneckBlock(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvBnAct(
@@ -90,7 +90,7 @@ bl = YaBottleneckBlock(
 bl
 ```
 
output -
YaBottleneckBlock( +
YaBottleneckBlock(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvBnAct(
@@ -133,7 +133,7 @@ bl = YaBottleneckBlock(
 bl
 ```
 
output -
YaBottleneckBlock( +
YaBottleneckBlock(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvBnAct(
@@ -176,7 +176,7 @@ bl = YaBasicBlock(
 bl
 ```
 
output -
YaBasicBlock( +
YaBasicBlock(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvBnAct(
@@ -221,7 +221,7 @@ bl = YaBottleneckBlock(
 bl
 ```
 
output -
YaBottleneckBlock( +
YaBottleneckBlock(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvBnAct(
@@ -273,7 +273,7 @@ bl = YaBottleneckBlock(
 bl
 ```
 
output -
YaBottleneckBlock( +
YaBottleneckBlock(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvBnAct(
@@ -307,8 +307,10 @@ bl
 
 ```python
 
-bl = YaResBlock(
-    4, 64, 128,
+bl = YaBottleneckBlock(
+    64,
+    128,
+    expansion=4,
     stride=2,
     pool=pool,
     act_fn=nn.LeakyReLU,
@@ -318,39 +320,39 @@ bl = YaResBlock(
 bl
 ```
 
output -
YaResBlock( +
YaBottleneckBlock(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvBnAct(
-          (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
-          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
+          (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
           (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
         )
         (conv_1): ConvBnAct(
-          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
-          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
+          (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
           (act_fn): LeakyReLU(negative_slope=0.01, inplace=True)
         )
         (conv_2): ConvBnAct(
-          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
-          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+          (conv): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
         )
         (se): SEModule(
           (squeeze): AdaptiveAvgPool2d(output_size=1)
           (excitation): Sequential(
-            (reduce): Linear(in_features=512, out_features=32, bias=True)
+            (reduce): Linear(in_features=128, out_features=8, bias=True)
             (se_act): ReLU(inplace=True)
-            (expand): Linear(in_features=32, out_features=512, bias=True)
+            (expand): Linear(in_features=8, out_features=128, bias=True)
             (se_gate): Sigmoid()
           )
         )
         (sa): SimpleSelfAttention(
-          (conv): Conv1d(512, 512, kernel_size=(1,), stride=(1,), bias=False)
+          (conv): Conv1d(128, 128, kernel_size=(1,), stride=(1,), bias=False)
         )
       )
       (id_conv): ConvBnAct(
-        (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
-        (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
+        (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
+        (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       )
       (merge): LeakyReLU(negative_slope=0.01, inplace=True)
     )
@@ -409,7 +411,7 @@ yaresnet.print_changed_fields() yaresnet.stem ```
output - Sequential( +
Sequential(
       (conv_0): ConvBnAct(
         (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
         (bn): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -442,7 +444,7 @@ yaresnet.stem
 yaresnet.body
 ```
 
output -
Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): YaBasicBlock(
           (convs): Sequential(
@@ -601,7 +603,7 @@ yaresnet.body
 yaresnet.head
 ```
 
output -
Sequential( +
Sequential(
       (pool): AdaptiveAvgPool2d(output_size=1)
       (flat): Flatten(start_dim=1, end_dim=-1)
       (fc): Linear(in_features=512, out_features=1000, bias=True)
@@ -619,7 +621,7 @@ yaresnet.act_fn = torch.nn.Mish
 yaresnet()
 ```
 
output -
YaResNet( +
YaResNet(
       block: YaBasicBlock, act_fn: Mish, stem_sizes: [3, 32, 64, 64], make_stem: xresnet_stem
       (stem): Sequential(
         (conv_0): ConvBnAct(
diff --git a/docs/05_Twist.md b/docs/05_Twist.md
index 025a313..026193b 100644
--- a/docs/05_Twist.md
+++ b/docs/05_Twist.md
@@ -309,7 +309,7 @@ bl = NewResBlockTwist(4, 64, 64, sa=True)
 bl
 ```
 
output -
NewResBlockTwist( +
NewResBlockTwist(
       (convs): Sequential(
         (conv_0): ConvLayer(
           (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -345,7 +345,7 @@ bl = NewResBlockTwist(4, 64, 64, stride=2)
 bl
 ```
 
output -
NewResBlockTwist( +
NewResBlockTwist(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvLayer(
@@ -379,7 +379,7 @@ bl = NewResBlockTwist(4, 64, 128, stride=2)
 bl
 ```
 
output -
NewResBlockTwist( +
NewResBlockTwist(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvLayer(
@@ -424,7 +424,7 @@ bl = NewResBlockTwist(
 bl
 ```
 
output -
NewResBlockTwist( +
NewResBlockTwist(
       (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)
       (convs): Sequential(
         (conv_0): ConvLayer(
@@ -469,7 +469,7 @@ bl = ResBlockTwist(4, 64, 64, sa=True)
 bl
 ```
 
output -
ResBlockTwist( +
ResBlockTwist(
       (convs): Sequential(
         (conv_0): ConvLayer(
           (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -505,7 +505,7 @@ bl = ResBlockTwist(4, 64, 64, stride=2)
 bl
 ```
 
output -
ResBlockTwist( +
ResBlockTwist(
       (convs): Sequential(
         (conv_0): ConvLayer(
           (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -539,7 +539,7 @@ bl = ResBlockTwist(4, 64, 128, stride=2)
 bl
 ```
 
output -
ResBlockTwist( +
ResBlockTwist(
       (convs): Sequential(
         (conv_0): ConvLayer(
           (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
@@ -588,7 +588,7 @@ model.block = NewResBlockTwist
 model.body
 ```
 
output -
Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): NewResBlockTwist(
           (convs): Sequential(
@@ -989,7 +989,7 @@ m = model()
 m
 ```
 
output -
Sequential( +
Sequential(
       model Net
       (stem): Sequential(
         (conv_0): ConvLayer(
@@ -1406,7 +1406,7 @@ m
 m.stem
 ```
 
output -
Sequential( +
Sequential(
       (conv_0): ConvLayer(
         (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
         (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -1434,7 +1434,7 @@ m.stem
 m.head
 ```
 
output -
Sequential( +
Sequential(
       (pool): AdaptiveAvgPool2d(output_size=1)
       (flat): Flatten()
       (fc): Linear(in_features=2048, out_features=1000, bias=True)
@@ -1449,7 +1449,7 @@ m.head
 m.body.l_0
 ```
 
output -
Sequential( +
Sequential(
       (bl_0): ResBlockTwist(
         (convs): Sequential(
           (conv_0): ConvLayer(
@@ -1531,7 +1531,7 @@ m.body.l_0
 m.body.l_1
 ```
 
output -
Sequential( +
Sequential(
       (bl_0): ResBlockTwist(
         (convs): Sequential(
           (conv_0): ConvLayer(
@@ -1636,7 +1636,7 @@ m.body.l_1
 m.body.l_2
 ```
 
output -
Sequential( +
Sequential(
       (bl_0): ResBlockTwist(
         (convs): Sequential(
           (conv_0): ConvLayer(
@@ -1785,7 +1785,7 @@ m.body.l_2
 m.body.l_3
 ```
 
output -
Sequential( +
Sequential(
       (bl_0): ResBlockTwist(
         (convs): Sequential(
           (conv_0): ConvLayer(
diff --git a/docs/06_ConvMixer.md b/docs/06_ConvMixer.md
index f24268d..e0e36d9 100644
--- a/docs/06_ConvMixer.md
+++ b/docs/06_ConvMixer.md
@@ -32,7 +32,7 @@ convmixer_1024_20 = ConvMixer(dim=1024, depth=20)
 convmixer_1024_20
 ```
 
output -
ConvMixer( +
ConvMixer(
       (0): ConvLayer(
         (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
         (act_fn): GELU(approximate='none')
@@ -341,7 +341,7 @@ convmixer_1024_20 = ConvMixer(dim=1024, depth=20, act_fn=Mish())
 convmixer_1024_20[0]
 ```
 
output -
ConvLayer( +
ConvLayer(
       (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
       (act_fn): Mish()
       (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -356,7 +356,7 @@ convmixer_1024_20[0]
 convmixer_1024_20[1]
 ```
 
output -
Sequential( +
Sequential(
       (0): Residual(
         (fn): ConvLayer(
           (conv): Conv2d(1024, 1024, kernel_size=(9, 9), stride=(1, 1), padding=same, groups=1024)
@@ -389,7 +389,7 @@ convmixer_1024_20 = ConvMixer(dim=1024, depth=20, act_fn=Mish(), pre_act=True)
 convmixer_1024_20[0]
 ```
 
output -
ConvLayer( +
ConvLayer(
       (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
       (act_fn): Mish()
       (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -404,7 +404,7 @@ convmixer_1024_20[0]
 convmixer_1024_20[1]
 ```
 
output -
Sequential( +
Sequential(
       (0): Residual(
         (fn): ConvLayer(
           (act_fn): Mish()
@@ -435,7 +435,7 @@ convmixer_1024_20 = ConvMixer(dim=1024, depth=20, act_fn=Mish(), bn_1st=True)
 convmixer_1024_20[0]
 ```
 
output -
ConvLayer( +
ConvLayer(
       (conv): Conv2d(3, 1024, kernel_size=(7, 7), stride=(7, 7))
       (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): Mish()
@@ -450,7 +450,7 @@ convmixer_1024_20[0]
 convmixer_1024_20[1]
 ```
 
output -
Sequential( +
Sequential(
       (0): Residual(
         (fn): ConvLayer(
           (conv): Conv2d(1024, 1024, kernel_size=(9, 9), stride=(1, 1), padding=same, groups=1024)
diff --git a/docs/README.md b/docs/README.md
index 7cdfa4a..922e2dd 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -51,13 +51,26 @@ Check all parameters with `print_cfg` method:
 mc.print_cfg()
 ```
 
output -
ModelConstructor
-      in_chans: 3, num_classes: 1000
-      expansion: 1, groups: 1, dw: False, div_groups: None
-      act_fn: ReLU, sa: False, se: False
-      stem sizes: [64], stride on 0
-      body sizes [64, 128, 256, 512]
-      layers: [2, 2, 2, 2]
+    
ModelConstructor(
+      in_chans=3
+      num_classes=1000
+      block='BasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[2, 2, 2, 2]
+      norm='BatchNorm2d'
+      act_fn='ReLU'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+      init_cnn='init_cnn'
+      make_stem='make_stem'
+      make_layer='make_layer'
+      make_body='make_body'
+      make_head='make_head')
     
@@ -70,7 +83,7 @@ model = mc() model ```
output -
ModelConstructor( +
ModelConstructor(
       (stem): Sequential(
         (conv_1): ConvBnAct(
           (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
@@ -261,6 +274,19 @@ mc.print_changed_fields()
     
+We can compare the changed fields with their defaults. + + +```python +mc.print_changed_fields(show_default=True) +``` +
output +
Changed fields:
+    layers: [3, 4, 6, 3] | [2, 2, 2, 2]
+    expansion: 4 | 1
+    
+
+ Now we can look at model parts - stem, body, head. @@ -269,7 +295,7 @@ Now we can look at model parts - stem, body, head. mc.body ```
output - Sequential( +
Sequential(
       (l_0): Sequential(
         (bl_0): BasicBlock(
           (convs): Sequential(
@@ -543,7 +569,21 @@ cfg = ModelCfg(
 print(cfg)
 ```
 
output -
in_chans=3 num_classes=10 block='BasicBlock' conv_layer='ConvBnAct' block_sizes=[64, 128, 256, 512] layers=[2, 2, 2, 2] norm='BatchNorm2d' act_fn='Mish' expansion=1 groups=1 bn_1st=True zero_bn=True stem_sizes=[64] stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+    
ModelCfg(
+      in_chans=3
+      num_classes=10
+      block='BasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[2, 2, 2, 2]
+      norm='BatchNorm2d'
+      act_fn='Mish'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}")
     
@@ -558,8 +598,7 @@ cfg = ModelCfg( print(cfg.act_fn) ```
output -

-    
+
class 'torch.nn.modules.activation.SELU'
Now we can create constructor from config: @@ -569,16 +608,11 @@ Now we can create constructor from config: mc = ModelConstructor.from_cfg(cfg) mc ``` -
output -
Deprecated. Pass se_module as se argument, se_reduction as arg to se.
-    Deprecated. Pass se_module as se argument, se_reduction as arg to se.
-    
-
output
ModelConstructor
       in_chans: 3, num_classes: 10
       expansion: 1, groups: 1, dw: False, div_groups: None
-      act_fn: ReLU, sa: , se: SEModule
+      act_fn: SELU, sa: , se: SEModule
       stem sizes: [64], stride on 0
       body sizes [64, 128, 256, 512]
       layers: [2, 2, 2, 2]
@@ -665,7 +699,7 @@ Here is model: mc() ```
output -
MxResNet( +
MxResNet(
       act_fn: Mish, stem_sizes: [3, 32, 64, 64], make_stem: xresnet_stem
       (stem): Sequential(
         (conv_0): ConvBnAct(
@@ -892,7 +926,7 @@ mc
 mc.stem.conv_1
 ```
 
output -
ConvBnAct( +
ConvBnAct(
       (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (act_fn): Mish(inplace=True)
@@ -907,7 +941,7 @@ mc.stem.conv_1
 mc.body.l_0.bl_0
 ```
 
output -
BasicBlock( +
BasicBlock(
       (convs): Sequential(
         (conv_0): ConvBnAct(
           (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
@@ -984,13 +1018,27 @@ That all. Now we have YaResNet constructor
 mc.print_cfg()
 ```
 
output -
YaResNet - in_chans: 3, num_classes: 1000 - expansion: 1, groups: 1, dw: False, div_groups: None - act_fn: ReLU, sa: False, se: False - stem sizes: [64], stride on 0 - body sizes [64, 128, 256, 512] - layers: [2, 2, 2, 2] +
ModelConstructor(
+      name='YaResNet'
+      in_chans=3
+      num_classes=1000
+      block='YaBasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[2, 2, 2, 2]
+      norm='BatchNorm2d'
+      act_fn='ReLU'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+      init_cnn='init_cnn'
+      make_stem='make_stem'
+      make_layer='make_layer'
+      make_body='make_body'
+      make_head='make_head')
     
@@ -1002,7 +1050,7 @@ Let see what we have. mc.body.l_1.bl_0 ```
output - YaBasicBlock( +
YaBasicBlock(
       (reduce): ConvBnAct(
         (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(2, 2), bias=False)
         (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -1050,13 +1098,26 @@ mc = YaResnet34()
 mc.print_cfg()
 ```
 
output -
YaResnet34
-      in_chans: 3, num_classes: 1000
-      expansion: 1, groups: 1, dw: False, div_groups: None
-      act_fn: ReLU, sa: False, se: False
-      stem sizes: [64], stride on 0
-      body sizes [64, 128, 256, 512]
-      layers: [3, 4, 6, 3]
+    
YaResnet34(
+      in_chans=3
+      num_classes=1000
+      block='YaBasicBlock'
+      conv_layer='ConvBnAct'
+      block_sizes=[64, 128, 256, 512]
+      layers=[3, 4, 6, 3]
+      norm='BatchNorm2d'
+      act_fn='ReLU'
+      expansion=1
+      groups=1
+      bn_1st=True
+      zero_bn=True
+      stem_sizes=[64]
+      stem_pool="MaxPool2d {'kernel_size': 3, 'stride': 2, 'padding': 1}"
+      init_cnn='init_cnn'
+      make_stem='xresnet_stem'
+      make_layer='make_layer'
+      make_body='make_body'
+      make_head='make_head')
     
diff --git a/mkdocs.yaml b/mkdocs.yaml index e114142..92482f2 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -6,6 +6,8 @@ docs_dir: docs theme: name: material custom_dir: docs/overrides + features: + - navigation.footer palette: - scheme: default diff --git a/src/model_constructor/helpers.py b/src/model_constructor/helpers.py index a2aa81d..3ee5cfa 100644 --- a/src/model_constructor/helpers.py +++ b/src/model_constructor/helpers.py @@ -85,11 +85,14 @@ def _get_str(self, value: Any) -> str: def _get_str_value(self, field: str) -> str: return self._get_str(getattr(self, field)) - def __repr__(self) -> str: + def _cfg_str(self) -> str: return f"{self.__repr_name__()}(\n {self.__repr_str__(chr(10) + ' ')})" + def __repr__(self) -> str: + return self._cfg_str() + def __str__(self) -> str: - return f"{self.__repr_name__()}(\n {self.__repr_str__(chr(10) + ' ')})" + return self._cfg_str() def __repr_args__(self) -> List[Tuple[str, str]]: return [ @@ -125,7 +128,7 @@ def changed_fields(self) -> Dict[str, Any]: def print_cfg(self) -> None: """Print full config""" - print(repr(self)) + print(self._cfg_str()) def print_set_fields(self) -> None: """Print fields changed at init."""