From 7885af4bf13cb0249a806b2cb6e13fac3a846863 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Wed, 11 Nov 2020 13:46:52 +0000 Subject: [PATCH 1/3] cleared magic --- model_constructor/activations.py | 1 - model_constructor/base_constructor.py | 2 - model_constructor/mxresnet.py | 1 + model_constructor/net.py | 1 - nbs/00_Net.ipynb | 989 +++++++------------------- nbs/01_activations.ipynb | 195 ++--- nbs/01_layers.ipynb | 286 ++------ nbs/03_MXResNet.ipynb | 24 +- nbs/04_YaResNet.ipynb | 50 +- nbs/05_Twist.ipynb | 54 +- nbs/10_base_constructor.ipynb | 463 ++++-------- nbs/11_xresnet.ipynb | 213 ++---- nbs/index.ipynb | 53 +- 13 files changed, 653 insertions(+), 1679 deletions(-) diff --git a/model_constructor/activations.py b/model_constructor/activations.py index c9a847b..a8cf539 100644 --- a/model_constructor/activations.py +++ b/model_constructor/activations.py @@ -155,7 +155,6 @@ def __init__(self, inplace: bool = False): def forward(self, x): return MishJitAutoFn.apply(x) - # Cell def hard_swish(x, inplace: bool = False): """Hard swish activation function""" diff --git a/model_constructor/base_constructor.py b/model_constructor/base_constructor.py index 325fcfd..600f2b4 100644 --- a/model_constructor/base_constructor.py +++ b/model_constructor/base_constructor.py @@ -60,7 +60,6 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) - # Cell class Bottleneck(nn.Module): '''Bottlneck block for resnet models''' @@ -123,7 +122,6 @@ def __init__(self, block, for i in range(num_layers)] super().__init__(OrderedDict(layers)) - # Cell class Head(nn.Sequential): '''base head''' diff --git a/model_constructor/mxresnet.py b/model_constructor/mxresnet.py index def2a05..6ed334d 100644 --- a/model_constructor/mxresnet.py +++ b/model_constructor/mxresnet.py @@ -5,6 +5,7 @@ # Cell import torch.nn as nn import sys, torch +import os from functools import partial from collections import OrderedDict from .layers import * diff --git a/model_constructor/net.py b/model_constructor/net.py index 34d59e5..69022be 100644 --- a/model_constructor/net.py +++ b/model_constructor/net.py @@ -17,7 +17,6 @@ def init_cnn(m): if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight) for l in m.children(): init_cnn(l) - # Cell class ResBlock(nn.Module): '''Resnet block''' diff --git a/nbs/00_Net.ipynb b/nbs/00_Net.ipynb index bae84d2..60df77b 100644 --- a/nbs/00_Net.ipynb +++ b/nbs/00_Net.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:10.106995Z", - "start_time": "2020-09-15T15:05:09.505889Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:10.480134Z", - "start_time": "2020-09-15T15:05:10.473435Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,21 +26,16 @@ } ], "source": [ - "%nbdev_default_export net" + "#default_exp net" ] }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:12.777083Z", - "start_time": "2020-09-15T15:05:12.773799Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide" + "#hide" ] }, { @@ -63,32 +49,22 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:14.588153Z", - 
"start_time": "2020-09-15T15:05:14.577005Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 71, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:51.267131Z", - "start_time": "2020-09-15T15:07:51.259892Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import sys, torch\n", "from functools import partial\n", @@ -98,13 +74,8 @@ }, { "cell_type": "code", - "execution_count": 105, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:36:04.485843Z", - "start_time": "2020-09-15T16:36:04.476302Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "from dataclasses import dataclass, asdict" @@ -119,22 +90,17 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:31.875738Z", - "start_time": "2020-09-15T15:06:31.851804Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "act_fn = nn.ReLU(inplace=True)\n", "\n", "def init_cnn(m):\n", " if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0)\n", " if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight)\n", - " for l in m.children(): init_cnn(l)\n" + " for l in m.children(): init_cnn(l)" ] }, { @@ -146,20 +112,15 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:51.253988Z", - "start_time": "2020-09-15T15:06:51.234702Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class ResBlock(nn.Module):\n", " '''Resnet block'''\n", " se_block = SEBlock\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", @@ -171,7 +132,7 @@ " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ] if expansion == 1 else [\n", " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, \n", + " (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", " groups= nh if dw else groups)),\n", " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", @@ -187,13 +148,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:51.661257Z", - "start_time": "2020-09-15T15:06:51.640107Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -217,7 +173,7 @@ ")" ] }, - "execution_count": 16, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -228,13 +184,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:52.454965Z", - "start_time": "2020-09-15T15:06:52.445849Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -263,7 +214,7 @@ ")" ] }, - "execution_count": 17, + "execution_count": null, "metadata": {}, 
"output_type": "execute_result" } @@ -274,13 +225,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:53.430030Z", - "start_time": "2020-09-15T15:06:53.403478Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -309,7 +255,7 @@ ")" ] }, - "execution_count": 18, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -320,13 +266,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:54.202712Z", - "start_time": "2020-09-15T15:06:54.175803Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -352,7 +293,7 @@ ")" ] }, - "execution_count": 19, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -363,13 +304,8 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:54.867910Z", - "start_time": "2020-09-15T15:06:54.841626Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -407,7 +343,7 @@ ")" ] }, - "execution_count": 20, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -433,21 +369,16 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:59.872031Z", - "start_time": "2020-09-15T15:06:59.824675Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet.\n", "class NewResBlock(nn.Module):\n", " '''YaResnet block'''\n", " se_block = SEBlock\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", @@ -460,7 +391,7 @@ " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ] if expansion == 1 else [\n", " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, \n", + " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", " groups= nh if dw else groups)), # stride 1 !!!\n", " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", @@ -470,20 +401,15 @@ " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", " self.merge =act_fn\n", "\n", - " def forward(self, x): \n", + " def forward(self, x):\n", " o = self.reduce(x)\n", " return self.merge(self.convs(o) + self.idconv(o))" ] }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:00.617334Z", - "start_time": "2020-09-15T15:07:00.605327Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -513,7 +439,7 @@ ")" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -524,16 +450,11 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:01.385225Z", - "start_time": "2020-09-15T15:07:01.375982Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + 
"#hide\n", "# %nbdev_export\n", "# from model_constructor.yaresnet import YaResBlock\n", "# NewResBlock = YaResBlock" @@ -541,13 +462,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:02.101788Z", - "start_time": "2020-09-15T15:07:02.071627Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -571,26 +487,21 @@ ")" ] }, - "execution_count": 24, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(1,64,64,sa=True)\n", "bl" ] }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:02.748148Z", - "start_time": "2020-09-15T15:07:02.694969Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -601,7 +512,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ -611,13 +522,8 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:03.330137Z", - "start_time": "2020-09-15T15:07:03.297402Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -648,26 +554,21 @@ ")" ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:03.913925Z", - "start_time": "2020-09-15T15:07:03.810639Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -678,7 +579,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -688,13 +589,8 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:04.317012Z", - "start_time": "2020-09-15T15:07:04.284583Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -725,26 +621,21 @@ ")" ] }, - "execution_count": 28, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, groups=4)\n", "bl" ] }, { "cell_type": "code", - "execution_count": 29, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:04.932651Z", - "start_time": "2020-09-15T15:07:04.800290Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -755,7 +646,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -772,20 +663,15 @@ }, { "cell_type": "code", - "execution_count": 30, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:05.800721Z", - "start_time": "2020-09-15T15:07:05.768220Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_stem(self):\n", - " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i+1], \n", - "# stride=2 if i==0 else 1, \n", - " stride=2 if i==self.stem_stride_on else 1, \n", + " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], 
self.stem_sizes[i+1],\n", + "# stride=2 if i==0 else 1,\n", + " stride=2 if i==self.stem_stride_on else 1,\n", " bn_layer=(not self.stem_bn_end) if i==(len(self.stem_sizes)-2) else True,\n", " act_fn=self.act_fn, bn_1st=self.bn_1st))\n", " for i in range(len(self.stem_sizes)-1)]\n", @@ -796,19 +682,14 @@ }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:06.313839Z", - "start_time": "2020-09-15T15:07:06.305739Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_layer(self,expansion,ni,nf,blocks,stride,sa):\n", " return nn.Sequential(OrderedDict(\n", - " [(f\"bl_{i}\", self.block(expansion, ni if i==0 else nf, nf, \n", + " [(f\"bl_{i}\", self.block(expansion, ni if i==0 else nf, nf,\n", " stride if i==0 else 1, sa=sa if i==blocks-1 else False,\n", " conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,\n", " zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, dw=self.dw, se=self.se))\n", @@ -817,19 +698,14 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:06.827551Z", - "start_time": "2020-09-15T15:07:06.803610Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_body(self):\n", - " blocks = [(f\"l_{i}\", self._make_layer(self,self.expansion, \n", - " self.block_sizes[i], self.block_sizes[i+1], l, \n", + " blocks = [(f\"l_{i}\", self._make_layer(self,self.expansion,\n", + " self.block_sizes[i], self.block_sizes[i+1], l,\n", " 1 if i==0 else 2, self.sa if i==0 else False))\n", " for i,l in enumerate(self.layers)]\n", " return nn.Sequential(OrderedDict(blocks))" @@ -837,16 +713,11 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:07.429989Z", - "start_time": "2020-09-15T15:07:07.409485Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_head(self):\n", " head = [('pool', nn.AdaptiveAvgPool2d(1)),\n", " ('flat', Flatten()),\n", @@ -863,33 +734,28 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:08.290093Z", - "start_time": "2020-09-15T15:07:08.254969Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Net():\n", " \"\"\"Model constructor. 
As default - xresnet18\"\"\"\n", - " def __init__(self, name='Net', c_in=3, c_out=1000, \n", + " def __init__(self, name='Net', c_in=3, c_out=1000,\n", " block=ResBlock, conv_layer = ConvLayer,\n", - " block_sizes = [64,128,256,512], layers=[2,2,2,2], \n", + " block_sizes = [64,128,256,512], layers=[2,2,2,2],\n", " norm = nn.BatchNorm2d,\n", " act_fn=nn.ReLU(inplace=True),\n", " pool = nn.AvgPool2d(2, ceil_mode=True),\n", - " expansion=1, groups = 1, dw=False, \n", + " expansion=1, groups = 1, dw=False,\n", " sa=False, se=False, se_reduction=16,\n", " bn_1st = True,\n", " zero_bn=True,\n", " stem_stride_on = 0,\n", - " stem_sizes = [32,32,64], \n", + " stem_sizes = [32,32,64],\n", " stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", " stem_bn_end = False,\n", - " \n", + "\n", " _init_cnn = init_cnn,\n", " _make_stem = _make_stem,\n", " _make_layer = _make_layer,\n", @@ -903,10 +769,10 @@ " self.__dict__ = params\n", " self._block_sizes = params['block_sizes']\n", " if self.stem_sizes[0] != self.c_in: self.stem_sizes = [self.c_in] + self.stem_sizes\n", - " \n", + "\n", " @property\n", " def block_sizes(self):\n", - " return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4) \n", + " return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4)\n", "\n", " @property\n", " def stem(self):\n", @@ -919,7 +785,7 @@ " @property\n", " def body(self):\n", " return self._make_body(self)\n", - " \n", + "\n", " def __call__(self):\n", " model = nn.Sequential(OrderedDict([\n", " ('stem', self.stem),\n", @@ -941,13 +807,8 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:08.930672Z", - "start_time": "2020-09-15T15:07:08.919167Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -961,7 +822,7 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 35, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -973,13 +834,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:09.471080Z", - "start_time": "2020-09-15T15:07:09.460079Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -987,7 +843,7 @@ "[64, 128, 256, 512]" ] }, - "execution_count": 36, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -998,13 +854,8 @@ }, { "cell_type": "code", - "execution_count": 37, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:10.527262Z", - "start_time": "2020-09-15T15:07:10.514092Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1012,7 +863,7 @@ "[64, 64, 128, 256, 512]" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1023,13 +874,8 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:10.911200Z", - "start_time": "2020-09-15T15:07:10.907783Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1043,7 +889,7 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 38, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1055,13 +901,8 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:11.302065Z", - "start_time": "2020-09-15T15:07:11.290035Z" - } - }, + "execution_count": null, + 
"metadata": {}, "outputs": [ { "data": { @@ -1069,7 +910,7 @@ "[64, 128, 256, 512, 1024]" ] }, - "execution_count": 39, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1080,13 +921,8 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:11.709098Z", - "start_time": "2020-09-15T15:07:11.702269Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1100,26 +936,21 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 40, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model = Net(stem_sizes=[3,32,32,64])\n", "model" ] }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:12.090940Z", - "start_time": "2020-09-15T15:07:12.080199Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1127,7 +958,7 @@ "[64, 64, 128, 256, 512]" ] }, - "execution_count": 41, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1138,13 +969,8 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:12.469528Z", - "start_time": "2020-09-15T15:07:12.467407Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model = Net()" @@ -1152,29 +978,19 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:12.830071Z", - "start_time": "2020-09-15T15:07:12.821423Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# model.block_sizes = [64, 128, 256, 512] # wrong way --> use _block_sizes\n", "# model" ] }, { "cell_type": "code", - "execution_count": 44, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:13.183685Z", - "start_time": "2020-09-15T15:07:13.178314Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1199,25 +1015,20 @@ ")" ] }, - "execution_count": 44, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.stem" ] }, { "cell_type": "code", - "execution_count": 45, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:13.578197Z", - "start_time": "2020-09-15T15:07:13.555787Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1242,26 +1053,21 @@ ")" ] }, - "execution_count": 45, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.stem_stride_on = 1\n", "model.stem" ] }, { "cell_type": "code", - "execution_count": 46, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:14.101338Z", - "start_time": "2020-09-15T15:07:13.951890Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1272,7 +1078,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model.stem(xb)\n", @@ -1282,13 +1088,8 @@ }, { "cell_type": "code", - "execution_count": 47, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:14.269326Z", - "start_time": "2020-09-15T15:07:14.266922Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.bn_1st = False" @@ 
-1296,13 +1097,8 @@ }, { "cell_type": "code", - "execution_count": 48, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:14.782404Z", - "start_time": "2020-09-15T15:07:14.776999Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.act_fn =nn.LeakyReLU(inplace=True)" @@ -1310,13 +1106,8 @@ }, { "cell_type": "code", - "execution_count": 49, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:15.183253Z", - "start_time": "2020-09-15T15:07:15.175224Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.sa = True\n", @@ -1325,13 +1116,8 @@ }, { "cell_type": "code", - "execution_count": 50, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:15.677001Z", - "start_time": "2020-09-15T15:07:15.583127Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1391,25 +1177,20 @@ ")" ] }, - "execution_count": 50, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body.l_1" ] }, { "cell_type": "code", - "execution_count": 51, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:16.386300Z", - "start_time": "2020-09-15T15:07:16.216646Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1420,7 +1201,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1430,13 +1211,8 @@ }, { "cell_type": "code", - "execution_count": 52, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:16.733132Z", - "start_time": "2020-09-15T15:07:16.625466Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1494,25 +1270,20 @@ ")" ] }, - "execution_count": 52, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.body.l_0" ] }, { "cell_type": "code", - "execution_count": 53, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:17.338636Z", - "start_time": "2020-09-15T15:07:17.199753Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1523,7 +1294,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1533,13 +1304,8 @@ }, { "cell_type": "code", - "execution_count": 54, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:17.785128Z", - "start_time": "2020-09-15T15:07:17.708421Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1611,13 +1377,13 @@ ")" ] }, - "execution_count": 54, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.groups = 4\n", "model.expansion = 4\n", "model.body.l_0" @@ -1625,13 +1391,8 @@ }, { "cell_type": "code", - "execution_count": 55, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:18.351990Z", - "start_time": "2020-09-15T15:07:18.067907Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1642,7 +1403,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1652,13 +1413,8 @@ }, { "cell_type": "code", - "execution_count": 56, - "metadata": { - "ExecuteTime": { - "end_time": 
"2020-09-15T15:07:18.417339Z", - "start_time": "2020-09-15T15:07:18.354230Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1730,13 +1486,13 @@ ")" ] }, - "execution_count": 56, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.groups = 1\n", "model.dw = True\n", "model.expansion = 4\n", @@ -1745,13 +1501,8 @@ }, { "cell_type": "code", - "execution_count": 57, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.018917Z", - "start_time": "2020-09-15T15:07:18.662599Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1762,7 +1513,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1772,16 +1523,11 @@ }, { "cell_type": "code", - "execution_count": 58, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.025610Z", - "start_time": "2020-09-15T15:07:19.021179Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.groups = 1\n", "model.dw = 0\n", "model.expansion = 1" @@ -1789,13 +1535,8 @@ }, { "cell_type": "code", - "execution_count": 59, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.270731Z", - "start_time": "2020-09-15T15:07:19.261966Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.block = NewResBlock" @@ -1803,13 +1544,8 @@ }, { "cell_type": "code", - "execution_count": 60, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.994546Z", - "start_time": "2020-09-15T15:07:19.851080Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1820,7 +1556,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_1.bl_0(xb)\n", @@ -1830,13 +1566,8 @@ }, { "cell_type": "code", - "execution_count": 61, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:20.532951Z", - "start_time": "2020-09-15T15:07:20.436949Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1871,25 +1602,20 @@ ")" ] }, - "execution_count": 61, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.body.l_1.bl_0" ] }, { "cell_type": "code", - "execution_count": 62, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:20.889742Z", - "start_time": "2020-09-15T15:07:20.886369Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# model = Net(expansion=4)\n", @@ -1898,28 +1624,18 @@ }, { "cell_type": "code", - "execution_count": 63, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:21.361772Z", - "start_time": "2020-09-15T15:07:21.358970Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.stem_bn_end = True" ] }, { "cell_type": "code", - "execution_count": 65, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:27.697499Z", - "start_time": "2020-09-15T15:07:27.684261Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1944,25 +1660,20 @@ ")" ] }, - "execution_count": 65, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", 
"model.stem" ] }, { "cell_type": "code", - "execution_count": 66, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:28.043163Z", - "start_time": "2020-09-15T15:07:27.957348Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2002,25 +1713,20 @@ ")" ] }, - "execution_count": 66, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.body.l_1.bl_0" ] }, { "cell_type": "code", - "execution_count": 67, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:28.430335Z", - "start_time": "2020-09-15T15:07:28.199731Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -2031,7 +1737,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = model.body.l_1.bl_0(xb)\n", @@ -2041,43 +1747,28 @@ }, { "cell_type": "code", - "execution_count": 68, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:28.435805Z", - "start_time": "2020-09-15T15:07:28.432826Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.stem_bn_end = False" ] }, { "cell_type": "code", - "execution_count": 72, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:01.044096Z", - "start_time": "2020-09-15T15:08:00.746890Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_collapse_input\n", + "#collapse_input\n", "m = model()" ] }, { "cell_type": "code", - "execution_count": 73, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:01.318234Z", - "start_time": "2020-09-15T15:08:01.291990Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2366,25 +2057,20 @@ ")" ] }, - "execution_count": 73, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "m" ] }, { "cell_type": "code", - "execution_count": 74, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:02.174918Z", - "start_time": "2020-09-15T15:08:02.147042Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2647,25 +2333,20 @@ ")" ] }, - "execution_count": 74, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "m.body" ] }, { "cell_type": "code", - "execution_count": 75, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:03.486811Z", - "start_time": "2020-09-15T15:08:02.911398Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -2676,7 +2357,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = m(xb)\n", @@ -2686,13 +2367,8 @@ }, { "cell_type": "code", - "execution_count": 76, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:03.501054Z", - "start_time": "2020-09-15T15:08:03.499056Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "## xresnet constructor\n", @@ -2712,13 +2388,8 @@ }, { "cell_type": "code", - "execution_count": 77, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:04.282047Z", - "start_time": "2020-09-15T15:08:04.273710Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# m = net50(c_out=10)" @@ -2726,13 +2397,8 @@ }, { 
"cell_type": "code", - "execution_count": 78, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:04.693899Z", - "start_time": "2020-09-15T15:08:04.691824Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# m, m.c_out" @@ -2740,16 +2406,11 @@ }, { "cell_type": "code", - "execution_count": 79, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:05.884571Z", - "start_time": "2020-09-15T15:08:05.871787Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "xresnet34_parameters = {\n", " 'name': 'xresnet34',\n", " 'expansion': 1,\n", @@ -2765,13 +2426,8 @@ }, { "cell_type": "code", - "execution_count": 80, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:06.495561Z", - "start_time": "2020-09-15T15:08:06.489809Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2785,26 +2441,21 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 80, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model34 = Net(**xresnet34_parameters)\n", "model34" ] }, { "cell_type": "code", - "execution_count": 81, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:07.115853Z", - "start_time": "2020-09-15T15:08:07.098473Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2818,42 +2469,32 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 81, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model50 = Net(**xresnet50_parameters)\n", "model50" ] }, { "cell_type": "code", - "execution_count": 82, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:07.761469Z", - "start_time": "2020-09-15T15:08:07.745240Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "xresnet34 = partial(Net, name='xresnet34', expansion=1, layers = [3, 4, 6, 3])\n", "xresnet50 = partial(Net, name='xresnet34', expansion=4, layers = [3, 4, 6, 3])" ] }, { "cell_type": "code", - "execution_count": 83, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:08.451245Z", - "start_time": "2020-09-15T15:08:08.435714Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2867,26 +2508,21 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 83, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model34 = xresnet34()\n", "model34" ] }, { "cell_type": "code", - "execution_count": 84, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:09.050303Z", - "start_time": "2020-09-15T15:08:09.032704Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2900,26 +2536,21 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 84, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model50 = xresnet50()\n", "model50" ] }, { "cell_type": "code", - "execution_count": 100, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:23:29.673176Z", - "start_time": "2020-09-15T15:23:29.668886Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "class XResNet34(Net):\n", @@ -2930,14 +2561,8 @@ }, { "cell_type": "code", - "execution_count": 104, - "metadata": { - 
"ExecuteTime": { - "end_time": "2020-09-15T15:24:03.920353Z", - "start_time": "2020-09-15T15:24:03.912929Z" - }, - "scrolled": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2951,7 +2576,7 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 104, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2963,13 +2588,8 @@ }, { "cell_type": "code", - "execution_count": 112, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:41:41.886520Z", - "start_time": "2020-09-15T16:41:41.884324Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "from dataclasses import field" @@ -2977,13 +2597,8 @@ }, { "cell_type": "code", - "execution_count": 119, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:43:31.023793Z", - "start_time": "2020-09-15T16:43:31.018109Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "@dataclass\n", @@ -2994,13 +2609,8 @@ }, { "cell_type": "code", - "execution_count": 120, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:43:32.399958Z", - "start_time": "2020-09-15T16:43:32.394016Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3008,7 +2618,7 @@ "{'name': 'xresnet34', 'layers': [3, 4, 6, 3]}" ] }, - "execution_count": 120, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3019,13 +2629,8 @@ }, { "cell_type": "code", - "execution_count": 126, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:45:55.696580Z", - "start_time": "2020-09-15T16:45:55.692231Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "@dataclass\n", @@ -3036,13 +2641,8 @@ }, { "cell_type": "code", - "execution_count": 127, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:45:56.207660Z", - "start_time": "2020-09-15T16:45:56.203225Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3050,7 +2650,7 @@ "Xres50(name='xresnet34', layers=[3, 4, 6, 3], expansion=4)" ] }, - "execution_count": 127, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3061,13 +2661,8 @@ }, { "cell_type": "code", - "execution_count": 128, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:46:01.313728Z", - "start_time": "2020-09-15T16:46:01.307542Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3075,7 +2670,7 @@ "{'name': 'xresnet34', 'layers': [3, 4, 6, 3], 'expansion': 4}" ] }, - "execution_count": 128, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3086,13 +2681,8 @@ }, { "cell_type": "code", - "execution_count": 129, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:46:27.728315Z", - "start_time": "2020-09-15T16:46:27.722310Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3106,7 +2696,7 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 129, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3127,13 +2717,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:31:33.882175Z", - "start_time": "2020-09-19T07:31:33.877122Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -3144,7 +2729,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -3162,36 +2747,6 @@ 
"display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "247.323px" - }, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_activations.ipynb b/nbs/01_activations.ipynb index 38a440b..067b62d 100644 --- a/nbs/01_activations.ipynb +++ b/nbs/01_activations.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:27.761520Z", - "start_time": "2020-09-19T07:39:27.123512Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:27.766214Z", - "start_time": "2020-09-19T07:39:27.762895Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export activations" + "#default_exp activations" ] }, { @@ -66,16 +57,11 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:30.846492Z", - "start_time": "2020-09-19T07:39:30.492725Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# forked from https://github.com/rwightman/pytorch-image-models/timm/models/layers/activations.py\n", "import torch\n", "from torch import nn as nn\n", @@ -91,16 +77,11 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:52.285184Z", - "start_time": "2020-09-19T07:39:52.258095Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def swish(x, inplace: bool = False):\n", " \"\"\"Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", " return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())\n", @@ -124,16 +105,11 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:59:58.190958Z", - "start_time": "2020-09-19T07:59:58.181329Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def swish_jit(x, inplace: bool = False):\n", " \"\"\"Jit version of Swish.\n", @@ -142,7 +118,7 @@ " return x.mul(x.sigmoid())\n", "\n", "class SwishJit(nn.Module):\n", - " \"\"\"Jit version of Swish. 
\n", + " \"\"\"Jit version of Swish.\n", " Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", " def __init__(self, inplace: bool = False):\n", " super(SwishJit, self).__init__()\n", @@ -160,16 +136,11 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:06:09.243621Z", - "start_time": "2020-09-19T08:06:09.204597Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "\n", "@torch.jit.script\n", "def swish_jit_bwd(x, grad_output):\n", @@ -215,16 +186,11 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:48:01.044576Z", - "start_time": "2020-09-19T07:48:01.036045Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def mish(x, inplace: bool = False):\n", " \"\"\"Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681\n", " NOTE: I don't have a working inplace variant\n", @@ -251,26 +217,21 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:59:31.493260Z", - "start_time": "2020-09-19T07:59:31.464218Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def mish_jit(x, _inplace: bool = False):\n", - " \"\"\"Jit version of Mish. \n", + " \"\"\"Jit version of Mish.\n", " Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681\n", " \"\"\"\n", " return x.mul(F.softplus(x).tanh())\n", "\n", "class MishJit(nn.Module):\n", " def __init__(self, inplace: bool = False):\n", - " \"\"\"Jit version of Mish. \n", + " \"\"\"Jit version of Mish.\n", " Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681\"\"\"\n", " super(MishJit, self).__init__()\n", "\n", @@ -287,16 +248,11 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:12:49.922152Z", - "start_time": "2020-09-19T08:12:49.903635Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "\n", "@torch.jit.script\n", "def mish_jit_fwd(x):\n", @@ -336,7 +292,7 @@ " super(MishMe, self).__init__()\n", "\n", " def forward(self, x):\n", - " return MishJitAutoFn.apply(x)\n" + " return MishJitAutoFn.apply(x)" ] }, { @@ -355,16 +311,11 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:42:36.529178Z", - "start_time": "2020-09-19T07:42:36.519813Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def hard_swish(x, inplace: bool = False):\n", " \"\"\"Hard swish activation function\"\"\"\n", " inner = F.relu6(x + 3.).div_(6.)\n", @@ -394,7 +345,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def hard_swish_jit(x, inplace: bool = False):\n", " # return x * (F.relu6(x + 3.) 
/ 6)\n", @@ -418,16 +369,11 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:12:29.059919Z", - "start_time": "2020-09-19T08:12:29.044200Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "\n", "@torch.jit.script\n", "def hard_swish_jit_fwd(x):\n", @@ -477,16 +423,11 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:50:17.702309Z", - "start_time": "2020-09-19T07:50:17.690530Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def hard_mish(x, inplace: bool = False):\n", " \"\"\" Hard Mish\n", " Experimental, based on notes by Mish author Diganta Misra at\n", @@ -532,16 +473,11 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:01:27.116469Z", - "start_time": "2020-09-19T08:01:27.106399Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def hard_mish_jit(x, inplace: bool = False):\n", " \"\"\" Hard Mish\n", @@ -576,7 +512,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def hard_mish_jit_fwd(x):\n", " return 0.5 * x * (x + 2).clamp(min=0, max=2)\n", @@ -634,7 +570,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "act_fn = Swish(inplace=True)" ] }, @@ -644,7 +580,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "act_fn = Mish(inplace=True)" ] }, @@ -659,13 +595,8 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:13:09.610451Z", - "start_time": "2020-09-19T08:13:09.045551Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -684,7 +615,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -702,36 +633,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "341.333px" - }, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_layers.ipynb b/nbs/01_layers.ipynb index 4308e2a..fd04abc 100644 --- a/nbs/01_layers.ipynb +++ b/nbs/01_layers.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:28.613591Z", - "start_time": "2020-09-15T09:02:28.034529Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:28.724960Z", - "start_time": "2020-09-15T09:02:28.712914Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { 
"name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export layers" + "#default_exp layers" ] }, { @@ -49,35 +40,25 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:29.280009Z", - "start_time": "2020-09-15T09:02:29.274755Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:29.881698Z", - "start_time": "2020-09-15T09:02:29.560082Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import torch\n", - "from torch.nn.utils import spectral_norm # weight_norm, \n", + "from torch.nn.utils import spectral_norm # weight_norm,\n", "from collections import OrderedDict" ] }, @@ -90,16 +71,11 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:59.401503Z", - "start_time": "2020-09-15T09:02:59.394107Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Flatten(nn.Module):\n", " '''flat x to vector'''\n", " def __init__(self):\n", @@ -116,23 +92,18 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:03:06.151213Z", - "start_time": "2020-09-15T09:03:06.144829Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def noop(x): return x\n", "\n", - "class Noop(nn.Module): \n", + "class Noop(nn.Module):\n", " '''Dummy module'''\n", " def __init__(self):\n", " super().__init__()\n", - " \n", + "\n", " def forward(self, x):\n", " return x" ] @@ -146,32 +117,27 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:03:32.218769Z", - "start_time": "2020-09-15T09:03:32.206265Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "act_fn = nn.ReLU(inplace=True)\n", "\n", "class ConvLayer(nn.Sequential):\n", " \"\"\"Basic conv layers block\"\"\"\n", " Conv2d = nn.Conv2d\n", - " def __init__(self, ni, nf, ks=3, stride=1, \n", - " act=True, act_fn=act_fn, \n", - " bn_layer=True, bn_1st=True, zero_bn=False, \n", + " def __init__(self, ni, nf, ks=3, stride=1,\n", + " act=True, act_fn=act_fn,\n", + " bn_layer=True, bn_1st=True, zero_bn=False,\n", " padding=None, bias=False, groups=1, **kwargs):\n", "\n", - " if padding==None: padding = ks//2 \n", + " if padding==None: padding = ks//2\n", " layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]\n", " act_bn = [('act_fn', act_fn)] if act else []\n", " if bn_layer:\n", " bn = nn.BatchNorm2d(nf)\n", - " nn.init.constant_(bn.weight, 0. if zero_bn else 1.) \n", + " nn.init.constant_(bn.weight, 0. 
if zero_bn else 1.)\n", " act_bn += [('bn', bn)]\n", " if bn_1st: act_bn.reverse()\n", " layers += act_bn\n", @@ -180,13 +146,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:12.561785Z", - "start_time": "2020-09-15T09:04:12.552909Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -198,7 +159,7 @@ ")" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -210,13 +171,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:13.100662Z", - "start_time": "2020-09-15T09:04:13.083970Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -227,7 +183,7 @@ ")" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -239,13 +195,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:13.619415Z", - "start_time": "2020-09-15T09:04:13.614435Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -256,7 +207,7 @@ ")" ] }, - "execution_count": 11, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -268,13 +219,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:14.115042Z", - "start_time": "2020-09-15T09:04:14.100292Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -286,7 +232,7 @@ ")" ] }, - "execution_count": 12, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -298,13 +244,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:14.808440Z", - "start_time": "2020-09-15T09:04:14.798904Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -316,7 +257,7 @@ ")" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -328,16 +269,11 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:09:38.178529Z", - "start_time": "2020-09-15T09:09:38.161811Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 32, 32, 32)\n", "y = conv_layer(xb)\n", @@ -354,16 +290,11 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:06:35.947787Z", - "start_time": "2020-09-15T09:06:35.932076Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# SA module from mxresnet at fastai. 
todo - add persons!!!\n", "\n", "#Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", @@ -380,7 +311,7 @@ "class SimpleSelfAttention(nn.Module):\n", " def __init__(self, n_in:int, ks=1, sym=False):#, n_out:int):\n", " super().__init__()\n", - " self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False) \n", + " self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False)\n", " self.gamma = nn.Parameter(torch.tensor([0.]))\n", " self.sym = sym\n", " self.n_in = n_in\n", @@ -390,7 +321,7 @@ " c = self.conv.weight.view(self.n_in,self.n_in)\n", " c = (c + c.t())/2\n", " self.conv.weight = c.view(self.n_in,self.n_in,1)\n", - " size = x.size() \n", + " size = x.size()\n", " x = x.view(*size[:2],-1) # (C,N)\n", " # changed the order of mutiplication to avoid O(N^2) complexity\n", " # (x*xT)*(W*x) instead of (x*(xT*(W*x)))\n", @@ -410,16 +341,11 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:06:43.209459Z", - "start_time": "2020-09-15T09:06:43.195928Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class SEBlock(nn.Module):\n", " \"se block\"\n", " se_layer = nn.Linear\n", @@ -434,7 +360,7 @@ " ('se_act', self.act_fn),\n", " ('fc_expand', self.se_layer(ch, c, bias=False)),\n", " ('sigmoid', nn.Sigmoid())\n", - " ])) \n", + " ]))\n", "\n", " def forward(self, x):\n", " bs, c, _, _ = x.shape\n", @@ -445,28 +371,18 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:02.304137Z", - "start_time": "2020-09-15T09:07:02.299521Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# return torch.mul(x, y.expand_as(x))# " ] }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:02.934031Z", - "start_time": "2020-09-15T09:07:02.922985Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -482,7 +398,7 @@ ")" ] }, - "execution_count": 23, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -494,13 +410,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:05.796153Z", - "start_time": "2020-09-15T09:07:05.732761Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -511,7 +422,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 128, 32, 32)\n", "y = se_block(xb)\n", @@ -528,16 +439,11 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:07.917795Z", - "start_time": "2020-09-15T09:07:07.883347Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class SEBlockConv(nn.Module):\n", " \"se block with conv on excitation\"\n", " se_layer = nn.Conv2d\n", @@ -558,18 +464,13 @@ " def forward(self, x):\n", " y = self.squeeze(x)\n", " y = self.excitation(y)\n", - " return x * y.expand_as(x) " + " return x * y.expand_as(x)" ] }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:10.529716Z", - "start_time": "2020-09-15T09:07:10.520001Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -585,7 +486,7 @@ 
")" ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -597,13 +498,8 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:12.126044Z", - "start_time": "2020-09-15T09:07:12.106701Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -614,7 +510,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 128, 32, 32)\n", "y = se_block(xb)\n", @@ -633,13 +529,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:29:31.169362Z", - "start_time": "2020-09-19T07:29:31.163089Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -650,7 +541,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -675,31 +566,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/03_MXResNet.ipynb b/nbs/03_MXResNet.ipynb index c24b1b5..7910e53 100644 --- a/nbs/03_MXResNet.ipynb +++ b/nbs/03_MXResNet.ipynb @@ -26,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export mxresnet" + "#default_exp mxresnet" ] }, { @@ -51,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] @@ -62,9 +62,10 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import sys, torch\n", + "import os\n", "from functools import partial\n", "from collections import OrderedDict\n", "from model_constructor.layers import *\n", @@ -165,7 +166,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.stem" ] }, @@ -183,7 +184,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = mxresnet.stem(xb)\n", @@ -344,7 +345,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.body" ] }, @@ -501,7 +502,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.body" ] }, @@ -526,7 +527,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.head" ] }, @@ -544,7 +545,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = mxresnet()(xb)\n", @@ -558,8 +559,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", - "%nbdev_export_and_show\n", + "#export\n", "mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()}\n", "mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters)\n", "mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters)" @@ -647,7 +647,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from 
nbdev.export import *\n", "notebook2script()" ] diff --git a/nbs/04_YaResNet.ipynb b/nbs/04_YaResNet.ipynb index 87a5136..d41879d 100644 --- a/nbs/04_YaResNet.ipynb +++ b/nbs/04_YaResNet.ipynb @@ -26,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export yaresnet" + "#default_exp yaresnet" ] }, { @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] @@ -55,7 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import sys, torch\n", "from functools import partial\n", @@ -78,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# YaResBlock - former NewResBlock.\n", "# Yet another ResNet.\n", "class YaResBlock(nn.Module):\n", " '''YaResBlock. Reduce by pool instead of stride 2'''\n", " se_block = SEBlock\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False,\n", " groups=1, dw=False):\n", @@ -97,7 +97,7 @@ " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ] if expansion == 1 else [\n", " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, \n", + " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", " groups= nh if dw else groups)), # stride 1 !!!\n", " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", @@ -107,7 +107,7 @@ " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", " self.merge =act_fn\n", "\n", - " def forward(self, x): \n", + " def forward(self, x):\n", " o = self.reduce(x)\n", " return self.merge(self.convs(o) + self.idconv(o))" ] @@ -145,7 +145,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(1,64,64,sa=True)\n", "bl" ] @@ -164,7 +164,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ -211,7 +211,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(1,64,64,se=True)\n", "bl" ] @@ -230,7 +230,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ -278,7 +278,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] @@ -297,7 +297,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -345,7 +345,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, groups=4)\n", "bl" ] @@ -364,7 +364,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -412,7 +412,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, dw=True)\n", "bl" ] @@ -431,7 +431,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -535,7 +535,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.stem" ] }, @@ -553,7 
+553,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = yaresnet.stem(xb)\n", @@ -714,7 +714,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.body" ] }, @@ -739,7 +739,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.head" ] }, @@ -943,7 +943,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.act_fn = Mish()\n", "yaresnet()" ] @@ -968,7 +968,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(),\n", " 'stem_sizes': [3,32,64,64], 'stem_stride_on': 1}\n", "yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters)\n", @@ -1044,7 +1044,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -1084,7 +1084,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] diff --git a/nbs/05_Twist.ipynb b/nbs/05_Twist.ipynb index 83a54ca..2dd28aa 100644 --- a/nbs/05_Twist.ipynb +++ b/nbs/05_Twist.ipynb @@ -26,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export twist" + "#default_exp twist" ] }, { @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] @@ -55,7 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "from functools import partial\n", "from collections import OrderedDict\n", "from model_constructor.layers import *\n", @@ -68,7 +68,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import sys, torch\n", "nn = torch.nn\n", "F = torch.nn.functional" @@ -87,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class ConvTwist(nn.Module):\n", " '''Replacement for Conv2d (kernelsize 3x3)'''\n", " permute = True\n", @@ -138,7 +138,7 @@ " else:\n", " KK[a*i:a*(i+1),b*(i-1):b*i] = kernel[a*i:a*(i+1)]\n", " return KK\n", - " \n", + "\n", " def _conv(self, inpt, kernel=None):\n", " if kernel is None:\n", " kernel = self.conv.weight\n", @@ -299,8 +299,8 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", - "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist \n", + "#export\n", + "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist\n", " Conv2d = ConvTwist" ] }, @@ -664,7 +664,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class NewResBlockTwist(nn.Module):\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,\n", @@ -750,7 +750,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -815,7 +815,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -884,7 +884,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -925,7 +925,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(1,64,64,sa=True)\n", "bl" ] @@ -944,7 +944,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ 
-992,7 +992,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] @@ -1011,7 +1011,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1032,9 +1032,9 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class ResBlockTwist(nn.Module):\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, **kvargs):\n", " super().__init__()\n", @@ -1116,7 +1116,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1181,7 +1181,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1250,7 +1250,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1697,7 +1697,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model.stem(xb)\n", @@ -1719,7 +1719,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1750,7 +1750,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -2190,7 +2190,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "m" ] }, @@ -2208,7 +2208,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = m(xb)\n", @@ -2761,7 +2761,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] diff --git a/nbs/10_base_constructor.ipynb b/nbs/10_base_constructor.ipynb index 9172408..c85ee82 100644 --- a/nbs/10_base_constructor.ipynb +++ b/nbs/10_base_constructor.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:19.736730Z", - "start_time": "2020-09-15T07:44:19.154933Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:20.901006Z", - "start_time": "2020-09-15T07:44:20.888522Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export base_constructor" + "#default_exp base_constructor" ] }, { @@ -49,32 +40,22 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:43.817966Z", - "start_time": "2020-09-15T07:44:43.812740Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:53.716094Z", - "start_time": "2020-09-15T07:47:53.712373Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import 
torch\n", "from collections import OrderedDict\n", @@ -83,16 +64,11 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:46:31.796210Z", - "start_time": "2020-09-15T07:46:31.790752Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "act_fn = nn.ReLU(inplace=True)" ] }, @@ -105,30 +81,25 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:48.264407Z", - "start_time": "2020-09-15T07:44:48.221947Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Stem(nn.Sequential):\n", " \"\"\"Base stem\"\"\"\n", - " def __init__(self, c_in=3, stem_sizes=[], stem_out=64, \n", - " conv_layer=ConvLayer, stride_on=0, \n", - " stem_bn_last=False, bn_1st=True, \n", - " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs): \n", + " def __init__(self, c_in=3, stem_sizes=[], stem_out=64,\n", + " conv_layer=ConvLayer, stride_on=0,\n", + " stem_bn_last=False, bn_1st=True,\n", + " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs):\n", " self.sizes = [c_in] + stem_sizes + [stem_out]\n", " num_layers = len(self.sizes)-1\n", - " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i+1], \n", + " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i+1],\n", " stride=2 if i==stride_on else 1, act=True,\n", - " bn_layer=not stem_bn_last if i==num_layers-1 else True, \n", + " bn_layer=not stem_bn_last if i==num_layers-1 else True,\n", " bn_1st=bn_1st, **kwargs))\n", " for i in range(num_layers)]\n", - " if stem_use_pool: stem += [('pool', stem_pool)] \n", + " if stem_use_pool: stem += [('pool', stem_pool)]\n", " if stem_bn_last: stem.append(('bn', nn.BatchNorm2d(stem_out)))\n", " super().__init__(OrderedDict(stem))\n", " def extra_repr(self):\n", @@ -137,13 +108,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:48.987592Z", - "start_time": "2020-09-15T07:44:48.965376Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -159,7 +125,7 @@ ")" ] }, - "execution_count": 6, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -171,13 +137,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:51.733455Z", - "start_time": "2020-09-15T07:44:51.703154Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -185,7 +146,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": 7, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -198,13 +159,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:52.270167Z", - "start_time": "2020-09-15T07:44:52.254111Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -220,7 +176,7 @@ ")" ] }, - "execution_count": 8, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -232,13 +188,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:53.128168Z", - "start_time": "2020-09-15T07:44:53.022016Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -246,7 +197,7 @@ 
"torch.Size([8, 64, 32, 32])" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -259,13 +210,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:53.838929Z", - "start_time": "2020-09-15T07:44:53.829075Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -286,7 +232,7 @@ ")" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -298,13 +244,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:54.773691Z", - "start_time": "2020-09-15T07:44:54.716307Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -312,7 +253,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": 11, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -325,13 +266,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:55.640343Z", - "start_time": "2020-09-15T07:44:55.637028Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "assert y.shape, torch.Size([64, 64, 32, 32])" @@ -339,13 +275,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:58.194349Z", - "start_time": "2020-09-15T07:44:58.173191Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -366,7 +297,7 @@ ")" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -392,16 +323,11 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:02.853520Z", - "start_time": "2020-09-15T07:47:02.843712Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs):\n", " '''Base downsample for res-like blocks'''\n", " return conv_layer(ni, nf, ks, stride, act, **kwargs)" @@ -409,16 +335,11 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:03.166537Z", - "start_time": "2020-09-15T07:47:03.157366Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class BasicBlock(nn.Module):\n", " \"\"\"Basic block (simplified) as in pytorch resnet\"\"\"\n", " def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False,\n", @@ -439,7 +360,7 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))\n" + " return self.act_conn(self.merge(out + identity))" ] }, { @@ -448,7 +369,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Bottleneck(nn.Module):\n", " '''Bottlneck block for resnet models'''\n", " def __init__(self, ni, nh, expansion=4, stride=1, zero_bn=False,\n", @@ -479,15 +400,10 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:10.025598Z", - "start_time": "2020-09-15T07:47:10.009381Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class BasicLayer(nn.Sequential):\n", " '''Layer from blocks'''\n", " def __init__(self, block, 
blocks, ni, nf, expansion, stride, sa=False,**kwargs):\n", @@ -496,13 +412,13 @@ " self.blocks = blocks\n", " self.expansion = expansion\n", " super().__init__(OrderedDict(\n", - " [(f'block_{i}', block(ni if i==0 else nf, nf, expansion, \n", + " [(f'block_{i}', block(ni if i==0 else nf, nf, expansion,\n", " stride if i==0 else 1,\n", - " sa=sa if i==blocks-1 else False, \n", + " sa=sa if i==blocks-1 else False,\n", " **kwargs))\n", " for i in range(blocks)]))\n", " def extra_repr(self):\n", - " return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' " + " return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'" ] }, { @@ -514,44 +430,33 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:10.025598Z", - "start_time": "2020-09-15T07:47:10.009381Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", - "class Body(nn.Sequential): \n", + "#export\n", + "class Body(nn.Sequential):\n", " '''Constructor for body'''\n", - " def __init__(self, block, \n", - " body_in=64, body_out=512, \n", + " def __init__(self, block,\n", + " body_in=64, body_out=512,\n", " bodylayer=BasicLayer, expansion=1,\n", " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", - " sa=False, **kwargs): \n", + " sa=False, **kwargs):\n", " layer_szs = [body_in//expansion] + layer_szs + [body_out]\n", " num_layers = len(layer_szs)-1\n", - " layers = [(f\"layer_{i}\", bodylayer(block, blocks[i], \n", + " layers = [(f\"layer_{i}\", bodylayer(block, blocks[i],\n", " layer_szs[i], layer_szs[i+1], expansion,\n", - " 1 if i==0 else 2, \n", - " sa=sa if i==0 else False, \n", + " 1 if i==0 else 2,\n", + " sa=sa if i==0 else False,\n", " **kwargs))\n", " for i in range(num_layers)]\n", - " super().__init__(OrderedDict(layers))\n", - " " + " super().__init__(OrderedDict(layers))" ] }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:32.232273Z", - "start_time": "2020-09-15T07:47:32.159289Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -704,7 +609,7 @@ ")" ] }, - "execution_count": 24, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -716,13 +621,8 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:33.801922Z", - "start_time": "2020-09-15T07:47:33.684159Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -875,26 +775,21 @@ ")" ] }, - "execution_count": 25, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "body = Body(BasicBlock, bn_1st=False)\n", "body" ] }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:34.729432Z", - "start_time": "2020-09-15T07:47:34.620483Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -914,13 +809,8 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:35.655255Z", - "start_time": "2020-09-15T07:47:35.575441Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -960,26 +850,21 @@ ")" ] }, - "execution_count": 27, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": 
[ - "%nbdev_hide\n", + "#hide\n", "body = Body(BasicBlock, sa=True)\n", "body.layer_0" ] }, { "cell_type": "code", - "execution_count": 28, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:36.664623Z", - "start_time": "2020-09-15T07:47:36.519727Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -990,7 +875,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 64, 32, 32)\n", "y = body(xb)\n", @@ -1007,16 +892,11 @@ }, { "cell_type": "code", - "execution_count": 29, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:39.115248Z", - "start_time": "2020-09-15T07:47:39.107197Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Head(nn.Sequential):\n", " '''base head'''\n", " def __init__(self, ni, nf, **kwargs):\n", @@ -1029,13 +909,8 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:58.484497Z", - "start_time": "2020-09-15T07:47:58.480454Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "head = Head(512, 10)" @@ -1043,13 +918,8 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:59.217773Z", - "start_time": "2020-09-15T07:47:59.207843Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1057,7 +927,7 @@ "torch.Size([8, 10])" ] }, - "execution_count": 33, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1085,16 +955,11 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:01.277470Z", - "start_time": "2020-09-15T07:48:01.270200Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def init_model(model, nonlinearity='leaky_relu'):\n", " '''Init model'''\n", " for m in model.modules():\n", @@ -1105,30 +970,25 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:03.022644Z", - "start_time": "2020-09-15T07:48:02.983887Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Net(nn.Sequential):\n", " '''Constructor for model'''\n", " def __init__(self, stem=Stem,\n", " body=Body, block=BasicBlock, sa=False,\n", " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", - " head=Head, \n", - " c_in=3, num_classes=1000, \n", + " head=Head,\n", + " c_in=3, num_classes=1000,\n", " body_in=64, body_out=512, expansion=1,\n", " init_fn=init_model, **kwargs):\n", " self.init_model=init_fn\n", " super().__init__(OrderedDict([\n", " ('stem', stem(c_in=c_in,stem_out=body_in, **kwargs)),\n", - " ('body', body(block, body_in, body_out, \n", - " layer_szs=layer_szs, blocks=blocks, expansion=expansion, \n", + " ('body', body(block, body_in, body_out,\n", + " layer_szs=layer_szs, blocks=blocks, expansion=expansion,\n", " sa=sa, **kwargs)),\n", " ('head', head(body_out*expansion, num_classes, **kwargs))\n", " ]))\n", @@ -1137,13 +997,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:04.552204Z", - "start_time": "2020-09-15T07:48:04.392342Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model = Net()" @@ -1151,13 +1006,8 @@ }, { 
"cell_type": "code", - "execution_count": 37, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:05.372416Z", - "start_time": "2020-09-15T07:48:05.367071Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1326,7 +1176,7 @@ ")" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1337,13 +1187,8 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:06.841792Z", - "start_time": "2020-09-15T07:48:06.575545Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1351,7 +1196,7 @@ "torch.Size([16, 1000])" ] }, - "execution_count": 38, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1365,13 +1210,8 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:07.771461Z", - "start_time": "2020-09-15T07:48:07.599293Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1411,26 +1251,21 @@ ")" ] }, - "execution_count": 39, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model = Net(bn_1st=False)\n", "model.body.layer_0" ] }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:08.706261Z", - "start_time": "2020-09-15T07:48:08.567145Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1441,7 +1276,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 8\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model(xb)\n", @@ -1451,13 +1286,8 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:09.591909Z", - "start_time": "2020-09-15T07:48:09.433365Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1626,26 +1456,21 @@ ")" ] }, - "execution_count": 41, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model = Net(sa=True)\n", "model" ] }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:10.888883Z", - "start_time": "2020-09-15T07:48:10.772022Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1656,7 +1481,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 8\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model(xb)\n", @@ -1674,13 +1499,8 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:53.788843Z", - "start_time": "2020-09-15T07:48:53.151412Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1700,7 +1520,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -1732,31 +1552,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - 
"title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/11_xresnet.ipynb b/nbs/11_xresnet.ipynb index 806b913..6a1a5cd 100644 --- a/nbs/11_xresnet.ipynb +++ b/nbs/11_xresnet.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:46.593635Z", - "start_time": "2020-09-15T07:58:45.999276Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:46.598475Z", - "start_time": "2020-09-15T07:58:46.595162Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export xresnet" + "#default_exp xresnet" ] }, { @@ -49,32 +40,22 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:46.651217Z", - "start_time": "2020-09-15T07:58:46.647967Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:47.536442Z", - "start_time": "2020-09-15T07:58:47.235892Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import torch\n", "from collections import OrderedDict" @@ -82,16 +63,11 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:50.981128Z", - "start_time": "2020-09-15T07:59:50.975818Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "from model_constructor.base_constructor import *\n", "from model_constructor.layers import ConvLayer, Noop, act_fn" ] @@ -105,16 +81,11 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:33.445205Z", - "start_time": "2020-09-15T08:00:33.438414Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class DownsampleLayer(nn.Sequential):\n", " \"\"\"Downsample layer for Xresnet Resblock\"\"\"\n", " def __init__(self, conv_layer, ni, nf, stride, act,\n", @@ -128,16 +99,11 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:52.802349Z", - "start_time": "2020-09-15T07:59:52.767093Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class XResBlock(nn.Module):\n", " def __init__(self, ni, nh, expansion=1, stride=1, zero_bn=True,\n", " conv_layer=ConvLayer, act_fn=act_fn, **kwargs):\n", @@ -160,14 +126,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:53.363415Z", - "start_time": "2020-09-15T07:59:53.262651Z" - }, - "scrolled": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -379,7 +339,7 @@ ")" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -391,13 +351,8 
@@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:54.659184Z", - "start_time": "2020-09-15T07:59:54.243419Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -405,7 +360,7 @@ "torch.Size([16, 2048, 4, 4])" ] }, - "execution_count": 14, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -425,16 +380,11 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:57.404159Z", - "start_time": "2020-09-15T07:59:57.394665Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def xresnet18(**kwargs):\n", " \"\"\"Constructs a xresnet-18 model. \"\"\"\n", " return Net(stem_sizes=[32,32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs)\n", @@ -448,13 +398,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:57.999165Z", - "start_time": "2020-09-15T07:59:57.797163Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -647,7 +592,7 @@ ")" ] }, - "execution_count": 16, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -658,13 +603,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:59.209879Z", - "start_time": "2020-09-15T07:59:58.860378Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -985,7 +925,7 @@ ")" ] }, - "execution_count": 17, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -996,13 +936,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:00.927778Z", - "start_time": "2020-09-15T08:00:00.557930Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model = xresnet50()" @@ -1010,13 +945,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:01.052196Z", - "start_time": "2020-09-15T08:00:01.040363Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1042,7 +972,7 @@ ")" ] }, - "execution_count": 19, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1053,13 +983,8 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:01.914108Z", - "start_time": "2020-09-15T08:00:01.884870Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1439,7 +1364,7 @@ ")" ] }, - "execution_count": 20, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1450,13 +1375,8 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:02.593246Z", - "start_time": "2020-09-15T08:00:02.587983Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1468,7 +1388,7 @@ ")" ] }, - "execution_count": 21, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1479,13 +1399,8 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:03.624497Z", - "start_time": "2020-09-15T08:00:03.210627Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1493,7 
+1408,7 @@ "torch.Size([8, 1000])" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1532,7 +1447,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -1550,36 +1465,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "215.323px" - }, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/index.ipynb b/nbs/index.ipynb index fd4eab7..3b2fa6d 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -17,7 +17,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "%reload_ext autoreload\n", "%autoreload 2" ] @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from functools import partial\n", "import torch" ] @@ -39,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from model_constructor.layers import *" ] }, @@ -441,7 +441,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model()" ] }, @@ -819,7 +819,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body" ] }, @@ -837,7 +837,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -862,7 +862,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.block_sizes" ] }, @@ -1147,7 +1147,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model()" ] }, @@ -1232,7 +1232,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.stem.conv_1" ] }, @@ -1275,7 +1275,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body.l_0.bl_0" ] }, @@ -1293,7 +1293,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -1363,7 +1363,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.name = 'YaResNet'\n", "model" ] @@ -1415,7 +1415,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body.l_1.bl_0" ] }, @@ -1433,7 +1433,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -1461,31 +1461,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": 
true, - "toc_window_display": true } }, "nbformat": 4, From 1c0861aa890372e6f8baf881f79db83bccd79388 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Thu, 12 Nov 2020 07:01:25 +0000 Subject: [PATCH 2/3] cleared code --- model_constructor/_nbdev.py | 19 - model_constructor/activations.py | 171 +-------- model_constructor/base_constructor.py | 127 ++++--- model_constructor/layers.py | 81 ++-- model_constructor/mxresnet.py | 9 +- model_constructor/net.py | 210 +++++----- nbs/00_Net.ipynb | 238 +++++++----- nbs/01_activations.ipynb | 420 +++++--------------- nbs/01_layers.ipynb | 81 ++-- nbs/03_MXResNet.ipynb | 252 ++++++++---- nbs/10_base_constructor.ipynb | 527 +++++++++++++++++--------- setup.cfg | 9 + 12 files changed, 1060 insertions(+), 1084 deletions(-) create mode 100644 setup.cfg diff --git a/model_constructor/_nbdev.py b/model_constructor/_nbdev.py index 22fca4a..127fa0c 100644 --- a/model_constructor/_nbdev.py +++ b/model_constructor/_nbdev.py @@ -9,14 +9,6 @@ "Net": "10_base_constructor.ipynb", "xresnet34": "11_xresnet.ipynb", "xresnet50": "11_xresnet.ipynb", - "swish": "01_activations.ipynb", - "Swish": "01_activations.ipynb", - "swish_jit": "01_activations.ipynb", - "SwishJit": "01_activations.ipynb", - "swish_jit_bwd": "01_activations.ipynb", - "SwishJitAutoFn": "01_activations.ipynb", - "swish_me": "01_activations.ipynb", - "SwishMe": "01_activations.ipynb", "mish": "01_activations.ipynb", "Mish": "01_activations.ipynb", "mish_jit": "01_activations.ipynb", @@ -26,17 +18,6 @@ "MishJitAutoFn": "01_activations.ipynb", "mish_me": "01_activations.ipynb", "MishMe": "01_activations.ipynb", - "hard_swish": "01_activations.ipynb", - "HardSwish": "01_activations.ipynb", - "hard_swish_jit": "01_activations.ipynb", - "HardSwishJit": "01_activations.ipynb", - "hard_swish_jit_fwd": "01_activations.ipynb", - "hard_swish_jit_bwd": "01_activations.ipynb", - "HardSwishJitAutoFn": "01_activations.ipynb", - "hard_swish_me": "01_activations.ipynb", - "HardSwishMe": "01_activations.ipynb", - "hard_mish": "01_activations.ipynb", - "HardMish": "01_activations.ipynb", "hard_mish_jit": "01_activations.ipynb", "HardMishJit": "01_activations.ipynb", "hard_mish_jit_fwd": "01_activations.ipynb", diff --git a/model_constructor/activations.py b/model_constructor/activations.py index a8cf539..19aabaf 100644 --- a/model_constructor/activations.py +++ b/model_constructor/activations.py @@ -1,10 +1,8 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/01_activations.ipynb (unless otherwise specified). 
-__all__ = ['swish', 'Swish', 'swish_jit', 'SwishJit', 'swish_jit_bwd', 'SwishJitAutoFn', 'swish_me', 'SwishMe', 'mish', - 'Mish', 'mish_jit', 'MishJit', 'mish_jit_fwd', 'mish_jit_bwd', 'MishJitAutoFn', 'mish_me', 'MishMe', - 'hard_swish', 'HardSwish', 'hard_swish_jit', 'HardSwishJit', 'hard_swish_jit_fwd', 'hard_swish_jit_bwd', - 'HardSwishJitAutoFn', 'hard_swish_me', 'HardSwishMe', 'hard_mish', 'HardMish', 'hard_mish_jit', - 'HardMishJit', 'hard_mish_jit_fwd', 'hard_mish_jit_bwd', 'HardMishJitAutoFn', 'hard_mish_me', 'HardMishMe'] +__all__ = ['mish', 'Mish', 'mish_jit', 'MishJit', 'mish_jit_fwd', 'mish_jit_bwd', 'MishJitAutoFn', 'mish_me', 'MishMe', + 'hard_mish_jit', 'HardMishJit', 'hard_mish_jit_fwd', 'hard_mish_jit_bwd', 'HardMishJitAutoFn', + 'hard_mish_me', 'HardMishMe'] # Cell # forked from https://github.com/rwightman/pytorch-image-models/timm/models/layers/activations.py @@ -12,73 +10,6 @@ from torch import nn as nn from torch.nn import functional as F -# Cell -def swish(x, inplace: bool = False): - """Swish - Described in: https://arxiv.org/abs/1710.05941""" - return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid()) - -class Swish(nn.Module): - """Swish - Described in: https://arxiv.org/abs/1710.05941""" - def __init__(self, inplace: bool = False): - super(Swish, self).__init__() - self.inplace = inplace - - def forward(self, x): - return swish(x, self.inplace) - -# Cell -@torch.jit.script -def swish_jit(x, inplace: bool = False): - """Jit version of Swish. - Swish- Described in: https://arxiv.org/abs/1710.05941 - """ - return x.mul(x.sigmoid()) - -class SwishJit(nn.Module): - """Jit version of Swish. - Swish - Described in: https://arxiv.org/abs/1710.05941""" - def __init__(self, inplace: bool = False): - super(SwishJit, self).__init__() - - def forward(self, x): - return swish_jit(x) - -# Cell - -@torch.jit.script -def swish_jit_bwd(x, grad_output): - x_sigmoid = torch.sigmoid(x) - return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid))) - - -class SwishJitAutoFn(torch.autograd.Function): - """ torch.jit.script optimised Swish w/ memory-efficient checkpoint - Inspired by conversation btw Jeremy Howard & Adam Pazske - https://twitter.com/jeremyphoward/status/1188251041835315200 - """ - - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return swish_jit_fwd(x) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - return swish_jit_bwd(x, grad_output) - - -def swish_me(x, inplace=False): - return SwishJitAutoFn.apply(x) - - -class SwishMe(nn.Module): - def __init__(self, inplace: bool = False): - super(SwishMe, self).__init__() - - def forward(self, x): - return SwishJitAutoFn.apply(x) - # Cell def mish(x, inplace: bool = False): """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 @@ -104,6 +35,7 @@ def mish_jit(x, _inplace: bool = False): """ return x.mul(F.softplus(x).tanh()) + class MishJit(nn.Module): def __init__(self, inplace: bool = False): """Jit version of Mish. @@ -114,10 +46,9 @@ def forward(self, x): return mish_jit(x) # Cell - @torch.jit.script def mish_jit_fwd(x): -# return x.mul(torch.tanh(F.softplus(x))) + # return x.mul(torch.tanh(F.softplus(x))) return x.mul(F.softplus(x).tanh()) @@ -155,98 +86,6 @@ def __init__(self, inplace: bool = False): def forward(self, x): return MishJitAutoFn.apply(x) -# Cell -def hard_swish(x, inplace: bool = False): - """Hard swish activation function""" - inner = F.relu6(x + 3.).div_(6.) 
- return x.mul_(inner) if inplace else x.mul(inner) - - -class HardSwish(nn.Module): - """Hard swish activation function""" - def __init__(self, inplace: bool = False): - super(HardSwish, self).__init__() - self.inplace = inplace - - def forward(self, x): - return hard_swish(x, self.inplace) - -# Cell -@torch.jit.script -def hard_swish_jit(x, inplace: bool = False): - # return x * (F.relu6(x + 3.) / 6) - return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? - - -class HardSwishJit(nn.Module): - def __init__(self, inplace: bool = False): - super(HardSwishJit, self).__init__() - - def forward(self, x): - return hard_swish_jit(x) - -# Cell - -@torch.jit.script -def hard_swish_jit_fwd(x): - return x * (x + 3).clamp(min=0, max=6).div(6.) - - -@torch.jit.script -def hard_swish_jit_bwd(x, grad_output): - m = torch.ones_like(x) * (x >= 3.) - m = torch.where((x >= -3.) & (x <= 3.), x / 3. + .5, m) - return grad_output * m - - -class HardSwishJitAutoFn(torch.autograd.Function): - """A memory efficient, jit-scripted HardSwish activation""" - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return hard_swish_jit_fwd(x) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - return hard_swish_jit_bwd(x, grad_output) - - -def hard_swish_me(x, inplace=False): - """A memory efficient, jit-scripted HardSwish activation""" - return HardSwishJitAutoFn.apply(x) - - -class HardSwishMe(nn.Module): - """A memory efficient, jit-scripted HardSwish activation""" - def __init__(self, inplace: bool = False): - super(HardSwishMe, self).__init__() - - def forward(self, x): - return HardSwishJitAutoFn.apply(x) - -# Cell -def hard_mish(x, inplace: bool = False): - """ Hard Mish - Experimental, based on notes by Mish author Diganta Misra at - https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md - """ - if inplace: - return x.mul_(0.5 * (x + 2).clamp(min=0, max=2)) - else: - return 0.5 * x * (x + 2).clamp(min=0, max=2) - - -class HardMish(nn.Module): - """Hard Mish, Experimental, based on notes by Mish author Diganta Misra at - https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md""" - def __init__(self, inplace: bool = False): - super(HardMish, self).__init__() - self.inplace = inplace - - def forward(self, x): - return hard_mish(x, self.inplace) - # Cell @torch.jit.script def hard_mish_jit(x, inplace: bool = False): diff --git a/model_constructor/base_constructor.py b/model_constructor/base_constructor.py index 600f2b4..11ceabf 100644 --- a/model_constructor/base_constructor.py +++ b/model_constructor/base_constructor.py @@ -5,48 +5,57 @@ # Cell import torch.nn as nn -import torch from collections import OrderedDict from .layers import ConvLayer, Noop, Flatten + # Cell act_fn = nn.ReLU(inplace=True) + # Cell class Stem(nn.Sequential): """Base stem""" - def __init__(self, c_in=3, stem_sizes=[], stem_out=64, - conv_layer=ConvLayer, stride_on=0, - stem_bn_last=False, bn_1st=True, - stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs): + + def __init__(self, c_in=3, stem_sizes=[], stem_out=64, + conv_layer=ConvLayer, stride_on=0, + stem_bn_last=False, bn_1st=True, + stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), + **kwargs + ): self.sizes = [c_in] + stem_sizes + [stem_out] - num_layers = len(self.sizes)-1 - stem = [(f"conv_{i}", conv_layer(self.sizes[i], self.sizes[i+1], - stride=2 if i==stride_on 
else 1, act=True, - bn_layer=not stem_bn_last if i==num_layers-1 else True, + num_layers = len(self.sizes) - 1 + stem = [(f"conv_{i}", conv_layer(self.sizes[i], self.sizes[i + 1], + stride=2 if i == stride_on else 1, act=True, + bn_layer=not stem_bn_last if i == num_layers - 1 else True, bn_1st=bn_1st, **kwargs)) - for i in range(num_layers)] - if stem_use_pool: stem += [('pool', stem_pool)] - if stem_bn_last: stem.append(('bn', nn.BatchNorm2d(stem_out))) + for i in range(num_layers)] + if stem_use_pool: + stem += [('pool', stem_pool)] + if stem_bn_last: + stem.append(('bn', nn.BatchNorm2d(stem_out))) super().__init__(OrderedDict(stem)) + def extra_repr(self): - return f"sizes: {self.sizes}" + return f"sizes: {self.sizes}" + # Cell def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs): '''Base downsample for res-like blocks''' return conv_layer(ni, nf, ks, stride, act, **kwargs) + # Cell class BasicBlock(nn.Module): """Basic block (simplified) as in pytorch resnet""" - def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False, - conv_layer=ConvLayer, act_fn=act_fn, - downsample_block=DownsampleBlock, **kwargs): + def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False, + conv_layer=ConvLayer, act_fn=act_fn, + downsample_block=DownsampleBlock, **kwargs): super().__init__() - self.downsample = not ni==nf or stride==2 + self.downsample = not ni == nf or stride == 2 self.conv = nn.Sequential(OrderedDict([ - ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)), ('conv_1', conv_layer(nf, nf, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))])) if self.downsample: self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, **kwargs) @@ -60,23 +69,24 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) + # Cell class Bottleneck(nn.Module): '''Bottlneck block for resnet models''' def __init__(self, ni, nh, expansion=4, stride=1, zero_bn=False, - conv_layer=ConvLayer, act_fn=act_fn, + conv_layer=ConvLayer, act_fn=act_fn, downsample_block=DownsampleBlock, **kwargs): -# groups=1, base_width=64, dilation=1, norm_layer=None super().__init__() - self.downsample = not ni==nh or stride==2 - ni = ni*expansion - nf = nh*expansion + self.downsample = not ni == nh or stride == 2 + ni = ni * expansion + nf = nh * expansion self.conv = nn.Sequential(OrderedDict([ - ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)), - ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)), # noqa: E241 + ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), # noqa: E241 ('conv_2', conv_layer(nh, nf, ks=1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))])) if self.downsample: - self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, act_fn=act_fn, **kwargs) + self.downsample = downsample_block(conv_layer, ni, nf, ks=1, + stride=stride, act=False, act_fn=act_fn, **kwargs) self.merge = Noop() self.act_conn = act_fn @@ -87,22 +97,25 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) + # Cell class BasicLayer(nn.Sequential): '''Layer from blocks''' - def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False,**kwargs): + def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False, **kwargs): self.ni = ni self.nf = 
nf self.blocks = blocks self.expansion = expansion super().__init__(OrderedDict( - [(f'block_{i}', block(ni if i==0 else nf, nf, expansion, - stride if i==0 else 1, - sa=sa if i==blocks-1 else False, + [(f'block_{i}', block(ni if i == 0 else nf, nf, expansion, + stride if i == 0 else 1, + sa=sa if i == blocks - 1 else False, **kwargs)) - for i in range(blocks)])) + for i in range(blocks)])) + def extra_repr(self): - return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' + return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' + # Cell class Body(nn.Sequential): @@ -110,18 +123,19 @@ class Body(nn.Sequential): def __init__(self, block, body_in=64, body_out=512, bodylayer=BasicLayer, expansion=1, - layer_szs=[64,128,256,], blocks=[2,2,2,2], + layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2], sa=False, **kwargs): - layer_szs = [body_in//expansion] + layer_szs + [body_out] - num_layers = len(layer_szs)-1 + layer_szs = [body_in // expansion] + layer_szs + [body_out] + num_layers = len(layer_szs) - 1 layers = [(f"layer_{i}", bodylayer(block, blocks[i], - layer_szs[i], layer_szs[i+1], expansion, - 1 if i==0 else 2, - sa=sa if i==0 else False, + layer_szs[i], layer_szs[i + 1], expansion, + stride=1 if i == 0 else 2, + sa=sa if i == 0 else False, **kwargs)) - for i in range(num_layers)] + for i in range(num_layers)] super().__init__(OrderedDict(layers)) + # Cell class Head(nn.Sequential): '''base head''' @@ -129,33 +143,34 @@ def __init__(self, ni, nf, **kwargs): super().__init__(OrderedDict( [('pool', nn.AdaptiveAvgPool2d((1, 1))), ('flat', Flatten()), - ('fc', nn.Linear(ni, nf)), + ('fc', nn.Linear(ni, nf)), ])) + # Cell def init_model(model, nonlinearity='leaky_relu'): '''Init model''' for m in model.modules(): -# if isinstance(m, nn.Conv2d): - if isinstance(m, (nn.Conv2d, nn.Linear)): - nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity) + if isinstance(m, (nn.Conv2d, nn.Linear)): + nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity) + # Cell class Net(nn.Sequential): '''Constructor for model''' - def __init__(self, stem=Stem, + def __init__(self, stem=Stem, body=Body, block=BasicBlock, sa=False, - layer_szs=[64,128,256,], blocks=[2,2,2,2], + layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2], head=Head, - c_in=3, num_classes=1000, + c_in=3, num_classes=1000, body_in=64, body_out=512, expansion=1, - init_fn=init_model, **kwargs): - self.init_model=init_fn - super().__init__(OrderedDict([ - ('stem', stem(c_in=c_in,stem_out=body_in, **kwargs)), - ('body', body(block, body_in, body_out, - layer_szs=layer_szs, blocks=blocks, expansion=expansion, - sa=sa, **kwargs)), - ('head', head(body_out*expansion, num_classes, **kwargs)) - ])) - self.init_model(self) \ No newline at end of file + init_fn=init_model, **kwargs): + self.init_model = init_fn + super().__init__(OrderedDict( + [('stem', stem(c_in=c_in, stem_out=body_in, **kwargs)), + ('body', body(block, body_in, body_out, + layer_szs=layer_szs, blocks=blocks, expansion=expansion, + sa=sa, **kwargs)), + ('head', head(body_out * expansion, num_classes, **kwargs)) + ])) + self.init_model(self) diff --git a/model_constructor/layers.py b/model_constructor/layers.py index 3b251d6..4c09ac2 100644 --- a/model_constructor/layers.py +++ b/model_constructor/layers.py @@ -5,7 +5,7 @@ # Cell import torch.nn as nn import torch -from torch.nn.utils import spectral_norm # weight_norm, +from torch.nn.utils import 
spectral_norm from collections import OrderedDict # Cell @@ -13,10 +13,14 @@ class Flatten(nn.Module): '''flat x to vector''' def __init__(self): super().__init__() - def forward(self, x): return x.view(x.size(0), -1) + + def forward(self, x): + return x.view(x.size(0), -1) # Cell -def noop(x): return x +def noop(x): + return x + class Noop(nn.Module): '''Dummy module''' @@ -29,58 +33,64 @@ def forward(self, x): # Cell act_fn = nn.ReLU(inplace=True) + class ConvLayer(nn.Sequential): """Basic conv layers block""" Conv2d = nn.Conv2d + def __init__(self, ni, nf, ks=3, stride=1, - act=True, act_fn=act_fn, - bn_layer=True, bn_1st=True, zero_bn=False, - padding=None, bias=False, groups=1, **kwargs): + act=True, act_fn=act_fn, + bn_layer=True, bn_1st=True, zero_bn=False, + padding=None, bias=False, groups=1, **kwargs): - if padding==None: padding = ks//2 + if padding is None: + padding = ks // 2 layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))] act_bn = [('act_fn', act_fn)] if act else [] if bn_layer: bn = nn.BatchNorm2d(nf) nn.init.constant_(bn.weight, 0. if zero_bn else 1.) act_bn += [('bn', bn)] - if bn_1st: act_bn.reverse() + if bn_1st: + act_bn.reverse() layers += act_bn super().__init__(OrderedDict(layers)) # Cell # SA module from mxresnet at fastai. todo - add persons!!! - -#Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py -def conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False): +# Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py +def conv1d(ni: int, no: int, ks: int = 1, stride: int = 1, padding: int = 0, bias: bool = False): "Create and initialize a `nn.Conv1d` layer with spectral normalization." 
conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias=bias) nn.init.kaiming_normal_(conv.weight) - if bias: conv.bias.data.zero_() + if bias: + conv.bias.data.zero_() return spectral_norm(conv) -# Adapted from SelfAttention layer at https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py +# Adapted from SelfAttention layer at +# https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py # Inspired by https://arxiv.org/pdf/1805.08318.pdf class SimpleSelfAttention(nn.Module): - def __init__(self, n_in:int, ks=1, sym=False):#, n_out:int): + def __init__(self, n_in: int, ks=1, sym=False): # , n_out:int): super().__init__() - self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False) + self.conv = conv1d(n_in, n_in, ks, padding=ks // 2, bias=False) self.gamma = nn.Parameter(torch.tensor([0.])) self.sym = sym self.n_in = n_in - def forward(self,x): + + def forward(self, x): if self.sym: # symmetry hack by https://github.com/mgrankin - c = self.conv.weight.view(self.n_in,self.n_in) - c = (c + c.t())/2 - self.conv.weight = c.view(self.n_in,self.n_in,1) + c = self.conv.weight.view(self.n_in, self.n_in) + c = (c + c.t()) / 2 + self.conv.weight = c.view(self.n_in, self.n_in, 1) size = x.size() - x = x.view(*size[:2],-1) # (C,N) + x = x.view(*size[:2], -1) # (C,N) # changed the order of mutiplication to avoid O(N^2) complexity # (x*xT)*(W*x) instead of (x*(xT*(W*x))) convx = self.conv(x) # (C,C) * (C,N) = (C,N) => O(NC^2) - xxT = torch.bmm(x,x.permute(0,2,1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2) + xxT = torch.bmm(x, x.permute(0, 2, 1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2) o = torch.bmm(xxT, convx) # (C,C) * (C,N) = (C,N) => O(NC^2) o = self.gamma * o + x return o.view(*size).contiguous() @@ -90,17 +100,17 @@ class SEBlock(nn.Module): "se block" se_layer = nn.Linear act_fn = nn.ReLU(inplace=True) + def __init__(self, c, r=16): super().__init__() ch = c // r self.squeeze = nn.AdaptiveAvgPool2d(1) self.excitation = nn.Sequential( - OrderedDict([ - ('fc_reduce', self.se_layer(c, ch, bias=False)), - ('se_act', self.act_fn), - ('fc_expand', self.se_layer(ch, c, bias=False)), - ('sigmoid', nn.Sigmoid()) - ])) + OrderedDict([('fc_reduce', self.se_layer(c, ch, bias=False)), + ('se_act', self.act_fn), + ('fc_expand', self.se_layer(ch, c, bias=False)), + ('sigmoid', nn.Sigmoid()) + ])) def forward(self, x): bs, c, _, _ = x.shape @@ -113,18 +123,19 @@ class SEBlockConv(nn.Module): "se block with conv on excitation" se_layer = nn.Conv2d act_fn = nn.ReLU(inplace=True) + def __init__(self, c, r=16): super().__init__() # c_in = math.ceil(c//r/8)*8 - c_in = c//r - + c_in = c // r self.squeeze = nn.AdaptiveAvgPool2d(1) - self.excitation = nn.Sequential(OrderedDict([ - ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)), - ('se_act', self.act_fn), - ('conv_expand', self.se_layer(c_in, c, 1, bias=False)), - ('sigmoid', nn.Sigmoid()) - ])) + self.excitation = nn.Sequential( + OrderedDict([ + ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)), + ('se_act', self.act_fn), + ('conv_expand', self.se_layer(c_in, c, 1, bias=False)), + ('sigmoid', nn.Sigmoid()) + ])) def forward(self, x): y = self.squeeze(x) diff --git a/model_constructor/mxresnet.py b/model_constructor/mxresnet.py index 6ed334d..b36761c 100644 --- a/model_constructor/mxresnet.py +++ b/model_constructor/mxresnet.py @@ -3,16 +3,11 @@ __all__ = ['mxresnet_parameters', 'mxresnet34', 'mxresnet50'] # Cell -import torch.nn as nn -import sys, torch 
-import os from functools import partial -from collections import OrderedDict -from .layers import * from .net import Net from .activations import Mish # Cell mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()} -mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters) -mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters) \ No newline at end of file +mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters) +mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters) \ No newline at end of file diff --git a/model_constructor/net.py b/model_constructor/net.py index 69022be..b939482 100644 --- a/model_constructor/net.py +++ b/model_constructor/net.py @@ -4,7 +4,6 @@ # Cell import torch.nn as nn -import sys, torch from functools import partial from collections import OrderedDict from .layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten @@ -12,67 +11,86 @@ # Cell act_fn = nn.ReLU(inplace=True) -def init_cnn(m): - if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0) - if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight) - for l in m.children(): init_cnn(l) + +def init_cnn(module: nn.Module): + if getattr(module, 'bias', None) is not None: + nn.init.constant_(module.bias, 0) + if isinstance(module, (nn.Conv2d, nn.Linear)): + nn.init.kaiming_normal_(module.weight) + for layer in module.children(): + init_cnn(layer) # Cell class ResBlock(nn.Module): '''Resnet block''' se_block = SEBlock + def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False): super().__init__() - nf,ni = nh*expansion,ni*expansion - if groups != 1: groups = int(nh/groups) - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] - if se: layers.append(('se', self.se_block(nf, se_reduction))) - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + nf, ni = nh * expansion, ni * expansion + if groups != 1: + groups = int(nh / groups) + if expansion == 1: + layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + else: + layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + if se: + layers.append(('se', self.se_block(nf, se_reduction))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.pool = noop if stride==1 else pool - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.act_fn =act_fn + self.pool = noop if stride == 
1 else pool + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.act_fn = act_fn - def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) + def forward(self, x): + return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) # Cell # NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet. + + class NewResBlock(nn.Module): '''YaResnet block''' se_block = SEBlock + def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, - pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, se_reduction=16, + pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False): super().__init__() - nf,ni = nh*expansion,ni*expansion - if groups != 1: groups = int(nh/groups) - self.reduce = noop if stride==1 else pool - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] - if se: layers.append(('se', self.se_block(nf, se_reduction))) - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + nf, ni = nh * expansion, ni * expansion + if groups != 1: + groups = int(nh / groups) + self.reduce = noop if stride == 1 else pool + if expansion == 1: + layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + else: + layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + if se: + layers.append(('se', self.se_block(nf, se_reduction))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.merge =act_fn + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.merge = act_fn def forward(self, x): o = self.reduce(x) @@ -80,75 +98,77 @@ def forward(self, x): # Cell def _make_stem(self): - stem = [(f"conv_{i}", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i+1], -# stride=2 if i==0 else 1, - stride=2 if i==self.stem_stride_on else 1, - bn_layer=(not self.stem_bn_end) if i==(len(self.stem_sizes)-2) else True, - act_fn=self.act_fn, bn_1st=self.bn_1st)) - for i in range(len(self.stem_sizes)-1)] - stem.append(('stem_pool', self.stem_pool)) - if self.stem_bn_end: stem.append(('norm', self.norm(self.stem_sizes[-1]))) - return nn.Sequential(OrderedDict(stem)) + stem = [(f"conv_{i}", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i + 1], + stride=2 if i == self.stem_stride_on else 1, + bn_layer=(not self.stem_bn_end) if i == (len(self.stem_sizes) - 2) else True, + act_fn=self.act_fn, bn_1st=self.bn_1st)) + for i in range(len(self.stem_sizes) - 1)] + stem.append(('stem_pool', self.stem_pool)) + if self.stem_bn_end: + stem.append(('norm', 
self.norm(self.stem_sizes[-1]))) + return nn.Sequential(OrderedDict(stem)) # Cell -def _make_layer(self,expansion,ni,nf,blocks,stride,sa): - return nn.Sequential(OrderedDict( - [(f"bl_{i}", self.block(expansion, ni if i==0 else nf, nf, - stride if i==0 else 1, sa=sa if i==blocks-1 else False, - conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool, - zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, dw=self.dw, se=self.se)) - for i in range(blocks)])) +def _make_layer(self, expansion, ni, nf, blocks, stride, sa): + layers = [(f"bl_{i}", self.block(expansion, ni if i == 0 else nf, nf, + stride if i == 0 else 1, sa=sa if i == blocks - 1 else False, + conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool, + zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, + dw=self.dw, se=self.se)) + for i in range(blocks)] + return nn.Sequential(OrderedDict(layers)) # Cell def _make_body(self): - blocks = [(f"l_{i}", self._make_layer(self,self.expansion, - self.block_sizes[i], self.block_sizes[i+1], l, - 1 if i==0 else 2, self.sa if i==0 else False)) - for i,l in enumerate(self.layers)] - return nn.Sequential(OrderedDict(blocks)) + blocks = [(f"l_{i}", self._make_layer(self, self.expansion, + ni=self.block_sizes[i], nf=self.block_sizes[i + 1], + blocks=l, stride=1 if i == 0 else 2, + sa=self.sa if i == 0 else False)) + for i, l in enumerate(self.layers)] + return nn.Sequential(OrderedDict(blocks)) # Cell def _make_head(self): - head = [('pool', nn.AdaptiveAvgPool2d(1)), - ('flat', Flatten()), - ('fc', nn.Linear(self.block_sizes[-1]*self.expansion, self.c_out))] - return nn.Sequential(OrderedDict(head)) + head = [('pool', nn.AdaptiveAvgPool2d(1)), + ('flat', Flatten()), + ('fc', nn.Linear(self.block_sizes[-1] * self.expansion, self.c_out))] + return nn.Sequential(OrderedDict(head)) # Cell class Net(): """Model constructor. 
As default - xresnet18""" def __init__(self, name='Net', c_in=3, c_out=1000, - block=ResBlock, conv_layer = ConvLayer, - block_sizes = [64,128,256,512], layers=[2,2,2,2], - norm = nn.BatchNorm2d, - act_fn=nn.ReLU(inplace=True), - pool = nn.AvgPool2d(2, ceil_mode=True), - expansion=1, groups = 1, dw=False, - sa=False, se=False, se_reduction=16, - bn_1st = True, - zero_bn=True, - stem_stride_on = 0, - stem_sizes = [32,32,64], - stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1), - stem_bn_end = False, - - _init_cnn = init_cnn, - _make_stem = _make_stem, - _make_layer = _make_layer, - _make_body = _make_body, - _make_head = _make_head, - ): + block=ResBlock, conv_layer=ConvLayer, + block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2], + norm=nn.BatchNorm2d, + act_fn=nn.ReLU(inplace=True), + pool=nn.AvgPool2d(2, ceil_mode=True), + expansion=1, groups=1, dw=False, + sa=False, se=False, se_reduction=16, + bn_1st=True, + zero_bn=True, + stem_stride_on=0, + stem_sizes=[32, 32, 64], + stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), + stem_bn_end=False, + _init_cnn=init_cnn, + _make_stem=_make_stem, + _make_layer=_make_layer, + _make_body=_make_body, + _make_head=_make_head, + ): super().__init__() params = locals() del params['self'] self.__dict__ = params self._block_sizes = params['block_sizes'] - if self.stem_sizes[0] != self.c_in: self.stem_sizes = [self.c_in] + self.stem_sizes + if self.stem_sizes[0] != self.c_in: + self.stem_sizes = [self.c_in] + self.stem_sizes @property def block_sizes(self): - return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4) + return [self.stem_sizes[-1] // self.expansion] + self._block_sizes + [256] * (len(self.layers) - 4) @property def stem(self): @@ -168,18 +188,18 @@ def __call__(self): ('body', self.body), ('head', self.head)])) self._init_cnn(model) - model.extra_repr = lambda : f"model {self.name}" + model.extra_repr = lambda: f"model {self.name}" return model def __repr__(self): return (f"{self.name} constructor\n" - f" c_in: {self.c_in}, c_out: {self.c_out}\n" - f" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\n" - f" sa: {self.sa}, se: {self.se}\n" - f" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\n" - f" body sizes {self._block_sizes}\n" - f" layers: {self.layers}") + f" c_in: {self.c_in}, c_out: {self.c_out}\n" + f" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\n" + f" sa: {self.sa}, se: {self.se}\n" + f" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\n" + f" body sizes {self._block_sizes}\n" + f" layers: {self.layers}") # Cell -xresnet34 = partial(Net, name='xresnet34', expansion=1, layers = [3, 4, 6, 3]) -xresnet50 = partial(Net, name='xresnet34', expansion=4, layers = [3, 4, 6, 3]) \ No newline at end of file +xresnet34 = partial(Net, name='xresnet34', expansion=1, layers=[3, 4, 6, 3]) +xresnet50 = partial(Net, name='xresnet34', expansion=4, layers=[3, 4, 6, 3]) \ No newline at end of file diff --git a/nbs/00_Net.ipynb b/nbs/00_Net.ipynb index 60df77b..b2d3353 100644 --- a/nbs/00_Net.ipynb +++ b/nbs/00_Net.ipynb @@ -35,7 +35,8 @@ "metadata": {}, "outputs": [], "source": [ - "#hide" + "#hide\n", + "import torch #sys" ] }, { @@ -66,7 +67,6 @@ "source": [ "#export\n", "import torch.nn as nn\n", - "import sys, torch\n", "from functools import partial\n", "from collections import OrderedDict\n", "from model_constructor.layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten" @@ -97,10 +97,14 @@ "#export\n", 
"act_fn = nn.ReLU(inplace=True)\n", "\n", - "def init_cnn(m):\n", - " if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0)\n", - " if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight)\n", - " for l in m.children(): init_cnn(l)" + "\n", + "def init_cnn(module: nn.Module):\n", + " if getattr(module, 'bias', None) is not None:\n", + " nn.init.constant_(module.bias, 0)\n", + " if isinstance(module, (nn.Conv2d, nn.Linear)):\n", + " nn.init.kaiming_normal_(module.weight)\n", + " for layer in module.children():\n", + " init_cnn(layer)" ] }, { @@ -120,30 +124,37 @@ "class ResBlock(nn.Module):\n", " '''Resnet block'''\n", " se_block = SEBlock\n", + "\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " if groups != 1: groups = int(nh/groups)\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)),\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)),\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ]\n", - " if se: layers.append(('se', self.se_block(nf, se_reduction)))\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " if groups != 1:\n", + " groups = int(nh / groups)\n", + " if expansion == 1:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " else:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " if se:\n", + " layers.append(('se', self.se_block(nf, se_reduction)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.pool = noop if stride==1 else pool\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.act_fn =act_fn\n", + " self.pool = noop if stride == 1 else pool\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.act_fn = act_fn\n", "\n", - " def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" + " def forward(self, x):\n", + " return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" ] }, { @@ -375,31 +386,39 @@ "source": [ "#export\n", "# NewResBlock now is YaResBlock - Yet Another ResNet Block! 
It is now at model_constructor.yaresnet.\n", + "\n", + "\n", "class NewResBlock(nn.Module):\n", " '''YaResnet block'''\n", " se_block = SEBlock\n", + "\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", - " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, se_reduction=16,\n", + " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " if groups != 1: groups = int(nh/groups)\n", - " self.reduce = noop if stride==1 else pool\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ]\n", - " if se: layers.append(('se', self.se_block(nf, se_reduction)))\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " if groups != 1:\n", + " groups = int(nh / groups)\n", + " self.reduce = noop if stride == 1 else pool\n", + " if expansion == 1:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " else:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " if se:\n", + " layers.append(('se', self.se_block(nf, se_reduction)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.merge =act_fn\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.merge = act_fn\n", "\n", " def forward(self, x):\n", " o = self.reduce(x)\n", @@ -669,15 +688,15 @@ "source": [ "#export\n", "def _make_stem(self):\n", - " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i+1],\n", - "# stride=2 if i==0 else 1,\n", - " stride=2 if i==self.stem_stride_on else 1,\n", - " bn_layer=(not self.stem_bn_end) if i==(len(self.stem_sizes)-2) else True,\n", - " act_fn=self.act_fn, bn_1st=self.bn_1st))\n", - " for i in range(len(self.stem_sizes)-1)]\n", - " stem.append(('stem_pool', self.stem_pool))\n", - " if self.stem_bn_end: stem.append(('norm', self.norm(self.stem_sizes[-1])))\n", - " return nn.Sequential(OrderedDict(stem))" + " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i + 1],\n", + " stride=2 if i == self.stem_stride_on else 1,\n", + " bn_layer=(not self.stem_bn_end) if i == (len(self.stem_sizes) - 2) else True,\n", + " act_fn=self.act_fn, bn_1st=self.bn_1st))\n", + " for i in range(len(self.stem_sizes) - 1)]\n", + " 
stem.append(('stem_pool', self.stem_pool))\n", + " if self.stem_bn_end:\n", + " stem.append(('norm', self.norm(self.stem_sizes[-1])))\n", + " return nn.Sequential(OrderedDict(stem))" ] }, { @@ -687,13 +706,14 @@ "outputs": [], "source": [ "#export\n", - "def _make_layer(self,expansion,ni,nf,blocks,stride,sa):\n", - " return nn.Sequential(OrderedDict(\n", - " [(f\"bl_{i}\", self.block(expansion, ni if i==0 else nf, nf,\n", - " stride if i==0 else 1, sa=sa if i==blocks-1 else False,\n", - " conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,\n", - " zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, dw=self.dw, se=self.se))\n", - " for i in range(blocks)]))" + "def _make_layer(self, expansion, ni, nf, blocks, stride, sa):\n", + " layers = [(f\"bl_{i}\", self.block(expansion, ni if i == 0 else nf, nf,\n", + " stride if i == 0 else 1, sa=sa if i == blocks - 1 else False,\n", + " conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,\n", + " zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups,\n", + " dw=self.dw, se=self.se))\n", + " for i in range(blocks)]\n", + " return nn.Sequential(OrderedDict(layers))" ] }, { @@ -704,11 +724,12 @@ "source": [ "#export\n", "def _make_body(self):\n", - " blocks = [(f\"l_{i}\", self._make_layer(self,self.expansion,\n", - " self.block_sizes[i], self.block_sizes[i+1], l,\n", - " 1 if i==0 else 2, self.sa if i==0 else False))\n", - " for i,l in enumerate(self.layers)]\n", - " return nn.Sequential(OrderedDict(blocks))" + " blocks = [(f\"l_{i}\", self._make_layer(self, self.expansion,\n", + " ni=self.block_sizes[i], nf=self.block_sizes[i + 1],\n", + " blocks=l, stride=1 if i == 0 else 2,\n", + " sa=self.sa if i == 0 else False))\n", + " for i, l in enumerate(self.layers)]\n", + " return nn.Sequential(OrderedDict(blocks))" ] }, { @@ -719,10 +740,10 @@ "source": [ "#export\n", "def _make_head(self):\n", - " head = [('pool', nn.AdaptiveAvgPool2d(1)),\n", - " ('flat', Flatten()),\n", - " ('fc', nn.Linear(self.block_sizes[-1]*self.expansion, self.c_out))]\n", - " return nn.Sequential(OrderedDict(head))" + " head = [('pool', nn.AdaptiveAvgPool2d(1)),\n", + " ('flat', Flatten()),\n", + " ('fc', nn.Linear(self.block_sizes[-1] * self.expansion, self.c_out))]\n", + " return nn.Sequential(OrderedDict(head))" ] }, { @@ -742,37 +763,37 @@ "class Net():\n", " \"\"\"Model constructor. 
As default - xresnet18\"\"\"\n", " def __init__(self, name='Net', c_in=3, c_out=1000,\n", - " block=ResBlock, conv_layer = ConvLayer,\n", - " block_sizes = [64,128,256,512], layers=[2,2,2,2],\n", - " norm = nn.BatchNorm2d,\n", - " act_fn=nn.ReLU(inplace=True),\n", - " pool = nn.AvgPool2d(2, ceil_mode=True),\n", - " expansion=1, groups = 1, dw=False,\n", - " sa=False, se=False, se_reduction=16,\n", - " bn_1st = True,\n", - " zero_bn=True,\n", - " stem_stride_on = 0,\n", - " stem_sizes = [32,32,64],\n", - " stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", - " stem_bn_end = False,\n", - "\n", - " _init_cnn = init_cnn,\n", - " _make_stem = _make_stem,\n", - " _make_layer = _make_layer,\n", - " _make_body = _make_body,\n", - " _make_head = _make_head,\n", - " ):\n", + " block=ResBlock, conv_layer=ConvLayer,\n", + " block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2],\n", + " norm=nn.BatchNorm2d,\n", + " act_fn=nn.ReLU(inplace=True),\n", + " pool=nn.AvgPool2d(2, ceil_mode=True),\n", + " expansion=1, groups=1, dw=False,\n", + " sa=False, se=False, se_reduction=16,\n", + " bn_1st=True,\n", + " zero_bn=True,\n", + " stem_stride_on=0,\n", + " stem_sizes=[32, 32, 64],\n", + " stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", + " stem_bn_end=False,\n", + " _init_cnn=init_cnn,\n", + " _make_stem=_make_stem,\n", + " _make_layer=_make_layer,\n", + " _make_body=_make_body,\n", + " _make_head=_make_head,\n", + " ):\n", " super().__init__()\n", "\n", " params = locals()\n", " del params['self']\n", " self.__dict__ = params\n", " self._block_sizes = params['block_sizes']\n", - " if self.stem_sizes[0] != self.c_in: self.stem_sizes = [self.c_in] + self.stem_sizes\n", + " if self.stem_sizes[0] != self.c_in:\n", + " self.stem_sizes = [self.c_in] + self.stem_sizes\n", "\n", " @property\n", " def block_sizes(self):\n", - " return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4)\n", + " return [self.stem_sizes[-1] // self.expansion] + self._block_sizes + [256] * (len(self.layers) - 4)\n", "\n", " @property\n", " def stem(self):\n", @@ -792,17 +813,17 @@ " ('body', self.body),\n", " ('head', self.head)]))\n", " self._init_cnn(model)\n", - " model.extra_repr = lambda : f\"model {self.name}\"\n", + " model.extra_repr = lambda: f\"model {self.name}\"\n", " return model\n", "\n", " def __repr__(self):\n", " return (f\"{self.name} constructor\\n\"\n", - " f\" c_in: {self.c_in}, c_out: {self.c_out}\\n\"\n", - " f\" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\\n\"\n", - " f\" sa: {self.sa}, se: {self.se}\\n\"\n", - " f\" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\\n\"\n", - " f\" body sizes {self._block_sizes}\\n\"\n", - " f\" layers: {self.layers}\")" + " f\" c_in: {self.c_in}, c_out: {self.c_out}\\n\"\n", + " f\" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\\n\"\n", + " f\" sa: {self.sa}, se: {self.se}\\n\"\n", + " f\" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\\n\"\n", + " f\" body sizes {self._block_sizes}\\n\"\n", + " f\" layers: {self.layers}\")" ] }, { @@ -2487,8 +2508,8 @@ "outputs": [], "source": [ "#export\n", - "xresnet34 = partial(Net, name='xresnet34', expansion=1, layers = [3, 4, 6, 3])\n", - "xresnet50 = partial(Net, name='xresnet34', expansion=4, layers = [3, 4, 6, 3])" + "xresnet34 = partial(Net, name='xresnet34', expansion=1, layers=[3, 4, 6, 3])\n", + "xresnet50 = partial(Net, name='xresnet34', expansion=4, layers=[3, 4, 6, 3])" ] }, { @@ -2747,6 +2768,31 
@@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_activations.ipynb b/nbs/01_activations.ipynb index 067b62d..176827d 100644 --- a/nbs/01_activations.ipynb +++ b/nbs/01_activations.ipynb @@ -2,8 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:00.256061Z", + "start_time": "2020-11-11T16:20:00.027667Z" + }, "hide_input": true }, "outputs": [], @@ -13,18 +17,14 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cells will be exported to model_constructor.activations,\n", - "unless a different module is specified after an export flag: `%nbdev_export special.module`\n" - ] + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:01.430479Z", + "start_time": "2020-11-11T16:20:01.424122Z" } - ], + }, + "outputs": [], "source": [ "#default_exp activations" ] @@ -57,8 +57,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:04.565883Z", + "start_time": "2020-11-11T16:20:04.071041Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -68,115 +73,6 @@ "from torch.nn import functional as F" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Swish" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def swish(x, inplace: bool = False):\n", - " \"\"\"Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", - " return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())\n", - "\n", - "class Swish(nn.Module):\n", - " \"\"\"Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(Swish, self).__init__()\n", - " self.inplace = inplace\n", - "\n", - " def forward(self, x):\n", - " return swish(x, self.inplace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SwishJit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "@torch.jit.script\n", - "def swish_jit(x, inplace: bool = False):\n", - " \"\"\"Jit version of Swish.\n", - " Swish- Described in: https://arxiv.org/abs/1710.05941\n", - " \"\"\"\n", - " return x.mul(x.sigmoid())\n", - "\n", - "class SwishJit(nn.Module):\n", - " \"\"\"Jit version of Swish.\n", - " Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(SwishJit, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return swish_jit(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SwishJitMe - memory-efficient." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "\n", - "@torch.jit.script\n", - "def swish_jit_bwd(x, grad_output):\n", - " x_sigmoid = torch.sigmoid(x)\n", - " return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid)))\n", - "\n", - "\n", - "class SwishJitAutoFn(torch.autograd.Function):\n", - " \"\"\" torch.jit.script optimised Swish w/ memory-efficient checkpoint\n", - " Inspired by conversation btw Jeremy Howard & Adam Pazske\n", - " https://twitter.com/jeremyphoward/status/1188251041835315200\n", - " \"\"\"\n", - "\n", - " @staticmethod\n", - " def forward(ctx, x):\n", - " ctx.save_for_backward(x)\n", - " return swish_jit_fwd(x)\n", - "\n", - " @staticmethod\n", - " def backward(ctx, grad_output):\n", - " x = ctx.saved_tensors[0]\n", - " return swish_jit_bwd(x, grad_output)\n", - "\n", - "\n", - "def swish_me(x, inplace=False):\n", - " return SwishJitAutoFn.apply(x)\n", - "\n", - "\n", - "class SwishMe(nn.Module):\n", - " def __init__(self, inplace: bool = False):\n", - " super(SwishMe, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return SwishJitAutoFn.apply(x)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -186,8 +82,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:08.693991Z", + "start_time": "2020-11-11T16:20:08.687244Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -217,8 +118,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:15.683629Z", + "start_time": "2020-11-11T16:20:15.639490Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -229,6 +135,7 @@ " \"\"\"\n", " return x.mul(F.softplus(x).tanh())\n", "\n", + "\n", "class MishJit(nn.Module):\n", " def __init__(self, inplace: bool = False):\n", " \"\"\"Jit version of Mish.\n", @@ -248,15 +155,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:24.777571Z", + "start_time": "2020-11-11T16:20:24.765170Z" + } + }, "outputs": [], "source": [ "#export\n", - "\n", "@torch.jit.script\n", "def mish_jit_fwd(x):\n", - "# return x.mul(torch.tanh(F.softplus(x)))\n", + " # return x.mul(torch.tanh(F.softplus(x)))\n", " return x.mul(F.softplus(x).tanh())\n", "\n", "\n", @@ -295,175 +206,6 @@ " return MishJitAutoFn.apply(x)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardSwish" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def hard_swish(x, inplace: bool = False):\n", - " \"\"\"Hard swish activation function\"\"\"\n", - " inner = F.relu6(x + 3.).div_(6.)\n", - " return x.mul_(inner) if inplace else x.mul(inner)\n", - "\n", - "\n", - "class HardSwish(nn.Module):\n", - " \"\"\"Hard swish activation function\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardSwish, self).__init__()\n", - " self.inplace = inplace\n", - "\n", - " def forward(self, x):\n", - " return hard_swish(x, self.inplace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardSwishJit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "@torch.jit.script\n", - "def hard_swish_jit(x, inplace: bool = False):\n", - " # return x * (F.relu6(x + 3.) / 6)\n", - " return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster?\n", - "\n", - "\n", - "class HardSwishJit(nn.Module):\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardSwishJit, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return hard_swish_jit(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardSwishJitMe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "\n", - "@torch.jit.script\n", - "def hard_swish_jit_fwd(x):\n", - " return x * (x + 3).clamp(min=0, max=6).div(6.)\n", - "\n", - "\n", - "@torch.jit.script\n", - "def hard_swish_jit_bwd(x, grad_output):\n", - " m = torch.ones_like(x) * (x >= 3.)\n", - " m = torch.where((x >= -3.) & (x <= 3.), x / 3. + .5, m)\n", - " return grad_output * m\n", - "\n", - "\n", - "class HardSwishJitAutoFn(torch.autograd.Function):\n", - " \"\"\"A memory efficient, jit-scripted HardSwish activation\"\"\"\n", - " @staticmethod\n", - " def forward(ctx, x):\n", - " ctx.save_for_backward(x)\n", - " return hard_swish_jit_fwd(x)\n", - "\n", - " @staticmethod\n", - " def backward(ctx, grad_output):\n", - " x = ctx.saved_tensors[0]\n", - " return hard_swish_jit_bwd(x, grad_output)\n", - "\n", - "\n", - "def hard_swish_me(x, inplace=False):\n", - " \"\"\"A memory efficient, jit-scripted HardSwish activation\"\"\"\n", - " return HardSwishJitAutoFn.apply(x)\n", - "\n", - "\n", - "class HardSwishMe(nn.Module):\n", - " \"\"\"A memory efficient, jit-scripted HardSwish activation\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardSwishMe, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return HardSwishJitAutoFn.apply(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardMish" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def hard_mish(x, inplace: bool = False):\n", - " \"\"\" Hard Mish\n", - " Experimental, based on notes by Mish author Diganta Misra at\n", - " https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md\n", - " \"\"\"\n", - " if inplace:\n", - " return x.mul_(0.5 * (x + 2).clamp(min=0, max=2))\n", - " else:\n", - " return 0.5 * x * (x + 2).clamp(min=0, max=2)\n", - "\n", - "\n", - "class HardMish(nn.Module):\n", - " \"\"\"Hard Mish, Experimental, based on notes by Mish author Diganta Misra at\n", - " https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardMish, self).__init__()\n", - " self.inplace = inplace\n", - "\n", - " def forward(self, x):\n", - " return hard_mish(x, self.inplace)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -473,8 +215,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:43.826063Z", + "start_time": "2020-11-11T16:20:43.817902Z" + } + }, 
"outputs": [], "source": [ "#export\n", @@ -508,8 +255,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:46.737592Z", + "start_time": "2020-11-11T16:20:46.698069Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -559,25 +311,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#hide\n", - "act_fn = Swish(inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:52.980532Z", + "start_time": "2020-11-11T16:20:52.975600Z" + } + }, "outputs": [], "source": [ "#hide\n", @@ -595,8 +335,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:24:49.841271Z", + "start_time": "2020-11-11T16:24:49.167581Z" + } + }, "outputs": [ { "name": "stdout", @@ -633,6 +378,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_layers.ipynb b/nbs/01_layers.ipynb index fd04abc..9ef6e68 100644 --- a/nbs/01_layers.ipynb +++ b/nbs/01_layers.ipynb @@ -58,7 +58,7 @@ "#export\n", "import torch.nn as nn\n", "import torch\n", - "from torch.nn.utils import spectral_norm # weight_norm,\n", + "from torch.nn.utils import spectral_norm\n", "from collections import OrderedDict" ] }, @@ -80,7 +80,9 @@ " '''flat x to vector'''\n", " def __init__(self):\n", " super().__init__()\n", - " def forward(self, x): return x.view(x.size(0), -1)" + "\n", + " def forward(self, x):\n", + " return x.view(x.size(0), -1)" ] }, { @@ -97,7 +99,9 @@ "outputs": [], "source": [ "#export\n", - "def noop(x): return x\n", + "def noop(x):\n", + " return x\n", + "\n", "\n", "class Noop(nn.Module):\n", " '''Dummy module'''\n", @@ -124,22 +128,26 @@ "#export\n", "act_fn = nn.ReLU(inplace=True)\n", "\n", + "\n", "class ConvLayer(nn.Sequential):\n", " \"\"\"Basic conv layers block\"\"\"\n", " Conv2d = nn.Conv2d\n", + "\n", " def __init__(self, ni, nf, ks=3, stride=1,\n", - " act=True, act_fn=act_fn,\n", - " bn_layer=True, bn_1st=True, zero_bn=False,\n", - " padding=None, bias=False, groups=1, **kwargs):\n", + " act=True, act_fn=act_fn,\n", + " bn_layer=True, bn_1st=True, zero_bn=False,\n", + " padding=None, bias=False, groups=1, **kwargs):\n", "\n", - " if padding==None: padding = ks//2\n", + " if padding is None:\n", + " padding = ks // 2\n", " layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]\n", " act_bn = [('act_fn', act_fn)] if act else []\n", " if bn_layer:\n", " bn = nn.BatchNorm2d(nf)\n", " nn.init.constant_(bn.weight, 0. 
if zero_bn else 1.)\n", " act_bn += [('bn', bn)]\n", - " if bn_1st: act_bn.reverse()\n", + " if bn_1st:\n", + " act_bn.reverse()\n", " layers += act_bn\n", " super().__init__(OrderedDict(layers))" ] @@ -296,37 +304,39 @@ "source": [ "#export\n", "# SA module from mxresnet at fastai. todo - add persons!!!\n", - "\n", - "#Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", - "def conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False):\n", + "# Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", + "def conv1d(ni: int, no: int, ks: int = 1, stride: int = 1, padding: int = 0, bias: bool = False):\n", " \"Create and initialize a `nn.Conv1d` layer with spectral normalization.\"\n", " conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias=bias)\n", " nn.init.kaiming_normal_(conv.weight)\n", - " if bias: conv.bias.data.zero_()\n", + " if bias:\n", + " conv.bias.data.zero_()\n", " return spectral_norm(conv)\n", "\n", "\n", - "# Adapted from SelfAttention layer at https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", + "# Adapted from SelfAttention layer at\n", + "# https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", "# Inspired by https://arxiv.org/pdf/1805.08318.pdf\n", "class SimpleSelfAttention(nn.Module):\n", - " def __init__(self, n_in:int, ks=1, sym=False):#, n_out:int):\n", + " def __init__(self, n_in: int, ks=1, sym=False): # , n_out:int):\n", " super().__init__()\n", - " self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False)\n", + " self.conv = conv1d(n_in, n_in, ks, padding=ks // 2, bias=False)\n", " self.gamma = nn.Parameter(torch.tensor([0.]))\n", " self.sym = sym\n", " self.n_in = n_in\n", - " def forward(self,x):\n", + "\n", + " def forward(self, x):\n", " if self.sym:\n", " # symmetry hack by https://github.com/mgrankin\n", - " c = self.conv.weight.view(self.n_in,self.n_in)\n", - " c = (c + c.t())/2\n", - " self.conv.weight = c.view(self.n_in,self.n_in,1)\n", + " c = self.conv.weight.view(self.n_in, self.n_in)\n", + " c = (c + c.t()) / 2\n", + " self.conv.weight = c.view(self.n_in, self.n_in, 1)\n", " size = x.size()\n", - " x = x.view(*size[:2],-1) # (C,N)\n", + " x = x.view(*size[:2], -1) # (C,N)\n", " # changed the order of mutiplication to avoid O(N^2) complexity\n", " # (x*xT)*(W*x) instead of (x*(xT*(W*x)))\n", " convx = self.conv(x) # (C,C) * (C,N) = (C,N) => O(NC^2)\n", - " xxT = torch.bmm(x,x.permute(0,2,1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2)\n", + " xxT = torch.bmm(x, x.permute(0, 2, 1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2)\n", " o = torch.bmm(xxT, convx) # (C,C) * (C,N) = (C,N) => O(NC^2)\n", " o = self.gamma * o + x\n", " return o.view(*size).contiguous()" @@ -350,17 +360,17 @@ " \"se block\"\n", " se_layer = nn.Linear\n", " act_fn = nn.ReLU(inplace=True)\n", + "\n", " def __init__(self, c, r=16):\n", " super().__init__()\n", " ch = c // r\n", " self.squeeze = nn.AdaptiveAvgPool2d(1)\n", " self.excitation = nn.Sequential(\n", - " OrderedDict([\n", - " ('fc_reduce', self.se_layer(c, ch, bias=False)),\n", - " ('se_act', self.act_fn),\n", - " ('fc_expand', self.se_layer(ch, c, bias=False)),\n", - " ('sigmoid', nn.Sigmoid())\n", - " ]))\n", + " OrderedDict([('fc_reduce', self.se_layer(c, ch, bias=False)),\n", + " ('se_act', self.act_fn),\n", + " ('fc_expand', self.se_layer(ch, c, 
bias=False)),\n", + " ('sigmoid', nn.Sigmoid())\n", + " ]))\n", "\n", " def forward(self, x):\n", " bs, c, _, _ = x.shape\n", @@ -448,18 +458,19 @@ " \"se block with conv on excitation\"\n", " se_layer = nn.Conv2d\n", " act_fn = nn.ReLU(inplace=True)\n", + "\n", " def __init__(self, c, r=16):\n", " super().__init__()\n", "# c_in = math.ceil(c//r/8)*8\n", - " c_in = c//r\n", - "\n", + " c_in = c // r\n", " self.squeeze = nn.AdaptiveAvgPool2d(1)\n", - " self.excitation = nn.Sequential(OrderedDict([\n", - " ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)),\n", - " ('se_act', self.act_fn),\n", - " ('conv_expand', self.se_layer(c_in, c, 1, bias=False)),\n", - " ('sigmoid', nn.Sigmoid())\n", - " ]))\n", + " self.excitation = nn.Sequential(\n", + " OrderedDict([\n", + " ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)),\n", + " ('se_act', self.act_fn),\n", + " ('conv_expand', self.se_layer(c_in, c, 1, bias=False)),\n", + " ('sigmoid', nn.Sigmoid())\n", + " ]))\n", "\n", " def forward(self, x):\n", " y = self.squeeze(x)\n", diff --git a/nbs/03_MXResNet.ipynb b/nbs/03_MXResNet.ipynb index 7910e53..5cfe5f1 100644 --- a/nbs/03_MXResNet.ipynb +++ b/nbs/03_MXResNet.ipynb @@ -2,29 +2,14 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { - "hide_input": true + "ExecuteTime": { + "end_time": "2020-11-11T17:05:20.113995Z", + "start_time": "2020-11-11T17:05:20.110467Z" + } }, "outputs": [], - "source": [ - "from nbdev import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cells will be exported to model_constructor.mxresnet,\n", - "unless a different module is specified after an export flag: `%nbdev_export special.module`\n" - ] - } - ], "source": [ "#default_exp mxresnet" ] @@ -47,28 +32,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:22.054950Z", + "start_time": "2020-11-11T17:05:21.717082Z" + } + }, "outputs": [], "source": [ "#hide\n", - "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:22.567153Z", + "start_time": "2020-11-11T17:05:22.542806Z" + } + }, "outputs": [], "source": [ "#export\n", - "import torch.nn as nn\n", - "import sys, torch\n", - "import os\n", "from functools import partial\n", - "from collections import OrderedDict\n", - "from model_constructor.layers import *\n", "from model_constructor.net import Net\n", "from model_constructor.activations import Mish" ] @@ -82,8 +72,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:23.723171Z", + "start_time": "2020-11-11T17:05:23.718256Z" + } + }, "outputs": [], "source": [ "mxresnet = Net(stem_sizes = [3, 32, 64, 64], name='MXResNet', act_fn=Mish())" @@ -91,19 +86,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:24.482988Z", + "start_time": "2020-11-11T17:05:24.471791Z" + } + }, "outputs": [ { "data": { "text/plain": [ "MXResNet constructor\n", - " expansion: 1, sa: 0, groups: 1\n", - " 
stem sizes: [3, 32, 64, 64]\n", - " body sizes [64, 64, 128, 256, 512]" + " c_in: 3, c_out: 1000\n", + " expansion: 1, groups: 1, dw: False\n", + " sa: False, se: False\n", + " stem sizes: [3, 32, 64, 64], stide on 0\n", + " body sizes [64, 128, 256, 512]\n", + " layers: [2, 2, 2, 2]" ] }, - "execution_count": null, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -114,8 +117,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:24.801976Z", + "start_time": "2020-11-11T17:05:24.789771Z" + } + }, "outputs": [ { "data": { @@ -123,7 +131,7 @@ "([64, 64, 128, 256, 512], [2, 2, 2, 2])" ] }, - "execution_count": null, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -134,8 +142,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:25.341362Z", + "start_time": "2020-11-11T17:05:25.329372Z" + } + }, "outputs": [ { "data": { @@ -160,7 +173,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -172,8 +185,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:25.914806Z", + "start_time": "2020-11-11T17:05:25.711184Z" + } + }, "outputs": [ { "name": "stdout", @@ -194,8 +212,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:26.270885Z", + "start_time": "2020-11-11T17:05:26.133631Z" + } + }, "outputs": [ { "data": { @@ -339,7 +362,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -351,8 +374,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:26.711041Z", + "start_time": "2020-11-11T17:05:26.633012Z" + } + }, "outputs": [ { "data": { @@ -496,7 +524,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -508,8 +536,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:27.558832Z", + "start_time": "2020-11-11T17:05:27.549740Z" + } + }, "outputs": [ { "data": { @@ -521,7 +554,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -533,8 +566,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:28.582657Z", + "start_time": "2020-11-11T17:05:28.028508Z" + } + }, "outputs": [ { "name": "stdout", @@ -555,20 +593,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:28.587488Z", + "start_time": "2020-11-11T17:05:28.584077Z" + } + }, "outputs": [], "source": [ "#export\n", "mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()}\n", - "mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters)\n", - "mxresnet50 = partial(Net, 
name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters)" + "mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters)\n", + "mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:30.655698Z", + "start_time": "2020-11-11T17:05:30.650994Z" + } + }, "outputs": [], "source": [ "model = mxresnet50(c_out=10)" @@ -576,19 +624,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:32.842793Z", + "start_time": "2020-11-11T17:05:32.837170Z" + } + }, "outputs": [ { "data": { "text/plain": [ "MXResnet50 constructor\n", - " expansion: 4, sa: 0, groups: 1\n", - " stem sizes: [3, 32, 64, 64]\n", - " body sizes [16, 64, 128, 256, 512]" + " c_in: 3, c_out: 10\n", + " expansion: 4, groups: 1, dw: False\n", + " sa: False, se: False\n", + " stem sizes: [3, 32, 64, 64], stide on 0\n", + " body sizes [64, 128, 256, 512]\n", + " layers: [3, 4, 6, 3]" ] }, - "execution_count": null, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -599,8 +655,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:33.206370Z", + "start_time": "2020-11-11T17:05:33.194473Z" + } + }, "outputs": [ { "data": { @@ -608,7 +669,7 @@ "(10, [3, 4, 6, 3])" ] }, - "execution_count": null, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -628,20 +689,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:07:53.000101Z", + "start_time": "2020-11-11T17:07:52.520078Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Converted 00_constructor.ipynb.\n", + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", "Converted 01_layers.ipynb.\n", - "Converted 02_resnet.ipynb.\n", - "Converted 03_xresnet.ipynb.\n", - "Converted 04_Net.ipynb.\n", + "Converted 03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", "Converted 05_Twist.ipynb.\n", - "Converted 06_YaResNet.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -665,6 +732,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/10_base_constructor.ipynb b/nbs/10_base_constructor.ipynb index c85ee82..011659f 100644 --- a/nbs/10_base_constructor.ipynb +++ b/nbs/10_base_constructor.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": 
[], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -40,36 +29,51 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:30:39.411413Z", + "start_time": "2020-11-12T06:30:38.396874Z" + } + }, "outputs": [], "source": [ "#hide\n", "from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:40.474152Z", + "start_time": "2020-11-12T06:31:40.458394Z" + } + }, "outputs": [], "source": [ "#export\n", "import torch.nn as nn\n", - "import torch\n", "from collections import OrderedDict\n", - "from model_constructor.layers import ConvLayer, Noop, Flatten" + "from model_constructor.layers import ConvLayer, Noop, Flatten\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:42.115714Z", + "start_time": "2020-11-12T06:31:42.110964Z" + } + }, "outputs": [], "source": [ "#export\n", - "act_fn = nn.ReLU(inplace=True)" + "act_fn = nn.ReLU(inplace=True)\n" ] }, { @@ -81,35 +85,51 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:57.186999Z", + "start_time": "2020-11-12T06:31:57.175738Z" + } + }, "outputs": [], "source": [ "#export\n", "class Stem(nn.Sequential):\n", " \"\"\"Base stem\"\"\"\n", - " def __init__(self, c_in=3, stem_sizes=[], stem_out=64,\n", - " conv_layer=ConvLayer, stride_on=0,\n", - " stem_bn_last=False, bn_1st=True,\n", - " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs):\n", + "\n", + " def __init__(self, c_in=3, stem_sizes=[], stem_out=64,\n", + " conv_layer=ConvLayer, stride_on=0,\n", + " stem_bn_last=False, bn_1st=True,\n", + " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", + " **kwargs\n", + " ):\n", " self.sizes = [c_in] + stem_sizes + [stem_out]\n", - " num_layers = len(self.sizes)-1\n", - " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i+1],\n", - " stride=2 if i==stride_on else 1, act=True,\n", - " bn_layer=not stem_bn_last if i==num_layers-1 else True,\n", + " num_layers = len(self.sizes) - 1\n", + " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i + 1],\n", + " stride=2 if i == stride_on else 1, act=True,\n", + " bn_layer=not stem_bn_last if i == num_layers - 1 else True,\n", " bn_1st=bn_1st, **kwargs))\n", - " for i in range(num_layers)]\n", - " if stem_use_pool: stem += [('pool', stem_pool)]\n", - " if stem_bn_last: stem.append(('bn', nn.BatchNorm2d(stem_out)))\n", + " for i in range(num_layers)]\n", + " if stem_use_pool:\n", + " stem += [('pool', stem_pool)]\n", + " if stem_bn_last:\n", + " stem.append(('bn', nn.BatchNorm2d(stem_out)))\n", " super().__init__(OrderedDict(stem))\n", + "\n", " def extra_repr(self):\n", - " return f\"sizes: {self.sizes}\"" + " return f\"sizes: {self.sizes}\"\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:58.643757Z", + "start_time": "2020-11-12T06:31:58.624038Z" + } + }, "outputs": [ { "data": { @@ -125,7 +145,7 @@ ")" ] }, - 
"execution_count": null, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -137,8 +157,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:01.418364Z", + "start_time": "2020-11-12T06:32:01.383260Z" + } + }, "outputs": [ { "data": { @@ -146,7 +171,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": null, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -159,8 +184,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:02.220418Z", + "start_time": "2020-11-12T06:32:02.213909Z" + } + }, "outputs": [ { "data": { @@ -176,7 +206,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -188,8 +218,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:03.112277Z", + "start_time": "2020-11-12T06:32:03.078549Z" + } + }, "outputs": [ { "data": { @@ -197,7 +232,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -210,8 +245,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:05.456464Z", + "start_time": "2020-11-12T06:32:05.447631Z" + } + }, "outputs": [ { "data": { @@ -232,7 +272,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -244,8 +284,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:07.526283Z", + "start_time": "2020-11-12T06:32:07.463289Z" + } + }, "outputs": [ { "data": { @@ -253,7 +298,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": null, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -266,8 +311,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:09.662366Z", + "start_time": "2020-11-12T06:32:09.657136Z" + } + }, "outputs": [], "source": [ "assert y.shape, torch.Size([64, 64, 32, 32])" @@ -275,8 +325,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:14.897155Z", + "start_time": "2020-11-12T06:32:14.888814Z" + } + }, "outputs": [ { "data": { @@ -297,7 +352,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -323,32 +378,42 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:26.721501Z", + "start_time": "2020-11-12T06:32:26.716463Z" + } + }, "outputs": [], "source": [ "#export\n", "def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs):\n", " '''Base downsample for res-like blocks'''\n", - " return conv_layer(ni, nf, ks, stride, act, **kwargs)" + " return conv_layer(ni, nf, ks, stride, act, **kwargs)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, 
+ "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:36.693247Z", + "start_time": "2020-11-12T06:32:36.684089Z" + } + }, "outputs": [], "source": [ "#export\n", "class BasicBlock(nn.Module):\n", " \"\"\"Basic block (simplified) as in pytorch resnet\"\"\"\n", - " def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False,\n", - " conv_layer=ConvLayer, act_fn=act_fn,\n", - " downsample_block=DownsampleBlock, **kwargs):\n", + " def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False,\n", + " conv_layer=ConvLayer, act_fn=act_fn,\n", + " downsample_block=DownsampleBlock, **kwargs):\n", " super().__init__()\n", - " self.downsample = not ni==nf or stride==2\n", + " self.downsample = not ni == nf or stride == 2\n", " self.conv = nn.Sequential(OrderedDict([\n", - " ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)),\n", " ('conv_1', conv_layer(nf, nf, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))]))\n", " if self.downsample:\n", " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, **kwargs)\n", @@ -360,32 +425,37 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))" + " return self.act_conn(self.merge(out + identity))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:41.889641Z", + "start_time": "2020-11-12T06:32:41.879334Z" + } + }, "outputs": [], "source": [ "#export\n", "class Bottleneck(nn.Module):\n", " '''Bottlneck block for resnet models'''\n", " def __init__(self, ni, nh, expansion=4, stride=1, zero_bn=False,\n", - " conv_layer=ConvLayer, act_fn=act_fn,\n", + " conv_layer=ConvLayer, act_fn=act_fn,\n", " downsample_block=DownsampleBlock, **kwargs):\n", - "# groups=1, base_width=64, dilation=1, norm_layer=None\n", " super().__init__()\n", - " self.downsample = not ni==nh or stride==2\n", - " ni = ni*expansion\n", - " nf = nh*expansion\n", + " self.downsample = not ni == nh or stride == 2\n", + " ni = ni * expansion\n", + " nf = nh * expansion\n", " self.conv = nn.Sequential(OrderedDict([\n", - " ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)),\n", - " ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)), # noqa: E241\n", + " ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), # noqa: E241\n", " ('conv_2', conv_layer(nh, nf, ks=1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))]))\n", " if self.downsample:\n", - " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, act_fn=act_fn, **kwargs)\n", + " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, \n", + " stride=stride, act=False, act_fn=act_fn, **kwargs)\n", " self.merge = Noop()\n", " self.act_conn = act_fn\n", "\n", @@ -394,31 +464,37 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))" + " return self.act_conn(self.merge(out + identity))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:47.811518Z", + "start_time": "2020-11-12T06:32:47.804120Z" + } + }, "outputs": 
[], "source": [ "#export\n", "class BasicLayer(nn.Sequential):\n", " '''Layer from blocks'''\n", - " def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False,**kwargs):\n", + " def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False, **kwargs):\n", " self.ni = ni\n", " self.nf = nf\n", " self.blocks = blocks\n", " self.expansion = expansion\n", " super().__init__(OrderedDict(\n", - " [(f'block_{i}', block(ni if i==0 else nf, nf, expansion,\n", - " stride if i==0 else 1,\n", - " sa=sa if i==blocks-1 else False,\n", + " [(f'block_{i}', block(ni if i == 0 else nf, nf, expansion,\n", + " stride if i == 0 else 1,\n", + " sa=sa if i == blocks - 1 else False,\n", " **kwargs))\n", - " for i in range(blocks)]))\n", + " for i in range(blocks)]))\n", + "\n", " def extra_repr(self):\n", - " return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'" + " return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'\n" ] }, { @@ -430,8 +506,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:52.952888Z", + "start_time": "2020-11-12T06:32:52.945018Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -440,23 +521,28 @@ " def __init__(self, block,\n", " body_in=64, body_out=512,\n", " bodylayer=BasicLayer, expansion=1,\n", - " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", + " layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2],\n", " sa=False, **kwargs):\n", - " layer_szs = [body_in//expansion] + layer_szs + [body_out]\n", - " num_layers = len(layer_szs)-1\n", + " layer_szs = [body_in // expansion] + layer_szs + [body_out]\n", + " num_layers = len(layer_szs) - 1\n", " layers = [(f\"layer_{i}\", bodylayer(block, blocks[i],\n", - " layer_szs[i], layer_szs[i+1], expansion,\n", - " 1 if i==0 else 2,\n", - " sa=sa if i==0 else False,\n", + " layer_szs[i], layer_szs[i + 1], expansion,\n", + " stride=1 if i == 0 else 2,\n", + " sa=sa if i == 0 else False,\n", " **kwargs))\n", - " for i in range(num_layers)]\n", - " super().__init__(OrderedDict(layers))" + " for i in range(num_layers)]\n", + " super().__init__(OrderedDict(layers))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:53.745373Z", + "start_time": "2020-11-12T06:32:53.672090Z" + } + }, "outputs": [ { "data": { @@ -609,7 +695,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -621,8 +707,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:56.260747Z", + "start_time": "2020-11-12T06:32:56.184140Z" + } + }, "outputs": [ { "data": { @@ -775,7 +866,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -788,8 +879,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:57.657647Z", + "start_time": "2020-11-12T06:32:57.544627Z" + } + }, "outputs": [ { "name": "stdout", @@ -809,8 +905,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": 
"2020-11-12T06:32:59.370043Z", + "start_time": "2020-11-12T06:32:59.303879Z" + } + }, "outputs": [ { "data": { @@ -850,7 +951,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -863,8 +964,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 23, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:01.422419Z", + "start_time": "2020-11-12T06:33:01.279749Z" + } + }, "outputs": [ { "name": "stdout", @@ -892,8 +998,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 25, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:11.171678Z", + "start_time": "2020-11-12T06:33:11.167517Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -903,14 +1014,19 @@ " super().__init__(OrderedDict(\n", " [('pool', nn.AdaptiveAvgPool2d((1, 1))),\n", " ('flat', Flatten()),\n", - " ('fc', nn.Linear(ni, nf)),\n", - " ]))" + " ('fc', nn.Linear(ni, nf)),\n", + " ]))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 26, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:11.594299Z", + "start_time": "2020-11-12T06:33:11.590324Z" + } + }, "outputs": [], "source": [ "head = Head(512, 10)" @@ -918,8 +1034,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:12.744267Z", + "start_time": "2020-11-12T06:33:12.732280Z" + } + }, "outputs": [ { "data": { @@ -927,7 +1048,7 @@ "torch.Size([8, 10])" ] }, - "execution_count": null, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -939,13 +1060,6 @@ "y.shape" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -955,50 +1069,64 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:21.010150Z", + "start_time": "2020-11-12T06:33:21.005904Z" + } + }, "outputs": [], "source": [ "#export\n", "def init_model(model, nonlinearity='leaky_relu'):\n", " '''Init model'''\n", " for m in model.modules():\n", - "# if isinstance(m, nn.Conv2d):\n", - " if isinstance(m, (nn.Conv2d, nn.Linear)):\n", - " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)" + " if isinstance(m, (nn.Conv2d, nn.Linear)):\n", + " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 29, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:33.550879Z", + "start_time": "2020-11-12T06:33:33.542345Z" + } + }, "outputs": [], "source": [ "#export\n", "class Net(nn.Sequential):\n", " '''Constructor for model'''\n", - " def __init__(self, stem=Stem,\n", + " def __init__(self, stem=Stem,\n", " body=Body, block=BasicBlock, sa=False,\n", - " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", + " layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2],\n", " head=Head,\n", - " c_in=3, num_classes=1000,\n", + " c_in=3, num_classes=1000,\n", " body_in=64, body_out=512, expansion=1,\n", - " init_fn=init_model, **kwargs):\n", - " self.init_model=init_fn\n", - " super().__init__(OrderedDict([\n", - " ('stem', stem(c_in=c_in,stem_out=body_in, **kwargs)),\n", - " ('body', body(block, body_in, 
body_out,\n", - " layer_szs=layer_szs, blocks=blocks, expansion=expansion,\n", - " sa=sa, **kwargs)),\n", - " ('head', head(body_out*expansion, num_classes, **kwargs))\n", - " ]))\n", - " self.init_model(self)" + " init_fn=init_model, **kwargs):\n", + " self.init_model = init_fn\n", + " super().__init__(OrderedDict(\n", + " [('stem', stem(c_in=c_in, stem_out=body_in, **kwargs)),\n", + " ('body', body(block, body_in, body_out,\n", + " layer_szs=layer_szs, blocks=blocks, expansion=expansion,\n", + " sa=sa, **kwargs)),\n", + " ('head', head(body_out * expansion, num_classes, **kwargs))\n", + " ]))\n", + " self.init_model(self)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 30, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:36.506266Z", + "start_time": "2020-11-12T06:33:36.348713Z" + } + }, "outputs": [], "source": [ "model = Net()" @@ -1006,8 +1134,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 31, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:36.827690Z", + "start_time": "2020-11-12T06:33:36.806284Z" + } + }, "outputs": [ { "data": { @@ -1176,7 +1309,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1187,8 +1320,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:38.249477Z", + "start_time": "2020-11-12T06:33:38.019788Z" + } + }, "outputs": [ { "data": { @@ -1196,7 +1334,7 @@ "torch.Size([16, 1000])" ] }, - "execution_count": null, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1210,8 +1348,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 33, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:39.362878Z", + "start_time": "2020-11-12T06:33:39.195752Z" + } + }, "outputs": [ { "data": { @@ -1251,7 +1394,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1264,8 +1407,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 34, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:40.223688Z", + "start_time": "2020-11-12T06:33:40.091324Z" + } + }, "outputs": [ { "name": "stdout", @@ -1286,8 +1434,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 35, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:40.803038Z", + "start_time": "2020-11-12T06:33:40.591347Z" + } + }, "outputs": [ { "data": { @@ -1456,7 +1609,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1469,8 +1622,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 36, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:41.679937Z", + "start_time": "2020-11-12T06:33:41.562299Z" + } + }, "outputs": [ { "name": "stdout", @@ -1499,8 +1657,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:58:53.867375Z", + "start_time": "2020-11-12T06:58:53.190412Z" + } + }, "outputs": [ { "name": "stdout", @@ -1512,9 +1675,8 @@ "Converted 03_MXResNet.ipynb.\n", "Converted 04_YaResNet.ipynb.\n", "Converted 
05_Twist.ipynb.\n", - "Converted 10_constructor.ipynb.\n", - "Converted 11_resnet.ipynb.\n", - "Converted 12_xresnet.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -1552,6 +1714,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2fb20a2 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[flake8] +max-line-length = 120 +exclude = .tox,*.egg,build,temp +select = E,W,F +doctests = True +verbose = 2 +# https://pep8.readthedocs.io/en/latest/intro.html#error-codes +format = pylint +# ignore = \ No newline at end of file From 9bd8b96172e9e46d0867a98c6032a2453a71fa7b Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Thu, 12 Nov 2020 08:36:49 +0000 Subject: [PATCH 3/3] Clear code, remove nbdev %magic Fixes #2 --- README.md | 64 +-- docs/MXResNet.html | 25 +- docs/Net.html | 89 ++- docs/Twist.html | 62 +- docs/YaResNet.html | 23 +- docs/activations.html | 692 ++-------------------- docs/base_constructor.html | 93 ++- docs/layers.html | 95 ++- docs/xresnet.html | 43 +- model_constructor/_nbdev.py | 4 +- model_constructor/base_constructor.py | 12 +- model_constructor/mxresnet.py | 3 +- model_constructor/net.py | 12 +- model_constructor/twist.py | 133 ++--- model_constructor/xresnet.py | 40 +- model_constructor/yaresnet.py | 46 +- nbs/00_Net.ipynb | 65 ++- nbs/01_activations.ipynb | 11 +- nbs/03_MXResNet.ipynb | 5 +- nbs/04_YaResNet.ipynb | 103 ++-- nbs/05_Twist.ipynb | 798 ++++++++++++++++++-------- nbs/10_base_constructor.ipynb | 24 +- nbs/11_xresnet.ipynb | 251 +++++--- setup.cfg | 2 +- setup.py | 51 +- 25 files changed, 1291 insertions(+), 1455 deletions(-) diff --git a/README.md b/README.md index 117a110..e91e829 100644 --- a/README.md +++ b/README.md @@ -21,15 +21,15 @@ First import constructor class, then create model constructor oject. Now you can change every part of model. -```python +``` from model_constructor.net import * ``` -```python +``` model = Net() ``` -```python +``` model ``` @@ -48,7 +48,7 @@ model Now we have model consructor, default setting as xresnet18. And we can get model after call it. -```python +``` model.c_in ``` @@ -59,7 +59,7 @@ model.c_in -```python +``` model.c_out ``` @@ -70,7 +70,7 @@ model.c_out -```python +``` model.stem_sizes ``` @@ -81,7 +81,7 @@ model.stem_sizes -```python +``` model.layers ``` @@ -92,7 +92,7 @@ model.layers -```python +``` model.expansion ``` @@ -103,8 +103,8 @@ model.expansion -```python -%nbdev_collapse_output +``` +#collapse_output model() ```
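Before changing anything else, it can help to sanity-check the constructed model with a dummy batch. A minimal sketch — the 128×128 input size and the two-image batch are arbitrary choices for illustration:

```
# Quick sanity check of the constructed model: run a small dummy batch through it.
import torch

net = model()                      # calling the constructor returns the nn.Sequential model
xb = torch.randn(2, 3, 128, 128)   # two RGB images, 128x128 (size chosen only for the check)
print(net(xb).shape)               # expected: torch.Size([2, 1000]) with the default c_out
```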
@@ -284,15 +284,15 @@ model() If you want to change model, just change constructor parameters. Lets create xresnet50. -```python +``` model.expansion = 4 model.layers = [3,4,6,3] ``` Now we can look at model body and if we call constructor - we have pytorch model! -```python -%nbdev_collapse_output +``` +#collapse_output model.body ```
@@ -649,13 +649,13 @@ But now lets create model as mxresnet50 from fastai forums tread https://forums. Lets create mxresnet constructor. -```python +``` model = Net(name='MxResNet') ``` Then lets modify stem. -```python +``` model.stem_sizes = [3,32,64,64] ``` @@ -663,15 +663,15 @@ Now lets change activation function to Mish. Here is link to forum disscussion https://forums.fast.ai/t/meet-mish-new-activation-function-possible-successor-to-relu Mish is in model_constructor.activations -```python +``` from model_constructor.activations import Mish ``` -```python +``` model.act_fn = Mish() ``` -```python +``` model ``` @@ -688,8 +688,8 @@ model -```python -%nbdev_collapse_output +``` +#collapse_output model() ```
@@ -870,7 +870,7 @@ model() Now lets make MxResNet50 -```python +``` model.expansion = 4 model.layers = [3,4,6,3] model.name = 'mxresnet50' @@ -880,7 +880,7 @@ Now we have mxresnet50 constructor. We can inspect every parts of it. And after call it we got model. -```python +``` model ``` @@ -897,8 +897,8 @@ model -```python -%nbdev_collapse_output +``` +#collapse_output model.stem.conv_1 ```
@@ -917,8 +917,8 @@ model.stem.conv_1
-```python -%nbdev_collapse_output +``` +#collapse_output model.body.l_0.bl_0 ```
@@ -959,18 +959,18 @@ model.body.l_0.bl_0 Now lets change Resblock to YaResBlock (Yet another ResNet, former NewResBlock) is in lib from version 0.1.0 -```python +``` from model_constructor.yaresnet import YaResBlock ``` -```python +``` model.block = YaResBlock ``` That all. Now we have YaResNet constructor -```python -%nbdev_collapse_output +``` +#collapse_output model.name = 'YaResNet' model ``` @@ -994,8 +994,8 @@ model Let see what we have. -```python -%nbdev_collapse_output +``` +#collapse_output model.body.l_1.bl_0 ```
diff --git a/docs/MXResNet.html b/docs/MXResNet.html index 2aed8f5..75a31fa 100644 --- a/docs/MXResNet.html +++ b/docs/MXResNet.html @@ -26,13 +26,6 @@
-
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -96,9 +89,12 @@

MXResNet constructor.
MXResNet constructor
- expansion: 1, sa: 0, groups: 1
- stem sizes: [3, 32, 64, 64]
- body sizes [64, 64, 128, 256, 512]
+ c_in: 3, c_out: 1000 + expansion: 1, groups: 1, dw: False + sa: False, se: False + stem sizes: [3, 32, 64, 64], stide on 0 + body sizes [64, 128, 256, 512] + layers: [2, 2, 2, 2] @@ -630,9 +626,12 @@
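The constructor summarised above can be rebuilt from the pieces in this patch; a minimal sketch, assuming the package is importable:

```
# Sketch: build the MXResNet constructor from Net plus Mish, as in the notebook
# this page is generated from; the deeper variant only changes two attributes.
from model_constructor.net import Net
from model_constructor.activations import Mish

mxresnet = Net(stem_sizes=[3, 32, 64, 64], name='MXResNet', act_fn=Mish())
model18 = mxresnet()          # xresnet18-like body with Mish and the extended stem

mxresnet.expansion = 4        # switch to bottleneck blocks
mxresnet.layers = [3, 4, 6, 3]
model50 = mxresnet()          # mxresnet50-like model
```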

MXResNet constructor.
MXResnet50 constructor
- expansion: 4, sa: 0, groups: 1
- stem sizes: [3, 32, 64, 64]
- body sizes [16, 64, 128, 256, 512]
+ c_in: 3, c_out: 10 + expansion: 4, groups: 1, dw: False + sa: False, se: False + stem sizes: [3, 32, 64, 64], stide on 0 + body sizes [64, 128, 256, 512] + layers: [3, 4, 6, 3] diff --git a/docs/Net.html b/docs/Net.html index 4535bc6..81a2910 100644 --- a/docs/Net.html +++ b/docs/Net.html @@ -26,20 +26,6 @@
-
- {% endraw %} - - {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -50,7 +36,7 @@
-
from dataclasses import dataclass, asdict
+
from nbdev import docs, showdoc, show_doc
 
@@ -60,12 +46,6 @@
{% endraw %} -
{% raw %}
@@ -73,6 +53,12 @@

Utils

{% endraw %} +
+
+

Utils

+
+
+
{% raw %}
@@ -84,7 +70,7 @@

Utils

-

init_cnn[source]

init_cnn(m)

+

init_cnn[source]

init_cnn(module:Module)

@@ -97,12 +83,6 @@

init_cnn -

{% raw %}
@@ -110,6 +90,12 @@

ResBlock

{% endraw %} +
+
+

ResBlock

+
+
+
{% raw %}
@@ -121,7 +107,7 @@

ResBlock

-

class ResBlock[source]

ResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

+

class ResBlock[source]

ResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

Resnet block
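A short usage sketch (the batch and feature-map sizes are arbitrary): at stride 1 the block keeps channels and resolution, while at stride 2 the identity path is pooled and projected.

```
# Sketch: shape behaviour of the basic (expansion=1) ResBlock.
import torch
from model_constructor.net import ResBlock

x = torch.randn(8, 64, 32, 32)

block = ResBlock(1, 64, 64)                 # expansion, ni, nh
assert block(x).shape == (8, 64, 32, 32)    # same channels, same resolution

block_s2 = ResBlock(1, 64, 128, stride=2)   # pooled + projected identity path
assert block_s2(x).shape == (8, 128, 16, 16)
```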

@@ -132,6 +118,13 @@

class ResBlock

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -424,13 +417,6 @@

NewResBlock
-
- {% endraw %} - - {% raw %} - -
-
@@ -438,7 +424,7 @@

NewResBlock
-

class NewResBlock[source]

NewResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

+

class NewResBlock[source]

NewResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

YaResnet block

@@ -449,6 +435,13 @@

class NewResBlock

+

+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -551,13 +544,6 @@

Net - Model Constructor. -

- {% endraw %} - - {% raw %} - -
-
@@ -565,7 +551,7 @@

Net - Model Constructor. -

class Net[source]

Net(name='Net', c_in=3, c_out=1000, block=ResBlock, conv_layer=ConvLayer, block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2], norm=BatchNorm2d, act_fn=ReLU(inplace=True), pool=AvgPool2d(kernel_size=2, stride=2, padding=0), expansion=1, groups=1, dw=False, sa=False, se=False, se_reduction=16, bn_1st=True, zero_bn=True, stem_stride_on=0, stem_sizes=[32, 32, 64], stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), stem_bn_end=False, _init_cnn=init_cnn, _make_stem=_make_stem, _make_layer=_make_layer, _make_body=_make_body, _make_head=_make_head)

+

class Net[source]

Net(name='Net', c_in=3, c_out=1000, block=ResBlock, conv_layer=ConvLayer, block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2], norm=BatchNorm2d, act_fn=ReLU(inplace=True), pool=AvgPool2d(kernel_size=2, stride=2, padding=0), expansion=1, groups=1, dw=False, sa=False, se=False, se_reduction=16, bn_1st=True, zero_bn=True, stem_stride_on=0, stem_sizes=[32, 32, 64], stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), stem_bn_end=False, _init_cnn=init_cnn, _make_stem=_make_stem, _make_layer=_make_layer, _make_body=_make_body, _make_head=_make_head)

Model constructor. As default - xresnet18
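The keyword arguments listed in the signature map directly onto constructor attributes, so variants are created by flipping flags rather than writing new classes. A hedged sketch:

```
# Sketch: the same constructor with SE blocks, SimpleSelfAttention and depthwise
# convolutions switched on via the flags from the signature above.
from model_constructor.net import Net

constructor = Net(name='CustomNet', c_out=10, se=True, sa=True, dw=True)
constructor.layers = [2, 2, 2, 2]   # depth can also be changed after construction
model = constructor()               # returns the assembled nn.Sequential
```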

@@ -576,6 +562,13 @@

class Net +

{% endraw %} @@ -1095,10 +1088,7 @@

class Net -
- - -
+
@@ -1109,7 +1099,6 @@

class NetConvTwist

-
- {% endraw %} - - {% raw %} - -
-
@@ -85,6 +78,13 @@

class ConvTwist<

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -274,13 +274,6 @@

ConvLayerTwist -

- {% endraw %} - - {% raw %} - -
-
@@ -288,7 +281,7 @@

ConvLayerTwist -

class ConvLayerTwist[source]

ConvLayerTwist(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: ConvLayer

+

class ConvLayerTwist[source]

ConvLayerTwist(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: ConvLayer

Basic conv layers block

@@ -299,6 +292,13 @@

class ConvLayerTwi

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -828,13 +828,6 @@

NewResBlockTwist -

- {% endraw %} - - {% raw %} - -
-
@@ -842,7 +835,7 @@

NewResBlockTwist -

class NewResBlockTwist[source]

NewResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, zero_bn=True, **kvargs) :: Module

+

class NewResBlockTwist[source]

NewResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, zero_bn=True, **kvargs) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -873,6 +866,13 @@

class NewResBloc

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -1060,13 +1060,6 @@

ResBlockTwist -

- {% endraw %} - - {% raw %} - -
-
@@ -1074,7 +1067,7 @@

ResBlockTwist -

class ResBlockTwist[source]

ResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, **kvargs) :: Module

+

class ResBlockTwist[source]

ResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, **kvargs) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -1105,6 +1098,13 @@

class ResBlockTwist

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} diff --git a/docs/YaResNet.html b/docs/YaResNet.html index 0e31dde..27e5445 100644 --- a/docs/YaResNet.html +++ b/docs/YaResNet.html @@ -33,13 +33,6 @@
-
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -53,13 +46,6 @@

YaResBlock -

- {% endraw %} - - {% raw %} - -
-
@@ -67,7 +53,7 @@

YaResBlock -

class YaResBlock[source]

YaResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, groups=1, dw=False) :: Module

+

class YaResBlock[source]

YaResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, groups=1, dw=False) :: Module

YaResBlock. Reduce by pool instead of stride 2
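The docstring is the whole idea: instead of letting a stride-2 convolution do the reduction, the resolution is reduced once by a pooling layer and the convolutions then run at stride 1. A standalone sketch of that idea (not the library implementation):

```
# Not the library code - just the "reduce by pool instead of stride 2" idea.
import torch
import torch.nn as nn

x = torch.randn(8, 64, 32, 32)

stride_conv = nn.Conv2d(64, 128, 3, stride=2, padding=1)    # classic reduction
pool_then_conv = nn.Sequential(
    nn.AvgPool2d(2, ceil_mode=True),                        # reduce resolution first
    nn.Conv2d(64, 128, 3, stride=1, padding=1),             # then a stride-1 conv
)

assert stride_conv(x).shape == pool_then_conv(x).shape == (8, 128, 16, 16)
```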

@@ -78,6 +64,13 @@

class YaResBlock

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} diff --git a/docs/activations.html b/docs/activations.html index 5267562..74137ea 100644 --- a/docs/activations.html +++ b/docs/activations.html @@ -62,524 +62,10 @@
-

Swish

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

swish[source]

swish(x, inplace:bool=False)

-
-

Swish - Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class Swish[source]

Swish(inplace:bool=False) :: Module

-
-

Swish - Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

SwishJit

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f833e1e90>[source]

ScriptFunction object at 0x7f9f833e1e90>()

-
-

Jit version of Swish. -Swish- Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class SwishJit[source]

SwishJit(inplace:bool=False) :: Module

-
-

Jit version of Swish. -Swish - Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

SwishJitMe - memory-efficient.

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f2684f2f0>[source]

ScriptFunction object at 0x7f9f2684f2f0>()

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class SwishJitAutoFn[source]

SwishJitAutoFn() :: Function

-
-

torch.jit.script optimised Swish w/ memory-efficient checkpoint -Inspired by conversation btw Jeremy Howard & Adam Pazske -https://twitter.com/jeremyphoward/status/1188251041835315200

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

swish_me[source]

swish_me(x, inplace=False)

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class SwishMe[source]

SwishMe(inplace:bool=False) :: Module

-
-

Base class for all neural network modules.

-

Your models should also subclass this class.

-

Modules can also contain other Modules, allowing to nest them in -a tree structure. You can assign the submodules as regular attributes::

- -
import torch.nn as nn
-import torch.nn.functional as F
-
-class Model(nn.Module):
-    def __init__(self):
-        super(Model, self).__init__()
-        self.conv1 = nn.Conv2d(1, 20, 5)
-        self.conv2 = nn.Conv2d(20, 20, 5)
-
-    def forward(self, x):
-        x = F.relu(self.conv1(x))
-        return F.relu(self.conv2(x))
-
-
-

Submodules assigned in this way will be registered, and will have their -parameters converted too when you call :meth:to, etc.

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

Mish

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

mish[source]

mish(x, inplace:bool=False)

-
-

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 -NOTE: I don't have a working inplace variant

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class Mish[source]

Mish(inplace:bool=False) :: Module

-
-

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

MishJit

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f26062290>[source]

ScriptFunction object at 0x7f9f26062290>()

-
-

Jit version of Mish. -Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class MishJit[source]

MishJit(inplace:bool=False) :: Module

-
-

Base class for all neural network modules.

-

Your models should also subclass this class.

-

Modules can also contain other Modules, allowing to nest them in -a tree structure. You can assign the submodules as regular attributes::

- -
import torch.nn as nn
-import torch.nn.functional as F
-
-class Model(nn.Module):
-    def __init__(self):
-        super(Model, self).__init__()
-        self.conv1 = nn.Conv2d(1, 20, 5)
-        self.conv2 = nn.Conv2d(20, 20, 5)
-
-    def forward(self, x):
-        x = F.relu(self.conv1(x))
-        return F.relu(self.conv2(x))
-
-
-

Submodules assigned in this way will be registered, and will have their -parameters converted too when you call :meth:to, etc.

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

MishJitMe - memory-efficient.

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f260623b0>[source]

ScriptFunction object at 0x7f9f260623b0>()

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f26062470>[source]

ScriptFunction object at 0x7f9f26062470>()

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class MishJitAutoFn[source]

MishJitAutoFn() :: Function

-
-

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 -A memory efficient, jit scripted variant of Mish

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

mish_me[source]

mish_me(x, inplace=False)

-
- -
- -
- +

Mish

-
- {% endraw %} - {% raw %}
@@ -591,30 +77,17 @@

mish_me -

class MishMe[source]

MishMe(inplace:bool=False) :: Module

+

mish[source]

mish(x, inplace:bool=False)

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 -A memory efficient, jit scripted variant of Mish

- -

- -
+NOTE: I don't have a working inplace variant

-

- {% endraw %} -
-
-

HardSwish

-
- {% raw %} - -
{% endraw %} @@ -630,9 +103,9 @@

HardSwish

-

hard_swish[source]

hard_swish(x, inplace:bool=False)

+

class Mish[source]

Mish(inplace:bool=False) :: Module

-

Hard swish activation function

+

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
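For reference, Mish is x · tanh(softplus(x)); a quick standalone check of the formula (the sample points are arbitrary):

```
# Sketch: Mish(x) = x * tanh(softplus(x)); the modules documented on this page
# should match these values up to floating-point error.
import torch
import torch.nn.functional as F

x = torch.linspace(-4, 4, steps=9)
print(x * torch.tanh(F.softplus(x)))
```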

@@ -648,40 +121,15 @@

hard_swish -
-
- -
- - -
-

class HardSwish[source]

HardSwish(inplace:bool=False) :: Module

-
-

Hard swish activation function

- -
- -
- -
-
-

{% endraw %}
-

HardSwishJit

-
+

MishJit

- {% raw %} - -
-
- {% endraw %} - {% raw %}
@@ -693,8 +141,10 @@

HardSwishJit <
-

ScriptFunction object at 0x7f9f260626b0>[source]

ScriptFunction object at 0x7f9f260626b0>()

+

ScriptFunction object at 0x7f60a962c7c0>[source]

ScriptFunction object at 0x7f60a962c7c0>()

+

Jit version of Mish. +Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

@@ -717,7 +167,7 @@

Scrip
-

class HardSwishJit[source]

HardSwishJit(inplace:bool=False) :: Module

+

class MishJit[source]

MishJit(inplace:bool=False) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -751,12 +201,6 @@

class HardSwishJit {% endraw %} -
-
-

HardSwishJitMe

-
-
-
{% raw %}
@@ -764,30 +208,12 @@

HardSwishJitMe - -
-
- -
- - -
-

ScriptFunction object at 0x7f9f26062770>[source]

ScriptFunction object at 0x7f9f26062770>()

-
- -
- -
- +
+
+

MishJitMe - memory-efficient.

-
- {% endraw %} - {% raw %}
@@ -799,7 +225,7 @@

Scrip
-

ScriptFunction object at 0x7f9f2684f6b0>[source]

ScriptFunction object at 0x7f9f2684f6b0>()

+

ScriptFunction object at 0x7f60a962c950>[source]

ScriptFunction object at 0x7f60a962c950>()

@@ -823,9 +249,8 @@

Scrip
-

class HardSwishJitAutoFn[source]

HardSwishJitAutoFn() :: Function

+

ScriptFunction object at 0x7f60a962c360>[source]

ScriptFunction object at 0x7f60a962c360>()

-

A memory efficient, jit-scripted HardSwish activation

@@ -848,9 +273,10 @@

class HardSwis
-

hard_swish_me[source]

hard_swish_me(x, inplace=False)

+

class MishJitAutoFn[source]

MishJitAutoFn() :: Function

-

A memory efficient, jit-scripted HardSwish activation

+

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 +A memory efficient, jit scripted variant of Mish

@@ -873,29 +299,15 @@

hard_swish_me -

class HardSwishMe[source]

HardSwishMe(inplace:bool=False) :: Module

+

mish_me[source]

mish_me(x, inplace=False)

-

A memory efficient, jit-scripted HardSwish activation

- -

- -
-

- {% endraw %} -
-
-

HardMish

-
- {% raw %} - -
{% endraw %} @@ -911,11 +323,10 @@

HardMish

-

hard_mish[source]

hard_mish(x, inplace:bool=False)

+

class MishMe[source]

MishMe(inplace:bool=False) :: Module

-

Hard Mish -Experimental, based on notes by Mish author Diganta Misra at - https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md

+

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 +A memory efficient, jit scripted variant of Mish

@@ -931,25 +342,6 @@

hard_mish -
-
- -
- - -
-

class HardMish[source]

HardMish(inplace:bool=False) :: Module

-
-

Hard Mish, Experimental, based on notes by Mish author Diganta Misra at -https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md

- -
- -
- -
-
-

{% endraw %} @@ -963,13 +355,6 @@

HardMishJit
-
- {% endraw %} - - {% raw %} - -
-
@@ -977,7 +362,7 @@

HardMishJit
-

ScriptFunction object at 0x7f9f26062230>[source]

ScriptFunction object at 0x7f9f26062230>()

+

ScriptFunction object at 0x7f60a962c130>[source]

ScriptFunction object at 0x7f60a962c130>()

Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1004,7 +389,7 @@

Scrip
-

class HardMishJit[source]

HardMishJit(inplace:bool=False) :: Module

+

class HardMishJit[source]

HardMishJit(inplace:bool=False) :: Module

Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1020,12 +405,6 @@

class HardMishJit {% endraw %} -
-
-

HardMishJitMe - memory efficient.

-
-
-
{% raw %}
@@ -1033,6 +412,12 @@

HardMishJitMe - memory efficient. {% endraw %} + {% raw %}
@@ -1044,7 +429,7 @@

HardMishJitMe - memory efficient. -

ScriptFunction object at 0x7f9f260628f0>[source]

ScriptFunction object at 0x7f9f260628f0>()

+

ScriptFunction object at 0x7f60a962c180>[source]

ScriptFunction object at 0x7f60a962c180>()

@@ -1068,7 +453,7 @@

Scrip
-

ScriptFunction object at 0x7f9f26062a10>[source]

ScriptFunction object at 0x7f9f26062a10>()

+

ScriptFunction object at 0x7f605af1eb80>[source]

ScriptFunction object at 0x7f605af1eb80>()

@@ -1092,7 +477,7 @@

Scrip
-

class HardMishJitAutoFn[source]

HardMishJitAutoFn() :: Function

+

class HardMishJitAutoFn[source]

HardMishJitAutoFn() :: Function

A memory efficient, jit scripted variant of Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1119,7 +504,7 @@

class HardMishJ
-

hard_mish_me[source]

hard_mish_me(x, inplace:bool=False)

+

hard_mish_me[source]

hard_mish_me(x, inplace:bool=False)

@@ -1143,7 +528,7 @@

hard_mish_me -

class HardMishMe[source]

HardMishMe(inplace:bool=False) :: Module

+

class HardMishMe[source]

HardMishMe(inplace:bool=False) :: Module

A memory efficient, jit scripted variant of Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1156,6 +541,13 @@

class HardMishMe

+

+ {% endraw %} + + {% raw %} + +
+
{% endraw %} diff --git a/docs/base_constructor.html b/docs/base_constructor.html index f61cfb8..c497454 100644 --- a/docs/base_constructor.html +++ b/docs/base_constructor.html @@ -40,13 +40,6 @@
-
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -60,13 +53,6 @@

Stem

-
- {% endraw %} - - {% raw %} - -
-
@@ -74,7 +60,7 @@

Stem

-

class Stem[source]

Stem(c_in=3, stem_sizes=[], stem_out=64, conv_layer=ConvLayer, stride_on=0, stem_bn_last=False, bn_1st=True, stem_use_pool=True, stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), **kwargs) :: Sequential

+

class Stem[source]

Stem(c_in=3, stem_sizes=[], stem_out=64, conv_layer=ConvLayer, stride_on=0, stem_bn_last=False, bn_1st=True, stem_use_pool=True, stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), **kwargs) :: Sequential

Base stem
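With the defaults shown above the stem is a single stride-2 conv followed by a stride-2 max pool, so the resolution drops by 4×. A sketch (the 128×128 input is an assumption for illustration):

```
# Sketch: default stem reduces resolution by 4 and outputs stem_out=64 channels.
import torch
from model_constructor.base_constructor import Stem

stem = Stem()                          # c_in=3 -> stem_out=64
x = torch.randn(8, 3, 128, 128)
assert stem(x).shape == (8, 64, 32, 32)
```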

@@ -85,6 +71,13 @@

class Stem +

{% endraw %} @@ -404,13 +397,6 @@

Blocks

-
- {% endraw %} - - {% raw %} - -
-
@@ -418,7 +404,7 @@

Blocks

-

DownsampleBlock[source]

DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs)

+

DownsampleBlock[source]

DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs)

Base downsample for res-like blocks

@@ -450,7 +436,7 @@

DownsampleBlock -

class BasicBlock[source]

BasicBlock(ni, nf, expansion=1, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

+

class BasicBlock[source]

BasicBlock(ni, nf, expansion=1, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

Basic block (simplified) as in pytorch resnet
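The identity shortcut is used only when the shapes already match; if the channel count changes or stride is 2, the 1×1 projection from `DownsampleBlock` is applied. A short sketch (sizes are arbitrary):

```
# Sketch: when the projection shortcut kicks in.
import torch
from model_constructor.base_constructor import BasicBlock

x = torch.randn(8, 64, 32, 32)

same = BasicBlock(64, 64)              # ni == nf, stride 1 -> plain identity add
assert same(x).shape == (8, 64, 32, 32)

down = BasicBlock(64, 128, stride=2)   # channel change + stride 2 -> projected shortcut
assert down(x).shape == (8, 128, 16, 16)
```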

@@ -482,7 +468,7 @@

class BasicBlock -

class Bottleneck[source]

Bottleneck(ni, nh, expansion=4, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

+

class Bottleneck[source]

Bottleneck(ni, nh, expansion=4, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

Bottleneck block for resnet models
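Note that `ni` and `nh` are given in block units: the real input and output widths are multiplied by `expansion`, with a 1×1 conv squeezing to `nh`, a 3×3 at that width, and a 1×1 expanding back. A sketch:

```
# Sketch: with expansion=4 the block sees ni*4 = 256 input channels and returns nh*4.
import torch
from model_constructor.base_constructor import Bottleneck

block = Bottleneck(64, 64, expansion=4)
x = torch.randn(8, 256, 16, 16)
assert block(x).shape == (8, 256, 16, 16)
```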

@@ -514,7 +500,7 @@

class Bottleneck -

class BasicLayer[source]

BasicLayer(block, blocks, ni, nf, expansion, stride, sa=False, **kwargs) :: Sequential

+

class BasicLayer[source]

BasicLayer(block, blocks, ni, nf, expansion, stride, sa=False, **kwargs) :: Sequential

Layer from blocks

@@ -528,12 +514,6 @@

class BasicLayer {% endraw %} -
-
-

Body constructor

-
-
-
{% raw %}
@@ -541,6 +521,12 @@

Body constructor +

{% raw %}
@@ -552,7 +538,7 @@

Body constructor -

class Body[source]

Body(block, body_in=64, body_out=512, bodylayer=BasicLayer, expansion=1, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], sa=False, **kwargs) :: Sequential

+

class Body[source]

Body(block, body_in=64, body_out=512, bodylayer=BasicLayer, expansion=1, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], sa=False, **kwargs) :: Sequential

Constructor for body
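With the defaults the layer sizes expand to [64, 64, 128, 256, 512]; the first layer keeps stride 1 (the stem has already downsampled) and each later layer halves the resolution. A sketch on a stem-sized feature map:

```
# Sketch: default Body maps a 64x32x32 stem output to 512x4x4.
import torch
from model_constructor.base_constructor import Body, BasicBlock

body = Body(BasicBlock)
x = torch.randn(8, 64, 32, 32)
assert body(x).shape == (8, 512, 4, 4)
```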

@@ -563,6 +549,13 @@

class Body +

{% endraw %} @@ -790,13 +783,6 @@

Head

-
- {% endraw %} - - {% raw %} - -
-
@@ -804,7 +790,7 @@

Head

-

Head(ni, nf, **kwargs) :: Sequential

+

Head(ni, nf, **kwargs) :: Sequential

base head
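The head is adaptive average pool → flatten → linear, so it accepts any spatial size. A sketch:

```
# Sketch: Head(512, 10) works for any input resolution because of the adaptive pool.
import torch
from model_constructor.base_constructor import Head

head = Head(512, 10)
assert head(torch.randn(8, 512, 4, 4)).shape == (8, 10)
assert head(torch.randn(8, 512, 7, 7)).shape == (8, 10)
```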

@@ -815,6 +801,13 @@
{% endraw %} @@ -881,13 +874,6 @@

class Net

-
- {% endraw %} - - {% raw %} - -
-
@@ -895,7 +881,7 @@

class Net

-

init_model[source]

init_model(model, nonlinearity='leaky_relu')

+

init_model[source]

init_model(model, nonlinearity='leaky_relu')

Init model

@@ -927,7 +913,7 @@

init_model -

class Net[source]

Net(stem=Stem, body=Body, block=BasicBlock, sa=False, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], head=Head, c_in=3, num_classes=1000, body_in=64, body_out=512, expansion=1, init_fn=init_model, **kwargs) :: Sequential

+

class Net[source]

Net(stem=Stem, body=Body, block=BasicBlock, sa=False, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], head=Head, c_in=3, num_classes=1000, body_in=64, body_out=512, expansion=1, init_fn=init_model, **kwargs) :: Sequential

Constructor for model

@@ -938,6 +924,13 @@

class Net +

{% endraw %} diff --git a/docs/layers.html b/docs/layers.html index 47de2f4..548e8cb 100644 --- a/docs/layers.html +++ b/docs/layers.html @@ -53,13 +53,6 @@

Flatten

-
- {% endraw %} - - {% raw %} - -
-
@@ -81,12 +74,6 @@

class Flatten {% endraw %} - {% raw %}
@@ -94,6 +81,12 @@

Noop

{% endraw %} +
+
+

Noop

+
+
+
{% raw %}
@@ -105,9 +98,8 @@

Noop

-

noop[source]

noop(x=None, *args, **kwargs)

+

noop[source]

noop(x)

-

Do nothing

@@ -130,7 +122,7 @@

noop -

class Noop[source]

Noop() :: Module

+

class Noop[source]

Noop() :: Module

Dummy module

@@ -144,12 +136,6 @@

class Noop -

{% raw %}
@@ -157,6 +143,12 @@

ConvLayer

{% endraw %} +
+
+

ConvLayer

+
+
+
{% raw %}
@@ -168,7 +160,7 @@

ConvLayer

-

class ConvLayer[source]

ConvLayer(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: Sequential

+

class ConvLayer[source]

ConvLayer(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: Sequential

Basic conv layers block
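ConvLayer bundles conv, batch norm and activation; `bn_1st` controls whether BN comes before the activation, and `zero_bn` initialises the BN weight to zero, the usual choice for the last conv of a residual branch. A usage sketch:

```
# Sketch: a 3x3 conv + BN + ReLU block, and a zero-initialised BN tail without activation.
import torch
from model_constructor.layers import ConvLayer

layer = ConvLayer(3, 32, stride=2)
assert layer(torch.randn(8, 3, 64, 64)).shape == (8, 32, 32, 32)

tail = ConvLayer(32, 32, zero_bn=True, act=False)   # BN weight starts at zero
```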

@@ -179,6 +171,13 @@

class ConvLayer<

+

+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -380,13 +379,6 @@

SimpleSelfAttention -

- {% endraw %} - - {% raw %} - -
-
@@ -394,9 +386,9 @@

SimpleSelfAttention -

conv1d[source]

conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False)

+

conv1d[source]

conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False)

-

Create and initialize a nn.Conv1d layer with spectral normalization.

+

Create and initialize a nn.Conv1d layer with spectral normalization.

@@ -419,7 +411,7 @@

conv1d -

class SimpleSelfAttention[source]

SimpleSelfAttention(n_in:int, ks=1, sym=False) :: Module

+

class SimpleSelfAttention[source]

SimpleSelfAttention(n_in:int, ks=1, sym=False) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -453,12 +445,6 @@

class SimpleS

{% endraw %} -
-
-

SE Block

-
-
-
{% raw %}
@@ -466,6 +452,12 @@

SE Block

{% endraw %} +
+
+

SE Block

+
+
+
{% raw %}
@@ -477,7 +469,7 @@

SE Block

-

class SEBlock[source]

SEBlock(c, r=16) :: Module

+

class SEBlock[source]

SEBlock(c, r=16) :: Module

se block
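Squeeze-and-excitation pools each channel to a single value, passes it through a small bottleneck with a sigmoid gate, and rescales the channels, so the output shape equals the input shape. A usage sketch (sizes are arbitrary):

```
# Sketch: SE gating only re-weights channels, it never changes the tensor shape.
import torch
from model_constructor.layers import SEBlock

se = SEBlock(64)
x = torch.randn(8, 64, 16, 16)
assert se(x).shape == x.shape
```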

@@ -488,6 +480,13 @@

class SEBlock

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -543,13 +542,6 @@

SEBlockConv
-
- {% endraw %} - - {% raw %} - -
-
@@ -557,7 +549,7 @@

SEBlockConv
-

class SEBlockConv[source]

SEBlockConv(c, r=16) :: Module

+

class SEBlockConv[source]

SEBlockConv(c, r=16) :: Module

se block with conv on excitation
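The conv variant does the same gating but keeps the excitation path as 1×1 convolutions, so no flatten is needed and the reduced width is `c // r` (4 for 64 channels with the default r=16), matching the code earlier in this patch. A sketch:

```
# Sketch: same channel gating as SEBlock, with 1x1 convs on the excitation path.
import torch
from model_constructor.layers import SEBlockConv

se = SEBlockConv(64)       # reduced width: 64 // 16 = 4
x = torch.randn(8, 64, 16, 16)
assert se(x).shape == x.shape
```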

@@ -568,6 +560,13 @@

class SEBlockConv

+

+ {% endraw %} + + {% raw %} + +
+
diff --git a/docs/xresnet.html b/docs/xresnet.html
index e801565..c61c19b 100644
--- a/docs/xresnet.html
+++ b/docs/xresnet.html
[Regenerated nbdev docs for model_constructor.xresnet: the hunks only move the {% raw %}/{% endraw %} cell wrappers around the DownsampleLayer, XResBlock, xresnet18, xresnet34 and xresnet50 entries; the documented signatures and descriptions are unchanged (xresnet34 and xresnet50 are both still described as "Constructs axresnet-34 model.").]
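For orientation, the three constructors documented above are thin wrappers that hand a block class and per-stage block counts to base_constructor.Net; their definitions appear in the model_constructor/xresnet.py hunk further down. A usage sketch only, under the assumption that keyword arguments such as num_classes are forwarded through Net to its head and that the head pools adaptively before the final linear layer:

    import torch

    from model_constructor.xresnet import xresnet34

    # xresnet18/34 use expansion=1, xresnet50 uses expansion=4 (bottleneck blocks);
    # all three build Net with XResBlock and stem_sizes=[32, 32].
    model = xresnet34(num_classes=10)         # num_classes assumed to reach the head via **kwargs
    out = model(torch.randn(2, 3, 128, 128))  # batch of 2 RGB images, size chosen for illustration
    print(out.shape)                          # expected: torch.Size([2, 10])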

{% endraw %} diff --git a/model_constructor/_nbdev.py b/model_constructor/_nbdev.py index 127fa0c..a2182a1 100644 --- a/model_constructor/_nbdev.py +++ b/model_constructor/_nbdev.py @@ -40,8 +40,6 @@ "yaresnet_parameters": "04_YaResNet.ipynb", "yaresnet34": "04_YaResNet.ipynb", "yaresnet50": "04_YaResNet.ipynb", - "nn": "05_Twist.ipynb", - "F": "05_Twist.ipynb", "ConvTwist": "05_Twist.ipynb", "ConvLayerTwist": "05_Twist.ipynb", "NewResBlockTwist": "05_Twist.ipynb", @@ -71,4 +69,4 @@ git_url = "https://github.com/ayasyrev/model_constructor/tree/master/" -def custom_doc_links(name): return None +def custom_doc_links(name): return None # noqa E704 diff --git a/model_constructor/base_constructor.py b/model_constructor/base_constructor.py index 11ceabf..bc16184 100644 --- a/model_constructor/base_constructor.py +++ b/model_constructor/base_constructor.py @@ -8,11 +8,9 @@ from collections import OrderedDict from .layers import ConvLayer, Noop, Flatten - # Cell act_fn = nn.ReLU(inplace=True) - # Cell class Stem(nn.Sequential): """Base stem""" @@ -39,13 +37,11 @@ def __init__(self, c_in=3, stem_sizes=[], stem_out=64, def extra_repr(self): return f"sizes: {self.sizes}" - # Cell def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs): '''Base downsample for res-like blocks''' return conv_layer(ni, nf, ks, stride, act, **kwargs) - # Cell class BasicBlock(nn.Module): """Basic block (simplified) as in pytorch resnet""" @@ -69,7 +65,6 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) - # Cell class Bottleneck(nn.Module): '''Bottlneck block for resnet models''' @@ -97,7 +92,6 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) - # Cell class BasicLayer(nn.Sequential): '''Layer from blocks''' @@ -116,7 +110,6 @@ def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False, **kwargs) def extra_repr(self): return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' 
- # Cell class Body(nn.Sequential): '''Constructor for body''' @@ -135,7 +128,6 @@ def __init__(self, block, for i in range(num_layers)] super().__init__(OrderedDict(layers)) - # Cell class Head(nn.Sequential): '''base head''' @@ -146,7 +138,6 @@ def __init__(self, ni, nf, **kwargs): ('fc', nn.Linear(ni, nf)), ])) - # Cell def init_model(model, nonlinearity='leaky_relu'): '''Init model''' @@ -154,7 +145,6 @@ def init_model(model, nonlinearity='leaky_relu'): if isinstance(m, (nn.Conv2d, nn.Linear)): nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity) - # Cell class Net(nn.Sequential): '''Constructor for model''' @@ -173,4 +163,4 @@ def __init__(self, stem=Stem, sa=sa, **kwargs)), ('head', head(body_out * expansion, num_classes, **kwargs)) ])) - self.init_model(self) + self.init_model(self) \ No newline at end of file diff --git a/model_constructor/mxresnet.py b/model_constructor/mxresnet.py index b36761c..951be1e 100644 --- a/model_constructor/mxresnet.py +++ b/model_constructor/mxresnet.py @@ -4,8 +4,9 @@ # Cell from functools import partial -from .net import Net + from .activations import Mish +from .net import Net # Cell mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()} diff --git a/model_constructor/net.py b/model_constructor/net.py index b939482..1b84b1d 100644 --- a/model_constructor/net.py +++ b/model_constructor/net.py @@ -3,10 +3,12 @@ __all__ = ['init_cnn', 'act_fn', 'ResBlock', 'NewResBlock', 'Net', 'xresnet34', 'xresnet50'] # Cell -import torch.nn as nn -from functools import partial from collections import OrderedDict -from .layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten +from functools import partial + +import torch.nn as nn + +from .layers import ConvLayer, Flatten, SEBlock, SimpleSelfAttention, noop # Cell act_fn = nn.ReLU(inplace=True) @@ -21,6 +23,8 @@ def init_cnn(module: nn.Module): init_cnn(layer) # Cell + + class ResBlock(nn.Module): '''Resnet block''' se_block = SEBlock @@ -58,8 +62,6 @@ def forward(self, x): # Cell # NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet. - - class NewResBlock(nn.Module): '''YaResnet block''' se_block = SEBlock diff --git a/model_constructor/twist.py b/model_constructor/twist.py index 94fd6e6..fab4baf 100644 --- a/model_constructor/twist.py +++ b/model_constructor/twist.py @@ -1,17 +1,17 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/05_Twist.ipynb (unless otherwise specified). 
-__all__ = ['nn', 'F', 'ConvTwist', 'ConvLayerTwist', 'NewResBlockTwist', 'ResBlockTwist'] +__all__ = ['ConvTwist', 'ConvLayerTwist', 'NewResBlockTwist', 'ResBlockTwist'] # Cell -from functools import partial +# from functools import partial from collections import OrderedDict -from .layers import * -from .net import * +from .layers import ConvLayer, noop, act_fn, SimpleSelfAttention # Cell -import sys, torch -nn = torch.nn -F = torch.nn.functional +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np # Cell class ConvTwist(nn.Module): @@ -20,49 +20,51 @@ class ConvTwist(nn.Module): twist = False use_groups = True groups_ch = 8 + def __init__(self, ni, nf, ks=3, stride=1, padding=1, bias=False, groups=1, iters=1, init_max=0.7, **kvargs): super().__init__() - self.same = ni==nf and stride==1 - self.groups = ni//self.groups_ch if self.use_groups else 1 - + self.same = ni == nf and stride == 1 + self.groups = ni // self.groups_ch if self.use_groups else 1 self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=False, groups=self.groups) if self.twist: std = self.conv.weight.std().item() - self.coeff_Ax = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True) - self.coeff_Ay = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True) + self.coeff_Ax = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True) + self.coeff_Ay = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True) self.iters = iters self.stride = stride self.DD = self.derivatives() def derivatives(self): - I = torch.Tensor([[0,0,0],[0,1,0],[0,0,0]]).view(1,1,3,3) - D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).view(1,1,3,3) / 10 - D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).view(1,1,3,3) / 10 + I = torch.Tensor([[0, 0, 0], [0, 1, 0], [0, 0, 0]]).view(1, 1, 3, 3) # noqa E741 + D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).view(1, 1, 3, 3) / 10 + D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).view(1, 1, 3, 3) / 10 + def convolution(K1, K2): return F.conv2d(K1, K2.flip(2).flip(3), padding=2) - D_xx = convolution(I+D_x, I+D_x).view(5,5) - D_yy = convolution(I+D_y, I+D_y).view(5,5) - D_xy = convolution(I+D_x, I+D_y).view(5,5) + D_xx = convolution(I + D_x, I + D_x).view(5, 5) + D_yy = convolution(I + D_y, I + D_y).view(5, 5) + D_xy = convolution(I + D_x, I + D_y).view(5, 5) return {'x': D_x, 'y': D_y, 'xx': D_xx, 'yy': D_yy, 'xy': D_xy} def kernel(self, coeff_x, coeff_y): - D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).to(coeff_x.device) - D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).to(coeff_x.device) - return coeff_x[:,:,None,None] * D_x + coeff_y[:,:,None,None] * D_y + D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).to(coeff_x.device) + D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).to(coeff_x.device) + return coeff_x[:, :, None, None] * D_x + coeff_y[:, :, None, None] * D_y - def full_kernel(self, kernel): # permuting the groups - if self.groups==1: return kernel + def full_kernel(self, kernel): # permuting the groups + if self.groups == 1: + return kernel n = self.groups - a,b,_,_ = kernel.size() - a = a//n - KK = torch.zeros((a*n,b*n,3,3)).to(kernel.device) + a, b, _, _ = kernel.size() + a = a // n + KK = torch.zeros((a * n, b * n, 3, 3)).to(kernel.device) for i in range(n): - if i%4==0: - KK[a*i:a*(i+1),b*(i+3):b*(i+4)] = kernel[a*i:a*(i+1)] + if i % 4 == 0: + KK[a * i:a * (i + 1), b * (i + 3):b * (i + 4)] = kernel[a * i:a 
* (i + 1)] else: - KK[a*i:a*(i+1),b*(i-1):b*i] = kernel[a*i:a*(i+1)] + KK[a * i:a * (i + 1), b * (i - 1):b * i] = kernel[a * i:a * (i + 1)] return KK def _conv(self, inpt, kernel=None): @@ -77,23 +79,25 @@ def symmetrize(self, conv_wt): if self.same: n = conv_wt.size()[1] for i in range(self.groups): - conv_wt.data[n*i:n*(i+1)] = (conv_wt[n*i:n*(i+1)] + torch.transpose(conv_wt[n*i:n*(i+1)],0,1)) / 2 + conv_wt.data[n * i:n * (i + 1)] = (conv_wt[n * i:n * (i + 1)] + + torch.transpose(conv_wt[n * i:n * (i + 1)], 0, 1)) / 2 # noqa E503 def forward(self, inpt): out = self._conv(inpt) if self.twist is False: return out - _,_,h,w = out.size() - XX = torch.from_numpy(np.indices((1,1,h,w))[3]*2/w-1).type(out.dtype).to(out.device) - YY = torch.from_numpy(np.indices((1,1,h,w))[2]*2/h-1).type(out.dtype).to(out.device) + _, _, h, w = out.size() + XX = torch.from_numpy(np.indices((1, 1, h, w))[3] * 2 / w - 1).type(out.dtype).to(out.device) + YY = torch.from_numpy(np.indices((1, 1, h, w))[2] * 2 / h - 1).type(out.dtype).to(out.device) kernel_x = self.kernel(self.coeff_Ax, self.coeff_Ay) self.symmetrize(kernel_x) - kernel_y = kernel_x.transpose(2,3).flip(3) # make conv_y a 90 degree rotation of conv_x + kernel_y = kernel_x.transpose(2, 3).flip(3) # make conv_y a 90 degree rotation of conv_x out = out + XX * self._conv(inpt, kernel_x) + YY * self._conv(inpt, kernel_y) - if self.same and self.iters>1: + if self.same and self.iters > 1: out = inpt + out / self.iters - for _ in range(self.iters-1): - out = out + (self._conv(out) + XX * self._conv(out, kernel_x) + YY * self._conv(out, kernel_y)) / self.iters + for _ in range(self.iters - 1): + out = out + (self._conv(out) + XX * self._conv(out, kernel_x) + + YY * self._conv(out, kernel_y)) / self.iters # noqa E727 out = out - inpt return out @@ -101,7 +105,7 @@ def extra_repr(self): return f"twist: {self.twist}, permute: {self.permute}, same: {self.same}, groups: {self.groups}" # Cell -class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist +class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist Conv2d = ConvTwist # Cell @@ -110,20 +114,19 @@ def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True, pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True, **kvargs): super().__init__() - nf,ni = nh*expansion,ni*expansion -# conv_layer = ConvLayerTwist - self.reduce = noop if stride==1 else pool - layers = [(f"conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), -# (f"conv_1", conv_layer(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1_twist", ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + nf, ni = nh * expansion, ni * expansion + self.reduce = noop if stride == 1 else pool + layers = [("conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] if expansion == 1 else [ + ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1_twist", ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) ] - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) 
self.convs = nn.Sequential(OrderedDict(layers)) - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st) + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st) self.merge = act_fn def forward(self, x): @@ -134,22 +137,22 @@ def forward(self, x): class ResBlockTwist(nn.Module): def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, - pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, **kvargs): + pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, **kvargs): super().__init__() - nf,ni = nh*expansion,ni*expansion -# conv_layer = ConvLayerTwist - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), -# (f"conv_1",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1_twist",ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + nf, ni = nh * expansion, ni * expansion + layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] if expansion == 1 else [ + ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1_twist", ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) ] - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.pool = noop if stride==1 else pool - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.act_fn =act_fn + self.pool = noop if stride == 1 else pool + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.act_fn = act_fn - def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) \ No newline at end of file + def forward(self, x): + return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) \ No newline at end of file diff --git a/model_constructor/xresnet.py b/model_constructor/xresnet.py index 2394dd6..93edda5 100644 --- a/model_constructor/xresnet.py +++ b/model_constructor/xresnet.py @@ -4,11 +4,10 @@ # Cell import torch.nn as nn -import torch from collections import OrderedDict # Cell -from .base_constructor import * +from .base_constructor import Net from .layers import ConvLayer, Noop, act_fn # Cell @@ -17,9 +16,10 @@ class DownsampleLayer(nn.Sequential): def __init__(self, conv_layer, ni, nf, stride, act, pool=nn.AvgPool2d(2, ceil_mode=True), pool_1st=True, **kwargs): - layers = [] if stride==1 else [('pool', pool)] - layers += [] if ni==nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))] - if not pool_1st: layers.reverse() + layers = [] if stride == 1 else [('pool', pool)] + layers += [] if ni == nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))] + if not pool_1st: + layers.reverse() super().__init__(OrderedDict(layers)) # Cell @@ -27,28 +27,32 @@ class XResBlock(nn.Module): def __init__(self, ni, nh, expansion=1, stride=1, zero_bn=True, conv_layer=ConvLayer, act_fn=act_fn, **kwargs): super().__init__() - nf,ni = nh*expansion,ni*expansion - 
layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), - ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) - ] if expansion == 1 else [ - ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)), - ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), - ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) + nf, ni = nh * expansion, ni * expansion + layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) + ] if expansion == 1 else [ + ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)), + ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) ] self.convs = nn.Sequential(OrderedDict(layers)) - self.identity = DownsampleLayer(conv_layer, ni, nf, stride, act=False, act_fn=act_fn, **kwargs) if ni!=nf or stride==2 else Noop() - self.merge = Noop() # us it to visualize in repr residual connection + self.identity = DownsampleLayer(conv_layer, ni, nf, stride, + act=False, act_fn=act_fn, **kwargs) if ni != nf or stride == 2 else Noop() + self.merge = Noop() self.act_fn = act_fn - def forward(self, x): return self.act_fn(self.merge(self.convs(x) + self.identity(x))) + def forward(self, x): + return self.act_fn(self.merge(self.convs(x) + self.identity(x))) # Cell def xresnet18(**kwargs): """Constructs a xresnet-18 model. """ - return Net(stem_sizes=[32,32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs) + return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs) + def xresnet34(**kwargs): """Constructs axresnet-34 model. """ - return Net(stem_sizes=[32,32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs) + return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs) + def xresnet50(**kwargs): """Constructs axresnet-34 model. """ - return Net(stem_sizes=[32,32],block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs) \ No newline at end of file + return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs) \ No newline at end of file diff --git a/model_constructor/yaresnet.py b/model_constructor/yaresnet.py index 1dd7ed8..4f817a3 100644 --- a/model_constructor/yaresnet.py +++ b/model_constructor/yaresnet.py @@ -4,10 +4,9 @@ # Cell import torch.nn as nn -import sys, torch from functools import partial from collections import OrderedDict -from .layers import * +from .layers import SEBlock, ConvLayer, act_fn, noop, SimpleSelfAttention from .net import Net from .activations import Mish @@ -17,35 +16,38 @@ class YaResBlock(nn.Module): '''YaResBlock. Reduce by pool instead of stride 2''' se_block = SEBlock + def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, - pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, + pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, groups=1, dw=False): super().__init__() - nf,ni = nh*expansion,ni*expansion - if groups != 1: groups = int(nh/groups) - self.reduce = noop if stride==1 else pool - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! 
- (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + nf, ni = nh * expansion, ni * expansion + if groups != 1: + groups = int(nh / groups) + self.reduce = noop if stride == 1 else pool + layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] if expansion == 1 else [ + ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) ] - if se: layers.append(('se', self.se_block(nf))) - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + if se: + layers.append(('se', self.se_block(nf))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.merge =act_fn + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.merge = act_fn def forward(self, x): o = self.reduce(x) return self.merge(self.convs(o) + self.idconv(o)) # Cell -yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(), - 'stem_sizes': [3,32,64,64], 'stem_stride_on': 1} -yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters) -yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters) \ No newline at end of file +yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(), 'stem_stride_on': 1} +yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters) +yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters) \ No newline at end of file diff --git a/nbs/00_Net.ipynb b/nbs/00_Net.ipynb index b2d3353..f3ad44e 100644 --- a/nbs/00_Net.ipynb +++ b/nbs/00_Net.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": [], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -29,6 +18,20 @@ "#default_exp net" ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T07:17:22.914932Z", + "start_time": "2020-11-12T07:17:22.910201Z" + } + }, + "outputs": [], + "source": [ + "from nbdev import docs, showdoc, show_doc" + ] + }, { "cell_type": "code", "execution_count": null, @@ -56,7 +59,8 @@ "source": [ "#hide\n", "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "from dataclasses import dataclass, asdict" ] }, { @@ -66,10 +70,12 @@ "outputs": [], "source": [ "#export\n", - "import torch.nn as nn\n", - "from functools import partial\n", "from collections import OrderedDict\n", - "from model_constructor.layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten" + "from functools import partial\n", + "\n", + "import torch.nn as nn\n", 
+ "\n", + "from model_constructor.layers import ConvLayer, Flatten, SEBlock, SimpleSelfAttention, noop" ] }, { @@ -77,9 +83,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from dataclasses import dataclass, asdict" - ] + "source": [] }, { "cell_type": "markdown", @@ -121,6 +125,8 @@ "outputs": [], "source": [ "#export\n", + "\n", + "\n", "class ResBlock(nn.Module):\n", " '''Resnet block'''\n", " se_block = SEBlock\n", @@ -386,8 +392,6 @@ "source": [ "#export\n", "# NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet.\n", - "\n", - "\n", "class NewResBlock(nn.Module):\n", " '''YaResnet block'''\n", " se_block = SEBlock\n", @@ -2738,14 +2742,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T07:15:22.388933Z", + "start_time": "2020-11-12T07:15:21.903684Z" + } + }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "UsageError: Line magic function `%nbdev_hide` not found.\n" + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", + "Converted 01_layers.ipynb.\n", + "Converted 03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", + "Converted 05_Twist.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", + "Converted index.ipynb.\n" ] } ], diff --git a/nbs/01_activations.ipynb b/nbs/01_activations.ipynb index 176827d..4c3c4e9 100644 --- a/nbs/01_activations.ipynb +++ b/nbs/01_activations.ipynb @@ -338,8 +338,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2020-11-11T16:24:49.841271Z", - "start_time": "2020-11-11T16:24:49.167581Z" + "end_time": "2020-11-12T07:27:35.048914Z", + "start_time": "2020-11-12T07:27:34.356516Z" } }, "outputs": [ @@ -400,7 +400,12 @@ "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, - "toc_position": {}, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "341.333px" + }, "toc_section_display": true, "toc_window_display": true } diff --git a/nbs/03_MXResNet.ipynb b/nbs/03_MXResNet.ipynb index 5cfe5f1..f2dc0d9 100644 --- a/nbs/03_MXResNet.ipynb +++ b/nbs/03_MXResNet.ipynb @@ -59,8 +59,9 @@ "source": [ "#export\n", "from functools import partial\n", - "from model_constructor.net import Net\n", - "from model_constructor.activations import Mish" + "\n", + "from model_constructor.activations import Mish\n", + "from model_constructor.net import Net" ] }, { diff --git a/nbs/04_YaResNet.ipynb b/nbs/04_YaResNet.ipynb index d41879d..6e89dac 100644 --- a/nbs/04_YaResNet.ipynb +++ b/nbs/04_YaResNet.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": [], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -40,27 +29,37 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:35:33.138912Z", + "start_time": "2020-11-12T08:35:32.764886Z" + } + }, "outputs": [], "source": [ "#hide\n", "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": 
"2020-11-12T08:35:33.431038Z", + "start_time": "2020-11-12T08:35:33.393005Z" + } + }, "outputs": [], "source": [ "#export\n", "import torch.nn as nn\n", - "import sys, torch\n", "from functools import partial\n", "from collections import OrderedDict\n", - "from model_constructor.layers import *\n", + "from model_constructor.layers import SEBlock, ConvLayer, act_fn, noop, SimpleSelfAttention\n", "from model_constructor.net import Net\n", "from model_constructor.activations import Mish" ] @@ -84,28 +83,32 @@ "class YaResBlock(nn.Module):\n", " '''YaResBlock. Reduce by pool instead of stride 2'''\n", " se_block = SEBlock\n", + "\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", - " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False,\n", + " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False,\n", " groups=1, dw=False):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " if groups != 1: groups = int(nh/groups)\n", - " self.reduce = noop if stride==1 else pool\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " if groups != 1:\n", + " groups = int(nh / groups)\n", + " self.reduce = noop if stride == 1 else pool\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ] if expansion == 1 else [\n", + " (\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", - " if se: layers.append(('se', self.se_block(nf)))\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " if se:\n", + " layers.append(('se', self.se_block(nf)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.merge =act_fn\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.merge = act_fn\n", "\n", " def forward(self, x):\n", " o = self.reduce(x)\n", @@ -969,10 +972,9 @@ "outputs": [], "source": [ "#export\n", - "yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(),\n", - " 'stem_sizes': [3,32,64,64], 'stem_stride_on': 1}\n", - "yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters)\n", - "yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters)" + "yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(), 'stem_stride_on': 1}\n", + "yaresnet34 = 
partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters)\n", + "yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters)" ] }, { @@ -1102,6 +1104,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/05_Twist.ipynb b/nbs/05_Twist.ipynb index 2dd28aa..13a4f2f 100644 --- a/nbs/05_Twist.ipynb +++ b/nbs/05_Twist.ipynb @@ -2,8 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:25.512884Z", + "start_time": "2020-11-12T08:18:25.319186Z" + }, "hide_input": true }, "outputs": [], @@ -13,18 +17,14 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cells will be exported to model_constructor.twist,\n", - "unless a different module is specified after an export flag: `%nbdev_export special.module`\n" - ] + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:26.436816Z", + "start_time": "2020-11-12T08:18:26.430668Z" } - ], + }, + "outputs": [], "source": [ "#default_exp twist" ] @@ -40,38 +40,54 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 47, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:00.353036Z", + "start_time": "2020-11-12T08:20:00.345417Z" + } + }, "outputs": [], "source": [ "#hide\n", "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "from model_constructor.net import Net" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:29.558525Z", + "start_time": "2020-11-12T08:18:29.163775Z" + } + }, "outputs": [], "source": [ "#export\n", - "from functools import partial\n", + "# from functools import partial\n", "from collections import OrderedDict\n", - "from model_constructor.layers import *\n", - "from model_constructor.net import *" + "from model_constructor.layers import ConvLayer, noop, act_fn, SimpleSelfAttention" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:29.564828Z", + "start_time": "2020-11-12T08:18:29.561559Z" + } + }, "outputs": [], "source": [ "#export\n", - "import sys, torch\n", - "nn = torch.nn\n", - "F = torch.nn.functional" + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import numpy as np" ] }, { @@ -83,8 +99,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:30.293010Z", + "start_time": "2020-11-12T08:18:30.245062Z" + } + }, "outputs": [], "source": [ 
"#export\n", @@ -94,49 +115,51 @@ " twist = False\n", " use_groups = True\n", " groups_ch = 8\n", + "\n", " def __init__(self, ni, nf,\n", " ks=3, stride=1, padding=1, bias=False,\n", " groups=1, iters=1, init_max=0.7, **kvargs):\n", " super().__init__()\n", - " self.same = ni==nf and stride==1\n", - " self.groups = ni//self.groups_ch if self.use_groups else 1\n", - "\n", + " self.same = ni == nf and stride == 1\n", + " self.groups = ni // self.groups_ch if self.use_groups else 1\n", " self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=False, groups=self.groups)\n", " if self.twist:\n", " std = self.conv.weight.std().item()\n", - " self.coeff_Ax = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True)\n", - " self.coeff_Ay = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True)\n", + " self.coeff_Ax = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True)\n", + " self.coeff_Ay = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True)\n", " self.iters = iters\n", " self.stride = stride\n", " self.DD = self.derivatives()\n", "\n", " def derivatives(self):\n", - " I = torch.Tensor([[0,0,0],[0,1,0],[0,0,0]]).view(1,1,3,3)\n", - " D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).view(1,1,3,3) / 10\n", - " D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).view(1,1,3,3) / 10\n", + " I = torch.Tensor([[0, 0, 0], [0, 1, 0], [0, 0, 0]]).view(1, 1, 3, 3) # noqa E741\n", + " D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).view(1, 1, 3, 3) / 10\n", + " D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).view(1, 1, 3, 3) / 10\n", + "\n", " def convolution(K1, K2):\n", " return F.conv2d(K1, K2.flip(2).flip(3), padding=2)\n", - " D_xx = convolution(I+D_x, I+D_x).view(5,5)\n", - " D_yy = convolution(I+D_y, I+D_y).view(5,5)\n", - " D_xy = convolution(I+D_x, I+D_y).view(5,5)\n", + " D_xx = convolution(I + D_x, I + D_x).view(5, 5)\n", + " D_yy = convolution(I + D_y, I + D_y).view(5, 5)\n", + " D_xy = convolution(I + D_x, I + D_y).view(5, 5)\n", " return {'x': D_x, 'y': D_y, 'xx': D_xx, 'yy': D_yy, 'xy': D_xy}\n", "\n", " def kernel(self, coeff_x, coeff_y):\n", - " D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).to(coeff_x.device)\n", - " D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).to(coeff_x.device)\n", - " return coeff_x[:,:,None,None] * D_x + coeff_y[:,:,None,None] * D_y\n", + " D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).to(coeff_x.device)\n", + " D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).to(coeff_x.device)\n", + " return coeff_x[:, :, None, None] * D_x + coeff_y[:, :, None, None] * D_y\n", "\n", - " def full_kernel(self, kernel): # permuting the groups\n", - " if self.groups==1: return kernel\n", + " def full_kernel(self, kernel): # permuting the groups\n", + " if self.groups == 1:\n", + " return kernel\n", " n = self.groups\n", - " a,b,_,_ = kernel.size()\n", - " a = a//n\n", - " KK = torch.zeros((a*n,b*n,3,3)).to(kernel.device)\n", + " a, b, _, _ = kernel.size()\n", + " a = a // n\n", + " KK = torch.zeros((a * n, b * n, 3, 3)).to(kernel.device)\n", " for i in range(n):\n", - " if i%4==0:\n", - " KK[a*i:a*(i+1),b*(i+3):b*(i+4)] = kernel[a*i:a*(i+1)]\n", + " if i % 4 == 0:\n", + " KK[a * i:a * (i + 1), b * (i + 3):b * (i + 4)] = kernel[a * i:a * (i + 1)]\n", " else:\n", - " KK[a*i:a*(i+1),b*(i-1):b*i] = kernel[a*i:a*(i+1)]\n", + " KK[a * i:a * (i + 1), b * (i - 1):b * i] = kernel[a * i:a * (i + 1)]\n", " return KK\n", "\n", " def 
_conv(self, inpt, kernel=None):\n", @@ -151,23 +174,25 @@ " if self.same:\n", " n = conv_wt.size()[1]\n", " for i in range(self.groups):\n", - " conv_wt.data[n*i:n*(i+1)] = (conv_wt[n*i:n*(i+1)] + torch.transpose(conv_wt[n*i:n*(i+1)],0,1)) / 2\n", + " conv_wt.data[n * i:n * (i + 1)] = (conv_wt[n * i:n * (i + 1)]\n", + " + torch.transpose(conv_wt[n * i:n * (i + 1)], 0, 1)) / 2 # noqa E503\n", "\n", " def forward(self, inpt):\n", " out = self._conv(inpt)\n", " if self.twist is False:\n", " return out\n", - " _,_,h,w = out.size()\n", - " XX = torch.from_numpy(np.indices((1,1,h,w))[3]*2/w-1).type(out.dtype).to(out.device)\n", - " YY = torch.from_numpy(np.indices((1,1,h,w))[2]*2/h-1).type(out.dtype).to(out.device)\n", + " _, _, h, w = out.size()\n", + " XX = torch.from_numpy(np.indices((1, 1, h, w))[3] * 2 / w - 1).type(out.dtype).to(out.device)\n", + " YY = torch.from_numpy(np.indices((1, 1, h, w))[2] * 2 / h - 1).type(out.dtype).to(out.device)\n", " kernel_x = self.kernel(self.coeff_Ax, self.coeff_Ay)\n", " self.symmetrize(kernel_x)\n", - " kernel_y = kernel_x.transpose(2,3).flip(3) # make conv_y a 90 degree rotation of conv_x\n", + " kernel_y = kernel_x.transpose(2, 3).flip(3) # make conv_y a 90 degree rotation of conv_x\n", " out = out + XX * self._conv(inpt, kernel_x) + YY * self._conv(inpt, kernel_y)\n", - " if self.same and self.iters>1:\n", + " if self.same and self.iters > 1:\n", " out = inpt + out / self.iters\n", - " for _ in range(self.iters-1):\n", - " out = out + (self._conv(out) + XX * self._conv(out, kernel_x) + YY * self._conv(out, kernel_y)) / self.iters\n", + " for _ in range(self.iters - 1):\n", + " out = out + (self._conv(out) + XX * self._conv(out, kernel_x)\n", + " + YY * self._conv(out, kernel_y)) / self.iters # noqa E727\n", " out = out - inpt\n", " return out\n", "\n", @@ -177,8 +202,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:31.165082Z", + "start_time": "2020-11-12T08:18:31.139082Z" + } + }, "outputs": [ { "data": { @@ -189,7 +219,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -200,8 +230,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:31.789815Z", + "start_time": "2020-11-12T08:18:31.778360Z" + } + }, "outputs": [ { "data": { @@ -209,7 +244,7 @@ "(False, True)" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -220,8 +255,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:32.194529Z", + "start_time": "2020-11-12T08:18:32.188588Z" + } + }, "outputs": [ { "data": { @@ -229,7 +269,7 @@ "(True, 8)" ] }, - "execution_count": null, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -240,8 +280,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:32.636064Z", + "start_time": "2020-11-12T08:18:32.618791Z" + } + }, "outputs": [ { "data": { @@ -252,7 +297,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -263,8 +308,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": 
{}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:33.086882Z", + "start_time": "2020-11-12T08:18:33.074498Z" + } + }, "outputs": [ { "data": { @@ -275,7 +325,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -295,19 +345,29 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:34.287872Z", + "start_time": "2020-11-12T08:18:34.283346Z" + } + }, "outputs": [], "source": [ "#export\n", - "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist\n", + "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist\n", " Conv2d = ConvTwist" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:34.985510Z", + "start_time": "2020-11-12T08:18:34.978384Z" + } + }, "outputs": [ { "data": { @@ -322,7 +382,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -333,8 +393,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:35.457612Z", + "start_time": "2020-11-12T08:18:35.454604Z" + } + }, "outputs": [ { "data": { @@ -342,7 +407,7 @@ "torch.nn.modules.conv.Conv2d" ] }, - "execution_count": null, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -353,8 +418,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:35.830439Z", + "start_time": "2020-11-12T08:18:35.825197Z" + } + }, "outputs": [ { "data": { @@ -362,7 +432,7 @@ "__main__.ConvTwist" ] }, - "execution_count": null, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -373,8 +443,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:36.197433Z", + "start_time": "2020-11-12T08:18:36.189636Z" + } + }, "outputs": [ { "data": { @@ -389,7 +464,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -401,8 +476,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:36.730907Z", + "start_time": "2020-11-12T08:18:36.723051Z" + } + }, "outputs": [ { "data": { @@ -417,7 +497,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -430,8 +510,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:37.264435Z", + "start_time": "2020-11-12T08:18:37.258921Z" + } + }, "outputs": [ { "data": { @@ -445,7 +530,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -457,8 +542,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:37.659139Z", + "start_time": "2020-11-12T08:18:37.642241Z" + } + }, "outputs": [ { "data": { @@ -472,7 +562,7 @@ ")" ] }, - 
"execution_count": null, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -484,8 +574,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:38.065874Z", + "start_time": "2020-11-12T08:18:38.058467Z" + } + }, "outputs": [ { "data": { @@ -500,7 +595,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -512,8 +607,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:38.469685Z", + "start_time": "2020-11-12T08:18:38.449640Z" + } + }, "outputs": [ { "data": { @@ -528,7 +628,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -540,8 +640,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:38.895662Z", + "start_time": "2020-11-12T08:18:38.883054Z" + } + }, "outputs": [ { "data": { @@ -556,7 +661,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -568,8 +673,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 23, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:39.340478Z", + "start_time": "2020-11-12T08:18:39.330542Z" + } + }, "outputs": [ { "data": { @@ -584,7 +694,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -596,8 +706,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 24, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:39.797981Z", + "start_time": "2020-11-12T08:18:39.776137Z" + } + }, "outputs": [ { "data": { @@ -612,7 +727,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -624,8 +739,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 25, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:45.758367Z", + "start_time": "2020-11-12T08:18:45.738924Z" + } + }, "outputs": [ { "data": { @@ -640,7 +760,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -660,8 +780,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 26, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:50.010162Z", + "start_time": "2020-11-12T08:18:50.000426Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -670,20 +795,19 @@ " conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True, **kvargs):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - "# conv_layer = ConvLayerTwist\n", - " self.reduce = noop if stride==1 else pool\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - "# (f\"conv_1\", conv_layer(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1_twist\", 
ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " self.reduce = noop if stride == 1 else pool\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ] if expansion == 1 else [\n", + " (\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1_twist\", ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)\n", " self.merge = act_fn\n", "\n", " def forward(self, x):\n", @@ -693,8 +817,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:50.982172Z", + "start_time": "2020-11-12T08:18:50.968881Z" + } + }, "outputs": [ { "data": { @@ -726,7 +855,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -738,8 +867,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:51.998734Z", + "start_time": "2020-11-12T08:18:51.809441Z" + } + }, "outputs": [ { "name": "stdout", @@ -760,8 +894,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 29, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:52.418937Z", + "start_time": "2020-11-12T08:18:52.411660Z" + } + }, "outputs": [ { "data": { @@ -791,7 +930,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -803,8 +942,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 30, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:53.209143Z", + "start_time": "2020-11-12T08:18:53.152654Z" + } + }, "outputs": [ { "name": "stdout", @@ -825,8 +969,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 31, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:53.789204Z", + "start_time": "2020-11-12T08:18:53.759211Z" + } + }, "outputs": [ { "data": { @@ -860,7 +1009,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -872,8 +1021,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:54.642171Z", + "start_time": "2020-11-12T08:18:54.527000Z" + } + }, "outputs": [ { "name": "stdout", @@ -894,13 +1048,18 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 35, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:11.269093Z", + "start_time": "2020-11-12T08:19:11.261967Z" + } + }, "outputs": [ { "data": { "text/plain": [ - 
"NewResBlock(\n", + "NewResBlockTwist(\n", " (convs): Sequential(\n", " (conv_0): ConvLayer(\n", " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", @@ -919,21 +1078,26 @@ ")" ] }, - "execution_count": null, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#hide\n", - "bl = NewResBlock(1,64,64,sa=True)\n", + "bl = NewResBlockTwist(1,64,64,sa=True)\n", "bl" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 36, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:16.610982Z", + "start_time": "2020-11-12T08:19:16.548733Z" + } + }, "outputs": [ { "name": "stdout", @@ -954,13 +1118,18 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 37, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:22.398767Z", + "start_time": "2020-11-12T08:19:22.390688Z" + } + }, "outputs": [ { "data": { "text/plain": [ - "NewResBlock(\n", + "NewResBlockTwist(\n", " (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)\n", " (convs): Sequential(\n", " (conv_0): ConvLayer(\n", @@ -968,8 +1137,11 @@ " (act_fn): LeakyReLU(negative_slope=0.01)\n", " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", - " (conv_1): ConvLayer(\n", - " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (conv_1_twist): ConvLayerTwist(\n", + " (conv): ConvTwist(\n", + " twist: False, permute: False, same: True, groups: 32\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)\n", + " )\n", " (act_fn): LeakyReLU(negative_slope=0.01)\n", " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", @@ -986,21 +1158,26 @@ ")" ] }, - "execution_count": null, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#hide\n", - "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", + "bl = NewResBlockTwist(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 38, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:23.881803Z", + "start_time": "2020-11-12T08:19:23.733092Z" + } + }, "outputs": [ { "name": "stdout", @@ -1028,39 +1205,49 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 39, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:26.850428Z", + "start_time": "2020-11-12T08:19:26.812087Z" + } + }, "outputs": [], "source": [ "#export\n", "class ResBlockTwist(nn.Module):\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", - " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, **kvargs):\n", + " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, **kvargs):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - "# conv_layer = ConvLayerTwist\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - "# (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", - " 
(f\"conv_1_twist\",ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ] if expansion == 1 else [\n", + " (\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1_twist\", ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.pool = noop if stride==1 else pool\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.act_fn =act_fn\n", + " self.pool = noop if stride == 1 else pool\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.act_fn = act_fn\n", "\n", - " def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" + " def forward(self, x):\n", + " return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 40, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:27.745755Z", + "start_time": "2020-11-12T08:19:27.716441Z" + } + }, "outputs": [ { "data": { @@ -1092,7 +1279,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1104,8 +1291,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 41, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:28.942123Z", + "start_time": "2020-11-12T08:19:28.748609Z" + } + }, "outputs": [ { "name": "stdout", @@ -1126,8 +1318,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 42, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:30.285784Z", + "start_time": "2020-11-12T08:19:30.276885Z" + } + }, "outputs": [ { "data": { @@ -1157,7 +1354,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -1169,8 +1366,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 43, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:31.429871Z", + "start_time": "2020-11-12T08:19:31.365409Z" + } + }, "outputs": [ { "name": "stdout", @@ -1191,8 +1393,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 44, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:32.205820Z", + "start_time": "2020-11-12T08:19:32.195805Z" + } + }, "outputs": [ { "data": { @@ -1226,7 +1433,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1238,8 +1445,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 45, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:33.144161Z", + "start_time": "2020-11-12T08:19:33.052565Z" + } + }, "outputs": [ { "name": "stdout", @@ -1267,8 +1479,13 @@ }, { "cell_type": 
"code", - "execution_count": null, - "metadata": {}, + "execution_count": 48, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:05.027890Z", + "start_time": "2020-11-12T08:20:05.019776Z" + } + }, "outputs": [], "source": [ "model = Net(expansion=4, layers=[3,4,6,3])" @@ -1276,8 +1493,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 49, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:06.785550Z", + "start_time": "2020-11-12T08:20:06.783529Z" + } + }, "outputs": [], "source": [ "model.block = NewResBlockTwist" @@ -1285,8 +1507,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 50, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:07.860796Z", + "start_time": "2020-11-12T08:20:07.744871Z" + } + }, "outputs": [ { "data": { @@ -1674,7 +1901,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -1685,8 +1912,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 51, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:09.929905Z", + "start_time": "2020-11-12T08:20:09.814360Z" + } + }, "outputs": [ { "name": "stdout", @@ -1707,8 +1939,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 52, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:11.144544Z", + "start_time": "2020-11-12T08:20:10.869947Z" + } + }, "outputs": [ { "name": "stdout", @@ -1729,8 +1966,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 53, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:12.613607Z", + "start_time": "2020-11-12T08:20:12.608992Z" + } + }, "outputs": [], "source": [ "model.block = ResBlockTwist" @@ -1738,8 +1980,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 54, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:14.002066Z", + "start_time": "2020-11-12T08:20:13.711883Z" + } + }, "outputs": [ { "name": "stdout", @@ -1760,8 +2007,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 55, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:15.284464Z", + "start_time": "2020-11-12T08:20:14.960297Z" + } + }, "outputs": [], "source": [ "m = model()" @@ -1769,8 +2021,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 56, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:16.044885Z", + "start_time": "2020-11-12T08:20:16.017202Z" + } + }, "outputs": [ { "data": { @@ -2184,7 +2441,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -2196,8 +2453,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 57, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:18.292655Z", + "start_time": "2020-11-12T08:20:17.639599Z" + } + }, "outputs": [ { "name": "stdout", @@ -2218,8 +2480,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 58, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:18.474912Z", + "start_time": "2020-11-12T08:20:18.462739Z" + } + }, "outputs": [ { "data": { @@ -2244,7 +2511,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 58, "metadata": {}, 
"output_type": "execute_result" } @@ -2255,8 +2522,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 59, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:19.475034Z", + "start_time": "2020-11-12T08:20:19.470735Z" + } + }, "outputs": [ { "data": { @@ -2268,7 +2540,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -2279,8 +2551,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 60, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:20.251290Z", + "start_time": "2020-11-12T08:20:20.245098Z" + } + }, "outputs": [ { "data": { @@ -2359,7 +2636,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -2370,8 +2647,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 61, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:21.109147Z", + "start_time": "2020-11-12T08:20:21.094037Z" + } + }, "outputs": [ { "data": { @@ -2473,7 +2755,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -2484,8 +2766,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 62, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:21.965972Z", + "start_time": "2020-11-12T08:20:21.949886Z" + } + }, "outputs": [ { "data": { @@ -2631,7 +2918,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -2642,8 +2929,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 63, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:22.541277Z", + "start_time": "2020-11-12T08:20:22.535216Z" + } + }, "outputs": [ { "data": { @@ -2723,7 +3015,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -2743,19 +3035,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 64, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:41.163167Z", + "start_time": "2020-11-12T08:20:40.672551Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Converted 00_constructor.ipynb.\n", + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", "Converted 01_layers.ipynb.\n", - "Converted 02_resnet.ipynb.\n", - "Converted 03_xresnet.ipynb.\n", - "Converted 04_Net.ipynb.\n", + "Converted 03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", "Converted 05_Twist.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -2779,6 +3078,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git 
a/nbs/10_base_constructor.ipynb b/nbs/10_base_constructor.ipynb index 011659f..eaa3e79 100644 --- a/nbs/10_base_constructor.ipynb +++ b/nbs/10_base_constructor.ipynb @@ -58,7 +58,7 @@ "#export\n", "import torch.nn as nn\n", "from collections import OrderedDict\n", - "from model_constructor.layers import ConvLayer, Noop, Flatten\n" + "from model_constructor.layers import ConvLayer, Noop, Flatten" ] }, { @@ -73,7 +73,7 @@ "outputs": [], "source": [ "#export\n", - "act_fn = nn.ReLU(inplace=True)\n" + "act_fn = nn.ReLU(inplace=True)" ] }, { @@ -118,7 +118,7 @@ " super().__init__(OrderedDict(stem))\n", "\n", " def extra_repr(self):\n", - " return f\"sizes: {self.sizes}\"\n" + " return f\"sizes: {self.sizes}\"" ] }, { @@ -390,7 +390,7 @@ "#export\n", "def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs):\n", " '''Base downsample for res-like blocks'''\n", - " return conv_layer(ni, nf, ks, stride, act, **kwargs)\n" + " return conv_layer(ni, nf, ks, stride, act, **kwargs)" ] }, { @@ -425,7 +425,7 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))\n" + " return self.act_conn(self.merge(out + identity))" ] }, { @@ -454,7 +454,7 @@ " ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), # noqa: E241\n", " ('conv_2', conv_layer(nh, nf, ks=1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))]))\n", " if self.downsample:\n", - " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, \n", + " self.downsample = downsample_block(conv_layer, ni, nf, ks=1,\n", " stride=stride, act=False, act_fn=act_fn, **kwargs)\n", " self.merge = Noop()\n", " self.act_conn = act_fn\n", @@ -464,7 +464,7 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))\n" + " return self.act_conn(self.merge(out + identity))" ] }, { @@ -494,7 +494,7 @@ " for i in range(blocks)]))\n", "\n", " def extra_repr(self):\n", - " return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'\n" + " return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'" ] }, { @@ -531,7 +531,7 @@ " sa=sa if i == 0 else False,\n", " **kwargs))\n", " for i in range(num_layers)]\n", - " super().__init__(OrderedDict(layers))\n" + " super().__init__(OrderedDict(layers))" ] }, { @@ -1015,7 +1015,7 @@ " [('pool', nn.AdaptiveAvgPool2d((1, 1))),\n", " ('flat', Flatten()),\n", " ('fc', nn.Linear(ni, nf)),\n", - " ]))\n" + " ]))" ] }, { @@ -1083,7 +1083,7 @@ " '''Init model'''\n", " for m in model.modules():\n", " if isinstance(m, (nn.Conv2d, nn.Linear)):\n", - " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)\n" + " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)" ] }, { @@ -1115,7 +1115,7 @@ " sa=sa, **kwargs)),\n", " ('head', head(body_out * expansion, num_classes, **kwargs))\n", " ]))\n", - " self.init_model(self)\n" + " self.init_model(self)" ] }, { diff --git a/nbs/11_xresnet.ipynb b/nbs/11_xresnet.ipynb index 6a1a5cd..8cdccbb 100644 --- a/nbs/11_xresnet.ipynb +++ b/nbs/11_xresnet.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": [], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -40,35 +29,51 @@ }, { "cell_type": "code", - "execution_count": null, - 
"metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:00.230515Z", + "start_time": "2020-11-12T08:27:00.225163Z" + } + }, "outputs": [], "source": [ "#hide\n", "from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch\n", + "from model_constructor.base_constructor import Body" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:33.911400Z", + "start_time": "2020-11-12T08:26:33.908197Z" + } + }, "outputs": [], "source": [ "#export\n", "import torch.nn as nn\n", - "import torch\n", "from collections import OrderedDict" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:35.974938Z", + "start_time": "2020-11-12T08:26:35.963223Z" + } + }, "outputs": [], "source": [ "#export\n", - "from model_constructor.base_constructor import *\n", + "from model_constructor.base_constructor import Net\n", "from model_constructor.layers import ConvLayer, Noop, act_fn" ] }, @@ -81,8 +86,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:37.307393Z", + "start_time": "2020-11-12T08:26:37.299905Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -91,16 +101,22 @@ " def __init__(self, conv_layer, ni, nf, stride, act,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), pool_1st=True,\n", " **kwargs):\n", - " layers = [] if stride==1 else [('pool', pool)]\n", - " layers += [] if ni==nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))]\n", - " if not pool_1st: layers.reverse()\n", + " layers = [] if stride == 1 else [('pool', pool)]\n", + " layers += [] if ni == nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))]\n", + " if not pool_1st:\n", + " layers.reverse()\n", " super().__init__(OrderedDict(layers))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:38.091773Z", + "start_time": "2020-11-12T08:26:38.083440Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -108,26 +124,33 @@ " def __init__(self, ni, nh, expansion=1, stride=1, zero_bn=True,\n", " conv_layer=ConvLayer, act_fn=act_fn, **kwargs):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", - " ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", - " ] if expansion == 1 else [\n", - " ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)),\n", - " ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", - " ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", + " ] if expansion == 1 else [\n", + " ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)),\n", + " ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", " ]\n", " 
self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.identity = DownsampleLayer(conv_layer, ni, nf, stride, act=False, act_fn=act_fn, **kwargs) if ni!=nf or stride==2 else Noop()\n", - " self.merge = Noop() # us it to visualize in repr residual connection\n", + " self.identity = DownsampleLayer(conv_layer, ni, nf, stride,\n", + " act=False, act_fn=act_fn, **kwargs) if ni != nf or stride == 2 else Noop()\n", + " self.merge = Noop()\n", " self.act_fn = act_fn\n", "\n", - " def forward(self, x): return self.act_fn(self.merge(self.convs(x) + self.identity(x)))" + " def forward(self, x):\n", + " return self.act_fn(self.merge(self.convs(x) + self.identity(x)))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:04.422376Z", + "start_time": "2020-11-12T08:27:04.311568Z" + } + }, "outputs": [ { "data": { @@ -339,7 +362,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -351,8 +374,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:05.677644Z", + "start_time": "2020-11-12T08:27:05.230761Z" + } + }, "outputs": [ { "data": { @@ -360,7 +388,7 @@ "torch.Size([16, 2048, 4, 4])" ] }, - "execution_count": null, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -380,26 +408,38 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:07.200891Z", + "start_time": "2020-11-12T08:27:07.193160Z" + } + }, "outputs": [], "source": [ "#export\n", "def xresnet18(**kwargs):\n", " \"\"\"Constructs a xresnet-18 model. \"\"\"\n", - " return Net(stem_sizes=[32,32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs)\n", + " return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs)\n", + "\n", "def xresnet34(**kwargs):\n", " \"\"\"Constructs axresnet-34 model. \"\"\"\n", - " return Net(stem_sizes=[32,32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs)\n", + " return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs)\n", + "\n", "def xresnet50(**kwargs):\n", " \"\"\"Constructs axresnet-34 model. 
\"\"\"\n", - " return Net(stem_sizes=[32,32],block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs)" + " return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:08.398983Z", + "start_time": "2020-11-12T08:27:08.223017Z" + } + }, "outputs": [ { "data": { @@ -592,7 +632,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -603,8 +643,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:09.474372Z", + "start_time": "2020-11-12T08:27:09.151532Z" + } + }, "outputs": [ { "data": { @@ -925,7 +970,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -936,8 +981,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:10.416343Z", + "start_time": "2020-11-12T08:27:10.054562Z" + } + }, "outputs": [], "source": [ "model = xresnet50()" @@ -945,8 +995,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:10.986719Z", + "start_time": "2020-11-12T08:27:10.980613Z" + } + }, "outputs": [ { "data": { @@ -972,7 +1027,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -983,8 +1038,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:11.658112Z", + "start_time": "2020-11-12T08:27:11.651507Z" + } + }, "outputs": [ { "data": { @@ -1364,7 +1424,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1375,8 +1435,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:12.307670Z", + "start_time": "2020-11-12T08:27:12.303289Z" + } + }, "outputs": [ { "data": { @@ -1388,7 +1453,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1399,8 +1464,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:13.380250Z", + "start_time": "2020-11-12T08:27:12.965825Z" + } + }, "outputs": [ { "data": { @@ -1408,7 +1478,7 @@ "torch.Size([8, 1000])" ] }, - "execution_count": null, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1430,18 +1500,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:25.906210Z", + "start_time": "2020-11-12T08:27:25.404060Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Converted 00_constructor.ipynb.\n", + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", "Converted 01_layers.ipynb.\n", - "Converted 02_resnet.ipynb.\n", - "Converted 03_xresnet.ipynb.\n", - "Converted 80_test_layers.ipynb.\n", + "Converted 
03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", + "Converted 05_Twist.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -1465,6 +1543,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/setup.cfg b/setup.cfg index 2fb20a2..000ba67 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,4 +6,4 @@ doctests = True verbose = 2 # https://pep8.readthedocs.io/en/latest/intro.html#error-codes format = pylint -# ignore = \ No newline at end of file +ignore = W292, E302, E305 \ No newline at end of file diff --git a/setup.py b/setup.py index 9f3776e..7849a78 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ +# flake8: noqa E126 from packaging.version import parse from configparser import ConfigParser import setuptools -assert parse(setuptools.__version__)>=parse('36.2') +assert parse(setuptools.__version__) >= parse('36.2') # note: all settings are in settings.ini; edit there, not here config = ConfigParser(delimiters=['=']) @@ -10,37 +11,37 @@ cfg_keys = 'version description keywords author author_email'.split() expected = cfg_keys + "lib_name user branch license status min_python audience language".split() -for o in expected: assert o in cfg, "missing expected setting: {}".format(o) -setup_cfg = {o:cfg[o] for o in cfg_keys} +for o in expected: + assert o in cfg, "missing expected setting: {}".format(o) +setup_cfg = {o: cfg[o] for o in cfg_keys} licenses = { 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'), } -statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha', - '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ] +statuses = ['1 - Planning', '2 - Pre-Alpha', '3 - Alpha', + '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive'] py_versions = '2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8'.split() -requirements = cfg.get('requirements','').split() +requirements = cfg.get('requirements', '').split() lic = licenses[cfg['license']] min_python = cfg['min_python'] setuptools.setup( - name = cfg['lib_name'], - license = lic[0], - classifiers = [ - 'Development Status :: ' + statuses[int(cfg['status'])], - 'Intended Audience :: ' + cfg['audience'].title(), - 'License :: ' + lic[1], - 'Natural Language :: ' + cfg['language'].title(), - ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]], - url = 'https://github.com/{}/{}'.format(cfg['user'],cfg['lib_name']), - packages = setuptools.find_packages(), - include_package_data = True, - install_requires = requirements, - python_requires = '>=' + cfg['min_python'], - long_description = open('README.md').read(), - long_description_content_type = 'text/markdown', - zip_safe = False, - entry_points = { 'console_scripts': cfg.get('console_scripts','').split() }, - **setup_cfg) - + name = cfg['lib_name'], + license = lic[0], + classifiers = [ + 'Development 
Status :: ' + statuses[int(cfg['status'])],
+            'Intended Audience :: ' + cfg['audience'].title(),
+            'License :: ' + lic[1],
+            'Natural Language :: ' + cfg['language'].title(),
+        ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]],
+        url = 'https://github.com/{}/{}'.format(cfg['user'],cfg['lib_name']),
+        packages = setuptools.find_packages(),
+        include_package_data = True,
+        install_requires = requirements,
+        python_requires = '>=' + cfg['min_python'],
+        long_description = open('README.md').read(),
+        long_description_content_type = 'text/markdown',
+        zip_safe = False,
+        entry_points = { 'console_scripts': cfg.get('console_scripts','').split() },
+        **setup_cfg)
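
A minimal smoke test for the constructors this patch touches -- a sketch only, assuming the notebooks above are exported with nbdev so that `Net` is importable from `model_constructor.net` (per `#default_exp net` in 00_Net.ipynb); the batch shape and the 1000-class head are illustrative, chosen to match the shapes printed in the notebook outputs:

    import torch
    from model_constructor.net import Net   # constructor object exported from 00_Net.ipynb

    model = Net(expansion=4, layers=[3, 4, 6, 3])   # resnet50-like configuration, as used in the notebook
    m = model()                                     # calling the constructor builds the nn.Module
    xb = torch.randn(8, 3, 128, 128)                # illustrative input batch
    print(m(xb).shape)                              # should print torch.Size([8, 1000]) with the default head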