From 7885af4bf13cb0249a806b2cb6e13fac3a846863 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Wed, 11 Nov 2020 13:46:52 +0000 Subject: [PATCH 1/3] cleared magic --- model_constructor/activations.py | 1 - model_constructor/base_constructor.py | 2 - model_constructor/mxresnet.py | 1 + model_constructor/net.py | 1 - nbs/00_Net.ipynb | 989 +++++++------------------- nbs/01_activations.ipynb | 195 ++--- nbs/01_layers.ipynb | 286 ++------ nbs/03_MXResNet.ipynb | 24 +- nbs/04_YaResNet.ipynb | 50 +- nbs/05_Twist.ipynb | 54 +- nbs/10_base_constructor.ipynb | 463 ++++-------- nbs/11_xresnet.ipynb | 213 ++---- nbs/index.ipynb | 53 +- 13 files changed, 653 insertions(+), 1679 deletions(-) diff --git a/model_constructor/activations.py b/model_constructor/activations.py index c9a847b..a8cf539 100644 --- a/model_constructor/activations.py +++ b/model_constructor/activations.py @@ -155,7 +155,6 @@ def __init__(self, inplace: bool = False): def forward(self, x): return MishJitAutoFn.apply(x) - # Cell def hard_swish(x, inplace: bool = False): """Hard swish activation function""" diff --git a/model_constructor/base_constructor.py b/model_constructor/base_constructor.py index 325fcfd..600f2b4 100644 --- a/model_constructor/base_constructor.py +++ b/model_constructor/base_constructor.py @@ -60,7 +60,6 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) - # Cell class Bottleneck(nn.Module): '''Bottlneck block for resnet models''' @@ -123,7 +122,6 @@ def __init__(self, block, for i in range(num_layers)] super().__init__(OrderedDict(layers)) - # Cell class Head(nn.Sequential): '''base head''' diff --git a/model_constructor/mxresnet.py b/model_constructor/mxresnet.py index def2a05..6ed334d 100644 --- a/model_constructor/mxresnet.py +++ b/model_constructor/mxresnet.py @@ -5,6 +5,7 @@ # Cell import torch.nn as nn import sys, torch +import os from functools import partial from collections import OrderedDict from .layers import * diff --git a/model_constructor/net.py b/model_constructor/net.py index 34d59e5..69022be 100644 --- a/model_constructor/net.py +++ b/model_constructor/net.py @@ -17,7 +17,6 @@ def init_cnn(m): if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight) for l in m.children(): init_cnn(l) - # Cell class ResBlock(nn.Module): '''Resnet block''' diff --git a/nbs/00_Net.ipynb b/nbs/00_Net.ipynb index bae84d2..60df77b 100644 --- a/nbs/00_Net.ipynb +++ b/nbs/00_Net.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:10.106995Z", - "start_time": "2020-09-15T15:05:09.505889Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:10.480134Z", - "start_time": "2020-09-15T15:05:10.473435Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,21 +26,16 @@ } ], "source": [ - "%nbdev_default_export net" + "#default_exp net" ] }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:12.777083Z", - "start_time": "2020-09-15T15:05:12.773799Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide" + "#hide" ] }, { @@ -63,32 +49,22 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:05:14.588153Z", - 
"start_time": "2020-09-15T15:05:14.577005Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 71, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:51.267131Z", - "start_time": "2020-09-15T15:07:51.259892Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import sys, torch\n", "from functools import partial\n", @@ -98,13 +74,8 @@ }, { "cell_type": "code", - "execution_count": 105, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:36:04.485843Z", - "start_time": "2020-09-15T16:36:04.476302Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "from dataclasses import dataclass, asdict" @@ -119,22 +90,17 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:31.875738Z", - "start_time": "2020-09-15T15:06:31.851804Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "act_fn = nn.ReLU(inplace=True)\n", "\n", "def init_cnn(m):\n", " if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0)\n", " if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight)\n", - " for l in m.children(): init_cnn(l)\n" + " for l in m.children(): init_cnn(l)" ] }, { @@ -146,20 +112,15 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:51.253988Z", - "start_time": "2020-09-15T15:06:51.234702Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class ResBlock(nn.Module):\n", " '''Resnet block'''\n", " se_block = SEBlock\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", @@ -171,7 +132,7 @@ " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ] if expansion == 1 else [\n", " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, \n", + " (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", " groups= nh if dw else groups)),\n", " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", @@ -187,13 +148,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:51.661257Z", - "start_time": "2020-09-15T15:06:51.640107Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -217,7 +173,7 @@ ")" ] }, - "execution_count": 16, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -228,13 +184,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:52.454965Z", - "start_time": "2020-09-15T15:06:52.445849Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -263,7 +214,7 @@ ")" ] }, - "execution_count": 17, + "execution_count": null, "metadata": {}, 
"output_type": "execute_result" } @@ -274,13 +225,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:53.430030Z", - "start_time": "2020-09-15T15:06:53.403478Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -309,7 +255,7 @@ ")" ] }, - "execution_count": 18, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -320,13 +266,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:54.202712Z", - "start_time": "2020-09-15T15:06:54.175803Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -352,7 +293,7 @@ ")" ] }, - "execution_count": 19, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -363,13 +304,8 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:54.867910Z", - "start_time": "2020-09-15T15:06:54.841626Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -407,7 +343,7 @@ ")" ] }, - "execution_count": 20, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -433,21 +369,16 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:06:59.872031Z", - "start_time": "2020-09-15T15:06:59.824675Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet.\n", "class NewResBlock(nn.Module):\n", " '''YaResnet block'''\n", " se_block = SEBlock\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", @@ -460,7 +391,7 @@ " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ] if expansion == 1 else [\n", " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, \n", + " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", " groups= nh if dw else groups)), # stride 1 !!!\n", " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", @@ -470,20 +401,15 @@ " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", " self.merge =act_fn\n", "\n", - " def forward(self, x): \n", + " def forward(self, x):\n", " o = self.reduce(x)\n", " return self.merge(self.convs(o) + self.idconv(o))" ] }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:00.617334Z", - "start_time": "2020-09-15T15:07:00.605327Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -513,7 +439,7 @@ ")" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -524,16 +450,11 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:01.385225Z", - "start_time": "2020-09-15T15:07:01.375982Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + 
"#hide\n", "# %nbdev_export\n", "# from model_constructor.yaresnet import YaResBlock\n", "# NewResBlock = YaResBlock" @@ -541,13 +462,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:02.101788Z", - "start_time": "2020-09-15T15:07:02.071627Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -571,26 +487,21 @@ ")" ] }, - "execution_count": 24, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(1,64,64,sa=True)\n", "bl" ] }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:02.748148Z", - "start_time": "2020-09-15T15:07:02.694969Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -601,7 +512,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ -611,13 +522,8 @@ }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:03.330137Z", - "start_time": "2020-09-15T15:07:03.297402Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -648,26 +554,21 @@ ")" ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:03.913925Z", - "start_time": "2020-09-15T15:07:03.810639Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -678,7 +579,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -688,13 +589,8 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:04.317012Z", - "start_time": "2020-09-15T15:07:04.284583Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -725,26 +621,21 @@ ")" ] }, - "execution_count": 28, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, groups=4)\n", "bl" ] }, { "cell_type": "code", - "execution_count": 29, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:04.932651Z", - "start_time": "2020-09-15T15:07:04.800290Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -755,7 +646,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -772,20 +663,15 @@ }, { "cell_type": "code", - "execution_count": 30, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:05.800721Z", - "start_time": "2020-09-15T15:07:05.768220Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_stem(self):\n", - " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i+1], \n", - "# stride=2 if i==0 else 1, \n", - " stride=2 if i==self.stem_stride_on else 1, \n", + " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], 
self.stem_sizes[i+1],\n", + "# stride=2 if i==0 else 1,\n", + " stride=2 if i==self.stem_stride_on else 1,\n", " bn_layer=(not self.stem_bn_end) if i==(len(self.stem_sizes)-2) else True,\n", " act_fn=self.act_fn, bn_1st=self.bn_1st))\n", " for i in range(len(self.stem_sizes)-1)]\n", @@ -796,19 +682,14 @@ }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:06.313839Z", - "start_time": "2020-09-15T15:07:06.305739Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_layer(self,expansion,ni,nf,blocks,stride,sa):\n", " return nn.Sequential(OrderedDict(\n", - " [(f\"bl_{i}\", self.block(expansion, ni if i==0 else nf, nf, \n", + " [(f\"bl_{i}\", self.block(expansion, ni if i==0 else nf, nf,\n", " stride if i==0 else 1, sa=sa if i==blocks-1 else False,\n", " conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,\n", " zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, dw=self.dw, se=self.se))\n", @@ -817,19 +698,14 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:06.827551Z", - "start_time": "2020-09-15T15:07:06.803610Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_body(self):\n", - " blocks = [(f\"l_{i}\", self._make_layer(self,self.expansion, \n", - " self.block_sizes[i], self.block_sizes[i+1], l, \n", + " blocks = [(f\"l_{i}\", self._make_layer(self,self.expansion,\n", + " self.block_sizes[i], self.block_sizes[i+1], l,\n", " 1 if i==0 else 2, self.sa if i==0 else False))\n", " for i,l in enumerate(self.layers)]\n", " return nn.Sequential(OrderedDict(blocks))" @@ -837,16 +713,11 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:07.429989Z", - "start_time": "2020-09-15T15:07:07.409485Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def _make_head(self):\n", " head = [('pool', nn.AdaptiveAvgPool2d(1)),\n", " ('flat', Flatten()),\n", @@ -863,33 +734,28 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:08.290093Z", - "start_time": "2020-09-15T15:07:08.254969Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Net():\n", " \"\"\"Model constructor. 
As default - xresnet18\"\"\"\n", - " def __init__(self, name='Net', c_in=3, c_out=1000, \n", + " def __init__(self, name='Net', c_in=3, c_out=1000,\n", " block=ResBlock, conv_layer = ConvLayer,\n", - " block_sizes = [64,128,256,512], layers=[2,2,2,2], \n", + " block_sizes = [64,128,256,512], layers=[2,2,2,2],\n", " norm = nn.BatchNorm2d,\n", " act_fn=nn.ReLU(inplace=True),\n", " pool = nn.AvgPool2d(2, ceil_mode=True),\n", - " expansion=1, groups = 1, dw=False, \n", + " expansion=1, groups = 1, dw=False,\n", " sa=False, se=False, se_reduction=16,\n", " bn_1st = True,\n", " zero_bn=True,\n", " stem_stride_on = 0,\n", - " stem_sizes = [32,32,64], \n", + " stem_sizes = [32,32,64],\n", " stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", " stem_bn_end = False,\n", - " \n", + "\n", " _init_cnn = init_cnn,\n", " _make_stem = _make_stem,\n", " _make_layer = _make_layer,\n", @@ -903,10 +769,10 @@ " self.__dict__ = params\n", " self._block_sizes = params['block_sizes']\n", " if self.stem_sizes[0] != self.c_in: self.stem_sizes = [self.c_in] + self.stem_sizes\n", - " \n", + "\n", " @property\n", " def block_sizes(self):\n", - " return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4) \n", + " return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4)\n", "\n", " @property\n", " def stem(self):\n", @@ -919,7 +785,7 @@ " @property\n", " def body(self):\n", " return self._make_body(self)\n", - " \n", + "\n", " def __call__(self):\n", " model = nn.Sequential(OrderedDict([\n", " ('stem', self.stem),\n", @@ -941,13 +807,8 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:08.930672Z", - "start_time": "2020-09-15T15:07:08.919167Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -961,7 +822,7 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 35, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -973,13 +834,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:09.471080Z", - "start_time": "2020-09-15T15:07:09.460079Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -987,7 +843,7 @@ "[64, 128, 256, 512]" ] }, - "execution_count": 36, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -998,13 +854,8 @@ }, { "cell_type": "code", - "execution_count": 37, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:10.527262Z", - "start_time": "2020-09-15T15:07:10.514092Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1012,7 +863,7 @@ "[64, 64, 128, 256, 512]" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1023,13 +874,8 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:10.911200Z", - "start_time": "2020-09-15T15:07:10.907783Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1043,7 +889,7 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 38, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1055,13 +901,8 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:11.302065Z", - "start_time": "2020-09-15T15:07:11.290035Z" - } - }, + "execution_count": null, + 
"metadata": {}, "outputs": [ { "data": { @@ -1069,7 +910,7 @@ "[64, 128, 256, 512, 1024]" ] }, - "execution_count": 39, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1080,13 +921,8 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:11.709098Z", - "start_time": "2020-09-15T15:07:11.702269Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1100,26 +936,21 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 40, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model = Net(stem_sizes=[3,32,32,64])\n", "model" ] }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:12.090940Z", - "start_time": "2020-09-15T15:07:12.080199Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1127,7 +958,7 @@ "[64, 64, 128, 256, 512]" ] }, - "execution_count": 41, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1138,13 +969,8 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:12.469528Z", - "start_time": "2020-09-15T15:07:12.467407Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model = Net()" @@ -1152,29 +978,19 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:12.830071Z", - "start_time": "2020-09-15T15:07:12.821423Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# model.block_sizes = [64, 128, 256, 512] # wrong way --> use _block_sizes\n", "# model" ] }, { "cell_type": "code", - "execution_count": 44, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:13.183685Z", - "start_time": "2020-09-15T15:07:13.178314Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1199,25 +1015,20 @@ ")" ] }, - "execution_count": 44, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.stem" ] }, { "cell_type": "code", - "execution_count": 45, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:13.578197Z", - "start_time": "2020-09-15T15:07:13.555787Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1242,26 +1053,21 @@ ")" ] }, - "execution_count": 45, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.stem_stride_on = 1\n", "model.stem" ] }, { "cell_type": "code", - "execution_count": 46, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:14.101338Z", - "start_time": "2020-09-15T15:07:13.951890Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1272,7 +1078,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model.stem(xb)\n", @@ -1282,13 +1088,8 @@ }, { "cell_type": "code", - "execution_count": 47, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:14.269326Z", - "start_time": "2020-09-15T15:07:14.266922Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.bn_1st = False" @@ 
-1296,13 +1097,8 @@ }, { "cell_type": "code", - "execution_count": 48, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:14.782404Z", - "start_time": "2020-09-15T15:07:14.776999Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.act_fn =nn.LeakyReLU(inplace=True)" @@ -1310,13 +1106,8 @@ }, { "cell_type": "code", - "execution_count": 49, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:15.183253Z", - "start_time": "2020-09-15T15:07:15.175224Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.sa = True\n", @@ -1325,13 +1116,8 @@ }, { "cell_type": "code", - "execution_count": 50, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:15.677001Z", - "start_time": "2020-09-15T15:07:15.583127Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1391,25 +1177,20 @@ ")" ] }, - "execution_count": 50, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body.l_1" ] }, { "cell_type": "code", - "execution_count": 51, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:16.386300Z", - "start_time": "2020-09-15T15:07:16.216646Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1420,7 +1201,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1430,13 +1211,8 @@ }, { "cell_type": "code", - "execution_count": 52, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:16.733132Z", - "start_time": "2020-09-15T15:07:16.625466Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1494,25 +1270,20 @@ ")" ] }, - "execution_count": 52, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.body.l_0" ] }, { "cell_type": "code", - "execution_count": 53, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:17.338636Z", - "start_time": "2020-09-15T15:07:17.199753Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1523,7 +1294,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1533,13 +1304,8 @@ }, { "cell_type": "code", - "execution_count": 54, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:17.785128Z", - "start_time": "2020-09-15T15:07:17.708421Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1611,13 +1377,13 @@ ")" ] }, - "execution_count": 54, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.groups = 4\n", "model.expansion = 4\n", "model.body.l_0" @@ -1625,13 +1391,8 @@ }, { "cell_type": "code", - "execution_count": 55, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:18.351990Z", - "start_time": "2020-09-15T15:07:18.067907Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1642,7 +1403,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1652,13 +1413,8 @@ }, { "cell_type": "code", - "execution_count": 56, - "metadata": { - "ExecuteTime": { - "end_time": 
"2020-09-15T15:07:18.417339Z", - "start_time": "2020-09-15T15:07:18.354230Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1730,13 +1486,13 @@ ")" ] }, - "execution_count": 56, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.groups = 1\n", "model.dw = True\n", "model.expansion = 4\n", @@ -1745,13 +1501,8 @@ }, { "cell_type": "code", - "execution_count": 57, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.018917Z", - "start_time": "2020-09-15T15:07:18.662599Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1762,7 +1513,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1772,16 +1523,11 @@ }, { "cell_type": "code", - "execution_count": 58, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.025610Z", - "start_time": "2020-09-15T15:07:19.021179Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.groups = 1\n", "model.dw = 0\n", "model.expansion = 1" @@ -1789,13 +1535,8 @@ }, { "cell_type": "code", - "execution_count": 59, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.270731Z", - "start_time": "2020-09-15T15:07:19.261966Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model.block = NewResBlock" @@ -1803,13 +1544,8 @@ }, { "cell_type": "code", - "execution_count": 60, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:19.994546Z", - "start_time": "2020-09-15T15:07:19.851080Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1820,7 +1556,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_1.bl_0(xb)\n", @@ -1830,13 +1566,8 @@ }, { "cell_type": "code", - "execution_count": 61, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:20.532951Z", - "start_time": "2020-09-15T15:07:20.436949Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1871,25 +1602,20 @@ ")" ] }, - "execution_count": 61, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.body.l_1.bl_0" ] }, { "cell_type": "code", - "execution_count": 62, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:20.889742Z", - "start_time": "2020-09-15T15:07:20.886369Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# model = Net(expansion=4)\n", @@ -1898,28 +1624,18 @@ }, { "cell_type": "code", - "execution_count": 63, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:21.361772Z", - "start_time": "2020-09-15T15:07:21.358970Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.stem_bn_end = True" ] }, { "cell_type": "code", - "execution_count": 65, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:27.697499Z", - "start_time": "2020-09-15T15:07:27.684261Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1944,25 +1660,20 @@ ")" ] }, - "execution_count": 65, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", 
"model.stem" ] }, { "cell_type": "code", - "execution_count": 66, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:28.043163Z", - "start_time": "2020-09-15T15:07:27.957348Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2002,25 +1713,20 @@ ")" ] }, - "execution_count": 66, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.body.l_1.bl_0" ] }, { "cell_type": "code", - "execution_count": 67, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:28.430335Z", - "start_time": "2020-09-15T15:07:28.199731Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -2031,7 +1737,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = model.body.l_1.bl_0(xb)\n", @@ -2041,43 +1747,28 @@ }, { "cell_type": "code", - "execution_count": 68, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:07:28.435805Z", - "start_time": "2020-09-15T15:07:28.432826Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.stem_bn_end = False" ] }, { "cell_type": "code", - "execution_count": 72, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:01.044096Z", - "start_time": "2020-09-15T15:08:00.746890Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_collapse_input\n", + "#collapse_input\n", "m = model()" ] }, { "cell_type": "code", - "execution_count": 73, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:01.318234Z", - "start_time": "2020-09-15T15:08:01.291990Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2366,25 +2057,20 @@ ")" ] }, - "execution_count": 73, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "m" ] }, { "cell_type": "code", - "execution_count": 74, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:02.174918Z", - "start_time": "2020-09-15T15:08:02.147042Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2647,25 +2333,20 @@ ")" ] }, - "execution_count": 74, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "m.body" ] }, { "cell_type": "code", - "execution_count": 75, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:03.486811Z", - "start_time": "2020-09-15T15:08:02.911398Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -2676,7 +2357,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = m(xb)\n", @@ -2686,13 +2367,8 @@ }, { "cell_type": "code", - "execution_count": 76, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:03.501054Z", - "start_time": "2020-09-15T15:08:03.499056Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "## xresnet constructor\n", @@ -2712,13 +2388,8 @@ }, { "cell_type": "code", - "execution_count": 77, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:04.282047Z", - "start_time": "2020-09-15T15:08:04.273710Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# m = net50(c_out=10)" @@ -2726,13 +2397,8 @@ }, { 
"cell_type": "code", - "execution_count": 78, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:04.693899Z", - "start_time": "2020-09-15T15:08:04.691824Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# m, m.c_out" @@ -2740,16 +2406,11 @@ }, { "cell_type": "code", - "execution_count": 79, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:05.884571Z", - "start_time": "2020-09-15T15:08:05.871787Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "xresnet34_parameters = {\n", " 'name': 'xresnet34',\n", " 'expansion': 1,\n", @@ -2765,13 +2426,8 @@ }, { "cell_type": "code", - "execution_count": 80, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:06.495561Z", - "start_time": "2020-09-15T15:08:06.489809Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2785,26 +2441,21 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 80, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model34 = Net(**xresnet34_parameters)\n", "model34" ] }, { "cell_type": "code", - "execution_count": 81, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:07.115853Z", - "start_time": "2020-09-15T15:08:07.098473Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2818,42 +2469,32 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 81, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model50 = Net(**xresnet50_parameters)\n", "model50" ] }, { "cell_type": "code", - "execution_count": 82, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:07.761469Z", - "start_time": "2020-09-15T15:08:07.745240Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "xresnet34 = partial(Net, name='xresnet34', expansion=1, layers = [3, 4, 6, 3])\n", "xresnet50 = partial(Net, name='xresnet34', expansion=4, layers = [3, 4, 6, 3])" ] }, { "cell_type": "code", - "execution_count": 83, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:08.451245Z", - "start_time": "2020-09-15T15:08:08.435714Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2867,26 +2508,21 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 83, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model34 = xresnet34()\n", "model34" ] }, { "cell_type": "code", - "execution_count": 84, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:08:09.050303Z", - "start_time": "2020-09-15T15:08:09.032704Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2900,26 +2536,21 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 84, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model50 = xresnet50()\n", "model50" ] }, { "cell_type": "code", - "execution_count": 100, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T15:23:29.673176Z", - "start_time": "2020-09-15T15:23:29.668886Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "class XResNet34(Net):\n", @@ -2930,14 +2561,8 @@ }, { "cell_type": "code", - "execution_count": 104, - "metadata": { - 
"ExecuteTime": { - "end_time": "2020-09-15T15:24:03.920353Z", - "start_time": "2020-09-15T15:24:03.912929Z" - }, - "scrolled": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2951,7 +2576,7 @@ " layers: [3, 4, 6, 3]" ] }, - "execution_count": 104, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2963,13 +2588,8 @@ }, { "cell_type": "code", - "execution_count": 112, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:41:41.886520Z", - "start_time": "2020-09-15T16:41:41.884324Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "from dataclasses import field" @@ -2977,13 +2597,8 @@ }, { "cell_type": "code", - "execution_count": 119, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:43:31.023793Z", - "start_time": "2020-09-15T16:43:31.018109Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "@dataclass\n", @@ -2994,13 +2609,8 @@ }, { "cell_type": "code", - "execution_count": 120, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:43:32.399958Z", - "start_time": "2020-09-15T16:43:32.394016Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3008,7 +2618,7 @@ "{'name': 'xresnet34', 'layers': [3, 4, 6, 3]}" ] }, - "execution_count": 120, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3019,13 +2629,8 @@ }, { "cell_type": "code", - "execution_count": 126, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:45:55.696580Z", - "start_time": "2020-09-15T16:45:55.692231Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "@dataclass\n", @@ -3036,13 +2641,8 @@ }, { "cell_type": "code", - "execution_count": 127, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:45:56.207660Z", - "start_time": "2020-09-15T16:45:56.203225Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3050,7 +2650,7 @@ "Xres50(name='xresnet34', layers=[3, 4, 6, 3], expansion=4)" ] }, - "execution_count": 127, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3061,13 +2661,8 @@ }, { "cell_type": "code", - "execution_count": 128, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:46:01.313728Z", - "start_time": "2020-09-15T16:46:01.307542Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3075,7 +2670,7 @@ "{'name': 'xresnet34', 'layers': [3, 4, 6, 3], 'expansion': 4}" ] }, - "execution_count": 128, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3086,13 +2681,8 @@ }, { "cell_type": "code", - "execution_count": 129, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T16:46:27.728315Z", - "start_time": "2020-09-15T16:46:27.722310Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3106,7 +2696,7 @@ " layers: [2, 2, 2, 2]" ] }, - "execution_count": 129, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3127,13 +2717,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:31:33.882175Z", - "start_time": "2020-09-19T07:31:33.877122Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -3144,7 +2729,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -3162,36 +2747,6 @@ 
"display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "247.323px" - }, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_activations.ipynb b/nbs/01_activations.ipynb index 38a440b..067b62d 100644 --- a/nbs/01_activations.ipynb +++ b/nbs/01_activations.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:27.761520Z", - "start_time": "2020-09-19T07:39:27.123512Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:27.766214Z", - "start_time": "2020-09-19T07:39:27.762895Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export activations" + "#default_exp activations" ] }, { @@ -66,16 +57,11 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:30.846492Z", - "start_time": "2020-09-19T07:39:30.492725Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# forked from https://github.com/rwightman/pytorch-image-models/timm/models/layers/activations.py\n", "import torch\n", "from torch import nn as nn\n", @@ -91,16 +77,11 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:39:52.285184Z", - "start_time": "2020-09-19T07:39:52.258095Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def swish(x, inplace: bool = False):\n", " \"\"\"Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", " return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())\n", @@ -124,16 +105,11 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:59:58.190958Z", - "start_time": "2020-09-19T07:59:58.181329Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def swish_jit(x, inplace: bool = False):\n", " \"\"\"Jit version of Swish.\n", @@ -142,7 +118,7 @@ " return x.mul(x.sigmoid())\n", "\n", "class SwishJit(nn.Module):\n", - " \"\"\"Jit version of Swish. 
\n", + " \"\"\"Jit version of Swish.\n", " Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", " def __init__(self, inplace: bool = False):\n", " super(SwishJit, self).__init__()\n", @@ -160,16 +136,11 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:06:09.243621Z", - "start_time": "2020-09-19T08:06:09.204597Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "\n", "@torch.jit.script\n", "def swish_jit_bwd(x, grad_output):\n", @@ -215,16 +186,11 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:48:01.044576Z", - "start_time": "2020-09-19T07:48:01.036045Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def mish(x, inplace: bool = False):\n", " \"\"\"Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681\n", " NOTE: I don't have a working inplace variant\n", @@ -251,26 +217,21 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:59:31.493260Z", - "start_time": "2020-09-19T07:59:31.464218Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def mish_jit(x, _inplace: bool = False):\n", - " \"\"\"Jit version of Mish. \n", + " \"\"\"Jit version of Mish.\n", " Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681\n", " \"\"\"\n", " return x.mul(F.softplus(x).tanh())\n", "\n", "class MishJit(nn.Module):\n", " def __init__(self, inplace: bool = False):\n", - " \"\"\"Jit version of Mish. \n", + " \"\"\"Jit version of Mish.\n", " Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681\"\"\"\n", " super(MishJit, self).__init__()\n", "\n", @@ -287,16 +248,11 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:12:49.922152Z", - "start_time": "2020-09-19T08:12:49.903635Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "\n", "@torch.jit.script\n", "def mish_jit_fwd(x):\n", @@ -336,7 +292,7 @@ " super(MishMe, self).__init__()\n", "\n", " def forward(self, x):\n", - " return MishJitAutoFn.apply(x)\n" + " return MishJitAutoFn.apply(x)" ] }, { @@ -355,16 +311,11 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:42:36.529178Z", - "start_time": "2020-09-19T07:42:36.519813Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def hard_swish(x, inplace: bool = False):\n", " \"\"\"Hard swish activation function\"\"\"\n", " inner = F.relu6(x + 3.).div_(6.)\n", @@ -394,7 +345,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def hard_swish_jit(x, inplace: bool = False):\n", " # return x * (F.relu6(x + 3.) 
/ 6)\n", @@ -418,16 +369,11 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:12:29.059919Z", - "start_time": "2020-09-19T08:12:29.044200Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "\n", "@torch.jit.script\n", "def hard_swish_jit_fwd(x):\n", @@ -477,16 +423,11 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:50:17.702309Z", - "start_time": "2020-09-19T07:50:17.690530Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def hard_mish(x, inplace: bool = False):\n", " \"\"\" Hard Mish\n", " Experimental, based on notes by Mish author Diganta Misra at\n", @@ -532,16 +473,11 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:01:27.116469Z", - "start_time": "2020-09-19T08:01:27.106399Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def hard_mish_jit(x, inplace: bool = False):\n", " \"\"\" Hard Mish\n", @@ -576,7 +512,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "@torch.jit.script\n", "def hard_mish_jit_fwd(x):\n", " return 0.5 * x * (x + 2).clamp(min=0, max=2)\n", @@ -634,7 +570,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "act_fn = Swish(inplace=True)" ] }, @@ -644,7 +580,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "act_fn = Mish(inplace=True)" ] }, @@ -659,13 +595,8 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T08:13:09.610451Z", - "start_time": "2020-09-19T08:13:09.045551Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -684,7 +615,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -702,36 +633,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "341.333px" - }, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_layers.ipynb b/nbs/01_layers.ipynb index 4308e2a..fd04abc 100644 --- a/nbs/01_layers.ipynb +++ b/nbs/01_layers.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:28.613591Z", - "start_time": "2020-09-15T09:02:28.034529Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:28.724960Z", - "start_time": "2020-09-15T09:02:28.712914Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { 
"name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export layers" + "#default_exp layers" ] }, { @@ -49,35 +40,25 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:29.280009Z", - "start_time": "2020-09-15T09:02:29.274755Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:29.881698Z", - "start_time": "2020-09-15T09:02:29.560082Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import torch\n", - "from torch.nn.utils import spectral_norm # weight_norm, \n", + "from torch.nn.utils import spectral_norm # weight_norm,\n", "from collections import OrderedDict" ] }, @@ -90,16 +71,11 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:02:59.401503Z", - "start_time": "2020-09-15T09:02:59.394107Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Flatten(nn.Module):\n", " '''flat x to vector'''\n", " def __init__(self):\n", @@ -116,23 +92,18 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:03:06.151213Z", - "start_time": "2020-09-15T09:03:06.144829Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def noop(x): return x\n", "\n", - "class Noop(nn.Module): \n", + "class Noop(nn.Module):\n", " '''Dummy module'''\n", " def __init__(self):\n", " super().__init__()\n", - " \n", + "\n", " def forward(self, x):\n", " return x" ] @@ -146,32 +117,27 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:03:32.218769Z", - "start_time": "2020-09-15T09:03:32.206265Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "act_fn = nn.ReLU(inplace=True)\n", "\n", "class ConvLayer(nn.Sequential):\n", " \"\"\"Basic conv layers block\"\"\"\n", " Conv2d = nn.Conv2d\n", - " def __init__(self, ni, nf, ks=3, stride=1, \n", - " act=True, act_fn=act_fn, \n", - " bn_layer=True, bn_1st=True, zero_bn=False, \n", + " def __init__(self, ni, nf, ks=3, stride=1,\n", + " act=True, act_fn=act_fn,\n", + " bn_layer=True, bn_1st=True, zero_bn=False,\n", " padding=None, bias=False, groups=1, **kwargs):\n", "\n", - " if padding==None: padding = ks//2 \n", + " if padding==None: padding = ks//2\n", " layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]\n", " act_bn = [('act_fn', act_fn)] if act else []\n", " if bn_layer:\n", " bn = nn.BatchNorm2d(nf)\n", - " nn.init.constant_(bn.weight, 0. if zero_bn else 1.) \n", + " nn.init.constant_(bn.weight, 0. 
if zero_bn else 1.)\n", " act_bn += [('bn', bn)]\n", " if bn_1st: act_bn.reverse()\n", " layers += act_bn\n", @@ -180,13 +146,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:12.561785Z", - "start_time": "2020-09-15T09:04:12.552909Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -198,7 +159,7 @@ ")" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -210,13 +171,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:13.100662Z", - "start_time": "2020-09-15T09:04:13.083970Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -227,7 +183,7 @@ ")" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -239,13 +195,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:13.619415Z", - "start_time": "2020-09-15T09:04:13.614435Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -256,7 +207,7 @@ ")" ] }, - "execution_count": 11, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -268,13 +219,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:14.115042Z", - "start_time": "2020-09-15T09:04:14.100292Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -286,7 +232,7 @@ ")" ] }, - "execution_count": 12, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -298,13 +244,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:04:14.808440Z", - "start_time": "2020-09-15T09:04:14.798904Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -316,7 +257,7 @@ ")" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -328,16 +269,11 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:09:38.178529Z", - "start_time": "2020-09-15T09:09:38.161811Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 32, 32, 32)\n", "y = conv_layer(xb)\n", @@ -354,16 +290,11 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:06:35.947787Z", - "start_time": "2020-09-15T09:06:35.932076Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# SA module from mxresnet at fastai. 
todo - add persons!!!\n", "\n", "#Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", @@ -380,7 +311,7 @@ "class SimpleSelfAttention(nn.Module):\n", " def __init__(self, n_in:int, ks=1, sym=False):#, n_out:int):\n", " super().__init__()\n", - " self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False) \n", + " self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False)\n", " self.gamma = nn.Parameter(torch.tensor([0.]))\n", " self.sym = sym\n", " self.n_in = n_in\n", @@ -390,7 +321,7 @@ " c = self.conv.weight.view(self.n_in,self.n_in)\n", " c = (c + c.t())/2\n", " self.conv.weight = c.view(self.n_in,self.n_in,1)\n", - " size = x.size() \n", + " size = x.size()\n", " x = x.view(*size[:2],-1) # (C,N)\n", " # changed the order of mutiplication to avoid O(N^2) complexity\n", " # (x*xT)*(W*x) instead of (x*(xT*(W*x)))\n", @@ -410,16 +341,11 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:06:43.209459Z", - "start_time": "2020-09-15T09:06:43.195928Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class SEBlock(nn.Module):\n", " \"se block\"\n", " se_layer = nn.Linear\n", @@ -434,7 +360,7 @@ " ('se_act', self.act_fn),\n", " ('fc_expand', self.se_layer(ch, c, bias=False)),\n", " ('sigmoid', nn.Sigmoid())\n", - " ])) \n", + " ]))\n", "\n", " def forward(self, x):\n", " bs, c, _, _ = x.shape\n", @@ -445,28 +371,18 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:02.304137Z", - "start_time": "2020-09-15T09:07:02.299521Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# return torch.mul(x, y.expand_as(x))# " ] }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:02.934031Z", - "start_time": "2020-09-15T09:07:02.922985Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -482,7 +398,7 @@ ")" ] }, - "execution_count": 23, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -494,13 +410,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:05.796153Z", - "start_time": "2020-09-15T09:07:05.732761Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -511,7 +422,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 128, 32, 32)\n", "y = se_block(xb)\n", @@ -528,16 +439,11 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:07.917795Z", - "start_time": "2020-09-15T09:07:07.883347Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class SEBlockConv(nn.Module):\n", " \"se block with conv on excitation\"\n", " se_layer = nn.Conv2d\n", @@ -558,18 +464,13 @@ " def forward(self, x):\n", " y = self.squeeze(x)\n", " y = self.excitation(y)\n", - " return x * y.expand_as(x) " + " return x * y.expand_as(x)" ] }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:10.529716Z", - "start_time": "2020-09-15T09:07:10.520001Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -585,7 +486,7 @@ 
")" ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -597,13 +498,8 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T09:07:12.126044Z", - "start_time": "2020-09-15T09:07:12.106701Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -614,7 +510,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 128, 32, 32)\n", "y = se_block(xb)\n", @@ -633,13 +529,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-19T07:29:31.169362Z", - "start_time": "2020-09-19T07:29:31.163089Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -650,7 +541,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -675,31 +566,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/03_MXResNet.ipynb b/nbs/03_MXResNet.ipynb index c24b1b5..7910e53 100644 --- a/nbs/03_MXResNet.ipynb +++ b/nbs/03_MXResNet.ipynb @@ -26,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export mxresnet" + "#default_exp mxresnet" ] }, { @@ -51,7 +51,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] @@ -62,9 +62,10 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import sys, torch\n", + "import os\n", "from functools import partial\n", "from collections import OrderedDict\n", "from model_constructor.layers import *\n", @@ -165,7 +166,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.stem" ] }, @@ -183,7 +184,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = mxresnet.stem(xb)\n", @@ -344,7 +345,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.body" ] }, @@ -501,7 +502,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.body" ] }, @@ -526,7 +527,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "mxresnet.head" ] }, @@ -544,7 +545,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = mxresnet()(xb)\n", @@ -558,8 +559,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", - "%nbdev_export_and_show\n", + "#export\n", "mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()}\n", "mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters)\n", "mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters)" @@ -647,7 +647,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from 
nbdev.export import *\n", "notebook2script()" ] diff --git a/nbs/04_YaResNet.ipynb b/nbs/04_YaResNet.ipynb index 87a5136..d41879d 100644 --- a/nbs/04_YaResNet.ipynb +++ b/nbs/04_YaResNet.ipynb @@ -26,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export yaresnet" + "#default_exp yaresnet" ] }, { @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] @@ -55,7 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import sys, torch\n", "from functools import partial\n", @@ -78,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "# YaResBlock - former NewResBlock.\n", "# Yet another ResNet.\n", "class YaResBlock(nn.Module):\n", " '''YaResBlock. Reduce by pool instead of stride 2'''\n", " se_block = SEBlock\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False,\n", " groups=1, dw=False):\n", @@ -97,7 +97,7 @@ " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ] if expansion == 1 else [\n", " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, \n", + " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", " groups= nh if dw else groups)), # stride 1 !!!\n", " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", @@ -107,7 +107,7 @@ " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", " self.merge =act_fn\n", "\n", - " def forward(self, x): \n", + " def forward(self, x):\n", " o = self.reduce(x)\n", " return self.merge(self.convs(o) + self.idconv(o))" ] @@ -145,7 +145,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(1,64,64,sa=True)\n", "bl" ] @@ -164,7 +164,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ -211,7 +211,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(1,64,64,se=True)\n", "bl" ] @@ -230,7 +230,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ -278,7 +278,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] @@ -297,7 +297,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -345,7 +345,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, groups=4)\n", "bl" ] @@ -364,7 +364,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -412,7 +412,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = YaResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False, dw=True)\n", "bl" ] @@ -431,7 +431,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -535,7 +535,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.stem" ] }, @@ -553,7 
+553,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = yaresnet.stem(xb)\n", @@ -714,7 +714,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.body" ] }, @@ -739,7 +739,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.head" ] }, @@ -943,7 +943,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "yaresnet.act_fn = Mish()\n", "yaresnet()" ] @@ -968,7 +968,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(),\n", " 'stem_sizes': [3,32,64,64], 'stem_stride_on': 1}\n", "yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters)\n", @@ -1044,7 +1044,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -1084,7 +1084,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] diff --git a/nbs/05_Twist.ipynb b/nbs/05_Twist.ipynb index 83a54ca..2dd28aa 100644 --- a/nbs/05_Twist.ipynb +++ b/nbs/05_Twist.ipynb @@ -26,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export twist" + "#default_exp twist" ] }, { @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "# from nbdev.showdoc import *\n", "from fastcore.test import *" ] @@ -55,7 +55,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "from functools import partial\n", "from collections import OrderedDict\n", "from model_constructor.layers import *\n", @@ -68,7 +68,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import sys, torch\n", "nn = torch.nn\n", "F = torch.nn.functional" @@ -87,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class ConvTwist(nn.Module):\n", " '''Replacement for Conv2d (kernelsize 3x3)'''\n", " permute = True\n", @@ -138,7 +138,7 @@ " else:\n", " KK[a*i:a*(i+1),b*(i-1):b*i] = kernel[a*i:a*(i+1)]\n", " return KK\n", - " \n", + "\n", " def _conv(self, inpt, kernel=None):\n", " if kernel is None:\n", " kernel = self.conv.weight\n", @@ -299,8 +299,8 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", - "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist \n", + "#export\n", + "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist\n", " Conv2d = ConvTwist" ] }, @@ -664,7 +664,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class NewResBlockTwist(nn.Module):\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,\n", @@ -750,7 +750,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -815,7 +815,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -884,7 +884,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -925,7 +925,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(1,64,64,sa=True)\n", "bl" ] @@ -944,7 +944,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = bl(xb)\n", @@ 
-992,7 +992,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] @@ -1011,7 +1011,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1032,9 +1032,9 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class ResBlockTwist(nn.Module):\n", - " def __init__(self, expansion, ni, nh, stride=1, \n", + " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, **kvargs):\n", " super().__init__()\n", @@ -1116,7 +1116,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1181,7 +1181,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1250,7 +1250,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 256, 32, 32)\n", "y = bl(xb)\n", @@ -1697,7 +1697,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model.stem(xb)\n", @@ -1719,7 +1719,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -1750,7 +1750,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 64, 32, 32)\n", "y = model.body.l_0(xb)\n", @@ -2190,7 +2190,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "m" ] }, @@ -2208,7 +2208,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = m(xb)\n", @@ -2761,7 +2761,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] diff --git a/nbs/10_base_constructor.ipynb b/nbs/10_base_constructor.ipynb index 9172408..c85ee82 100644 --- a/nbs/10_base_constructor.ipynb +++ b/nbs/10_base_constructor.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:19.736730Z", - "start_time": "2020-09-15T07:44:19.154933Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:20.901006Z", - "start_time": "2020-09-15T07:44:20.888522Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export base_constructor" + "#default_exp base_constructor" ] }, { @@ -49,32 +40,22 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:43.817966Z", - "start_time": "2020-09-15T07:44:43.812740Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 31, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:53.716094Z", - "start_time": "2020-09-15T07:47:53.712373Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import 
torch\n", "from collections import OrderedDict\n", @@ -83,16 +64,11 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:46:31.796210Z", - "start_time": "2020-09-15T07:46:31.790752Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "act_fn = nn.ReLU(inplace=True)" ] }, @@ -105,30 +81,25 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:48.264407Z", - "start_time": "2020-09-15T07:44:48.221947Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Stem(nn.Sequential):\n", " \"\"\"Base stem\"\"\"\n", - " def __init__(self, c_in=3, stem_sizes=[], stem_out=64, \n", - " conv_layer=ConvLayer, stride_on=0, \n", - " stem_bn_last=False, bn_1st=True, \n", - " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs): \n", + " def __init__(self, c_in=3, stem_sizes=[], stem_out=64,\n", + " conv_layer=ConvLayer, stride_on=0,\n", + " stem_bn_last=False, bn_1st=True,\n", + " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs):\n", " self.sizes = [c_in] + stem_sizes + [stem_out]\n", " num_layers = len(self.sizes)-1\n", - " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i+1], \n", + " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i+1],\n", " stride=2 if i==stride_on else 1, act=True,\n", - " bn_layer=not stem_bn_last if i==num_layers-1 else True, \n", + " bn_layer=not stem_bn_last if i==num_layers-1 else True,\n", " bn_1st=bn_1st, **kwargs))\n", " for i in range(num_layers)]\n", - " if stem_use_pool: stem += [('pool', stem_pool)] \n", + " if stem_use_pool: stem += [('pool', stem_pool)]\n", " if stem_bn_last: stem.append(('bn', nn.BatchNorm2d(stem_out)))\n", " super().__init__(OrderedDict(stem))\n", " def extra_repr(self):\n", @@ -137,13 +108,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:48.987592Z", - "start_time": "2020-09-15T07:44:48.965376Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -159,7 +125,7 @@ ")" ] }, - "execution_count": 6, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -171,13 +137,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:51.733455Z", - "start_time": "2020-09-15T07:44:51.703154Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -185,7 +146,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": 7, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -198,13 +159,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:52.270167Z", - "start_time": "2020-09-15T07:44:52.254111Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -220,7 +176,7 @@ ")" ] }, - "execution_count": 8, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -232,13 +188,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:53.128168Z", - "start_time": "2020-09-15T07:44:53.022016Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -246,7 +197,7 @@ 
"torch.Size([8, 64, 32, 32])" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -259,13 +210,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:53.838929Z", - "start_time": "2020-09-15T07:44:53.829075Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -286,7 +232,7 @@ ")" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -298,13 +244,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:54.773691Z", - "start_time": "2020-09-15T07:44:54.716307Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -312,7 +253,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": 11, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -325,13 +266,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:55.640343Z", - "start_time": "2020-09-15T07:44:55.637028Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "assert y.shape, torch.Size([64, 64, 32, 32])" @@ -339,13 +275,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:44:58.194349Z", - "start_time": "2020-09-15T07:44:58.173191Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -366,7 +297,7 @@ ")" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -392,16 +323,11 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:02.853520Z", - "start_time": "2020-09-15T07:47:02.843712Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs):\n", " '''Base downsample for res-like blocks'''\n", " return conv_layer(ni, nf, ks, stride, act, **kwargs)" @@ -409,16 +335,11 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:03.166537Z", - "start_time": "2020-09-15T07:47:03.157366Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class BasicBlock(nn.Module):\n", " \"\"\"Basic block (simplified) as in pytorch resnet\"\"\"\n", " def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False,\n", @@ -439,7 +360,7 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))\n" + " return self.act_conn(self.merge(out + identity))" ] }, { @@ -448,7 +369,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Bottleneck(nn.Module):\n", " '''Bottlneck block for resnet models'''\n", " def __init__(self, ni, nh, expansion=4, stride=1, zero_bn=False,\n", @@ -479,15 +400,10 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:10.025598Z", - "start_time": "2020-09-15T07:47:10.009381Z" - } - }, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class BasicLayer(nn.Sequential):\n", " '''Layer from blocks'''\n", " def __init__(self, block, 
blocks, ni, nf, expansion, stride, sa=False,**kwargs):\n", @@ -496,13 +412,13 @@ " self.blocks = blocks\n", " self.expansion = expansion\n", " super().__init__(OrderedDict(\n", - " [(f'block_{i}', block(ni if i==0 else nf, nf, expansion, \n", + " [(f'block_{i}', block(ni if i==0 else nf, nf, expansion,\n", " stride if i==0 else 1,\n", - " sa=sa if i==blocks-1 else False, \n", + " sa=sa if i==blocks-1 else False,\n", " **kwargs))\n", " for i in range(blocks)]))\n", " def extra_repr(self):\n", - " return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' " + " return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'" ] }, { @@ -514,44 +430,33 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:10.025598Z", - "start_time": "2020-09-15T07:47:10.009381Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", - "class Body(nn.Sequential): \n", + "#export\n", + "class Body(nn.Sequential):\n", " '''Constructor for body'''\n", - " def __init__(self, block, \n", - " body_in=64, body_out=512, \n", + " def __init__(self, block,\n", + " body_in=64, body_out=512,\n", " bodylayer=BasicLayer, expansion=1,\n", " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", - " sa=False, **kwargs): \n", + " sa=False, **kwargs):\n", " layer_szs = [body_in//expansion] + layer_szs + [body_out]\n", " num_layers = len(layer_szs)-1\n", - " layers = [(f\"layer_{i}\", bodylayer(block, blocks[i], \n", + " layers = [(f\"layer_{i}\", bodylayer(block, blocks[i],\n", " layer_szs[i], layer_szs[i+1], expansion,\n", - " 1 if i==0 else 2, \n", - " sa=sa if i==0 else False, \n", + " 1 if i==0 else 2,\n", + " sa=sa if i==0 else False,\n", " **kwargs))\n", " for i in range(num_layers)]\n", - " super().__init__(OrderedDict(layers))\n", - " " + " super().__init__(OrderedDict(layers))" ] }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:32.232273Z", - "start_time": "2020-09-15T07:47:32.159289Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -704,7 +609,7 @@ ")" ] }, - "execution_count": 24, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -716,13 +621,8 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:33.801922Z", - "start_time": "2020-09-15T07:47:33.684159Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -875,26 +775,21 @@ ")" ] }, - "execution_count": 25, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "body = Body(BasicBlock, bn_1st=False)\n", "body" ] }, { "cell_type": "code", - "execution_count": 26, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:34.729432Z", - "start_time": "2020-09-15T07:47:34.620483Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -914,13 +809,8 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:35.655255Z", - "start_time": "2020-09-15T07:47:35.575441Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -960,26 +850,21 @@ ")" ] }, - "execution_count": 27, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": 
[ - "%nbdev_hide\n", + "#hide\n", "body = Body(BasicBlock, sa=True)\n", "body.layer_0" ] }, { "cell_type": "code", - "execution_count": 28, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:36.664623Z", - "start_time": "2020-09-15T07:47:36.519727Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -990,7 +875,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs = 8\n", "xb = torch.randn(bs, 64, 32, 32)\n", "y = body(xb)\n", @@ -1007,16 +892,11 @@ }, { "cell_type": "code", - "execution_count": 29, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:39.115248Z", - "start_time": "2020-09-15T07:47:39.107197Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Head(nn.Sequential):\n", " '''base head'''\n", " def __init__(self, ni, nf, **kwargs):\n", @@ -1029,13 +909,8 @@ }, { "cell_type": "code", - "execution_count": 32, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:58.484497Z", - "start_time": "2020-09-15T07:47:58.480454Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "head = Head(512, 10)" @@ -1043,13 +918,8 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:47:59.217773Z", - "start_time": "2020-09-15T07:47:59.207843Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1057,7 +927,7 @@ "torch.Size([8, 10])" ] }, - "execution_count": 33, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1085,16 +955,11 @@ }, { "cell_type": "code", - "execution_count": 34, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:01.277470Z", - "start_time": "2020-09-15T07:48:01.270200Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def init_model(model, nonlinearity='leaky_relu'):\n", " '''Init model'''\n", " for m in model.modules():\n", @@ -1105,30 +970,25 @@ }, { "cell_type": "code", - "execution_count": 35, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:03.022644Z", - "start_time": "2020-09-15T07:48:02.983887Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class Net(nn.Sequential):\n", " '''Constructor for model'''\n", " def __init__(self, stem=Stem,\n", " body=Body, block=BasicBlock, sa=False,\n", " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", - " head=Head, \n", - " c_in=3, num_classes=1000, \n", + " head=Head,\n", + " c_in=3, num_classes=1000,\n", " body_in=64, body_out=512, expansion=1,\n", " init_fn=init_model, **kwargs):\n", " self.init_model=init_fn\n", " super().__init__(OrderedDict([\n", " ('stem', stem(c_in=c_in,stem_out=body_in, **kwargs)),\n", - " ('body', body(block, body_in, body_out, \n", - " layer_szs=layer_szs, blocks=blocks, expansion=expansion, \n", + " ('body', body(block, body_in, body_out,\n", + " layer_szs=layer_szs, blocks=blocks, expansion=expansion,\n", " sa=sa, **kwargs)),\n", " ('head', head(body_out*expansion, num_classes, **kwargs))\n", " ]))\n", @@ -1137,13 +997,8 @@ }, { "cell_type": "code", - "execution_count": 36, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:04.552204Z", - "start_time": "2020-09-15T07:48:04.392342Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model = Net()" @@ -1151,13 +1006,8 @@ }, { 
"cell_type": "code", - "execution_count": 37, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:05.372416Z", - "start_time": "2020-09-15T07:48:05.367071Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1326,7 +1176,7 @@ ")" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1337,13 +1187,8 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:06.841792Z", - "start_time": "2020-09-15T07:48:06.575545Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1351,7 +1196,7 @@ "torch.Size([16, 1000])" ] }, - "execution_count": 38, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1365,13 +1210,8 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:07.771461Z", - "start_time": "2020-09-15T07:48:07.599293Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1411,26 +1251,21 @@ ")" ] }, - "execution_count": 39, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model = Net(bn_1st=False)\n", "model.body.layer_0" ] }, { "cell_type": "code", - "execution_count": 40, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:08.706261Z", - "start_time": "2020-09-15T07:48:08.567145Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1441,7 +1276,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 8\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model(xb)\n", @@ -1451,13 +1286,8 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:09.591909Z", - "start_time": "2020-09-15T07:48:09.433365Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1626,26 +1456,21 @@ ")" ] }, - "execution_count": 41, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model = Net(sa=True)\n", "model" ] }, { "cell_type": "code", - "execution_count": 42, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:10.888883Z", - "start_time": "2020-09-15T07:48:10.772022Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1656,7 +1481,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 8\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model(xb)\n", @@ -1674,13 +1499,8 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:48:53.788843Z", - "start_time": "2020-09-15T07:48:53.151412Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1700,7 +1520,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -1732,31 +1552,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - 
"title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/11_xresnet.ipynb b/nbs/11_xresnet.ipynb index 806b913..6a1a5cd 100644 --- a/nbs/11_xresnet.ipynb +++ b/nbs/11_xresnet.ipynb @@ -2,12 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:46.593635Z", - "start_time": "2020-09-15T07:58:45.999276Z" - }, "hide_input": true }, "outputs": [], @@ -17,13 +13,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:46.598475Z", - "start_time": "2020-09-15T07:58:46.595162Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,7 +26,7 @@ } ], "source": [ - "%nbdev_default_export xresnet" + "#default_exp xresnet" ] }, { @@ -49,32 +40,22 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:46.651217Z", - "start_time": "2020-09-15T07:58:46.647967Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.showdoc import *\n", "from fastcore.test import *" ] }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:58:47.536442Z", - "start_time": "2020-09-15T07:58:47.235892Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "import torch.nn as nn\n", "import torch\n", "from collections import OrderedDict" @@ -82,16 +63,11 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:50.981128Z", - "start_time": "2020-09-15T07:59:50.975818Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "from model_constructor.base_constructor import *\n", "from model_constructor.layers import ConvLayer, Noop, act_fn" ] @@ -105,16 +81,11 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:33.445205Z", - "start_time": "2020-09-15T08:00:33.438414Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class DownsampleLayer(nn.Sequential):\n", " \"\"\"Downsample layer for Xresnet Resblock\"\"\"\n", " def __init__(self, conv_layer, ni, nf, stride, act,\n", @@ -128,16 +99,11 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:52.802349Z", - "start_time": "2020-09-15T07:59:52.767093Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "class XResBlock(nn.Module):\n", " def __init__(self, ni, nh, expansion=1, stride=1, zero_bn=True,\n", " conv_layer=ConvLayer, act_fn=act_fn, **kwargs):\n", @@ -160,14 +126,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:53.363415Z", - "start_time": "2020-09-15T07:59:53.262651Z" - }, - "scrolled": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -379,7 +339,7 @@ ")" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -391,13 +351,8 
@@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:54.659184Z", - "start_time": "2020-09-15T07:59:54.243419Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -405,7 +360,7 @@ "torch.Size([16, 2048, 4, 4])" ] }, - "execution_count": 14, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -425,16 +380,11 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:57.404159Z", - "start_time": "2020-09-15T07:59:57.394665Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "%nbdev_export\n", + "#export\n", "def xresnet18(**kwargs):\n", " \"\"\"Constructs a xresnet-18 model. \"\"\"\n", " return Net(stem_sizes=[32,32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs)\n", @@ -448,13 +398,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:57.999165Z", - "start_time": "2020-09-15T07:59:57.797163Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -647,7 +592,7 @@ ")" ] }, - "execution_count": 16, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -658,13 +603,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T07:59:59.209879Z", - "start_time": "2020-09-15T07:59:58.860378Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -985,7 +925,7 @@ ")" ] }, - "execution_count": 17, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -996,13 +936,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:00.927778Z", - "start_time": "2020-09-15T08:00:00.557930Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "model = xresnet50()" @@ -1010,13 +945,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:01.052196Z", - "start_time": "2020-09-15T08:00:01.040363Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1042,7 +972,7 @@ ")" ] }, - "execution_count": 19, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1053,13 +983,8 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:01.914108Z", - "start_time": "2020-09-15T08:00:01.884870Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1439,7 +1364,7 @@ ")" ] }, - "execution_count": 20, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1450,13 +1375,8 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:02.593246Z", - "start_time": "2020-09-15T08:00:02.587983Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1468,7 +1388,7 @@ ")" ] }, - "execution_count": 21, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1479,13 +1399,8 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": { - "ExecuteTime": { - "end_time": "2020-09-15T08:00:03.624497Z", - "start_time": "2020-09-15T08:00:03.210627Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -1493,7 
+1408,7 @@ "torch.Size([8, 1000])" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1532,7 +1447,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "from nbdev.export import *\n", "notebook2script()" ] @@ -1550,36 +1465,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": { - "height": "calc(100% - 180px)", - "left": "10px", - "top": "150px", - "width": "215.323px" - }, - "toc_section_display": true, - "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/index.ipynb b/nbs/index.ipynb index fd4eab7..3b2fa6d 100644 --- a/nbs/index.ipynb +++ b/nbs/index.ipynb @@ -17,7 +17,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "%reload_ext autoreload\n", "%autoreload 2" ] @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from functools import partial\n", "import torch" ] @@ -39,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "%nbdev_hide\n", + "#hide\n", "from model_constructor.layers import *" ] }, @@ -441,7 +441,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model()" ] }, @@ -819,7 +819,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body" ] }, @@ -837,7 +837,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -862,7 +862,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "model.block_sizes" ] }, @@ -1147,7 +1147,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model()" ] }, @@ -1232,7 +1232,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.stem.conv_1" ] }, @@ -1275,7 +1275,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body.l_0.bl_0" ] }, @@ -1293,7 +1293,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -1363,7 +1363,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.name = 'YaResNet'\n", "model" ] @@ -1415,7 +1415,7 @@ } ], "source": [ - "%nbdev_collapse_output\n", + "#collapse_output\n", "model.body.l_1.bl_0" ] }, @@ -1433,7 +1433,7 @@ } ], "source": [ - "%nbdev_hide\n", + "#hide\n", "bs_test = 16\n", "xb = torch.randn(bs_test, 3, 128, 128)\n", "y = model()(xb)\n", @@ -1461,31 +1461,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": 
true, - "toc_window_display": true } }, "nbformat": 4, From 1c0861aa890372e6f8baf881f79db83bccd79388 Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Thu, 12 Nov 2020 07:01:25 +0000 Subject: [PATCH 2/3] cleared code --- model_constructor/_nbdev.py | 19 - model_constructor/activations.py | 171 +-------- model_constructor/base_constructor.py | 127 ++++--- model_constructor/layers.py | 81 ++-- model_constructor/mxresnet.py | 9 +- model_constructor/net.py | 210 +++++----- nbs/00_Net.ipynb | 238 +++++++----- nbs/01_activations.ipynb | 420 +++++--------------- nbs/01_layers.ipynb | 81 ++-- nbs/03_MXResNet.ipynb | 252 ++++++++---- nbs/10_base_constructor.ipynb | 527 +++++++++++++++++--------- setup.cfg | 9 + 12 files changed, 1060 insertions(+), 1084 deletions(-) create mode 100644 setup.cfg diff --git a/model_constructor/_nbdev.py b/model_constructor/_nbdev.py index 22fca4a..127fa0c 100644 --- a/model_constructor/_nbdev.py +++ b/model_constructor/_nbdev.py @@ -9,14 +9,6 @@ "Net": "10_base_constructor.ipynb", "xresnet34": "11_xresnet.ipynb", "xresnet50": "11_xresnet.ipynb", - "swish": "01_activations.ipynb", - "Swish": "01_activations.ipynb", - "swish_jit": "01_activations.ipynb", - "SwishJit": "01_activations.ipynb", - "swish_jit_bwd": "01_activations.ipynb", - "SwishJitAutoFn": "01_activations.ipynb", - "swish_me": "01_activations.ipynb", - "SwishMe": "01_activations.ipynb", "mish": "01_activations.ipynb", "Mish": "01_activations.ipynb", "mish_jit": "01_activations.ipynb", @@ -26,17 +18,6 @@ "MishJitAutoFn": "01_activations.ipynb", "mish_me": "01_activations.ipynb", "MishMe": "01_activations.ipynb", - "hard_swish": "01_activations.ipynb", - "HardSwish": "01_activations.ipynb", - "hard_swish_jit": "01_activations.ipynb", - "HardSwishJit": "01_activations.ipynb", - "hard_swish_jit_fwd": "01_activations.ipynb", - "hard_swish_jit_bwd": "01_activations.ipynb", - "HardSwishJitAutoFn": "01_activations.ipynb", - "hard_swish_me": "01_activations.ipynb", - "HardSwishMe": "01_activations.ipynb", - "hard_mish": "01_activations.ipynb", - "HardMish": "01_activations.ipynb", "hard_mish_jit": "01_activations.ipynb", "HardMishJit": "01_activations.ipynb", "hard_mish_jit_fwd": "01_activations.ipynb", diff --git a/model_constructor/activations.py b/model_constructor/activations.py index a8cf539..19aabaf 100644 --- a/model_constructor/activations.py +++ b/model_constructor/activations.py @@ -1,10 +1,8 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/01_activations.ipynb (unless otherwise specified). 
-__all__ = ['swish', 'Swish', 'swish_jit', 'SwishJit', 'swish_jit_bwd', 'SwishJitAutoFn', 'swish_me', 'SwishMe', 'mish', - 'Mish', 'mish_jit', 'MishJit', 'mish_jit_fwd', 'mish_jit_bwd', 'MishJitAutoFn', 'mish_me', 'MishMe', - 'hard_swish', 'HardSwish', 'hard_swish_jit', 'HardSwishJit', 'hard_swish_jit_fwd', 'hard_swish_jit_bwd', - 'HardSwishJitAutoFn', 'hard_swish_me', 'HardSwishMe', 'hard_mish', 'HardMish', 'hard_mish_jit', - 'HardMishJit', 'hard_mish_jit_fwd', 'hard_mish_jit_bwd', 'HardMishJitAutoFn', 'hard_mish_me', 'HardMishMe'] +__all__ = ['mish', 'Mish', 'mish_jit', 'MishJit', 'mish_jit_fwd', 'mish_jit_bwd', 'MishJitAutoFn', 'mish_me', 'MishMe', + 'hard_mish_jit', 'HardMishJit', 'hard_mish_jit_fwd', 'hard_mish_jit_bwd', 'HardMishJitAutoFn', + 'hard_mish_me', 'HardMishMe'] # Cell # forked from https://github.com/rwightman/pytorch-image-models/timm/models/layers/activations.py @@ -12,73 +10,6 @@ from torch import nn as nn from torch.nn import functional as F -# Cell -def swish(x, inplace: bool = False): - """Swish - Described in: https://arxiv.org/abs/1710.05941""" - return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid()) - -class Swish(nn.Module): - """Swish - Described in: https://arxiv.org/abs/1710.05941""" - def __init__(self, inplace: bool = False): - super(Swish, self).__init__() - self.inplace = inplace - - def forward(self, x): - return swish(x, self.inplace) - -# Cell -@torch.jit.script -def swish_jit(x, inplace: bool = False): - """Jit version of Swish. - Swish- Described in: https://arxiv.org/abs/1710.05941 - """ - return x.mul(x.sigmoid()) - -class SwishJit(nn.Module): - """Jit version of Swish. - Swish - Described in: https://arxiv.org/abs/1710.05941""" - def __init__(self, inplace: bool = False): - super(SwishJit, self).__init__() - - def forward(self, x): - return swish_jit(x) - -# Cell - -@torch.jit.script -def swish_jit_bwd(x, grad_output): - x_sigmoid = torch.sigmoid(x) - return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid))) - - -class SwishJitAutoFn(torch.autograd.Function): - """ torch.jit.script optimised Swish w/ memory-efficient checkpoint - Inspired by conversation btw Jeremy Howard & Adam Pazske - https://twitter.com/jeremyphoward/status/1188251041835315200 - """ - - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return swish_jit_fwd(x) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - return swish_jit_bwd(x, grad_output) - - -def swish_me(x, inplace=False): - return SwishJitAutoFn.apply(x) - - -class SwishMe(nn.Module): - def __init__(self, inplace: bool = False): - super(SwishMe, self).__init__() - - def forward(self, x): - return SwishJitAutoFn.apply(x) - # Cell def mish(x, inplace: bool = False): """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 @@ -104,6 +35,7 @@ def mish_jit(x, _inplace: bool = False): """ return x.mul(F.softplus(x).tanh()) + class MishJit(nn.Module): def __init__(self, inplace: bool = False): """Jit version of Mish. @@ -114,10 +46,9 @@ def forward(self, x): return mish_jit(x) # Cell - @torch.jit.script def mish_jit_fwd(x): -# return x.mul(torch.tanh(F.softplus(x))) + # return x.mul(torch.tanh(F.softplus(x))) return x.mul(F.softplus(x).tanh()) @@ -155,98 +86,6 @@ def __init__(self, inplace: bool = False): def forward(self, x): return MishJitAutoFn.apply(x) -# Cell -def hard_swish(x, inplace: bool = False): - """Hard swish activation function""" - inner = F.relu6(x + 3.).div_(6.) 
- return x.mul_(inner) if inplace else x.mul(inner) - - -class HardSwish(nn.Module): - """Hard swish activation function""" - def __init__(self, inplace: bool = False): - super(HardSwish, self).__init__() - self.inplace = inplace - - def forward(self, x): - return hard_swish(x, self.inplace) - -# Cell -@torch.jit.script -def hard_swish_jit(x, inplace: bool = False): - # return x * (F.relu6(x + 3.) / 6) - return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? - - -class HardSwishJit(nn.Module): - def __init__(self, inplace: bool = False): - super(HardSwishJit, self).__init__() - - def forward(self, x): - return hard_swish_jit(x) - -# Cell - -@torch.jit.script -def hard_swish_jit_fwd(x): - return x * (x + 3).clamp(min=0, max=6).div(6.) - - -@torch.jit.script -def hard_swish_jit_bwd(x, grad_output): - m = torch.ones_like(x) * (x >= 3.) - m = torch.where((x >= -3.) & (x <= 3.), x / 3. + .5, m) - return grad_output * m - - -class HardSwishJitAutoFn(torch.autograd.Function): - """A memory efficient, jit-scripted HardSwish activation""" - @staticmethod - def forward(ctx, x): - ctx.save_for_backward(x) - return hard_swish_jit_fwd(x) - - @staticmethod - def backward(ctx, grad_output): - x = ctx.saved_tensors[0] - return hard_swish_jit_bwd(x, grad_output) - - -def hard_swish_me(x, inplace=False): - """A memory efficient, jit-scripted HardSwish activation""" - return HardSwishJitAutoFn.apply(x) - - -class HardSwishMe(nn.Module): - """A memory efficient, jit-scripted HardSwish activation""" - def __init__(self, inplace: bool = False): - super(HardSwishMe, self).__init__() - - def forward(self, x): - return HardSwishJitAutoFn.apply(x) - -# Cell -def hard_mish(x, inplace: bool = False): - """ Hard Mish - Experimental, based on notes by Mish author Diganta Misra at - https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md - """ - if inplace: - return x.mul_(0.5 * (x + 2).clamp(min=0, max=2)) - else: - return 0.5 * x * (x + 2).clamp(min=0, max=2) - - -class HardMish(nn.Module): - """Hard Mish, Experimental, based on notes by Mish author Diganta Misra at - https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md""" - def __init__(self, inplace: bool = False): - super(HardMish, self).__init__() - self.inplace = inplace - - def forward(self, x): - return hard_mish(x, self.inplace) - # Cell @torch.jit.script def hard_mish_jit(x, inplace: bool = False): diff --git a/model_constructor/base_constructor.py b/model_constructor/base_constructor.py index 600f2b4..11ceabf 100644 --- a/model_constructor/base_constructor.py +++ b/model_constructor/base_constructor.py @@ -5,48 +5,57 @@ # Cell import torch.nn as nn -import torch from collections import OrderedDict from .layers import ConvLayer, Noop, Flatten + # Cell act_fn = nn.ReLU(inplace=True) + # Cell class Stem(nn.Sequential): """Base stem""" - def __init__(self, c_in=3, stem_sizes=[], stem_out=64, - conv_layer=ConvLayer, stride_on=0, - stem_bn_last=False, bn_1st=True, - stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs): + + def __init__(self, c_in=3, stem_sizes=[], stem_out=64, + conv_layer=ConvLayer, stride_on=0, + stem_bn_last=False, bn_1st=True, + stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), + **kwargs + ): self.sizes = [c_in] + stem_sizes + [stem_out] - num_layers = len(self.sizes)-1 - stem = [(f"conv_{i}", conv_layer(self.sizes[i], self.sizes[i+1], - stride=2 if i==stride_on 
else 1, act=True, - bn_layer=not stem_bn_last if i==num_layers-1 else True, + num_layers = len(self.sizes) - 1 + stem = [(f"conv_{i}", conv_layer(self.sizes[i], self.sizes[i + 1], + stride=2 if i == stride_on else 1, act=True, + bn_layer=not stem_bn_last if i == num_layers - 1 else True, bn_1st=bn_1st, **kwargs)) - for i in range(num_layers)] - if stem_use_pool: stem += [('pool', stem_pool)] - if stem_bn_last: stem.append(('bn', nn.BatchNorm2d(stem_out))) + for i in range(num_layers)] + if stem_use_pool: + stem += [('pool', stem_pool)] + if stem_bn_last: + stem.append(('bn', nn.BatchNorm2d(stem_out))) super().__init__(OrderedDict(stem)) + def extra_repr(self): - return f"sizes: {self.sizes}" + return f"sizes: {self.sizes}" + # Cell def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs): '''Base downsample for res-like blocks''' return conv_layer(ni, nf, ks, stride, act, **kwargs) + # Cell class BasicBlock(nn.Module): """Basic block (simplified) as in pytorch resnet""" - def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False, - conv_layer=ConvLayer, act_fn=act_fn, - downsample_block=DownsampleBlock, **kwargs): + def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False, + conv_layer=ConvLayer, act_fn=act_fn, + downsample_block=DownsampleBlock, **kwargs): super().__init__() - self.downsample = not ni==nf or stride==2 + self.downsample = not ni == nf or stride == 2 self.conv = nn.Sequential(OrderedDict([ - ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)), ('conv_1', conv_layer(nf, nf, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))])) if self.downsample: self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, **kwargs) @@ -60,23 +69,24 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) + # Cell class Bottleneck(nn.Module): '''Bottlneck block for resnet models''' def __init__(self, ni, nh, expansion=4, stride=1, zero_bn=False, - conv_layer=ConvLayer, act_fn=act_fn, + conv_layer=ConvLayer, act_fn=act_fn, downsample_block=DownsampleBlock, **kwargs): -# groups=1, base_width=64, dilation=1, norm_layer=None super().__init__() - self.downsample = not ni==nh or stride==2 - ni = ni*expansion - nf = nh*expansion + self.downsample = not ni == nh or stride == 2 + ni = ni * expansion + nf = nh * expansion self.conv = nn.Sequential(OrderedDict([ - ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)), - ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)), # noqa: E241 + ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), # noqa: E241 ('conv_2', conv_layer(nh, nf, ks=1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))])) if self.downsample: - self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, act_fn=act_fn, **kwargs) + self.downsample = downsample_block(conv_layer, ni, nf, ks=1, + stride=stride, act=False, act_fn=act_fn, **kwargs) self.merge = Noop() self.act_conn = act_fn @@ -87,22 +97,25 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) + # Cell class BasicLayer(nn.Sequential): '''Layer from blocks''' - def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False,**kwargs): + def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False, **kwargs): self.ni = ni self.nf = 
nf self.blocks = blocks self.expansion = expansion super().__init__(OrderedDict( - [(f'block_{i}', block(ni if i==0 else nf, nf, expansion, - stride if i==0 else 1, - sa=sa if i==blocks-1 else False, + [(f'block_{i}', block(ni if i == 0 else nf, nf, expansion, + stride if i == 0 else 1, + sa=sa if i == blocks - 1 else False, **kwargs)) - for i in range(blocks)])) + for i in range(blocks)])) + def extra_repr(self): - return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' + return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' + # Cell class Body(nn.Sequential): @@ -110,18 +123,19 @@ class Body(nn.Sequential): def __init__(self, block, body_in=64, body_out=512, bodylayer=BasicLayer, expansion=1, - layer_szs=[64,128,256,], blocks=[2,2,2,2], + layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2], sa=False, **kwargs): - layer_szs = [body_in//expansion] + layer_szs + [body_out] - num_layers = len(layer_szs)-1 + layer_szs = [body_in // expansion] + layer_szs + [body_out] + num_layers = len(layer_szs) - 1 layers = [(f"layer_{i}", bodylayer(block, blocks[i], - layer_szs[i], layer_szs[i+1], expansion, - 1 if i==0 else 2, - sa=sa if i==0 else False, + layer_szs[i], layer_szs[i + 1], expansion, + stride=1 if i == 0 else 2, + sa=sa if i == 0 else False, **kwargs)) - for i in range(num_layers)] + for i in range(num_layers)] super().__init__(OrderedDict(layers)) + # Cell class Head(nn.Sequential): '''base head''' @@ -129,33 +143,34 @@ def __init__(self, ni, nf, **kwargs): super().__init__(OrderedDict( [('pool', nn.AdaptiveAvgPool2d((1, 1))), ('flat', Flatten()), - ('fc', nn.Linear(ni, nf)), + ('fc', nn.Linear(ni, nf)), ])) + # Cell def init_model(model, nonlinearity='leaky_relu'): '''Init model''' for m in model.modules(): -# if isinstance(m, nn.Conv2d): - if isinstance(m, (nn.Conv2d, nn.Linear)): - nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity) + if isinstance(m, (nn.Conv2d, nn.Linear)): + nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity) + # Cell class Net(nn.Sequential): '''Constructor for model''' - def __init__(self, stem=Stem, + def __init__(self, stem=Stem, body=Body, block=BasicBlock, sa=False, - layer_szs=[64,128,256,], blocks=[2,2,2,2], + layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2], head=Head, - c_in=3, num_classes=1000, + c_in=3, num_classes=1000, body_in=64, body_out=512, expansion=1, - init_fn=init_model, **kwargs): - self.init_model=init_fn - super().__init__(OrderedDict([ - ('stem', stem(c_in=c_in,stem_out=body_in, **kwargs)), - ('body', body(block, body_in, body_out, - layer_szs=layer_szs, blocks=blocks, expansion=expansion, - sa=sa, **kwargs)), - ('head', head(body_out*expansion, num_classes, **kwargs)) - ])) - self.init_model(self) \ No newline at end of file + init_fn=init_model, **kwargs): + self.init_model = init_fn + super().__init__(OrderedDict( + [('stem', stem(c_in=c_in, stem_out=body_in, **kwargs)), + ('body', body(block, body_in, body_out, + layer_szs=layer_szs, blocks=blocks, expansion=expansion, + sa=sa, **kwargs)), + ('head', head(body_out * expansion, num_classes, **kwargs)) + ])) + self.init_model(self) diff --git a/model_constructor/layers.py b/model_constructor/layers.py index 3b251d6..4c09ac2 100644 --- a/model_constructor/layers.py +++ b/model_constructor/layers.py @@ -5,7 +5,7 @@ # Cell import torch.nn as nn import torch -from torch.nn.utils import spectral_norm # weight_norm, +from torch.nn.utils import 
spectral_norm from collections import OrderedDict # Cell @@ -13,10 +13,14 @@ class Flatten(nn.Module): '''flat x to vector''' def __init__(self): super().__init__() - def forward(self, x): return x.view(x.size(0), -1) + + def forward(self, x): + return x.view(x.size(0), -1) # Cell -def noop(x): return x +def noop(x): + return x + class Noop(nn.Module): '''Dummy module''' @@ -29,58 +33,64 @@ def forward(self, x): # Cell act_fn = nn.ReLU(inplace=True) + class ConvLayer(nn.Sequential): """Basic conv layers block""" Conv2d = nn.Conv2d + def __init__(self, ni, nf, ks=3, stride=1, - act=True, act_fn=act_fn, - bn_layer=True, bn_1st=True, zero_bn=False, - padding=None, bias=False, groups=1, **kwargs): + act=True, act_fn=act_fn, + bn_layer=True, bn_1st=True, zero_bn=False, + padding=None, bias=False, groups=1, **kwargs): - if padding==None: padding = ks//2 + if padding is None: + padding = ks // 2 layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))] act_bn = [('act_fn', act_fn)] if act else [] if bn_layer: bn = nn.BatchNorm2d(nf) nn.init.constant_(bn.weight, 0. if zero_bn else 1.) act_bn += [('bn', bn)] - if bn_1st: act_bn.reverse() + if bn_1st: + act_bn.reverse() layers += act_bn super().__init__(OrderedDict(layers)) # Cell # SA module from mxresnet at fastai. todo - add persons!!! - -#Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py -def conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False): +# Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py +def conv1d(ni: int, no: int, ks: int = 1, stride: int = 1, padding: int = 0, bias: bool = False): "Create and initialize a `nn.Conv1d` layer with spectral normalization." 
conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias=bias) nn.init.kaiming_normal_(conv.weight) - if bias: conv.bias.data.zero_() + if bias: + conv.bias.data.zero_() return spectral_norm(conv) -# Adapted from SelfAttention layer at https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py +# Adapted from SelfAttention layer at +# https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py # Inspired by https://arxiv.org/pdf/1805.08318.pdf class SimpleSelfAttention(nn.Module): - def __init__(self, n_in:int, ks=1, sym=False):#, n_out:int): + def __init__(self, n_in: int, ks=1, sym=False): # , n_out:int): super().__init__() - self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False) + self.conv = conv1d(n_in, n_in, ks, padding=ks // 2, bias=False) self.gamma = nn.Parameter(torch.tensor([0.])) self.sym = sym self.n_in = n_in - def forward(self,x): + + def forward(self, x): if self.sym: # symmetry hack by https://github.com/mgrankin - c = self.conv.weight.view(self.n_in,self.n_in) - c = (c + c.t())/2 - self.conv.weight = c.view(self.n_in,self.n_in,1) + c = self.conv.weight.view(self.n_in, self.n_in) + c = (c + c.t()) / 2 + self.conv.weight = c.view(self.n_in, self.n_in, 1) size = x.size() - x = x.view(*size[:2],-1) # (C,N) + x = x.view(*size[:2], -1) # (C,N) # changed the order of mutiplication to avoid O(N^2) complexity # (x*xT)*(W*x) instead of (x*(xT*(W*x))) convx = self.conv(x) # (C,C) * (C,N) = (C,N) => O(NC^2) - xxT = torch.bmm(x,x.permute(0,2,1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2) + xxT = torch.bmm(x, x.permute(0, 2, 1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2) o = torch.bmm(xxT, convx) # (C,C) * (C,N) = (C,N) => O(NC^2) o = self.gamma * o + x return o.view(*size).contiguous() @@ -90,17 +100,17 @@ class SEBlock(nn.Module): "se block" se_layer = nn.Linear act_fn = nn.ReLU(inplace=True) + def __init__(self, c, r=16): super().__init__() ch = c // r self.squeeze = nn.AdaptiveAvgPool2d(1) self.excitation = nn.Sequential( - OrderedDict([ - ('fc_reduce', self.se_layer(c, ch, bias=False)), - ('se_act', self.act_fn), - ('fc_expand', self.se_layer(ch, c, bias=False)), - ('sigmoid', nn.Sigmoid()) - ])) + OrderedDict([('fc_reduce', self.se_layer(c, ch, bias=False)), + ('se_act', self.act_fn), + ('fc_expand', self.se_layer(ch, c, bias=False)), + ('sigmoid', nn.Sigmoid()) + ])) def forward(self, x): bs, c, _, _ = x.shape @@ -113,18 +123,19 @@ class SEBlockConv(nn.Module): "se block with conv on excitation" se_layer = nn.Conv2d act_fn = nn.ReLU(inplace=True) + def __init__(self, c, r=16): super().__init__() # c_in = math.ceil(c//r/8)*8 - c_in = c//r - + c_in = c // r self.squeeze = nn.AdaptiveAvgPool2d(1) - self.excitation = nn.Sequential(OrderedDict([ - ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)), - ('se_act', self.act_fn), - ('conv_expand', self.se_layer(c_in, c, 1, bias=False)), - ('sigmoid', nn.Sigmoid()) - ])) + self.excitation = nn.Sequential( + OrderedDict([ + ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)), + ('se_act', self.act_fn), + ('conv_expand', self.se_layer(c_in, c, 1, bias=False)), + ('sigmoid', nn.Sigmoid()) + ])) def forward(self, x): y = self.squeeze(x) diff --git a/model_constructor/mxresnet.py b/model_constructor/mxresnet.py index 6ed334d..b36761c 100644 --- a/model_constructor/mxresnet.py +++ b/model_constructor/mxresnet.py @@ -3,16 +3,11 @@ __all__ = ['mxresnet_parameters', 'mxresnet34', 'mxresnet50'] # Cell -import torch.nn as nn -import sys, torch 
-import os from functools import partial -from collections import OrderedDict -from .layers import * from .net import Net from .activations import Mish # Cell mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()} -mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters) -mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters) \ No newline at end of file +mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters) +mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters) \ No newline at end of file diff --git a/model_constructor/net.py b/model_constructor/net.py index 69022be..b939482 100644 --- a/model_constructor/net.py +++ b/model_constructor/net.py @@ -4,7 +4,6 @@ # Cell import torch.nn as nn -import sys, torch from functools import partial from collections import OrderedDict from .layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten @@ -12,67 +11,86 @@ # Cell act_fn = nn.ReLU(inplace=True) -def init_cnn(m): - if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0) - if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight) - for l in m.children(): init_cnn(l) + +def init_cnn(module: nn.Module): + if getattr(module, 'bias', None) is not None: + nn.init.constant_(module.bias, 0) + if isinstance(module, (nn.Conv2d, nn.Linear)): + nn.init.kaiming_normal_(module.weight) + for layer in module.children(): + init_cnn(layer) # Cell class ResBlock(nn.Module): '''Resnet block''' se_block = SEBlock + def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False): super().__init__() - nf,ni = nh*expansion,ni*expansion - if groups != 1: groups = int(nh/groups) - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] - if se: layers.append(('se', self.se_block(nf, se_reduction))) - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + nf, ni = nh * expansion, ni * expansion + if groups != 1: + groups = int(nh / groups) + if expansion == 1: + layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + else: + layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + if se: + layers.append(('se', self.se_block(nf, se_reduction))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.pool = noop if stride==1 else pool - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.act_fn =act_fn + self.pool = noop if stride == 
1 else pool + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.act_fn = act_fn - def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) + def forward(self, x): + return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) # Cell # NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet. + + class NewResBlock(nn.Module): '''YaResnet block''' se_block = SEBlock + def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, - pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, se_reduction=16, + pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False): super().__init__() - nf,ni = nh*expansion,ni*expansion - if groups != 1: groups = int(nh/groups) - self.reduce = noop if stride==1 else pool - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] - if se: layers.append(('se', self.se_block(nf, se_reduction))) - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + nf, ni = nh * expansion, ni * expansion + if groups != 1: + groups = int(nh / groups) + self.reduce = noop if stride == 1 else pool + if expansion == 1: + layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + else: + layers = [("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] + if se: + layers.append(('se', self.se_block(nf, se_reduction))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.merge =act_fn + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.merge = act_fn def forward(self, x): o = self.reduce(x) @@ -80,75 +98,77 @@ def forward(self, x): # Cell def _make_stem(self): - stem = [(f"conv_{i}", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i+1], -# stride=2 if i==0 else 1, - stride=2 if i==self.stem_stride_on else 1, - bn_layer=(not self.stem_bn_end) if i==(len(self.stem_sizes)-2) else True, - act_fn=self.act_fn, bn_1st=self.bn_1st)) - for i in range(len(self.stem_sizes)-1)] - stem.append(('stem_pool', self.stem_pool)) - if self.stem_bn_end: stem.append(('norm', self.norm(self.stem_sizes[-1]))) - return nn.Sequential(OrderedDict(stem)) + stem = [(f"conv_{i}", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i + 1], + stride=2 if i == self.stem_stride_on else 1, + bn_layer=(not self.stem_bn_end) if i == (len(self.stem_sizes) - 2) else True, + act_fn=self.act_fn, bn_1st=self.bn_1st)) + for i in range(len(self.stem_sizes) - 1)] + stem.append(('stem_pool', self.stem_pool)) + if self.stem_bn_end: + stem.append(('norm', 
self.norm(self.stem_sizes[-1]))) + return nn.Sequential(OrderedDict(stem)) # Cell -def _make_layer(self,expansion,ni,nf,blocks,stride,sa): - return nn.Sequential(OrderedDict( - [(f"bl_{i}", self.block(expansion, ni if i==0 else nf, nf, - stride if i==0 else 1, sa=sa if i==blocks-1 else False, - conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool, - zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, dw=self.dw, se=self.se)) - for i in range(blocks)])) +def _make_layer(self, expansion, ni, nf, blocks, stride, sa): + layers = [(f"bl_{i}", self.block(expansion, ni if i == 0 else nf, nf, + stride if i == 0 else 1, sa=sa if i == blocks - 1 else False, + conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool, + zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, + dw=self.dw, se=self.se)) + for i in range(blocks)] + return nn.Sequential(OrderedDict(layers)) # Cell def _make_body(self): - blocks = [(f"l_{i}", self._make_layer(self,self.expansion, - self.block_sizes[i], self.block_sizes[i+1], l, - 1 if i==0 else 2, self.sa if i==0 else False)) - for i,l in enumerate(self.layers)] - return nn.Sequential(OrderedDict(blocks)) + blocks = [(f"l_{i}", self._make_layer(self, self.expansion, + ni=self.block_sizes[i], nf=self.block_sizes[i + 1], + blocks=l, stride=1 if i == 0 else 2, + sa=self.sa if i == 0 else False)) + for i, l in enumerate(self.layers)] + return nn.Sequential(OrderedDict(blocks)) # Cell def _make_head(self): - head = [('pool', nn.AdaptiveAvgPool2d(1)), - ('flat', Flatten()), - ('fc', nn.Linear(self.block_sizes[-1]*self.expansion, self.c_out))] - return nn.Sequential(OrderedDict(head)) + head = [('pool', nn.AdaptiveAvgPool2d(1)), + ('flat', Flatten()), + ('fc', nn.Linear(self.block_sizes[-1] * self.expansion, self.c_out))] + return nn.Sequential(OrderedDict(head)) # Cell class Net(): """Model constructor. 
As default - xresnet18""" def __init__(self, name='Net', c_in=3, c_out=1000, - block=ResBlock, conv_layer = ConvLayer, - block_sizes = [64,128,256,512], layers=[2,2,2,2], - norm = nn.BatchNorm2d, - act_fn=nn.ReLU(inplace=True), - pool = nn.AvgPool2d(2, ceil_mode=True), - expansion=1, groups = 1, dw=False, - sa=False, se=False, se_reduction=16, - bn_1st = True, - zero_bn=True, - stem_stride_on = 0, - stem_sizes = [32,32,64], - stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1), - stem_bn_end = False, - - _init_cnn = init_cnn, - _make_stem = _make_stem, - _make_layer = _make_layer, - _make_body = _make_body, - _make_head = _make_head, - ): + block=ResBlock, conv_layer=ConvLayer, + block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2], + norm=nn.BatchNorm2d, + act_fn=nn.ReLU(inplace=True), + pool=nn.AvgPool2d(2, ceil_mode=True), + expansion=1, groups=1, dw=False, + sa=False, se=False, se_reduction=16, + bn_1st=True, + zero_bn=True, + stem_stride_on=0, + stem_sizes=[32, 32, 64], + stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), + stem_bn_end=False, + _init_cnn=init_cnn, + _make_stem=_make_stem, + _make_layer=_make_layer, + _make_body=_make_body, + _make_head=_make_head, + ): super().__init__() params = locals() del params['self'] self.__dict__ = params self._block_sizes = params['block_sizes'] - if self.stem_sizes[0] != self.c_in: self.stem_sizes = [self.c_in] + self.stem_sizes + if self.stem_sizes[0] != self.c_in: + self.stem_sizes = [self.c_in] + self.stem_sizes @property def block_sizes(self): - return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4) + return [self.stem_sizes[-1] // self.expansion] + self._block_sizes + [256] * (len(self.layers) - 4) @property def stem(self): @@ -168,18 +188,18 @@ def __call__(self): ('body', self.body), ('head', self.head)])) self._init_cnn(model) - model.extra_repr = lambda : f"model {self.name}" + model.extra_repr = lambda: f"model {self.name}" return model def __repr__(self): return (f"{self.name} constructor\n" - f" c_in: {self.c_in}, c_out: {self.c_out}\n" - f" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\n" - f" sa: {self.sa}, se: {self.se}\n" - f" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\n" - f" body sizes {self._block_sizes}\n" - f" layers: {self.layers}") + f" c_in: {self.c_in}, c_out: {self.c_out}\n" + f" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\n" + f" sa: {self.sa}, se: {self.se}\n" + f" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\n" + f" body sizes {self._block_sizes}\n" + f" layers: {self.layers}") # Cell -xresnet34 = partial(Net, name='xresnet34', expansion=1, layers = [3, 4, 6, 3]) -xresnet50 = partial(Net, name='xresnet34', expansion=4, layers = [3, 4, 6, 3]) \ No newline at end of file +xresnet34 = partial(Net, name='xresnet34', expansion=1, layers=[3, 4, 6, 3]) +xresnet50 = partial(Net, name='xresnet34', expansion=4, layers=[3, 4, 6, 3]) \ No newline at end of file diff --git a/nbs/00_Net.ipynb b/nbs/00_Net.ipynb index 60df77b..b2d3353 100644 --- a/nbs/00_Net.ipynb +++ b/nbs/00_Net.ipynb @@ -35,7 +35,8 @@ "metadata": {}, "outputs": [], "source": [ - "#hide" + "#hide\n", + "import torch #sys" ] }, { @@ -66,7 +67,6 @@ "source": [ "#export\n", "import torch.nn as nn\n", - "import sys, torch\n", "from functools import partial\n", "from collections import OrderedDict\n", "from model_constructor.layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten" @@ -97,10 +97,14 @@ "#export\n", 
"act_fn = nn.ReLU(inplace=True)\n", "\n", - "def init_cnn(m):\n", - " if getattr(m, 'bias', None) is not None: nn.init.constant_(m.bias, 0)\n", - " if isinstance(m, (nn.Conv2d,nn.Linear)): nn.init.kaiming_normal_(m.weight)\n", - " for l in m.children(): init_cnn(l)" + "\n", + "def init_cnn(module: nn.Module):\n", + " if getattr(module, 'bias', None) is not None:\n", + " nn.init.constant_(module.bias, 0)\n", + " if isinstance(module, (nn.Conv2d, nn.Linear)):\n", + " nn.init.kaiming_normal_(module.weight)\n", + " for layer in module.children():\n", + " init_cnn(layer)" ] }, { @@ -120,30 +124,37 @@ "class ResBlock(nn.Module):\n", " '''Resnet block'''\n", " se_block = SEBlock\n", + "\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " if groups != 1: groups = int(nh/groups)\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)),\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)),\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ]\n", - " if se: layers.append(('se', self.se_block(nf, se_reduction)))\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " if groups != 1:\n", + " groups = int(nh / groups)\n", + " if expansion == 1:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " else:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " if se:\n", + " layers.append(('se', self.se_block(nf, se_reduction)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.pool = noop if stride==1 else pool\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.act_fn =act_fn\n", + " self.pool = noop if stride == 1 else pool\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.act_fn = act_fn\n", "\n", - " def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" + " def forward(self, x):\n", + " return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" ] }, { @@ -375,31 +386,39 @@ "source": [ "#export\n", "# NewResBlock now is YaResBlock - Yet Another ResNet Block! 
It is now at model_constructor.yaresnet.\n", + "\n", + "\n", "class NewResBlock(nn.Module):\n", " '''YaResnet block'''\n", " se_block = SEBlock\n", + "\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", - " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, se_reduction=16,\n", + " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, se_reduction=16,\n", " groups=1, dw=False):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " if groups != 1: groups = int(nh/groups)\n", - " self.reduce = noop if stride==1 else pool\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ]\n", - " if se: layers.append(('se', self.se_block(nf, se_reduction)))\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " if groups != 1:\n", + " groups = int(nh / groups)\n", + " self.reduce = noop if stride == 1 else pool\n", + " if expansion == 1:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " else:\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ]\n", + " if se:\n", + " layers.append(('se', self.se_block(nf, se_reduction)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.merge =act_fn\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.merge = act_fn\n", "\n", " def forward(self, x):\n", " o = self.reduce(x)\n", @@ -669,15 +688,15 @@ "source": [ "#export\n", "def _make_stem(self):\n", - " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i+1],\n", - "# stride=2 if i==0 else 1,\n", - " stride=2 if i==self.stem_stride_on else 1,\n", - " bn_layer=(not self.stem_bn_end) if i==(len(self.stem_sizes)-2) else True,\n", - " act_fn=self.act_fn, bn_1st=self.bn_1st))\n", - " for i in range(len(self.stem_sizes)-1)]\n", - " stem.append(('stem_pool', self.stem_pool))\n", - " if self.stem_bn_end: stem.append(('norm', self.norm(self.stem_sizes[-1])))\n", - " return nn.Sequential(OrderedDict(stem))" + " stem = [(f\"conv_{i}\", self.conv_layer(self.stem_sizes[i], self.stem_sizes[i + 1],\n", + " stride=2 if i == self.stem_stride_on else 1,\n", + " bn_layer=(not self.stem_bn_end) if i == (len(self.stem_sizes) - 2) else True,\n", + " act_fn=self.act_fn, bn_1st=self.bn_1st))\n", + " for i in range(len(self.stem_sizes) - 1)]\n", + " 
stem.append(('stem_pool', self.stem_pool))\n", + " if self.stem_bn_end:\n", + " stem.append(('norm', self.norm(self.stem_sizes[-1])))\n", + " return nn.Sequential(OrderedDict(stem))" ] }, { @@ -687,13 +706,14 @@ "outputs": [], "source": [ "#export\n", - "def _make_layer(self,expansion,ni,nf,blocks,stride,sa):\n", - " return nn.Sequential(OrderedDict(\n", - " [(f\"bl_{i}\", self.block(expansion, ni if i==0 else nf, nf,\n", - " stride if i==0 else 1, sa=sa if i==blocks-1 else False,\n", - " conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,\n", - " zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups, dw=self.dw, se=self.se))\n", - " for i in range(blocks)]))" + "def _make_layer(self, expansion, ni, nf, blocks, stride, sa):\n", + " layers = [(f\"bl_{i}\", self.block(expansion, ni if i == 0 else nf, nf,\n", + " stride if i == 0 else 1, sa=sa if i == blocks - 1 else False,\n", + " conv_layer=self.conv_layer, act_fn=self.act_fn, pool=self.pool,\n", + " zero_bn=self.zero_bn, bn_1st=self.bn_1st, groups=self.groups,\n", + " dw=self.dw, se=self.se))\n", + " for i in range(blocks)]\n", + " return nn.Sequential(OrderedDict(layers))" ] }, { @@ -704,11 +724,12 @@ "source": [ "#export\n", "def _make_body(self):\n", - " blocks = [(f\"l_{i}\", self._make_layer(self,self.expansion,\n", - " self.block_sizes[i], self.block_sizes[i+1], l,\n", - " 1 if i==0 else 2, self.sa if i==0 else False))\n", - " for i,l in enumerate(self.layers)]\n", - " return nn.Sequential(OrderedDict(blocks))" + " blocks = [(f\"l_{i}\", self._make_layer(self, self.expansion,\n", + " ni=self.block_sizes[i], nf=self.block_sizes[i + 1],\n", + " blocks=l, stride=1 if i == 0 else 2,\n", + " sa=self.sa if i == 0 else False))\n", + " for i, l in enumerate(self.layers)]\n", + " return nn.Sequential(OrderedDict(blocks))" ] }, { @@ -719,10 +740,10 @@ "source": [ "#export\n", "def _make_head(self):\n", - " head = [('pool', nn.AdaptiveAvgPool2d(1)),\n", - " ('flat', Flatten()),\n", - " ('fc', nn.Linear(self.block_sizes[-1]*self.expansion, self.c_out))]\n", - " return nn.Sequential(OrderedDict(head))" + " head = [('pool', nn.AdaptiveAvgPool2d(1)),\n", + " ('flat', Flatten()),\n", + " ('fc', nn.Linear(self.block_sizes[-1] * self.expansion, self.c_out))]\n", + " return nn.Sequential(OrderedDict(head))" ] }, { @@ -742,37 +763,37 @@ "class Net():\n", " \"\"\"Model constructor. 
As default - xresnet18\"\"\"\n", " def __init__(self, name='Net', c_in=3, c_out=1000,\n", - " block=ResBlock, conv_layer = ConvLayer,\n", - " block_sizes = [64,128,256,512], layers=[2,2,2,2],\n", - " norm = nn.BatchNorm2d,\n", - " act_fn=nn.ReLU(inplace=True),\n", - " pool = nn.AvgPool2d(2, ceil_mode=True),\n", - " expansion=1, groups = 1, dw=False,\n", - " sa=False, se=False, se_reduction=16,\n", - " bn_1st = True,\n", - " zero_bn=True,\n", - " stem_stride_on = 0,\n", - " stem_sizes = [32,32,64],\n", - " stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", - " stem_bn_end = False,\n", - "\n", - " _init_cnn = init_cnn,\n", - " _make_stem = _make_stem,\n", - " _make_layer = _make_layer,\n", - " _make_body = _make_body,\n", - " _make_head = _make_head,\n", - " ):\n", + " block=ResBlock, conv_layer=ConvLayer,\n", + " block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2],\n", + " norm=nn.BatchNorm2d,\n", + " act_fn=nn.ReLU(inplace=True),\n", + " pool=nn.AvgPool2d(2, ceil_mode=True),\n", + " expansion=1, groups=1, dw=False,\n", + " sa=False, se=False, se_reduction=16,\n", + " bn_1st=True,\n", + " zero_bn=True,\n", + " stem_stride_on=0,\n", + " stem_sizes=[32, 32, 64],\n", + " stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", + " stem_bn_end=False,\n", + " _init_cnn=init_cnn,\n", + " _make_stem=_make_stem,\n", + " _make_layer=_make_layer,\n", + " _make_body=_make_body,\n", + " _make_head=_make_head,\n", + " ):\n", " super().__init__()\n", "\n", " params = locals()\n", " del params['self']\n", " self.__dict__ = params\n", " self._block_sizes = params['block_sizes']\n", - " if self.stem_sizes[0] != self.c_in: self.stem_sizes = [self.c_in] + self.stem_sizes\n", + " if self.stem_sizes[0] != self.c_in:\n", + " self.stem_sizes = [self.c_in] + self.stem_sizes\n", "\n", " @property\n", " def block_sizes(self):\n", - " return [self.stem_sizes[-1]//self.expansion] + self._block_sizes +[256]*(len(self.layers)-4)\n", + " return [self.stem_sizes[-1] // self.expansion] + self._block_sizes + [256] * (len(self.layers) - 4)\n", "\n", " @property\n", " def stem(self):\n", @@ -792,17 +813,17 @@ " ('body', self.body),\n", " ('head', self.head)]))\n", " self._init_cnn(model)\n", - " model.extra_repr = lambda : f\"model {self.name}\"\n", + " model.extra_repr = lambda: f\"model {self.name}\"\n", " return model\n", "\n", " def __repr__(self):\n", " return (f\"{self.name} constructor\\n\"\n", - " f\" c_in: {self.c_in}, c_out: {self.c_out}\\n\"\n", - " f\" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\\n\"\n", - " f\" sa: {self.sa}, se: {self.se}\\n\"\n", - " f\" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\\n\"\n", - " f\" body sizes {self._block_sizes}\\n\"\n", - " f\" layers: {self.layers}\")" + " f\" c_in: {self.c_in}, c_out: {self.c_out}\\n\"\n", + " f\" expansion: {self.expansion}, groups: {self.groups}, dw: {self.dw}\\n\"\n", + " f\" sa: {self.sa}, se: {self.se}\\n\"\n", + " f\" stem sizes: {self.stem_sizes}, stide on {self.stem_stride_on}\\n\"\n", + " f\" body sizes {self._block_sizes}\\n\"\n", + " f\" layers: {self.layers}\")" ] }, { @@ -2487,8 +2508,8 @@ "outputs": [], "source": [ "#export\n", - "xresnet34 = partial(Net, name='xresnet34', expansion=1, layers = [3, 4, 6, 3])\n", - "xresnet50 = partial(Net, name='xresnet34', expansion=4, layers = [3, 4, 6, 3])" + "xresnet34 = partial(Net, name='xresnet34', expansion=1, layers=[3, 4, 6, 3])\n", + "xresnet50 = partial(Net, name='xresnet34', expansion=4, layers=[3, 4, 6, 3])" ] }, { @@ -2747,6 +2768,31 
@@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_activations.ipynb b/nbs/01_activations.ipynb index 067b62d..176827d 100644 --- a/nbs/01_activations.ipynb +++ b/nbs/01_activations.ipynb @@ -2,8 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:00.256061Z", + "start_time": "2020-11-11T16:20:00.027667Z" + }, "hide_input": true }, "outputs": [], @@ -13,18 +17,14 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cells will be exported to model_constructor.activations,\n", - "unless a different module is specified after an export flag: `%nbdev_export special.module`\n" - ] + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:01.430479Z", + "start_time": "2020-11-11T16:20:01.424122Z" } - ], + }, + "outputs": [], "source": [ "#default_exp activations" ] @@ -57,8 +57,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:04.565883Z", + "start_time": "2020-11-11T16:20:04.071041Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -68,115 +73,6 @@ "from torch.nn import functional as F" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Swish" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def swish(x, inplace: bool = False):\n", - " \"\"\"Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", - " return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())\n", - "\n", - "class Swish(nn.Module):\n", - " \"\"\"Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(Swish, self).__init__()\n", - " self.inplace = inplace\n", - "\n", - " def forward(self, x):\n", - " return swish(x, self.inplace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SwishJit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "@torch.jit.script\n", - "def swish_jit(x, inplace: bool = False):\n", - " \"\"\"Jit version of Swish.\n", - " Swish- Described in: https://arxiv.org/abs/1710.05941\n", - " \"\"\"\n", - " return x.mul(x.sigmoid())\n", - "\n", - "class SwishJit(nn.Module):\n", - " \"\"\"Jit version of Swish.\n", - " Swish - Described in: https://arxiv.org/abs/1710.05941\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(SwishJit, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return swish_jit(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SwishJitMe - memory-efficient." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "\n", - "@torch.jit.script\n", - "def swish_jit_bwd(x, grad_output):\n", - " x_sigmoid = torch.sigmoid(x)\n", - " return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid)))\n", - "\n", - "\n", - "class SwishJitAutoFn(torch.autograd.Function):\n", - " \"\"\" torch.jit.script optimised Swish w/ memory-efficient checkpoint\n", - " Inspired by conversation btw Jeremy Howard & Adam Pazske\n", - " https://twitter.com/jeremyphoward/status/1188251041835315200\n", - " \"\"\"\n", - "\n", - " @staticmethod\n", - " def forward(ctx, x):\n", - " ctx.save_for_backward(x)\n", - " return swish_jit_fwd(x)\n", - "\n", - " @staticmethod\n", - " def backward(ctx, grad_output):\n", - " x = ctx.saved_tensors[0]\n", - " return swish_jit_bwd(x, grad_output)\n", - "\n", - "\n", - "def swish_me(x, inplace=False):\n", - " return SwishJitAutoFn.apply(x)\n", - "\n", - "\n", - "class SwishMe(nn.Module):\n", - " def __init__(self, inplace: bool = False):\n", - " super(SwishMe, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return SwishJitAutoFn.apply(x)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -186,8 +82,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:08.693991Z", + "start_time": "2020-11-11T16:20:08.687244Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -217,8 +118,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:15.683629Z", + "start_time": "2020-11-11T16:20:15.639490Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -229,6 +135,7 @@ " \"\"\"\n", " return x.mul(F.softplus(x).tanh())\n", "\n", + "\n", "class MishJit(nn.Module):\n", " def __init__(self, inplace: bool = False):\n", " \"\"\"Jit version of Mish.\n", @@ -248,15 +155,19 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:24.777571Z", + "start_time": "2020-11-11T16:20:24.765170Z" + } + }, "outputs": [], "source": [ "#export\n", - "\n", "@torch.jit.script\n", "def mish_jit_fwd(x):\n", - "# return x.mul(torch.tanh(F.softplus(x)))\n", + " # return x.mul(torch.tanh(F.softplus(x)))\n", " return x.mul(F.softplus(x).tanh())\n", "\n", "\n", @@ -295,175 +206,6 @@ " return MishJitAutoFn.apply(x)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardSwish" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def hard_swish(x, inplace: bool = False):\n", - " \"\"\"Hard swish activation function\"\"\"\n", - " inner = F.relu6(x + 3.).div_(6.)\n", - " return x.mul_(inner) if inplace else x.mul(inner)\n", - "\n", - "\n", - "class HardSwish(nn.Module):\n", - " \"\"\"Hard swish activation function\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardSwish, self).__init__()\n", - " self.inplace = inplace\n", - "\n", - " def forward(self, x):\n", - " return hard_swish(x, self.inplace)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardSwishJit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "@torch.jit.script\n", - "def hard_swish_jit(x, inplace: bool = False):\n", - " # return x * (F.relu6(x + 3.) / 6)\n", - " return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster?\n", - "\n", - "\n", - "class HardSwishJit(nn.Module):\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardSwishJit, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return hard_swish_jit(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardSwishJitMe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "\n", - "@torch.jit.script\n", - "def hard_swish_jit_fwd(x):\n", - " return x * (x + 3).clamp(min=0, max=6).div(6.)\n", - "\n", - "\n", - "@torch.jit.script\n", - "def hard_swish_jit_bwd(x, grad_output):\n", - " m = torch.ones_like(x) * (x >= 3.)\n", - " m = torch.where((x >= -3.) & (x <= 3.), x / 3. + .5, m)\n", - " return grad_output * m\n", - "\n", - "\n", - "class HardSwishJitAutoFn(torch.autograd.Function):\n", - " \"\"\"A memory efficient, jit-scripted HardSwish activation\"\"\"\n", - " @staticmethod\n", - " def forward(ctx, x):\n", - " ctx.save_for_backward(x)\n", - " return hard_swish_jit_fwd(x)\n", - "\n", - " @staticmethod\n", - " def backward(ctx, grad_output):\n", - " x = ctx.saved_tensors[0]\n", - " return hard_swish_jit_bwd(x, grad_output)\n", - "\n", - "\n", - "def hard_swish_me(x, inplace=False):\n", - " \"\"\"A memory efficient, jit-scripted HardSwish activation\"\"\"\n", - " return HardSwishJitAutoFn.apply(x)\n", - "\n", - "\n", - "class HardSwishMe(nn.Module):\n", - " \"\"\"A memory efficient, jit-scripted HardSwish activation\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardSwishMe, self).__init__()\n", - "\n", - " def forward(self, x):\n", - " return HardSwishJitAutoFn.apply(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HardMish" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def hard_mish(x, inplace: bool = False):\n", - " \"\"\" Hard Mish\n", - " Experimental, based on notes by Mish author Diganta Misra at\n", - " https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md\n", - " \"\"\"\n", - " if inplace:\n", - " return x.mul_(0.5 * (x + 2).clamp(min=0, max=2))\n", - " else:\n", - " return 0.5 * x * (x + 2).clamp(min=0, max=2)\n", - "\n", - "\n", - "class HardMish(nn.Module):\n", - " \"\"\"Hard Mish, Experimental, based on notes by Mish author Diganta Misra at\n", - " https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md\"\"\"\n", - " def __init__(self, inplace: bool = False):\n", - " super(HardMish, self).__init__()\n", - " self.inplace = inplace\n", - "\n", - " def forward(self, x):\n", - " return hard_mish(x, self.inplace)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -473,8 +215,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:43.826063Z", + "start_time": "2020-11-11T16:20:43.817902Z" + } + }, 
"outputs": [], "source": [ "#export\n", @@ -508,8 +255,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:46.737592Z", + "start_time": "2020-11-11T16:20:46.698069Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -559,25 +311,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#hide\n", - "act_fn = Swish(inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:20:52.980532Z", + "start_time": "2020-11-11T16:20:52.975600Z" + } + }, "outputs": [], "source": [ "#hide\n", @@ -595,8 +335,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T16:24:49.841271Z", + "start_time": "2020-11-11T16:24:49.167581Z" + } + }, "outputs": [ { "name": "stdout", @@ -633,6 +378,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/01_layers.ipynb b/nbs/01_layers.ipynb index fd04abc..9ef6e68 100644 --- a/nbs/01_layers.ipynb +++ b/nbs/01_layers.ipynb @@ -58,7 +58,7 @@ "#export\n", "import torch.nn as nn\n", "import torch\n", - "from torch.nn.utils import spectral_norm # weight_norm,\n", + "from torch.nn.utils import spectral_norm\n", "from collections import OrderedDict" ] }, @@ -80,7 +80,9 @@ " '''flat x to vector'''\n", " def __init__(self):\n", " super().__init__()\n", - " def forward(self, x): return x.view(x.size(0), -1)" + "\n", + " def forward(self, x):\n", + " return x.view(x.size(0), -1)" ] }, { @@ -97,7 +99,9 @@ "outputs": [], "source": [ "#export\n", - "def noop(x): return x\n", + "def noop(x):\n", + " return x\n", + "\n", "\n", "class Noop(nn.Module):\n", " '''Dummy module'''\n", @@ -124,22 +128,26 @@ "#export\n", "act_fn = nn.ReLU(inplace=True)\n", "\n", + "\n", "class ConvLayer(nn.Sequential):\n", " \"\"\"Basic conv layers block\"\"\"\n", " Conv2d = nn.Conv2d\n", + "\n", " def __init__(self, ni, nf, ks=3, stride=1,\n", - " act=True, act_fn=act_fn,\n", - " bn_layer=True, bn_1st=True, zero_bn=False,\n", - " padding=None, bias=False, groups=1, **kwargs):\n", + " act=True, act_fn=act_fn,\n", + " bn_layer=True, bn_1st=True, zero_bn=False,\n", + " padding=None, bias=False, groups=1, **kwargs):\n", "\n", - " if padding==None: padding = ks//2\n", + " if padding is None:\n", + " padding = ks // 2\n", " layers = [('conv', self.Conv2d(ni, nf, ks, stride=stride, padding=padding, bias=bias, groups=groups))]\n", " act_bn = [('act_fn', act_fn)] if act else []\n", " if bn_layer:\n", " bn = nn.BatchNorm2d(nf)\n", " nn.init.constant_(bn.weight, 0. 
if zero_bn else 1.)\n", " act_bn += [('bn', bn)]\n", - " if bn_1st: act_bn.reverse()\n", + " if bn_1st:\n", + " act_bn.reverse()\n", " layers += act_bn\n", " super().__init__(OrderedDict(layers))" ] @@ -296,37 +304,39 @@ "source": [ "#export\n", "# SA module from mxresnet at fastai. todo - add persons!!!\n", - "\n", - "#Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", - "def conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False):\n", + "# Unmodified from https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", + "def conv1d(ni: int, no: int, ks: int = 1, stride: int = 1, padding: int = 0, bias: bool = False):\n", " \"Create and initialize a `nn.Conv1d` layer with spectral normalization.\"\n", " conv = nn.Conv1d(ni, no, ks, stride=stride, padding=padding, bias=bias)\n", " nn.init.kaiming_normal_(conv.weight)\n", - " if bias: conv.bias.data.zero_()\n", + " if bias:\n", + " conv.bias.data.zero_()\n", " return spectral_norm(conv)\n", "\n", "\n", - "# Adapted from SelfAttention layer at https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", + "# Adapted from SelfAttention layer at\n", + "# https://github.com/fastai/fastai/blob/5c51f9eabf76853a89a9bc5741804d2ed4407e49/fastai/layers.py\n", "# Inspired by https://arxiv.org/pdf/1805.08318.pdf\n", "class SimpleSelfAttention(nn.Module):\n", - " def __init__(self, n_in:int, ks=1, sym=False):#, n_out:int):\n", + " def __init__(self, n_in: int, ks=1, sym=False): # , n_out:int):\n", " super().__init__()\n", - " self.conv = conv1d(n_in, n_in, ks, padding=ks//2, bias=False)\n", + " self.conv = conv1d(n_in, n_in, ks, padding=ks // 2, bias=False)\n", " self.gamma = nn.Parameter(torch.tensor([0.]))\n", " self.sym = sym\n", " self.n_in = n_in\n", - " def forward(self,x):\n", + "\n", + " def forward(self, x):\n", " if self.sym:\n", " # symmetry hack by https://github.com/mgrankin\n", - " c = self.conv.weight.view(self.n_in,self.n_in)\n", - " c = (c + c.t())/2\n", - " self.conv.weight = c.view(self.n_in,self.n_in,1)\n", + " c = self.conv.weight.view(self.n_in, self.n_in)\n", + " c = (c + c.t()) / 2\n", + " self.conv.weight = c.view(self.n_in, self.n_in, 1)\n", " size = x.size()\n", - " x = x.view(*size[:2],-1) # (C,N)\n", + " x = x.view(*size[:2], -1) # (C,N)\n", " # changed the order of mutiplication to avoid O(N^2) complexity\n", " # (x*xT)*(W*x) instead of (x*(xT*(W*x)))\n", " convx = self.conv(x) # (C,C) * (C,N) = (C,N) => O(NC^2)\n", - " xxT = torch.bmm(x,x.permute(0,2,1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2)\n", + " xxT = torch.bmm(x, x.permute(0, 2, 1).contiguous()) # (C,N) * (N,C) = (C,C) => O(NC^2)\n", " o = torch.bmm(xxT, convx) # (C,C) * (C,N) = (C,N) => O(NC^2)\n", " o = self.gamma * o + x\n", " return o.view(*size).contiguous()" @@ -350,17 +360,17 @@ " \"se block\"\n", " se_layer = nn.Linear\n", " act_fn = nn.ReLU(inplace=True)\n", + "\n", " def __init__(self, c, r=16):\n", " super().__init__()\n", " ch = c // r\n", " self.squeeze = nn.AdaptiveAvgPool2d(1)\n", " self.excitation = nn.Sequential(\n", - " OrderedDict([\n", - " ('fc_reduce', self.se_layer(c, ch, bias=False)),\n", - " ('se_act', self.act_fn),\n", - " ('fc_expand', self.se_layer(ch, c, bias=False)),\n", - " ('sigmoid', nn.Sigmoid())\n", - " ]))\n", + " OrderedDict([('fc_reduce', self.se_layer(c, ch, bias=False)),\n", + " ('se_act', self.act_fn),\n", + " ('fc_expand', self.se_layer(ch, c, 
bias=False)),\n", + " ('sigmoid', nn.Sigmoid())\n", + " ]))\n", "\n", " def forward(self, x):\n", " bs, c, _, _ = x.shape\n", @@ -448,18 +458,19 @@ " \"se block with conv on excitation\"\n", " se_layer = nn.Conv2d\n", " act_fn = nn.ReLU(inplace=True)\n", + "\n", " def __init__(self, c, r=16):\n", " super().__init__()\n", "# c_in = math.ceil(c//r/8)*8\n", - " c_in = c//r\n", - "\n", + " c_in = c // r\n", " self.squeeze = nn.AdaptiveAvgPool2d(1)\n", - " self.excitation = nn.Sequential(OrderedDict([\n", - " ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)),\n", - " ('se_act', self.act_fn),\n", - " ('conv_expand', self.se_layer(c_in, c, 1, bias=False)),\n", - " ('sigmoid', nn.Sigmoid())\n", - " ]))\n", + " self.excitation = nn.Sequential(\n", + " OrderedDict([\n", + " ('conv_reduce', self.se_layer(c, c_in, 1, bias=False)),\n", + " ('se_act', self.act_fn),\n", + " ('conv_expand', self.se_layer(c_in, c, 1, bias=False)),\n", + " ('sigmoid', nn.Sigmoid())\n", + " ]))\n", "\n", " def forward(self, x):\n", " y = self.squeeze(x)\n", diff --git a/nbs/03_MXResNet.ipynb b/nbs/03_MXResNet.ipynb index 7910e53..5cfe5f1 100644 --- a/nbs/03_MXResNet.ipynb +++ b/nbs/03_MXResNet.ipynb @@ -2,29 +2,14 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { - "hide_input": true + "ExecuteTime": { + "end_time": "2020-11-11T17:05:20.113995Z", + "start_time": "2020-11-11T17:05:20.110467Z" + } }, "outputs": [], - "source": [ - "from nbdev import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cells will be exported to model_constructor.mxresnet,\n", - "unless a different module is specified after an export flag: `%nbdev_export special.module`\n" - ] - } - ], "source": [ "#default_exp mxresnet" ] @@ -47,28 +32,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:22.054950Z", + "start_time": "2020-11-11T17:05:21.717082Z" + } + }, "outputs": [], "source": [ "#hide\n", - "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:22.567153Z", + "start_time": "2020-11-11T17:05:22.542806Z" + } + }, "outputs": [], "source": [ "#export\n", - "import torch.nn as nn\n", - "import sys, torch\n", - "import os\n", "from functools import partial\n", - "from collections import OrderedDict\n", - "from model_constructor.layers import *\n", "from model_constructor.net import Net\n", "from model_constructor.activations import Mish" ] @@ -82,8 +72,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:23.723171Z", + "start_time": "2020-11-11T17:05:23.718256Z" + } + }, "outputs": [], "source": [ "mxresnet = Net(stem_sizes = [3, 32, 64, 64], name='MXResNet', act_fn=Mish())" @@ -91,19 +86,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:24.482988Z", + "start_time": "2020-11-11T17:05:24.471791Z" + } + }, "outputs": [ { "data": { "text/plain": [ "MXResNet constructor\n", - " expansion: 1, sa: 0, groups: 1\n", - " 
stem sizes: [3, 32, 64, 64]\n", - " body sizes [64, 64, 128, 256, 512]" + " c_in: 3, c_out: 1000\n", + " expansion: 1, groups: 1, dw: False\n", + " sa: False, se: False\n", + " stem sizes: [3, 32, 64, 64], stide on 0\n", + " body sizes [64, 128, 256, 512]\n", + " layers: [2, 2, 2, 2]" ] }, - "execution_count": null, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -114,8 +117,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:24.801976Z", + "start_time": "2020-11-11T17:05:24.789771Z" + } + }, "outputs": [ { "data": { @@ -123,7 +131,7 @@ "([64, 64, 128, 256, 512], [2, 2, 2, 2])" ] }, - "execution_count": null, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -134,8 +142,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:25.341362Z", + "start_time": "2020-11-11T17:05:25.329372Z" + } + }, "outputs": [ { "data": { @@ -160,7 +173,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -172,8 +185,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:25.914806Z", + "start_time": "2020-11-11T17:05:25.711184Z" + } + }, "outputs": [ { "name": "stdout", @@ -194,8 +212,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:26.270885Z", + "start_time": "2020-11-11T17:05:26.133631Z" + } + }, "outputs": [ { "data": { @@ -339,7 +362,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -351,8 +374,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:26.711041Z", + "start_time": "2020-11-11T17:05:26.633012Z" + } + }, "outputs": [ { "data": { @@ -496,7 +524,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -508,8 +536,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:27.558832Z", + "start_time": "2020-11-11T17:05:27.549740Z" + } + }, "outputs": [ { "data": { @@ -521,7 +554,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -533,8 +566,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:28.582657Z", + "start_time": "2020-11-11T17:05:28.028508Z" + } + }, "outputs": [ { "name": "stdout", @@ -555,20 +593,30 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:28.587488Z", + "start_time": "2020-11-11T17:05:28.584077Z" + } + }, "outputs": [], "source": [ "#export\n", "mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()}\n", - "mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters)\n", - "mxresnet50 = partial(Net, 
name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters)" + "mxresnet34 = partial(Net, name='MXResnet32', expansion=1, layers=[3, 4, 6, 3], **mxresnet_parameters)\n", + "mxresnet50 = partial(Net, name='MXResnet50', expansion=4, layers=[3, 4, 6, 3], **mxresnet_parameters)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:30.655698Z", + "start_time": "2020-11-11T17:05:30.650994Z" + } + }, "outputs": [], "source": [ "model = mxresnet50(c_out=10)" @@ -576,19 +624,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:32.842793Z", + "start_time": "2020-11-11T17:05:32.837170Z" + } + }, "outputs": [ { "data": { "text/plain": [ "MXResnet50 constructor\n", - " expansion: 4, sa: 0, groups: 1\n", - " stem sizes: [3, 32, 64, 64]\n", - " body sizes [16, 64, 128, 256, 512]" + " c_in: 3, c_out: 10\n", + " expansion: 4, groups: 1, dw: False\n", + " sa: False, se: False\n", + " stem sizes: [3, 32, 64, 64], stide on 0\n", + " body sizes [64, 128, 256, 512]\n", + " layers: [3, 4, 6, 3]" ] }, - "execution_count": null, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -599,8 +655,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:05:33.206370Z", + "start_time": "2020-11-11T17:05:33.194473Z" + } + }, "outputs": [ { "data": { @@ -608,7 +669,7 @@ "(10, [3, 4, 6, 3])" ] }, - "execution_count": null, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -628,20 +689,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-11T17:07:53.000101Z", + "start_time": "2020-11-11T17:07:52.520078Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Converted 00_constructor.ipynb.\n", + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", "Converted 01_layers.ipynb.\n", - "Converted 02_resnet.ipynb.\n", - "Converted 03_xresnet.ipynb.\n", - "Converted 04_Net.ipynb.\n", + "Converted 03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", "Converted 05_Twist.ipynb.\n", - "Converted 06_YaResNet.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -665,6 +732,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/10_base_constructor.ipynb b/nbs/10_base_constructor.ipynb index c85ee82..011659f 100644 --- a/nbs/10_base_constructor.ipynb +++ b/nbs/10_base_constructor.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": 
[], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -40,36 +29,51 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:30:39.411413Z", + "start_time": "2020-11-12T06:30:38.396874Z" + } + }, "outputs": [], "source": [ "#hide\n", "from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:40.474152Z", + "start_time": "2020-11-12T06:31:40.458394Z" + } + }, "outputs": [], "source": [ "#export\n", "import torch.nn as nn\n", - "import torch\n", "from collections import OrderedDict\n", - "from model_constructor.layers import ConvLayer, Noop, Flatten" + "from model_constructor.layers import ConvLayer, Noop, Flatten\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:42.115714Z", + "start_time": "2020-11-12T06:31:42.110964Z" + } + }, "outputs": [], "source": [ "#export\n", - "act_fn = nn.ReLU(inplace=True)" + "act_fn = nn.ReLU(inplace=True)\n" ] }, { @@ -81,35 +85,51 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:57.186999Z", + "start_time": "2020-11-12T06:31:57.175738Z" + } + }, "outputs": [], "source": [ "#export\n", "class Stem(nn.Sequential):\n", " \"\"\"Base stem\"\"\"\n", - " def __init__(self, c_in=3, stem_sizes=[], stem_out=64,\n", - " conv_layer=ConvLayer, stride_on=0,\n", - " stem_bn_last=False, bn_1st=True,\n", - " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1), **kwargs):\n", + "\n", + " def __init__(self, c_in=3, stem_sizes=[], stem_out=64,\n", + " conv_layer=ConvLayer, stride_on=0,\n", + " stem_bn_last=False, bn_1st=True,\n", + " stem_use_pool=True, stem_pool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", + " **kwargs\n", + " ):\n", " self.sizes = [c_in] + stem_sizes + [stem_out]\n", - " num_layers = len(self.sizes)-1\n", - " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i+1],\n", - " stride=2 if i==stride_on else 1, act=True,\n", - " bn_layer=not stem_bn_last if i==num_layers-1 else True,\n", + " num_layers = len(self.sizes) - 1\n", + " stem = [(f\"conv_{i}\", conv_layer(self.sizes[i], self.sizes[i + 1],\n", + " stride=2 if i == stride_on else 1, act=True,\n", + " bn_layer=not stem_bn_last if i == num_layers - 1 else True,\n", " bn_1st=bn_1st, **kwargs))\n", - " for i in range(num_layers)]\n", - " if stem_use_pool: stem += [('pool', stem_pool)]\n", - " if stem_bn_last: stem.append(('bn', nn.BatchNorm2d(stem_out)))\n", + " for i in range(num_layers)]\n", + " if stem_use_pool:\n", + " stem += [('pool', stem_pool)]\n", + " if stem_bn_last:\n", + " stem.append(('bn', nn.BatchNorm2d(stem_out)))\n", " super().__init__(OrderedDict(stem))\n", + "\n", " def extra_repr(self):\n", - " return f\"sizes: {self.sizes}\"" + " return f\"sizes: {self.sizes}\"\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:31:58.643757Z", + "start_time": "2020-11-12T06:31:58.624038Z" + } + }, "outputs": [ { "data": { @@ -125,7 +145,7 @@ ")" ] }, - 
"execution_count": null, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -137,8 +157,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:01.418364Z", + "start_time": "2020-11-12T06:32:01.383260Z" + } + }, "outputs": [ { "data": { @@ -146,7 +171,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": null, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -159,8 +184,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:02.220418Z", + "start_time": "2020-11-12T06:32:02.213909Z" + } + }, "outputs": [ { "data": { @@ -176,7 +206,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -188,8 +218,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:03.112277Z", + "start_time": "2020-11-12T06:32:03.078549Z" + } + }, "outputs": [ { "data": { @@ -197,7 +232,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -210,8 +245,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:05.456464Z", + "start_time": "2020-11-12T06:32:05.447631Z" + } + }, "outputs": [ { "data": { @@ -232,7 +272,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -244,8 +284,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:07.526283Z", + "start_time": "2020-11-12T06:32:07.463289Z" + } + }, "outputs": [ { "data": { @@ -253,7 +298,7 @@ "torch.Size([8, 64, 32, 32])" ] }, - "execution_count": null, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -266,8 +311,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:09.662366Z", + "start_time": "2020-11-12T06:32:09.657136Z" + } + }, "outputs": [], "source": [ "assert y.shape, torch.Size([64, 64, 32, 32])" @@ -275,8 +325,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:14.897155Z", + "start_time": "2020-11-12T06:32:14.888814Z" + } + }, "outputs": [ { "data": { @@ -297,7 +352,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -323,32 +378,42 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:26.721501Z", + "start_time": "2020-11-12T06:32:26.716463Z" + } + }, "outputs": [], "source": [ "#export\n", "def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs):\n", " '''Base downsample for res-like blocks'''\n", - " return conv_layer(ni, nf, ks, stride, act, **kwargs)" + " return conv_layer(ni, nf, ks, stride, act, **kwargs)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, 
+ "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:36.693247Z", + "start_time": "2020-11-12T06:32:36.684089Z" + } + }, "outputs": [], "source": [ "#export\n", "class BasicBlock(nn.Module):\n", " \"\"\"Basic block (simplified) as in pytorch resnet\"\"\"\n", - " def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False,\n", - " conv_layer=ConvLayer, act_fn=act_fn,\n", - " downsample_block=DownsampleBlock, **kwargs):\n", + " def __init__(self, ni, nf, expansion=1, stride=1, zero_bn=False,\n", + " conv_layer=ConvLayer, act_fn=act_fn,\n", + " downsample_block=DownsampleBlock, **kwargs):\n", " super().__init__()\n", - " self.downsample = not ni==nf or stride==2\n", + " self.downsample = not ni == nf or stride == 2\n", " self.conv = nn.Sequential(OrderedDict([\n", - " ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_0', conv_layer(ni, nf, stride=stride, act_fn=act_fn, **kwargs)),\n", " ('conv_1', conv_layer(nf, nf, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))]))\n", " if self.downsample:\n", " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, **kwargs)\n", @@ -360,32 +425,37 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))" + " return self.act_conn(self.merge(out + identity))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:41.889641Z", + "start_time": "2020-11-12T06:32:41.879334Z" + } + }, "outputs": [], "source": [ "#export\n", "class Bottleneck(nn.Module):\n", " '''Bottlneck block for resnet models'''\n", " def __init__(self, ni, nh, expansion=4, stride=1, zero_bn=False,\n", - " conv_layer=ConvLayer, act_fn=act_fn,\n", + " conv_layer=ConvLayer, act_fn=act_fn,\n", " downsample_block=DownsampleBlock, **kwargs):\n", - "# groups=1, base_width=64, dilation=1, norm_layer=None\n", " super().__init__()\n", - " self.downsample = not ni==nh or stride==2\n", - " ni = ni*expansion\n", - " nf = nh*expansion\n", + " self.downsample = not ni == nh or stride == 2\n", + " ni = ni * expansion\n", + " nf = nh * expansion\n", " self.conv = nn.Sequential(OrderedDict([\n", - " ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)),\n", - " ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_0', conv_layer(ni, nh, ks=1, act_fn=act_fn, **kwargs)), # noqa: E241\n", + " ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), # noqa: E241\n", " ('conv_2', conv_layer(nh, nf, ks=1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))]))\n", " if self.downsample:\n", - " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, stride=stride, act=False, act_fn=act_fn, **kwargs)\n", + " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, \n", + " stride=stride, act=False, act_fn=act_fn, **kwargs)\n", " self.merge = Noop()\n", " self.act_conn = act_fn\n", "\n", @@ -394,31 +464,37 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))" + " return self.act_conn(self.merge(out + identity))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:47.811518Z", + "start_time": "2020-11-12T06:32:47.804120Z" + } + }, "outputs": 
[], "source": [ "#export\n", "class BasicLayer(nn.Sequential):\n", " '''Layer from blocks'''\n", - " def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False,**kwargs):\n", + " def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False, **kwargs):\n", " self.ni = ni\n", " self.nf = nf\n", " self.blocks = blocks\n", " self.expansion = expansion\n", " super().__init__(OrderedDict(\n", - " [(f'block_{i}', block(ni if i==0 else nf, nf, expansion,\n", - " stride if i==0 else 1,\n", - " sa=sa if i==blocks-1 else False,\n", + " [(f'block_{i}', block(ni if i == 0 else nf, nf, expansion,\n", + " stride if i == 0 else 1,\n", + " sa=sa if i == blocks - 1 else False,\n", " **kwargs))\n", - " for i in range(blocks)]))\n", + " for i in range(blocks)]))\n", + "\n", " def extra_repr(self):\n", - " return f'from {self.ni*self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'" + " return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'\n" ] }, { @@ -430,8 +506,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:52.952888Z", + "start_time": "2020-11-12T06:32:52.945018Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -440,23 +521,28 @@ " def __init__(self, block,\n", " body_in=64, body_out=512,\n", " bodylayer=BasicLayer, expansion=1,\n", - " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", + " layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2],\n", " sa=False, **kwargs):\n", - " layer_szs = [body_in//expansion] + layer_szs + [body_out]\n", - " num_layers = len(layer_szs)-1\n", + " layer_szs = [body_in // expansion] + layer_szs + [body_out]\n", + " num_layers = len(layer_szs) - 1\n", " layers = [(f\"layer_{i}\", bodylayer(block, blocks[i],\n", - " layer_szs[i], layer_szs[i+1], expansion,\n", - " 1 if i==0 else 2,\n", - " sa=sa if i==0 else False,\n", + " layer_szs[i], layer_szs[i + 1], expansion,\n", + " stride=1 if i == 0 else 2,\n", + " sa=sa if i == 0 else False,\n", " **kwargs))\n", - " for i in range(num_layers)]\n", - " super().__init__(OrderedDict(layers))" + " for i in range(num_layers)]\n", + " super().__init__(OrderedDict(layers))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:53.745373Z", + "start_time": "2020-11-12T06:32:53.672090Z" + } + }, "outputs": [ { "data": { @@ -609,7 +695,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -621,8 +707,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:56.260747Z", + "start_time": "2020-11-12T06:32:56.184140Z" + } + }, "outputs": [ { "data": { @@ -775,7 +866,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -788,8 +879,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:32:57.657647Z", + "start_time": "2020-11-12T06:32:57.544627Z" + } + }, "outputs": [ { "name": "stdout", @@ -809,8 +905,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": 
"2020-11-12T06:32:59.370043Z", + "start_time": "2020-11-12T06:32:59.303879Z" + } + }, "outputs": [ { "data": { @@ -850,7 +951,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -863,8 +964,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 23, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:01.422419Z", + "start_time": "2020-11-12T06:33:01.279749Z" + } + }, "outputs": [ { "name": "stdout", @@ -892,8 +998,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 25, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:11.171678Z", + "start_time": "2020-11-12T06:33:11.167517Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -903,14 +1014,19 @@ " super().__init__(OrderedDict(\n", " [('pool', nn.AdaptiveAvgPool2d((1, 1))),\n", " ('flat', Flatten()),\n", - " ('fc', nn.Linear(ni, nf)),\n", - " ]))" + " ('fc', nn.Linear(ni, nf)),\n", + " ]))\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 26, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:11.594299Z", + "start_time": "2020-11-12T06:33:11.590324Z" + } + }, "outputs": [], "source": [ "head = Head(512, 10)" @@ -918,8 +1034,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:12.744267Z", + "start_time": "2020-11-12T06:33:12.732280Z" + } + }, "outputs": [ { "data": { @@ -927,7 +1048,7 @@ "torch.Size([8, 10])" ] }, - "execution_count": null, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -939,13 +1060,6 @@ "y.shape" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -955,50 +1069,64 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:21.010150Z", + "start_time": "2020-11-12T06:33:21.005904Z" + } + }, "outputs": [], "source": [ "#export\n", "def init_model(model, nonlinearity='leaky_relu'):\n", " '''Init model'''\n", " for m in model.modules():\n", - "# if isinstance(m, nn.Conv2d):\n", - " if isinstance(m, (nn.Conv2d, nn.Linear)):\n", - " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)" + " if isinstance(m, (nn.Conv2d, nn.Linear)):\n", + " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 29, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:33.550879Z", + "start_time": "2020-11-12T06:33:33.542345Z" + } + }, "outputs": [], "source": [ "#export\n", "class Net(nn.Sequential):\n", " '''Constructor for model'''\n", - " def __init__(self, stem=Stem,\n", + " def __init__(self, stem=Stem,\n", " body=Body, block=BasicBlock, sa=False,\n", - " layer_szs=[64,128,256,], blocks=[2,2,2,2],\n", + " layer_szs=[64, 128, 256, ], blocks=[2, 2, 2, 2],\n", " head=Head,\n", - " c_in=3, num_classes=1000,\n", + " c_in=3, num_classes=1000,\n", " body_in=64, body_out=512, expansion=1,\n", - " init_fn=init_model, **kwargs):\n", - " self.init_model=init_fn\n", - " super().__init__(OrderedDict([\n", - " ('stem', stem(c_in=c_in,stem_out=body_in, **kwargs)),\n", - " ('body', body(block, body_in, 
body_out,\n", - " layer_szs=layer_szs, blocks=blocks, expansion=expansion,\n", - " sa=sa, **kwargs)),\n", - " ('head', head(body_out*expansion, num_classes, **kwargs))\n", - " ]))\n", - " self.init_model(self)" + " init_fn=init_model, **kwargs):\n", + " self.init_model = init_fn\n", + " super().__init__(OrderedDict(\n", + " [('stem', stem(c_in=c_in, stem_out=body_in, **kwargs)),\n", + " ('body', body(block, body_in, body_out,\n", + " layer_szs=layer_szs, blocks=blocks, expansion=expansion,\n", + " sa=sa, **kwargs)),\n", + " ('head', head(body_out * expansion, num_classes, **kwargs))\n", + " ]))\n", + " self.init_model(self)\n" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 30, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:36.506266Z", + "start_time": "2020-11-12T06:33:36.348713Z" + } + }, "outputs": [], "source": [ "model = Net()" @@ -1006,8 +1134,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 31, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:36.827690Z", + "start_time": "2020-11-12T06:33:36.806284Z" + } + }, "outputs": [ { "data": { @@ -1176,7 +1309,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1187,8 +1320,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:38.249477Z", + "start_time": "2020-11-12T06:33:38.019788Z" + } + }, "outputs": [ { "data": { @@ -1196,7 +1334,7 @@ "torch.Size([16, 1000])" ] }, - "execution_count": null, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1210,8 +1348,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 33, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:39.362878Z", + "start_time": "2020-11-12T06:33:39.195752Z" + } + }, "outputs": [ { "data": { @@ -1251,7 +1394,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1264,8 +1407,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 34, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:40.223688Z", + "start_time": "2020-11-12T06:33:40.091324Z" + } + }, "outputs": [ { "name": "stdout", @@ -1286,8 +1434,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 35, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:40.803038Z", + "start_time": "2020-11-12T06:33:40.591347Z" + } + }, "outputs": [ { "data": { @@ -1456,7 +1609,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1469,8 +1622,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 36, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:33:41.679937Z", + "start_time": "2020-11-12T06:33:41.562299Z" + } + }, "outputs": [ { "name": "stdout", @@ -1499,8 +1657,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T06:58:53.867375Z", + "start_time": "2020-11-12T06:58:53.190412Z" + } + }, "outputs": [ { "name": "stdout", @@ -1512,9 +1675,8 @@ "Converted 03_MXResNet.ipynb.\n", "Converted 04_YaResNet.ipynb.\n", "Converted 
05_Twist.ipynb.\n", - "Converted 10_constructor.ipynb.\n", - "Converted 11_resnet.ipynb.\n", - "Converted 12_xresnet.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -1552,6 +1714,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2fb20a2 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,9 @@ +[flake8] +max-line-length = 120 +exclude = .tox,*.egg,build,temp +select = E,W,F +doctests = True +verbose = 2 +# https://pep8.readthedocs.io/en/latest/intro.html#error-codes +format = pylint +# ignore = \ No newline at end of file From 9bd8b96172e9e46d0867a98c6032a2453a71fa7b Mon Sep 17 00:00:00 2001 From: ayasyrev Date: Thu, 12 Nov 2020 08:36:49 +0000 Subject: [PATCH 3/3] Clear code, remove nbdev %magic Fixes #2 --- README.md | 64 +-- docs/MXResNet.html | 25 +- docs/Net.html | 89 ++- docs/Twist.html | 62 +- docs/YaResNet.html | 23 +- docs/activations.html | 692 ++-------------------- docs/base_constructor.html | 93 ++- docs/layers.html | 95 ++- docs/xresnet.html | 43 +- model_constructor/_nbdev.py | 4 +- model_constructor/base_constructor.py | 12 +- model_constructor/mxresnet.py | 3 +- model_constructor/net.py | 12 +- model_constructor/twist.py | 133 ++--- model_constructor/xresnet.py | 40 +- model_constructor/yaresnet.py | 46 +- nbs/00_Net.ipynb | 65 ++- nbs/01_activations.ipynb | 11 +- nbs/03_MXResNet.ipynb | 5 +- nbs/04_YaResNet.ipynb | 103 ++-- nbs/05_Twist.ipynb | 798 ++++++++++++++++++-------- nbs/10_base_constructor.ipynb | 24 +- nbs/11_xresnet.ipynb | 251 +++++--- setup.cfg | 2 +- setup.py | 51 +- 25 files changed, 1291 insertions(+), 1455 deletions(-) diff --git a/README.md b/README.md index 117a110..e91e829 100644 --- a/README.md +++ b/README.md @@ -21,15 +21,15 @@ First import constructor class, then create model constructor oject. Now you can change every part of model. -```python +``` from model_constructor.net import * ``` -```python +``` model = Net() ``` -```python +``` model ``` @@ -48,7 +48,7 @@ model Now we have model consructor, default setting as xresnet18. And we can get model after call it. -```python +``` model.c_in ``` @@ -59,7 +59,7 @@ model.c_in -```python +``` model.c_out ``` @@ -70,7 +70,7 @@ model.c_out -```python +``` model.stem_sizes ``` @@ -81,7 +81,7 @@ model.stem_sizes -```python +``` model.layers ``` @@ -92,7 +92,7 @@ model.layers -```python +``` model.expansion ``` @@ -103,8 +103,8 @@ model.expansion -```python -%nbdev_collapse_output +``` +#collapse_output model() ```
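Before changing anything else, it can help to sanity-check the constructed model with a dummy batch. A minimal sketch — the 128×128 input size and the two-image batch are arbitrary choices for illustration:

```
# Quick sanity check of the constructed model: run a small dummy batch through it.
import torch

net = model()                      # calling the constructor returns the nn.Sequential model
xb = torch.randn(2, 3, 128, 128)   # two RGB images, 128x128 (size chosen only for the check)
print(net(xb).shape)               # expected: torch.Size([2, 1000]) with the default c_out
```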
@@ -284,15 +284,15 @@ model() If you want to change model, just change constructor parameters. Lets create xresnet50. -```python +``` model.expansion = 4 model.layers = [3,4,6,3] ``` Now we can look at model body and if we call constructor - we have pytorch model! -```python -%nbdev_collapse_output +``` +#collapse_output model.body ```
@@ -649,13 +649,13 @@ But now lets create model as mxresnet50 from fastai forums tread https://forums. Lets create mxresnet constructor. -```python +``` model = Net(name='MxResNet') ``` Then lets modify stem. -```python +``` model.stem_sizes = [3,32,64,64] ``` @@ -663,15 +663,15 @@ Now lets change activation function to Mish. Here is link to forum disscussion https://forums.fast.ai/t/meet-mish-new-activation-function-possible-successor-to-relu Mish is in model_constructor.activations -```python +``` from model_constructor.activations import Mish ``` -```python +``` model.act_fn = Mish() ``` -```python +``` model ``` @@ -688,8 +688,8 @@ model -```python -%nbdev_collapse_output +``` +#collapse_output model() ```
@@ -870,7 +870,7 @@ model() Now lets make MxResNet50 -```python +``` model.expansion = 4 model.layers = [3,4,6,3] model.name = 'mxresnet50' @@ -880,7 +880,7 @@ Now we have mxresnet50 constructor. We can inspect every parts of it. And after call it we got model. -```python +``` model ``` @@ -897,8 +897,8 @@ model -```python -%nbdev_collapse_output +``` +#collapse_output model.stem.conv_1 ```
@@ -917,8 +917,8 @@ model.stem.conv_1
-```python -%nbdev_collapse_output +``` +#collapse_output model.body.l_0.bl_0 ```
@@ -959,18 +959,18 @@ model.body.l_0.bl_0 Now lets change Resblock to YaResBlock (Yet another ResNet, former NewResBlock) is in lib from version 0.1.0 -```python +``` from model_constructor.yaresnet import YaResBlock ``` -```python +``` model.block = YaResBlock ``` That all. Now we have YaResNet constructor -```python -%nbdev_collapse_output +``` +#collapse_output model.name = 'YaResNet' model ``` @@ -994,8 +994,8 @@ model Let see what we have. -```python -%nbdev_collapse_output +``` +#collapse_output model.body.l_1.bl_0 ```
diff --git a/docs/MXResNet.html b/docs/MXResNet.html index 2aed8f5..75a31fa 100644 --- a/docs/MXResNet.html +++ b/docs/MXResNet.html @@ -26,13 +26,6 @@
-
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -96,9 +89,12 @@

MXResNet constructor.
MXResNet constructor
- expansion: 1, sa: 0, groups: 1
- stem sizes: [3, 32, 64, 64]
- body sizes [64, 64, 128, 256, 512]
+ c_in: 3, c_out: 1000 + expansion: 1, groups: 1, dw: False + sa: False, se: False + stem sizes: [3, 32, 64, 64], stide on 0 + body sizes [64, 128, 256, 512] + layers: [2, 2, 2, 2] @@ -630,9 +626,12 @@
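The constructor summarised above can be rebuilt from the pieces in this patch; a minimal sketch, assuming the package is importable:

```
# Sketch: build the MXResNet constructor from Net plus Mish, as in the notebook
# this page is generated from; the deeper variant only changes two attributes.
from model_constructor.net import Net
from model_constructor.activations import Mish

mxresnet = Net(stem_sizes=[3, 32, 64, 64], name='MXResNet', act_fn=Mish())
model18 = mxresnet()          # xresnet18-like body with Mish and the extended stem

mxresnet.expansion = 4        # switch to bottleneck blocks
mxresnet.layers = [3, 4, 6, 3]
model50 = mxresnet()          # mxresnet50-like model
```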

MXResNet constructor.
MXResnet50 constructor
- expansion: 4, sa: 0, groups: 1
- stem sizes: [3, 32, 64, 64]
- body sizes [16, 64, 128, 256, 512]
+ c_in: 3, c_out: 10 + expansion: 4, groups: 1, dw: False + sa: False, se: False + stem sizes: [3, 32, 64, 64], stide on 0 + body sizes [64, 128, 256, 512] + layers: [3, 4, 6, 3] diff --git a/docs/Net.html b/docs/Net.html index 4535bc6..81a2910 100644 --- a/docs/Net.html +++ b/docs/Net.html @@ -26,20 +26,6 @@
-
- {% endraw %} - - {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -50,7 +36,7 @@
-
from dataclasses import dataclass, asdict
+
from nbdev import docs, showdoc, show_doc
 
@@ -60,12 +46,6 @@
{% endraw %} -
{% raw %}
@@ -73,6 +53,12 @@

Utils

{% endraw %} +
+
+

Utils

+
+
+
{% raw %}
@@ -84,7 +70,7 @@

Utils

-

init_cnn[source]

init_cnn(m)

+

init_cnn[source]

init_cnn(module:Module)

@@ -97,12 +83,6 @@

init_cnn -

{% raw %}
@@ -110,6 +90,12 @@

ResBlock

{% endraw %} +
+
+

ResBlock

+
+
+
{% raw %}
@@ -121,7 +107,7 @@

ResBlock

-

class ResBlock[source]

ResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

+

class ResBlock[source]

ResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

Resnet block
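A short usage sketch (the batch and feature-map sizes are arbitrary): at stride 1 the block keeps channels and resolution, while at stride 2 the identity path is pooled and projected.

```
# Sketch: shape behaviour of the basic (expansion=1) ResBlock.
import torch
from model_constructor.net import ResBlock

x = torch.randn(8, 64, 32, 32)

block = ResBlock(1, 64, 64)                 # expansion, ni, nh
assert block(x).shape == (8, 64, 32, 32)    # same channels, same resolution

block_s2 = ResBlock(1, 64, 128, stride=2)   # pooled + projected identity path
assert block_s2(x).shape == (8, 128, 16, 16)
```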

@@ -132,6 +118,13 @@

class ResBlock

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -424,13 +417,6 @@

NewResBlock
-
- {% endraw %} - - {% raw %} - -
-
@@ -438,7 +424,7 @@

NewResBlock
-

class NewResBlock[source]

NewResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

+

class NewResBlock[source]

NewResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, se_reduction=16, groups=1, dw=False) :: Module

YaResnet block

@@ -449,6 +435,13 @@

class NewResBlock

+

+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -551,13 +544,6 @@

Net - Model Constructor. -

- {% endraw %} - - {% raw %} - -
-
@@ -565,7 +551,7 @@

Net - Model Constructor. -

class Net[source]

Net(name='Net', c_in=3, c_out=1000, block=ResBlock, conv_layer=ConvLayer, block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2], norm=BatchNorm2d, act_fn=ReLU(inplace=True), pool=AvgPool2d(kernel_size=2, stride=2, padding=0), expansion=1, groups=1, dw=False, sa=False, se=False, se_reduction=16, bn_1st=True, zero_bn=True, stem_stride_on=0, stem_sizes=[32, 32, 64], stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), stem_bn_end=False, _init_cnn=init_cnn, _make_stem=_make_stem, _make_layer=_make_layer, _make_body=_make_body, _make_head=_make_head)

+

class Net[source]

Net(name='Net', c_in=3, c_out=1000, block=ResBlock, conv_layer=ConvLayer, block_sizes=[64, 128, 256, 512], layers=[2, 2, 2, 2], norm=BatchNorm2d, act_fn=ReLU(inplace=True), pool=AvgPool2d(kernel_size=2, stride=2, padding=0), expansion=1, groups=1, dw=False, sa=False, se=False, se_reduction=16, bn_1st=True, zero_bn=True, stem_stride_on=0, stem_sizes=[32, 32, 64], stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), stem_bn_end=False, _init_cnn=init_cnn, _make_stem=_make_stem, _make_layer=_make_layer, _make_body=_make_body, _make_head=_make_head)

Model constructor. As default - xresnet18
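The keyword arguments listed in the signature map directly onto constructor attributes, so variants are created by flipping flags rather than writing new classes. A hedged sketch:

```
# Sketch: the same constructor with SE blocks, SimpleSelfAttention and depthwise
# convolutions switched on via the flags from the signature above.
from model_constructor.net import Net

constructor = Net(name='CustomNet', c_out=10, se=True, sa=True, dw=True)
constructor.layers = [2, 2, 2, 2]   # depth can also be changed after construction
model = constructor()               # returns the assembled nn.Sequential
```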

@@ -576,6 +562,13 @@

class Net +

{% endraw %} @@ -1095,10 +1088,7 @@

class Net -
- - -
+
@@ -1109,7 +1099,6 @@

class NetConvTwist

-
- {% endraw %} - - {% raw %} - -
-
@@ -85,6 +78,13 @@

class ConvTwist<

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -274,13 +274,6 @@

ConvLayerTwist -

- {% endraw %} - - {% raw %} - -
-
@@ -288,7 +281,7 @@

ConvLayerTwist -

class ConvLayerTwist[source]

ConvLayerTwist(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: ConvLayer

+

class ConvLayerTwist[source]

ConvLayerTwist(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: ConvLayer

Basic conv layers block

@@ -299,6 +292,13 @@

class ConvLayerTwi

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -828,13 +828,6 @@

NewResBlockTwist -

- {% endraw %} - - {% raw %} - -
-
@@ -842,7 +835,7 @@

NewResBlockTwist -

class NewResBlockTwist[source]

NewResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, zero_bn=True, **kvargs) :: Module

+

class NewResBlockTwist[source]

NewResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, zero_bn=True, **kvargs) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -873,6 +866,13 @@

class NewResBloc

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -1060,13 +1060,6 @@

ResBlockTwist -

- {% endraw %} - - {% raw %} - -
-
@@ -1074,7 +1067,7 @@

ResBlockTwist -

class ResBlockTwist[source]

ResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, **kvargs) :: Module

+

class ResBlockTwist[source]

ResBlockTwist(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, **kvargs) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -1105,6 +1098,13 @@

class ResBlockTwist

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} diff --git a/docs/YaResNet.html b/docs/YaResNet.html index 0e31dde..27e5445 100644 --- a/docs/YaResNet.html +++ b/docs/YaResNet.html @@ -33,13 +33,6 @@
-
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -53,13 +46,6 @@

YaResBlock -

- {% endraw %} - - {% raw %} - -
-
@@ -67,7 +53,7 @@

YaResBlock -

class YaResBlock[source]

YaResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, groups=1, dw=False) :: Module

+

class YaResBlock[source]

YaResBlock(expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), zero_bn=True, bn_1st=True, pool=AvgPool2d(kernel_size=2, stride=2, padding=0), sa=False, sym=False, se=False, groups=1, dw=False) :: Module

YaResBlock. Reduce by pool instead of stride 2
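The docstring is the whole idea: instead of letting a stride-2 convolution do the reduction, the resolution is reduced once by a pooling layer and the convolutions then run at stride 1. A standalone sketch of that idea (not the library implementation):

```
# Not the library code - just the "reduce by pool instead of stride 2" idea.
import torch
import torch.nn as nn

x = torch.randn(8, 64, 32, 32)

stride_conv = nn.Conv2d(64, 128, 3, stride=2, padding=1)    # classic reduction
pool_then_conv = nn.Sequential(
    nn.AvgPool2d(2, ceil_mode=True),                        # reduce resolution first
    nn.Conv2d(64, 128, 3, stride=1, padding=1),             # then a stride-1 conv
)

assert stride_conv(x).shape == pool_then_conv(x).shape == (8, 128, 16, 16)
```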

@@ -78,6 +64,13 @@

class YaResBlock

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} diff --git a/docs/activations.html b/docs/activations.html index 5267562..74137ea 100644 --- a/docs/activations.html +++ b/docs/activations.html @@ -62,524 +62,10 @@
-

Swish

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

swish[source]

swish(x, inplace:bool=False)

-
-

Swish - Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class Swish[source]

Swish(inplace:bool=False) :: Module

-
-

Swish - Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

SwishJit

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f833e1e90>[source]

ScriptFunction object at 0x7f9f833e1e90>()

-
-

Jit version of Swish. -Swish- Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class SwishJit[source]

SwishJit(inplace:bool=False) :: Module

-
-

Jit version of Swish. -Swish - Described in: https://arxiv.org/abs/1710.05941

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

SwishJitMe - memory-efficient.

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f2684f2f0>[source]

ScriptFunction object at 0x7f9f2684f2f0>()

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class SwishJitAutoFn[source]

SwishJitAutoFn() :: Function

-
-

torch.jit.script optimised Swish w/ memory-efficient checkpoint -Inspired by conversation btw Jeremy Howard & Adam Pazske -https://twitter.com/jeremyphoward/status/1188251041835315200

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

swish_me[source]

swish_me(x, inplace=False)

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class SwishMe[source]

SwishMe(inplace:bool=False) :: Module

-
-

Base class for all neural network modules.

-

Your models should also subclass this class.

-

Modules can also contain other Modules, allowing to nest them in -a tree structure. You can assign the submodules as regular attributes::

- -
import torch.nn as nn
-import torch.nn.functional as F
-
-class Model(nn.Module):
-    def __init__(self):
-        super(Model, self).__init__()
-        self.conv1 = nn.Conv2d(1, 20, 5)
-        self.conv2 = nn.Conv2d(20, 20, 5)
-
-    def forward(self, x):
-        x = F.relu(self.conv1(x))
-        return F.relu(self.conv2(x))
-
-
-

Submodules assigned in this way will be registered, and will have their -parameters converted too when you call :meth:to, etc.

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

Mish

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

mish[source]

mish(x, inplace:bool=False)

-
-

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 -NOTE: I don't have a working inplace variant

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class Mish[source]

Mish(inplace:bool=False) :: Module

-
-

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

MishJit

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f26062290>[source]

ScriptFunction object at 0x7f9f26062290>()

-
-

Jit version of Mish. -Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class MishJit[source]

MishJit(inplace:bool=False) :: Module

-
-

Base class for all neural network modules.

-

Your models should also subclass this class.

-

Modules can also contain other Modules, allowing to nest them in -a tree structure. You can assign the submodules as regular attributes::

- -
import torch.nn as nn
-import torch.nn.functional as F
-
-class Model(nn.Module):
-    def __init__(self):
-        super(Model, self).__init__()
-        self.conv1 = nn.Conv2d(1, 20, 5)
-        self.conv2 = nn.Conv2d(20, 20, 5)
-
-    def forward(self, x):
-        x = F.relu(self.conv1(x))
-        return F.relu(self.conv2(x))
-
-
-

Submodules assigned in this way will be registered, and will have their -parameters converted too when you call :meth:to, etc.

- -
- -
- -
-
- -
- {% endraw %} - -
-
-

MishJitMe - memory-efficient.

-
-
-
- {% raw %} - -
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f260623b0>[source]

ScriptFunction object at 0x7f9f260623b0>()

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

ScriptFunction object at 0x7f9f26062470>[source]

ScriptFunction object at 0x7f9f26062470>()

-
- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

class MishJitAutoFn[source]

MishJitAutoFn() :: Function

-
-

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 -A memory efficient, jit scripted variant of Mish

- -
- -
- -
-
- -
- {% endraw %} - - {% raw %} - -
- -
-
- -
- - -
-

mish_me[source]

mish_me(x, inplace=False)

-
- -
- -
- +

Mish

-
- {% endraw %} - {% raw %}
@@ -591,30 +77,17 @@

mish_me -

class MishMe[source]

MishMe(inplace:bool=False) :: Module

+

mish[source]

mish(x, inplace:bool=False)

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 -A memory efficient, jit scripted variant of Mish

- -

- -
+NOTE: I don't have a working inplace variant

-

- {% endraw %} -
-
-

HardSwish

-
- {% raw %} - -
{% endraw %} @@ -630,9 +103,9 @@

HardSwish

-

hard_swish[source]

hard_swish(x, inplace:bool=False)

+

class Mish[source]

Mish(inplace:bool=False) :: Module

-

Hard swish activation function

+

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
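For reference, Mish is x · tanh(softplus(x)); a quick standalone check of the formula (the sample points are arbitrary):

```
# Sketch: Mish(x) = x * tanh(softplus(x)); the modules documented on this page
# should match these values up to floating-point error.
import torch
import torch.nn.functional as F

x = torch.linspace(-4, 4, steps=9)
print(x * torch.tanh(F.softplus(x)))
```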

@@ -648,40 +121,15 @@

hard_swish -
-
- -
- - -
-

class HardSwish[source]

HardSwish(inplace:bool=False) :: Module

-
-

Hard swish activation function

- -
- -
- -
-
-

{% endraw %}
-

HardSwishJit

-
+

MishJit

- {% raw %} - -
-
- {% endraw %} - {% raw %}
@@ -693,8 +141,10 @@

HardSwishJit <
-

ScriptFunction object at 0x7f9f260626b0>[source]

ScriptFunction object at 0x7f9f260626b0>()

+

ScriptFunction object at 0x7f60a962c7c0>[source]

ScriptFunction object at 0x7f60a962c7c0>()

+

Jit version of Mish. +Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

@@ -717,7 +167,7 @@

Scrip
-

class HardSwishJit[source]

HardSwishJit(inplace:bool=False) :: Module

+

class MishJit[source]

MishJit(inplace:bool=False) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -751,12 +201,6 @@

class HardSwishJit {% endraw %} -
-
-

HardSwishJitMe

-
-
-
{% raw %}
@@ -764,30 +208,12 @@

HardSwishJitMe - -
-
- -
- - -
-

ScriptFunction object at 0x7f9f26062770>[source]

ScriptFunction object at 0x7f9f26062770>()

-
- -
- -
- +
+
+

MishJitMe - memory-efficient.

-
- {% endraw %} - {% raw %}
@@ -799,7 +225,7 @@

Scrip
-

ScriptFunction object at 0x7f9f2684f6b0>[source]

ScriptFunction object at 0x7f9f2684f6b0>()

+

ScriptFunction object at 0x7f60a962c950>[source]

ScriptFunction object at 0x7f60a962c950>()

@@ -823,9 +249,8 @@

Scrip
-

class HardSwishJitAutoFn[source]

HardSwishJitAutoFn() :: Function

+

ScriptFunction object at 0x7f60a962c360>[source]

ScriptFunction object at 0x7f60a962c360>()

-

A memory efficient, jit-scripted HardSwish activation

@@ -848,9 +273,10 @@

class HardSwis
-

hard_swish_me[source]

hard_swish_me(x, inplace=False)

+

class MishJitAutoFn[source]

MishJitAutoFn() :: Function

-

A memory efficient, jit-scripted HardSwish activation

+

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 +A memory efficient, jit scripted variant of Mish

@@ -873,29 +299,15 @@

hard_swish_me -

class HardSwishMe[source]

HardSwishMe(inplace:bool=False) :: Module

+

mish_me[source]

mish_me(x, inplace=False)

-

A memory efficient, jit-scripted HardSwish activation

- -

- -
-

- {% endraw %} -
-
-

HardMish

-
- {% raw %} - -
{% endraw %} @@ -911,11 +323,10 @@

HardMish

-

hard_mish[source]

hard_mish(x, inplace:bool=False)

+

class MishMe[source]

MishMe(inplace:bool=False) :: Module

-

Hard Mish -Experimental, based on notes by Mish author Diganta Misra at - https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md

+

Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 +A memory efficient, jit scripted variant of Mish

@@ -931,25 +342,6 @@

hard_mish -
-
- -
- - -
-

class HardMish[source]

HardMish(inplace:bool=False) :: Module

-
-

Hard Mish, Experimental, based on notes by Mish author Diganta Misra at -https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md

- -
- -
- -
-
-

{% endraw %} @@ -963,13 +355,6 @@

HardMishJit
-
- {% endraw %} - - {% raw %} - -
-
@@ -977,7 +362,7 @@

HardMishJit
-

ScriptFunction object at 0x7f9f26062230>[source]

ScriptFunction object at 0x7f9f26062230>()

+

ScriptFunction object at 0x7f60a962c130>[source]

ScriptFunction object at 0x7f60a962c130>()

Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1004,7 +389,7 @@

Scrip
-

class HardMishJit[source]

HardMishJit(inplace:bool=False) :: Module

+

class HardMishJit[source]

HardMishJit(inplace:bool=False) :: Module

Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1020,12 +405,6 @@

class HardMishJit {% endraw %} -
-
-

HardMishJitMe - memory efficient.

-
-
-
{% raw %}
@@ -1033,6 +412,12 @@

HardMishJitMe - memory efficient. {% endraw %} + {% raw %}
@@ -1044,7 +429,7 @@

HardMishJitMe - memory efficient. -

ScriptFunction object at 0x7f9f260628f0>[source]

ScriptFunction object at 0x7f9f260628f0>()

+

ScriptFunction object at 0x7f60a962c180>[source]

ScriptFunction object at 0x7f60a962c180>()

@@ -1068,7 +453,7 @@

Scrip
-

ScriptFunction object at 0x7f9f26062a10>[source]

ScriptFunction object at 0x7f9f26062a10>()

+

ScriptFunction object at 0x7f605af1eb80>[source]

ScriptFunction object at 0x7f605af1eb80>()

@@ -1092,7 +477,7 @@

Scrip
-

class HardMishJitAutoFn[source]

HardMishJitAutoFn() :: Function

+

class HardMishJitAutoFn[source]

HardMishJitAutoFn() :: Function

A memory efficient, jit scripted variant of Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1119,7 +504,7 @@

class HardMishJ
-

hard_mish_me[source]

hard_mish_me(x, inplace:bool=False)

+

hard_mish_me[source]

hard_mish_me(x, inplace:bool=False)

@@ -1143,7 +528,7 @@

hard_mish_me -

class HardMishMe[source]

HardMishMe(inplace:bool=False) :: Module

+

class HardMishMe[source]

HardMishMe(inplace:bool=False) :: Module

A memory efficient, jit scripted variant of Hard Mish Experimental, based on notes by Mish author Diganta Misra at @@ -1156,6 +541,13 @@

class HardMishMe

+

+ {% endraw %} + + {% raw %} + +
+
{% endraw %} diff --git a/docs/base_constructor.html b/docs/base_constructor.html index f61cfb8..c497454 100644 --- a/docs/base_constructor.html +++ b/docs/base_constructor.html @@ -40,13 +40,6 @@
-
- {% endraw %} - - {% raw %} - -
-
{% endraw %} @@ -60,13 +53,6 @@

Stem

-
- {% endraw %} - - {% raw %} - -
-
@@ -74,7 +60,7 @@

Stem

-

class Stem[source]

Stem(c_in=3, stem_sizes=[], stem_out=64, conv_layer=ConvLayer, stride_on=0, stem_bn_last=False, bn_1st=True, stem_use_pool=True, stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), **kwargs) :: Sequential

+

class Stem[source]

Stem(c_in=3, stem_sizes=[], stem_out=64, conv_layer=ConvLayer, stride_on=0, stem_bn_last=False, bn_1st=True, stem_use_pool=True, stem_pool=MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), **kwargs) :: Sequential

Base stem
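With the defaults shown above the stem is a single stride-2 conv followed by a stride-2 max pool, so the resolution drops by 4×. A sketch (the 128×128 input is an assumption for illustration):

```
# Sketch: default stem reduces resolution by 4 and outputs stem_out=64 channels.
import torch
from model_constructor.base_constructor import Stem

stem = Stem()                          # c_in=3 -> stem_out=64
x = torch.randn(8, 3, 128, 128)
assert stem(x).shape == (8, 64, 32, 32)
```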

@@ -85,6 +71,13 @@

class Stem +

{% endraw %} @@ -404,13 +397,6 @@

Blocks

-
- {% endraw %} - - {% raw %} - -
-
@@ -418,7 +404,7 @@

Blocks

-

DownsampleBlock[source]

DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs)

+

DownsampleBlock[source]

DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs)

Base downsample for res-like blocks

@@ -450,7 +436,7 @@

DownsampleBlock -

class BasicBlock[source]

BasicBlock(ni, nf, expansion=1, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

+

class BasicBlock[source]

BasicBlock(ni, nf, expansion=1, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

Basic block (simplified) as in pytorch resnet
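The identity shortcut is used only when the shapes already match; if the channel count changes or stride is 2, the 1×1 projection from `DownsampleBlock` is applied. A short sketch (sizes are arbitrary):

```
# Sketch: when the projection shortcut kicks in.
import torch
from model_constructor.base_constructor import BasicBlock

x = torch.randn(8, 64, 32, 32)

same = BasicBlock(64, 64)              # ni == nf, stride 1 -> plain identity add
assert same(x).shape == (8, 64, 32, 32)

down = BasicBlock(64, 128, stride=2)   # channel change + stride 2 -> projected shortcut
assert down(x).shape == (8, 128, 16, 16)
```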

@@ -482,7 +468,7 @@

class BasicBlock -

class Bottleneck[source]

Bottleneck(ni, nh, expansion=4, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

+

class Bottleneck[source]

Bottleneck(ni, nh, expansion=4, stride=1, zero_bn=False, conv_layer=ConvLayer, act_fn=ReLU(inplace=True), downsample_block=DownsampleBlock, **kwargs) :: Module

Bottleneck block for resnet models
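Note that `ni` and `nh` are given in block units: the real input and output widths are multiplied by `expansion`, with a 1×1 conv squeezing to `nh`, a 3×3 at that width, and a 1×1 expanding back. A sketch:

```
# Sketch: with expansion=4 the block sees ni*4 = 256 input channels and returns nh*4.
import torch
from model_constructor.base_constructor import Bottleneck

block = Bottleneck(64, 64, expansion=4)
x = torch.randn(8, 256, 16, 16)
assert block(x).shape == (8, 256, 16, 16)
```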

@@ -514,7 +500,7 @@

class Bottleneck -

class BasicLayer[source]

BasicLayer(block, blocks, ni, nf, expansion, stride, sa=False, **kwargs) :: Sequential

+

class BasicLayer[source]

BasicLayer(block, blocks, ni, nf, expansion, stride, sa=False, **kwargs) :: Sequential

Layer from blocks

@@ -528,12 +514,6 @@

class BasicLayer {% endraw %} -
-
-

Body constructor

-
-
-
{% raw %}
@@ -541,6 +521,12 @@

Body constructor +

{% raw %}
@@ -552,7 +538,7 @@

Body constructor -

class Body[source]

Body(block, body_in=64, body_out=512, bodylayer=BasicLayer, expansion=1, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], sa=False, **kwargs) :: Sequential

+

class Body[source]

Body(block, body_in=64, body_out=512, bodylayer=BasicLayer, expansion=1, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], sa=False, **kwargs) :: Sequential

Constructor for body
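With the defaults the layer sizes expand to [64, 64, 128, 256, 512]; the first layer keeps stride 1 (the stem has already downsampled) and each later layer halves the resolution. A sketch on a stem-sized feature map:

```
# Sketch: default Body maps a 64x32x32 stem output to 512x4x4.
import torch
from model_constructor.base_constructor import Body, BasicBlock

body = Body(BasicBlock)
x = torch.randn(8, 64, 32, 32)
assert body(x).shape == (8, 512, 4, 4)
```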

@@ -563,6 +549,13 @@

class Body +

{% endraw %} @@ -790,13 +783,6 @@

Head

-
- {% endraw %} - - {% raw %} - -
-
@@ -804,7 +790,7 @@

Head

-

Head(ni, nf, **kwargs) :: Sequential

+

Head(ni, nf, **kwargs) :: Sequential

base head
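The head is adaptive average pool → flatten → linear, so it accepts any spatial size. A sketch:

```
# Sketch: Head(512, 10) works for any input resolution because of the adaptive pool.
import torch
from model_constructor.base_constructor import Head

head = Head(512, 10)
assert head(torch.randn(8, 512, 4, 4)).shape == (8, 10)
assert head(torch.randn(8, 512, 7, 7)).shape == (8, 10)
```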

@@ -815,6 +801,13 @@
{% endraw %} @@ -881,13 +874,6 @@

class Net

-
- {% endraw %} - - {% raw %} - -
-
@@ -895,7 +881,7 @@

class Net

-

init_model[source]

init_model(model, nonlinearity='leaky_relu')

+

init_model[source]

init_model(model, nonlinearity='leaky_relu')

Init model

@@ -927,7 +913,7 @@

init_model -

class Net[source]

Net(stem=Stem, body=Body, block=BasicBlock, sa=False, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], head=Head, c_in=3, num_classes=1000, body_in=64, body_out=512, expansion=1, init_fn=init_model, **kwargs) :: Sequential

+

class Net[source]

Net(stem=Stem, body=Body, block=BasicBlock, sa=False, layer_szs=[64, 128, 256], blocks=[2, 2, 2, 2], head=Head, c_in=3, num_classes=1000, body_in=64, body_out=512, expansion=1, init_fn=init_model, **kwargs) :: Sequential

Constructor for model

@@ -938,6 +924,13 @@

class Net +

{% endraw %} diff --git a/docs/layers.html b/docs/layers.html index 47de2f4..548e8cb 100644 --- a/docs/layers.html +++ b/docs/layers.html @@ -53,13 +53,6 @@

Flatten

-
- {% endraw %} - - {% raw %} - -
-
@@ -81,12 +74,6 @@

class Flatten {% endraw %} - {% raw %}
@@ -94,6 +81,12 @@

Noop

{% endraw %} +
+
+

Noop

+
+
+
{% raw %}
@@ -105,9 +98,8 @@

Noop

-

noop[source]

noop(x=None, *args, **kwargs)

+

noop[source]

noop(x)

-

Do nothing

@@ -130,7 +122,7 @@

noop -

class Noop[source]

Noop() :: Module

+

class Noop[source]

Noop() :: Module

Dummy module

@@ -144,12 +136,6 @@

class Noop -

{% raw %}
@@ -157,6 +143,12 @@

ConvLayer

{% endraw %} +
+
+

ConvLayer

+
+
+
{% raw %}
@@ -168,7 +160,7 @@

ConvLayer

-

class ConvLayer[source]

ConvLayer(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: Sequential

+

class ConvLayer[source]

ConvLayer(ni, nf, ks=3, stride=1, act=True, act_fn=ReLU(inplace=True), bn_layer=True, bn_1st=True, zero_bn=False, padding=None, bias=False, groups=1, **kwargs) :: Sequential

Basic conv layers block
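ConvLayer bundles conv, batch norm and activation; `bn_1st` controls whether BN comes before the activation, and `zero_bn` initialises the BN weight to zero, the usual choice for the last conv of a residual branch. A usage sketch:

```
# Sketch: a 3x3 conv + BN + ReLU block, and a zero-initialised BN tail without activation.
import torch
from model_constructor.layers import ConvLayer

layer = ConvLayer(3, 32, stride=2)
assert layer(torch.randn(8, 3, 64, 64)).shape == (8, 32, 32, 32)

tail = ConvLayer(32, 32, zero_bn=True, act=False)   # BN weight starts at zero
```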

@@ -179,6 +171,13 @@

class ConvLayer<

+

+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -380,13 +379,6 @@

SimpleSelfAttention -

- {% endraw %} - - {% raw %} - -
-
@@ -394,9 +386,9 @@

SimpleSelfAttention -

conv1d[source]

conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False)

+

conv1d[source]

conv1d(ni:int, no:int, ks:int=1, stride:int=1, padding:int=0, bias:bool=False)

-

Create and initialize a nn.Conv1d layer with spectral normalization.

+

Create and initialize a nn.Conv1d layer with spectral normalization.

@@ -419,7 +411,7 @@

conv1d -

class SimpleSelfAttention[source]

SimpleSelfAttention(n_in:int, ks=1, sym=False) :: Module

+

class SimpleSelfAttention[source]

SimpleSelfAttention(n_in:int, ks=1, sym=False) :: Module

Base class for all neural network modules.

Your models should also subclass this class.

@@ -453,12 +445,6 @@

class SimpleS

{% endraw %} -
-
-

SE Block

-
-
-
{% raw %}
@@ -466,6 +452,12 @@

SE Block

{% endraw %} +
+
+

SE Block

+
+
+
{% raw %}
@@ -477,7 +469,7 @@

SE Block

-

class SEBlock[source]

SEBlock(c, r=16) :: Module

+

class SEBlock[source]

SEBlock(c, r=16) :: Module

se block
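Squeeze-and-excitation pools each channel to a single value, passes it through a small bottleneck with a sigmoid gate, and rescales the channels, so the output shape equals the input shape. A usage sketch (sizes are arbitrary):

```
# Sketch: SE gating only re-weights channels, it never changes the tensor shape.
import torch
from model_constructor.layers import SEBlock

se = SEBlock(64)
x = torch.randn(8, 64, 16, 16)
assert se(x).shape == x.shape
```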

@@ -488,6 +480,13 @@

class SEBlock

+
+ {% endraw %} + + {% raw %} + +
+
{% endraw %} @@ -543,13 +542,6 @@

SEBlockConv
-
- {% endraw %} - - {% raw %} - -
-
@@ -557,7 +549,7 @@

SEBlockConv
-

class SEBlockConv[source]

SEBlockConv(c, r=16) :: Module

+

class SEBlockConv[source]

SEBlockConv(c, r=16) :: Module

se block with conv on excitation
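The conv variant does the same gating but keeps the excitation path as 1×1 convolutions, so no flatten is needed and the reduced width is `c // r` (4 for 64 channels with the default r=16), matching the code earlier in this patch. A sketch:

```
# Sketch: same channel gating as SEBlock, with 1x1 convs on the excitation path.
import torch
from model_constructor.layers import SEBlockConv

se = SEBlockConv(64)       # reduced width: 64 // 16 = 4
x = torch.randn(8, 64, 16, 16)
assert se(x).shape == x.shape
```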

@@ -568,6 +560,13 @@

class SEBlockConv

+

+ {% endraw %} + + {% raw %} + +
+
diff --git a/docs/xresnet.html b/docs/xresnet.html
index e801565..c61c19b 100644
--- a/docs/xresnet.html
+++ b/docs/xresnet.html
[Regenerated nbdev docs for model_constructor.xresnet: the hunks only move the {% raw %}/{% endraw %} cell wrappers around the DownsampleLayer, XResBlock, xresnet18, xresnet34 and xresnet50 entries; the documented signatures and descriptions are unchanged (xresnet34 and xresnet50 are both still described as "Constructs axresnet-34 model.").]
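For orientation, the three constructors documented above are thin wrappers that hand a block class and per-stage block counts to base_constructor.Net; their definitions appear in the model_constructor/xresnet.py hunk further down. A usage sketch only, under the assumption that keyword arguments such as num_classes are forwarded through Net to its head and that the head pools adaptively before the final linear layer:

    import torch

    from model_constructor.xresnet import xresnet34

    # xresnet18/34 use expansion=1, xresnet50 uses expansion=4 (bottleneck blocks);
    # all three build Net with XResBlock and stem_sizes=[32, 32].
    model = xresnet34(num_classes=10)         # num_classes assumed to reach the head via **kwargs
    out = model(torch.randn(2, 3, 128, 128))  # batch of 2 RGB images, size chosen for illustration
    print(out.shape)                          # expected: torch.Size([2, 10])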

{% endraw %} diff --git a/model_constructor/_nbdev.py b/model_constructor/_nbdev.py index 127fa0c..a2182a1 100644 --- a/model_constructor/_nbdev.py +++ b/model_constructor/_nbdev.py @@ -40,8 +40,6 @@ "yaresnet_parameters": "04_YaResNet.ipynb", "yaresnet34": "04_YaResNet.ipynb", "yaresnet50": "04_YaResNet.ipynb", - "nn": "05_Twist.ipynb", - "F": "05_Twist.ipynb", "ConvTwist": "05_Twist.ipynb", "ConvLayerTwist": "05_Twist.ipynb", "NewResBlockTwist": "05_Twist.ipynb", @@ -71,4 +69,4 @@ git_url = "https://github.com/ayasyrev/model_constructor/tree/master/" -def custom_doc_links(name): return None +def custom_doc_links(name): return None # noqa E704 diff --git a/model_constructor/base_constructor.py b/model_constructor/base_constructor.py index 11ceabf..bc16184 100644 --- a/model_constructor/base_constructor.py +++ b/model_constructor/base_constructor.py @@ -8,11 +8,9 @@ from collections import OrderedDict from .layers import ConvLayer, Noop, Flatten - # Cell act_fn = nn.ReLU(inplace=True) - # Cell class Stem(nn.Sequential): """Base stem""" @@ -39,13 +37,11 @@ def __init__(self, c_in=3, stem_sizes=[], stem_out=64, def extra_repr(self): return f"sizes: {self.sizes}" - # Cell def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs): '''Base downsample for res-like blocks''' return conv_layer(ni, nf, ks, stride, act, **kwargs) - # Cell class BasicBlock(nn.Module): """Basic block (simplified) as in pytorch resnet""" @@ -69,7 +65,6 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) - # Cell class Bottleneck(nn.Module): '''Bottlneck block for resnet models''' @@ -97,7 +92,6 @@ def forward(self, x): identity = self.downsample(x) return self.act_conn(self.merge(out + identity)) - # Cell class BasicLayer(nn.Sequential): '''Layer from blocks''' @@ -116,7 +110,6 @@ def __init__(self, block, blocks, ni, nf, expansion, stride, sa=False, **kwargs) def extra_repr(self): return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.' 
- # Cell class Body(nn.Sequential): '''Constructor for body''' @@ -135,7 +128,6 @@ def __init__(self, block, for i in range(num_layers)] super().__init__(OrderedDict(layers)) - # Cell class Head(nn.Sequential): '''base head''' @@ -146,7 +138,6 @@ def __init__(self, ni, nf, **kwargs): ('fc', nn.Linear(ni, nf)), ])) - # Cell def init_model(model, nonlinearity='leaky_relu'): '''Init model''' @@ -154,7 +145,6 @@ def init_model(model, nonlinearity='leaky_relu'): if isinstance(m, (nn.Conv2d, nn.Linear)): nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity) - # Cell class Net(nn.Sequential): '''Constructor for model''' @@ -173,4 +163,4 @@ def __init__(self, stem=Stem, sa=sa, **kwargs)), ('head', head(body_out * expansion, num_classes, **kwargs)) ])) - self.init_model(self) + self.init_model(self) \ No newline at end of file diff --git a/model_constructor/mxresnet.py b/model_constructor/mxresnet.py index b36761c..951be1e 100644 --- a/model_constructor/mxresnet.py +++ b/model_constructor/mxresnet.py @@ -4,8 +4,9 @@ # Cell from functools import partial -from .net import Net + from .activations import Mish +from .net import Net # Cell mxresnet_parameters = {'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish()} diff --git a/model_constructor/net.py b/model_constructor/net.py index b939482..1b84b1d 100644 --- a/model_constructor/net.py +++ b/model_constructor/net.py @@ -3,10 +3,12 @@ __all__ = ['init_cnn', 'act_fn', 'ResBlock', 'NewResBlock', 'Net', 'xresnet34', 'xresnet50'] # Cell -import torch.nn as nn -from functools import partial from collections import OrderedDict -from .layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten +from functools import partial + +import torch.nn as nn + +from .layers import ConvLayer, Flatten, SEBlock, SimpleSelfAttention, noop # Cell act_fn = nn.ReLU(inplace=True) @@ -21,6 +23,8 @@ def init_cnn(module: nn.Module): init_cnn(layer) # Cell + + class ResBlock(nn.Module): '''Resnet block''' se_block = SEBlock @@ -58,8 +62,6 @@ def forward(self, x): # Cell # NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet. - - class NewResBlock(nn.Module): '''YaResnet block''' se_block = SEBlock diff --git a/model_constructor/twist.py b/model_constructor/twist.py index 94fd6e6..fab4baf 100644 --- a/model_constructor/twist.py +++ b/model_constructor/twist.py @@ -1,17 +1,17 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/05_Twist.ipynb (unless otherwise specified). 
-__all__ = ['nn', 'F', 'ConvTwist', 'ConvLayerTwist', 'NewResBlockTwist', 'ResBlockTwist'] +__all__ = ['ConvTwist', 'ConvLayerTwist', 'NewResBlockTwist', 'ResBlockTwist'] # Cell -from functools import partial +# from functools import partial from collections import OrderedDict -from .layers import * -from .net import * +from .layers import ConvLayer, noop, act_fn, SimpleSelfAttention # Cell -import sys, torch -nn = torch.nn -F = torch.nn.functional +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np # Cell class ConvTwist(nn.Module): @@ -20,49 +20,51 @@ class ConvTwist(nn.Module): twist = False use_groups = True groups_ch = 8 + def __init__(self, ni, nf, ks=3, stride=1, padding=1, bias=False, groups=1, iters=1, init_max=0.7, **kvargs): super().__init__() - self.same = ni==nf and stride==1 - self.groups = ni//self.groups_ch if self.use_groups else 1 - + self.same = ni == nf and stride == 1 + self.groups = ni // self.groups_ch if self.use_groups else 1 self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=False, groups=self.groups) if self.twist: std = self.conv.weight.std().item() - self.coeff_Ax = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True) - self.coeff_Ay = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True) + self.coeff_Ax = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True) + self.coeff_Ay = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True) self.iters = iters self.stride = stride self.DD = self.derivatives() def derivatives(self): - I = torch.Tensor([[0,0,0],[0,1,0],[0,0,0]]).view(1,1,3,3) - D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).view(1,1,3,3) / 10 - D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).view(1,1,3,3) / 10 + I = torch.Tensor([[0, 0, 0], [0, 1, 0], [0, 0, 0]]).view(1, 1, 3, 3) # noqa E741 + D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).view(1, 1, 3, 3) / 10 + D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).view(1, 1, 3, 3) / 10 + def convolution(K1, K2): return F.conv2d(K1, K2.flip(2).flip(3), padding=2) - D_xx = convolution(I+D_x, I+D_x).view(5,5) - D_yy = convolution(I+D_y, I+D_y).view(5,5) - D_xy = convolution(I+D_x, I+D_y).view(5,5) + D_xx = convolution(I + D_x, I + D_x).view(5, 5) + D_yy = convolution(I + D_y, I + D_y).view(5, 5) + D_xy = convolution(I + D_x, I + D_y).view(5, 5) return {'x': D_x, 'y': D_y, 'xx': D_xx, 'yy': D_yy, 'xy': D_xy} def kernel(self, coeff_x, coeff_y): - D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).to(coeff_x.device) - D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).to(coeff_x.device) - return coeff_x[:,:,None,None] * D_x + coeff_y[:,:,None,None] * D_y + D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).to(coeff_x.device) + D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).to(coeff_x.device) + return coeff_x[:, :, None, None] * D_x + coeff_y[:, :, None, None] * D_y - def full_kernel(self, kernel): # permuting the groups - if self.groups==1: return kernel + def full_kernel(self, kernel): # permuting the groups + if self.groups == 1: + return kernel n = self.groups - a,b,_,_ = kernel.size() - a = a//n - KK = torch.zeros((a*n,b*n,3,3)).to(kernel.device) + a, b, _, _ = kernel.size() + a = a // n + KK = torch.zeros((a * n, b * n, 3, 3)).to(kernel.device) for i in range(n): - if i%4==0: - KK[a*i:a*(i+1),b*(i+3):b*(i+4)] = kernel[a*i:a*(i+1)] + if i % 4 == 0: + KK[a * i:a * (i + 1), b * (i + 3):b * (i + 4)] = kernel[a * i:a 
* (i + 1)] else: - KK[a*i:a*(i+1),b*(i-1):b*i] = kernel[a*i:a*(i+1)] + KK[a * i:a * (i + 1), b * (i - 1):b * i] = kernel[a * i:a * (i + 1)] return KK def _conv(self, inpt, kernel=None): @@ -77,23 +79,25 @@ def symmetrize(self, conv_wt): if self.same: n = conv_wt.size()[1] for i in range(self.groups): - conv_wt.data[n*i:n*(i+1)] = (conv_wt[n*i:n*(i+1)] + torch.transpose(conv_wt[n*i:n*(i+1)],0,1)) / 2 + conv_wt.data[n * i:n * (i + 1)] = (conv_wt[n * i:n * (i + 1)] + + torch.transpose(conv_wt[n * i:n * (i + 1)], 0, 1)) / 2 # noqa E503 def forward(self, inpt): out = self._conv(inpt) if self.twist is False: return out - _,_,h,w = out.size() - XX = torch.from_numpy(np.indices((1,1,h,w))[3]*2/w-1).type(out.dtype).to(out.device) - YY = torch.from_numpy(np.indices((1,1,h,w))[2]*2/h-1).type(out.dtype).to(out.device) + _, _, h, w = out.size() + XX = torch.from_numpy(np.indices((1, 1, h, w))[3] * 2 / w - 1).type(out.dtype).to(out.device) + YY = torch.from_numpy(np.indices((1, 1, h, w))[2] * 2 / h - 1).type(out.dtype).to(out.device) kernel_x = self.kernel(self.coeff_Ax, self.coeff_Ay) self.symmetrize(kernel_x) - kernel_y = kernel_x.transpose(2,3).flip(3) # make conv_y a 90 degree rotation of conv_x + kernel_y = kernel_x.transpose(2, 3).flip(3) # make conv_y a 90 degree rotation of conv_x out = out + XX * self._conv(inpt, kernel_x) + YY * self._conv(inpt, kernel_y) - if self.same and self.iters>1: + if self.same and self.iters > 1: out = inpt + out / self.iters - for _ in range(self.iters-1): - out = out + (self._conv(out) + XX * self._conv(out, kernel_x) + YY * self._conv(out, kernel_y)) / self.iters + for _ in range(self.iters - 1): + out = out + (self._conv(out) + XX * self._conv(out, kernel_x) + + YY * self._conv(out, kernel_y)) / self.iters # noqa E727 out = out - inpt return out @@ -101,7 +105,7 @@ def extra_repr(self): return f"twist: {self.twist}, permute: {self.permute}, same: {self.same}, groups: {self.groups}" # Cell -class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist +class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist Conv2d = ConvTwist # Cell @@ -110,20 +114,19 @@ def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True, pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True, **kvargs): super().__init__() - nf,ni = nh*expansion,ni*expansion -# conv_layer = ConvLayerTwist - self.reduce = noop if stride==1 else pool - layers = [(f"conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), -# (f"conv_1", conv_layer(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1_twist", ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + nf, ni = nh * expansion, ni * expansion + self.reduce = noop if stride == 1 else pool + layers = [("conv_0", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] if expansion == 1 else [ + ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1_twist", ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) ] - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) 
self.convs = nn.Sequential(OrderedDict(layers)) - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st) + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st) self.merge = act_fn def forward(self, x): @@ -134,22 +137,22 @@ def forward(self, x): class ResBlockTwist(nn.Module): def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, - pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, **kvargs): + pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, **kvargs): super().__init__() - nf,ni = nh*expansion,ni*expansion -# conv_layer = ConvLayerTwist - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), -# (f"conv_1",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1_twist",ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + nf, ni = nh * expansion, ni * expansion + layers = [("conv_0", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] if expansion == 1 else [ + ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1_twist", ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) ] - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.pool = noop if stride==1 else pool - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.act_fn =act_fn + self.pool = noop if stride == 1 else pool + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.act_fn = act_fn - def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) \ No newline at end of file + def forward(self, x): + return self.act_fn(self.convs(x) + self.idconv(self.pool(x))) \ No newline at end of file diff --git a/model_constructor/xresnet.py b/model_constructor/xresnet.py index 2394dd6..93edda5 100644 --- a/model_constructor/xresnet.py +++ b/model_constructor/xresnet.py @@ -4,11 +4,10 @@ # Cell import torch.nn as nn -import torch from collections import OrderedDict # Cell -from .base_constructor import * +from .base_constructor import Net from .layers import ConvLayer, Noop, act_fn # Cell @@ -17,9 +16,10 @@ class DownsampleLayer(nn.Sequential): def __init__(self, conv_layer, ni, nf, stride, act, pool=nn.AvgPool2d(2, ceil_mode=True), pool_1st=True, **kwargs): - layers = [] if stride==1 else [('pool', pool)] - layers += [] if ni==nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))] - if not pool_1st: layers.reverse() + layers = [] if stride == 1 else [('pool', pool)] + layers += [] if ni == nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))] + if not pool_1st: + layers.reverse() super().__init__(OrderedDict(layers)) # Cell @@ -27,28 +27,32 @@ class XResBlock(nn.Module): def __init__(self, ni, nh, expansion=1, stride=1, zero_bn=True, conv_layer=ConvLayer, act_fn=act_fn, **kwargs): super().__init__() - nf,ni = nh*expansion,ni*expansion - 
layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), - ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) - ] if expansion == 1 else [ - ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)), - ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), - ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) + nf, ni = nh * expansion, ni * expansion + layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) + ] if expansion == 1 else [ + ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)), + ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)), + ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs)) ] self.convs = nn.Sequential(OrderedDict(layers)) - self.identity = DownsampleLayer(conv_layer, ni, nf, stride, act=False, act_fn=act_fn, **kwargs) if ni!=nf or stride==2 else Noop() - self.merge = Noop() # us it to visualize in repr residual connection + self.identity = DownsampleLayer(conv_layer, ni, nf, stride, + act=False, act_fn=act_fn, **kwargs) if ni != nf or stride == 2 else Noop() + self.merge = Noop() self.act_fn = act_fn - def forward(self, x): return self.act_fn(self.merge(self.convs(x) + self.identity(x))) + def forward(self, x): + return self.act_fn(self.merge(self.convs(x) + self.identity(x))) # Cell def xresnet18(**kwargs): """Constructs a xresnet-18 model. """ - return Net(stem_sizes=[32,32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs) + return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs) + def xresnet34(**kwargs): """Constructs axresnet-34 model. """ - return Net(stem_sizes=[32,32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs) + return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs) + def xresnet50(**kwargs): """Constructs axresnet-34 model. """ - return Net(stem_sizes=[32,32],block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs) \ No newline at end of file + return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs) \ No newline at end of file diff --git a/model_constructor/yaresnet.py b/model_constructor/yaresnet.py index 1dd7ed8..4f817a3 100644 --- a/model_constructor/yaresnet.py +++ b/model_constructor/yaresnet.py @@ -4,10 +4,9 @@ # Cell import torch.nn as nn -import sys, torch from functools import partial from collections import OrderedDict -from .layers import * +from .layers import SEBlock, ConvLayer, act_fn, noop, SimpleSelfAttention from .net import Net from .activations import Mish @@ -17,35 +16,38 @@ class YaResBlock(nn.Module): '''YaResBlock. Reduce by pool instead of stride 2''' se_block = SEBlock + def __init__(self, expansion, ni, nh, stride=1, conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True, - pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False, + pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False, groups=1, dw=False): super().__init__() - nf,ni = nh*expansion,ni*expansion - if groups != 1: groups = int(nh/groups) - self.reduce = noop if stride==1 else pool - layers = [(f"conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! 
- (f"conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) - ] if expansion == 1 else [ - (f"conv_0",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), - (f"conv_1",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, - groups= nh if dw else groups)), # stride 1 !!! - (f"conv_2",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + nf, ni = nh * expansion, ni * expansion + if groups != 1: + groups = int(nh / groups) + self.reduce = noop if stride == 1 else pool + layers = [("conv_0", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_1", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) + ] if expansion == 1 else [ + ("conv_0", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)), + ("conv_1", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st, + groups=nh if dw else groups)), + ("conv_2", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st)) ] - if se: layers.append(('se', self.se_block(nf))) - if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym))) + if se: + layers.append(('se', self.se_block(nf))) + if sa: + layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym))) self.convs = nn.Sequential(OrderedDict(layers)) - self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False) - self.merge =act_fn + self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False) + self.merge = act_fn def forward(self, x): o = self.reduce(x) return self.merge(self.convs(o) + self.idconv(o)) # Cell -yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(), - 'stem_sizes': [3,32,64,64], 'stem_stride_on': 1} -yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters) -yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters) \ No newline at end of file +yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(), 'stem_stride_on': 1} +yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters) +yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters) \ No newline at end of file diff --git a/nbs/00_Net.ipynb b/nbs/00_Net.ipynb index b2d3353..f3ad44e 100644 --- a/nbs/00_Net.ipynb +++ b/nbs/00_Net.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": [], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -29,6 +18,20 @@ "#default_exp net" ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T07:17:22.914932Z", + "start_time": "2020-11-12T07:17:22.910201Z" + } + }, + "outputs": [], + "source": [ + "from nbdev import docs, showdoc, show_doc" + ] + }, { "cell_type": "code", "execution_count": null, @@ -56,7 +59,8 @@ "source": [ "#hide\n", "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "from dataclasses import dataclass, asdict" ] }, { @@ -66,10 +70,12 @@ "outputs": [], "source": [ "#export\n", - "import torch.nn as nn\n", - "from functools import partial\n", "from collections import OrderedDict\n", - "from model_constructor.layers import ConvLayer, noop, SEBlock, SimpleSelfAttention, Flatten" + "from functools import partial\n", + "\n", + "import torch.nn as nn\n", 
+ "\n", + "from model_constructor.layers import ConvLayer, Flatten, SEBlock, SimpleSelfAttention, noop" ] }, { @@ -77,9 +83,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from dataclasses import dataclass, asdict" - ] + "source": [] }, { "cell_type": "markdown", @@ -121,6 +125,8 @@ "outputs": [], "source": [ "#export\n", + "\n", + "\n", "class ResBlock(nn.Module):\n", " '''Resnet block'''\n", " se_block = SEBlock\n", @@ -386,8 +392,6 @@ "source": [ "#export\n", "# NewResBlock now is YaResBlock - Yet Another ResNet Block! It is now at model_constructor.yaresnet.\n", - "\n", - "\n", "class NewResBlock(nn.Module):\n", " '''YaResnet block'''\n", " se_block = SEBlock\n", @@ -2738,14 +2742,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T07:15:22.388933Z", + "start_time": "2020-11-12T07:15:21.903684Z" + } + }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "UsageError: Line magic function `%nbdev_hide` not found.\n" + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", + "Converted 01_layers.ipynb.\n", + "Converted 03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", + "Converted 05_Twist.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", + "Converted index.ipynb.\n" ] } ], diff --git a/nbs/01_activations.ipynb b/nbs/01_activations.ipynb index 176827d..4c3c4e9 100644 --- a/nbs/01_activations.ipynb +++ b/nbs/01_activations.ipynb @@ -338,8 +338,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2020-11-11T16:24:49.841271Z", - "start_time": "2020-11-11T16:24:49.167581Z" + "end_time": "2020-11-12T07:27:35.048914Z", + "start_time": "2020-11-12T07:27:34.356516Z" } }, "outputs": [ @@ -400,7 +400,12 @@ "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, - "toc_position": {}, + "toc_position": { + "height": "calc(100% - 180px)", + "left": "10px", + "top": "150px", + "width": "341.333px" + }, "toc_section_display": true, "toc_window_display": true } diff --git a/nbs/03_MXResNet.ipynb b/nbs/03_MXResNet.ipynb index 5cfe5f1..f2dc0d9 100644 --- a/nbs/03_MXResNet.ipynb +++ b/nbs/03_MXResNet.ipynb @@ -59,8 +59,9 @@ "source": [ "#export\n", "from functools import partial\n", - "from model_constructor.net import Net\n", - "from model_constructor.activations import Mish" + "\n", + "from model_constructor.activations import Mish\n", + "from model_constructor.net import Net" ] }, { diff --git a/nbs/04_YaResNet.ipynb b/nbs/04_YaResNet.ipynb index d41879d..6e89dac 100644 --- a/nbs/04_YaResNet.ipynb +++ b/nbs/04_YaResNet.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": [], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -40,27 +29,37 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:35:33.138912Z", + "start_time": "2020-11-12T08:35:32.764886Z" + } + }, "outputs": [], "source": [ "#hide\n", "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": 
"2020-11-12T08:35:33.431038Z", + "start_time": "2020-11-12T08:35:33.393005Z" + } + }, "outputs": [], "source": [ "#export\n", "import torch.nn as nn\n", - "import sys, torch\n", "from functools import partial\n", "from collections import OrderedDict\n", - "from model_constructor.layers import *\n", + "from model_constructor.layers import SEBlock, ConvLayer, act_fn, noop, SimpleSelfAttention\n", "from model_constructor.net import Net\n", "from model_constructor.activations import Mish" ] @@ -84,28 +83,32 @@ "class YaResBlock(nn.Module):\n", " '''YaResBlock. Reduce by pool instead of stride 2'''\n", " se_block = SEBlock\n", + "\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", - " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, se=False,\n", + " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, se=False,\n", " groups=1, dw=False):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " if groups != 1: groups = int(nh/groups)\n", - " self.reduce = noop if stride==1 else pool\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\",conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", - " groups= nh if dw else groups)), # stride 1 !!!\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " if groups != 1:\n", + " groups = int(nh / groups)\n", + " self.reduce = noop if stride == 1 else pool\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ] if expansion == 1 else [\n", + " (\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nh, 3, stride=1, act_fn=act_fn, bn_1st=bn_1st,\n", + " groups=nh if dw else groups)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", - " if se: layers.append(('se', self.se_block(nf)))\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " if se:\n", + " layers.append(('se', self.se_block(nf)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.merge =act_fn\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.merge = act_fn\n", "\n", " def forward(self, x):\n", " o = self.reduce(x)\n", @@ -969,10 +972,9 @@ "outputs": [], "source": [ "#export\n", - "yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(),\n", - " 'stem_sizes': [3,32,64,64], 'stem_stride_on': 1}\n", - "yaresnet34 = partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters)\n", - "yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters)" + "yaresnet_parameters = {'block': YaResBlock, 'stem_sizes': [3, 32, 64, 64], 'act_fn': Mish(), 'stem_stride_on': 1}\n", + "yaresnet34 = 
partial(Net, name='YaResnet34', expansion=1, layers=[3, 4, 6, 3], **yaresnet_parameters)\n", + "yaresnet50 = partial(Net, name='YaResnet50', expansion=4, layers=[3, 4, 6, 3], **yaresnet_parameters)" ] }, { @@ -1102,6 +1104,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/nbs/05_Twist.ipynb b/nbs/05_Twist.ipynb index 2dd28aa..13a4f2f 100644 --- a/nbs/05_Twist.ipynb +++ b/nbs/05_Twist.ipynb @@ -2,8 +2,12 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:25.512884Z", + "start_time": "2020-11-12T08:18:25.319186Z" + }, "hide_input": true }, "outputs": [], @@ -13,18 +17,14 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cells will be exported to model_constructor.twist,\n", - "unless a different module is specified after an export flag: `%nbdev_export special.module`\n" - ] + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:26.436816Z", + "start_time": "2020-11-12T08:18:26.430668Z" } - ], + }, + "outputs": [], "source": [ "#default_exp twist" ] @@ -40,38 +40,54 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 47, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:00.353036Z", + "start_time": "2020-11-12T08:20:00.345417Z" + } + }, "outputs": [], "source": [ "#hide\n", "# from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "from model_constructor.net import Net" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:29.558525Z", + "start_time": "2020-11-12T08:18:29.163775Z" + } + }, "outputs": [], "source": [ "#export\n", - "from functools import partial\n", + "# from functools import partial\n", "from collections import OrderedDict\n", - "from model_constructor.layers import *\n", - "from model_constructor.net import *" + "from model_constructor.layers import ConvLayer, noop, act_fn, SimpleSelfAttention" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:29.564828Z", + "start_time": "2020-11-12T08:18:29.561559Z" + } + }, "outputs": [], "source": [ "#export\n", - "import sys, torch\n", - "nn = torch.nn\n", - "F = torch.nn.functional" + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import numpy as np" ] }, { @@ -83,8 +99,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 6, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:30.293010Z", + "start_time": "2020-11-12T08:18:30.245062Z" + } + }, "outputs": [], "source": [ 
"#export\n", @@ -94,49 +115,51 @@ " twist = False\n", " use_groups = True\n", " groups_ch = 8\n", + "\n", " def __init__(self, ni, nf,\n", " ks=3, stride=1, padding=1, bias=False,\n", " groups=1, iters=1, init_max=0.7, **kvargs):\n", " super().__init__()\n", - " self.same = ni==nf and stride==1\n", - " self.groups = ni//self.groups_ch if self.use_groups else 1\n", - "\n", + " self.same = ni == nf and stride == 1\n", + " self.groups = ni // self.groups_ch if self.use_groups else 1\n", " self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=stride, padding=1, bias=False, groups=self.groups)\n", " if self.twist:\n", " std = self.conv.weight.std().item()\n", - " self.coeff_Ax = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True)\n", - " self.coeff_Ay = nn.Parameter(torch.empty((nf,ni//groups)).normal_(0, std), requires_grad=True)\n", + " self.coeff_Ax = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True)\n", + " self.coeff_Ay = nn.Parameter(torch.empty((nf, ni // groups)).normal_(0, std), requires_grad=True)\n", " self.iters = iters\n", " self.stride = stride\n", " self.DD = self.derivatives()\n", "\n", " def derivatives(self):\n", - " I = torch.Tensor([[0,0,0],[0,1,0],[0,0,0]]).view(1,1,3,3)\n", - " D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).view(1,1,3,3) / 10\n", - " D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).view(1,1,3,3) / 10\n", + " I = torch.Tensor([[0, 0, 0], [0, 1, 0], [0, 0, 0]]).view(1, 1, 3, 3) # noqa E741\n", + " D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).view(1, 1, 3, 3) / 10\n", + " D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).view(1, 1, 3, 3) / 10\n", + "\n", " def convolution(K1, K2):\n", " return F.conv2d(K1, K2.flip(2).flip(3), padding=2)\n", - " D_xx = convolution(I+D_x, I+D_x).view(5,5)\n", - " D_yy = convolution(I+D_y, I+D_y).view(5,5)\n", - " D_xy = convolution(I+D_x, I+D_y).view(5,5)\n", + " D_xx = convolution(I + D_x, I + D_x).view(5, 5)\n", + " D_yy = convolution(I + D_y, I + D_y).view(5, 5)\n", + " D_xy = convolution(I + D_x, I + D_y).view(5, 5)\n", " return {'x': D_x, 'y': D_y, 'xx': D_xx, 'yy': D_yy, 'xy': D_xy}\n", "\n", " def kernel(self, coeff_x, coeff_y):\n", - " D_x = torch.Tensor([[-1,0,1],[-2,0,2],[-1,0,1]]).to(coeff_x.device)\n", - " D_y = torch.Tensor([[1,2,1],[0,0,0],[-1,-2,-1]]).to(coeff_x.device)\n", - " return coeff_x[:,:,None,None] * D_x + coeff_y[:,:,None,None] * D_y\n", + " D_x = torch.Tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]).to(coeff_x.device)\n", + " D_y = torch.Tensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]).to(coeff_x.device)\n", + " return coeff_x[:, :, None, None] * D_x + coeff_y[:, :, None, None] * D_y\n", "\n", - " def full_kernel(self, kernel): # permuting the groups\n", - " if self.groups==1: return kernel\n", + " def full_kernel(self, kernel): # permuting the groups\n", + " if self.groups == 1:\n", + " return kernel\n", " n = self.groups\n", - " a,b,_,_ = kernel.size()\n", - " a = a//n\n", - " KK = torch.zeros((a*n,b*n,3,3)).to(kernel.device)\n", + " a, b, _, _ = kernel.size()\n", + " a = a // n\n", + " KK = torch.zeros((a * n, b * n, 3, 3)).to(kernel.device)\n", " for i in range(n):\n", - " if i%4==0:\n", - " KK[a*i:a*(i+1),b*(i+3):b*(i+4)] = kernel[a*i:a*(i+1)]\n", + " if i % 4 == 0:\n", + " KK[a * i:a * (i + 1), b * (i + 3):b * (i + 4)] = kernel[a * i:a * (i + 1)]\n", " else:\n", - " KK[a*i:a*(i+1),b*(i-1):b*i] = kernel[a*i:a*(i+1)]\n", + " KK[a * i:a * (i + 1), b * (i - 1):b * i] = kernel[a * i:a * (i + 1)]\n", " return KK\n", "\n", " def 
_conv(self, inpt, kernel=None):\n", @@ -151,23 +174,25 @@ " if self.same:\n", " n = conv_wt.size()[1]\n", " for i in range(self.groups):\n", - " conv_wt.data[n*i:n*(i+1)] = (conv_wt[n*i:n*(i+1)] + torch.transpose(conv_wt[n*i:n*(i+1)],0,1)) / 2\n", + " conv_wt.data[n * i:n * (i + 1)] = (conv_wt[n * i:n * (i + 1)]\n", + " + torch.transpose(conv_wt[n * i:n * (i + 1)], 0, 1)) / 2 # noqa E503\n", "\n", " def forward(self, inpt):\n", " out = self._conv(inpt)\n", " if self.twist is False:\n", " return out\n", - " _,_,h,w = out.size()\n", - " XX = torch.from_numpy(np.indices((1,1,h,w))[3]*2/w-1).type(out.dtype).to(out.device)\n", - " YY = torch.from_numpy(np.indices((1,1,h,w))[2]*2/h-1).type(out.dtype).to(out.device)\n", + " _, _, h, w = out.size()\n", + " XX = torch.from_numpy(np.indices((1, 1, h, w))[3] * 2 / w - 1).type(out.dtype).to(out.device)\n", + " YY = torch.from_numpy(np.indices((1, 1, h, w))[2] * 2 / h - 1).type(out.dtype).to(out.device)\n", " kernel_x = self.kernel(self.coeff_Ax, self.coeff_Ay)\n", " self.symmetrize(kernel_x)\n", - " kernel_y = kernel_x.transpose(2,3).flip(3) # make conv_y a 90 degree rotation of conv_x\n", + " kernel_y = kernel_x.transpose(2, 3).flip(3) # make conv_y a 90 degree rotation of conv_x\n", " out = out + XX * self._conv(inpt, kernel_x) + YY * self._conv(inpt, kernel_y)\n", - " if self.same and self.iters>1:\n", + " if self.same and self.iters > 1:\n", " out = inpt + out / self.iters\n", - " for _ in range(self.iters-1):\n", - " out = out + (self._conv(out) + XX * self._conv(out, kernel_x) + YY * self._conv(out, kernel_y)) / self.iters\n", + " for _ in range(self.iters - 1):\n", + " out = out + (self._conv(out) + XX * self._conv(out, kernel_x)\n", + " + YY * self._conv(out, kernel_y)) / self.iters # noqa E727\n", " out = out - inpt\n", " return out\n", "\n", @@ -177,8 +202,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:31.165082Z", + "start_time": "2020-11-12T08:18:31.139082Z" + } + }, "outputs": [ { "data": { @@ -189,7 +219,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -200,8 +230,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:31.789815Z", + "start_time": "2020-11-12T08:18:31.778360Z" + } + }, "outputs": [ { "data": { @@ -209,7 +244,7 @@ "(False, True)" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -220,8 +255,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:32.194529Z", + "start_time": "2020-11-12T08:18:32.188588Z" + } + }, "outputs": [ { "data": { @@ -229,7 +269,7 @@ "(True, 8)" ] }, - "execution_count": null, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -240,8 +280,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:32.636064Z", + "start_time": "2020-11-12T08:18:32.618791Z" + } + }, "outputs": [ { "data": { @@ -252,7 +297,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -263,8 +308,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": 
{}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:33.086882Z", + "start_time": "2020-11-12T08:18:33.074498Z" + } + }, "outputs": [ { "data": { @@ -275,7 +325,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -295,19 +345,29 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:34.287872Z", + "start_time": "2020-11-12T08:18:34.283346Z" + } + }, "outputs": [], "source": [ "#export\n", - "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist\n", + "class ConvLayerTwist(ConvLayer): # replace Conv2d by Twist\n", " Conv2d = ConvTwist" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:34.985510Z", + "start_time": "2020-11-12T08:18:34.978384Z" + } + }, "outputs": [ { "data": { @@ -322,7 +382,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -333,8 +393,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:35.457612Z", + "start_time": "2020-11-12T08:18:35.454604Z" + } + }, "outputs": [ { "data": { @@ -342,7 +407,7 @@ "torch.nn.modules.conv.Conv2d" ] }, - "execution_count": null, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -353,8 +418,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:35.830439Z", + "start_time": "2020-11-12T08:18:35.825197Z" + } + }, "outputs": [ { "data": { @@ -362,7 +432,7 @@ "__main__.ConvTwist" ] }, - "execution_count": null, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -373,8 +443,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:36.197433Z", + "start_time": "2020-11-12T08:18:36.189636Z" + } + }, "outputs": [ { "data": { @@ -389,7 +464,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -401,8 +476,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:36.730907Z", + "start_time": "2020-11-12T08:18:36.723051Z" + } + }, "outputs": [ { "data": { @@ -417,7 +497,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -430,8 +510,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:37.264435Z", + "start_time": "2020-11-12T08:18:37.258921Z" + } + }, "outputs": [ { "data": { @@ -445,7 +530,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -457,8 +542,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 19, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:37.659139Z", + "start_time": "2020-11-12T08:18:37.642241Z" + } + }, "outputs": [ { "data": { @@ -472,7 +562,7 @@ ")" ] }, - 
"execution_count": null, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -484,8 +574,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 20, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:38.065874Z", + "start_time": "2020-11-12T08:18:38.058467Z" + } + }, "outputs": [ { "data": { @@ -500,7 +595,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -512,8 +607,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:38.469685Z", + "start_time": "2020-11-12T08:18:38.449640Z" + } + }, "outputs": [ { "data": { @@ -528,7 +628,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -540,8 +640,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 22, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:38.895662Z", + "start_time": "2020-11-12T08:18:38.883054Z" + } + }, "outputs": [ { "data": { @@ -556,7 +661,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -568,8 +673,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 23, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:39.340478Z", + "start_time": "2020-11-12T08:18:39.330542Z" + } + }, "outputs": [ { "data": { @@ -584,7 +694,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -596,8 +706,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 24, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:39.797981Z", + "start_time": "2020-11-12T08:18:39.776137Z" + } + }, "outputs": [ { "data": { @@ -612,7 +727,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -624,8 +739,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 25, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:45.758367Z", + "start_time": "2020-11-12T08:18:45.738924Z" + } + }, "outputs": [ { "data": { @@ -640,7 +760,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -660,8 +780,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 26, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:50.010162Z", + "start_time": "2020-11-12T08:18:50.000426Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -670,20 +795,19 @@ " conv_layer=ConvLayer, act_fn=act_fn, bn_1st=True,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, zero_bn=True, **kvargs):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - "# conv_layer = ConvLayerTwist\n", - " self.reduce = noop if stride==1 else pool\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - "# (f\"conv_1\", conv_layer(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1_twist\", 
ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " self.reduce = noop if stride == 1 else pool\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ] if expansion == 1 else [\n", + " (\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1_twist\", ConvLayerTwist(nh, nh, 3, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False, bn_1st=bn_1st)\n", " self.merge = act_fn\n", "\n", " def forward(self, x):\n", @@ -693,8 +817,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:50.982172Z", + "start_time": "2020-11-12T08:18:50.968881Z" + } + }, "outputs": [ { "data": { @@ -726,7 +855,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -738,8 +867,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 28, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:51.998734Z", + "start_time": "2020-11-12T08:18:51.809441Z" + } + }, "outputs": [ { "name": "stdout", @@ -760,8 +894,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 29, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:52.418937Z", + "start_time": "2020-11-12T08:18:52.411660Z" + } + }, "outputs": [ { "data": { @@ -791,7 +930,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -803,8 +942,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 30, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:53.209143Z", + "start_time": "2020-11-12T08:18:53.152654Z" + } + }, "outputs": [ { "name": "stdout", @@ -825,8 +969,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 31, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:53.789204Z", + "start_time": "2020-11-12T08:18:53.759211Z" + } + }, "outputs": [ { "data": { @@ -860,7 +1009,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -872,8 +1021,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 32, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:18:54.642171Z", + "start_time": "2020-11-12T08:18:54.527000Z" + } + }, "outputs": [ { "name": "stdout", @@ -894,13 +1048,18 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 35, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:11.269093Z", + "start_time": "2020-11-12T08:19:11.261967Z" + } + }, "outputs": [ { "data": { "text/plain": [ - 
"NewResBlock(\n", + "NewResBlockTwist(\n", " (convs): Sequential(\n", " (conv_0): ConvLayer(\n", " (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", @@ -919,21 +1078,26 @@ ")" ] }, - "execution_count": null, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#hide\n", - "bl = NewResBlock(1,64,64,sa=True)\n", + "bl = NewResBlockTwist(1,64,64,sa=True)\n", "bl" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 36, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:16.610982Z", + "start_time": "2020-11-12T08:19:16.548733Z" + } + }, "outputs": [ { "name": "stdout", @@ -954,13 +1118,18 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 37, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:22.398767Z", + "start_time": "2020-11-12T08:19:22.390688Z" + } + }, "outputs": [ { "data": { "text/plain": [ - "NewResBlock(\n", + "NewResBlockTwist(\n", " (reduce): AvgPool2d(kernel_size=2, stride=2, padding=0)\n", " (convs): Sequential(\n", " (conv_0): ConvLayer(\n", @@ -968,8 +1137,11 @@ " (act_fn): LeakyReLU(negative_slope=0.01)\n", " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", - " (conv_1): ConvLayer(\n", - " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", + " (conv_1_twist): ConvLayerTwist(\n", + " (conv): ConvTwist(\n", + " twist: False, permute: False, same: True, groups: 32\n", + " (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)\n", + " )\n", " (act_fn): LeakyReLU(negative_slope=0.01)\n", " (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", " )\n", @@ -986,21 +1158,26 @@ ")" ] }, - "execution_count": null, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#hide\n", - "bl = NewResBlock(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", + "bl = NewResBlockTwist(4,64,128,stride=2,act_fn=nn.LeakyReLU(), bn_1st=False)\n", "bl" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 38, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:23.881803Z", + "start_time": "2020-11-12T08:19:23.733092Z" + } + }, "outputs": [ { "name": "stdout", @@ -1028,39 +1205,49 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 39, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:26.850428Z", + "start_time": "2020-11-12T08:19:26.812087Z" + } + }, "outputs": [], "source": [ "#export\n", "class ResBlockTwist(nn.Module):\n", " def __init__(self, expansion, ni, nh, stride=1,\n", " conv_layer=ConvLayer, act_fn=act_fn, zero_bn=True, bn_1st=True,\n", - " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False,sym=False, **kvargs):\n", + " pool=nn.AvgPool2d(2, ceil_mode=True), sa=False, sym=False, **kvargs):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - "# conv_layer = ConvLayerTwist\n", - " layers = [(f\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", - " ] if expansion == 1 else [\n", - " (f\"conv_0\",conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", - "# (f\"conv_1\",conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", - " 
(f\"conv_1_twist\",ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", - " (f\"conv_2\",conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " layers = [(\"conv_0\", conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1\", conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", + " ] if expansion == 1 else [\n", + " (\"conv_0\", conv_layer(ni, nh, 1, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_1_twist\", ConvLayerTwist(nh, nh, 3, stride=stride, act_fn=act_fn, bn_1st=bn_1st)),\n", + " (\"conv_2\", conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, bn_1st=bn_1st))\n", " ]\n", - " if sa: layers.append(('sa', SimpleSelfAttention(nf,ks=1,sym=sym)))\n", + " if sa:\n", + " layers.append(('sa', SimpleSelfAttention(nf, ks=1, sym=sym)))\n", " self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.pool = noop if stride==1 else pool\n", - " self.idconv = noop if ni==nf else conv_layer(ni, nf, 1, act=False)\n", - " self.act_fn =act_fn\n", + " self.pool = noop if stride == 1 else pool\n", + " self.idconv = noop if ni == nf else conv_layer(ni, nf, 1, act=False)\n", + " self.act_fn = act_fn\n", "\n", - " def forward(self, x): return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" + " def forward(self, x):\n", + " return self.act_fn(self.convs(x) + self.idconv(self.pool(x)))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 40, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:27.745755Z", + "start_time": "2020-11-12T08:19:27.716441Z" + } + }, "outputs": [ { "data": { @@ -1092,7 +1279,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1104,8 +1291,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 41, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:28.942123Z", + "start_time": "2020-11-12T08:19:28.748609Z" + } + }, "outputs": [ { "name": "stdout", @@ -1126,8 +1318,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 42, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:30.285784Z", + "start_time": "2020-11-12T08:19:30.276885Z" + } + }, "outputs": [ { "data": { @@ -1157,7 +1354,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -1169,8 +1366,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 43, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:31.429871Z", + "start_time": "2020-11-12T08:19:31.365409Z" + } + }, "outputs": [ { "name": "stdout", @@ -1191,8 +1393,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 44, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:32.205820Z", + "start_time": "2020-11-12T08:19:32.195805Z" + } + }, "outputs": [ { "data": { @@ -1226,7 +1433,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1238,8 +1445,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 45, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:19:33.144161Z", + "start_time": "2020-11-12T08:19:33.052565Z" + } + }, "outputs": [ { "name": "stdout", @@ -1267,8 +1479,13 @@ }, { "cell_type": 
"code", - "execution_count": null, - "metadata": {}, + "execution_count": 48, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:05.027890Z", + "start_time": "2020-11-12T08:20:05.019776Z" + } + }, "outputs": [], "source": [ "model = Net(expansion=4, layers=[3,4,6,3])" @@ -1276,8 +1493,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 49, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:06.785550Z", + "start_time": "2020-11-12T08:20:06.783529Z" + } + }, "outputs": [], "source": [ "model.block = NewResBlockTwist" @@ -1285,8 +1507,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 50, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:07.860796Z", + "start_time": "2020-11-12T08:20:07.744871Z" + } + }, "outputs": [ { "data": { @@ -1674,7 +1901,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -1685,8 +1912,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 51, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:09.929905Z", + "start_time": "2020-11-12T08:20:09.814360Z" + } + }, "outputs": [ { "name": "stdout", @@ -1707,8 +1939,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 52, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:11.144544Z", + "start_time": "2020-11-12T08:20:10.869947Z" + } + }, "outputs": [ { "name": "stdout", @@ -1729,8 +1966,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 53, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:12.613607Z", + "start_time": "2020-11-12T08:20:12.608992Z" + } + }, "outputs": [], "source": [ "model.block = ResBlockTwist" @@ -1738,8 +1980,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 54, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:14.002066Z", + "start_time": "2020-11-12T08:20:13.711883Z" + } + }, "outputs": [ { "name": "stdout", @@ -1760,8 +2007,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 55, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:15.284464Z", + "start_time": "2020-11-12T08:20:14.960297Z" + } + }, "outputs": [], "source": [ "m = model()" @@ -1769,8 +2021,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 56, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:16.044885Z", + "start_time": "2020-11-12T08:20:16.017202Z" + } + }, "outputs": [ { "data": { @@ -2184,7 +2441,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -2196,8 +2453,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 57, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:18.292655Z", + "start_time": "2020-11-12T08:20:17.639599Z" + } + }, "outputs": [ { "name": "stdout", @@ -2218,8 +2480,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 58, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:18.474912Z", + "start_time": "2020-11-12T08:20:18.462739Z" + } + }, "outputs": [ { "data": { @@ -2244,7 +2511,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 58, "metadata": {}, 
"output_type": "execute_result" } @@ -2255,8 +2522,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 59, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:19.475034Z", + "start_time": "2020-11-12T08:20:19.470735Z" + } + }, "outputs": [ { "data": { @@ -2268,7 +2540,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 59, "metadata": {}, "output_type": "execute_result" } @@ -2279,8 +2551,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 60, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:20.251290Z", + "start_time": "2020-11-12T08:20:20.245098Z" + } + }, "outputs": [ { "data": { @@ -2359,7 +2636,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -2370,8 +2647,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 61, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:21.109147Z", + "start_time": "2020-11-12T08:20:21.094037Z" + } + }, "outputs": [ { "data": { @@ -2473,7 +2755,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -2484,8 +2766,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 62, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:21.965972Z", + "start_time": "2020-11-12T08:20:21.949886Z" + } + }, "outputs": [ { "data": { @@ -2631,7 +2918,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -2642,8 +2929,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 63, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:22.541277Z", + "start_time": "2020-11-12T08:20:22.535216Z" + } + }, "outputs": [ { "data": { @@ -2723,7 +3015,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } @@ -2743,19 +3035,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 64, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:20:41.163167Z", + "start_time": "2020-11-12T08:20:40.672551Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Converted 00_constructor.ipynb.\n", + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", "Converted 01_layers.ipynb.\n", - "Converted 02_resnet.ipynb.\n", - "Converted 03_xresnet.ipynb.\n", - "Converted 04_Net.ipynb.\n", + "Converted 03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", "Converted 05_Twist.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -2779,6 +3078,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git 
a/nbs/10_base_constructor.ipynb b/nbs/10_base_constructor.ipynb index 011659f..eaa3e79 100644 --- a/nbs/10_base_constructor.ipynb +++ b/nbs/10_base_constructor.ipynb @@ -58,7 +58,7 @@ "#export\n", "import torch.nn as nn\n", "from collections import OrderedDict\n", - "from model_constructor.layers import ConvLayer, Noop, Flatten\n" + "from model_constructor.layers import ConvLayer, Noop, Flatten" ] }, { @@ -73,7 +73,7 @@ "outputs": [], "source": [ "#export\n", - "act_fn = nn.ReLU(inplace=True)\n" + "act_fn = nn.ReLU(inplace=True)" ] }, { @@ -118,7 +118,7 @@ " super().__init__(OrderedDict(stem))\n", "\n", " def extra_repr(self):\n", - " return f\"sizes: {self.sizes}\"\n" + " return f\"sizes: {self.sizes}\"" ] }, { @@ -390,7 +390,7 @@ "#export\n", "def DownsampleBlock(conv_layer, ni, nf, ks, stride, act=False, **kwargs):\n", " '''Base downsample for res-like blocks'''\n", - " return conv_layer(ni, nf, ks, stride, act, **kwargs)\n" + " return conv_layer(ni, nf, ks, stride, act, **kwargs)" ] }, { @@ -425,7 +425,7 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))\n" + " return self.act_conn(self.merge(out + identity))" ] }, { @@ -454,7 +454,7 @@ " ('conv_1', conv_layer(nh, nh, stride=stride, act_fn=act_fn, **kwargs)), # noqa: E241\n", " ('conv_2', conv_layer(nh, nf, ks=1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))]))\n", " if self.downsample:\n", - " self.downsample = downsample_block(conv_layer, ni, nf, ks=1, \n", + " self.downsample = downsample_block(conv_layer, ni, nf, ks=1,\n", " stride=stride, act=False, act_fn=act_fn, **kwargs)\n", " self.merge = Noop()\n", " self.act_conn = act_fn\n", @@ -464,7 +464,7 @@ " out = self.conv(x)\n", " if self.downsample:\n", " identity = self.downsample(x)\n", - " return self.act_conn(self.merge(out + identity))\n" + " return self.act_conn(self.merge(out + identity))" ] }, { @@ -494,7 +494,7 @@ " for i in range(blocks)]))\n", "\n", " def extra_repr(self):\n", - " return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'\n" + " return f'from {self.ni * self.expansion} to {self.nf}, {self.blocks} blocks, expansion {self.expansion}.'" ] }, { @@ -531,7 +531,7 @@ " sa=sa if i == 0 else False,\n", " **kwargs))\n", " for i in range(num_layers)]\n", - " super().__init__(OrderedDict(layers))\n" + " super().__init__(OrderedDict(layers))" ] }, { @@ -1015,7 +1015,7 @@ " [('pool', nn.AdaptiveAvgPool2d((1, 1))),\n", " ('flat', Flatten()),\n", " ('fc', nn.Linear(ni, nf)),\n", - " ]))\n" + " ]))" ] }, { @@ -1083,7 +1083,7 @@ " '''Init model'''\n", " for m in model.modules():\n", " if isinstance(m, (nn.Conv2d, nn.Linear)):\n", - " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)\n" + " nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity=nonlinearity)" ] }, { @@ -1115,7 +1115,7 @@ " sa=sa, **kwargs)),\n", " ('head', head(body_out * expansion, num_classes, **kwargs))\n", " ]))\n", - " self.init_model(self)\n" + " self.init_model(self)" ] }, { diff --git a/nbs/11_xresnet.ipynb b/nbs/11_xresnet.ipynb index 6a1a5cd..8cdccbb 100644 --- a/nbs/11_xresnet.ipynb +++ b/nbs/11_xresnet.ipynb @@ -1,16 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hide_input": true - }, - "outputs": [], - "source": [ - "from nbdev import *" - ] - }, { "cell_type": "code", "execution_count": null, @@ -40,35 +29,51 @@ }, { "cell_type": "code", - "execution_count": null, - 
"metadata": {}, + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:00.230515Z", + "start_time": "2020-11-12T08:27:00.225163Z" + } + }, "outputs": [], "source": [ "#hide\n", "from nbdev.showdoc import *\n", - "from fastcore.test import *" + "from fastcore.test import *\n", + "import torch\n", + "from model_constructor.base_constructor import Body" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:33.911400Z", + "start_time": "2020-11-12T08:26:33.908197Z" + } + }, "outputs": [], "source": [ "#export\n", "import torch.nn as nn\n", - "import torch\n", "from collections import OrderedDict" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:35.974938Z", + "start_time": "2020-11-12T08:26:35.963223Z" + } + }, "outputs": [], "source": [ "#export\n", - "from model_constructor.base_constructor import *\n", + "from model_constructor.base_constructor import Net\n", "from model_constructor.layers import ConvLayer, Noop, act_fn" ] }, @@ -81,8 +86,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:37.307393Z", + "start_time": "2020-11-12T08:26:37.299905Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -91,16 +101,22 @@ " def __init__(self, conv_layer, ni, nf, stride, act,\n", " pool=nn.AvgPool2d(2, ceil_mode=True), pool_1st=True,\n", " **kwargs):\n", - " layers = [] if stride==1 else [('pool', pool)]\n", - " layers += [] if ni==nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))]\n", - " if not pool_1st: layers.reverse()\n", + " layers = [] if stride == 1 else [('pool', pool)]\n", + " layers += [] if ni == nf else [('idconv', conv_layer(ni, nf, 1, act=act, **kwargs))]\n", + " if not pool_1st:\n", + " layers.reverse()\n", " super().__init__(OrderedDict(layers))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 5, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:26:38.091773Z", + "start_time": "2020-11-12T08:26:38.083440Z" + } + }, "outputs": [], "source": [ "#export\n", @@ -108,26 +124,33 @@ " def __init__(self, ni, nh, expansion=1, stride=1, zero_bn=True,\n", " conv_layer=ConvLayer, act_fn=act_fn, **kwargs):\n", " super().__init__()\n", - " nf,ni = nh*expansion,ni*expansion\n", - " layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", - " ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", - " ] if expansion == 1 else [\n", - " ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)),\n", - " ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", - " ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", + " nf, ni = nh * expansion, ni * expansion\n", + " layers = [('conv_0', conv_layer(ni, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_1', conv_layer(nh, nf, 3, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", + " ] if expansion == 1 else [\n", + " ('conv_0', conv_layer(ni, nh, 1, act_fn=act_fn, **kwargs)),\n", + " ('conv_1', conv_layer(nh, nh, 3, stride=stride, act_fn=act_fn, **kwargs)),\n", + " ('conv_2', conv_layer(nh, nf, 1, zero_bn=zero_bn, act=False, act_fn=act_fn, **kwargs))\n", " ]\n", " 
self.convs = nn.Sequential(OrderedDict(layers))\n", - " self.identity = DownsampleLayer(conv_layer, ni, nf, stride, act=False, act_fn=act_fn, **kwargs) if ni!=nf or stride==2 else Noop()\n", - " self.merge = Noop() # us it to visualize in repr residual connection\n", + " self.identity = DownsampleLayer(conv_layer, ni, nf, stride,\n", + " act=False, act_fn=act_fn, **kwargs) if ni != nf or stride == 2 else Noop()\n", + " self.merge = Noop()\n", " self.act_fn = act_fn\n", "\n", - " def forward(self, x): return self.act_fn(self.merge(self.convs(x) + self.identity(x)))" + " def forward(self, x):\n", + " return self.act_fn(self.merge(self.convs(x) + self.identity(x)))" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:04.422376Z", + "start_time": "2020-11-12T08:27:04.311568Z" + } + }, "outputs": [ { "data": { @@ -339,7 +362,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -351,8 +374,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:05.677644Z", + "start_time": "2020-11-12T08:27:05.230761Z" + } + }, "outputs": [ { "data": { @@ -360,7 +388,7 @@ "torch.Size([16, 2048, 4, 4])" ] }, - "execution_count": null, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -380,26 +408,38 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 10, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:07.200891Z", + "start_time": "2020-11-12T08:27:07.193160Z" + } + }, "outputs": [], "source": [ "#export\n", "def xresnet18(**kwargs):\n", " \"\"\"Constructs a xresnet-18 model. \"\"\"\n", - " return Net(stem_sizes=[32,32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs)\n", + " return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[2, 2, 2, 2], expansion=1, **kwargs)\n", + "\n", "def xresnet34(**kwargs):\n", " \"\"\"Constructs axresnet-34 model. \"\"\"\n", - " return Net(stem_sizes=[32,32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs)\n", + " return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=1, **kwargs)\n", + "\n", "def xresnet50(**kwargs):\n", " \"\"\"Constructs axresnet-34 model. 
\"\"\"\n", - " return Net(stem_sizes=[32,32],block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs)" + " return Net(stem_sizes=[32, 32], block=XResBlock, blocks=[3, 4, 6, 3], expansion=4, **kwargs)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 11, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:08.398983Z", + "start_time": "2020-11-12T08:27:08.223017Z" + } + }, "outputs": [ { "data": { @@ -592,7 +632,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -603,8 +643,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 12, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:09.474372Z", + "start_time": "2020-11-12T08:27:09.151532Z" + } + }, "outputs": [ { "data": { @@ -925,7 +970,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -936,8 +981,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:10.416343Z", + "start_time": "2020-11-12T08:27:10.054562Z" + } + }, "outputs": [], "source": [ "model = xresnet50()" @@ -945,8 +995,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:10.986719Z", + "start_time": "2020-11-12T08:27:10.980613Z" + } + }, "outputs": [ { "data": { @@ -972,7 +1027,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -983,8 +1038,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:11.658112Z", + "start_time": "2020-11-12T08:27:11.651507Z" + } + }, "outputs": [ { "data": { @@ -1364,7 +1424,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1375,8 +1435,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 16, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:12.307670Z", + "start_time": "2020-11-12T08:27:12.303289Z" + } + }, "outputs": [ { "data": { @@ -1388,7 +1453,7 @@ ")" ] }, - "execution_count": null, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1399,8 +1464,13 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 17, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:13.380250Z", + "start_time": "2020-11-12T08:27:12.965825Z" + } + }, "outputs": [ { "data": { @@ -1408,7 +1478,7 @@ "torch.Size([8, 1000])" ] }, - "execution_count": null, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1430,18 +1500,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-12T08:27:25.906210Z", + "start_time": "2020-11-12T08:27:25.404060Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Converted 00_constructor.ipynb.\n", + "Converted 00_Net.ipynb.\n", + "Converted 01_activations.ipynb.\n", "Converted 01_layers.ipynb.\n", - "Converted 02_resnet.ipynb.\n", - "Converted 03_xresnet.ipynb.\n", - "Converted 80_test_layers.ipynb.\n", + "Converted 
03_MXResNet.ipynb.\n", + "Converted 04_YaResNet.ipynb.\n", + "Converted 05_Twist.ipynb.\n", + "Converted 10_base_constructor.ipynb.\n", + "Converted 11_xresnet.ipynb.\n", "Converted index.ipynb.\n" ] } @@ -1465,6 +1543,31 @@ "display_name": "Python 3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true } }, "nbformat": 4, diff --git a/setup.cfg b/setup.cfg index 2fb20a2..000ba67 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,4 +6,4 @@ doctests = True verbose = 2 # https://pep8.readthedocs.io/en/latest/intro.html#error-codes format = pylint -# ignore = \ No newline at end of file +ignore = W292, E302, E305 \ No newline at end of file diff --git a/setup.py b/setup.py index 9f3776e..7849a78 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,8 @@ +# flake8: noqa E126 from packaging.version import parse from configparser import ConfigParser import setuptools -assert parse(setuptools.__version__)>=parse('36.2') +assert parse(setuptools.__version__) >= parse('36.2') # note: all settings are in settings.ini; edit there, not here config = ConfigParser(delimiters=['=']) @@ -10,37 +11,37 @@ cfg_keys = 'version description keywords author author_email'.split() expected = cfg_keys + "lib_name user branch license status min_python audience language".split() -for o in expected: assert o in cfg, "missing expected setting: {}".format(o) -setup_cfg = {o:cfg[o] for o in cfg_keys} +for o in expected: + assert o in cfg, "missing expected setting: {}".format(o) +setup_cfg = {o: cfg[o] for o in cfg_keys} licenses = { 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'), } -statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha', - '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ] +statuses = ['1 - Planning', '2 - Pre-Alpha', '3 - Alpha', + '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive'] py_versions = '2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8'.split() -requirements = cfg.get('requirements','').split() +requirements = cfg.get('requirements', '').split() lic = licenses[cfg['license']] min_python = cfg['min_python'] setuptools.setup( - name = cfg['lib_name'], - license = lic[0], - classifiers = [ - 'Development Status :: ' + statuses[int(cfg['status'])], - 'Intended Audience :: ' + cfg['audience'].title(), - 'License :: ' + lic[1], - 'Natural Language :: ' + cfg['language'].title(), - ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]], - url = 'https://github.com/{}/{}'.format(cfg['user'],cfg['lib_name']), - packages = setuptools.find_packages(), - include_package_data = True, - install_requires = requirements, - python_requires = '>=' + cfg['min_python'], - long_description = open('README.md').read(), - long_description_content_type = 'text/markdown', - zip_safe = False, - entry_points = { 'console_scripts': cfg.get('console_scripts','').split() }, - **setup_cfg) - + name = cfg['lib_name'], + license = lic[0], + classifiers = [ + 'Development 
Status :: ' + statuses[int(cfg['status'])],
+            'Intended Audience :: ' + cfg['audience'].title(),
+            'License :: ' + lic[1],
+            'Natural Language :: ' + cfg['language'].title(),
+        ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]],
+        url = 'https://github.com/{}/{}'.format(cfg['user'],cfg['lib_name']),
+        packages = setuptools.find_packages(),
+        include_package_data = True,
+        install_requires = requirements,
+        python_requires = '>=' + cfg['min_python'],
+        long_description = open('README.md').read(),
+        long_description_content_type = 'text/markdown',
+        zip_safe = False,
+        entry_points = { 'console_scripts': cfg.get('console_scripts','').split() },
+        **setup_cfg)
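
A minimal smoke test for the constructors this patch touches -- a sketch only, assuming the notebooks above are exported with nbdev so that `Net` is importable from `model_constructor.net` (per `#default_exp net` in 00_Net.ipynb); the batch shape and the 1000-class head are illustrative, chosen to match the shapes printed in the notebook outputs:

    import torch
    from model_constructor.net import Net   # constructor object exported from 00_Net.ipynb

    model = Net(expansion=4, layers=[3, 4, 6, 3])   # resnet50-like configuration, as used in the notebook
    m = model()                                     # calling the constructor builds the nn.Module
    xb = torch.randn(8, 3, 128, 128)                # illustrative input batch
    print(m(xb).shape)                              # should print torch.Size([8, 1000]) with the default head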