In [None]:
!pip install fastai==1.0.60

Collecting fastai==1.0.60
  Downloading fastai-1.0.60-py3-none-any.whl (237 kB)
[?25l[K     |█▍                              | 10 kB 19.9 MB/s eta 0:00:01[K     |██▊                             | 20 kB 26.0 MB/s eta 0:00:01[K     |████▏                           | 30 kB 13.3 MB/s eta 0:00:01[K     |█████▌                          | 40 kB 10.4 MB/s eta 0:00:01[K     |███████                         | 51 kB 8.7 MB/s eta 0:00:01[K     |████████▎                       | 61 kB 7.5 MB/s eta 0:00:01[K     |█████████▋                      | 71 kB 8.4 MB/s eta 0:00:01[K     |███████████                     | 81 kB 9.3 MB/s eta 0:00:01[K     |████████████▍                   | 92 kB 8.7 MB/s eta 0:00:01[K     |█████████████▉                  | 102 kB 8.1 MB/s eta 0:00:01[K     |███████████████▏                | 112 kB 8.1 MB/s eta 0:00:01[K     |████████████████▋               | 122 kB 8.1 MB/s eta 0:00:01[K     |██████████████████              | 133 kB 8.1 MB/s eta 0:0

In [None]:
!pip install pytorch_model_summary

Collecting pytorch_model_summary
  Downloading pytorch_model_summary-0.1.2-py3-none-any.whl (9.3 kB)
Installing collected packages: pytorch-model-summary
Successfully installed pytorch-model-summary-0.1.2


In [None]:
# Fetch the ABINet fork; the `modules.*` imports further down are presumably
# resolved from this checkout (verify against the repository layout).
# NOTE(review): no commit or tag is pinned, so the clone follows the default
# branch head, and re-running this cell fails once the directory exists.
!git clone https://github.com/Seonwhee-Genome/ABINet.git

Cloning into 'ABINet'...
remote: Enumerating objects: 143, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 143 (delta 57), reused 111 (delta 38), pack-reused 0[K
Receiving objects: 100% (143/143), 3.82 MiB | 20.48 MiB/s, done.
Resolving deltas: 100% (57/57), done.


In [None]:
# Make the cloned repository the working directory for the cells below.
# NOTE(review): not idempotent - a second run tries to enter ABINet/ABINet.
%cd ABINet

/content/ABINet


In [None]:
import torch
import torch.nn as nn
from fastai.vision import *

from modules.model import _default_tfmer_cfg
from modules.resnet import resnet45
from modules.transformer import (PositionalEncoding,
                                 TransformerEncoder,
                                 TransformerEncoderLayer)


class ResTranformer(nn.Module):
    """ResNet-45 feature extractor followed by a Transformer encoder.

    The backbone's feature map is flattened into a sequence, enriched with
    positional encodings, passed through the encoder and reshaped back to the
    original feature-map layout.

    Args:
        config: object exposing the ``model_vision_*`` attributes read in
            ``__init__``. Each value is passed through ``ifnone`` together with
            a fallback taken from ``_default_tfmer_cfg`` (or ``2`` for the
            number of encoder layers) - presumably the fallback is used when
            the attribute is ``None``; confirm against fastai's ``ifnone``.
    """

    def __init__(self, config):
        super().__init__()
        self.resnet = resnet45()

        fallback = _default_tfmer_cfg
        self.d_model = ifnone(config.model_vision_d_model, fallback['d_model'])
        layer_kwargs = dict(
            d_model=self.d_model,
            nhead=ifnone(config.model_vision_nhead, fallback['nhead']),
            dim_feedforward=ifnone(config.model_vision_d_inner, fallback['d_inner']),
            dropout=ifnone(config.model_vision_dropout, fallback['dropout']),
            activation=ifnone(config.model_vision_activation, fallback['activation']),
        )
        depth = ifnone(config.model_vision_backbone_ln, 2)

        # The positional table holds 8 * 32 = 256 positions.
        self.pos_encoder = PositionalEncoding(self.d_model, max_len=8 * 32)
        self.transformer = TransformerEncoder(TransformerEncoderLayer(**layer_kwargs), depth)

    def forward(self, images):
        """Encode ``images`` and return a tensor shaped like the backbone output."""
        fmap = self.resnet(images)
        batch, channels, height, width = fmap.shape

        # (batch, channels, height, width) -> (height * width, batch, channels)
        tokens = fmap.view(batch, channels, -1).permute(2, 0, 1)
        tokens = self.transformer(self.pos_encoder(tokens))

        # Undo the flattening: back to (batch, channels, height, width).
        return tokens.permute(1, 2, 0).view(batch, channels, height, width)

In [None]:
import math

import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo


def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 ``nn.Conv2d`` mapping ``in_planes`` to ``out_planes`` channels."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with one pixel of padding on each side."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )


class BasicBlock(nn.Module):
    """Residual block: 1x1 conv -> BN -> ReLU -> 3x3 conv -> BN, plus a shortcut.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: stride of the 3x3 convolution (the 1x1 convolution keeps stride 1).
        downsample: optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is used.
    """

    # Multiplier from `planes` to the block's output channel count.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the two conv/BN stages, add the shortcut, then the final ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """Five-stage residual backbone with a 3x3 convolutional stem.

    The stem keeps the input resolution; stages 1 and 3 use stride 2 and the
    remaining stages use stride 1, so height and width are reduced by a factor
    of 4 overall. The output has ``512 * block.expansion`` channels.

    Args:
        block: residual block class. It must expose an ``expansion`` class
            attribute and be constructible as
            ``block(inplanes, planes, stride, downsample)`` and
            ``block(inplanes, planes)``.
        layers: sequence of five ints giving the number of blocks per stage.
    """

    def __init__(self, block, layers):
        # Initialise nn.Module before assigning any attribute on the instance.
        super().__init__()
        # Channel count entering the next stage; updated by `_make_layer`.
        self.inplanes = 32

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(block, 32, layers[0], stride=2)
        self.layer2 = self._make_layer(block, 64, layers[1], stride=1)
        self.layer3 = self._make_layer(block, 128, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 256, layers[3], stride=1)
        self.layer5 = self._make_layer(block, 512, layers[4], stride=1)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Normal init with std = sqrt(2 / (kernel_h * kernel_w * out_channels)).
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks with ``planes`` channels.

        Only the first block receives ``stride``; it also gets a 1x1 conv + BN
        shortcut projection whenever the stride or the channel count changes.
        Updates ``self.inplanes`` to the stage's output channel count.
        """
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem followed by the five residual stages."""
        x = self.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5):
            x = stage(x)
        return x


def resnet45():
    """Build the ``ResNet`` of ``BasicBlock`` stages with depths 3, 4, 6, 6 and 3."""
    stage_depths = [3, 4, 6, 6, 3]
    return ResNet(BasicBlock, stage_depths)

In [None]:
import pytorch_model_summary
import torch

# Build the backbone through the `resnet45` factory defined above rather than
# repeating its stage depths here, so the two cannot drift apart.
net = resnet45()
print(net)

==== 32 32 32 3 2 1
==== 32 64 64 4 1 1
==== 64 128 128 6 2 1
==== 128 256 256 6 1 1
==== 256 512 512 3 1 1
ResNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(32, 32, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (co

In [None]:
# Per-layer table of input shapes and parameter counts for a 32x100 RGB image.
print(
    pytorch_model_summary.summary(
        net,
        torch.zeros(1, 3, 32, 100),
        show_input=True,
    )
)

------------------------------------------------------------------------
      Layer (type)          Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 32, 100]             864             864
     BatchNorm2d-2     [1, 32, 32, 100]              64              64
            ReLU-3     [1, 32, 32, 100]               0               0
      BasicBlock-4     [1, 32, 32, 100]          11,456          11,456
      BasicBlock-5      [1, 32, 16, 50]          10,368          10,368
      BasicBlock-6      [1, 32, 16, 50]          10,368          10,368
      BasicBlock-7      [1, 32, 16, 50]          41,344          41,344
      BasicBlock-8      [1, 64, 16, 50]          41,216          41,216
      BasicBlock-9      [1, 64, 16, 50]          41,216          41,216
     BasicBlock-10      [1, 64, 16, 50]          41,216          41,216
     BasicBlock-11      [1, 64, 16, 50]         164,608         164,608
     BasicBlock-12      [1, 128, 8, 25]         164,352        

In [None]:
# Same table as the previous cell, with the hierarchical module view requested too.
print(
    pytorch_model_summary.summary(
        net,
        torch.zeros(1, 3, 32, 100),
        show_input=True,
        show_hierarchical=True,
    )
)

------------------------------------------------------------------------
      Layer (type)          Input Shape         Param #     Tr. Param #
          Conv2d-1      [1, 3, 32, 100]             864             864
     BatchNorm2d-2     [1, 32, 32, 100]              64              64
            ReLU-3     [1, 32, 32, 100]               0               0
      BasicBlock-4     [1, 32, 32, 100]          11,456          11,456
      BasicBlock-5      [1, 32, 16, 50]          10,368          10,368
      BasicBlock-6      [1, 32, 16, 50]          10,368          10,368
      BasicBlock-7      [1, 32, 16, 50]          41,344          41,344
      BasicBlock-8      [1, 64, 16, 50]          41,216          41,216
      BasicBlock-9      [1, 64, 16, 50]          41,216          41,216
     BasicBlock-10      [1, 64, 16, 50]          41,216          41,216
     BasicBlock-11      [1, 64, 16, 50]         164,608         164,608
     BasicBlock-12      [1, 128, 8, 25]         164,352        

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50

# Keras ResNet-50 with ImageNet weights, shown for comparison. It gets its own
# name because `resnet` is bound to the PyTorch `resnet45()` model further down.
keras_resnet50 = ResNet50(weights='imagenet')
keras_resnet50.summary()


Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 230, 230, 3)  0           ['input_5[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 112, 112, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                           

In [None]:
# Fallback Transformer hyper-parameters.
# NOTE(review): this rebinds the `_default_tfmer_cfg` name imported from
# `modules.model` earlier in the notebook - confirm that the shadowing is intended.
_default_tfmer_cfg = {
    'd_model': 512,
    'nhead': 8,
    'd_inner': 2048,  # 1024
    'dropout': 0.1,
    'activation': 'relu',
}
_default_tfmer_cfg

{'activation': 'relu',
 'd_inner': 2048,
 'd_model': 512,
 'dropout': 0.1,
 'nhead': 8}

In [None]:
# Instantiate the backbone and push an all-zero batch of one 3x32x100 image
# through it; the recorded output shows a (1, 512, 8, 25) feature map.
# `resnet` and `feature` are reused by the cells below.
resnet = resnet45()
feature = resnet(torch.zeros(1, 3, 32, 100 ))
feature.shape

torch.Size([1, 512, 8, 25])

In [None]:
# Unpack the four dimensions of the feature map (batch, channels, height, width)
# for the reshaping steps below.
n, c, h, w = feature.shape

In [None]:
# Preview only: flatten the spatial grid, (n, c, h, w) -> (n, c, h*w).
# The result is displayed but not stored, so `feature` is unchanged.
feature.view(n, c, -1).shape

torch.Size([1, 512, 200])

In [None]:
# Flatten the spatial grid and move it to the front: (n, c, h, w) -> (h*w, n, c),
# i.e. the [sequence length, batch size, embed dim] layout PositionalEncoding documents.
# NOTE(review): this rebinds `feature`, so the cell is not idempotent - re-running it
# without re-running the cells above would reshape the already-permuted tensor.
feature = feature.view(n, c, -1).permute(2, 0, 1)
feature.shape

torch.Size([200, 1, 512])

# Transformer

In [None]:
class PositionalEncoding(nn.Module):
    r"""Inject some information about the relative or absolute position of the tokens
        in the sequence. The positional encodings have the same dimension as
        the embeddings, so that the two can be summed. Here, we use sine and cosine
        functions of different frequencies.
    .. math::
        \text{PosEncoder}(pos, 2i) = \sin(pos / 10000^{2i / d_{model}})
        \text{PosEncoder}(pos, 2i+1) = \cos(pos / 10000^{2i / d_{model}})
        \text{where pos is the word position and i is the embed idx}
    Args:
        d_model: the embed dim (required). Odd values are supported.
        dropout: the dropout value (default=0.1).
        max_len: the max. length of the incoming sequence (default=5000).
    Examples:
        >>> pos_encoder = PositionalEncoding(d_model)
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one fewer odd column than even columns,
        # so drop the surplus frequency; for an even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

        # (max_len, d_model) -> (max_len, 1, d_model) so the table broadcasts over
        # the batch dimension. Registered as a buffer: it follows the module across
        # devices and is stored in the state dict, but is not a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(1))

    def forward(self, x):
        r"""Inputs of forward function
        Args:
            x: the sequence fed to the positional encoder model (required).
        Shape:
            x: [sequence length, batch size, embed dim]
            output: [sequence length, batch size, embed dim]
        Examples:
            >>> output = pos_encoder(x)
        """
        # Add the encodings of the first `sequence length` positions, then apply dropout.
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [None]:
# Positional encoder for 512-dimensional embeddings with a table of 8*32 = 256 positions.
# Dropout keeps its default p=0.1 (see the repr below), so outputs are stochastic
# while the module is in training mode.
pos_encoder = PositionalEncoding(512, max_len=8*32)
pos_encoder

PositionalEncoding(
  (dropout): Dropout(p=0.1, inplace=False)
)

In [None]:
# Add positional encodings to the (h*w, n, c) sequence. Only the shape is shown:
# echoing the tensor itself would dump every value into the output.
# NOTE(review): this rebinds `feature`, so re-running the cell adds the encodings again.
feature = pos_encoder(feature)
feature.shape

torch.Size([200, 1, 512])

In [None]:
# Step-by-step rebuild of the positional table: start from an all-zero
# (max_len, d_model) matrix, one row per position.
max_len = 8 * 32
d_model = 512
pe = torch.zeros((max_len, d_model))
pe.shape

torch.Size([256, 512])

In [None]:
# Column vector of position indices 0 .. max_len - 1, shape (max_len, 1).
position = torch.arange(max_len, dtype=torch.float).unsqueeze(-1)
position.shape

torch.Size([256, 1])