In [1]:
import torch
import math
import torch.nn as nn
import torch.nn.functional as F


class BasicConv(nn.Module):
    '''Conv2d optionally followed by BatchNorm2d and ReLU.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels (also stored as ``out_channels``).
        kernel_size, stride, padding, dilation, groups, bias: forwarded to ``nn.Conv2d``.
        relu: when False, no activation is applied and ``self.relu`` is None.
        bn: when False, no batch norm is applied and ``self.bn`` is None.
    '''
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes

        conv_options = dict(kernel_size=kernel_size, stride=stride, padding=padding,
                            dilation=dilation, groups=groups, bias=bias)
        self.conv = nn.Conv2d(in_planes, out_planes, **conv_options)

        # Disabled stages are kept as None attributes so forward() can skip them.
        if bn:
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
        else:
            self.bn = None

        if relu:
            self.relu = nn.ReLU()
        else:
            self.relu = None

    def forward(self, x):
        '''Apply conv, then batch norm and ReLU when they are enabled.'''
        out = self.conv(x)
        for stage in (self.bn, self.relu):
            if stage is not None:
                out = stage(out)
        return out
    
    
class Flatten(nn.Module):
    '''Collapse every non-batch dimension into one: (N, ...) -> (N, -1).'''
    def forward(self, x):
        batch_size = x.shape[0]
        # view() keeps the result aliased to the input storage (no copy).
        return x.view(batch_size, -1)

    
class ChannelGate(nn.Module):
    '''Channel attention gate of CBAM.

    Squeezes the spatial dimensions of the input with global average and/or
    max pooling, passes every pooled descriptor through one shared bottleneck
    MLP, sums the MLP outputs and uses their sigmoid as a per-channel scale.

    Args:
        gate_channels: number of channels C of the input feature map.
        reduction_ratio: the MLP hidden width is ``gate_channels // reduction_ratio``.
        pool_types: iterable containing 'avg' and/or 'max'.

    Raises:
        ValueError: if ``pool_types`` is empty or holds an unsupported name.
    '''
    _SUPPORTED_POOLS = ('avg', 'max')

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=('avg', 'max')):
        super(ChannelGate, self).__init__()

        # Copy so that instances never share (or mutate) the caller's / default sequence.
        pool_types = list(pool_types)
        if not pool_types:
            raise ValueError("pool_types must contain at least one of %r" % (self._SUPPORTED_POOLS,))
        for pool_type in pool_types:
            if pool_type not in self._SUPPORTED_POOLS:
                raise ValueError("unsupported pool type %r; expected one of %r"
                                 % (pool_type, self._SUPPORTED_POOLS))

        self.gate_channels = gate_channels

        # Shared MLP. The flatten layer stays at index 0 so the Linear layers
        # keep their 'mlp.1.*' / 'mlp.3.*' state_dict keys.
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),  # reduction
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)  # restoration
            )

        self.pool_types = pool_types

    def forward(self, x):
        '''x: Input feature  (N, C, h, w)
           kernel_size of pooling operation = (h, w) -> squeeze the spatial dimension

           Returns the channel-refined feature, same shape as x.'''
        spatial_size = (x.size(2), x.size(3))

        channel_att_sum = None  # MLP(AvgPool(x)) + MLP(MaxPool(x)) for the default pool_types
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                pooled = F.avg_pool2d(x, spatial_size, stride=spatial_size)  # (N, C, 1, 1)
            elif pool_type == 'max':
                pooled = F.max_pool2d(x, spatial_size, stride=spatial_size)  # (N, C, 1, 1)
            else:
                # Only reachable if self.pool_types was modified after construction.
                raise ValueError("unsupported pool type %r; expected one of %r"
                                 % (pool_type, self._SUPPORTED_POOLS))

            channel_att_raw = self.mlp(pooled)  # (N, C)
            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw  # (N, C) - Channel Attention Map

        if channel_att_sum is None:
            raise ValueError("pool_types must not be empty")

        # Sigmoid, then (N, C) -> (N, C, 1, 1); the product broadcasts over (h, w).
        # torch.sigmoid is used because F.sigmoid has emitted a deprecation
        # warning in some PyTorch releases.
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3)

        # Feature Refinement
        return x * scale  # (N, C, h, w)
    
    
class ChannelPool(nn.Module):
    '''Reduce the channel axis with max and mean and stack both results.

    Maps (N, C, h, w) to (N, 2, h, w): channel 0 is the per-pixel maximum over
    channels, channel 1 the per-pixel mean over channels.'''
    def forward(self, x):
        channel_max, _ = x.max(dim=1, keepdim=True)
        channel_mean = x.mean(dim=1, keepdim=True)
        return torch.cat([channel_max, channel_mean], dim=1)

    
class SpatialGate(nn.Module):
    '''Produce 2D spatial attention map to refine channel-refined feature (sequential)

    Args:
        kernel_size: side length of the square convolution applied to the
            2-channel (max, mean) descriptor. Must be a positive odd integer,
            because the padding ``(kernel_size - 1) // 2`` only preserves the
            spatial size for odd kernels. Defaults to 7.

    Raises:
        ValueError: if ``kernel_size`` is not a positive odd integer.
    '''
    def __init__(self, kernel_size=7):
        super(SpatialGate, self).__init__()
        if kernel_size < 1 or kernel_size % 2 == 0:
            raise ValueError("kernel_size must be a positive odd integer, got %r" % (kernel_size,))

        self.compress = ChannelPool()
        # Conv + BN without ReLU: the sigmoid in forward() is the only non-linearity.
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size - 1) // 2, relu=False)

    def forward(self, x):
        '''x: channel-refined feature (sequential)

           Returns x scaled by the spatial attention map, same shape as x.'''
        x_compress = self.compress(x)  # (N, 2, h, w)
        x_out = self.spatial(x_compress)  # (N, 1, h, w) - Spatial Attention Map
        # torch.sigmoid is used because F.sigmoid has emitted a deprecation
        # warning in some PyTorch releases.
        scale = torch.sigmoid(x_out)  # broadcast over the channel axis below

        return x * scale
    

class CBAM(nn.Module):
    '''Attention block: a channel gate followed by an optional spatial gate.

    Args:
        gate_channels: number of channels of the feature map to refine.
        reduction_ratio: forwarded to ChannelGate.
        pool_types: forwarded to ChannelGate.
        no_spatial: when True, only the channel gate is built and applied.
    '''
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.no_spatial = no_spatial
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        if not self.no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        '''x: Input feature; returns the refined feature.'''
        refined = self.ChannelGate(x)  # channel refinement always runs first
        if self.no_spatial:
            return refined
        return self.SpatialGate(refined)  # spatial refinement of the channel-refined feature


In [2]:
import torch
from torch import nn
import torch.nn.functional as F
#from cbam import CBAM

class double_conv(nn.Module):
    '''Two (3x3 Conv -> BatchNorm -> ReLU) stages followed by a CBAM attention block.

    Args:
        in_ch: channels of the input feature map.
        out_ch: channels produced by both conv stages and seen by CBAM.
    '''
    def __init__(self, in_ch, out_ch):
        super(double_conv, self).__init__()

        # Conv block: the first stage maps in_ch -> out_ch, the second keeps out_ch.
        stages = []
        for stage_in in (in_ch, out_ch):
            stages.append(nn.Conv2d(stage_in, out_ch, 3, padding=1))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.ReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

        # Attention module
        self.cbam = CBAM(out_ch)

    def forward(self, input):
        '''Run the conv block, then refine its output with attention.'''
        features = self.conv(input)
        return self.cbam(features)


class inconv(nn.Module):
    '''Entry block of the attention UNet: a single double_conv(in_ch, out_ch).'''
    def __init__(self, in_ch, out_ch):
        super(inconv, self).__init__()
        entry_block = double_conv(in_ch, out_ch)
        self.conv = entry_block

    def forward(self, x):
        '''Apply the wrapped double_conv block to x.'''
        return self.conv(x)


class down(nn.Module):
    '''Encoder stage: 2x2 max pooling followed by double_conv(in_ch, out_ch).'''
    def __init__(self, in_ch, out_ch):
        super(down, self).__init__()
        halve = nn.MaxPool2d(2)
        refine = double_conv(in_ch, out_ch)
        self.mpconv = nn.Sequential(halve, refine)

    def forward(self, x):
        '''Pool x, then run the attention conv block on the pooled map.'''
        return self.mpconv(x)


class up(nn.Module):
    '''Up-sampling block with Attention

    Up-samples x1, aligns the skip feature x2 to the spatial size of the
    up-sampled x1, concatenates them as [x2, x1] along the channel axis and
    applies double_conv(in_ch, out_ch).

    Args:
        in_ch: channel count of the concatenated tensor fed to double_conv.
        out_ch: channel count of the block output.
        bilinear: True  -> up-sample with bilinear interpolation (rule-based);
                  False -> up-sample with a transposed conv (trained, requires
                  sufficient memory). The transposed conv maps in_ch -> out_ch,
                  so with bilinear=False x1 must carry in_ch channels and x2
                  in_ch - out_ch channels for the concatenation to reach in_ch.
    '''
    def __init__(self, in_ch, out_ch, bilinear=True):
        super(up, self).__init__()

        if bilinear:
            # align_corners=False is the library default; passing it explicitly
            # keeps the numerics and avoids the warning some PyTorch releases
            # emit when it is left unspecified.
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        else:
            self.up = nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2)

        self.conv = double_conv(in_ch, out_ch)

    def forward(self, x1, x2):
        '''x1 should be up-sampled; x2 is the skip feature.

           x2 is zero-padded (positive difference) or cropped (negative
           difference) so that its (h, w) equals that of the up-sampled x1.'''
        x1 = self.up(x1)
        diff_h = x1.size(2) - x2.size(2)
        diff_w = x1.size(3) - x2.size(3)

        # F.pad takes the LAST dimension first: (left, right, top, bottom).
        # Width therefore comes before height, and each pair must sum to the
        # full difference so odd differences are handled exactly.
        x2 = F.pad(x2, (diff_w // 2, diff_w - diff_w // 2,
                        diff_h // 2, diff_h - diff_h // 2))

        x = torch.cat([x2, x1], dim=1)
        x = self.conv(x)

        return x


class outconv(nn.Module):
    '''Output head: 1x1 Conv -> BatchNorm -> ReLU -> 1x1 Conv.

    The first 1x1 conv halves the channel count (in_ch // 2); the second maps
    it to out_ch. Spatial size is left unchanged by the 1x1 kernels.
    '''
    def __init__(self, in_ch, out_ch):
        super(outconv, self).__init__()
        hidden_ch = in_ch // 2
        head_layers = [
            nn.Conv2d(in_ch, hidden_ch, 1),
            nn.BatchNorm2d(hidden_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(hidden_ch, out_ch, 1),
        ]
        self.conv = nn.Sequential(*head_layers)

    def forward(self, x):
        '''Map x of shape (N, in_ch, h, w) to (N, out_ch, h, w).'''
        return self.conv(x)

In [3]:
class UNet(nn.Module):
    '''Attention UNet with four down/up stages and two output heads.

    forward(x) returns ``(sem, ins)``: the outputs of the semantic head
    (2 channels) and the instance head (32 channels).
    '''
    def __init__(self):
        super(UNet, self).__init__()
        self.inc = inconv(in_ch=3, out_ch=64)

        # Encoder
        self.down1 = down(in_ch=64, out_ch=128)
        self.down2 = down(in_ch=128, out_ch=256)
        self.down3 = down(in_ch=256, out_ch=512)
        self.down4 = down(in_ch=512, out_ch=512)

        # Decoder: in_ch counts the channels of the concatenated [skip, up-sampled] tensor
        self.up1 = up(in_ch=1024, out_ch=256)
        self.up2 = up(in_ch=512, out_ch=128)
        self.up3 = up(in_ch=256, out_ch=64)
        self.up4 = up(in_ch=128, out_ch=64)

        # Segmentation heads
        self.sem_out = outconv(in_ch=64, out_ch=2)  # semantic classes=2 (f.g / b.g)
        self.ins_out = outconv(in_ch=64, out_ch=32)  # instance classes=32 (large enough to represent all instances)

    def forward(self, x):
        '''Encode x, decode with skip connections, and apply both heads.'''
        # Encoder features, shallow -> deep:
        # (N, 64, h, w), (N, 128, h/2, w/2), (N, 256, h/4, w/4), (N, 512, h/8, w/8), (N, 512, h/16, w/16)
        skips = [self.inc(x)]
        for encoder in (self.down1, self.down2, self.down3, self.down4):
            skips.append(encoder(skips[-1]))

        # Decoder: start from the deepest feature and consume skips deep -> shallow
        x = skips.pop()
        for decoder in (self.up1, self.up2, self.up3, self.up4):
            x = decoder(x, skips.pop())  # ends at (N, 64, h, w)

        sem = self.sem_out(x)  # (N, 2, h, w)
        ins = self.ins_out(x)  # (N, 32, h, w)

        return sem, ins

In [4]:
class UNet_Deeper(nn.Module):
    '''Attention UNet with five down/up stages and two output heads.

    forward(x) returns ``(sem, ins)``: the outputs of the semantic head
    (2 channels) and the instance head (32 channels).
    '''
    def __init__(self):
        super(UNet_Deeper, self).__init__()
        self.inc = inconv(in_ch=3, out_ch=64)

        # Encoder
        self.down1 = down(in_ch=64, out_ch=128)
        self.down2 = down(in_ch=128, out_ch=256)
        self.down3 = down(in_ch=256, out_ch=512)
        self.down4 = down(in_ch=512, out_ch=1024)
        self.down5 = down(in_ch=1024, out_ch=1024)

        # Decoder: in_ch counts the channels of the concatenated [skip, up-sampled] tensor
        self.up1 = up(in_ch=2048, out_ch=512)
        self.up2 = up(in_ch=1024, out_ch=256)
        self.up3 = up(in_ch=512, out_ch=128)
        self.up4 = up(in_ch=256, out_ch=64)
        self.up5 = up(in_ch=128, out_ch=64)

        # Segmentation heads
        self.sem_out = outconv(in_ch=64, out_ch=2)  # semantic classes=2 (f.g / b.g)
        self.ins_out = outconv(in_ch=64, out_ch=32)  # instance classes=32 (large enough to represent all instances)

    def forward(self, x):
        '''Encode x, decode with skip connections, and apply both heads.'''
        # Encoder features, shallow -> deep:
        # (N, 64, h, w), (N, 128, h/2, w/2), (N, 256, h/4, w/4), (N, 512, h/8, w/8),
        # (N, 1024, h/16, w/16), (N, 1024, h/32, w/32)
        skips = [self.inc(x)]
        for encoder in (self.down1, self.down2, self.down3, self.down4, self.down5):
            skips.append(encoder(skips[-1]))

        # Decoder: start from the deepest feature and consume skips deep -> shallow
        x = skips.pop()
        for decoder in (self.up1, self.up2, self.up3, self.up4, self.up5):
            x = decoder(x, skips.pop())  # ends at (N, 64, h, w)

        sem = self.sem_out(x)  # (N, 2, h, w)
        ins = self.ins_out(x)  # (N, 32, h, w)

        return sem, ins

In [5]:
# Smoke test: push a random batch through UNet_Deeper and print the shape of
# each head's output.
x = torch.randn([2, 3, 256, 256])  # batch of 2, 3 channels, 256 x 256
model = UNet_Deeper()

sem, ins = model(x)  # forward() returns (semantic output, instance output)
print(sem.shape)
print(ins.shape)



torch.Size([2, 2, 256, 256])
torch.Size([2, 32, 256, 256])


In [6]:
# Smoke test: push a random batch through UNet and print the shape of each
# head's output.
x = torch.randn([2, 3, 256, 256])  # batch of 2, 3 channels, 256 x 256
model = UNet()

sem, ins = model(x)  # forward() returns (semantic output, instance output)
print(sem.shape)
print(ins.shape)



torch.Size([2, 2, 256, 256])
torch.Size([2, 32, 256, 256])
