# Channel Attention实现方式

- SENet中的实现
- Attention map shape 为torch.Size([16, 64, 1, 1])，相当于给每个通道指定了一个权重

In [8]:
from torch import nn


class SELayer(nn.Module):
    """Squeeze-and-Excitation channel attention (SENet style).

    Every channel is squeezed to one value by global average pooling, a
    two-layer bottleneck MLP ending in a sigmoid turns those values into one
    weight per channel, and the input is rescaled channel-wise by them.

    Args:
        channel: Number of channels of the input feature map.
        reduction: Bottleneck ratio; the hidden layer has
            ``channel // reduction`` units.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        hidden = channel // reduction
        # Squeeze: collapse each channel's spatial map to a single value.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: bias-free bottleneck MLP followed by a sigmoid gate.
        excitation = [
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*excitation)

    def forward(self, x):
        """Re-weight the channels of ``x``.

        Args:
            x: 4-D tensor unpacked as (batch, channels, dim2, dim3).

        Returns:
            Tensor with the same shape as ``x``: the element-wise product of
            ``x`` and the per-channel attention weights.
        """
        batch, channels, _, _ = x.shape
        print(batch, channels)
        squeezed = self.avg_pool(x).view(batch, channels)
        weights = self.fc(squeezed)
        # Restore two singleton dims so the weights line up with x's layout.
        y = weights.view(batch, channels, 1, 1)
        print('attention map shape:', y.shape)  # (batch, channels, 1, 1)
        # `*` is the Hadamard (element-wise) product.
        scale = y.expand_as(x)
        return x * scale


- 测试SENet

In [9]:
import torch
# Random input feature map: batch of 16, 64 channels, 64x48 trailing (pooled) dims.
features = torch.randn(16, 64, 64,48)

# The channel argument must equal the size of dim 1 of the input (64 here).
selayer = SELayer(64)

# SELayer returns the re-weighted features, so the output shape equals the input shape.
feature_out = selayer(features)
print(feature_out.shape)

16 64
attention map shape: torch.Size([16, 64, 1, 1])
torch.Size([16, 64, 64, 48])


- DANet中的实现
- Attention map shape 为torch.Size([16, 64, 64])，即对每个样本用一个 C×C（此处为 64×64）的矩阵表示任意两个通道之间的关系

In [21]:
from mmcv.cnn import ConvModule, Scale
import torch.nn.functional as F

class CAM(nn.Module):
    """Channel Attention Module (CAM) as used in DANet.

    Builds a (channels x channels) attention matrix from the input itself,
    uses it to mix the channels, and adds the scaled result back onto the
    input as a residual.
    """

    def __init__(self):
        super().__init__()
        # mmcv `Scale` built with 0; per the original author's note this is a
        # learnable scale factor (called beta in the paper).
        self.gamma = Scale(0)

    def forward(self, x):
        """Forward function.

        Args:
            x: 4-D tensor unpacked as (batch, channels, height, width).

        Returns:
            Tensor with the same shape as ``x``.
        """
        n, c, h, w = x.size()
        # Flatten the spatial dims once; query, key and value all come from it.
        flat = x.view(n, c, -1)

        # Channel-to-channel similarity: (n, c, hw) @ (n, hw, c) -> (n, c, c).
        energy = torch.bmm(flat, flat.transpose(1, 2))
        print('energy shape: ', energy.shape)

        # Subtract every entry from its row maximum, so channel pairs with a
        # lower raw energy receive a higher softmax weight.
        row_max = energy.max(dim=-1, keepdim=True)[0]
        energy_new = row_max.expand_as(energy) - energy
        print('energy_new shape: ', energy_new.shape)

        attention = F.softmax(energy_new, dim=-1)
        print('attention map shape: ', attention.shape)  # (n, c, c)

        proj_value = flat
        print('proj_value shape:', proj_value.shape)
        mixed = torch.bmm(attention, proj_value)
        print('out shape: ', mixed.shape)

        # Back to the input layout, then scaled residual connection.
        mixed = mixed.view(n, c, h, w)
        return self.gamma(mixed) + x


In [22]:
import torch
# Random input feature map; CAM.forward unpacks it as (batch, channels, height, width).
features = torch.randn(16, 64, 64,48)

# CAM takes no constructor arguments.
camlayer = CAM()

# The output is reshaped back to the input layout and added to the input,
# so its shape equals the input shape.
feature_out = camlayer(features)
print(feature_out.shape)

energy shape:  torch.Size([16, 64, 64])
energy_new shape:  torch.Size([16, 64, 64])
attention map shape:  torch.Size([16, 64, 64])
proj_value shape: torch.Size([16, 64, 3072])
out shape:  torch.Size([16, 64, 3072])
torch.Size([16, 64, 64, 48])


- ENCAM中的实现
- 其与SENet的不同之处在于：SENet的squeeze操作只使用global average pooling（PyTorch函数为AdaptiveAvgPool2d）；而ENCAM同时使用global average pooling和global max pooling（PyTorch函数为AdaptiveAvgPool2d和AdaptiveMaxPool2d），两路池化结果分别经过同一个共享的两层1×1卷积（fc1 → ReLU → fc2）之后再相加，最后经过Sigmoid得到通道权重
- Attention map shape 为torch.Size([16, 64, 1, 1])，相当于给每个通道指定了一个权重；注意该模块只返回attention map本身（与SELayer不同，没有与输入相乘），需要调用方自行将其与输入特征相乘

In [23]:
class ChannelAttention(nn.Module):
    """Channel attention built from global average AND global max pooling.

    Both pooled descriptors go through the same bottleneck (two bias-free
    1x1 convolutions with a ReLU in between); the two results are summed and
    passed through a sigmoid. The module returns only the attention map; it
    does not multiply it with the input.

    Args:
        in_planes: Number of channels of the input feature map.
        reduction: Bottleneck ratio; the hidden layer has
            ``in_planes // reduction`` channels, but never fewer than 1 so
            that inputs with fewer channels than ``reduction`` still get a
            usable bottleneck. Defaults to 8.

    Raises:
        ValueError: If ``reduction`` is smaller than 1.
    """

    def __init__(self, in_planes, reduction=8):
        super().__init__()
        if reduction < 1:
            raise ValueError(f"reduction must be >= 1, got {reduction}")
        # Clamp so that in_planes < reduction does not yield a 0-channel layer.
        hidden_planes = max(1, in_planes // reduction)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        self.fc1 = nn.Conv2d(in_planes, hidden_planes, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden_planes, in_planes, 1, bias=False)

        self.sigmoid = nn.Sigmoid()

    def _shared_mlp(self, pooled):
        """Apply the bottleneck shared by the avg- and max-pooled branches."""
        return self.fc2(self.relu1(self.fc1(pooled)))

    def forward(self, x):
        """Compute the channel attention map for ``x``.

        Args:
            x: 4-D feature map whose dim 1 has ``in_planes`` channels.

        Returns:
            Attention map of shape (batch, in_planes, 1, 1) with sigmoid
            outputs; multiply it with ``x`` to re-weight the channels.
        """
        avg_out = self._shared_mlp(self.avg_pool(x))
        max_out = self._shared_mlp(self.max_pool(x))
        return self.sigmoid(avg_out + max_out)

In [25]:
import torch
# Random input feature map: batch of 16, 64 channels, 64x48 trailing (pooled) dims.
features = torch.randn(16, 64, 64,48)

# The constructor argument must equal the size of dim 1 of the input (64 here).
channel_attention = ChannelAttention(64)
# NOTE: despite its name, feature_out holds the attention map itself;
# ChannelAttention.forward returns the sigmoid output and does not multiply it with the input.
feature_out = channel_attention(features)
print('attention map shape: ', feature_out.shape)

attention map shape:  torch.Size([16, 64, 1, 1])
