# 分组卷积
## 解决的问题
+ 降低模型参数量和计算量
## 解决方式
+ 通道分组，每一组通道共用一组卷积核（卷积只在组内进行）
## 存在的问题
+ 各组之间信息不流动

In [None]:
def GConv(in_channel, out_channel, kernel_size, padding, stride, bias, groups):
    """Build a grouped 2-D convolution layer.

    Thin factory around ``nn.Conv2d`` that forwards every argument unchanged.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.
        kernel_size: Kernel size passed to ``nn.Conv2d``.
        padding: Padding passed to ``nn.Conv2d``.
        stride: Stride passed to ``nn.Conv2d``.
        bias: Whether the convolution has a learnable bias.
        groups: Number of channel groups passed to ``nn.Conv2d``.

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    conv_options = {
        "stride": stride,
        "padding": padding,
        "groups": groups,
        "bias": bias,
    }
    return nn.Conv2d(in_channel, out_channel, kernel_size, **conv_options)

# 深度可分离卷积
## 解决的问题
+ 解决分组卷积导致的各组通道之间信息不流动
## 解决方式
+ 分组卷积之后加1X1卷积，加强通道之间信息传递
## 存在的问题
+ 每一个分组卷积之后都要进行一次点卷积，相当于人为加深模型深度
+ 1X1卷积耗费大量的资源

In [None]:
import torch.nn as nn
def DepthwiseConv(in_channel, out_channel, kernel_size, padding, stride, bias):
    """Build a depthwise convolution followed by a 1x1 (pointwise) convolution.

    Args:
        in_channel: Number of input channels; also used as the group count
            and the output width of the depthwise stage.
        out_channel: Number of channels produced by the 1x1 convolution.
        kernel_size: Kernel size of the depthwise stage.
        padding: Padding of the depthwise stage.
        stride: Stride of the depthwise stage.
        bias: Whether the depthwise stage has a learnable bias.

    Returns:
        An ``nn.Sequential`` holding the depthwise conv and then the 1x1 conv.
    """
    # groups == in_channel: every input channel gets its own spatial filter.
    depthwise = nn.Conv2d(
        in_channel,
        in_channel,
        kernel_size,
        stride=stride,
        padding=padding,
        groups=in_channel,
        bias=bias,
    )
    # The 1x1 conv mixes information across channels.
    # NOTE(review): `bias` is not forwarded here, so this layer keeps the
    # nn.Conv2d default for bias -- confirm this is intended.
    pointwise = nn.Conv2d(in_channel, out_channel, kernel_size=1)
    return nn.Sequential(depthwise, pointwise)

+ MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications

# shuffleNet
## 解决的问题
+ 解决深度可分离卷积中1X1占据资源问题
## 解决方式
+ 对于1X1卷积进一步使用分组卷积
+ 加上channel shuffle，具体解释之后介绍
## channel shuffle
假定将输入层分为g组，总通道数为g*n：首先将通道维度拆分为(g,n)两个维度，然后将这两个维度转置为(n,g)，最后重新reshape成一个维度。
## 存在的问题


<div align=center>
<img src=./img/channel_shuffle.png />
</div>


In [None]:
import torch
#使用的时候需要添加梯度剪切
class ShuffleV1Block(nn.Module):
    """ShuffleNet-V1 style unit built from grouped 1x1 convs and a depthwise conv.

    Main branch: grouped 1x1 conv -> BN -> ReLU -> depthwise conv -> BN ->
    channel shuffle -> grouped 1x1 conv -> BN.

    Shortcut:
        * ``stride == 1``: the input is added to the main branch and the sum
          goes through ReLU, so ``oup`` should equal ``inp``.
        * ``stride == 2``: the input is average-pooled and concatenated (on
          the channel axis) with the ReLU of the main branch; the main branch
          therefore only produces ``oup - inp`` channels.

    Args:
        inp: Number of input channels.
        oup: Number of output channels of the whole block.
        mid_channels: Channel width between the two 1x1 convolutions.
        group: Group count of both 1x1 convolutions and of the shuffle.
        ksize: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution, 1 or 2.
    """

    def __init__(self, inp, oup, mid_channels, group=4, ksize=3, stride=1):
        super().__init__()
        assert stride in [1, 2]
        self.stride = stride

        self.mid_channels = mid_channels
        self.ksize = ksize
        # "Same"-style padding for an odd kernel size.
        pad = ksize // 2
        self.pad = pad
        self.inp = inp
        self.group = group

        # With stride 2 the pooled input is concatenated to the output, so the
        # main branch only has to supply the remaining channels.
        outputs = oup - inp if stride == 2 else oup

        branch_main_1 = [
            # pw: grouped 1x1 convolution
            nn.Conv2d(inp, mid_channels, 1, 1, 0, groups=group, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            # dw: one spatial filter per channel
            nn.Conv2d(mid_channels,
                      mid_channels,
                      ksize,
                      stride,
                      pad,
                      groups=mid_channels,
                      bias=False),
            nn.BatchNorm2d(mid_channels),
        ]
        branch_main_2 = [
            # pw-linear: grouped 1x1 convolution with no activation after it
            nn.Conv2d(mid_channels, outputs, 1, 1, 0, groups=group,
                      bias=False),
            nn.BatchNorm2d(outputs),
        ]
        self.branch_main_1 = nn.Sequential(*branch_main_1)
        self.branch_main_2 = nn.Sequential(*branch_main_2)

        if stride == 2:
            self.branch_proj = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, old_x):
        """Run the block on a 4-D tensor and return the combined output."""
        x = self.branch_main_1(old_x)
        # Shuffling is only meaningful when there is more than one group.
        if self.group > 1:
            x = self.channel_shuffle(x)
        x = self.branch_main_2(x)

        # torch.relu is used instead of F.relu: `F` is never imported in this
        # file, so the previous code raised NameError on the first call.
        if self.stride == 1:
            return torch.relu(x + old_x)
        return torch.cat((self.branch_proj(old_x), torch.relu(x)), 1)

    def channel_shuffle(self, x):
        """Interleave channels so the next grouped conv sees every group.

        The channel axis is viewed as ``(channels_per_group, group)``, the two
        axes are swapped, and the result is flattened back to one channel axis.

        Args:
            x: 4-D tensor laid out as (batch, channels, height, width).

        Returns:
            A tensor of the same shape with its channels permuted.
        """
        batchsize, num_channels, height, width = x.shape
        assert num_channels % self.group == 0
        group_channels = num_channels // self.group

        x = x.reshape(batchsize, group_channels, self.group, height, width)
        x = x.permute(0, 2, 1, 3, 4)
        x = x.reshape(batchsize, num_channels, height, width)

        return x
