# reference
+ [official code](https://github.com/csjliang/DASR)
+ [讲解MoE](https://zhuanlan.zhihu.com/p/542465517)
+ [Dynamic convolution 代码详解](https://zhuanlan.zhihu.com/p/208519425)
+ [Dynamic convolution 论文详解](https://zhuanlan.zhihu.com/p/142381725)

# Related work
+ 动态卷积
+ MoE
+ 对抗损失

# question
## 什么是MoE(mixture of experts)？
### 核心思想
提出一种新的监督学习过程：一个系统中包含多个相互独立的网络，每个网络只处理全部训练样本的一个子集。假如我们已经知道数据集中存在一些天然的子集(比如：不同的模糊场景)，那么使用单个模型去学习就会受到很多干扰，导致学习缓慢、泛化困难。这时，我们可以使用**多个模型(expert)去学习**，并使用一个**门网络来决定每个数据应该被哪一个模型去训练**，这样可以减少不同类型样本之间的干扰。
## 什么是动态卷积(Dynamic convolution)？
### 核心思想
特点：动态卷积在不增加网络深度和宽度的前提下，增加网络的复杂度。

1、动态卷积采用基于输入的注意力机制，动态地聚合多个并行的卷积核。

2、将多个卷积核集合在一起，一方面，核的尺寸较小，具有较高的计算效率，另一方面，由于这些核采用非线性的方式聚集在一起，因此具有更强的表示能力。
### 结构图
<div align=center>
<img src="./img/介绍图.jpg">
</div>

训练时：所有参数都会进行更改

推理时：红色框的参数是固定的，黄色框的参数随着输入数据的变化而更改

## 模型退化参数如何获取？
退化参数由作者自定义，详见代码 DASR_model 第 75~420 行。

## DASR是如何使用动态卷积？
DASR 采用动态卷积来搭建 MoE，详见代码 arch_util 第 196~226 行。

# 模型流程
<div align=center>
<img src="./img/model.png">
</div>


# Main Idea
作者提出一种高效且有效的 degradation-adaptive super-resolution (DASR) 网络，该网络大致可以分为两部分：(1) 一个微小的回归网络，用来预测输入图像的退化参数；(2) 几个具有相同拓扑结构的卷积专家 (convolutional experts)，它们被联合优化，并通过专家的非线性混合来确定网络参数。

# Degradation_prediction_network(net_p)

In [None]:
class Degradation_Predictor(nn.Module):
    """Small CNN that regresses a per-image parameter vector and expert weights.

    The input goes through six 5x5 convolutions (the fourth one has stride 2),
    each followed by an in-place LeakyReLU(0.2). The resulting feature map is
    globally average-pooled to one value per channel, giving ``num_params``
    numbers per sample. A two-layer linear head (no activation in between)
    then maps those numbers to ``num_networks`` values.

    Args:
        in_nc (int): Channels of the input image. Default: 3.
        nf (int): Channels of the intermediate feature maps. Default: 64.
        num_params (int): Output channels of the last convolution, i.e. the
            length of the pooled parameter vector. Default: 100.
        num_networks (int): Length of the mapped weight vector. Default: 5.
        use_bias (bool): Bias switch for every convolution except the first
            one, which always keeps the ``nn.Conv2d`` default. Default: True.
    """

    def __init__(self, in_nc=3, nf=64, num_params=100, num_networks=5, use_bias=True):
        super().__init__()

        # One row per convolution: (in_channels, out_channels, stride, extras).
        # The first convolution deliberately receives no ``bias`` argument.
        conv_specs = (
            (in_nc, nf, 1, {}),
            (nf, nf, 1, {'bias': use_bias}),
            (nf, nf, 1, {'bias': use_bias}),
            (nf, nf, 2, {'bias': use_bias}),
            (nf, nf, 1, {'bias': use_bias}),
            (nf, num_params, 1, {'bias': use_bias}),
        )
        stack = []
        for c_in, c_out, step, extras in conv_specs:
            stack.append(nn.Conv2d(c_in, c_out, kernel_size=5, stride=step, padding=2, **extras))
            stack.append(nn.LeakyReLU(0.2, True))
        self.ConvNet = nn.Sequential(*stack)

        self.globalPooling = nn.AdaptiveAvgPool2d((1, 1))

        # model-A
        self.MappingNet = nn.Sequential(
            nn.Linear(num_params, 15),
            nn.Linear(15, num_networks),
        )

    def forward(self, input):
        """Return ``(out_params, mapped_weights)`` for a batch of images.

        Args:
            input (Tensor): Batched image tensor; expected to be 4-D
                ``(N, in_nc, H, W)`` so the pooled result is ``(N, C, 1, 1)``.

        Returns:
            tuple: ``out_params`` of shape ``(N, num_params)`` and
            ``mapped_weights`` of shape ``(N, num_networks)``.
        """
        pooled = self.globalPooling(self.ConvNet(input))
        # Keep only the (batch, channel) dims; the pooled spatial dims are 1x1.
        out_params = pooled.view(pooled.size()[:2])
        return out_params, self.MappingNet(out_params)

# 超分(net_g)

In [None]:
class MSRResNetDynamic(nn.Module):
    """SR network in which every convolution is a ``Dynamic_conv2d``.

    Layout: first conv -> ``num_block`` ``ResidualBlockNoBNDynamic`` blocks ->
    pixel-shuffle upsampling -> two trailing convs, with a bilinearly
    upsampled copy of the input added to the result.

    Each dynamic layer is called with a dict ``{'x': features, 'weights':
    weights}``; the same ``weights`` object is handed to every layer.

    Args:
        num_in_ch (int): Channels of the input image. Default: 3.
        num_out_ch (int): Channels of the output image. Default: 3.
        num_feat (int): Channels of the intermediate features. Default: 64.
        num_block (int): Number of residual blocks in the body. Default: 16.
        num_models (int): Passed as ``K`` to every ``Dynamic_conv2d`` and as
            ``num_models`` to the residual blocks. Default: 5.
        upscale (int): Upsampling factor. 2 or 3 use one pixel-shuffle stage,
            4 uses two x2 stages; for any other value no upsampling layer is
            created or applied. Default: 4.
    """

    def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64, num_block=16, num_models=5, upscale=4):
        super().__init__()
        self.upscale = upscale

        def dyn_conv(c_in, c_out):
            # All dynamic convs in this network share these settings.
            return Dynamic_conv2d(c_in, c_out, 3, groups=1, if_bias=True, K=num_models)

        self.conv_first = dyn_conv(num_in_ch, num_feat)
        self.body = make_layer(ResidualBlockNoBNDynamic, num_block, num_feat=num_feat, num_models=num_models)

        # upsampling
        if self.upscale in [2, 3]:
            # Single stage: expand channels by upscale^2, then shuffle once.
            self.upconv1 = dyn_conv(num_feat, num_feat * self.upscale * self.upscale)
            self.pixel_shuffle = nn.PixelShuffle(self.upscale)
        elif self.upscale == 4:
            # Two x2 stages that reuse the same PixelShuffle(2) module.
            self.upconv1 = dyn_conv(num_feat, num_feat * 4)
            self.upconv2 = dyn_conv(num_feat, num_feat * 4)
            self.pixel_shuffle = nn.PixelShuffle(2)

        self.conv_hr = dyn_conv(num_feat, num_feat)
        self.conv_last = dyn_conv(num_feat, num_out_ch)

        # activation function
        self.lrelu = nn.LeakyReLU(negative_slope=0.1, inplace=True)

    def forward(self, x, weights):
        """Super-resolve ``x`` using the given expert ``weights``.

        Args:
            x (Tensor): Input image batch.
            weights: Value forwarded under the ``'weights'`` key to every
                dynamic layer (presumably the expert mixing weights produced
                by the degradation predictor; confirm against the caller).

        Returns:
            Tensor: Network output plus the bilinearly upsampled input.
        """

        def run(layer, feat):
            return layer({'x': feat, 'weights': weights})

        feat = self.lrelu(run(self.conv_first, x))
        # The body consumes and returns the dict form; keep only the features.
        feat = run(self.body, feat)['x']

        if self.upscale == 4:
            up_stages = (self.upconv1, self.upconv2)
        elif self.upscale in [2, 3]:
            up_stages = (self.upconv1,)
        else:
            up_stages = ()
        for upconv in up_stages:
            feat = self.lrelu(self.pixel_shuffle(run(upconv, feat)))

        feat = self.lrelu(run(self.conv_hr, feat))
        out = run(self.conv_last, feat)

        # Global skip connection from the interpolated low-resolution input.
        out += F.interpolate(x, scale_factor=self.upscale, mode='bilinear', align_corners=False)
        return out

## ResidualBlockNoBNDynamic

In [None]:
class ResidualBlockNoBNDynamic(nn.Module):
    """Residual block without BN, built from two ``Dynamic_conv2d`` layers.

    It has a style of:
        ---Conv-ReLU-Conv-+-
         |________________|

    The block takes and returns a dict with the keys ``'x'`` (features) and
    ``'weights'`` so that it can be chained while the weights are passed
    along unchanged.

    Args:
        num_feat (int): Channel number of intermediate features.
            Default: 64.
        res_scale (float): Residual scale. Default: 1.
        num_models (int): Passed as ``K`` to both ``Dynamic_conv2d`` layers.
            Default: 5.
    """

    def __init__(self, num_feat=64, res_scale=1, num_models=5):
        super().__init__()
        self.res_scale = res_scale

        conv_kwargs = dict(groups=1, if_bias=True, K=num_models)
        self.conv1 = Dynamic_conv2d(num_feat, num_feat, 3, **conv_kwargs)
        self.conv2 = Dynamic_conv2d(num_feat, num_feat, 3, **conv_kwargs)
        self.relu = nn.ReLU(inplace=True)

        # NOTE(review): 0.1 is presumably an init scale; confirm against
        # default_init_weights, which is defined outside this block.
        default_init_weights([self.conv1, self.conv2], 0.1)

    def forward(self, inputs):
        """Apply the block to ``inputs['x']`` and forward ``inputs['weights']``.

        Args:
            inputs (dict): Must contain ``'x'`` and ``'weights'``.

        Returns:
            dict: ``{'x': x + res_scale * conv2(relu(conv1(x))), 'weights': ...}``
            with the same ``'weights'`` object as the input.
        """
        # Copy the skip path up front so later in-place ops cannot touch it.
        shortcut = inputs['x'].clone()
        hidden = self.relu(self.conv1(inputs))
        weights = inputs['weights']
        residual = self.conv2({'x': hidden, 'weights': weights})
        return {'x': shortcut + residual * self.res_scale, 'weights': weights}