In [1]:
import paddle
import paddle.nn as nn


from einops import rearrange


from collections.abc import Iterable
from numpy import repeat

In [8]:

# From PyTorch internals
"""对repeat进行封装，让代码更加健壮"""
def _ntuple(n):
    def parse(x):
        if isinstance(x, Iterable):#如果已经是转换后的值，直接返回，不需要再做转换操作
            return x
        return tuple(repeat(x, n))

    return parse
to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
to_ntuple = _ntuple

处理fp16(16位小数),按照fp32进行处理。

In [4]:
class LayerNorm(nn.LayerNorm):
    """Subclass torch's LayerNorm to handle fp16."""

    def forward(self, x: paddle.Tensor):
        orig_type = x.dtype
        ret = super().forward(x.type(paddle.float32))
        return ret.type(orig_type)


重写GELU函数，降低处理精度

In [10]:
class QuickGELU(nn.Layer):
    def forward(self, x: paddle.Tensor):
        return x * paddle.sigmoid(1.702 * x)

全连接网络，复用自 paddle vit

In [11]:
class Mlp(nn.Layer):
    """ MLP module
    Impl using nn.Linear and activation is GELU, dropout is applied.
    Ops: fc -> act -> dropout -> fc -> dropout
    Attributes:
        fc1: nn.Linear
        fc2: nn.Linear
        act: GELU
        dropout1: dropout after fc1
        dropout2: dropout after fc2
    """
    def __init__(self,
                 embed_dim,
                 mlp_ratio,
                 dropout=0.):
        super().__init__()
        w_attr_1, b_attr_1 = self._init_weights()
        self.fc1 = nn.Linear(embed_dim,
                             int(embed_dim * mlp_ratio),
                             weight_attr=w_attr_1,
                             bias_attr=b_attr_1)

        w_attr_2, b_attr_2 = self._init_weights()
        self.fc2 = nn.Linear(int(embed_dim * mlp_ratio),
                             embed_dim,
                             weight_attr=w_attr_2,
                             bias_attr=b_attr_2)
        self.act = nn.GELU()
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def _init_weights(self):
        weight_attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.XavierUniform()) #default in pp: xavier
        bias_attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.Normal(std=1e-6)) #default in pp: zero
        return weight_attr, bias_attr

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.dropout2(x)
        return x

In [12]:
class ConvEmbed(nn.Layer):
    """ Image to Conv Embedding

    """

    def __init__(self,
                 patch_size=7,
                 in_chans=3,
                 embed_dim=64,
                 stride=4,
                 padding=2,
                 norm_layer=None):
        super().__init__()
        patch_size = to_2tuple(patch_size)#把patch初始化为一个正方形,这里是(7,7)

        self.patch_size = patch_size
        self.proj = nn.Conv2D(
            in_chans, embed_dim,
            kernel_size=patch_size,
            stride=stride,
            padding=padding
        )
        self.norm = norm_layer(embed_dim) if norm_layer else None

    def forward(self, x):
        x = self.proj(x)

        B, C, H, W = x.shape#B个图片 C*H的大小 W个通道(example：W==3:红黄蓝)
        x = rearrange(x, 'b c h w -> b (h w) c')#对每个图片进行嵌入，相当于对每个图片线性的堆叠
        if self.norm:
            x = self.norm(x)
        x = rearrange(x, 'b (h w) c -> b c h w', h=H, w=W)#把x回归原来的形状

        return x

In [27]:
import numpy as np
x=np.random.rand(0,30,10,90,90,3)#0~30 (10,90,90,3)的随机数
x=x.astype(np.float)
ConvEmbed()(paddle.to_tensor(x))

(7, 7)


ValueError: (InvalidArgument) input and filter data type should be consistent, but received input data type is double and filter type is float
  [Hint: Expected input_data_type == filter_data_type, but received input_data_type:6 != filter_data_type:5.] (at C:\home\workspace\Paddle_release\paddle/fluid/operators/conv_op.cc:211)
  [operator < conv2d > error]