[![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/itmorn/AI.handbook/blob/main/DL/torch/nn/Transformer/TransformerDecoder.ipynb)

# TransformerDecoder
TransformerDecoder是N个TransformerDecoderLayer的堆叠。

**定义**：  
torch.nn.TransformerDecoder(decoder_layer, num_layers, norm=None)

**参数**:  
- decoder_layer – an instance of the TransformerDecoderLayer() class (required).  需要一个TransformerDecoderLayer对象

- num_layers – the number of sub-decoder-layers in the decoder  (required).  堆叠多少层

- norm – the layer normalization component (optional).   对最后一层的输出做norm的方式，默认为None（即不做额外的norm）

# 图解TransformerDecoder
右边的方块×N就是TransformerDecoder，N就是num_layers
<p align="center">
<img src="./imgs/transformer.jpg"
    width="700" /></p>

In [3]:
# Minimal example: run a single TransformerDecoderLayer on one sample (batch size 1).
import torch
import torch.nn as nn

# Seed before building the layer so that both the layer's initial weights and
# the random decoder input drawn below are reproducible.
torch.manual_seed(6688)
d_model = 4

# Dropout is disabled (0.0), so the forward pass adds no randomness of its own.
decoder_layer = nn.TransformerDecoderLayer(
    d_model=d_model,
    nhead=2,
    dim_feedforward=5,
    dropout=0.0,
)

# Encoder output ("memory"), laid out as (L_encoder_out, N, E) = (3, 1, 4).
# Written as three feature rows, then given a batch axis of size 1.
memory = torch.tensor(
    [
        [-0.1235, 1.6189, -1.0983, -0.3970],
        [-1.5583, 1.2149, 0.0358, 0.3076],
        [1.4580, 0.3129, -0.5627, -1.2082],
    ]
).unsqueeze(1)

# Decoder input, laid out as (L_decoder_in, N, E) = (2, 1, 4).
# Its sequence length (2) is allowed to differ from the encoder output length (3).
tgt = torch.rand(2, 1, d_model)

out = decoder_layer(tgt=tgt, memory=memory)
print("out:\n", out, "\n")  # out has the same shape as tgt: (2, 1, 4)


out:
 tensor([[[ 1.5059,  0.2510, -0.6276, -1.1293]],

        [[-0.5577,  1.1509, -1.3426,  0.7494]]],
       grad_fn=<NativeLayerNormBackward0>) 



In [4]:
# Minimal example: wrap one TransformerDecoderLayer in a TransformerDecoder
# with num_layers=1 and run it on a single sample (batch size 1).
import torch
import torch.nn as nn

# Seed before building the layer so that both the layer's initial weights and
# the random decoder input drawn below are reproducible.
torch.manual_seed(6688)
d_model = 4

# Dropout is disabled (0.0), so the forward pass adds no randomness of its own.
decoder_layer = nn.TransformerDecoderLayer(
    d_model=d_model,
    nhead=2,
    dim_feedforward=5,
    dropout=0.0,
)
# A one-layer stack with no final normalization module.
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=1, norm=None)

# Encoder output ("memory"), laid out as (L_encoder_out, N, E) = (3, 1, 4).
# Written as three feature rows, then given a batch axis of size 1.
memory = torch.tensor(
    [
        [-0.1235, 1.6189, -1.0983, -0.3970],
        [-1.5583, 1.2149, 0.0358, 0.3076],
        [1.4580, 0.3129, -0.5627, -1.2082],
    ]
).unsqueeze(1)

# Decoder input, laid out as (L_decoder_in, N, E) = (2, 1, 4).
# Its sequence length (2) is allowed to differ from the encoder output length (3).
tgt = torch.rand(2, 1, d_model)

out = transformer_decoder(tgt=tgt, memory=memory)
print("out:\n", out, "\n")
# With num_layers=1 (and norm=None), the TransformerDecoder output matches what
# the single TransformerDecoderLayer produces for the same seed and inputs.


out:
 tensor([[[ 1.5059,  0.2510, -0.6276, -1.1293]],

        [[-0.5577,  1.1509, -1.3426,  0.7494]]],
       grad_fn=<NativeLayerNormBackward0>) 

