In [1]:
# 文本嵌入层的代码分析

import torch
import torch.nn as nn # 预定义的网络层，包含一些现成的工具
import math # 数学计算工具包
from torch.autograd import Variable

# Text embedding layer
class Embeddings(nn.Module):
    '''
    Token-embedding lookup whose output is scaled by sqrt(d_model).
    '''

    def __init__(self, d_model, vocab):
        '''
        d_model: dimension of each embedding vector
        vocab: size of the vocabulary (number of rows in the lookup table)
        '''
        super().__init__()
        self.d_model = d_model
        # Lookup table holding one d_model-sized vector per vocabulary entry.
        self.lut = nn.Embedding(num_embeddings=vocab, embedding_dim=d_model)

    def forward(self, x):
        '''
        x: tensor of integer indices fed to the lookup table

        Returns the looked-up vectors multiplied by sqrt(d_model); the result
        has the shape of x with one extra trailing dimension of size d_model.
        '''
        scale = math.sqrt(self.d_model)
        looked_up = self.lut(x)
        return looked_up * scale

In [5]:
# Minimal nn.Embedding demo: a table of 10 vectors of size 3, indexed by a 2x4 batch of ids.
embedding = nn.Embedding(num_embeddings=10, embedding_dim=3)
# NOTE(review): `input` shadows the Python builtin of the same name; the name is kept
# unchanged here because other cells may read it.
input = torch.tensor([[1, 2, 4, 5], [4, 3, 2, 9]], dtype=torch.long)
# Every id is replaced by its 3-dim vector, so the result has shape (2, 4, 3).
embedding(input).shape

torch.Size([2, 4, 3])

In [4]:
# Demo: run the Embeddings layer on a small batch of token ids.
d_model = 512
vocab = 1000

# Two sequences of four token ids each; every id must be smaller than vocab.
# A plain long tensor is used instead of the deprecated
# Variable(torch.LongTensor(...)) wrapper; the resulting tensor is the same.
x = torch.tensor([[100, 2, 421, 508], [491, 998, 1, 221]], dtype=torch.long)
emb = Embeddings(d_model, vocab)
embr = emb(x)
# Display the scaled embeddings together with their shape, (2, 4, d_model).
(embr, embr.shape)

(tensor([[[ 17.9887, -21.2604,  22.2643,  ..., -17.2853,   6.5057,  -6.7348],
          [-21.8817,   8.0818,  14.5089,  ...,  21.4535,   6.0248,  -2.3179],
          [ 46.3037,  -3.1423,   1.7513,  ...,  28.7432, -17.5639,  17.1703],
          [  8.2219,  17.9838,  17.0450,  ...,  -7.0680,  54.4908,  14.2220]],
 
         [[  3.8480,  13.0536, -35.2835,  ..., -18.1659, -24.6003,   3.8836],
          [ 28.0201,  33.8831,  16.9795,  ..., -24.0986,   9.1528,   7.5900],
          [-35.8207,   3.6164,  -6.5116,  ...,  17.4589,  26.3022, -20.9856],
          [ 13.3537,  28.6510,  32.0331,  ..., -16.0911,  42.2110,  -3.9310]]],
        grad_fn=<MulBackward0>),
 torch.Size([2, 4, 512]))

In [6]:
# Positional encoding layer
class PositionalEncoding(nn.Module):
    '''
    Adds fixed sinusoidal position encodings to the input, then applies dropout.

    The table is precomputed once for max_len positions and stored as the
    buffer `pe` with shape (1, max_len, d_model), so it is module state but
    not a trainable parameter.
    '''

    def __init__(self, d_model, dropout, max_len=5000):
        '''
        d_model: size of the last (feature) dimension of the encoding table;
                 both even and odd values are supported
        dropout: drop probability passed to nn.Dropout
        max_len: number of positions for which encodings are precomputed
        '''
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        # Absolute position column vector, shape (max_len, 1). The dtype is made
        # explicit so the math below never depends on integer-tensor promotion.
        position = torch.arange(0, max_len, dtype=pe.dtype).unsqueeze(1)
        # One frequency per even feature index: exp(-2i * ln(10000) / d_model).
        div_term = torch.exp(
            torch.arange(0, d_model, 2, dtype=pe.dtype) * -(math.log(10000.0) / d_model)
        )
        angles = position * div_term
        # Even feature indices carry the sine, odd indices the cosine.
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even column, so the
        # cosine part is trimmed to d_model // 2 columns (a no-op when d_model is even).
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # Leading dimension of size 1 so the table broadcasts over the first dim of x.
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        '''
        x: tensor whose dim 1 indexes positions and whose last dim has size
           d_model (presumably (batch, seq_len, d_model) -- confirm with callers)

        Returns dropout(x + pe[:, :x.size(1)]).
        '''
        # Buffers never require grad, so no Variable(..., requires_grad=False) wrapper is needed.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

In [7]:
# nn.Dropout demo: with p=0.2 some entries of the random 4x5 input come back as zero,
# and the surviving entries are rescaled by 1 / (1 - p).
m = nn.Dropout(0.2)
# NOTE(review): `input` shadows the Python builtin of the same name; the name is kept
# unchanged here because other cells may read it.
input = torch.randn((4, 5))
output = m(input)
output

tensor([[-2.2121,  0.8715,  0.9482, -0.8871,  1.0714],
        [-0.0000,  0.0000, -0.1685,  0.3339, -1.0596],
        [ 1.6276, -0.7780, -1.3569, -0.0000, -0.8909],
        [ 0.0000, -0.0000,  1.3403,  0.9246, -0.0000]])

In [9]:
# unsqueeze demo: inserting a size-1 dimension at index 0 gives a (1, 4) row,
# inserting it at index 1 gives a (4, 1) column.
x = torch.tensor([1, 2, 3, 4])
x.unsqueeze(dim=0), x.unsqueeze(dim=1)

(tensor([[1, 2, 3, 4]]),
 tensor([[1],
         [2],
         [3],
         [4]]))