Create learnable positional embeddings and add them to the token embeddings

This allows the model to learn a position encoding for each token position during pretraining

In [1]:
from torch import nn, bmm
import torch

from math import sqrt
from transformers import AutoConfig, AutoTokenizer

In [2]:
# Checkpoint name; its configuration object is passed to Embeddings below,
# which reads vocab_size, hidden_size and max_position_embeddings from it.
model_name = 'bert-base-uncased'
config = AutoConfig.from_pretrained(model_name)

In [3]:
class Embeddings(nn.Module):
    """Token embeddings combined with learned absolute position embeddings.

    Token ids and position indices are each looked up in their own
    ``nn.Embedding`` table. The two vectors are summed, layer-normalised and
    then passed through dropout.

    Args:
        config: Object exposing the integer attributes ``vocab_size``,
            ``hidden_size`` and ``max_position_embeddings``.
    """

    def __init__(self, config):
        super().__init__()
        self.token_embeddings = nn.Embedding(config.vocab_size, config.hidden_size)
        # One learnable vector per position index, up to max_position_embeddings.
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=1e-12)
        # No rate is passed, so nn.Dropout falls back to its default probability.
        self.dropout = nn.Dropout()

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Embed token ids and add position information.

        Args:
            input_ids: Integer tensor of token ids whose dimension 1 is the
                sequence length (expected layout: ``(batch, seq_length)``).

        Returns:
            Tensor with one ``hidden_size`` vector per input token, on the
            same device as the module's parameters.
        """
        # Position ids 0..seq_length-1 with a leading axis of size 1 so they
        # broadcast across the batch when added to the token embeddings.
        # They are created on the device of ``input_ids``; a bare
        # ``torch.arange`` would always land on the default device and break
        # the lookup once the module and its inputs live on an accelerator.
        seq_length = input_ids.size(1)
        position_ids = torch.arange(
            seq_length, dtype=torch.long, device=input_ids.device
        ).unsqueeze(0)

        token_embeddings = self.token_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)

        # Combine position and token embeddings, then normalise and regularise.
        embeddings = token_embeddings + position_embeddings
        embeddings = self.layer_norm(embeddings)
        embeddings = self.dropout(embeddings)

        return embeddings



In [4]:
# Sample sentence to tokenize with the tokenizer that matches model_name.
text = 'life is like a box of chocolates'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# return_tensors='pt' yields PyTorch tensors; add_special_tokens=False keeps
# the ids limited to the tokens of the sentence itself.
inputs = tokenizer(text, return_tensors='pt', add_special_tokens=False)

In [5]:
# Token ids of the first (and only) sequence in the batch.
inputs.input_ids[0]

tensor([2166, 2003, 2066, 1037, 3482, 1997, 7967, 2015])

In [6]:
# Map the ids back to their token strings to see how the sentence was split.
tokenizer.convert_ids_to_tokens(inputs.input_ids[0])

['life', 'is', 'like', 'a', 'box', 'of', 'chocolate', '##s']

In [7]:
# Build the embedding layer from the checkpoint config and check the shape
# of its output for the tokenized sentence.
embedding_layer = Embeddings(config)
embedding_layer(inputs.input_ids).size()

torch.Size([1, 8, 768])

In [9]:
# Reproduce the position-id computation from Embeddings.forward to inspect it:
# indices 0..seq_length-1 with a leading axis of size 1.
seq_length = inputs.input_ids.size(1)
torch.arange(seq_length, dtype=torch.long).unsqueeze(0)

tensor([[0, 1, 2, 3, 4, 5, 6, 7]])

Open question: how are layer norm and dropout implemented in PyTorch?