# BERT's Anatomy Step by Step: Positional Embeddings

In [None]:
import matplotlib
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'

import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoConfig, AutoTokenizer
from transformers import BertForPreTraining

In [None]:
# Checkpoint name passed to every `from_pretrained` call in this notebook.
model_checkpoint = 'bert-base-uncased'

In [None]:
# Load config, tokenizer and model from the same checkpoint so that all three agree.
config = AutoConfig.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = BertForPreTraining.from_pretrained(model_checkpoint)

In [None]:
# Turn a sample sentence into a batch of token ids (returned as a PyTorch tensor).
# Hint: tokenizer.convert_ids_to_tokens(encoding.flatten()) shows the matching tokens.
encoding = tokenizer.encode("let's tokenize something?", return_tensors="pt")

# Look the ids up in the word-embedding layer only; no position information is added here.
word_embedding_layer = model.bert.embeddings.word_embeddings
seq_embedding = word_embedding_layer(encoding)
seq_embedding.size()   # (batch_size, seq_len, hidden_size)

In [None]:
# Display the model's embeddings sub-module to inspect the layers it contains.
model.bert.embeddings

In [None]:
config.hidden_size  # size of each embedding vector (last axis of the embedding tensors)

In [None]:
config.max_position_embeddings  # max seq_len, i.e. how many position indices have an embedding

In [None]:
# One position index per token of the encoded sentence: 0, 1, ..., seq_len - 1.
seq_len = encoding.size(-1)

# unsqueeze(0) adds the leading batch axis -> (batch_size, seq_len)
positions = torch.arange(seq_len).unsqueeze(0)
positions

In [None]:
# Look up the position embedding for each index in `positions`.
# NOTE(review): despite the `_511` suffix, when the notebook is run in order this holds
# the embeddings for positions 0..seq_len-1 of the sample sentence, not for position 511.
# Consider renaming it (and its use in the sum that follows).
pos_embedding_511 = model.bert.embeddings.position_embeddings(positions)
pos_embedding_511.shape  # (batch_size, seq_len, hidden_size)

In [None]:
# Element-wise sum of the word embeddings and the position embeddings of the sample sentence.
# NOTE(review): the full embeddings module may apply further steps on top of this sum
# (check its printed repr) — confirm before treating this as the model's actual input.
seq_embedding + pos_embedding_511

In [None]:
# Position embeddings for every index 0 .. max_position_embeddings - 1.
# NOTE: this rebinds `positions` to a 1-D index vector (no batch axis), so the result is
# (max_position_embeddings, hidden_size). Re-create the batched `positions` before
# re-running any cell that expects the (batch_size, seq_len) version.
positions = torch.arange(config.max_position_embeddings)

position_embedding_layer = model.bert.embeddings.position_embeddings
pos_embedding = position_embedding_layer(positions)
pos_embedding.size()

In [None]:
# Compare the word embedding of one token of the sample sentence (blue) with the
# position embedding of a handful of positions (red), one wide, short figure per position.
token_index = 2  # which token of the sample sentence to plot

# Loop-invariant conversions, done once instead of on every iteration.
token_vector = seq_embedding[0, token_index].detach().numpy()
position_table = pos_embedding.detach().numpy()

for i in [0, 1, 2, 10, 100, 200, 300, 400, 500]:
    # Size each figure explicitly instead of changing the global rcParams, so an
    # interrupted run cannot leave the (12, 1) size behind as the default.
    plt.figure(figsize=(12, 1))
    # (Plotting token_vector + position_table[i] instead would show the summed vector.)
    plt.plot(position_table[i], alpha=0.5, c='red')
    plt.plot(token_vector,      alpha=0.5, c='blue')
    plt.xlim([0, config.hidden_size])
    plt.ylim([-0.15, 0.15])
    plt.show()

# Default figure size for the plots that follow.
matplotlib.rcParams['figure.figsize'] = (6, 4)

In [None]:
# Cosine similarity between every pair of position embeddings, drawn as a heatmap.
position_vectors = pos_embedding.detach().numpy()
similarity_matrix = cosine_similarity(position_vectors)

plt.imshow(similarity_matrix, cmap='Blues')
axes = plt.gca()
axes.set_xlabel('Position')
axes.set_ylabel('Position')

# Hide the frame around the heatmap.
for side in ('top', 'right', 'bottom', 'left'):
    axes.spines[side].set_visible(False)
plt.show()