# Sinusoidal Positional Encodings

In [None]:
import matplotlib
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'

import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoConfig, AutoTokenizer
from transformers import T5ForSequenceClassification

In [None]:
# Hugging Face checkpoint name used to load the model, tokenizer, and config.
model_checkpoint = 't5-small'

In [None]:
# Load the sequence-classification model, its tokenizer, and its configuration
# from the same checkpoint name.
model = T5ForSequenceClassification.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
config = AutoConfig.from_pretrained(model_checkpoint)

In [None]:
# Bare expression: in a notebook this displays the wrapped transformer's module
# tree, e.g. to check whether it contains a positional-embedding layer.
model.transformer

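**No absolute positional embeddings!** T5 does not have a positional-embedding table: it encodes position through learned relative position biases inside attention. The cells below build the classic sinusoidal positional encodings at T5's dimensions for illustration.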

In [None]:
def generate_positional_encoding(max_len, d_model):
    """Build a table of sinusoidal positional encodings.

    Column pair ``k`` uses the frequency ``10000 ** (-2k / d_model)``: the
    even column ``2k`` holds ``sin(pos * freq)`` and the odd column ``2k + 1``
    holds ``cos(pos * freq)``.

    Args:
        max_len: Number of positions, i.e. rows of the returned table.
        d_model: Encoding width, i.e. columns of the returned table. May be
            odd, in which case the last sine column has no cosine partner.

    Returns:
        A tensor of shape ``(max_len, d_model)``.
    """
    position = torch.arange(0, max_len)[:, None]
    div_term = torch.exp(torch.arange(0, d_model, 2) * -(torch.log(torch.tensor(10000.0)) / d_model))
    # Phase of every (position, frequency) pair, shared by the sine and cosine columns.
    angles = position * div_term
    pos_enc = torch.zeros((max_len, d_model))

    pos_enc[:, 0::2] = torch.sin(angles)
    # An odd d_model has one fewer odd column than even column, so keep only
    # as many cosine phases as there are odd columns to fill.
    pos_enc[:, 1::2] = torch.cos(angles[:, :d_model // 2])

    return pos_enc

In [None]:
# Use the English-to-German translation task's `max_length` as the number of
# positions to encode, stored on the config as `max_position_embeddings`.
config.max_position_embeddings = config.task_specific_params['translation_en_to_de']['max_length']
# Expose `d_model` under the `hidden_size` name used when building and plotting the table.
config.hidden_size = config.d_model

# Encoding table: one row per position, one column per model dimension.
sin_pos_encoding = generate_positional_encoding(config.max_position_embeddings, config.hidden_size)

In [None]:
# Draw each sampled position's encoding vector as its own wide, short strip.
matplotlib.rcParams['figure.figsize'] = (12, 1)

sample_positions = [0, 1, 2, 10, 50, 100, 150, 200, 250, 299]
for pos in sample_positions:
    plt.plot(sin_pos_encoding[pos], alpha=0.5, c='blue')
    plt.xlim([0, config.hidden_size])
    plt.ylim([-1.5, 1.5])
    plt.show()  # one figure per position

# Switch to a smaller figure size for the plots that follow.
matplotlib.rcParams['figure.figsize'] = (6, 4)

In [None]:
# Heatmap of the full encoding table: rows are positions, columns are dimensions.
plt.imshow(sin_pos_encoding, cmap='Blues')
plt.xlabel('Embedding Dimensions')
plt.ylabel('Position in Sequence')

# Hide the frame around the image.
ax = plt.gca()
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
plt.show()

In [None]:
# Pairwise cosine similarity between the encoding vectors of all positions.
similarity_matrix = cosine_similarity(sin_pos_encoding)

# Optional extras left disabled: aspect='auto' / extent=[0, max_len, 0, max_len]
# on imshow, plt.colorbar(), and
# plt.title('Position-wise Similarity of Positional Embeddings').
plt.imshow(similarity_matrix, cmap='Blues')
plt.xlabel('Position')
plt.ylabel('Position')

# Hide the frame around the image.
ax = plt.gca()
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
plt.show()