In [10]:
import numpy as np

import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel

In [11]:
def sinusoidal_pos_encoding(n_position, dim, dtype=torch.float64):
    """Build a fixed sinusoidal positional-encoding table.

    Entry ``[pos, j]`` is ``sin(pos / 10000 ** (2 * (j // 2) / dim))`` when
    ``j`` is even and ``cos`` of that same angle when ``j`` is odd, so each
    consecutive (even, odd) column pair shares one frequency.

    Args:
        n_position: Number of positions (rows of the table). Must be >= 0.
        dim: Width of each encoding (columns of the table). Must be >= 0.
        dtype: dtype of the returned tensor. Angles are always computed in
            float64 and only cast at the end. Defaults to ``torch.float64``,
            which is what this function has always returned.

    Returns:
        torch.Tensor of shape ``(n_position, dim)``.

    Raises:
        ValueError: If ``n_position`` or ``dim`` is negative.
    """
    if n_position < 0 or dim < 0:
        raise ValueError(
            f"n_position and dim must be non-negative, got {n_position} and {dim}"
        )

    # Column vector of positions so it broadcasts against the per-column scales.
    positions = torch.arange(n_position, dtype=torch.float64).unsqueeze(1)

    # 2 * (j // 2) for j in range(dim): 0, 0, 2, 2, 4, 4, ...
    # The trailing slice trims the extra element produced when dim is odd.
    paired_idx = torch.arange(0, dim, 2, dtype=torch.float64).repeat_interleave(2)[:dim]
    scales = torch.pow(10000.0, paired_idx / dim)

    # Shape is (n_position, dim) even when either size is 0, so the column
    # slicing below is always valid (the list-of-lists construction collapsed
    # to a 1-D tensor for n_position == 0 and then failed on 2-D indexing).
    angles = positions / scales

    encoding = torch.empty_like(angles)
    encoding[:, 0::2] = torch.sin(angles[:, 0::2])  # dim 2i
    encoding[:, 1::2] = torch.cos(angles[:, 1::2])  # dim 2i+1
    return encoding.to(dtype)

In [4]:
# Hugging Face checkpoint that supplies both the tokenizer and the model weights.
model_id = "Qwen/Qwen2.5-0.5B"
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id)

# Two sentences sharing the same opening words but differing in length.
sequence1 = "Naomi went to the store."
sequence2 = "Naomi went to the store to buy some reaction mass pellets."

# Token ids for each sentence, returned as PyTorch tensors.
tokens1, tokens2 = (
    tok(text, return_tensors="pt")["input_ids"]
    for text in (sequence1, sequence2)
)

# Look the ids up in the model's token-embedding table.
embeddings1, embeddings2 = (
    model.embed_tokens(ids)
    for ids in (tokens1, tokens2)
)

tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

In [25]:
print(tokens1.shape, embeddings1.shape)
print(tokens2.shape, embeddings2.shape)

# Generate position encodings for each sequence: one row per token,
# as wide as the model's hidden size.
pos_enc1 = sinusoidal_pos_encoding(tokens1.shape[1], model.config.hidden_size)
pos_enc2 = sinusoidal_pos_encoding(tokens2.shape[1], model.config.hidden_size)

print(pos_enc1.shape, pos_enc2.shape)

# Compare the positional encodings between the two sequences. Only positions
# present in BOTH tables are compared, so this works whichever sequence is longer.
shared_len = min(pos_enc1.shape[0], pos_enc2.shape[0])
for i in range(shared_len):
    print(f"pos {i} same: ", torch.allclose(pos_enc1[i], pos_enc2[i]))

# Show distances between positions i and i+1 for (at most) the first 7
# positions of each encoding; shorter encodings simply print fewer rows.
n_positions_shown = 7
for label, pos_enc in (("1", pos_enc1), ("2", pos_enc2)):
    print(f"Distances between consecutive positions for encoding {label}")
    for i in range(min(n_positions_shown, pos_enc.shape[0]) - 1):
        print(f"pos {i} diff: ", torch.dist(pos_enc[i], pos_enc[i + 1]))

torch.Size([1, 7]) torch.Size([1, 7, 896])
torch.Size([1, 13]) torch.Size([1, 13, 896])
torch.Size([7, 896]) torch.Size([13, 896])
pos 0 same:  True
pos 1 same:  True
pos 2 same:  True
pos 3 same:  True
pos 4 same:  True
pos 5 same:  True
pos 6 same:  True
Distances between consecutive positions for encoding 1
pos 0 diff:  tensor(4.8777, dtype=torch.float64)
pos 1 diff:  tensor(4.8777, dtype=torch.float64)
pos 2 diff:  tensor(4.8777, dtype=torch.float64)
pos 3 diff:  tensor(4.8777, dtype=torch.float64)
pos 4 diff:  tensor(4.8777, dtype=torch.float64)
pos 5 diff:  tensor(4.8777, dtype=torch.float64)
Distances between consecutive positions for encoding 2
pos 0 diff:  tensor(4.8777, dtype=torch.float64)
pos 1 diff:  tensor(4.8777, dtype=torch.float64)
pos 2 diff:  tensor(4.8777, dtype=torch.float64)
pos 3 diff:  tensor(4.8777, dtype=torch.float64)
pos 4 diff:  tensor(4.8777, dtype=torch.float64)
pos 5 diff:  tensor(4.8777, dtype=torch.float64)
