## Token IDs

In [1]:
import torch

In [2]:
# The complete set of token IDs: the consecutive integers 0 through 4.
# torch.arange(5) produces the same int64 tensor as spelling the list out.
all_token_ids = torch.arange(5)

print(f"All token IDs: {all_token_ids}")

All token IDs: tensor([0, 1, 2, 3, 4])


In [3]:
# A short example sequence of token IDs, used further down as indices into
# the embedding layer. The integer dtype is spelled out explicitly; it is
# the same dtype torch infers for a list of Python ints.
input_token_ids = torch.tensor([2, 1, 4], dtype=torch.long)

print(f"Input token IDs: {input_token_ids}")

Input token IDs: tensor([2, 1, 4])


## Embedding Layer

In [4]:
# Width of each embedding vector (number of floats per token).
VECTOR_DIMENSION = 8

# Number of distinct token IDs, read from the length of the ID tensor's
# first (and only) dimension.
# NOTE(review): using the count as the vocabulary size assumes the IDs are
# the contiguous range 0..N-1 — true for the tensor defined in this notebook.
VOCAB_SIZE = all_token_ids.shape[0]

print(f"Vocabulary Size: {VOCAB_SIZE}")
print(f"Vector Dimension: {VECTOR_DIMENSION}")

Vocabulary Size: 5
Vector Dimension: 8


In [6]:
# Seed torch's global RNG first so the random initial weights printed below
# come out the same on every run.
torch.manual_seed(1234)

# Lookup table with one trainable row per token ID:
# VOCAB_SIZE rows, each VECTOR_DIMENSION wide.
embedding_layer = torch.nn.Embedding(VOCAB_SIZE, VECTOR_DIMENSION)

print(f"Embedding Layer Weights (Random Initialization):\n{embedding_layer.weight}")

Embedding Layer Weights (Random Initialization):
Parameter containing:
tensor([[-0.1117, -0.4966,  0.1631, -0.8817,  0.0539,  0.6684, -0.0597, -0.4675],
        [-0.2153,  0.8840, -0.7584, -0.3689, -0.3424, -1.4020,  0.3206, -1.0219],
        [ 0.7988, -0.0923, -0.7049, -1.6024,  0.2891,  0.4899, -0.3853, -0.7120],
        [ 0.7667,  0.0190,  0.0220,  1.1532, -0.3393,  0.1559,  0.8966, -0.2968],
        [-0.6857, -0.0496, -1.2485, -0.8509, -0.7690, -1.5606, -0.5309,  0.2178]],
       requires_grad=True)


### Access Vector with ID

In [11]:
# Token ID whose embedding row we want to inspect.
idx = 3

# The layer is called with a tensor of indices, so wrap the single ID in a
# one-element tensor; the result then has one row (shape 1 x VECTOR_DIMENSION).
idx_tensor = torch.tensor([idx])
embedding_vector = embedding_layer(idx_tensor)

print(f"Embedding vector for id={idx}:\n\n{embedding_vector}")

Embedding vector for id=3:

tensor([[ 0.7667,  0.0190,  0.0220,  1.1532, -0.3393,  0.1559,  0.8966, -0.2968]],
       grad_fn=<EmbeddingBackward0>)


### Access Vectors with IDs

In [10]:
# Look up several IDs at once: the layer returns one weight row per ID,
# in the same order as the IDs were given.
embedding_vectors = embedding_layer(input_token_ids)

# Alias kept for the message below.
ids = input_token_ids

print(f"Vector embeddings for the ids {ids}\n\n{embedding_vectors}")

Vector embeddings for the ids tensor([2, 1, 4])

tensor([[ 0.7988, -0.0923, -0.7049, -1.6024,  0.2891,  0.4899, -0.3853, -0.7120],
        [-0.2153,  0.8840, -0.7584, -0.3689, -0.3424, -1.4020,  0.3206, -1.0219],
        [-0.6857, -0.0496, -1.2485, -0.8509, -0.7690, -1.5606, -0.5309,  0.2178]],
       grad_fn=<EmbeddingBackward0>)
