### Creating token embeddings

In [1]:
import torch


# Example sequence of four token IDs; it is passed to the embedding layer
# further down to look up one embedding vector per ID.
input_ids = torch.tensor([2, 3, 5, 1], dtype=torch.long)

In [2]:
# Toy configuration: a 6-token vocabulary mapped to 3-dimensional embeddings.
vocab_size, output_dim = 6, 3

# Seed the RNG before building the layer so that the randomly initialised
# weight matrix is the same on every run.
torch.manual_seed(123)
embedding_layer = torch.nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=output_dim,
)

In [3]:
# Display the layer's weight matrix: a trainable Parameter with
# vocab_size rows and output_dim columns.
print(embedding_layer.weight)

Parameter containing:
tensor([[ 0.3374, -0.1778, -0.1690],
        [ 0.9178,  1.5810,  1.3010],
        [ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-1.1589,  0.3255, -0.6315],
        [-2.8400, -0.7849, -1.4096]], requires_grad=True)


In [4]:
## The weight matrix of the embedding layer contains random values.
## These values are optimized during LLM training as part of the LLM optimization itself.
## The weight matrix has 6 rows and 3 columns:
## one row for each of the 6 possible tokens in the vocabulary, and one column for each of the 3 embedding dimensions.


In [9]:
print(embedding_layer(torch.tensor([3]))) # Look up the embedding vector for a single token ID (here, ID 3).

# The embedding layer is essentially a lookup table: the vector returned for
# ID 3 is the row at index 3 of embedding_layer.weight.

tensor([[-0.4015,  0.9666, -1.1481]], grad_fn=<EmbeddingBackward0>)


In [6]:
# Embed the whole input sequence: the result has one row per token ID,
# in the same order as the IDs appear in input_ids.
print(embedding_layer(input_ids))

tensor([[ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-2.8400, -0.7849, -1.4096],
        [ 0.9178,  1.5810,  1.3010]], grad_fn=<EmbeddingBackward0>)
