Source: https://sebastianraschka.com/blog/2023/self-attention-from-scratch.html

# Embedding an Input Sentence

In [3]:
import torch

sentence = 'Life is short, eat dessert first'

# For simplicity, the dictionary dc is restricted to the words that occur in
# the input sentence. In a real-world application, we would consider all words
# in the training dataset (typical vocabulary sizes range from 30k to 50k).

# Tokenize once: drop commas, then split on whitespace.
words = sentence.replace(',', '').split()

# word -> integer
# De-duplicating with set() before sorting keeps the indices contiguous
# (0 .. len(dc) - 1) even if a word occurs more than once in the sentence.
dc = {word: idx for idx, word in enumerate(sorted(set(words)))}

sentence_int = torch.tensor([dc[word] for word in words])
print(sentence_int)

# Using the integer-vector representation of the input sentence, we can use an
# embedding layer to encode the inputs into a real-vector embedding.
# Here, we use a 16-dimensional embedding such that each input word is
# represented by a 16-dimensional vector.
# Since the sentence consists of 6 words, this results in a 6×16 embedding.
torch.manual_seed(123)  # fixed seed so the embedding weights are reproducible
# One embedding row per vocabulary entry; derived from dc rather than
# hard-coded so the layer always matches the index range used above.
embed = torch.nn.Embedding(len(dc), 16)
# detach() returns the looked-up vectors without gradient tracking.
embedded_sentence = embed(sentence_int).detach()

print(embedded_sentence)
print(embedded_sentence.shape)

tensor([0, 4, 5, 2, 1, 3])
