In [1]:
import sys
import os
# Make the repository root (the parent of the notebook's working directory)
# importable, so that project packages can be imported from this notebook.
repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if repo_root not in sys.path:
    # Prepend so the local checkout wins over any installed copy.
    sys.path.insert(0, repo_root)

import torch

In [2]:
# Four toy token IDs; each value is later used as a row index into the
# small embedding layer to demonstrate the lookup.
input_ids = torch.tensor([2, 3, 5, 1])

In [3]:
# Toy sizes for the demonstration: a 6-token vocabulary, each token mapped
# to a 3-dimensional vector.
vocab_size, output_dim = 6, 3

In [4]:
# Fix the RNG seed so the randomly initialised weights are the same on
# every run.
torch.manual_seed(123)

# One weight row per vocabulary entry, one column per embedding dimension.
embedding_layer = torch.nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=output_dim,
)

# Show the weights twice: first as the trainable Parameter
# (requires_grad=True), then as the plain tensor behind it.
print(embedding_layer.weight, embedding_layer.weight.data, sep="\n")

Parameter containing:
tensor([[ 0.3374, -0.1778, -0.1690],
        [ 0.9178,  1.5810,  1.3010],
        [ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-1.1589,  0.3255, -0.6315],
        [-2.8400, -0.7849, -1.4096]], requires_grad=True)
tensor([[ 0.3374, -0.1778, -0.1690],
        [ 0.9178,  1.5810,  1.3010],
        [ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-1.1589,  0.3255, -0.6315],
        [-2.8400, -0.7849, -1.4096]])


In [5]:
# Embedding a single token ID is a row lookup: ID 3 returns row 3
# (zero-based) of the weight matrix printed above.
print(embedding_layer(torch.tensor([3])))

tensor([[-0.4015,  0.9666, -1.1481]], grad_fn=<EmbeddingBackward0>)


In [6]:
# Embed all IDs in input_ids at once: output row i is the weight-matrix row
# selected by input_ids[i].
print(embedding_layer(input_ids))

tensor([[ 1.2753, -0.2010, -0.1606],
        [-0.4015,  0.9666, -1.1481],
        [-2.8400, -0.7849, -1.4096],
        [ 0.9178,  1.5810,  1.3010]], grad_fn=<EmbeddingBackward0>)


In [7]:
# Switch from the toy sizes to realistic ones.
# NOTE(review): 50257 presumably matches the vocabulary of the tokenizer used
# by create_dataloader_v1 (it is the GPT-2 BPE size) -- confirm.
# NOTE: this rebinds vocab_size / output_dim from the toy example above, so
# re-running the toy embedding cell after this one would build a
# 50257 x 256 layer instead of the 6 x 3 one.
vocab_size, output_dim = 50257, 256

# Lookup table that maps each token ID to a 256-dimensional vector.
token_embedding_layer = torch.nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=output_dim,
)

In [8]:
from lib.data_sampling import create_dataloader_v1

In [9]:
# Read the whole text file into one string. The path is relative to the
# notebook's working directory; read mode is open()'s default.
with open("the-verdict.txt", encoding="utf-8") as f:
    raw_text = f.read()

In [10]:
# Number of token IDs per sample (context window length).
max_length = 4

# Build a loader that yields (inputs, targets) batches of 8 windows, each
# max_length tokens long.
# NOTE(review): stride == max_length presumably means consecutive windows do
# not overlap -- confirm against create_dataloader_v1.
dataloader = create_dataloader_v1(
    raw_text,
    batch_size=8,
    max_context_length=max_length,
    stride=max_length,
    num_workers=0,
)

In [11]:
# Pull the first batch from the loader; each batch unpacks into a pair of
# tensors (inputs, targets).
data_iter = iter(dataloader)
inputs, targets = next(data_iter)

In [12]:
# Token IDs of the batch: 8 rows (batch_size), each a window of
# 4 (max_length) token IDs.
print(inputs)

tensor([[24818,   417,    12, 12239],
        [  314,  3114,   379,   262],
        [ 2156,   286,  4116,    13],
        [  866,   262,  2119,    11],
        [ 3363,    11,   340,   373],
        [  198,     1,    40,  2900],
        [  465, 14475,    13,   198],
        [ 3081,   286,  2045,  1190]])


In [13]:
# Targets are the inputs shifted one position to the left: each row holds
# the next token for every position of the matching input row.
print(targets)

tensor([[  417,    12, 12239,   438],
        [ 3114,   379,   262, 50085],
        [  286,  4116,    13,   679],
        [  262,  2119,    11, 12225],
        [   11,   340,   373,   314],
        [    1,    40,  2900,   736],
        [14475,    13,   198,   198],
        [  286,  2045,  1190,  4119]])


In [14]:
# (batch_size, max_length) -> 8 windows of 4 token IDs.
print(inputs.shape)

torch.Size([8, 4])


In [15]:
# Replace every token ID in the batch with its 256-dimensional embedding
# vector, adding a trailing dimension: (8, 4) -> (8, 4, 256).
token_embeddings = token_embedding_layer(inputs)
print(token_embeddings.shape)

torch.Size([8, 4, 256])


In [17]:
# One learnable vector per position in the context window, with the same
# width as the token embeddings so the two can be added.
context_length = max_length
position_embedding_layer = torch.nn.Embedding(
    num_embeddings=context_length,
    embedding_dim=output_dim,
)

# Look up positions 0 .. context_length - 1 in order, giving one row per
# position.
positional_embeddings = position_embedding_layer(torch.arange(context_length))

In [18]:
# (context_length, output_dim) -> one 256-dimensional vector per position.
print(positional_embeddings.shape)

torch.Size([4, 256])


In [19]:
# Broadcasting adds the (4, 256) positional embeddings to each of the
# 8 sequences in the (8, 4, 256) token embeddings.
input_embeddings = token_embeddings + positional_embeddings

In [20]:
# Shape is unchanged by the addition: (8, 4, 256).
print(input_embeddings.shape)

torch.Size([8, 4, 256])
