In [1]:
import sys
import importlib
from matplotlib import pyplot as plt

import torch
from torch import nn
from torchtext.datasets import WikiText2
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

  from .autonotebook import tqdm as notebook_tqdm


### Pull in my modules

In [2]:
# Import my modules.
# The folder returned by get_model_folder_path() is appended to sys.path before
# the project imports below run, so this ordering must be kept.
# NOTE(review): presumably that folder holds model.py, data_utils.py and
# model_utils.py — confirm against the repository layout.
from path import get_model_folder_path
sys.path.append(get_model_folder_path())

# Model class plus the data-preparation and training/prediction helpers used below.
from model import Transformer
from data_utils import data_process, batchify
from model_utils import fit, predict


### Initialize Torchtext tools

In [3]:
# Tokenize the training split and build the vocabulary from it.
tokenizer = get_tokenizer('basic_english')
train_iter = WikiText2(split='train')
vocab = build_vocab_from_iterator(
    (tokenizer(line) for line in train_iter),
    specials=['<unk>'],
)
# Use the index of '<unk>' as the default index for lookups.
vocab.set_default_index(vocab['<unk>'])

In [5]:
# Building the vocab exhausted the earlier train_iter, so fresh iterators
# for all three splits are requested here.
train_iter, val_iter, test_iter = WikiText2()

# Run each split through data_process with the shared vocab and tokenizer,
# in train / val / test order.
train_data, val_data, test_data = (
    data_process(split_iter, vocab, tokenizer)
    for split_iter in (train_iter, val_iter, test_iter)
)

#### Print shape of raw data


In [11]:
# Report the length of each processed split.
# NOTE(review): the saved output of this cell matches the first dimension of the
# batched shapes printed further down, and its execution count (11) is later than
# the batching cell's (6) — it was presumably produced after batchify. Re-run the
# notebook top to bottom to refresh it.
for size_label, split_data in (
    ("Training Data Size:", train_data),
    ("Val Data Size:", val_data),
    ("Test Data Size:", test_data),
):
    print(size_label, len(split_data))

Training Data Size: 102499
Val Data Size: 21441
Test Data Size: 24185


### Batching the Data

In [6]:
# Training uses a larger batch size than validation/test.
batch_size, eval_batch_size = 20, 10

# Each name is rebound to its batched form, so re-running this cell would feed
# already-batched data back into batchify.
train_data = batchify(train_data, batch_size)
val_data, test_data = (
    batchify(eval_split, eval_batch_size)
    for eval_split in (val_data, test_data)
)

#### Print shape of datasets
Note that the format is torch.Size([ <sequence_length>, <batch_size>])

In [9]:
# Show the shape of each batched split.
for shape_label, batched_split in (
    ("Train Data Shape:", train_data),
    ("Val Data Shape", val_data),
    ("Test Data Shape:", test_data),
):
    print(shape_label, batched_split.shape)

Train Data Shape: torch.Size([102499, 20])
Val Data Shape torch.Size([21441, 10])
Test Data Shape: torch.Size([24185, 10])


### Basic Model creation

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = Transformer(
    num_tokens=len(vocab),  # vocabulary size
    # dim_model needs a concrete value (leaving it blank is a SyntaxError).
    # 200 is a starting point to tune.
    # NOTE(review): presumably it must be divisible by num_heads — confirm
    # against the Transformer implementation.
    dim_model=200,
    num_heads=2,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dropout_p=0.2,
).to(device)

# Plain SGD over all model parameters, with cross-entropy as the loss.
opt = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

### Training

In [None]:
EPOCHS = 10

# The names train_dataloader / val_dataloader are never defined in this notebook,
# so on a fresh kernel the call raised NameError. The batched splits created by
# the batchify cell are passed instead.
# NOTE(review): fit() lives in model_utils and its expected input format is not
# visible here — confirm that it accepts these batched tensors.
train_loss_list, validation_loss_list = fit(
    model=model,
    opt=opt,
    loss_fn=loss_fn,
    train_dataloader=train_data,
    val_dataloader=val_data,
    epochs=EPOCHS,
)

Training and validating model
------------------------- Epoch 1 -------------------------
X: tensor([[0, 4, 2, 3, 4, 3, 5, 3, 2, 1],
        [0, 2, 2, 5, 5, 4, 2, 2, 3, 1],
        [0, 2, 3, 2, 4, 3, 5, 2, 2, 1],
        [0, 5, 4, 2, 2, 4, 4, 3, 5, 1],
        [0, 2, 2, 2, 3, 4, 4, 5, 4, 1],
        [0, 3, 3, 3, 3, 5, 3, 5, 4, 1],
        [0, 4, 4, 3, 3, 4, 5, 3, 4, 1],
        [0, 4, 3, 5, 4, 4, 5, 2, 5, 1],
        [0, 5, 4, 3, 3, 2, 5, 4, 3, 1],
        [0, 5, 2, 2, 2, 4, 5, 4, 3, 1],
        [0, 3, 4, 3, 5, 5, 2, 4, 5, 1],
        [0, 4, 4, 2, 2, 5, 3, 2, 2, 1],
        [0, 2, 5, 5, 5, 3, 3, 2, 4, 1],
        [0, 5, 5, 2, 2, 5, 5, 2, 4, 1],
        [0, 2, 3, 4, 5, 2, 4, 5, 2, 1],
        [0, 5, 2, 2, 4, 4, 5, 2, 3, 1]])
y_input: tensor([[0, 5, 4, 4, 3, 4, 2, 3, 4],
        [0, 2, 2, 2, 4, 3, 2, 5, 2],
        [0, 5, 5, 2, 5, 5, 2, 2, 4],
        [0, 5, 2, 4, 4, 3, 2, 4, 5],
        [0, 2, 5, 4, 5, 4, 2, 2, 2],
        [0, 4, 2, 4, 4, 3, 5, 5, 5],
        [0, 2, 2, 3, 4, 4, 5, 3, 5]

IndexError: index out of range in self

### Visualization

In [None]:
# Epoch numbers 1..EPOCHS, used both as x values and as tick positions.
epoch_iters = list(range(1, EPOCHS + 1))

plot_axes = plt.axes()
plot_axes.set_xticks(epoch_iters)

# Draw one curve per loss history returned by fit().
for loss_history, curve_label in (
    (train_loss_list, "training loss"),
    (validation_loss_list, "validation loss"),
):
    plt.plot(epoch_iters, loss_history, label=curve_label)

plt.legend()
plt.show()

## Generate new data with predictions

In [None]:
# Here we test some examples to observe how the model predicts.
# Each example is a single-row batch of token ids on the model's device.
# NOTE(review): the leading 2 and trailing 3 look like start/end markers, since
# the first and last ids are stripped before printing below — confirm against
# predict().
examples = [
    # The first id was written as `model.s`, which is not defined anywhere visible
    # in this notebook; it now uses 2 like every other example.
    # NOTE(review): restore the attribute if the Transformer class really exposes it.
    torch.tensor([[2, 0, 0, 0, 0, 0, 0, 0, 0, 3]], dtype=torch.long, device=device),
    torch.tensor([[2, 1, 1, 1, 1, 1, 1, 1, 1, 3]], dtype=torch.long, device=device),
    torch.tensor([[2, 1, 0, 1, 0, 1, 0, 1, 0, 3]], dtype=torch.long, device=device),
    torch.tensor([[2, 0, 1, 0, 1, 0, 1, 0, 1, 3]], dtype=torch.long, device=device),
    torch.tensor([[2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 3]], dtype=torch.long, device=device),
    torch.tensor([[2, 0, 1, 3]], dtype=torch.long, device=device)
]

for idx, example in enumerate(examples):
    result = predict(model, example)
    # Drop the first and last ids from the prediction before reporting it.
    continuation = result[1:-1]
    print(f"Example {idx}")
    print(f"Input: {example.view(-1).tolist()[1:-1]}")
    print(f"Continuation: {continuation}")

    print("len of prediction:", len(continuation))
    print()

## Actual Text Generation
Given our predictions above, let's create a function which can 