# T5 Finetuning Playground

## 1. Feed forward simple translation task

In [1]:
import torch
from models import device, tokenizer, T5WithAdapter

sample_text = """
translate previous numbers to English typed text. For example: 1 -> one, 7 -> seven
"""

# Tokenize
encodings = tokenizer(
    sample_text,
    return_tensors="pt",
    padding=True,
    truncation=True,
).to(device)

  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


First, let's ensure the adapter works as expected.

In [2]:
# Test adapter

from models import TimeSeriesAdapter

input_tensor = torch.randn(1, 20).to(device)

adapter = TimeSeriesAdapter()
adapter = adapter.to(device)

output_tensor = adapter(input_tensor)
print(output_tensor.shape)

model = T5WithAdapter(freeze_t5=True)
model = model.to(device)

torch.Size([1, 768])


In [9]:
# Create empty 1x20 tensor
empty_time_series_features = torch.zeros(1, 20).to(device)

# Switch to eval mode
model.eval()

with torch.no_grad():
    generated_text = model.generate(
        encodings.input_ids, encodings.attention_mask, empty_time_series_features
    )

for i, text in enumerate(generated_text):
    print(f"Output {i+1}: {text}")

TypeError: T5WithAdapter.generate() takes 2 positional arguments but 4 were given

## 2. Practice with training

First, we create the dataset.

In [7]:
from num2words import num2words

train_dataset = []

for i in range(1, 1000):
    train_dataset.append((i, num2words(i)))

print(train_dataset[20:30])

test_dataset = [
    (11, "eleven"),
    (12, "twelve"),
    (13, "thirteen"),
    (14, "fourteen"),
    (15, "fifteen"),
    (16, "sixteen"),
    (17, "seventeen"),
]

[(21, 'twenty-one'), (22, 'twenty-two'), (23, 'twenty-three'), (24, 'twenty-four'), (25, 'twenty-five'), (26, 'twenty-six'), (27, 'twenty-seven'), (28, 'twenty-eight'), (29, 'twenty-nine'), (30, 'thirty')]


Then, we train on the dataset.

In [8]:
epochs = 1000

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

# Set the model to train mode
model.train()

# Track losses for each epoch
epoch_losses = []

for epoch in range(epochs):
    epoch_loss = 0
    
    for number, output in train_dataset:
        # zero the gradients
        optimizer.zero_grad()  # Use optimizer.zero_grad() instead of model.zero_grad()

        # build the target encodings
        target_encodings = tokenizer(
            output,
            return_tensors="pt",
            padding=True,
            truncation=True,
        ).to(device)

        # build the input feature
        input_feature = torch.zeros(1, 20).to(device)
        input_feature[0, 0] = number

        # forward pass - no need to calculate input_embeds and attention_mask separately
        outputs = model(
            time_series_features=input_feature,
            labels=target_encodings.input_ids,
        )

        # compute the loss
        loss = outputs.loss
        epoch_loss += loss.item()

        # backward pass
        loss.backward()

        # update the weights
        optimizer.step()
    
    # Print average loss for the epoch
    avg_epoch_loss = epoch_loss / len(train_dataset)
    epoch_losses.append(avg_epoch_loss)
    print(f"Epoch {epoch+1}/{epochs}, Average Loss: {avg_epoch_loss:.4f}")

Epoch 1/1000, Average Loss: 1.4568
Epoch 2/1000, Average Loss: 1.3211


KeyboardInterrupt: 

In [None]:
# Evaluate the model

model.eval()

with torch.no_grad():
    for number, output in [
        (1, None),
        (2, None),
        (3, None),
        (4, None),
        (5, None),
        (6, None),
        (7, None),
        (8, None),
        (9, None),
        (10, None),
        (11, None),
        (12, None),
        (13, None),
        (14, None),
    ]:
        # build the input feature
        input_feature = torch.zeros(1, 20).to(device)
        input_feature[0, 0] = number

        # forward pass
        outputs = model.generate(
            input_feature
        )
        print(f"Input: {number}, Output: {outputs[0]}")

NameError: name 'instruction_encodings' is not defined