In [1]:
import sys
sys.path.append("..")

from torch.optim import AdamW
from transformers import T5Tokenizer
from torch.utils.data import DataLoader
from transformers import T5ForConditionalGeneration

from datasets import load_dataset
from transformers import get_linear_schedule_with_warmup
from transformers import set_seed

from scripts.response.training import train_model
from scripts.response.inference import inference_model
from scripts.response.preprocessing import ResponseDataset

from scripts.global_vars import DEVICE, MAX_LENGTH, BATCH_SIZE

In [2]:
# Load the MultiWOZ 2.2 dialogues. `trust_remote_code=True` lets the
# dataset's own loading code run, so only use it with a trusted source.
dataset = load_dataset(path="multi_woz_v22", trust_remote_code=True)

# Handles to the two splits used in this notebook.
train_data, val_data = dataset["train"], dataset["validation"]

In [3]:
# Seed the Python, NumPy and PyTorch RNGs before anything stochastic runs, so
# the shuffled batch order (and the sample batch inspected below) is the same
# on every Restart & Run All.
SEED = 42
set_seed(SEED)

# Tokenizer matching the checkpoint that is fine-tuned later in the notebook.
tokenizer = T5Tokenizer.from_pretrained(
    legacy=True,
    pretrained_model_name_or_path="google/t5-efficient-mini"
)

# Wrap the splits loaded in the previous cell (`train_data` / `val_data`)
# instead of indexing `dataset` a second time.
train_response_dataset = ResponseDataset(
    data=train_data,
    tokenizer=tokenizer,
    max_output_len=MAX_LENGTH
)

valid_response_dataset = ResponseDataset(
    data=val_data,
    tokenizer=tokenizer,
    max_output_len=MAX_LENGTH
)

# Only the training loader is shuffled; validation keeps the dataset order.
train_loader_response = DataLoader(train_response_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader_response = DataLoader(valid_response_dataset, batch_size=BATCH_SIZE)

# Sanity check: pull one batch and confirm the encoder/decoder tensor shapes.
batch = next(iter(train_loader_response))
print("Action IDs shape:", batch['encoder_input_ids'].shape)
print("Response IDs shape:", batch['decoder_input_ids'].shape)

Processing dialogues: 100%|██████████| 8437/8437 [00:03<00:00, 2227.64it/s]
Processing dialogues: 100%|██████████| 1000/1000 [00:00<00:00, 2136.82it/s]

Action IDs shape: torch.Size([64, 128])
Response IDs shape: torch.Size([64, 128])





In [4]:
# Pretrained seq2seq checkpoint to fine-tune, moved onto the target device.
response_model = T5ForConditionalGeneration.from_pretrained("google/t5-efficient-mini")
response_model = response_model.to(DEVICE)

# Training length: one optimizer/scheduler step per batch, for every epoch.
num_epochs = 5
steps_per_epoch = len(train_loader_response)
num_training_steps = steps_per_epoch * num_epochs

# The first tenth of all steps is used for learning-rate warmup.
num_warmup_steps = num_training_steps // 10

optimizer = AdamW(response_model.parameters(), lr=1e-3, eps=1e-8)

# Learning-rate schedule with warmup, built from the step counts above.
scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps,
)

In [None]:
# Fine-tune the response model with the optimizer, LR scheduler and
# train/validation loaders built in the cells above, for `num_epochs` epochs.
# `train_model` is a project helper (scripts.response.training); whatever it
# returns is rebound to `response_model` and used by the cells that follow.
# NOTE(review): `save=True` presumably writes a checkpoint to disk — confirm
# the destination in scripts/response/training.py.
# NOTE(review): the stored output for this cell stops partway through epoch 1
# and the cell has no execution count; re-run to completion before trusting
# the results shown further down.
response_model = train_model(
    response_model,
    optimizer,
    scheduler,
    train_loader_response,
    val_loader_response,
    num_epochs=num_epochs,
    device=DEVICE,
    save=True
)


Epoch 1/5
--------------------------------------------------


Training:   0%|          | 0/888 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Training:  79%|███████▉  | 703/888 [01:30<00:24,  7.61it/s]

In [None]:
# Generate a response for a single example: the action string stored at index
# 4 of the training dataset is passed to the project helper `inference_model`
# together with the tokenizer, MAX_LENGTH and DEVICE. The returned value is
# displayed as the cell output.
# NOTE(review): this example comes from the training split, so it shows fit
# rather than generalisation — consider also trying an entry from
# `valid_response_dataset`.
inference_model(
    response_model,
    tokenizer,
    train_response_dataset.actions[4],
    MAX_LENGTH,
    DEVICE
)

'I was able to book you a table for you. Your reference number is FRGZWQL2 . Is there anything else I can help you with?'

In [None]:
# Show the action string used as the inference input above next to the
# response stored at the same index, for a side-by-side comparison with the
# generated text.
train_response_dataset.actions[4], train_response_dataset.responses[4]

('Booking-Book(ref=FRGZWQL2) | general-reqmore(none=none)',
 'Your booking was successful. Your reference number is FRGZWQL2 . May I help you further?')