In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import T5ForConditionalGeneration, AdamW, AutoTokenizer
from datasets import load_dataset

In [2]:
# Load Multi30k dataset
# dataset = load_dataset("bentrevett/multi30k", split="train[:10000]")  # Load only a subset for demonstration
dataset = load_dataset("bentrevett/multi30k")  # Load only a subset for demonstration

In [3]:
dataset

DatasetDict({
    train: Dataset({
        features: ['en', 'de'],
        num_rows: 29000
    })
    validation: Dataset({
        features: ['en', 'de'],
        num_rows: 1014
    })
    test: Dataset({
        features: ['en', 'de'],
        num_rows: 1000
    })
})

In [4]:
dataset["train"]

Dataset({
    features: ['en', 'de'],
    num_rows: 29000
})

In [5]:
# Initialize T5 tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

In [6]:
# def tokenize_data(example):
#     source_text = example["en"]
#     target_text = example["de"]
#     tokenized_inputs = tokenizer(
#         source_text,
#         padding="max_length",
#         truncation=True,
#         max_length=128,
#         return_tensors="pt"
#     )
#     tokenized_targets = tokenizer(
#         target_text,
#         padding="max_length",
#         truncation=True,
#         max_length=128,
#         return_tensors="pt"
#     )
#     return {
#         "input_ids": tokenized_inputs.input_ids.flatten(),
#         "attention_mask": tokenized_inputs.attention_mask.flatten(),
#         "labels": tokenized_targets.input_ids.flatten(),
#         "labels_attention_mask": tokenized_targets.attention_mask.flatten(),
#     }


# def tokenize_data(example):
#     source_text = example["en"]
#     target_text = example["de"]
#     tokenized_inputs = tokenizer(
#         source_text,
#         padding="max_length",
#         truncation=True,
#         max_length=128,
#         return_tensors="pt"
#     )
#     tokenized_targets = tokenizer(
#         target_text,
#         padding="max_length",
#         truncation=True,
#         max_length=128,
#         return_tensors="pt"
#     )
#     return {
#         "input_ids": tokenized_inputs.input_ids[0],
#         "attention_mask": tokenized_inputs.attention_mask[0],
#         "labels": tokenized_targets.input_ids[0],
#         "labels_attention_mask": tokenized_targets.attention_mask[0],
#     }


# Tokenize and preprocess data
def tokenize_data(batch):
    src_texts = batch["en"]
    tgt_texts = batch["de"]
    tokenized_batch = tokenizer.prepare_seq2seq_batch(src_texts, tgt_texts, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    return {
        "input_ids": tokenized_batch.input_ids,
        "attention_mask": tokenized_batch.attention_mask,
        "labels": tokenized_batch.labels,
    }

In [7]:
dataset = dataset.map(tokenize_data, batched=True)
train_dataloader = DataLoader(dataset['train'], batch_size=8, shuffle=True)

Map:   0%|          | 0/1014 [00:00<?, ? examples/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and targets.

Here is a short example:

model_inputs = tokenizer(src_texts, text_target=tgt_texts, ...)

If you either need to use different keyword arguments for the source and target texts, you should do two calls like
this:

model_inputs = tokenizer(src_texts, ...)
labels = tokenizer(text_target=tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



In [8]:
# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [None]:
optimizer = AdamW(model.parameters(), lr=1e-4)

# for epoch in range(3):  # Adjust number of epochs as needed
#     for batch in train_dataloader:
#         input_ids = batch["input_ids"].to(device)
#         attention_mask = batch["attention_mask"].to(device)
#         labels = batch["labels"].to(device)

#         optimizer.zero_grad()
#         outputs = model(
#             input_ids=input_ids,
#             attention_mask=attention_mask,
#             labels=labels,
#         )
#         loss = outputs.loss
#         loss.backward()
#         optimizer.step()

#         print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

for epoch in range(1):  # Adjust number of epochs as needed
    for batch in train_dataloader:
        # Convert each sequence in the batch to tensors and move them to device
        input_ids = torch.tensor([item for sublist in batch["input_ids"] for item in sublist]).to(device)
        attention_mask = torch.tensor([item for sublist in batch["attention_mask"] for item in sublist]).to(device)
        labels = torch.tensor([item for sublist in batch["labels"] for item in sublist]).to(device)
        
        optimizer.zero_grad()
        outputs = model(
            input_ids=input_ids.unsqueeze(0),  
            attention_mask=attention_mask.unsqueeze(0),  
            labels=labels.unsqueeze(0),  
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")



Epoch 1, Loss: 15.820640563964844
Epoch 1, Loss: 16.644502639770508
Epoch 1, Loss: 12.472295761108398
Epoch 1, Loss: 10.575727462768555
Epoch 1, Loss: 13.414263725280762
Epoch 1, Loss: 11.630632400512695
Epoch 1, Loss: 9.811060905456543
Epoch 1, Loss: 8.01409912109375
Epoch 1, Loss: 7.290450096130371
Epoch 1, Loss: 6.96305513381958
Epoch 1, Loss: 5.64630651473999
Epoch 1, Loss: 4.715773582458496
Epoch 1, Loss: 5.045876979827881
Epoch 1, Loss: 5.344414234161377
Epoch 1, Loss: 3.6341774463653564
Epoch 1, Loss: 3.9242188930511475
Epoch 1, Loss: 2.660752534866333
Epoch 1, Loss: 2.435250997543335
Epoch 1, Loss: 2.7774152755737305
Epoch 1, Loss: 2.654569625854492
Epoch 1, Loss: 2.1253368854522705
Epoch 1, Loss: 2.2670328617095947
Epoch 1, Loss: 2.0567479133605957
Epoch 1, Loss: 1.770506501197815
Epoch 1, Loss: 2.1777429580688477
Epoch 1, Loss: 1.9934669733047485
Epoch 1, Loss: 2.3370704650878906
Epoch 1, Loss: 2.2311651706695557
Epoch 1, Loss: 2.0362725257873535
Epoch 1, Loss: 2.055796384811

Epoch 1, Loss: 1.3288955688476562
Epoch 1, Loss: 1.2222703695297241
Epoch 1, Loss: 1.0391194820404053
Epoch 1, Loss: 1.7210607528686523
Epoch 1, Loss: 1.1088308095932007
Epoch 1, Loss: 1.3022595643997192
Epoch 1, Loss: 1.1816068887710571
Epoch 1, Loss: 1.2047709226608276
Epoch 1, Loss: 1.146044373512268
Epoch 1, Loss: 1.5455447435379028
Epoch 1, Loss: 1.1193480491638184
Epoch 1, Loss: 0.9558967351913452
Epoch 1, Loss: 1.0505965948104858
Epoch 1, Loss: 1.3518226146697998
Epoch 1, Loss: 1.2798446416854858
Epoch 1, Loss: 1.1271202564239502
Epoch 1, Loss: 1.3075603246688843
Epoch 1, Loss: 1.2696930170059204
Epoch 1, Loss: 1.1641111373901367
Epoch 1, Loss: 1.424397349357605
Epoch 1, Loss: 1.1116316318511963
Epoch 1, Loss: 1.0655595064163208
Epoch 1, Loss: 1.3450392484664917
Epoch 1, Loss: 1.2386810779571533
Epoch 1, Loss: 1.2947450876235962
Epoch 1, Loss: 1.1056290864944458
Epoch 1, Loss: 1.34517240524292
Epoch 1, Loss: 1.1433374881744385
Epoch 1, Loss: 1.1724401712417603
Epoch 1, Loss: 1.2

Epoch 1, Loss: 1.0925483703613281
Epoch 1, Loss: 0.9925301671028137
Epoch 1, Loss: 0.9065122008323669
Epoch 1, Loss: 0.9817745685577393
Epoch 1, Loss: 1.169045329093933
Epoch 1, Loss: 0.8850644826889038
Epoch 1, Loss: 1.0782181024551392
Epoch 1, Loss: 1.0987434387207031
Epoch 1, Loss: 1.2270654439926147
Epoch 1, Loss: 0.8952287435531616
Epoch 1, Loss: 1.123023509979248
Epoch 1, Loss: 1.150870442390442
Epoch 1, Loss: 1.1861785650253296
Epoch 1, Loss: 0.8979470133781433
Epoch 1, Loss: 1.0820505619049072
Epoch 1, Loss: 1.2280534505844116
Epoch 1, Loss: 0.9071065783500671
Epoch 1, Loss: 0.9913535118103027
Epoch 1, Loss: 1.0470881462097168
Epoch 1, Loss: 1.2596890926361084
Epoch 1, Loss: 1.0410308837890625
Epoch 1, Loss: 1.0973645448684692
Epoch 1, Loss: 1.2278499603271484
Epoch 1, Loss: 0.8783794045448303
Epoch 1, Loss: 1.0774587392807007
Epoch 1, Loss: 1.177904725074768
Epoch 1, Loss: 1.0632684230804443
Epoch 1, Loss: 1.0536056756973267
Epoch 1, Loss: 0.9760329723358154
Epoch 1, Loss: 0.9

Epoch 1, Loss: 0.9348334074020386
Epoch 1, Loss: 0.9938778877258301
Epoch 1, Loss: 1.9604206085205078
Epoch 1, Loss: 0.8776105046272278
Epoch 1, Loss: 1.1393396854400635
Epoch 1, Loss: 0.9990116357803345
Epoch 1, Loss: 0.9174244403839111
Epoch 1, Loss: 0.7713714838027954
Epoch 1, Loss: 1.152059555053711
Epoch 1, Loss: 1.4263888597488403
Epoch 1, Loss: 1.2049397230148315
Epoch 1, Loss: 1.120863914489746
Epoch 1, Loss: 1.1336137056350708
Epoch 1, Loss: 1.1913071870803833
Epoch 1, Loss: 0.903164803981781
Epoch 1, Loss: 0.8930233716964722
Epoch 1, Loss: 1.2130651473999023
Epoch 1, Loss: 0.8147178888320923
Epoch 1, Loss: 0.8809313774108887
Epoch 1, Loss: 1.0299816131591797
Epoch 1, Loss: 1.3423792123794556
Epoch 1, Loss: 0.9237436652183533
Epoch 1, Loss: 0.8957298994064331
Epoch 1, Loss: 1.0771201848983765
Epoch 1, Loss: 1.0095648765563965
Epoch 1, Loss: 1.0321826934814453
Epoch 1, Loss: 0.7436906695365906
Epoch 1, Loss: 1.4529914855957031
Epoch 1, Loss: 1.379470944404602
Epoch 1, Loss: 1.0

Epoch 1, Loss: 0.9256994128227234
Epoch 1, Loss: 1.169124960899353
Epoch 1, Loss: 1.1519508361816406
Epoch 1, Loss: 0.9163281917572021
Epoch 1, Loss: 0.9894132018089294
Epoch 1, Loss: 0.9285848140716553
Epoch 1, Loss: 0.9802454113960266
Epoch 1, Loss: 1.0638347864151
Epoch 1, Loss: 1.1791871786117554
Epoch 1, Loss: 0.9312562346458435
Epoch 1, Loss: 0.9086233973503113
Epoch 1, Loss: 1.0347466468811035
Epoch 1, Loss: 0.9783806204795837
Epoch 1, Loss: 1.0336073637008667
Epoch 1, Loss: 1.0561054944992065
Epoch 1, Loss: 1.0325379371643066
Epoch 1, Loss: 1.0244206190109253
Epoch 1, Loss: 1.0418925285339355
Epoch 1, Loss: 0.9455490708351135
Epoch 1, Loss: 1.1853142976760864
Epoch 1, Loss: 1.035148024559021
Epoch 1, Loss: 1.1702507734298706
Epoch 1, Loss: 1.2606501579284668
Epoch 1, Loss: 0.8065730333328247
Epoch 1, Loss: 1.072683334350586
Epoch 1, Loss: 0.9969582557678223
Epoch 1, Loss: 1.145089864730835
Epoch 1, Loss: 0.948080837726593
Epoch 1, Loss: 1.0851714611053467
Epoch 1, Loss: 0.91052

Epoch 1, Loss: 1.0745693445205688
Epoch 1, Loss: 0.9548928141593933
Epoch 1, Loss: 0.8847711682319641
Epoch 1, Loss: 0.6692931056022644
Epoch 1, Loss: 1.0216764211654663
Epoch 1, Loss: 1.089709758758545
Epoch 1, Loss: 0.8790271282196045
Epoch 1, Loss: 0.9045357704162598
Epoch 1, Loss: 1.060943603515625
Epoch 1, Loss: 1.018239140510559
Epoch 1, Loss: 1.182443618774414
Epoch 1, Loss: 0.7284845113754272
Epoch 1, Loss: 0.8001991510391235
Epoch 1, Loss: 1.1180188655853271
Epoch 1, Loss: 1.1720999479293823
Epoch 1, Loss: 1.2311949729919434
Epoch 1, Loss: 0.8016642928123474
Epoch 1, Loss: 1.3805642127990723
Epoch 1, Loss: 0.9212497472763062
Epoch 1, Loss: 1.1573493480682373
Epoch 1, Loss: 1.1153925657272339
Epoch 1, Loss: 0.8781893849372864
Epoch 1, Loss: 0.9925499558448792
Epoch 1, Loss: 0.9872846007347107
Epoch 1, Loss: 1.2269577980041504
Epoch 1, Loss: 1.0368698835372925
Epoch 1, Loss: 0.9110292792320251
Epoch 1, Loss: 1.0320338010787964
Epoch 1, Loss: 1.0306347608566284
Epoch 1, Loss: 1.0

Epoch 1, Loss: 0.9888074398040771
Epoch 1, Loss: 0.8259889483451843
Epoch 1, Loss: 0.843654453754425
Epoch 1, Loss: 1.0074650049209595
Epoch 1, Loss: 0.958469569683075
Epoch 1, Loss: 0.972416877746582
Epoch 1, Loss: 1.070834755897522
Epoch 1, Loss: 0.9505323171615601
Epoch 1, Loss: 1.1050238609313965
Epoch 1, Loss: 0.7680666446685791
Epoch 1, Loss: 1.1507292985916138
Epoch 1, Loss: 0.9686877131462097
Epoch 1, Loss: 0.9584017992019653
Epoch 1, Loss: 1.2455490827560425
Epoch 1, Loss: 1.0088424682617188
Epoch 1, Loss: 1.0003879070281982
Epoch 1, Loss: 0.9307259321212769
Epoch 1, Loss: 0.9016596078872681
Epoch 1, Loss: 1.1326358318328857
Epoch 1, Loss: 1.0510433912277222
Epoch 1, Loss: 0.8966044187545776
Epoch 1, Loss: 0.977003276348114
Epoch 1, Loss: 1.4683992862701416
Epoch 1, Loss: 1.1058359146118164
Epoch 1, Loss: 0.8870081305503845
Epoch 1, Loss: 0.9649162292480469
Epoch 1, Loss: 0.9418430924415588
Epoch 1, Loss: 1.0651559829711914
Epoch 1, Loss: 1.1920582056045532
Epoch 1, Loss: 1.16

In [None]:
# Save trained model
model.save_pretrained("model_weights.pth")