<a href="https://colab.research.google.com/github/edmarRod/autowiki/blob/main/trec_car_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Get dataset from drive

In [1]:
from google.colab import drive
import json

drive.mount('/content/drive')
path = r"drive/MyDrive/Unicamp/Pos/ia376e_2021S2/autowiki/"

with open(path + 'sample.json', 'r') as f:
  data = json.load(f)

Mounted at /content/drive


# Transform to training dataset

In [2]:
dataset = []
for key, val in data.items():
  try:
    x = key + '[SEP]' + val['abstract']
    y = '[SEP]'.join(val['sections'])
    if y == '':
      raise ValueError()
    dataset.append((x,y))
  except:
    pass

# Training

In [3]:
! pip install sacrebleu
! pip install pytorch-lightning
! pip install transformers
! pip install sentencepiece

Collecting sacrebleu
  Downloading sacrebleu-2.0.0-py3-none-any.whl (90 kB)
[?25l[K     |███▋                            | 10 kB 30.9 MB/s eta 0:00:01[K     |███████▏                        | 20 kB 29.8 MB/s eta 0:00:01[K     |██████████▉                     | 30 kB 18.9 MB/s eta 0:00:01[K     |██████████████▍                 | 40 kB 16.1 MB/s eta 0:00:01[K     |██████████████████              | 51 kB 8.9 MB/s eta 0:00:01[K     |█████████████████████▋          | 61 kB 8.7 MB/s eta 0:00:01[K     |█████████████████████████▎      | 71 kB 9.0 MB/s eta 0:00:01[K     |████████████████████████████▉   | 81 kB 10.0 MB/s eta 0:00:01[K     |████████████████████████████████| 90 kB 6.1 MB/s 
Collecting portalocker
  Downloading portalocker-2.3.2-py2.py3-none-any.whl (15 kB)
Collecting colorama
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.4 portalocker-2.3.2 sacrebleu-2

In [4]:
# Configurações gerais
model_name = "unicamp-dl/ptt5-small-portuguese-vocab"
batch_size = 64
accumulate_grad_batches = 2
source_max_length = 128
target_max_length = 128
learning_rate = 1e-3

In [5]:
# Importar todos os pacotes de uma só vez para evitar duplicados ao longo do notebook.
import gzip
import nvidia_smi
import os
import pytorch_lightning as pl
import random
import sacrebleu
import torch
import torch.nn.functional as F

from google.colab import drive

from pytorch_lightning.callbacks import ModelCheckpoint

from transformers import T5ForConditionalGeneration
from transformers import T5Tokenizer
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

from typing import Dict
from typing import List
from typing import Tuple

In [6]:
# Important: Fix seeds so we can replicate results
seed = 123
random.seed(seed)
# np.random.seed(seed)
torch.random.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [7]:
print(f"Pytorch Lightning Version: {pl.__version__}")
nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
print(f"Device name: {nvidia_smi.nvmlDeviceGetName(handle)}")

def gpu_usage():
    global handle
    return str(nvidia_smi.nvmlDeviceGetUtilizationRates(handle).gpu) + '%'

Pytorch Lightning Version: 1.5.0
Device name: b'Tesla P100-PCIE-16GB'


In [8]:
random.shuffle(dataset)

x_train = dataset[:18000]
x_val = dataset[18000:]

In [9]:
tokenizer = T5Tokenizer.from_pretrained(model_name)

Downloading:   0%|          | 0.00/738k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [10]:
class MyDataset(Dataset):
    def __init__(self, text_pairs: List[Tuple[str]], tokenizer,
                 source_max_length: int = 32, target_max_length: int = 32):
        self.tokenizer = tokenizer
        self.text_pairs = text_pairs
        self.source_max_length = source_max_length
        self.target_max_length = target_max_length
        
    def __len__(self):
        return len(self.text_pairs)
    
    def __getitem__(self, idx):
        source, target = self.text_pairs[idx]

        # task_prefix = 'translate English to Portuguese: '
        source_tokenizer_output = self.tokenizer(source, truncation=True, padding='max_length', max_length=self.source_max_length, return_tensors='pt')
        target_tokenizer_output = self.tokenizer(target, truncation=True, padding='max_length', max_length=self.target_max_length, return_tensors='pt')

        source_token_ids = source_tokenizer_output['input_ids'].squeeze(0)
        target_token_ids = target_tokenizer_output['input_ids'].squeeze(0)

        source_mask = source_tokenizer_output['attention_mask'].squeeze(0)
        target_mask = target_tokenizer_output['attention_mask'].squeeze(0)

        original_source = source
        original_target = target
        
        return (source_token_ids, source_mask, target_token_ids, target_mask,
                original_source, original_target)

In [11]:
text_pairs = [('we like pizza', 'eu gosto de pizza')]
dataset_debug = MyDataset(
    text_pairs=text_pairs,
    tokenizer=tokenizer,
    source_max_length=source_max_length,
    target_max_length=target_max_length)

dataloader_debug = DataLoader(dataset_debug, batch_size=10, shuffle=True, 
                              num_workers=0)

source_token_ids, source_mask, target_token_ids, target_mask, _, _ = next(iter(dataloader_debug))
print('source_token_ids:\n', source_token_ids)
print('source_mask:\n', source_mask)
print('target_token_ids:\n', target_token_ids)
print('target_mask:\n', target_mask)

print('source_token_ids.shape:', source_token_ids.shape)
print('source_mask.shape:', source_mask.shape)
print('target_token_ids.shape:', target_token_ids.shape)
print('target_mask.shape:', target_mask.shape)

source_token_ids:
 tensor([[  31, 1528, 1079,  634, 1241, 7531,    1,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0]])
source_mask:
 tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0,

In [17]:
dataset_train = MyDataset(text_pairs=x_train,
                          tokenizer=tokenizer,
                          source_max_length=source_max_length,
                          target_max_length=target_max_length)

dataset_val = MyDataset(text_pairs=x_val,
                        tokenizer=tokenizer,
                        source_max_length=source_max_length,
                        target_max_length=target_max_length)

# dataset_test = MyDataset(text_pairs=x_test,
#                          tokenizer=tokenizer,
#                          source_max_length=source_max_length,
#                          target_max_length=target_max_length)

train_dataloader = DataLoader(dataset_train, batch_size=batch_size,
                              shuffle=True, num_workers=0)

val_dataloader = DataLoader(dataset_val, batch_size=batch_size, shuffle=False, 
                            num_workers=0)

# test_dataloader = DataLoader(dataset_test, batch_size=batch_size,
#                              shuffle=False, num_workers=0)

#TODO change to real test
test_dataloader = val_dataloader

In [13]:
from sacrebleu.metrics import BLEU

class T5Finetuner(pl.LightningModule):

    def __init__(self, tokenizer, train_dataloader, val_dataloader,
                 test_dataloader, learning_rate, target_max_length=32):
        super(T5Finetuner, self).__init__()
        
        self._train_dataloader = train_dataloader
        self._val_dataloader = val_dataloader
        self._test_dataloader = test_dataloader

        self.model = T5ForConditionalGeneration.from_pretrained(model_name)
        
        self.tokenizer = tokenizer
        self.learning_rate = learning_rate
        self.target_max_length = target_max_length

        self.bleu = BLEU()

        self.debug=True

    def forward(self, source_token_ids, source_mask, target_token_ids=None,
                target_mask=None):

        if self.training:
            loss = self.model(input_ids=source_token_ids, attention_mask=source_mask, labels=target_token_ids).loss
            return loss
        else:
            generated_ids = self.model.generate(input_ids=source_token_ids, attention_mask=source_mask, max_length=self.target_max_length)
            return generated_ids

    def training_step(self, batch, batch_nb):
        source_token_ids, source_mask, target_token_ids, target_mask, _, _ = batch
         
        # fwd
        loss = self(source_token_ids, source_mask, target_token_ids, target_mask)

        # logs
        tensorboard_logs = {'train_loss': loss}
        progress_bar = {'gpu_usage': gpu_usage()}
        return {'loss': loss, 'log': tensorboard_logs,
                'progress_bar': progress_bar}

    def validation_step(self, batch, batch_nb):
        avg_bleu = self.get_bleu(batch, batch_nb)
        return {'val_bleu': avg_bleu}

    def test_step(self, batch, batch_nb):
        avg_bleu = self.get_bleu(batch, batch_nb)
        return {'test_bleu': avg_bleu}

    def get_bleu(self, batch, batch_nb):
      source_token_ids, source_mask, target_token_ids, target_mask, original_source, original_target = batch

      generated_ids = self(source_token_ids, source_mask, target_token_ids, target_mask)

      output_seq = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

      avg_bleu = self.bleu.corpus_score(output_seq, [original_target]).score

      if self.debug:
        print(f"""Source: {original_source[:1]},
              Target: {original_target[:1]},
              Predicted: {output_seq[:1]}""")

      return avg_bleu


    def validation_epoch_end(self, outputs):
        avg_bleu = sum([x['val_bleu'] for x in outputs]) / len(outputs)

        self.log("avg_val_bleu", avg_bleu, prog_bar=True)

    def test_epoch_end(self, outputs):
        avg_bleu = sum([x['test_bleu'] for x in outputs]) / len(outputs)

        self.log("avg_test_bleu", avg_bleu, prog_bar=True)
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(
            [p for p in self.parameters() if p.requires_grad],
            lr=self.learning_rate, eps=1e-08)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=1.0)  # This is the same as no LR decay.
        return {'optimizer': optimizer, 'lr_scheduler': scheduler, 'monitor': 'avg_val_bleu'}

    def train_dataloader(self):
        return self._train_dataloader

    def val_dataloader(self):
        return self._val_dataloader

    def test_dataloader(self):
        return self._test_dataloader

In [21]:
model = T5Finetuner(tokenizer=tokenizer,
                    train_dataloader=train_dataloader,
                    val_dataloader=val_dataloader,
                    test_dataloader=test_dataloader,
                    learning_rate=learning_rate, 
                    target_max_length=target_max_length)

In [22]:
trainer = pl.Trainer(gpus=1,
                     precision=16, 
                     checkpoint_callback=False,  # Disable checkpoint saving.
                     fast_dev_run=True)
trainer.fit(model)
trainer.test(model)
del model  # Para não ter estouro de mémoria da GPU

Using 16bit native Automatic Mixed Precision (AMP)
  f"Setting `Trainer(checkpoint_callback={checkpoint_callback})` is deprecated in v1.5 and will "
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Running in fast_dev_run mode: will run a full train, val, test and prediction loop using 1 batch(es).
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 60.5 M
-----------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
121.013   Total estimated model params size (MB)
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: 0it [00:00, ?it/s]

  f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically"


Validating: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

In [23]:
trainer = pl.Trainer(gpus=1,
                     precision=16,
                     max_epochs=30,
                     check_val_every_n_epoch=10,
                     checkpoint_callback=False,  # Disable checkpoint saving
                     overfit_batches=0.005)

# Dataset usando apenas um batch de amostras de treino.
dataset_debug = MyDataset(text_pairs=x_train,
                          tokenizer=tokenizer,
                          source_max_length=source_max_length,
                          target_max_length=target_max_length)

debug_dataloader = DataLoader(dataset_debug, batch_size=batch_size,
                              shuffle=False, num_workers=0)

model = T5Finetuner(tokenizer=tokenizer,
                    train_dataloader=debug_dataloader,
                    val_dataloader=debug_dataloader,
                    test_dataloader=None,
                    learning_rate=learning_rate, 
                    target_max_length=target_max_length)

trainer.fit(model)
del model  # Para não ter estouro de mémoria da GPU

Using 16bit native Automatic Mixed Precision (AMP)
  f"Setting `Trainer(checkpoint_callback={checkpoint_callback})` is deprecated in v1.5 and will "
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 60.5 M
-----------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
121.013   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Source: ('Barium[SEP]Barium is a chemical element with the symbol\xa0Ba and atomic number\xa056. It is the fifth element in group 2 and is a soft, silvery alkaline earth metal. Because of its high chemical reactivity, barium is never found in nature as a free element.The most common minerals of barium are baryte (barium sulfate, BaSO4) and witherite (barium carbonate, BaCO3). The name barium originates from the alchemical derivative "baryta", from Greek βαρὺς (barys), meaning "heavy". Baric is the adjectival form of barium. Barium was identified as a new element in 1774, but not reduced to a metal until 1808 with the advent of electrolysis.Barium has few industrial applications. Historically, it was used as a getter for vacuum tubes and in oxide form as the emissive coating on indirectly heated cathodes. It is a component of YBCO (high-temperature superconductors) and electroceramics, and is added to steel and cast iron to reduce the size of carbon grains within the microstructure. Bar

  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: 0it [00:00, ?it/s]

  f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically"


Validating: 0it [00:00, ?it/s]

Source: ('Barium[SEP]Barium is a chemical element with the symbol\xa0Ba and atomic number\xa056. It is the fifth element in group 2 and is a soft, silvery alkaline earth metal. Because of its high chemical reactivity, barium is never found in nature as a free element.The most common minerals of barium are baryte (barium sulfate, BaSO4) and witherite (barium carbonate, BaCO3). The name barium originates from the alchemical derivative "baryta", from Greek βαρὺς (barys), meaning "heavy". Baric is the adjectival form of barium. Barium was identified as a new element in 1774, but not reduced to a metal until 1808 with the advent of electrolysis.Barium has few industrial applications. Historically, it was used as a getter for vacuum tubes and in oxide form as the emissive coating on indirectly heated cathodes. It is a component of YBCO (high-temperature superconductors) and electroceramics, and is added to steel and cast iron to reduce the size of carbon grains within the microstructure. Bar

Validating: 0it [00:00, ?it/s]

Source: ('Barium[SEP]Barium is a chemical element with the symbol\xa0Ba and atomic number\xa056. It is the fifth element in group 2 and is a soft, silvery alkaline earth metal. Because of its high chemical reactivity, barium is never found in nature as a free element.The most common minerals of barium are baryte (barium sulfate, BaSO4) and witherite (barium carbonate, BaCO3). The name barium originates from the alchemical derivative "baryta", from Greek βαρὺς (barys), meaning "heavy". Baric is the adjectival form of barium. Barium was identified as a new element in 1774, but not reduced to a metal until 1808 with the advent of electrolysis.Barium has few industrial applications. Historically, it was used as a getter for vacuum tubes and in oxide form as the emissive coating on indirectly heated cathodes. It is a component of YBCO (high-temperature superconductors) and electroceramics, and is added to steel and cast iron to reduce the size of carbon grains within the microstructure. Bar

Validating: 0it [00:00, ?it/s]

Source: ('Barium[SEP]Barium is a chemical element with the symbol\xa0Ba and atomic number\xa056. It is the fifth element in group 2 and is a soft, silvery alkaline earth metal. Because of its high chemical reactivity, barium is never found in nature as a free element.The most common minerals of barium are baryte (barium sulfate, BaSO4) and witherite (barium carbonate, BaCO3). The name barium originates from the alchemical derivative "baryta", from Greek βαρὺς (barys), meaning "heavy". Baric is the adjectival form of barium. Barium was identified as a new element in 1774, but not reduced to a metal until 1808 with the advent of electrolysis.Barium has few industrial applications. Historically, it was used as a getter for vacuum tubes and in oxide form as the emissive coating on indirectly heated cathodes. It is a component of YBCO (high-temperature superconductors) and electroceramics, and is added to steel and cast iron to reduce the size of carbon grains within the microstructure. Bar

In [25]:
max_epochs = 10

checkpoint_path = r"drive/MyDrive/Unicamp/Pos/ia376e_2021S2/autowiki/checkpoints.ckpt"
checkpoint_dir = os.path.dirname(os.path.abspath(checkpoint_path))
os.makedirs(checkpoint_dir, exist_ok=True)
print(f'Files in {checkpoint_dir}: {os.listdir(checkpoint_dir)}')
print(f'Saving checkpoints to {checkpoint_dir}')
checkpoint_callback = ModelCheckpoint(dirpath=checkpoint_dir,
                                      save_top_k=1)  # Keeps all checkpoints.

resume_from_checkpoint = None
if os.path.exists(checkpoint_path):
    print(f'Restoring checkpoint: {checkpoint_path}')
    resume_from_checkpoint = checkpoint_path

trainer = pl.Trainer(gpus=1,
                     precision=16,
                     max_epochs=max_epochs,
                     check_val_every_n_epoch=1,
                     accumulate_grad_batches=accumulate_grad_batches,
                     callbacks=[checkpoint_callback],
                     progress_bar_refresh_rate=50,
                     resume_from_checkpoint=resume_from_checkpoint)

model = T5Finetuner(tokenizer=tokenizer,
                    train_dataloader=train_dataloader,
                    val_dataloader=val_dataloader,
                    test_dataloader=test_dataloader,
                    learning_rate=learning_rate, 
                    target_max_length=target_max_length)

trainer.fit(model)

Using 16bit native Automatic Mixed Precision (AMP)
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Files in /content/drive/MyDrive/Unicamp/Pos/ia376e_2021S2/autowiki: ['sample.json']
Saving checkpoints to /content/drive/MyDrive/Unicamp/Pos/ia376e_2021S2/autowiki


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 60.5 M
-----------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
121.013   Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

Training: 0it [00:00, ?it/s]

  f"One of the returned values {set(extra.keys())} has a `grad_fn`. We will detach it automatically"
  "Trying to infer the `batch_size` from an ambiguous collection. The batch size we"


Validating: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

  "Trying to infer the `batch_size` from an ambiguous collection. The batch size we"


Source: ('Kinolhas (Raa Atoll)[SEP]Kinolhas (Dhivehi: ކިނޮޅަސް) is one of the inhabited islands of the Raa Atoll administrative division of the Maldives.',),
              Target: ('Education[SEP]Health care[SEP]Economy[SEP]Crime[SEP]NGOs[SEP]Migration',),
              Predicted: ['History[SEP]History[SEP]Economy[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Education[SEP]Educ']


Validating: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

Validating: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

Validating: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

Validating: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

In [26]:
trainer.test(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

Source: ('SR U class[SEP]The SR U class were 2-6-0 steam locomotives designed by Richard Maunsell for passenger duties on the Southern Railway (SR). The class represented the penultimate stage in the development of the Southern Railway\'s 2-6-0 "family", which improved upon the basic principles established by GWR Chief Mechanical Engineer (CME) George Jackson Churchward for Great Western Railway (GWR) locomotives. The U class design drew from experience with the GWR 4300s and N classes, improved by applying Midland Railway ideas to the design, enabling the SECR to influence development of the 2-6-0 in Britain.The U class was designed in the mid-1920s for production at a time when more obsolete 4-4-0 locomotives were withdrawn, and derived from Maunsell\'s earlier SECR K (“River”) class 2-6-4 tank locomotives. The first 20\xa0members of the U class were rebuilds of the K class locomotives, one of which was involved in the Sevenoaks railway accident. A further 20\xa0U class locomotives w

[{'avg_test_bleu': 9.603736877441406}]