In [1]:
!pip install bert_score
!pip install rouge_score
!pip install datasets

import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer

from datasets import load_metric, Dataset
metric = load_metric("rouge")
from bert_score import score

import pandas as pd
import numpy as np
import nltk

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13
Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=904929d58c420fd04e3aa5fb4e726ddf236bd7b9a207e1a967f5839ed1442c29
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


2024-05-19 14:53:44.234526: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-19 14:53:44.234628: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-19 14:53:44.374791: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  metric = load_metric("rouge")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

In [2]:
# Pretrained checkpoint that is fine-tuned below; the tokenizer and the model
# are loaded from the same hub id so their vocabularies match.
BASE_CHECKPOINT = "google-t5/t5-small"

tokenizer = T5Tokenizer.from_pretrained(BASE_CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(BASE_CHECKPOINT)

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [3]:
# Lower bound on generated summary length; passed as `min_length` to
# `generate()` in the test-set evaluation cells.
MIN_ALLOWED_SEQUENCE = 30
# Upper bound on sequence length in tokens: used as the tokenizer truncation
# limit, as the row filter on `reference_tokens_preprocessed`, and as the
# generation `max_length`.
MAX_ALLOWED_SEQUENCE = 2048
# Per-device batch size for both training and evaluation.
BATCH_SIZE = 1
# Number of steps to accumulate; reused for both gradient accumulation and
# evaluation accumulation in the training arguments.
ACCUMULATION_STEPS = 4
# Optimizer learning rate passed to the training arguments.
LEARNING_RATE = 1e-6
# Number of training epochs.
EPOCHS = 20

# Collator handed to the trainer to assemble tokenized examples into batches.
data_collator = DataCollatorForSeq2Seq(tokenizer)

In [4]:
# Raise the model's default generation length cap to the maximum sequence
# length used throughout the notebook.
# NOTE(review): the training log reports
# "Non-default generation parameters: {'max_length': 2048}" whenever a
# checkpoint is saved, which appears to stem from this assignment. Consider
# setting the limit through the generation config / trainer generation
# arguments instead — confirm against the installed transformers version.
model.config.max_length = MAX_ALLOWED_SEQUENCE

In [5]:
def preprocess_function(examples):
    """Tokenize a batch of documents and their reference summaries.

    Args:
        examples: Batch mapping with a "reference" column (source documents)
            and a "summary" column (target summaries).

    Returns:
        Dict with "input_ids" and "attention_mask" for the prefixed source
        documents and "labels" holding the token ids of the summaries. Both
        sides are truncated to MAX_ALLOWED_SEQUENCE tokens.
    """
    # T5 is prompted with a task prefix in front of every source document.
    prompts = []
    for document in examples["reference"]:
        prompts.append("summarize: " + document)

    tokenizer_kwargs = {"truncation": True, "max_length": MAX_ALLOWED_SEQUENCE}
    encoded_prompts = tokenizer(prompts, **tokenizer_kwargs)
    encoded_summaries = tokenizer(examples["summary"], **tokenizer_kwargs)

    return dict(
        input_ids=encoded_prompts.input_ids,
        attention_mask=encoded_prompts.attention_mask,
        labels=encoded_summaries.input_ids,
    )

In [6]:
def _within_length_limit(frame):
    """Keep rows whose reference token count is below MAX_ALLOWED_SEQUENCE."""
    short_enough = frame['reference_tokens_preprocessed'] < MAX_ALLOWED_SEQUENCE
    return frame[short_enough].reset_index(drop=True)


def _to_tokenized_dataset(frame):
    """Wrap a DataFrame in a Dataset and tokenize it batch-wise."""
    return Dataset.from_pandas(frame).map(preprocess_function, batched=True)


# Read both splits, drop over-long documents, then tokenize each split.
train_df = pd.read_csv("train_processed.csv")
valid_df = pd.read_csv("validation_processed.csv")

train_df = _within_length_limit(train_df)
valid_df = _within_length_limit(valid_df)

train_dataset = _to_tokenized_dataset(train_df)
valid_dataset = _to_tokenized_dataset(valid_df)

Map:   0%|          | 0/438 [00:00<?, ? examples/s]

Map:   0%|          | 0/14 [00:00<?, ? examples/s]

In [7]:
def compute_metrics(eval_pred):
    """Score generated summaries against references with ROUGE and BERTScore.

    Args:
        eval_pred: ``(predictions, labels)`` pair of token-id arrays. Either
            array may contain ``-100`` padding; ``predictions`` may also be a
            tuple whose first element holds the generated ids.

    Returns:
        Dict of metric name -> value rounded to 4 decimals: the ROUGE
        mid F-measures (scaled to 0-100), ``bert_precision``, ``bert_recall``
        and ``bert_f1`` (scaled to 0-100), and ``gen_len``, the mean number of
        non-padding tokens per prediction.
    """
    predictions, labels = eval_pred
    # Some models return extra outputs alongside the generated ids.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # -100 is a padding sentinel that cannot be decoded; it can appear in the
    # predictions too when generated batches of different lengths are gathered.
    # Replace it before decoding so it is also excluded from gen_len below.
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Rouge expects a newline after each sentence
    decoded_preds = ["\n".join(nltk.sent_tokenize(pred.strip())) for pred in decoded_preds]
    decoded_labels = ["\n".join(nltk.sent_tokenize(label.strip())) for label in decoded_labels]

    # Calculate ROUGE score
    rouge_result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    rouge_result = {key: value.mid.fmeasure * 100 for key, value in rouge_result.items()}

    # Calculate BERTScore
    P, R, F1 = score(decoded_preds, decoded_labels, lang='en', verbose=False)
    bertscore_result = {
        "bert_precision": P.mean().item() * 100,
        "bert_recall": R.mean().item() * 100,
        "bert_f1": F1.mean().item() * 100
    }

    # Calculate average prediction length (padding tokens are not counted)
    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]
    gen_len = np.mean(prediction_lens)

    result = {**rouge_result, **bertscore_result, "gen_len": gen_len}

    return {k: round(v, 4) for k, v in result.items()}

In [8]:
# Training configuration: evaluate and checkpoint once per epoch, generate
# summaries during evaluation so `compute_metrics` receives token ids, and
# accumulate ACCUMULATION_STEPS batches of BATCH_SIZE per optimizer step.
args = Seq2SeqTrainingArguments(
    output_dir="./my_fine_tuned_t5_small_model",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    weight_decay=0.01,
    num_train_epochs=EPOCHS,
    predict_with_generate=True,
    gradient_accumulation_steps=ACCUMULATION_STEPS,
    eval_accumulation_steps=ACCUMULATION_STEPS,
    report_to="none"
)

trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# The former `dataloader_config = DataLoaderConfiguration(...)` statement was
# removed: `DataLoaderConfiguration` is never imported in this notebook (so the
# line raises NameError on a fresh kernel) and its result was never passed to
# the trainer or used anywhere else.


In [9]:
# Run fine-tuning with the arguments configured above. Evaluation metrics are
# reported per epoch, and the returned TrainOutput is shown as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Bert Precision,Bert Recall,Bert F1,Gen Len
0,No log,7.535876,12.2895,5.1449,8.9611,11.5133,85.387,78.2573,81.6565,76.5714
2,No log,4.462596,12.8383,5.1543,9.3347,12.0832,85.051,78.2721,81.5123,82.2143
4,6.610200,3.954201,13.4787,6.0715,9.8879,12.6272,85.453,78.5332,81.8389,84.2143
6,6.610200,3.715705,13.4456,5.3659,9.6541,12.4933,84.434,78.1274,81.1458,90.5
8,6.610200,3.596324,13.2509,5.3275,9.368,12.3065,84.6207,78.2216,81.2872,86.3571
10,4.256000,3.522705,14.125,6.0345,10.2565,13.1485,84.6827,78.4354,81.4308,92.8571
12,4.256000,3.475759,14.2918,5.9189,10.0975,13.1645,84.5974,78.43,81.3874,94.8571
14,3.996200,3.446184,14.4238,5.9677,10.2074,13.3584,84.5104,78.4294,81.348,95.5714
16,3.996200,3.428485,14.1061,5.7894,10.0648,13.0534,84.4271,78.3508,81.2666,95.2143
18,3.915900,3.420532,14.1292,5.7872,10.0911,13.114,84.4289,78.3524,81.2683,95.2857




tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Non-default generation parameters: {'max_length': 2048}
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Non-default generation parameters: {'max_length': 2048}
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Non-default 

TrainOutput(global_step=2180, training_loss=4.62855342200043, metrics={'train_runtime': 2150.1089, 'train_samples_per_second': 4.074, 'train_steps_per_second': 1.014, 'total_flos': 2873064045969408.0, 'train_loss': 4.62855342200043, 'epoch': 19.91})

In [10]:
# Load the test split and keep only documents below the token limit, using the
# same filter as the train/validation splits.
test_df = (
    pd.read_csv("test_processed.csv")
    .loc[lambda frame: frame['reference_tokens_preprocessed'] < MAX_ALLOWED_SEQUENCE]
    .reset_index(drop=True)
)

In [15]:
# Evaluate the fine-tuned checkpoint on the test split: generate one summary
# per document with beam search, then score every summary against its
# reference with BERTScore and report the mean F1.

# Fall back to CPU so the cell still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_trained = T5ForConditionalGeneration.from_pretrained("./my_fine_tuned_t5_small_model/checkpoint-2080").to(device)

# Beam width for generation (also used by the baseline evaluation cell).
NUM_BEAMS = 4

trained_summaries = []
for index, row in test_df.iterrows():
    inputs = tokenizer("summarize: " + row["reference"], max_length=MAX_ALLOWED_SEQUENCE, truncation=True, return_tensors="pt").to(device)
    outputs = model_trained.generate(**inputs, min_length=MIN_ALLOWED_SEQUENCE, max_length=MAX_ALLOWED_SEQUENCE,
                                     num_beams=NUM_BEAMS, early_stopping=True)
    trained_summaries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
    torch.cuda.empty_cache()

# Score all pairs in a single call: calling `score` once per row reloads the
# BERTScore model on every iteration (visible as the repeated RobertaModel
# warnings in the previous output of this cell).
trained_results = []
if trained_summaries:
    _, _, trained_f1 = score(trained_summaries, test_df["summary"].tolist(), lang='en', verbose=False)
    trained_results = trained_f1.tolist()

for result_summary, f1_value in zip(trained_summaries, trained_results):
    print(result_summary)
    print(f"T5 BertScore F1: {f1_value:.2f}")

np.mean(trained_results)

the commission ( eurostat ) shall adopt delegated act in accordance with the principle laid down in the interinstitutional agreement of 13 april 2016 on better law-making. the commission ( eurostat ) shall establish a formal expert group, composed of representative of all the member state and chaired by a representative of the commission ( eurostat ). the commission ( eurostat ) shall establish a formal expert group, composed of representative of all the member state and chaired by


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.80
article 1 subject matter the year 2021 shall be designated a ‘ european year of rail ’. the specific objective of the european year shall be to encourage and support the effort of the union, member state, regional and local authority, and other organisation to increase the share of passenger and freight moving by rail. the commission shall regularly convene meeting of the national contact person in order to coordinate the running of the european year.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.84
the european union recovery instrument ( the ‘ instrument ’ ) shall be carried out under specific union programme and in accordance with the objective of the instrument. the instrument shall be financed up to an amount of eur 750 000 million in 2018 price on the basis of the empowerment provided for in article 5 of the own resource decision. the measure shall be carried out under specific union programme and in accordance with the relevant union act laying down rule for those programme.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.81
article 1 subject matter this regulation establishes common rule for the decennial provision of comprehensive data on population and housing. article 2 definition for the purpose of this regulation establishes common rule for the decennial provision of comprehensive data on population and housing. article 2 definition for the purpose of this regulation establishes common rule for the decennial provision of comprehensive data on population and housing.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.80
article 1 the strategic innovation agenda of the european institute of innovation and technology for the period from 2021 to 2027 ( sia 2021-2027 ) a set out in the annex is hereby adopted. article 3 decision no 1312/2013/eu is repealed with effect from 1 january 2021.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
the commission ( eurostat ) shall submit a report on the implementation of the regulation ( ec ) no 1107/2009. the commission ( eurostat ) shall adopt the definition of the ‘ area treated ’ a referred to in section 2 of regulation ( ec ) no 1107/2009. the commission ( eurostat ) shall submit a report on the implementation of the regulation ( ec ) no 223/2009.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.75
the measure referred to in article 4 shall be adopted in respect of country allowing non-sustainable fishing. the commission shall provide the country concerned with a reasonable opportunity to respond to the notification in writing and to remedy the situation within one month of receiving that notification. the measure referred to in article 4 shall be a framework for the adoption of certain measure regarding the conservation and management of the stock of common interest to the union and those third country. the measure referred to in article 4 shall provide for an appropriate system for their enforcement by


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
the fado system may contain information on authentic document issued by member state, the union and third party, such a third country, territorial entity, international organisation and other entity subject to international law, and on false version thereof. the fado system may contain information on travel, identity, residence and civil status document, driving licence and vehicle licence issued by member state, such a third country, territorial entity, international organisation and other entity subject to international law, and


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.83
an authorised entity established in a member state carrying out the act referred to in article 3 and 4 shall establish and follow it own practice to ensure that it distributes, communicates and make available accessible format copy only to beneficiary person or other authorised entity. an authorised entity established in a member state carrying out the act referred to in point 3 and 4 shall establish and follow it own practice to ensure that it : ( a ) distributes, communicates and make available accessible format copy only to beneficiary person or


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.79
article 1 definition for the purpose of this regulation shall apply : ( 1 ) ‘ circulation coin ’ mean euro coin intended for circulation, the denomination and technical specification of which are laid down in regulation ( ec ) no 975/98 ; ( 2 ) ‘ commemorative coin ’ mean euro coin intended for collection that are not issued with a view to their entry into circulation. the commission shall conduct an impact assessment on the continued issuance of 1- and 2-cent coin. each member state whose currency is


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
the commission ( eurostat ) shall provide the commission ( eurostat ) with data on their population and vital event referred to in paragraph 1 ( ec ) no 223/2009 ( ec ) no 223/2009 ( ec ) no 223/2009 ( ec ) no 223/2009 ( ec ) no 223/2009 ( ec ) no 223/2009 ( ec ) no 223/2009 ( ec ) no 223/2009 ( e


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.78
this regulation lay down the community regime relating to liability and insurance for the carriage of passenger by sea a set out in annex ii. the commission shall, if appropriate, present a legislative proposal in order, inter alia, to extend the scope of this regulation to ship of class a and b. the liability regime in respect of passenger, their luggage and their vehicle shall be governed only by article 3 ( 3 ) of the athens convention. the commission shall


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.81
this regulation establishes a common framework for the systematic production of community statistic on public health and health and safety at work. the statistic shall include, in the form of a harmonised and common data set, information required for community action in the field of public health, for supporting national strategy for the development of high-quality, universally accessible and sustainable health care a well a for community action in the field of health and safety at work. the commission ( eurostat ) shall prepare a report


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
the commission ( eurostat ) shall provide data on job vacancy at least for business unit with one employee or more. the data shall cover all economic activity defined by the common classification system for economic activity in the community ( nace in force ), except for the activity of household a employer and the activity of extraterritorial organisation and body. the data shall be broken down by economic activity in accordance with the nace in force at section level.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.86
a cohesion fund is hereby established for the purpose of strengthening the economic, social and territorial cohesion of the union in the interest of promoting sustainable development. the cohesion fund shall, while ensuring an appropriate balance and according to the investment and infrastructure need specific to each member state, support : (a ) investment in the environment, including area related to sustainable development and energy which present environmental benefit ; ( b ) investment in the water sector unless related to the promotion of energy efficiency


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
have adopted this regulation : article 1 subject matter this regulation establishes a common framework for the systematic production of community statistic in the field of education and lifelong learning. article 2 definition for the purpose of this regulation ( ec ) no 322/97 ; ( b ) ‘ production of statistic ’ shall be defined a in the second indent of article 2 of regulation ( ec ) no 322/97 ; ( c ) ‘ national authority ’ shall be defined a in


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
this regulation establishes a common framework for the production, transmission, evaluation and dissemination of comparable energy statistic in the community. this regulation establishes a common framework for the production, transmission, evaluation and dissemination of comparable energy statistic in the community. the commission ( eurostat ) shall present and disseminate the national statistic referred to in article 11 ( 2 ).


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
have adopted this regulation : article 1 obligation of the member state member state shall submit to the commission statistic on all the aquaculture activity conducted in freshwater and saltwater on their territory. article 2 definition 1. for the purpose of this regulation, the following definition shall apply : ( a ) ‘ community statistic ’ a defined in article 2 of regulation ( ec ) no 322/97 ; ( b ) ‘ capture-based aquaculture ’ mean the practice of collecting specimen from the wild and their subsequent


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82


0.8139538235134549

In [12]:
# Baseline: summarize the same test documents with a pretrained checkpoint
# that has not been fine-tuned, and report its mean BERTScore F1.
# NOTE(review): the baseline loads "google-t5/t5-large" while the fine-tuned
# model started from t5-small — confirm this cross-size comparison is intended.

# Release the fine-tuned model before loading the larger one. Guarded so that
# re-running this cell does not raise NameError once the name is gone.
if "model_trained" in globals():
    del model_trained
torch.cuda.empty_cache()

# Fall back to CPU so the cell still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_untrained = T5ForConditionalGeneration.from_pretrained("google-t5/t5-large").to(device)
# Rebinds the notebook-wide `tokenizer` to the t5-large tokenizer.
tokenizer = T5Tokenizer.from_pretrained("google-t5/t5-large")

untrained_summaries = []
for index, row in test_df.iterrows():
    inputs = tokenizer("summarize: " + row["reference"], max_length=MAX_ALLOWED_SEQUENCE, truncation=True, return_tensors="pt").to(device)
    outputs = model_untrained.generate(**inputs, min_length=MIN_ALLOWED_SEQUENCE, max_length=MAX_ALLOWED_SEQUENCE,
                                       num_beams=NUM_BEAMS, early_stopping=True)
    untrained_summaries.append(tokenizer.decode(outputs[0], skip_special_tokens=True))
    torch.cuda.empty_cache()

# Score all pairs in a single call so the BERTScore model is loaded once
# instead of once per test document.
untrained_results = []
if untrained_summaries:
    _, _, untrained_f1 = score(untrained_summaries, test_df["summary"].tolist(), lang='en', verbose=False)
    untrained_results = untrained_f1.tolist()

for result_summary, f1_value in zip(untrained_summaries, untrained_results):
    print(result_summary)
    print(f"T5 BertScore F1: {f1_value:.2f}")

np.mean(untrained_results)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on google-t5/t5-large automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


gross national income at market price ( gni ) and gross domestic product at market price ( gdp ) shall be defined in accordance with the european system of account 2010 ( esa 2010 ) established by regulation ( eu ) no 549/2013. member state shall calculate gni in the context of national accounting procedure.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.85
the year 2021 shall be designated a the ‘ european year of rail’. the general objective of the european year shall be to encourage and support the effort of the union, member state, regional and local authority. by 31 march 2021, the commission shall inform the european parliament and the council of it plan.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.83
support under the instrument shall in particular finance measures to tackle the adverse economic consequence of the covid-19 crisis. legal commitment of at least 60 % of the amount referred to in point ( a ) of article 2 ( 2 ) shall be entered into by 31 december 2022. decision on the granting of the loan referred to in point ( b ) of article 2 ( 2 ) shall be adopted by 31 december 2023.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.79
member state shall submit to the commission ( eurostat ) data on the population covering determined demographic, social and economic characteristic. reference date shall fall in a year specified on the basis of this regulation. member state shall provide the commission ( eurostat ) with a report on the quality of the data transmitted.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.84
sia 2021-2027 shall be implemented in accordance with regulation ( eu ) 2021/819. decision no 1312/2013/eu is repealed with effect from 1 january 2021.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.80
this regulation establishes a common framework for the systematic production of community statistic on the placing on the market and use of those pesticide. statistic shall apply to : — the annual amount of pesticide placed on the market in accordance with annex i.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.74
measure may apply where cooperation between third country and the union is required for the joint management of the stock of common interest. a country may be identified a a country allowing non-sustainable fishing where it fails to cooperate in the management of a stock of common interest. the measure referred to in article 4 shall cease to apply when the country allowing non-sustainable fishing adopts appropriate corrective measure.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.84
fado system contains information on authentic document issued by member state. also contains information on false document issued by third party. purpose of fado system is to contribute to fight against document and identity fraud.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.84
authorised entity established in a member state shall establish and follow its own practice to ensure it distributes, communicates and make available accessible format copy. an authorised entity established in a member state shall provide the following information in an accessible way. an authorised entity established in a member state carrying out the act referred to in article 3 and 4 shall provide the following information.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.80
circulation coin means euro coin intended for circulation. commemorative coin means circulation coin intended to commemorate a specific subject. collector coin means euro coin intended for collection that are not issued with a view to their entry into circulation. member state may issue two commemorative coin per year.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.81
the regulation establishes a common legal framework for the development, production and dissemination of european statistic on population and vital event. member state shall provide the commission ( eurostat ) with data on their usually resident population at the reference time. data shall cover population by age, sex and region of residence.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
this regulation shall apply to carriage of passenger by sea within a single member state on board ship of class a and b under article 4 of directive 98/18/ec. it shall apply from the date of entry into force of the athens convention for the community, and in any case from no later than 31 december 2012. it shall be binding in it entirety and directly applicable in all member state.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.81
the statistic shall be produced in compliance with standard on impartiality, reliability, objectivity, cost-effectiveness and statistical confidentiality. member state shall ensure that the transmitted data do not permit the direct identification of the statistical unit ( individual )


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.78
each member state shall submit to the commission ( eurostat ) data on job vacancy at least for business unit with one employee or more. data shall cover all economic activity defined by the common classification system for economic activity in the community.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.85
a cohesion fund is hereby established for the purpose of strengthening the economic, social and territorial cohesion of the union. it shall support investment for growth and job goal referred to in article 89 of regulation ( eu ) no 1303/2013. the cohesion fund shall not support. the decommissioning or the construction of nuclear power station.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.83
this regulation establishes a common framework for the systematic production of community statistic in the field of education and lifelong learning. member state shall ensure that the transmitted data do not permit the direct identification of the statistical unit concerned.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
this regulation establishes a common framework for the production, transmission, evaluation and dissemination of comparable energy statistic in the community. member state shall compile data concerning energy product and their aggregate in the community. they shall be transmitted with the frequency laid out in the annex.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.82
a member state shall submit to the commission statistic on all the aquaculture activity conducted in freshwater and saltwater on their territory. member state shall use survey or other statistically validated method covering at least 90 % of the total production by volume. the remaining part of the total production may be estimated.


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


T5 BertScore F1: 0.84


0.8173354268074036

In [16]:
# Write the two result collections to .npy files in the current working
# directory; both objects come from cells outside this one.
np.save(file="trained.npy", arr=trained_results)
np.save(file="untrained.npy", arr=untrained_results)