In [8]:
#!pip install bert_score

In [15]:
import numpy as np
import torch
from datasets import load_dataset, load_metric
from torch.autograd import Variable
from transformers import (
    AutoTokenizer,
    Adafactor,
    DataCollatorForSeq2Seq,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
)

# import wandb

# wandb.login()
# %env WANDB_PROJECT=WebNLG_exp_bs

# %load_ext jupyter_black

# Select the compute device: CUDA when it is available, CPU otherwise.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Running on the GPU" if dev.type == "cuda" else "Running on the CPU")

# Pretrained "t5-base" tokenizer and seq2seq model (moved onto `dev`), and the
# seq2seq data collator built from both.
tokenizer = AutoTokenizer.from_pretrained("t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
model = model.to(dev)
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

Running on the GPU


In [16]:
# Load the train/dev/test CSV files (paths are relative to the notebook's
# working directory) into one dataset object keyed by split name.
dataset = load_dataset(
    "csv",
    data_files={
        "train": "train_set.csv",
        "dev": "dev_set.csv",
        "test": "test_set.csv",
    },
)


def data_map(examples):
    """Tokenise one batch of examples for T5 fine-tuning.

    Args:
        examples: batch mapping with a "triple" column (source text) and a
            "sentence" column (target text); both are iterated as sequences
            of strings.

    Returns:
        The tokenizer output for the "WebNLG: "-prefixed triples, with an
        extra "labels" entry holding the token ids of the target sentences.
    """
    # No manual "</s>" here: the T5 tokenizer already appends the EOS token to
    # every sequence it encodes, so also writing it into the text can produce
    # a doubled EOS in both the inputs and the labels.
    inputs = ["WebNLG: " + eg for eg in examples["triple"]]
    outputs = list(examples["sentence"])
    model_inputs = tokenizer(
        inputs, max_length=tokenizer.model_max_length, truncation=True
    )

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            outputs, max_length=tokenizer.model_max_length, truncation=True
        )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs


# Tokenise every split with data_map. batched=True hands data_map lists of
# examples (batch_size=8 per call), which is what its list comprehensions
# expect.
# NOTE(review): num_proc=32 asks for 32 worker processes — confirm the machine
# running this notebook has that many cores to spare.
dataset = dataset.map(
    data_map,
    batched=True,
    batch_size=8,
    num_proc=32,
)

In [17]:
# Metric objects used by compute_metrics; loaded once when this cell runs.
bleu = load_metric("bleu")
ter = load_metric("ter")


def compute_metrics(eval_pred):
    """Compute BLEU and TER between decoded predictions and references.

    Args:
        eval_pred: pair ``(predictions, labels)``. ``predictions`` may be
            logits of shape (batch, seq, vocab), already-selected token ids of
            shape (batch, seq), or a tuple whose first element is one of
            those. ``labels`` holds the reference token ids, with -100 on
            padded positions.

    Returns:
        dict with the BLEU metric result under "BLEU" and the TER metric
        result under "TER".
    """
    predictions, labels = eval_pred
    # Seq2seq models can return extra arrays next to the logits; logits are first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    pad_id = tokenizer.pad_token_id

    if predictions.ndim == 3:
        # Logits: take the most likely token per position, then blank out the
        # positions that carry no target token (their argmax is meaningless).
        predictions = predictions.argmax(axis=-1)
        predictions = np.where(labels != -100, predictions, pad_id)

    # -100 is not a valid token id; map it to the pad token so decoding works.
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    pred_texts = [
        text.strip()
        for text in tokenizer.batch_decode(predictions, skip_special_tokens=True)
    ]
    ref_texts = [
        text.strip()
        for text in tokenizer.batch_decode(labels, skip_special_tokens=True)
    ]

    # BLEU expects token lists (one list of references per prediction);
    # TER expects plain strings (again one list of references per prediction).
    b = bleu.compute(
        predictions=[text.split() for text in pred_texts],
        references=[[text.split()] for text in ref_texts],
    )
    t = ter.compute(
        predictions=pred_texts,
        references=[[text] for text in ref_texts],
    )
    return {"BLEU": b, "TER": t}

In [18]:
class BERTScoreTrainer(Seq2SeqTrainer):
    """Seq2SeqTrainer whose training signal is BERTScore instead of cross-entropy.

    BERTScore is computed on decoded text, so it has no gradient with respect
    to the model parameters. During training the score is therefore used as a
    reward in a REINFORCE-style surrogate loss (reward-weighted log-likelihood
    of sampled predictions); during evaluation the loss is the negated mean
    score of the greedy predictions.

    NOTE(review): predictions are taken from the teacher-forced logits, not
    from free-running generation — confirm this approximation is acceptable.
    """

    # score_type code -> key in the dict returned by the bertscore metric.
    # Any other code falls back to "f1".
    _SCORE_KEYS = {"p": "precision", "r": "recall"}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Load the metric once and reuse it for every step instead of calling
        # load_metric on each compute_loss call.
        self._bertscore = load_metric("bertscore")

    def compute_loss(self, model, inputs, return_outputs=False, score_type="f1"):
        """Return a BERTScore-based loss for one batch.

        Args:
            model: the seq2seq model being trained or evaluated.
            inputs: batch mapping passed to the model; must contain "labels".
            return_outputs: when True, return ``(loss, outputs)``.
            score_type: "p" for precision, "r" for recall, anything else for F1.

        Returns:
            The scalar loss tensor, or ``(loss, outputs)`` when
            ``return_outputs`` is True.
        """
        labels = inputs.get("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")

        # Positions padded with -100 carry no target token.
        target_mask = labels.ne(-100)
        pad_id = self.tokenizer.pad_token_id

        # Normalise in float32 so the log-probabilities stay stable under fp16.
        log_probs = torch.log_softmax(logits.float(), dim=-1)
        with torch.no_grad():
            if model.training:
                # Sample so the policy-gradient estimate explores alternatives.
                pred_ids = torch.distributions.Categorical(logits=log_probs).sample()
            else:
                pred_ids = log_probs.argmax(dim=-1)

        # BERTScore works on text, so decode both sides (padding is skipped).
        pred_texts = self.tokenizer.batch_decode(
            pred_ids.masked_fill(~target_mask, pad_id).tolist(),
            skip_special_tokens=True,
        )
        ref_texts = self.tokenizer.batch_decode(
            labels.masked_fill(~target_mask, pad_id).tolist(),
            skip_special_tokens=True,
        )

        scores = self._bertscore.compute(
            predictions=pred_texts, references=ref_texts, lang="en"
        )
        score_key = self._SCORE_KEYS.get(score_type, "f1")
        rewards = torch.tensor(
            scores[score_key], dtype=torch.float32, device=logits.device
        )

        if model.training:
            # Length-normalised log-likelihood of the sampled tokens; this is
            # the only term through which gradients reach the model.
            token_logp = log_probs.gather(-1, pred_ids.unsqueeze(-1)).squeeze(-1)
            mask = target_mask.to(token_logp.dtype)
            seq_logp = (token_logp * mask).sum(dim=-1) / mask.sum(dim=-1).clamp(min=1.0)
            # Batch-mean baseline reduces variance; skipped for a single
            # example, where it would cancel the reward entirely.
            baseline = rewards.mean() if rewards.numel() > 1 else rewards.new_zeros(())
            loss = -((rewards - baseline) * seq_logp).mean()
        else:
            loss = -rewards.mean()
        return (loss, outputs) if return_outputs else loss

In [19]:
# Trainer configuration: results go to ./results, evaluation runs once per
# epoch, batches of 8 per device for both training and evaluation, a single
# training epoch, fp16 enabled, external reporting disabled (report_to="none")
# and save_total_limit=3 for checkpoints.
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    #debug="underflow_overflow",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    report_to="none",
    # weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=1,
    fp16=True,
)
# Adafactor over all model parameters with an explicit learning rate of 1e-3;
# relative_step, scale_parameter and warmup_init are all switched off, and
# beta1=None / weight_decay=0.0 are passed explicitly.
optimizer = Adafactor(
    model.parameters(),
    lr=1e-3,
    eps=(1e-30, 1e-3),
    clip_threshold=1.0,
    decay_rate=-0.8,
    beta1=None,
    weight_decay=0.0,
    relative_step=False,
    scale_parameter=False,
    warmup_init=False,
)

# Build the BERTScore trainer on the tokenised train/dev splits with the
# custom Adafactor optimizer. The scheduler slot is None — presumably the
# Trainer then creates its default scheduler; confirm.
# NOTE(review): compute_metrics is defined in this notebook but is not passed
# here, so it is not used by this trainer.
trainer_bs = BERTScoreTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["dev"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    optimizers=(optimizer, None),
)

In [20]:
# wandb.jupyter.__IFrame = wandb.jupyter.IFrame(
#     opts={"height": 420, "workspace": False, "quiet": False}
# )
# Run training with the BERTScore-based trainer configured above.
trainer_bs.train()
# wandb.finish()

# wandb.jupyter.__IFrame = None

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['ro

KeyboardInterrupt: 