In [1]:
import torch
import torch.nn as nn

from transformers import AutoConfig, AutoTokenizer, AutoModelForQuestionAnswering
from transformers import TrainingArguments
from transformers.trainer_utils import set_seed, EvalPrediction

import datasets
from datasets import load_metric

from arguments import DatasetArguments
from processor import QAProcessor
from trainer_qa import QATrainer

In [2]:
# Trainer configuration for the QA fine-tuning run. Keyword arguments are
# grouped by concern; their order has no effect on the resulting object.
#
# NOTE(review): the stock Trainer only applies label smoothing when a batch
# carries a `labels` key, while QA features use start/end positions -- confirm
# that QATrainer actually honours `label_smoothing_factor`.
# NOTE(review): the saved run log shows checkpoint-500 being deleted right
# after checkpoint-100 is written, which suggests ./saved still held
# checkpoints from an earlier run -- consider a fresh output_dir per run.
training_args = TrainingArguments(
    # Run mode and reproducibility.
    seed=42,
    do_train=True,
    do_eval=True,

    # Where checkpoints and logs are written.
    output_dir="./saved",
    logging_dir="./logs",
    log_level="info",

    # Batching (no gradient accumulation).
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=1,

    # Optimisation schedule: fixed learning rate for three epochs.
    num_train_epochs=3,
    learning_rate=3e-5,
    lr_scheduler_type="constant",
    weight_decay=0.01,
    label_smoothing_factor=0.1,

    # Evaluate, log and checkpoint on the same 100-step cadence, keeping at
    # most five checkpoints on disk.
    evaluation_strategy="steps",
    eval_steps=100,
    logging_steps=100,
    save_steps=100,
    save_total_limit=5,

    # Rank checkpoints by exact match (higher is better). Alternatives are
    # "eval_f1" or "eval_loss"; the latter needs greater_is_better=False.
    metric_for_best_model="eval_exact_match",
    greater_is_better=True,
)

In [None]:
# Scratch cell: it has no execution count in the saved notebook, so it appears
# to have never been run. Nothing below is used by the training cells.
# NOTE(review): if a custom callback is not going to be written, delete this
# cell; otherwise move the import up to the notebook's import cell.
from transformers.trainer_callback import TrainerCallback

# The instance is neither bound to a name nor handed to a trainer; running the
# cell would only display its repr as the cell output.
TrainerCallback()
# Reminder of control flags a callback may want to toggle: should_eval,
# should_train, ... should_save, ... (names written from memory -- presumably
# the TrainerControl attributes; TODO confirm the exact spelling).

In [3]:
# Identifier of the pretrained checkpoint shared by the tokenizer, the config
# and (in a later cell) the QA model itself.
MODEL_NAME = "klue/roberta-large"

# The two loads are independent of each other; both resolve the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
config = AutoConfig.from_pretrained(MODEL_NAME)

In [4]:
# Dataset / tokenisation settings consumed by QAProcessor.
# NOTE(review): the length parameters are presumably measured in tokens
# (window size, overlap between windows, longest accepted answer) -- confirm
# against DatasetArguments.
dataset_args = DatasetArguments(
    max_seq_len=512,
    stride_len=128,
    max_ans_len=30,
    # TODO: this flag needs to be removed.
    use_max_padding=True,
    # Absolute path on the training machine; adjust when running elsewhere.
    dataset_path="/opt/ml/data",
)

In [5]:
# Seed the RNGs *before* building the model: the QA head (`qa_outputs`) is not
# part of the pretrained checkpoint and gets freshly initialised, so seeding
# afterwards would not make its starting weights reproducible.
set_seed(seed=training_args.seed)

model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME)

Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForQuestionAnswering: ['lm_head.decoder.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['qa_outputs.weight', 'qa_outputs.bias']
You should probably TRAIN this model on a

In [6]:
# Project-local helper that produces the examples/features used below and the
# post-processing function handed to the trainer.
# NOTE(review): the meaning of `concat` is not visible from this notebook --
# see processor.QAProcessor.
qa_processor = QAProcessor(
    tokenizer=tokenizer,
    dataset_args=dataset_args,
    concat=False,
)

In [7]:
# Raw examples first, then their tokenised features. The call order is kept
# exactly as before (train examples, eval examples, train features, eval
# features) in case the processor relies on it.
train_examples, eval_examples = (
    qa_processor.get_train_examples(),
    qa_processor.get_eval_examples(),
)
train_features, eval_features = (
    qa_processor.get_train_features(),
    qa_processor.get_eval_features(),
)

Loading cached processed dataset at /opt/ml/data/train_dataset/train/cache-2be0bd568b416eb6.arrow
Loading cached processed dataset at /opt/ml/data/train_dataset/validation/cache-c233008588ad4978.arrow


In [8]:
# Sanity check: display the column schema of the tokenised training features
# (model inputs, start/end positions, and the bookkeeping columns).
train_features.features

{'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'end_positions': Value(dtype='int64', id=None),
 'example_id': Value(dtype='string', id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'offset_mapping': Sequence(feature=Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None), length=-1, id=None),
 'overflow_to_sample_mapping': Value(dtype='int64', id=None),
 'start_positions': Value(dtype='int64', id=None),
 'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

In [9]:
# Sanity check: display the column schema of the tokenised evaluation features
# so it can be compared with the training schema shown in the previous cell.
eval_features.features

{'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'end_positions': Value(dtype='int64', id=None),
 'example_id': Value(dtype='string', id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'offset_mapping': Sequence(feature=Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None), length=-1, id=None),
 'overflow_to_sample_mapping': Value(dtype='int64', id=None),
 'start_positions': Value(dtype='int64', id=None),
 'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

In [10]:
# Sanity check: display the column schema of the raw (untokenised) evaluation
# examples -- question, context, answers and identifiers.
eval_examples.features

{'title': Value(dtype='string', id=None),
 'context': Value(dtype='string', id=None),
 'question': Value(dtype='string', id=None),
 'id': Value(dtype='string', id=None),
 'answers': {'answer_start': Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None),
  'text': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None)},
 'document_id': Value(dtype='int64', id=None),
 '__index_level_0__': Value(dtype='int64', id=None)}

In [11]:
# SQuAD metric from `datasets`; the training log reports its scores as the
# "Exact Match" and "F1" columns.
metric = load_metric("squad")


def compute_metrics(pred: EvalPrediction):
    """Score one evaluation pass with the SQuAD metric.

    Args:
        pred: Evaluation output whose ``predictions`` and ``label_ids`` are
            forwarded unchanged to the metric. Presumably the trainer's
            post-processing function has already put them into the format the
            SQuAD metric expects -- verify against
            ``QAProcessor.post_processing_function``.

    Returns:
        Whatever ``metric.compute`` returns for these inputs.
    """
    predicted_answers = pred.predictions
    gold_answers = pred.label_ids
    return metric.compute(predictions=predicted_answers, references=gold_answers)

In [12]:
# Wire everything into the project-local QA trainer.
trainer = QATrainer(
    args=training_args,
    model=model,
    # Tokenised features are what the model is trained and evaluated on ...
    train_dataset=train_features,
    eval_dataset=eval_features,
    # ... while the raw examples and the post-processing hook are supplied
    # separately for evaluation.
    eval_examples=eval_examples,
    post_process_function=qa_processor.post_processing_function,
    compute_metrics=compute_metrics,
)

In [13]:
# Launch fine-tuning. With the arguments configured above, evaluation, logging
# and checkpointing all happen every 100 optimisation steps.
# NOTE(review): the run saved in this notebook was stopped by hand
# (KeyboardInterrupt), so the output below covers only part of the schedule.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `RobertaForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id, overflow_to_sample_mapping.
***** Running training *****
  Num examples = 5769
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 2166
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mthis-is-real[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade

CondaEnvException: Unable to determine environment

Please re-run thi

Step,Training Loss,Validation Loss,Exact Match,F1
100,2.9291,1.697486,56.25,64.937169
200,1.6269,1.546786,59.583333,68.568529
300,1.1841,1.234544,64.166667,72.159392
400,1.034,1.096147,66.666667,75.796883
500,1.1466,1.336357,65.0,74.508929
600,1.0463,1.22516,62.083333,71.115652
700,1.0386,1.049474,61.25,70.228761
800,0.5676,1.694854,62.083333,72.129134
900,0.5285,1.483064,61.25,69.213701
1000,0.5407,1.686491,66.25,73.68439


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id, overflow_to_sample_mapping.
***** Running Evaluation *****
  Num examples = 351
  Batch size = 16
100%|██████████| 240/240 [00:01<00:00, 140.03it/s]
Saving model checkpoint to ./saved/checkpoint-100
Configuration saved in ./saved/checkpoint-100/config.json
Model weights saved in ./saved/checkpoint-100/pytorch_model.bin
Deleting older checkpoint [saved/checkpoint-500] due to args.save_total_limit
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForQuestionAnswering.forward` and have been ignored: offset_mapping, example_id, overflow_to_sample_mapping.
***** Running Evaluation *****
  Num examples = 351
  Batch size = 16
100%|██████████| 240/240 [00:01<00:00, 139.70it/s]
Saving model checkpoint to ./saved/checkpoint-200
Configuration saved in ./saved/checkpoint-200/config.jso

KeyboardInterrupt: 