## Masked Language Modeling
Using masked language modeling (MLM), we train an adapter for each selected GLUE task. This adapts the pre-trained language model to the text corpus specific to that task.

In [12]:
# !pip install -Uqq adapter-transformers datasets
%load_ext autoreload
%autoreload 2

from utils.mlm import masked_language_modeling
from utils.mlm_utils import DomainModelArguments, DomainDataTrainingArguments
from transformers import TrainingArguments, MultiLingAdapterArguments

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# GLUE task configurations to run. Only "mrpc" is switched on; add any of the
# disabled candidates below to the list to include them:
#   cola, mnli, qnli, qqp, rte, sst2, stsb, wnli
glue_tasks = ["mrpc"]

In [14]:
# Pre-trained checkpoint that serves as the base language model.
model = DomainModelArguments(model_name_or_path="roberta-base")

# Enable adapter training with the "pfeiffer+inv" configuration string.
# NOTE(review): presumably the Pfeiffer architecture plus invertible adapters;
# confirm against the adapter-transformers documentation.
adapter = MultiLingAdapterArguments(train_adapter=True, adapter_config="pfeiffer+inv")

In [15]:
# %%capture
# (Uncomment the first line to capture the verbose training output instead of
# displaying it; a cell magic has to stay on the first line of the cell.)
#
# Run masked-language-model adapter training once per task in `glue_tasks` and
# collect the returned train/eval statistics keyed by task name.
results = {}
# Iterate over every enabled task. The previous `glue_tasks[:1]` slice silently
# dropped all but the first entry of the selection list.
for dataset in glue_tasks:
    # Each task is passed as the configuration name of the "glue" dataset.
    data = DomainDataTrainingArguments(
        dataset_name="glue",
        dataset_config_name=dataset,
    )

    training = TrainingArguments(
        learning_rate=1e-4,
        overwrite_output_dir=True,
        # One output directory per task, so runs do not write to the same path.
        output_dir=f"./adapter/mlm/{dataset}",
        do_train=True,
        do_eval=True,
        num_train_epochs=10,
    )

    train_stats, eval_stats = masked_language_modeling(
        model_args=model, data_args=data, training_args=training, adapter_args=adapter
    )

    results[dataset] = {"training": train_stats, "eval": eval_stats}


[INFO|training_args.py:784] 2021-08-02 15:25:07,338 >> PyTorch: setting up devices
[INFO|training_args.py:680] 2021-08-02 15:25:07,339 >> The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
08/02/2021 15:25:07 - INFO - utils.mlm -   Training/evaluation parameters TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_find_unused_parameters=None,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_steps=500,
evaluation_strategy=IntervalStrategy.NO,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
gradient_accumulation_steps=1,
greater_is_better=None

Step,Training Loss


[INFO|trainer.py:1403] 2021-08-02 15:25:48,306 >> 

Training completed. Do not forget to share your model on huggingface.co/models =)


[INFO|trainer.py:1989] 2021-08-02 15:25:48,307 >> Saving model checkpoint to ./adapter/mlm/mrpc
[INFO|loading.py:59] 2021-08-02 15:25:48,308 >> Configuration saved in ./adapter/mlm/mrpc/glue/adapter_config.json
[INFO|loading.py:72] 2021-08-02 15:25:48,359 >> Module weights saved in ./adapter/mlm/mrpc/glue/pytorch_adapter.bin
[INFO|loading.py:59] 2021-08-02 15:25:48,359 >> Configuration saved in ./adapter/mlm/mrpc/glue/head_config.json
[INFO|loading.py:72] 2021-08-02 15:25:48,528 >> Module weights saved in ./adapter/mlm/mrpc/glue/pytorch_model_head.bin
[INFO|loading.py:59] 2021-08-02 15:25:48,529 >> Configuration saved in ./adapter/mlm/mrpc/glue/head_config.json
[INFO|loading.py:72] 2021-08-02 15:25:48,735 >> Module weights saved in ./adapter/mlm/mrpc/glue/pytorch_model_head.bin
[INFO|tokenization_utils_base.py:1948] 2021-08-02 15:25:48,736 >> tokenizer

***** train metrics *****
  epoch                    =       10.0
  total_flos               =   687921GF
  train_loss               =     1.8425
  train_runtime            = 0:00:38.39
  train_samples            =        191
  train_samples_per_second =     49.745
  train_steps_per_second   =      6.251


***** eval metrics *****
  epoch                   =       10.0
  eval_loss               =     1.6364
  eval_runtime            = 0:00:00.18
  eval_samples            =         21
  eval_samples_per_second =    114.161
  eval_steps_per_second   =     16.309
  perplexity              =     5.1365


In [16]:
from pprint import pprint
# Pretty-print the nested {task: {"eval": ..., "training": ...}} statistics
# collected by the training loop.
pprint(results)

{'mrpc': {'eval': {'epoch': 10.0,
                   'eval_loss': 1.636365294456482,
                   'eval_runtime': 0.184,
                   'eval_samples': 21,
                   'eval_samples_per_second': 114.161,
                   'eval_steps_per_second': 16.309,
                   'perplexity': 5.136466000500812},
          'training': {'epoch': 10.0,
                       'total_flos': 738650504448000.0,
                       'train_loss': 1.8425042470296225,
                       'train_runtime': 38.3961,
                       'train_samples': 191,
                       'train_samples_per_second': 49.745,
                       'train_steps_per_second': 6.251}}}
