## Masked Language Modeling
Using masked language modeling (MLM), we train an adapter for each of the GLUE tasks. This adapts the pre-trained language model to the text corpus of that specific GLUE task.

In [None]:
# !pip install -Uqq adapter-transformers datasets
%load_ext autoreload
%autoreload 2

from utils.mlm import masked_language_modeling
from utils.mlm_utils import DomainModelArguments, DomainDataTrainingArguments
from transformers import TrainingArguments, MultiLingAdapterArguments

In [None]:
# GLUE task configurations selected for MLM adapter training.
# Flip a flag to True to include that task in the resulting list;
# the list keeps the order in which the tasks are written here.
glue_tasks = [
    task_name
    for task_name, enabled in {
        "cola": False,
        "mnli": False,
        "mrpc": True,
        "qnli": False,
        "qqp": False,
        "rte": False,
        "sst2": False,
        "stsb": False,
        "wnli": False,
    }.items()
    if enabled
]

In [None]:
# Model arguments: use the "roberta-base" checkpoint name/path as the base model.
model = DomainModelArguments(**{"model_name_or_path": "roberta-base"})

# Adapter arguments: enable adapter training with the "pfeiffer+inv"
# adapter configuration string.
adapter = MultiLingAdapterArguments(
    **{
        "train_adapter": True,
        "adapter_config": "pfeiffer+inv",
    }
)

In [None]:
# %%capture
# Per-task statistics, keyed by GLUE task name:
#   results[task] = {"training": <train stats>, "eval": <eval stats>}
results = {}

# Train one MLM adapter per task listed in `glue_tasks`.
# The task selection is done in `glue_tasks` itself, so the list is iterated
# in full; slicing it here (e.g. `[:1]`) would silently skip every enabled
# task after the first one.
for dataset in glue_tasks:
    # Data arguments: the GLUE benchmark, restricted to the current task config.
    data = DomainDataTrainingArguments(
        dataset_name="glue",
        dataset_config_name=dataset,
    )

    # Trainer settings; each task writes to its own output directory, and an
    # existing directory from a previous run is overwritten.
    training = TrainingArguments(
        learning_rate=1e-4,
        overwrite_output_dir=True,
        output_dir=f"./adapter/mlm/{dataset}",
        do_train=True,
        do_eval=True,
        num_train_epochs=10,
    )

    # Run MLM for this task; the helper returns a (train, eval) pair.
    train_stats, eval_stats = masked_language_modeling(
        model_args=model,
        data_args=data,
        training_args=training,
        adapter_args=adapter,
    )

    results[dataset] = {"training": train_stats, "eval": eval_stats}


In [None]:
from pprint import pprint
# Pretty-print the `results` mapping so its nested entries are easier to read.
pprint(results)