In [1]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig, RobertaModelWithHeads
from transformers import AdapterType
from transformers import Trainer, TrainingArguments, EvalPrediction
from sklearn.metrics import f1_score

In [2]:
# Task registry: dataset name -> (number of target classes, f1 averaging mode).
# The averaging mode is later handed unchanged to f1_score(average=...), so every
# entry must be a spelling that function accepts ('micro' or 'macro' here).
dataset_dict = {
    'chemprot': (13, 'micro'),
    'rct': (5, 'micro'),
    'CI': (6, 'macro'),
    'sciie': (7, 'macro'),  # fixed typo: was 'm2cro', which is not a valid averaging mode
    'HN': (2, 'macro'),
    'ag': (4, 'macro'),
    'amazon': (2, 'macro'),
    'imdb': (2, 'macro'),
}

In [3]:
# Select the task to run; the class count and f1 averaging mode are looked up
# from the registry entry for that task.
ds_name = 'chemprot'
n_labels, f1_type = dataset_dict[ds_name]

In [4]:
# Each task has its own loading script under data_loaders/, named after the task.
loader_script = f'data_loaders/{ds_name}_data_loader.py'
dataset = load_dataset(loader_script)

Reusing dataset task_dataset (/home/qiyuan/.cache/huggingface/datasets/task_dataset/task/1.0.0/5154b9e4ddb61d69749d3fc1f0afa65b89f7dac4c88e7d21a6d00c5090e813b4)


In [5]:
# Baseline: tokenizer and RobertaModelWithHeads both start from the 'roberta-base' checkpoint.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaModelWithHeads.from_pretrained('roberta-base')
# Register a text-task adapter named after the selected dataset, then put the
# model into adapter-training mode for that adapter.
# NOTE(review): train_adapter presumably freezes the pretrained weights so only
# the adapter is updated -- confirm against the adapter-transformers version in use.
model.add_adapter(ds_name, AdapterType.text_task)
model.train_adapter([ds_name])

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infere

In [6]:
# Add a classification head, registered under the dataset name, with one output
# per class of the selected dataset.
model.add_classification_head(ds_name, num_labels=n_labels)
# Activate the dataset's adapter.
# NOTE(review): the nested-list argument presumably describes an adapter stack
# containing a single adapter -- confirm for the library version in use.
model.set_active_adapters([[ds_name]])

In [7]:
def encode_batch(batch):
    """Tokenize the 'text' entries of a batch.

    Every sequence is truncated and padded to a fixed length of 120 tokens.
    Relies on the notebook-level `tokenizer`.
    """
    tokenizer_options = {'max_length': 120, 'truncation': True, 'padding': 'max_length'}
    return tokenizer(batch['text'], **tokenizer_options)

In [8]:
# Columns exposed to the trainer as torch tensors.
torch_columns = ['input_ids', 'attention_mask', 'labels']

# Tokenize every split in batches, then restrict the output format to the tensor columns.
dataset = dataset.map(encode_batch, batched=True)
dataset.set_format(type='torch', columns=torch_columns)

Loading cached processed dataset at /home/qiyuan/.cache/huggingface/datasets/task_dataset/task/1.0.0/5154b9e4ddb61d69749d3fc1f0afa65b89f7dac4c88e7d21a6d00c5090e813b4/cache-6ef7a533742e09c3.arrow
Loading cached processed dataset at /home/qiyuan/.cache/huggingface/datasets/task_dataset/task/1.0.0/5154b9e4ddb61d69749d3fc1f0afa65b89f7dac4c88e7d21a6d00c5090e813b4/cache-30c8391a4d964213.arrow
Loading cached processed dataset at /home/qiyuan/.cache/huggingface/datasets/task_dataset/task/1.0.0/5154b9e4ddb61d69749d3fc1f0afa65b89f7dac4c88e7d21a6d00c5090e813b4/cache-32485901e8dc5002.arrow


In [9]:
def compute_accuracy(p: EvalPrediction):
    """Return {'acc': fraction of rows whose highest-scoring class equals the label}.

    `p.predictions` holds one row of class scores per example (argmax is taken
    along axis 1) and `p.label_ids` holds the reference class ids.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    hits = predicted_classes == p.label_ids
    return {'acc': hits.mean()}

def compute_f1(p: EvalPrediction):
    """Return {'f1': f1 score of the argmax predictions against the labels}.

    The averaging mode comes from the notebook-level `f1_type`, which is read
    when the function is called.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    score = f1_score(p.label_ids, predicted_classes, average=f1_type)
    return {'f1': score}

In [10]:
# Training settings for the baseline run: 9 epochs, batch size 16 for both
# training and evaluation, output written under ./results.
t_a = TrainingArguments(
    output_dir='./results',
    num_train_epochs=9,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    remove_unused_columns=False,
)

# Baseline trainer: trains on the 'train' split, evaluates on 'validation',
# and reports f1 through compute_f1.
trainer = Trainer(
    model=model,
    args=t_a,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    compute_metrics=compute_f1,
)

In [11]:
# Run the baseline (roberta-base) training with the settings held in t_a.
trainer.train()

Step,Training Loss
500,1.576207
1000,0.866955
1500,0.657101
2000,0.598273


TrainOutput(global_step=2349, training_loss=0.8707611616645381)

In [12]:
# Score the baseline on dataset['validation']; compute_f1 supplies the f1 metric.
trainer.evaluate()

{'eval_loss': 0.6644195318222046, 'eval_f1': 0.8034610630407911, 'epoch': 9.0}

In [13]:
# Comparison model: same setup as the baseline, but starting from a
# task-adapted RoBERTa checkpoint instead of 'roberta-base'.
# NOTE(review): the checkpoint id is hard-coded for chemprot; it does not follow
# ds_name, so it must be changed by hand when a different dataset is selected.
tapt_model = RobertaModelWithHeads.from_pretrained('allenai/dsp_roberta_base_tapt_chemprot_4169')
# Register and enable training of a text-task adapter named after the dataset,
# mirroring the calls made for the baseline model.
tapt_model.add_adapter(ds_name, AdapterType.text_task)
tapt_model.train_adapter([ds_name])

Some weights of the model checkpoint at allenai/dsp_roberta_base_tapt_chemprot_4169 were not used when initializing RobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModelWithHeads were not initialized from the model checkpoint at allenai/dsp_roberta_base_tapt_chemprot_4169 and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a do

In [14]:
# Same head setup as the baseline: a classification head registered under the
# dataset name with one output per class, then the dataset's adapter is activated.
tapt_model.add_classification_head(ds_name, num_labels=n_labels)
tapt_model.set_active_adapters([[ds_name]])

In [15]:
# Give the TAPT run its own output directory. Reusing t_a directly would point
# this trainer at the same './results' directory as the baseline trainer, so the
# two runs would write their artifacts on top of each other.
# All other settings are read back from t_a so both runs use identical hyperparameters.
tapt_t_a = TrainingArguments(output_dir='./results_tapt',
                             num_train_epochs=t_a.num_train_epochs,
                             per_device_train_batch_size=t_a.per_device_train_batch_size,
                             per_device_eval_batch_size=t_a.per_device_eval_batch_size,
                             remove_unused_columns=t_a.remove_unused_columns)

# TAPT trainer: same data splits and metric as the baseline, different starting model.
tapt_trainer = Trainer(model=tapt_model,
                       args=tapt_t_a,
                       train_dataset=dataset['train'],
                       eval_dataset=dataset['validation'],
                       compute_metrics=compute_f1
                      )

In [16]:
# Run training for the model initialised from the task-adapted checkpoint.
tapt_trainer.train()

Step,Training Loss
500,1.545737
1000,0.866464
1500,0.679146
2000,0.604712


TrainOutput(global_step=2349, training_loss=0.8714210896757929)

In [17]:
# Score the task-adapted model on dataset['validation'] with the same f1 metric as the baseline.
tapt_trainer.evaluate()

{'eval_loss': 0.7092375755310059, 'eval_f1': 0.7824474660074165, 'epoch': 9.0}