In [1]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig, RobertaModelWithHeads
from transformers import Trainer, TrainingArguments, EvalPrediction
from sklearn.metrics import f1_score

In [2]:
# Per-dataset settings, keyed by dataset name.
# Each value is (number of classes, F1 averaging mode); the averaging mode is
# later passed as `average=` to sklearn's f1_score, so it must be a value that
# f1_score accepts. The 'sciie' entry previously read 'm2cro' (typo for 'macro').
dataset_dict = {'chemprot': (13, 'micro'), 'rct': (5, 'micro'),
                'CI': (6, 'macro'), 'sciie': (7, 'macro'),
                'HN': (2, 'macro'), 'ag': (4, 'macro'),
                'amazon': (2, 'macro'), 'imdb': (2, 'macro')}

In [3]:
# Choose the task, then unpack its (class count, F1 averaging mode) entry.
ds_name = 'chemprot'
n_labels, f1_type = dataset_dict[ds_name]

In [4]:
# The loading script is selected by the dataset name chosen above.
loader_script = f'data_loaders/{ds_name}_data_loader.py'
dataset = load_dataset(loader_script)

Reusing dataset task_dataset (/home/qiyuan/.cache/huggingface/datasets/task_dataset/task/1.0.0/5154b9e4ddb61d69749d3fc1f0afa65b89f7dac4c88e7d21a6d00c5090e813b4)


In [5]:
# Baseline: tokenizer, config and classifier are all loaded from the same
# checkpoint name; the config is given the task's label count.
base_checkpoint = 'roberta-base'
tokenizer = RobertaTokenizer.from_pretrained(base_checkpoint)
config = RobertaConfig.from_pretrained(base_checkpoint, num_labels=n_labels)
model = RobertaForSequenceClassification.from_pretrained(base_checkpoint, config=config)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [6]:
def encode_batch(batch):
    """Tokenize the 'text' field of a batch with the notebook's tokenizer.

    Sequences are truncated and padded to a fixed length of 120 tokens.
    Returns whatever the tokenizer call returns for the batch.
    """
    texts = batch['text']
    encoded = tokenizer(
        texts,
        max_length=120,
        truncation=True,
        padding='max_length',
    )
    return encoded

In [7]:
# Apply encode_batch in batched mode, then have the dataset hand back the
# listed columns in torch format.
dataset = dataset.map(encode_batch, batched=True)
model_columns = ['input_ids', 'attention_mask', 'labels']
dataset.set_format(type='torch', columns=model_columns)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))




In [8]:
def compute_accuracy(p: EvalPrediction):
    """Compute accuracy from an evaluation result.

    The predicted class for each row is the arg-max of `p.predictions` along
    axis 1; accuracy is the mean of the element-wise match with `p.label_ids`.

    Returns:
        dict with a single key 'acc'.
    """
    predicted_labels = np.argmax(p.predictions, axis=1)
    is_correct = predicted_labels == p.label_ids
    return {'acc': is_correct.mean()}

def compute_f1(p: EvalPrediction):
    """Compute the F1 score from an evaluation result.

    The predicted class for each row is the arg-max of `p.predictions` along
    axis 1. The averaging mode is read from the notebook-level `f1_type`.

    Returns:
        dict with a single key 'f1'.
    """
    predicted_labels = np.argmax(p.predictions, axis=1)
    score = f1_score(p.label_ids, predicted_labels, average=f1_type)
    return {'f1': score}

In [9]:
# Training configuration: 3 epochs, per-device batch size 16 for both
# training and evaluation, outputs under ./results.
t_a = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    remove_unused_columns=False,
)

# Baseline trainer: trains on the 'train' split, evaluates on 'validation',
# and reports the F1 metric from compute_f1.
trainer = Trainer(
    model=model,
    args=t_a,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    compute_metrics=compute_f1,
)

In [10]:
# Fine-tune the baseline model; the call's return value is the cell output.
trainer.train()

Step,Training Loss
500,1.089957


TrainOutput(global_step=783, training_loss=0.867498366159802)

In [11]:
# Evaluate the baseline model on the trainer's eval_dataset (the 'validation'
# split); the returned metrics dict is the cell output.
trainer.evaluate()

{'eval_loss': 0.7591868042945862, 'eval_f1': 0.789039967037495, 'epoch': 3.0}

In [12]:
# Second model: same classifier class and label count, loaded from a different
# checkpoint.
# NOTE(review): the checkpoint name is hard-coded to chemprot while `ds_name`
# is configurable — confirm the matching checkpoint when switching datasets.
tapt_checkpoint = 'allenai/dsp_roberta_base_tapt_chemprot_4169'
tapt_config = RobertaConfig.from_pretrained(tapt_checkpoint, num_labels=n_labels)
tapt_model = RobertaForSequenceClassification.from_pretrained(tapt_checkpoint, config=tapt_config)

Some weights of the model checkpoint at allenai/dsp_roberta_base_tapt_chemprot_4169 were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at allenai/dsp_roberta_base_tapt_chemprot_4169 and

In [13]:
# Trainer for the second model, using the same splits and metric as the
# baseline trainer.
# NOTE(review): reuses `t_a`, so it shares output_dir='./results' with the
# baseline run — confirm that sharing the directory is intended.
tapt_trainer = Trainer(
    model=tapt_model,
    args=t_a,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    compute_metrics=compute_f1,
)

In [14]:
# Fine-tune the second model; the call's return value is the cell output.
tapt_trainer.train()

Step,Training Loss
500,0.898746


TrainOutput(global_step=783, training_loss=0.6984208497964559)

In [15]:
# Evaluate the second model on the trainer's eval_dataset (the 'validation'
# split); the returned metrics dict is the cell output.
tapt_trainer.evaluate()

{'eval_loss': 0.7100901007652283, 'eval_f1': 0.826534816646065, 'epoch': 3.0}