# This demo tests the effect of different language modelling heads

In [1]:
import sys

# Ensure we can run examples as-is in the package's poetry env by making the
# parent directory importable. The membership check keeps this cell idempotent:
# re-running it does not append a duplicate ".." entry to sys.path.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
import pandas as pd
import transformers
from datasets import load_dataset
from transformers import AutoTokenizer, TrainingArguments, AutoModelForSequenceClassification, Trainer
import torch
from grouphug import AutoMultiTaskModel, ClassificationHeadConfig, DatasetFormatter, LMHeadConfig, MultiTaskTrainer

from utils import compute_classification_metrics

  metrics = {k: load_metric(k) for k in ["accuracy", "f1", "recall", "precision", "matthews_correlation"]}


## A basic modelling task similar to the README example

In [3]:
# Checkpoint name used for the tokenizer here and for the model in a later cell.
base_model = "distilbert-base-uncased"

# "emotion" configuration of the tweet_eval dataset.
tweet_emotion = load_dataset(path="tweet_eval", name="emotion")

tokenizer = AutoTokenizer.from_pretrained(base_model)
# NOTE(review): a pad-token override (pad_token = eos_token, pad_token_id = eos_token_id)
# was previously left commented out here; presumably only needed for tokenizers
# that ship without a pad token — confirm before reinstating.

# Tokenize every split with the tokenizer loaded above.
formatter = DatasetFormatter().tokenize()
data = formatter.apply(tweet_emotion, tokenizer=tokenizer)

Found cached dataset tweet_eval (/root/.cache/huggingface/datasets/tweet_eval/emotion/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/tweet_eval/emotion/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343/cache-b61659831d00a363.arrow


  0%|          | 0/15 [00:00<?, ?ba/s]

Loading cached processed dataset at /root/.cache/huggingface/datasets/tweet_eval/emotion/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343/cache-47f3ee1b71ce58c0.arrow


In [4]:
# Load the base checkpoint with a sequence-classification head on top.
model = AutoModelForSequenceClassification.from_pretrained(
    base_model,
    # Hard-coded class count; presumably matches the tweet_eval "emotion" labels — TODO confirm.
    num_labels=4,
)
# Copy the tokenizer's padding id onto the model config so both use the same value.
model.config.pad_token_id = tokenizer.pad_token_id

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifi

In [5]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(pred):
    """Summarise an evaluation pass as accuracy and weighted precision/recall/F1.

    Args:
        pred: Object exposing ``label_ids`` (the reference labels) and
            ``predictions`` (per-class scores; the predicted label is the
            argmax over the last axis).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    y_true = pred.label_ids
    # The highest-scoring entry along the last axis is taken as the predicted class.
    y_pred = pred.predictions.argmax(-1)

    # The fourth element of the returned tuple is not reported.
    weighted_precision, weighted_recall, weighted_f1, _unused = precision_recall_fscore_support(
        y_true, y_pred, average='weighted'
    )

    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    scores['f1'] = weighted_f1
    scores['precision'] = weighted_precision
    scores['recall'] = weighted_recall
    return scores

In [6]:
# Evaluate once per epoch and log every 100 steps; checkpoint saving is disabled.
training_args = TrainingArguments(
    output_dir="../output",
    num_train_epochs=10,
    logging_steps=100,
    evaluation_strategy="epoch",
    save_strategy="no",
)

trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=data["data", "train"],
    # NOTE(review): per-epoch evaluation uses the "test" split — confirm this is intended.
    eval_dataset=data["data", "test"],
    compute_metrics=compute_metrics,
)
trainer.train()

The following columns in the training set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 3257
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 4080
You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6566,0.641721,0.78114,0.773309,0.783692,0.78114
2,0.4078,0.665775,0.791696,0.792081,0.793982,0.791696
3,0.2583,0.943289,0.791696,0.790663,0.791703,0.791696
4,0.126,1.112468,0.795215,0.794374,0.79797,0.795215
5,0.0848,1.258937,0.795215,0.793691,0.794703,0.795215
6,0.0461,1.354011,0.783955,0.784712,0.789452,0.783955
7,0.0283,1.423641,0.788177,0.788877,0.790249,0.788177
8,0.0207,1.457274,0.793807,0.793281,0.793114,0.793807


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1421
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1421
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: special_tokens_mask. If special_tokens_mask are not expected by `DistilBertForSequenceClassification.forward

KeyboardInterrupt: 