## Fine-tuning a model with the Trainer API

Link: https://huggingface.co/learn/llm-course/chapter3/3

## Data Loading and Preprocessing

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Load the "mrpc" configuration of the "glue" dataset.
raw_datasets = load_dataset("glue", "mrpc")
# Checkpoint name reused for the tokenizer here and for every model loaded later in the notebook.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


def tokenize_function(example):
    """Tokenize the two sentence fields of ``example`` as a pair.

    Reads ``example["sentence1"]`` and ``example["sentence2"]`` and hands them to the
    module-level ``tokenizer`` with ``truncation=True``. The tokenizer's result is
    returned unchanged.
    """
    first, second = example["sentence1"], example["sentence2"]
    encoded = tokenizer(first, second, truncation=True)
    return encoded


# Run tokenize_function over the dataset via map(); batched=True is forwarded to map()
# (presumably tokenize_function then receives batches of examples - confirm against the datasets docs).
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
# Collator built from the same tokenizer; it is passed to both Trainer instances below.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

## Training

In [2]:
from transformers import TrainingArguments

# Training configuration for the first run; only the output directory is specified.
training_args = TrainingArguments(output_dir='test-trainer')

In [3]:
from transformers import AutoModelForSequenceClassification

# Load the checkpoint as a sequence-classification model configured for two labels.
# The warning printed below reports that classifier.bias / classifier.weight are newly initialized.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
from transformers import Trainer

# Assemble the first Trainer: model and arguments, the train/validation splits,
# the collator, and the tokenizer as processing class. No compute_metrics is passed here.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    processing_class=tokenizer,
)

In [5]:
# Run training; the cell's value is the TrainOutput shown below.
trainer.train()



Step,Training Loss
500,0.511
1000,0.2947




TrainOutput(global_step=1377, training_loss=0.3346653912668221, metrics={'train_runtime': 232.7319, 'train_samples_per_second': 47.282, 'train_steps_per_second': 5.917, 'total_flos': 405114969714960.0, 'train_loss': 0.3346653912668221, 'epoch': 3.0})

## Evaluation

In [6]:
# Run the trained model over the validation split.
predictions = trainer.predict(tokenized_datasets['validation'])
# Shapes of the prediction array and the label array (printed below as (408, 2) and (408,)).
print(predictions.predictions.shape, predictions.label_ids.shape)



(408, 2) (408,)


In [7]:
import numpy as np

# For each row, take the index of the largest value along the last axis: one class id per example.
preds = np.argmax(predictions.predictions, axis=-1)
preds

array([1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,

In [8]:
import evaluate

# Load the metric for the "glue" / "mrpc" pair and score the argmax predictions
# against the labels returned by trainer.predict().
metric = evaluate.load('glue', 'mrpc')
metric.compute(predictions=preds, references=predictions.label_ids)

Downloading builder script: 0.00B [00:00, ?B/s]

{'accuracy': 0.8651960784313726, 'f1': 0.9069373942470389}

In [9]:
def compute_metrics(eval_preds):
    """Score a batch of evaluation outputs with the GLUE/MRPC metric.

    Args:
        eval_preds: A ``(logits, labels)`` pair. ``logits`` is array-like with the
            class scores on the last axis; ``labels`` holds the reference class ids.

    Returns:
        Whatever ``metric.compute`` returns for the ``glue``/``mrpc`` metric (the
        standalone call earlier in this notebook yields a dict with ``accuracy``
        and ``f1``).
    """
    metric = evaluate.load('glue', 'mrpc')
    logits, labels = eval_preds
    # Collapse the per-class scores to a single predicted class id per example.
    predictions = np.argmax(logits, axis=-1)
    # The keyword is `references` (plural), matching the direct metric.compute call above;
    # the previous `reference=` spelling did not supply the labels to the metric.
    return metric.compute(predictions=predictions, references=labels)

In [12]:
# Second run: reload the model from the checkpoint and use a separate output directory.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
training_args = TrainingArguments(output_dir='test-trainer-v2')

# Same wiring as the first Trainer, plus compute_metrics.
# NOTE(review): no eval_strategy is passed to TrainingArguments here, and the recorded
# training log below lists only "Training Loss" - presumably compute_metrics is only
# invoked on an explicit evaluate()/predict() call with these settings; confirm.
trainer_v2 = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Train the second trainer (the one constructed with compute_metrics); the cell's value
# is the TrainOutput shown below.
trainer_v2.train()



Step,Training Loss
500,0.5115
1000,0.2892




TrainOutput(global_step=1377, training_loss=0.33050122950158434, metrics={'train_runtime': 230.9486, 'train_samples_per_second': 47.647, 'train_steps_per_second': 5.962, 'total_flos': 405114969714960.0, 'train_loss': 0.33050122950158434, 'epoch': 3.0})

## Advanced Training Features

### Mixed Precision Training

In [15]:
# Mixed-precision configuration: fp16 enabled, evaluation scheduled per epoch.
# NOTE(review): fp16=True presumably needs hardware with half-precision support - confirm
# before running on CPU-only machines.
training_args_adv = TrainingArguments(
    output_dir='test-trainer-fp16',
    fp16=True,
    eval_strategy='epoch',
)

### Gradient Accumulation

In [17]:
# Gradient-accumulation configuration: per-device batch size 4 with 4 accumulation steps
# (presumably 16 examples per optimizer update on one device - confirm), evaluated per epoch.
training_args = TrainingArguments(
    output_dir='test-trainer-grdAcc',
    gradient_accumulation_steps=4,
    per_device_train_batch_size=4,
    eval_strategy='epoch',
)

### Learning Rate Scheduling

In [18]:
# Learning-rate scheduling configuration: initial learning rate 2e-5 with the 'cosine'
# scheduler type, evaluated per epoch.
# The output directory is specific to this configuration. It previously reused
# 'test-trainer-grdAcc' from the gradient-accumulation cell, which would make both
# configurations write to the same directory.
training_args = TrainingArguments(
    'test-trainer-lrSched',
    eval_strategy='epoch',
    learning_rate=2e-5,
    lr_scheduler_type='cosine',
)