# Data Cartography for Multi-class Classification

Goals:
1. Subclass `Trainer` and overwrite to save out `ids`, `logits`, `gold_label` for each example after each training step.
2. Use plotting utilities from Data Cartography repo to generate plots

In [1]:
!pip install --upgrade transformers torch datasets evaluate accelerate peft ipywidgets scikit-learn

Collecting peft
  Downloading peft-0.7.1-py3-none-any.whl (168 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: peft
Successfully installed peft-0.7.1


## Load, explore, and prepare data

In [1]:
from datasets import load_dataset

dataset = load_dataset("lmsys/toxic-chat")

In [2]:
dataset = dataset.rename_columns({"user_input": "text", "toxicity": "label"})

In [3]:
dataset

DatasetDict({
    train: Dataset({
        features: ['conv_id', 'text', 'model_output', 'human_annotation', 'label', 'jailbreaking', 'openai_moderation'],
        num_rows: 5082
    })
    test: Dataset({
        features: ['conv_id', 'text', 'model_output', 'human_annotation', 'label', 'jailbreaking', 'openai_moderation'],
        num_rows: 5083
    })
})

In [4]:
dataset["train"][0]

{'conv_id': 'e0c9b3e05414814485dbdcb9a29334d502e59803af9c26df03e9d1de5e7afe67',
 'text': 'Masturbacja jest proces co oitrzebuje',
 'model_output': 'Masturbacja to proces, który może pozytywnie wpłynąć na zdrowie psychiczne i fizyczne człowieka, ponieważ pomaga w relaksie, redukuje stres i pomaga w uśpieniu. Może też być używana jako dodatkowa form',
 'human_annotation': True,
 'label': 0,
 'jailbreaking': 0.0,
 'openai_moderation': "[['sexual', 0.16727015376091003], ['hate', 0.0014814797323197126], ['violence', 7.014749280642718e-05], ['self-harm', 5.5694090406177565e-06], ['sexual/minors', 4.127006832277402e-06], ['violence/graphic', 5.00143471526826e-07], ['hate/threatening', 2.3769071155754773e-09]]"}

#### Let's see if/how many examples will be truncated

In [5]:
from transformers import AutoTokenizer

model_id = "distilbert-base-uncased"
# model_id = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [6]:
def check_seq_length(dataset, tokenizer):
    """Check to see how many samples are longer than max seq length"""

    print("----Number samples longer than model's context length----")

    dataset = dataset.map(
        lambda examples: {"num_tokens": len(tokenizer(examples["text"])["input_ids"])}
    )

    for split in ["test", "train"]:
        df = dataset["train"].to_pandas()
        count = len(df.loc[df["num_tokens"] >= tokenizer.model_max_length])

        print(f"{split.upper()}: {count}")

In [7]:
check_seq_length(dataset, tokenizer)

----Number samples longer than model's context length----


Map:   0%|          | 0/5083 [00:00<?, ? examples/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (721 > 512). Running this sequence through the model will result in indexing errors


TEST: 15
TRAIN: 15


#### Now preprocess the data

In [8]:
# First lets add a unique _numeric_ id to each example (required for tensors)
def add_id(example, idx):
    example["id"] = idx
    return example


# Apply the function to each example in the dataset
dataset = dataset.map(add_id, with_indices=True)

In [9]:
def preprocess_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
    )


tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/5082 [00:00<?, ? examples/s]

Map:   0%|          | 0/5083 [00:00<?, ? examples/s]

In [12]:
tokenized_dataset = tokenized_dataset.remove_columns(
    [
        "conv_id",
        "text",
        "model_output",
        "human_annotation",
        "jailbreaking",
        "openai_moderation",
    ]
)

In [13]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['label', 'id', 'input_ids', 'attention_mask'],
        num_rows: 5082
    })
    test: Dataset({
        features: ['label', 'id', 'input_ids', 'attention_mask'],
        num_rows: 5083
    })
})

## Train a baseline model

In [14]:
from transformers import (
    DataCollatorWithPadding,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'classifier.weight', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
import numpy as np
from evaluate import load

f1_metric = load("f1")


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    f1 = f1_metric.compute(predictions=predictions, references=labels)["f1"]

    return {"f1": f1}

In [16]:
training_args = TrainingArguments(
    output_dir="toxicity_baseline",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# trainer.train()

In [15]:
trainer.evaluate(tokenized_dataset["test"])

{'eval_loss': 0.2474513202905655,
 'eval_f1': 0.7290780141843971,
 'eval_runtime': 12.4033,
 'eval_samples_per_second': 409.81,
 'eval_steps_per_second': 12.819,
 'epoch': 10.0}

In [18]:
# trainer.model.save_pretrained("toxicity_baseline_model")

In [28]:
?TrainingArguments

[0;31mInit signature:[0m
[0mTrainingArguments[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0moutput_dir[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0moverwrite_output_dir[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdo_train[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdo_eval[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdo_predict[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mevaluation_strategy[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mtrainer_utils[0m[0;34m.[0m[0mIntervalStrategy[0m[0;34m,[0m [0mstr[0m[0;34m][0m [0;34m=[0m [0;34m'no'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprediction_loss_only[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0

## Setup `Trainer` for data cartography

In [24]:
class IdDataCollatorWithPadding(DataCollatorWithPadding):
    """Data collator that removes "id" col from features for padding, then adds it back"""

    def __call__(self, features):
        # remove "id" col from features for padding
        if "id" in features[0]:
            ids = [feature.pop("id") for feature in features]

        # use parent class method for padding
        batch = super().__call__(features)

        # add "id" col back to features
        batch["id"] = ids

        return batch

In [25]:
id_data_collator = IdDataCollatorWithPadding(tokenizer=tokenizer)

In [50]:
import os
import json
from pathlib import Path
from peft import PeftModel
from transformers.modeling_utils import unwrap_model
from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
from transformers.utils import is_peft_available


def _is_peft_model(model):
    return is_peft_available() and isinstance(model, PeftModel)


class DataCartographyTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        """
        How the loss is computed by Trainer. By default, all models return the loss in the first element.

        Subclass and override for custom behavior.
        """
        if self.label_smoother is not None and "labels" in inputs:
            labels = inputs.pop("labels")
        else:
            labels = None

        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            labels=inputs["labels"],
        )

        # Save data cartography outputs
        if model.training:
            if "id" in inputs:
                ids_out = inputs["id"]

            logits_out = outputs["logits"].detach().cpu().tolist()
            labels_out = inputs["labels"].detach().cpu().tolist()

            current_epoch = int(self.state.epoch)

            directory = Path(self.args.output_dir) / "training_dynamics"
            os.makedirs(directory, exist_ok=True)
            filename = directory / f"dynamics_epoch_{current_epoch}.jsonl"

            with open(filename, "a") as f:
                for id, logit, label in zip(ids_out, logits_out, labels_out):
                    data_dict = {
                        "guid": id,
                        f"logits_epoch_{current_epoch}": logit,
                        "gold": label,
                    }
                    json_line = json.dumps(data_dict)
                    f.write(json_line + "\n")

        # Save past state if it exists
        # TODO: this needs to be fixed and made cleaner later.
        if self.args.past_index >= 0:
            self._past = outputs[self.args.past_index]

        if labels is not None:
            unwrapped_model = unwrap_model(model)
            if _is_peft_model(unwrapped_model):
                model_name = unwrapped_model.base_model.model._get_name()
            else:
                model_name = unwrapped_model._get_name()
            if model_name in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.values():
                loss = self.label_smoother(outputs, labels, shift_labels=True)
            else:
                loss = self.label_smoother(outputs, labels)
        else:
            if isinstance(outputs, dict) and "loss" not in outputs:
                raise ValueError(
                    "The model did not return a loss from the inputs, only the following keys: "
                    f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
                )
            # We don't use .loss here since the model may return tuples instead of ModelOutput.
            loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]

        return (loss, outputs) if return_outputs else loss

In [51]:
training_args = TrainingArguments(
    output_dir="TESTING",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    push_to_hub=False,
    remove_unused_columns=False,
)

trainer = DataCartographyTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"].select(range(100)),
    eval_dataset=tokenized_dataset["test"].select(range(100)),
    tokenizer=tokenizer,
    data_collator=id_data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,F1
1,0.3379,0.651507,0.64
2,0.3375,0.660949,0.626263
3,0.3131,0.662676,0.612245


Checkpoint destination directory TESTING/checkpoint-4 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory TESTING/checkpoint-8 already exists and is non-empty.Saving will proceed but saved results may be invalid.
Checkpoint destination directory TESTING/checkpoint-12 already exists and is non-empty.Saving will proceed but saved results may be invalid.


TrainOutput(global_step=12, training_loss=0.3294784526030223, metrics={'train_runtime': 19.7297, 'train_samples_per_second': 15.206, 'train_steps_per_second': 0.608, 'total_flos': 28461862483104.0, 'train_loss': 0.3294784526030223, 'epoch': 3.0})