# Multi label classification - toxicity classification

Inspired by the following:

https://colab.research.google.com/drive/1aue7x525rKy6yYLqqt-5Ll96qjQvpqS7

**Additional datasets to try**

Twitter News : Financial categorization
https://huggingface.co/datasets/zeroshot/twitter-financial-news-topic

**Learnings:**

1. Prepare datasets for multi label classification
2. Learn about evaluate function
3. Effect of hyperparameters on evaluation loss
4. Use of custom metric for evaluation
   

In [1]:
import transformers
import datasets

print(f"Running on transformers v{transformers.__version__} and datasets v{datasets.__version__}")

Running on transformers v4.46.3 and datasets v3.1.0


In [2]:
import torch
from pathlib import Path
from datasets import load_dataset
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, 
                          TrainingArguments, Trainer)

## 1. Load dataset

Loads a very small demo dataset 

In [3]:
from datasets import load_dataset

dataset_id = "acloudfan/toxicity-multi-label-classifier"

# Number of labels in the dataset ['toxic', 'threat', 'insult', 'identity_hate']
NUM_LABELS=4

# Load
dataset = load_dataset(dataset_id)

# Show one row
dataset['train'][0]

{'comment_text': '"""Nazi filth"" is impolite  04:27, 20 Jan 2004 (UTC)\n\n"',
 'toxic': 1,
 'threat': 0,
 'insult': 1,
 'identity_hate': 0}

## 2. Create labels column

Create a column named labels with *One Hot Encoding*.

Trainer requires the **labels** column with all classes encoded in a list such as [1,0,1,0]

At the end of this we will have an additional column with 1's and 0s for applicable labels.

In [4]:
# create labels column
cols = dataset["train"].column_names

# Print column names
print(cols)

# Function for pre-processing the labels column
def pre_process_ohc(sample):
    labels = []

    # Create the labels array
    for col in cols:
        if col != "comment_text":
            labels.append(sample[col])
    
    sample["labels"]=labels
    return sample

# Call map on dataset to pre-process
dataset_pre_processed = dataset.map(pre_process_ohc)

dataset_pre_processed
   

['comment_text', 'toxic', 'threat', 'insult', 'identity_hate']


DatasetDict({
    train: Dataset({
        features: ['comment_text', 'toxic', 'threat', 'insult', 'identity_hate', 'labels'],
        num_rows: 89
    })
    validation: Dataset({
        features: ['comment_text', 'toxic', 'threat', 'insult', 'identity_hate', 'labels'],
        num_rows: 35
    })
    test: Dataset({
        features: ['comment_text', 'toxic', 'threat', 'insult', 'identity_hate', 'labels'],
        num_rows: 35
    })
})

## 3. Tokenize

Pay attention to the *problem_type*, this leads to the selection of the loss function that applies to multi-label classification. Cross entropy is used instead of MSE. After tokenization, we need to remove all columns expcept [labels, input_ids, attention_mask]

In [5]:
# Feel free to try out other models
MODEL_ID = "distilbert-base-uncased"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, problem_type="multi_label_classification")

In [6]:
# Encoder function
def tokenize_and_encode(examples):
  return tokenizer(examples["comment_text"], truncation=True)

In [7]:
# Prepare list of columns, that will be deleted - labels column will be kept
cols = dataset_pre_processed["train"].column_names
cols.remove("labels")

# Tokenize, Encode and remove columns
ds_enc = dataset_pre_processed.map(tokenize_and_encode, batched=True, remove_columns=cols)
ds_enc

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 89
    })
    validation: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 35
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 35
    })
})

In [8]:
# PyTorch requires labels to be Float, cast labels to floats
ds_enc.set_format("torch")

# Create a new column 'float_labels' to hold the labels, remove labels column, rename float_labels to labels
ds_enc = (ds_enc
          .map(lambda x : {"float_labels": x["labels"].to(torch.float)}, remove_columns=["labels"])
          .rename_column("float_labels", "labels"))

## 4. Load model

Load the model for training using the AutoModelForSequenceClassification

In [9]:
num_labels=4
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, num_labels=num_labels, problem_type="multi_label_classification") #.to('cuda')

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## 5. Create Trainer & Evaluate

You may change training arguments to make the model perform better and see its impact on evaluation loss.
* Change the epoch to 2, 3, 4, 5  and observe the eval loss

In [10]:
# ds_enc["train"][0]

# Adjust number of epochs and see the change in loss
eval_strategy='epoch'
num_train_epochs=5

# Set if eval strategy is steps
eval_steps = 5

# Create the training arguments
args = TrainingArguments("./temp", num_train_epochs=num_train_epochs,  report_to='none', eval_strategy=eval_strategy, eval_steps=eval_steps)

trainer = Trainer(model=model, args=args, train_dataset=ds_enc["train"], eval_dataset=ds_enc["test"], tokenizer=tokenizer)

  trainer = Trainer(model=model, args=args, train_dataset=ds_enc["train"], eval_dataset=ds_enc["test"], tokenizer=tokenizer)


In [11]:
trainer.evaluate()

{'eval_loss': 0.7024906277656555,
 'eval_model_preparation_time': 0.0032,
 'eval_runtime': 3.833,
 'eval_samples_per_second': 9.131,
 'eval_steps_per_second': 1.304}

## 6. Run fine tuning & Evaluate

In [12]:
trainer.train()

Step,Training Loss,Validation Loss,Model Preparation Time
10,No log,0.582331,0.0032
20,No log,0.562377,0.0032
30,No log,0.539277,0.0032
40,No log,0.517577,0.0032
50,No log,0.501184,0.0032
60,No log,0.495507,0.0032


TrainOutput(global_step=60, training_loss=0.5028909365336101, metrics={'train_runtime': 212.8337, 'train_samples_per_second': 2.091, 'train_steps_per_second': 0.282, 'total_flos': 23880687587568.0, 'train_loss': 0.5028909365336101, 'epoch': 5.0})

In [13]:
trainer.evaluate()

{'eval_loss': 0.4955069422721863,
 'eval_model_preparation_time': 0.0032,
 'eval_runtime': 2.1477,
 'eval_samples_per_second': 16.296,
 'eval_steps_per_second': 2.328,
 'epoch': 5.0}

## 7. Test the model

In [14]:


def predict_labels_with_scores(model, text):
    """
    Predicts confidence scores for all labels for a given text using a fine-tuned model.

    Args:
        model: The fine-tuned multi-label classification model.
        text (str): The input text to predict labels for.

    Returns:
        list: A list of confidence scores for all labels.
              Example: [0.95, 0.12, 0.87, 0.45] for 4 labels.
    """

    
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    print(inputs)
    # Ensure the model is in evaluation mode
    model.eval()
    
    # Disable gradient calculation for inference
    with torch.no_grad():
        # Forward pass: get the logits from the model
        outputs = model(**inputs)
    
    # Get the logits (raw predictions) from the model
    logits = outputs.logits
    
    # Apply sigmoid to convert logits to probabilities (for multi-label classification)
    probabilities = torch.sigmoid(logits).squeeze().tolist()
    
    # If probabilities is a single value (e.g., for single-sample inputs), convert it to a list
    if isinstance(probabilities, float):
        probabilities = [probabilities]
    
    return probabilities

# Example usage:
# Assuming `model` is your fine-tuned multi-label classification model and `text` is the input text
# predicted_scores = predict_labels_with_scores(model, "Your input text here")
# print(predicted_scores)

In [15]:
# id2label={'0':'toxic', '1':'threat', '2':'insult', '3':'identity_hate'}
predict_labels_with_scores(trainer.model,"I will kill you")

{'input_ids': tensor([[ 101, 1045, 2097, 3102, 2017,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}


[0.7324583530426025,
 0.6496965885162354,
 0.28404712677001953,
 0.2604326605796814]