In [17]:
from loading_dataset_toxigen import load_toxigen, post_process_toxigen
from hatebert_model import load_hatebert, tokenize_function, train_epoch_hatebert, evaluate_hatebert, evaluate_hatebert_with_bias
from torch.utils.data import DataLoader
import torch
import torch.nn as  nn
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score


In [18]:
toxigen = load_toxigen()

In [19]:
print(toxigen)

DatasetDict({
    test: Dataset({
        features: ['text', 'target_group', 'factual?', 'ingroup_effect', 'lewd', 'framing', 'predicted_group', 'stereotyping', 'intent', 'toxicity_ai', 'toxicity_human', 'predicted_author', 'actual_method', 'labels'],
        num_rows: 940
    })
    train: Dataset({
        features: ['text', 'target_group', 'factual?', 'ingroup_effect', 'lewd', 'framing', 'predicted_group', 'stereotyping', 'intent', 'toxicity_ai', 'toxicity_human', 'predicted_author', 'actual_method', 'labels'],
        num_rows: 8960
    })
})


In [20]:
tokenizer, model_hatebert = load_hatebert()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at GroNLP/hateBERT were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some we

In [21]:
'''
 apply the function to all the elements in the dataset (individually or in batches)
 https://huggingface.co/docs/datasets/v1.11.0/package_reference/main_classes.html?highlight=dataset%20map#datasets.Dataset.map
 batch mode is very powerful. It allows you to speed up processing
 more info here: https://huggingface.co/docs/datasets/en/about_map_batch
'''
cache_files = {
    "test": ".cache/datasets/toxigen/toxigen_test_tokenized.arrow",
    "train": ".cache/datasets/toxigen/toxigen_train_tokenized.arrow"
} #path to the local cache files, where the current computation from the following function will be stored. 
# Caching saves RAM when working with large datasets and saves time instead of doing transformations on the fly.
tokenized_toxigen = toxigen.map(lambda x: tokenize_function(tokenizer, x, "text"), batched=True, cache_file_names=cache_files)

In [22]:
print(tokenized_toxigen)


DatasetDict({
    test: Dataset({
        features: ['text', 'target_group', 'factual?', 'ingroup_effect', 'lewd', 'framing', 'predicted_group', 'stereotyping', 'intent', 'toxicity_ai', 'toxicity_human', 'predicted_author', 'actual_method', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 940
    })
    train: Dataset({
        features: ['text', 'target_group', 'factual?', 'ingroup_effect', 'lewd', 'framing', 'predicted_group', 'stereotyping', 'intent', 'toxicity_ai', 'toxicity_human', 'predicted_author', 'actual_method', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 8960
    })
})


In [23]:
tokenized_toxigen = post_process_toxigen(tokenized_toxigen)

In [24]:
# create a smaller subset of the dataset as previously shown to speed up the fine-tuning

small_train_dataset = tokenized_toxigen["train"].shuffle(seed=42).select(range(50))
small_eval_dataset = tokenized_toxigen["test"].shuffle(seed=42).select(range(50))

In [25]:
# create a DataLoader for your training and test datasets so you can iterate over batches of data:
train_dataloader = DataLoader(small_train_dataset, shuffle=True, batch_size=8)
test_dataloader = DataLoader(small_eval_dataset, batch_size=8)

In [26]:
print("HateBERT number of parameters: ", model_hatebert.num_parameters())

HateBERT number of parameters:  109483778


In [27]:
from torch.optim import AdamW

optimizer = AdamW(model_hatebert.parameters(), lr=5e-5)

In [28]:
from transformers import get_scheduler

num_epochs = 2
num_training_steps = num_epochs * len(train_dataloader)
# feel free to experiment with different num_warmup_steps
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=1, num_training_steps=num_training_steps
)

In [29]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model_hatebert.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [30]:
train_epoch_hatebert(model_hatebert, num_epochs, train_dataloader, optimizer, lr_scheduler, device, num_training_steps)

  0%|          | 0/14 [00:00<?, ?it/s]

Batch type: <class 'dict'>


  7%|▋         | 1/14 [00:50<10:59, 50.72s/it]

Batch type: <class 'dict'>


 14%|█▍        | 2/14 [01:26<08:22, 41.86s/it]

Batch type: <class 'dict'>


 21%|██▏       | 3/14 [01:57<06:47, 37.00s/it]

Batch type: <class 'dict'>


 29%|██▊       | 4/14 [02:37<06:21, 38.13s/it]

Batch type: <class 'dict'>


 36%|███▌      | 5/14 [03:10<05:26, 36.32s/it]

Batch type: <class 'dict'>


 43%|████▎     | 6/14 [03:43<04:42, 35.32s/it]

Batch type: <class 'dict'>


 50%|█████     | 7/14 [03:52<03:05, 26.43s/it]

Batch type: <class 'dict'>


 57%|█████▋    | 8/14 [04:20<02:43, 27.21s/it]

Batch type: <class 'dict'>


 64%|██████▍   | 9/14 [04:50<02:19, 27.94s/it]

Batch type: <class 'dict'>


 71%|███████▏  | 10/14 [05:21<01:55, 28.88s/it]

Batch type: <class 'dict'>


 79%|███████▊  | 11/14 [05:52<01:28, 29.42s/it]

Batch type: <class 'dict'>


 86%|████████▌ | 12/14 [06:22<00:59, 29.63s/it]

Batch type: <class 'dict'>


 93%|█████████▎| 13/14 [06:51<00:29, 29.39s/it]

Batch type: <class 'dict'>


100%|██████████| 14/14 [06:58<00:00, 29.91s/it]


In [31]:
evaluate_hatebert(model_hatebert, test_dataloader, device)

  0%|          | 0/7 [00:00<?, ?it/s]

Batch type: <class 'dict'>


 14%|█▍        | 1/7 [00:08<00:49,  8.20s/it]

Batch type: <class 'dict'>


 29%|██▊       | 2/7 [00:18<00:47,  9.47s/it]

Batch type: <class 'dict'>


 43%|████▎     | 3/7 [00:29<00:41, 10.35s/it]

Batch type: <class 'dict'>


 57%|█████▋    | 4/7 [00:47<00:40, 13.34s/it]

Batch type: <class 'dict'>


 71%|███████▏  | 5/7 [01:01<00:26, 13.44s/it]

Batch type: <class 'dict'>


 86%|████████▌ | 6/7 [01:15<00:13, 13.59s/it]

Batch type: <class 'dict'>


100%|██████████| 7/7 [01:19<00:00, 11.38s/it]

{'accuracy': 0.64}





In [32]:
evaluate_hatebert_with_bias(model_hatebert, test_dataloader, device)

Batch type: <class 'dict'>
Batch type: <class 'dict'>
Batch type: <class 'dict'>
Batch type: <class 'dict'>
Batch type: <class 'dict'>
Batch type: <class 'dict'>
Batch type: <class 'dict'>
Overall Accuracy: 0.6400
Accuracy for target group 'asian folks': 1.0000
Number of samples in group 'asian folks': 1
Confusion Matrix for 'asian folks':
[[1]]

Accuracy for target group 'black folks / african-americans': 0.5000
Number of samples in group 'black folks / african-americans': 2
Confusion Matrix for 'black folks / african-americans':
[[1 0]
 [1 0]]

Accuracy for target group 'black/african-american folks': 0.2500
Number of samples in group 'black/african-american folks': 4
Confusion Matrix for 'black/african-american folks':
[[1 0]
 [3 0]]

Accuracy for target group 'chinese folks': 1.0000
Number of samples in group 'chinese folks': 3
Confusion Matrix for 'chinese folks':
[[3]]

Accuracy for target group 'folks with mental disabilities': 0.2500
Number of samples in group 'folks with menta



0.64