# Finetuning HateBERT (Topics)

This notebook fine-tunes HateBERT on topics generated from the NLPositionality Toxicity and Hate Speech dataset.

Pretrained HateBERT: https://huggingface.co/tomh/toxigen_hatebert

NLPositionality: https://nlpositionality.cs.washington.edu/toxicity/

Runs available on Kaggle:

https://www.kaggle.com/code/haniyehehsani/toxigen-topics

In [None]:
!pip install accelerate -U
!pip install datasets
!pip install evaluate
!pip install transformers
!pip install transformers[torch]

Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/280.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m276.5/280.0 kB[0m [31m9.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.27.2
Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloadi

In [None]:
# Mount Google Drive in the Colab runtime and switch into the shared project
# folder, so the relative `data/...` paths used later in the notebook resolve
# against it.
from google.colab import drive
import sys

drive.mount('/content/drive')
%cd /content/drive/My Drive/247 Project

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/.shortcut-targets-by-id/11dCuBITl5umJqjJki52-YAId8zeeW9i7/247 Project


In [None]:
# List the contents of the current working directory (the project folder).
%ls

 christina-finetuning-hatebert.ipynb
'Christina marg-finetuning-tweetbert.ipynb'
'Claire Christina marg-finetuning-tweetbert.ipynb'
 claire-finetuning-tweetbert.ipynb
 copy-marg-load-for-csv.ipynb
'Copy of Claire Christina marg-finetuning-tweetbert.ipynb'
'Copy of Claire Copy of Copy of Copy of marg-finetuning-tweetbert (1).ipynb'
'Copy of Claire Copy of Copy of Copy of marg-finetuning-tweetbert.ipynb'
'Copy of Copy of marg-finetuning-tweetbert.ipynb'
'Copy of marg-finetuning-tweetbert.ipynb'
'CS 247 Final Presentation.gslides'
'CS 247 Project Results.gsheet'
 [0m[01;34mdata[0m/
 evp-marg-finetuning-tweetbert-full.ipynb
 finetuning-hatebert-full.ipynb
 finetuning-hatebert-topics.ipynb
 lda-split.ipynb
 lda-topic-modeling.ipynb
 marg-finetuning-tweetbert-full.ipynb
 marg-finetuning-tweetbert.ipynb
 marg-load-for-csv.ipynb
 [01;34mmodels[0m/
 nlpositionality-analysis.ipynb
'Project Ideas.gdoc'
'Project Proposal.gdoc'
 [01;34mresults[0m/
 [01;34mruns[0m/
 tech-marg-finetuning-twe

In [None]:
import nltk
import torch
import numpy as np
import pandas as pd

In [None]:
# Seed PyTorch's global random number generator. The call returns the
# Generator object, which is what the cell displays as its output.
torch.manual_seed(0)

<torch._C.Generator at 0x78a4f6405530>

In [None]:
# Shift the labels up by one, since negative labels cannot be used for training.

def shift(example):
    """Add 1 to the ``labels`` field of a single dataset row.

    The row is updated in place and the same object is returned, which is the
    shape of function ``Dataset.map`` expects for per-example mapping.

    NOTE(review): presumably the raw labels are -1/0/1 and end up as 0/1/2
    after this shift -- confirm against the CSV files.
    """
    shifted_label = example["labels"] + 1
    example["labels"] = shifted_label
    return example

In [None]:
from datasets import load_dataset, Dataset, DatasetDict

# Load train, test and validation sets for each topic


def _load_topic_split(split, topic):
  """Read one per-topic CSV split and return it as a label-shifted ``Dataset``.

  Args:
    split: Split name as it appears in the file name ("train", "val" or "test").
    topic: Integer topic index as it appears in the file name.

  Returns:
    A ``datasets.Dataset`` holding every CSV column plus a ``labels`` column,
    which is ``litw`` cast to int and then passed through ``shift``.
  """
  frame = pd.read_csv(f'data/toxicity_processed_{split}_topic_{topic}.csv')
  frame['labels'] = frame['litw'].astype(int)
  return Dataset.from_pandas(frame).map(shift)


train_datasets = []
test_datasets = []
val_datasets = []


num_topics = 3
for t in range(num_topics):
  # Splits are loaded in train, val, test order for each topic.
  # `train_data` / `val_data` / `test_data` are deliberately kept as
  # notebook-level variables: a later cell inspects `train_data` directly.
  train_data = _load_topic_split('train', t)
  train_datasets.append(train_data)

  val_data = _load_topic_split('val', t)
  val_datasets.append(val_data)

  test_data = _load_topic_split('test', t)
  test_datasets.append(test_data)


Map:   0%|          | 0/1596 [00:00<?, ? examples/s]

Map:   0%|          | 0/168 [00:00<?, ? examples/s]

Map:   0%|          | 0/433 [00:00<?, ? examples/s]

Map:   0%|          | 0/1839 [00:00<?, ? examples/s]

Map:   0%|          | 0/223 [00:00<?, ? examples/s]

Map:   0%|          | 0/549 [00:00<?, ? examples/s]

Map:   0%|          | 0/1672 [00:00<?, ? examples/s]

Map:   0%|          | 0/177 [00:00<?, ? examples/s]

Map:   0%|          | 0/437 [00:00<?, ? examples/s]

In [None]:
# Spot-check one shifted label. `train_data` is the variable left over from the
# loading loop, i.e. the training split of the last topic that was loaded.
train_data[0]["labels"]

2

In [None]:
# Setup tokenizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer


tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# Do not assign `eos_token` as the padding token here: BERT tokenizers define
# no EOS token, so that assignment would set `pad_token` to None.
# Only fall back to adding '[PAD]' if the tokenizer has no padding token.
if tokenizer.pad_token is None:
  tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load the ToxiGen HateBERT checkpoint with a 3-class classification head.
# The checkpoint's own head has 2 classes, so `ignore_mismatched_sizes=True`
# lets the classifier weights be freshly initialized instead of raising.
hatebert = AutoModelForSequenceClassification.from_pretrained("tomh/toxigen_hatebert", num_labels=3, ignore_mismatched_sizes=True)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at tomh/toxigen_hatebert and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([2, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def tokenize_function(examples):
    """Tokenize the ``action`` text of a batch of examples.

    Uses the notebook-level ``tokenizer``; every sequence is padded to the
    tokenizer's maximum length and longer sequences are truncated.
    """
    texts = examples["action"]
    return tokenizer(texts, padding="max_length", truncation=True)

In [None]:
# Tokenize the train, validation and test splits of every topic.

def _tokenize_each(datasets_per_topic):
    """Apply ``tokenize_function`` (batched) to each dataset in the list."""
    return [ds.map(tokenize_function, batched=True) for ds in datasets_per_topic]


tokenized_trains = _tokenize_each(train_datasets)
tokenized_vals = _tokenize_each(val_datasets)
tokenized_tests = _tokenize_each(test_datasets)


Map:   0%|          | 0/1596 [00:00<?, ? examples/s]

Map:   0%|          | 0/1839 [00:00<?, ? examples/s]

Map:   0%|          | 0/1672 [00:00<?, ? examples/s]

Map:   0%|          | 0/168 [00:00<?, ? examples/s]

Map:   0%|          | 0/223 [00:00<?, ? examples/s]

Map:   0%|          | 0/177 [00:00<?, ? examples/s]

Map:   0%|          | 0/433 [00:00<?, ? examples/s]

Map:   0%|          | 0/549 [00:00<?, ? examples/s]

Map:   0%|          | 0/437 [00:00<?, ? examples/s]

In [None]:
# The target is `labels` (derived from `litw`); drop the raw `litw` column and
# every other column that is not passed to the model.
columns_to_drop = ['litw', 'action', 'dynahate', 'perspective', 'rewire',
                   'hateroberta', 'gpt4', 'gender', 'ethnicity', 'annotator_id']

tokenized_trains = [tokens.remove_columns(columns_to_drop) for tokens in tokenized_trains]
tokenized_vals = [tokens.remove_columns(columns_to_drop) for tokens in tokenized_vals]
tokenized_tests = [tokens.remove_columns(columns_to_drop) for tokens in tokenized_tests]

In [None]:
# Set format to torch for every tokenized split of every topic.
# Iterating the lists directly (instead of a hard-coded range(3)) keeps this
# cell correct if the number of topics changes.
for split_per_topic in (tokenized_trains, tokenized_vals, tokenized_tests):
    for tokenized in split_per_topic:
        tokenized.set_format("torch")

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU, and move
# the HateBERT model onto that device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = hatebert
model = model.to(device)

In [None]:
# Print every parameter name and freeze the ones under the BERT embeddings and
# encoder; parameters outside these prefixes keep their current requires_grad.
frozen_prefixes = ("bert.encoder", "bert.embeddings")  # choose whatever you like here
for name, param in model.named_parameters():
  print(name)
  if name.startswith(frozen_prefixes):
    param.requires_grad = False

bert.embeddings.word_embeddings.weight
bert.embeddings.position_embeddings.weight
bert.embeddings.token_type_embeddings.weight
bert.embeddings.LayerNorm.weight
bert.embeddings.LayerNorm.bias
bert.encoder.layer.0.attention.self.query.weight
bert.encoder.layer.0.attention.self.query.bias
bert.encoder.layer.0.attention.self.key.weight
bert.encoder.layer.0.attention.self.key.bias
bert.encoder.layer.0.attention.self.value.weight
bert.encoder.layer.0.attention.self.value.bias
bert.encoder.layer.0.attention.output.dense.weight
bert.encoder.layer.0.attention.output.dense.bias
bert.encoder.layer.0.attention.output.LayerNorm.weight
bert.encoder.layer.0.attention.output.LayerNorm.bias
bert.encoder.layer.0.intermediate.dense.weight
bert.encoder.layer.0.intermediate.dense.bias
bert.encoder.layer.0.output.dense.weight
bert.encoder.layer.0.output.dense.bias
bert.encoder.layer.0.output.LayerNorm.weight
bert.encoder.layer.0.output.LayerNorm.bias
bert.encoder.layer.1.attention.self.query.weight
bert.enc

In [None]:
import numpy as np
import evaluate

# F1 metric object shared by the evaluation callback below.
metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    """Return macro-averaged F1 for one evaluation pass.

    Args:
        eval_pred: A ``(logits, labels)`` pair; the predicted class of each
            row is the argmax of its logits over the last axis.

    Returns:
        Whatever ``metric.compute`` returns for the macro-averaged F1.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted_classes, references=references, average="macro")

In [None]:
import copy

from transformers import TrainingArguments, Trainer

# Setup training args for each topic and directories for saving trained models and predictions

num_topics = 3
trainer = []
model_dirs = []
result_dirs = []

for t in range(num_topics):
  # NOTE(review): newer transformers releases spell `evaluation_strategy` as
  # `eval_strategy` -- confirm against the installed version.
  training_args = TrainingArguments(output_dir=f"test_trainer/hatebert/topic{t}",
                                         num_train_epochs=70,
                                         evaluation_strategy="epoch")

  # Give every topic its own copy of the (partially frozen) starting model.
  # Passing the same `model` object to all trainers would make each topic's
  # training continue from the weights already fine-tuned on earlier topics.
  # The copies are made here, before any training has run, and deepcopy keeps
  # each parameter's requires_grad setting and device.
  topic_model = copy.deepcopy(model)

  trainer.append(Trainer(model=topic_model,
                         args=training_args,
                         train_dataset=tokenized_trains[t],
                         eval_dataset=tokenized_vals[t],
                         compute_metrics=compute_metrics))

  # Where the fine-tuned model for this topic is saved.
  model_dir = f"/content/drive/My Drive/247 Project/models/hatebert/topic{t}"
  model_dirs.append(model_dir)

  # CSV file that receives this topic's test rows and predictions.
  result_dir = f"/content/drive/My Drive/247 Project/results/hatebert/metric_results-topic{t}.csv"
  result_dirs.append(result_dir)


In [None]:
# Fine-tune on the topic 0 training split, then save the resulting model to
# the topic 0 model directory.
topic_trainer, topic_model_dir = trainer[0], model_dirs[0]
topic_trainer.train()
topic_trainer.save_model(topic_model_dir)

In [None]:
# Predict on the topic 0 test split and report macro-F1 (shown as the cell
# output). `preds` holds the predicted class per row, in the shifted label space.
predictions = trainer[0].predict(tokenized_tests[0])
test_logits, test_labels = predictions.predictions, predictions.label_ids
preds = np.argmax(test_logits, axis=-1)
metric = evaluate.load("f1")
metric.compute(predictions=preds, references=test_labels, average="macro")

In [None]:

# Rebuild a DataFrame from the untokenized topic 0 test split (all original
# columns) and attach the predicted class of each row.
test_dataset_dict = test_datasets[0].to_dict()

# Kept as a notebook-level variable; it is not used further in this cell.
annotator_ids = test_datasets[0]["annotator_id"]

results_df = pd.DataFrame(test_dataset_dict).assign(predictions=preds)

# Write the rows plus predictions to the topic 0 results CSV.
results_df.to_csv(result_dirs[0], index=False)


In [None]:
# Fine-tune on the topic 1 training split, then save the resulting model to
# the topic 1 model directory.
topic_trainer, topic_model_dir = trainer[1], model_dirs[1]
topic_trainer.train()
topic_trainer.save_model(topic_model_dir)

In [None]:
# Predict on the topic 1 test split and report macro-F1 (shown as the cell
# output). `preds` holds the predicted class per row, in the shifted label space.
predictions = trainer[1].predict(tokenized_tests[1])
test_logits, test_labels = predictions.predictions, predictions.label_ids
preds = np.argmax(test_logits, axis=-1)
metric = evaluate.load("f1")
metric.compute(predictions=preds, references=test_labels, average="macro")

In [None]:

# Rebuild a DataFrame from the untokenized topic 1 test split (all original
# columns) and attach the predicted class of each row.
test_dataset_dict = test_datasets[1].to_dict()

# Kept as a notebook-level variable; it is not used further in this cell.
annotator_ids = test_datasets[1]["annotator_id"]

results_df = pd.DataFrame(test_dataset_dict).assign(predictions=preds)

# Write the rows plus predictions to the topic 1 results CSV.
results_df.to_csv(result_dirs[1], index=False)


In [None]:
# Fine-tune on the topic 2 training split, then save the resulting model to
# the topic 2 model directory.
topic_trainer, topic_model_dir = trainer[2], model_dirs[2]
topic_trainer.train()
topic_trainer.save_model(topic_model_dir)

In [None]:
# Predict on the topic 2 test split and report macro-F1 (shown as the cell
# output). `preds` holds the predicted class per row, in the shifted label space.
predictions = trainer[2].predict(tokenized_tests[2])
test_logits, test_labels = predictions.predictions, predictions.label_ids
preds = np.argmax(test_logits, axis=-1)
metric = evaluate.load("f1")
metric.compute(predictions=preds, references=test_labels, average="macro")

In [None]:

# Rebuild a DataFrame from the untokenized topic 2 test split (all original
# columns) and attach the predicted class of each row.
test_dataset_dict = test_datasets[2].to_dict()

# Kept as a notebook-level variable; it is not used further in this cell.
annotator_ids = test_datasets[2]["annotator_id"]

results_df = pd.DataFrame(test_dataset_dict).assign(predictions=preds)

# Write the rows plus predictions to the topic 2 results CSV.
results_df.to_csv(result_dirs[2], index=False)