In [1]:
!pip install transformers
!pip install sentencepiece
!pip install bert
!pip install accelerate -U
!pip install evaluate
!pip install datasets

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99
Collecting bert
  Downloading bert-2.2.0.tar.gz (3.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting erlastic (from bert)
  Downloading erlastic-2.0.0.tar.gz (6.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: bert, erlastic
  Building wheel for bert (setup.py) ... [?25l[?25hdone
  Created wheel for bert: filename=bert-2.2.0-py3-none-any.whl size=3744 sha256=b287ae03653b58fd6500a84e0d299161eebf87847175a42ccf034bc88f941ea4
  Stored in directory: /root/.cache/pip/wheels/d0/97/c8/5775d57c323970511488d9b53ce66cf0ded394fe4b4f6e0afb
  Building wheel for erlastic (setup.py) ... [?25l[

In [1]:
import pandas as pd
import numpy as np
from transformers import BertModel, BertForMultipleChoice, BertTokenizer, AdamW
import tensorflow as tf
import torch

In [None]:
# Pins TensorFlow to 2.14. NOTE(review): tensorflow is imported in an earlier cell and is
# not otherwise used in the visible notebook - confirm this pin is still needed.
!pip install tensorflow==2.14

In [None]:
# Load the pretrained `bert-base-uncased` tokenizer and a BERT multiple-choice model
# from the same checkpoint; both are handed to the Trainer further down.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMultipleChoice.from_pretrained('bert-base-uncased')

In [None]:
# Empty results table; the inference loop adds one row per evaluated test question.
# (The former `col = data['data']` line was dropped: `data` is never defined in this
# notebook and `col` was never used, so the cell failed on a fresh kernel.)
data_store = pd.DataFrame(columns = ['id','question', 'bias_type','score','predicted_type','predicted_answer'])

In [5]:
import torch

## FINE TUNING BERT MODEL

In [3]:
# Training data; later cells read its 'context', 'sentence_A'/'sentence_B'/'sentence_C'
# and 'label_A'/'label_B' columns.
training_data = pd.read_csv('train_data_t5_aug.csv')

In [4]:
# Evaluation data; the inference loop reads its 'id', 'bias_type', 'context',
# 'sentence_A'/'sentence_B'/'sentence_C' and 'label_A'/'label_B'/'label_C' columns.
testing_data = pd.read_csv('test_data.csv')

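## Finding the labels for the training data

Each label is the index of the stereotype option: 0 if `label_A` is "stereotype", 1 if `label_B` is "stereotype", and 2 otherwise.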

In [6]:
def finding_labels(data=None):
  """Return the index of the stereotype option for every row.

  Args:
    data: DataFrame with 'label_A' and 'label_B' columns. Defaults to the
      notebook-level `training_data`, so existing `finding_labels()` calls
      keep working.

  Returns:
    A list with one int per row: 0 if label_A is "stereotype", 1 if label_B
    is "stereotype", and 2 otherwise.

  Raises:
    KeyError: if `data` lacks the 'label_A' or 'label_B' column.
  """
  if data is None:
    data = training_data
  labels = []
  # Walk the two label columns directly; this avoids building a Series per row.
  for label_a, label_b in zip(data['label_A'], data['label_B']):
    if label_a == "stereotype":
      labels.append(0)
    elif label_b == "stereotype":
      labels.append(1)
    else:
      labels.append(2)
  return labels

In [7]:
# Integer class label (index of the stereotype option) for each training row.
label = finding_labels()

In [8]:
# Number of labels produced; finding_labels appends exactly one per training row.
print(len(label))

80


In [9]:
# Build one (prompt, prompt + option) sentence pair per answer option and tokenize them.
# NOTE(review): the prompt text here differs from the one used in the inference loop
# ("Reason behind this statement: ...") - confirm that is intentional.
sentence_type = ['sentence_A', 'sentence_B', 'sentence_C']
num_choices = len(sentence_type)
first, second = [], []
for index, item in training_data.iterrows():
  context = item['context']
  # Drop a single trailing period; endswith() also copes with an empty context.
  question = context[:-1] if context.endswith('.') else context
  question = "The stereotypical reason for this statement: " + question + " is "
  # extend() keeps list building linear (repeated `+` / sum(..., []) is quadratic).
  first.extend([question] * num_choices)
  second.extend(f"{question} {item[i]}" for i in sentence_type)

tokenized_examples = tokenizer(first, second, truncation=True)
# Regroup the flat tokenizer output into one list of `num_choices` encodings per question.
element = {k: [v[i : i + num_choices] for i in range(0, len(v), num_choices)] for k, v in tokenized_examples.items()}

In [10]:
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import torch


@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that dynamically pads multiple-choice inputs.

    Each incoming feature is a dict whose values (other than the label) hold one
    entry per answer choice. The collator flattens the choices, pads them with
    the tokenizer, and reshapes the result to (batch_size, num_choices, -1).
    The input feature dicts are left unmodified.
    """

    # Tokenizer whose `pad` method is used to pad the flattened choices.
    tokenizer: PreTrainedTokenizerBase
    # Forwarded unchanged to `tokenizer.pad`.
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        """Collate a list of feature dicts into a batch of tensors.

        Args:
            features: non-empty list of dicts containing "input_ids" (one entry
                per choice) and a "label" or "labels" entry.

        Returns:
            Dict of tensors viewed as (batch_size, num_choices, -1), plus an
            int64 "labels" tensor with one entry per feature.
        """
        label_name = "label" if "label" in features[0].keys() else "labels"
        # Read the labels without pop() so the caller's dicts are not mutated.
        labels = [feature[label_name] for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])
        # One dict per (feature, choice) pair, in feature-major order; a single
        # comprehension avoids the quadratic sum(list_of_lists, []) idiom.
        flattened_features = [
            {k: v[i] for k, v in feature.items() if k != label_name}
            for feature in features
            for i in range(num_choices)
        ]

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        # Restore the (batch, choice) structure that was flattened for padding.
        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        batch["labels"] = torch.tensor(labels, dtype=torch.int64)
        return batch

In [11]:
import evaluate
# Accuracy metric object; compute_metrics calls its `compute` method.
accuracy = evaluate.load("accuracy")

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [12]:
import numpy as np
def compute_metrics(eval_pred):
    """Score predictions with the notebook-level `accuracy` metric.

    `eval_pred` unpacks into (predictions, labels); the predicted class for each
    example is the argmax of its predictions along axis 1.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_classes, references=references)

In [13]:
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer

In [14]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub; the training arguments below set push_to_hub=True.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
# Hyperparameters and checkpointing policy for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="bert_bias_model",  # later cells reload the tokenizer and model from this path
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=10e-5,  # NOTE(review): 10e-5 equals 1e-4, not 1e-5 - confirm the intended value
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    push_to_hub=True,  # relies on the Hub login performed in the preceding cell
)

In [16]:
import pandas as pd
from torch.utils.data import Dataset

# Work on a copy so the raw `training_data` frame is not modified by this cell.
df = training_data.copy()
# Attach every tokenizer output, not just input_ids/attention_mask: the inference loop
# feeds the model everything the tokenizer returns (including token_type_ids when
# present), so training should see the same inputs.
for key, values in element.items():
  df[key] = values
df['labels'] = label

In [17]:
import datasets
from datasets import Dataset, DatasetDict

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; random_state is fixed so the split is repeatable.
df_train, df_valid = train_test_split(df, test_size=0.2, random_state=42)

In [19]:
# Wrap each pandas split in a `datasets.Dataset` for use with the Trainer.
td_train = Dataset.from_pandas(df_train)
td_valid = Dataset.from_pandas(df_valid)

In [20]:
# Bundle both splits under the keys the Trainer cell looks up.
ds = DatasetDict({'train': td_train, 'validation': td_valid})

In [21]:
# Wire the model, data splits, multiple-choice collator and accuracy metric into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds['train'],
    eval_dataset=ds['validation'],
    tokenizer=tokenizer,
    data_collator=DataCollatorForMultipleChoice(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

In [None]:
trainer.train()
# load_best_model_at_end=True leaves the best checkpoint's weights in `trainer.model`.
# Save them explicitly to output_dir ("bert_bias_model") so the reload cells below load
# that model rather than whatever was last written there. With push_to_hub=True this
# also uploads the saved model to the Hub.
trainer.save_model()

In [23]:
from transformers import AutoTokenizer, AutoModelForMultipleChoice

In [24]:
# Reload the tokenizer from the training output path.
tokenizer_new = AutoTokenizer.from_pretrained("bert_bias_model")

In [25]:
# Reload the fine-tuned multiple-choice model from the training output path.
model_new = AutoModelForMultipleChoice.from_pretrained("bert_bias_model")

In [26]:
# Fresh, empty results table for the evaluation loop that follows.
data_store = pd.DataFrame(columns = ['id','question', 'bias_type','score','predicted_type','predicted_answer'])

In [None]:
# Score every test question with the fine-tuned model. A prediction gets score 1 when the
# chosen option is labelled "anti-stereotype" or "unrelated", and 0 otherwise.
rows = []
model_new.eval()
with torch.no_grad():  # inference only, so skip gradient tracking
  for index, item in testing_data.iterrows():
    options = [item["sentence_A"], item["sentence_B"], item["sentence_C"]]
    type_ans = [item['label_A'], item['label_B'], item['label_C']]
    question = "Reason behind this statement: " + item['context'].split('.')[0] + " is "

    # One (question, option) pair per choice, padded to a common length.
    encoded_input = tokenizer_new([[question, option] for option in options],
                                  return_tensors="pt", padding=True)

    # unsqueeze(0) adds the leading batch dimension in front of the choice dimension.
    # No labels are passed: only the logits are needed, not a loss.
    outputs = model_new(**{k: v.unsqueeze(0) for k, v in encoded_input.items()})

    predicted_class = outputs.logits.argmax().item()
    predicted_type = type_ans[predicted_class]

    rows.append({
      'id': item['id'],
      'question': question,
      'bias_type': item['bias_type'],
      'score': 1 if predicted_type in ("anti-stereotype", "unrelated") else 0,
      'predicted_type': predicted_type,
      'predicted_answer': options[predicted_class],
    })

# Build the table once from the collected rows: DataFrame.append was removed in
# pandas 2.0 and growing a frame row by row is quadratic. Re-running this cell
# replaces the previous results instead of appending duplicates.
data_store = pd.DataFrame(rows, columns=data_store.columns)


In [28]:
# Persist the per-question predictions and scores, without the index column.
data_store.to_csv('output_bert_ft_aug.csv',index=False)