In [1]:
!pip install transformers
!pip install sentencepiece
!pip install bert
!pip install accelerate -U
!pip install evaluate
!pip install datasets



In [2]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, TFDistilBertForMultipleChoice, AdamW
import tensorflow as tf
import torch

In [3]:
!pip install tensorflow==2.14



In [4]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = TFDistilBertForMultipleChoice.from_pretrained("distilbert-base-uncased")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForMultipleChoice: ['vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias']
- This IS expected if you are initializing TFDistilBertForMultipleChoice from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForMultipleChoice from a PyTorch model that you 

In [5]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
training_data = pd.read_csv('train_data.csv')

In [7]:
testing_data = pd.read_csv('test_data.csv')

In [8]:
def finding_labels():
  labels = []
  for index, item in training_data.iterrows():
    if item['label_A'] == "stereotype":
      labels.append(0)
    elif item['label_B'] == "stereotype":
      labels.append(1)
    else:
      labels.append(2)
  return labels

In [9]:
label = finding_labels()

In [10]:
first, second = [], []
sentence_type = ['sentence_A', 'sentence_B', 'sentence_C']
for index, item in training_data.iterrows():
  question = item['context'][:-1] if item['context'][-1] == '.' else item['context']
  question = "The stereotypical reason for this statement: " + question + " is "
  first_sentence = [question for i in range(3)]
  first = first + first_sentence
  second_sentence = [f"{question} {item[i]}" for i in sentence_type]
  second.append(second_sentence)

second = sum(second, [])
tokenized_examples = tokenizer(first, second, truncation=True)
element = {k: [v[i : i + 3] for i in range(0, len(v), 3)] for k, v in tokenized_examples.items()}

In [11]:
from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import tensorflow as tf


@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that will dynamically pad the inputs for multiple choice received.
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        label_name = "label" if "label" in features[0].keys() else "labels"
        labels = [feature.pop(label_name) for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]["input_ids"])
        flattened_features = [
            [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
        ]
        flattened_features = sum(flattened_features, [])

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="tf",
        )

        batch = {k: tf.reshape(v, (batch_size, num_choices, -1)) for k, v in batch.items()}
        batch["labels"] = tf.convert_to_tensor(labels, dtype=tf.int64)
        return batch

In [12]:
import pandas as pd
from torch.utils.data import Dataset

df = training_data
df['input_ids'] = element['input_ids']
df['attention_mask'] = element['attention_mask']
df['labels'] = label

In [13]:
import datasets
from datasets import Dataset, DatasetDict

In [14]:
from sklearn.model_selection import train_test_split

df_train, df_valid = train_test_split(df, test_size=0.2, random_state=42)

In [15]:
td_train = Dataset.from_pandas(df_train)
td_valid = Dataset.from_pandas(df_valid)

In [16]:
ds = DatasetDict()

ds['train'] = td_train
ds['validation'] = td_valid

In [17]:
import evaluate

accuracy = evaluate.load("accuracy")

In [18]:
import numpy as np

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [19]:
from transformers import create_optimizer

batch_size = 16
num_train_epochs = 5
total_train_steps = (len(ds["train"]) // batch_size) * num_train_epochs
optimizer, schedule = create_optimizer(init_lr=10e-5, num_warmup_steps=0, num_train_steps=total_train_steps)

In [20]:
from transformers import TFAutoModelForMultipleChoice

model = TFAutoModelForMultipleChoice.from_pretrained("distilbert-base-uncased")

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForMultipleChoice: ['vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias']
- This IS expected if you are initializing TFDistilBertForMultipleChoice from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForMultipleChoice from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForMultipleChoice were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a d

In [21]:
data_collator = DataCollatorForMultipleChoice(tokenizer=tokenizer)
tf_train_set = model.prepare_tf_dataset(
    ds["train"],
    shuffle=True,
    batch_size=batch_size,
    collate_fn=data_collator,
)

tf_validation_set = model.prepare_tf_dataset(
    ds["validation"],
    shuffle=False,
    batch_size=batch_size,
    collate_fn=data_collator,
)

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [22]:
model.compile(optimizer=optimizer)  # No loss argument!

In [23]:
from transformers.keras_callbacks import KerasMetricCallback

metric_callback = KerasMetricCallback(metric_fn=compute_metrics, eval_dataset=tf_validation_set)

In [24]:
from transformers.keras_callbacks import PushToHubCallback

push_to_hub_callback = PushToHubCallback(
    output_dir="my_distilbert_ft_model_2",
    tokenizer=tokenizer,
)

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/yinani24/my_distilbert_ft_model_2 into local empty directory.


Download file tf_model.h5:   0%|          | 8.00k/256M [00:00<?, ?B/s]

Clean file tf_model.h5:   0%|          | 1.00k/256M [00:00<?, ?B/s]

In [25]:
callbacks = [metric_callback, push_to_hub_callback]

In [26]:
model.fit(x=tf_train_set, validation_data=tf_validation_set, epochs=4, callbacks=callbacks)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.src.callbacks.History at 0x7939079d2080>

In [27]:
from transformers import AutoTokenizer, TFAutoModelForMultipleChoice

In [28]:
tokenizer_new = AutoTokenizer.from_pretrained("my_distilbert_ft_model_2")
model_new = TFAutoModelForMultipleChoice.from_pretrained("my_distilbert_ft_model_2")

Some layers from the model checkpoint at my_distilbert_ft_model_2 were not used when initializing TFDistilBertForMultipleChoice: ['dropout_39']
- This IS expected if you are initializing TFDistilBertForMultipleChoice from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForMultipleChoice from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForMultipleChoice were not initialized from the model checkpoint at my_distilbert_ft_model_2 and are newly initialized: ['dropout_59']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [33]:
data_store = pd.DataFrame(columns = ['sent_more','sent_less', 'stereotype','stereo_antistereo', 'predicted_answer', 'label', 'score'])

In [35]:
import random

In [36]:
test_label = []
for index, item in testing_data.iterrows():
  bias_type = item['bias_type']
  question = "Choose the most appropriate statement: "
  # In Label, 1 means it is switched around while not in 0
  a = random.randint(0,1)
  test_label.append(0)
  options = [item['sent_more'], item['sent_less']]
  if a == 1:
    options = [item['sent_less'], item['sent_more']]
    test_label[-1] = 1
  question = "Choose the most appropriate option: "

  encoded_input = tokenizer_new([[question, options[0]],
                          [question, options[1]]], return_tensors="pt", padding=True)
  inputs = {k: tf.expand_dims(v, 0) for k, v in encoded_input.items()}
  outputs = model(inputs)  # batch size is 1
  logits = outputs.logits

  predicted_class = tf.argmax(logits, axis=1).numpy()[0]
  predicted_answer = options[predicted_class]

  score = 0
  if predicted_answer == item['sent_more']:
    score = 1

  new_row = {'sent_more': item['sent_more'],'sent_less': item['sent_less'], 'stereotype': bias_type, 'stereo_antistereo': item['stereo_antistereo'], 'predicted_answer': predicted_answer, 'label': test_label[-1] , 'score': score}
  data_store = data_store.append(new_row, ignore_index=True)

  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_store.append(new_row, ignore_index=True)
  data_store = data_stor

In [37]:
data_store.to_csv('output_distilbert_stereoset_ft_crows_test_bias.csv',index=False)

In [38]:
print(data_store['score'].value_counts().get(0, 0))
print(data_store['score'].value_counts().get(1, 1))

614
822
