In [None]:
# Clone the debias_transformers repository into the current working directory.
# Later cells chdir into the checkout and install it in editable mode.
!git clone https://github.com/SoumyaBarikeri/debias_transformers.git


Cloning into 'debias_transformers'...
remote: Enumerating objects: 44565, done.[K
remote: Counting objects: 100% (10/10), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 44565 (delta 4), reused 3 (delta 3), pack-reused 44555 (from 1)[K
Receiving objects: 100% (44565/44565), 38.49 MiB | 9.30 MiB/s, done.
Resolving deltas: 100% (30346/30346), done.


In [None]:
import os
# Work from the cloned repository root so that `pip install -e .` in the
# next cell resolves "." to this checkout.
os.chdir('/content/debias_transformers')


In [None]:
!pip install -e .


Obtaining file:///content/debias_transformers
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tokenizers==0.8.1.rc2 (from transformers==3.3.0)
  Using cached tokenizers-0.8.1rc2.tar.gz (97 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting sacremoses (from transformers==3.3.0)
  Using cached sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Using cached sacremoses-0.1.1-py3-none-any.whl (897 kB)
Building wheels for collected packages: transformers, tokenizers
  Building editable for transformers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for transformers: filename=transformers-3.3.0-0.editable-py3-none-any.whl size=9194 sha25

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the data paths used by later cells
# all live under /content/drive/MyDrive/Reddit-Data.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the entries of the Reddit-Data folder on the mounted Google Drive.
# `data_path` is reused by the next cell to build the training-file path.
data_path = '/content/drive/MyDrive/Reddit-Data'
print(os.listdir(data_path))

['reddit_comments_gender_female_raw_3.csv', 'gender_female.txt', 'reddit_comments_gender_female_raw_2.csv', 'reddit_comments_gender_female_raw_0.csv', 'reddit_comments_gender_female_raw_5.csv', 'reddit_comments_gender_female_raw_4.csv', 'reddit_comments_gender_female_raw_1.csv', 'gender', 'text_files', 'reddit_comments_race_black_raw_4.csv', 'reddit_comments_race_black_raw_3.csv', 'reddit_comments_race_black_raw_1.csv', 'reddit_comments_race_black_raw_0.csv', 'reddit_comments_race_black_raw_2.csv', 'race', 'race_black.txt', 'reddit_comments_orientation_lgbtq_raw_3.csv', 'reddit_comments_orientation_lgbtq_raw_4.csv', 'reddit_comments_orientation_lgbtq_raw_2.csv', 'reddit_comments_orientation_lgbtq_raw_0.csv', 'reddit_comments_orientation_lgbtq_raw_1.csv', 'reddit_comments_orientation_lgbtq_merged.csv', 'orientation', 'orientation_lgbtq.txt', 'reddit_comments_race_black_merged.csv', 'reddit_comments_gender_female_merged.csv', 'models', 'race_bias_manual_swapped_attr_train.txt']


In [None]:
# Example: load the gender training data file and preview it.
# Both paths are derived from `data_path` (set in the Drive-listing cell) so
# the Drive location is defined in exactly one place; the resulting strings are
# identical to the previously hard-coded ones.
gender_text_dir = data_path + '/text_files/gender'
gender_train_file = gender_text_dir + '/gender_bias_manual_train.txt'
# Consumed by main() in the fine-tuning cell as its training file.
gender_bias_manual_swapped_targets_train_file = gender_text_dir + '/gender_bias_manual_swapped_targets_train.txt'

# Read the contents of the file. The encoding is given explicitly so decoding
# does not depend on the runtime's locale default.
with open(gender_train_file, 'r', encoding='utf-8') as file:
    gender_data = file.readlines()

# Print the first few lines
print(gender_data[:5])


['the dance in that vine where that girl is dancing to the fire alarm\n', 'gonna bang her as soon as your mom is done cleaning her snizz off my\n', 'insinuating that hug from mom is not better than shakespeare. gabdisgust\n', 'doras supposedly-well-endowed mom is comparable to  tai in modern\n', 'dance moms are literally demons in honda civics source\n']


In [None]:
# coding=utf-8

import logging
import math
from transformers import (
    AutoConfig,
    AutoModelWithLMHead,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    set_seed,
    TrainingArguments
)

logger = logging.getLogger(__name__)

def main(
    train_data_file=None,
    eval_data_file="/content/drive/MyDrive/Reddit-Data/text_files/humanref6k.txt",
    output_dir="/content/debias_transformers/models/gender/lm_loss_swapped_target/",
    model_name_or_path="microsoft/DialoGPT-small",
    num_train_epochs=2.0,
    block_size=36,
    seed=42,
):
    """Fine-tune a pretrained causal LM on a line-per-example text file.

    Loads config, tokenizer and model from ``model_name_or_path``, registers
    ``<bos>``/``<eos>``/``<pad>`` as special tokens, trains with ``Trainer``,
    saves the model and tokenizer, then evaluates on ``eval_data_file`` and
    prints ``exp(eval_loss)`` as the perplexity.

    Calling ``main()`` with no arguments reproduces the original hard-coded run.

    Args:
        train_data_file: Path of the training text file. When ``None`` the
            notebook-level ``gender_bias_manual_swapped_targets_train_file``
            (defined in an earlier cell) is used.
        eval_data_file: Path of the evaluation text file.
        output_dir: Directory passed to ``TrainingArguments`` and to
            ``tokenizer.save_pretrained``.
        model_name_or_path: Identifier used for the config, the tokenizer and
            the model weights.
        num_train_epochs: Number of training epochs.
        block_size: ``block_size`` handed to ``LineByLineTextDataset``.
        seed: Value passed to ``set_seed`` before anything is loaded.

    Returns:
        None. The perplexity is printed, not returned, so a bare ``main()``
        at the end of a cell does not echo an extra value.
    """
    # Function-scope import: AutoModelForCausalLM replaces the deprecated
    # AutoModelWithLMHead and is the class the evaluation cell uses to load
    # the checkpoint written here.
    from transformers import AutoModelForCausalLM, LineByLineTextDataset

    if train_data_file is None:
        # Default kept from the original notebook: path bound in an earlier cell.
        train_data_file = gender_bias_manual_swapped_targets_train_file

    # Config and tokenizer come from the same identifier as the weights.
    config_name = model_name_or_path
    tokenizer_name = model_name_or_path

    # Fixed training settings.
    save_total_limit = 2
    logging_steps = 2000
    save_steps = 2000
    per_device_train_batch_size = 2
    per_device_eval_batch_size = 2
    gradient_accumulation_steps = 1
    force_pad_token = True
    overwrite_output_dir = True

    # Set seed
    set_seed(seed)

    # Load pretrained model and tokenizer
    config = AutoConfig.from_pretrained(config_name, cache_dir=None)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, cache_dir=None)
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        # Identifiers containing ".ckpt" are loaded as TensorFlow checkpoints.
        from_tf=".ckpt" in model_name_or_path,
        config=config,
    )

    if force_pad_token:
        # Register the special tokens and grow the embedding matrix to match
        # the new tokenizer length.
        special_tokens_dict = {'bos_token': '<bos>', 'eos_token': '<eos>', 'pad_token': '<pad>'}
        tokenizer.add_special_tokens(special_tokens_dict)
        model.resize_token_embeddings(len(tokenizer))

    # Load datasets
    train_dataset = LineByLineTextDataset(
        tokenizer=tokenizer, file_path=train_data_file, block_size=block_size
    )
    eval_dataset = LineByLineTextDataset(
        tokenizer=tokenizer, file_path=eval_data_file, block_size=block_size
    )

    # Data collator (mlm=False: no masked-LM objective)
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=False
    )

    # Initialize trainer
    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=overwrite_output_dir,
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        save_steps=save_steps,
        logging_steps=logging_steps,
        save_total_limit=save_total_limit,
        gradient_accumulation_steps=gradient_accumulation_steps,
        report_to="none",  # Disable W&B logging
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )

    # Training
    trainer.train()
    trainer.save_model()
    tokenizer.save_pretrained(output_dir)

    # Evaluation
    eval_output = trainer.evaluate()
    try:
        perplexity = math.exp(eval_output["eval_loss"])
    except OverflowError:
        # A very large loss must not crash the cell after a full training run.
        perplexity = float("inf")
    print(f"Perplexity: {perplexity}")


# Run training followed by evaluation as configured in main(); the
# perplexity is printed at the end.
main()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/641 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/351M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
2000,3.2889


Perplexity: 675.6247879955285


In [None]:
import pandas as pd
import numpy as np
from scipy import stats
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForCausalLM,
    AutoTokenizer,
)
import time
import logging
import math
import torch

# Hardcoded arguments
# Root folder on the mounted Drive; inputs and outputs are addressed as
# data_path + demo + "/" + <file name>, hence the trailing slash.
data_path = "/content/drive/MyDrive/Reddit-Data/"
# Folder that receives the "measure_bias_<demo>.log" file configured below.
log_path = "/content/drive/MyDrive/Reddit-Data/"
# "yes": recompute perplexities with the model; any other value: reload the
# previously saved output CSVs instead.
GET_PERPLEXITY = "yes"
# "yes": write the frames with the added "perplexity" column to the output
# files (only consulted when perplexities are recomputed).
SAVE_PERPLEXITY = "yes"
# Sub-folder / log-file suffix for this run.
demo = "gender"
# Labels of the two groups, used in the printed summary.
demo_1 = "female"
demo_2 = "male"
# Input CSVs; each is read for its "comments_processed" column.
input_file_1 = "reddit_comments_gender_female_processed_phrase_biased_testset_reduced.csv"
input_file_2 = "reddit_comments_gender_male_processed_phrase_biased_testset_reduced.csv"
# Output CSVs: the input frames plus a "perplexity" column.
output_file_1 = "reddit_comments_gender_female_with_perplexity.csv"
output_file_2 = "reddit_comments_gender_male_with_perplexity.csv"
# Directory the tokenizer and model are loaded from; same path as the
# output_dir used by the fine-tuning cell.
pretrained_model = "/content/debias_transformers/models/gender/lm_loss_swapped_target/"
# When truthy, the model is loaded through AutoModelForMaskedLM with this
# value passed as the `debiasing_head` keyword.
debiasing_head = None
# Only used to label this run in the log.
model_name = "lm_loss_swapped_target"

# pandas display limits for any frames shown in this notebook.
pd.set_option("max_colwidth", 600)
pd.options.display.max_columns = 10
# Append DEBUG-and-above records, prefixed with a timestamp, to the log file.
# NOTE(review): basicConfig is a no-op when the root logger already has
# handlers - confirm the log file is actually written in this runtime.
logging.basicConfig(
    filename=log_path + "measure_bias_" + demo + ".log",
    filemode="a",
    level=logging.DEBUG,
    format="%(asctime)s %(message)s",
)

logging.info("Evaluating bias for model: {}".format(model_name))

# Perplexity functions
def perplexity_score(sentence, model=None, tokenizer=None):
    """Return the perplexity the language model assigns to ``sentence``.

    The sentence is tokenized, converted to ids and fed to the model as a
    batch of one, with the input ids doubling as labels; the first element of
    the model output (the loss) is exponentiated.

    Args:
        sentence: Text to score.
        model: Model to score with. Defaults to the notebook-level ``model``.
        tokenizer: Tokenizer to use. Defaults to the notebook-level
            ``tokenizer``.

    Returns:
        float: ``exp(loss)``.

    Raises:
        ValueError: If the sentence produces no tokens.
        KeyError: If ``model``/``tokenizer`` are omitted and no notebook-level
            object of that name exists.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    if model is None:
        model = globals()["model"]
    if tokenizer is None:
        tokenizer = globals()["tokenizer"]

    model.eval()
    token_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentence))
    if not token_ids:
        # Fail with a clear message instead of an opaque error from inside the model.
        raise ValueError("cannot compute perplexity: sentence produced no tokens")

    # Build the batch on the model's device when it exposes one; `None`
    # selects torch's default device.
    tensor_input = torch.tensor([token_ids], device=getattr(model, "device", None))
    with torch.no_grad():
        outputs = model(tensor_input, labels=tensor_input)
    return math.exp(outputs[0].item())


def get_perplexity_list(df, score_fn=None, failure_value=0):
    """Score every entry of ``df["comments_processed"]`` and return the results.

    Args:
        df: DataFrame with a ``comments_processed`` column.
        score_fn: Callable taking one sentence and returning its perplexity.
            Defaults to ``perplexity_score``.
        failure_value: Value recorded for rows whose scoring raises. The
            default ``0`` keeps the original sentinel; pass ``float("nan")``
            to make failed rows easy to exclude from statistics.

    Returns:
        list: One entry per row, in row order. Rows that failed hold
        ``failure_value``.
    """
    if score_fn is None:
        score_fn = perplexity_score

    perplexity_list = []
    failed_rows = []
    # Only one column is needed, so iterate over it directly instead of
    # materialising every row with iterrows().
    for idx, sentence in df["comments_processed"].items():
        try:
            perplexity = score_fn(sentence)
        except Exception as ex:
            # Best effort: one bad row must not abort the whole run, but the
            # row index is logged so it can be traced afterwards.
            logging.error(f"Error in perplexity calculation for row {idx}: {ex}")
            failed_rows.append(idx)
            perplexity = failure_value
        perplexity_list.append(perplexity)

    if failed_rows:
        logging.warning(
            "%d of %d rows could not be scored and were assigned %r",
            len(failed_rows), len(perplexity_list), failure_value,
        )
    return perplexity_list


# Main execution
start = time.time()

# Logging output to both file and console.
# The console handler is attached before any of the work below so that the
# INFO messages of this cell are actually echoed. It is looked up by name
# first, so re-running the cell does not stack duplicate handlers.
root_logger = logging.getLogger()
formatter = logging.Formatter("%(asctime)s %(message)s")
console = next(
    (h for h in root_logger.handlers if h.get_name() == "measure_bias_console"),
    None,
)
if console is None:
    console = logging.StreamHandler()
    console.set_name("measure_bias_console")
    root_logger.addHandler(console)
console.setLevel(logging.INFO)
console.setFormatter(formatter)

# Folder holding the input and output CSVs of this run.
demo_dir = data_path + demo + "/"

if GET_PERPLEXITY == "yes":
    logging.info("Calculating perplexity")
    gender_df = pd.read_csv(demo_dir + input_file_1)
    gender_df_2 = pd.read_csv(demo_dir + input_file_2)
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model)

    if debiasing_head:
        logging.info("Loading debiased model..")
        model = AutoModelForMaskedLM.from_pretrained(pretrained_model, debiasing_head=debiasing_head)
    elif "bert" in pretrained_model.lower():
        logging.info("Using BERT-based model")
        model = AutoModelForMaskedLM.from_pretrained(pretrained_model)
    else:
        # Paths containing "gpt" and unrecognised paths are both loaded as
        # causal LMs; only the log line differs.
        if "gpt" in pretrained_model.lower():
            logging.info("Using GPT-based model")
        else:
            logging.info("Using default causal language model")
        model = AutoModelForCausalLM.from_pretrained(pretrained_model)

    gender_1_perplexity = get_perplexity_list(gender_df)
    logging.info("Completed demo1 perplexity in {:.2f} minutes".format((time.time() - start) / 60))
    gender_2_perplexity = get_perplexity_list(gender_df_2)
    logging.info("Completed demo2 perplexity in {:.2f} minutes".format((time.time() - start) / 60))

    gender_df["perplexity"] = gender_1_perplexity
    gender_df_2["perplexity"] = gender_2_perplexity

    if SAVE_PERPLEXITY == "yes":
        logging.info("Saving perplexity results to files.")
        gender_df.to_csv(demo_dir + output_file_1, index=False)
        gender_df_2.to_csv(demo_dir + output_file_2, index=False)
else:
    logging.info("Loading saved perplexities")
    gender_df = pd.read_csv(demo_dir + output_file_1)
    gender_df_2 = pd.read_csv(demo_dir + output_file_2)
    gender_1_perplexity = gender_df["perplexity"]
    gender_2_perplexity = gender_df_2["perplexity"]

# A perplexity of exactly 0 is the value get_perplexity_list records by default
# for rows whose scoring raised. Such rows still enter the statistics below, so
# make their presence visible.
for group_label, group_values in ((demo_1, gender_1_perplexity), (demo_2, gender_2_perplexity)):
    zero_count = int((np.asarray(group_values) == 0).sum())
    if zero_count:
        logging.warning(
            "%d %s perplexities are 0 (scoring failed); they are included in the statistics",
            zero_count, group_label,
        )

# Summary statistics are computed once and reused for the log and the printout.
mean_1, var_1 = np.mean(gender_1_perplexity), np.var(gender_1_perplexity)
mean_2, var_2 = np.mean(gender_2_perplexity), np.var(gender_2_perplexity)

logging.info(
    "Mean and variance for demo1 perplexities: Mean = {:.4f}, Variance = {:.4f}".format(mean_1, var_1)
)
logging.info(
    "Mean and variance for demo2 perplexities: Mean = {:.4f}, Variance = {:.4f}".format(mean_2, var_2)
)

logging.info("Test samples: demo1 = {}, demo2 = {}".format(len(gender_1_perplexity), len(gender_2_perplexity)))

# Statistical testing
t_value, p_value = stats.ttest_ind(gender_1_perplexity, gender_2_perplexity, equal_var=False)
logging.info("Unpaired t-test: t-value = {:.4f}, p-value = {:.4f}".format(t_value, p_value))

# The paired test needs one-to-one samples; with unequal counts ttest_rel
# raises, so report NaN instead of crashing after all the work above.
# NOTE(review): assumes row i of both files form a matched pair - confirm.
if len(gender_df) == len(gender_df_2):
    t_paired, p_paired = stats.ttest_rel(gender_df["perplexity"].tolist(), gender_df_2["perplexity"].tolist())
else:
    logging.warning(
        "Paired t-test skipped: sample counts differ (%d vs %d)", len(gender_df), len(gender_df_2)
    )
    t_paired, p_paired = float("nan"), float("nan")
logging.info("Paired t-test: t-value = {:.4f}, p-value = {:.4f}".format(t_paired, p_paired))

# Print final results to console
print("\n--- Final Results ---")
print(f"Mean and variance for {demo_1} perplexities: Mean = {mean_1:.4f}, Variance = {var_1:.4f}")
print(f"Mean and variance for {demo_2} perplexities: Mean = {mean_2:.4f}, Variance = {var_2:.4f}")

print(f"Unpaired t-test: t-value = {t_value:.4f}, p-value = {p_value:.4f}")
print(f"Paired t-test: t-value = {t_paired:.4f}, p-value = {p_paired:.4f}")

print(f"Test samples: {demo_1} = {len(gender_1_perplexity)}, {demo_2} = {len(gender_2_perplexity)}")
print("\n----------------------")



--- Final Results ---
Mean and variance for female perplexities: Mean = 284.0309, Variance = 324722.9097
Mean and variance for male perplexities: Mean = 293.0374, Variance = 318604.4102
Unpaired t-test: t-value = -0.3348, p-value = 0.7378
Paired t-test: t-value = -2.2697, p-value = 0.0235
Test samples: female = 890, male = 890

----------------------
