In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import BertTokenizer,BertForSequenceClassification, BertConfig
from transformers.pipelines import pipeline
from datasets import load_dataset
import datasets

import os
import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch import nn
import h5py

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [3]:
# Load the dataset that was previously saved to disk under this folder.
ds = datasets.load_from_disk(
    dataset_path="bias_in_bios/opposite/data",
)

In [4]:
# Display the loaded DatasetDict: its splits, their features and row counts.
ds

DatasetDict({
    train: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 39270
    })
    test: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 15108
    })
    drift: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 106242
    })
    new_unseen: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 94184
    })
})

In [32]:
# Convert each split to a pandas DataFrame.
# The conversions run in the same order as before: train, test, new_unseen, drift.
df_train, df_test, df_new_unseen, df_drift = (
    ds[split_name].to_pandas()
    for split_name in ("train", "test", "new_unseen", "drift")
)

In [33]:
import csv

# Export the train / test / new_unseen frames with the same CSV settings that the
# df_drift export in this notebook uses:
#   * index=False        -> no extra unnamed index column, so all four files
#                           share one schema;
#   * quoting=QUOTE_ALL  -> every field is quoted, which keeps the free-text
#                           `hard_text` column unambiguous for CSV readers.
# NOTE(review): readers that relied on the old leading index column
# (e.g. pd.read_csv(..., index_col=0)) must be updated.
df_train.to_csv("bias_in_bios/opposite/data/df_train.csv", index=False, quoting=csv.QUOTE_ALL)
df_test.to_csv("bias_in_bios/opposite/data/df_test.csv", index=False, quoting=csv.QUOTE_ALL)
df_new_unseen.to_csv("bias_in_bios/opposite/data/df_new_unseen.csv", index=False, quoting=csv.QUOTE_ALL)


In [34]:
import csv

# Write the drift split to CSV without the pandas index and with every field quoted.
df_drift.to_csv(
    "bias_in_bios/opposite/data/df_drift.csv",
    quoting=csv.QUOTE_ALL,
    index=False,
)


In [7]:
# Locations and names for the fine-tuned checkpoint.
OUTPUT_DIR = "bias_in_bios/opposite/saved_model/best_model"
CONFIG_NAME = "config.json"
WEIGHTS_NAME = "pytorch_model.bin"
BERT_MODEL = 'bert-base-uncased'  # BERT model type

# Load the saved config with hidden states enabled so that per-layer
# activations are returned alongside the logits.
config_path = os.path.join(OUTPUT_DIR, CONFIG_NAME)
config = BertConfig.from_pretrained(config_path, output_hidden_states=True)

# Load the classifier weights from the checkpoint folder and move them to `device`.
model = BertForSequenceClassification.from_pretrained(OUTPUT_DIR, config=config).to(device)

# The tokenizer comes from the base BERT model name, not from the checkpoint folder.
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)

In [8]:
# Keyword arguments for tokenizer calls: pad to the maximum length and truncate long inputs.
tokenizer_kwargs = dict(padding="max_length", truncation=True)

In [9]:
# Class id -> profession name; the position in the list is the class id.
id2label = dict(
    enumerate(["professor", "physician", "attorney", "photographer", "journalist", "nurse"])
)

# Inverse lookup: profession name -> class id.
label2id = dict(zip(id2label.values(), id2label.keys()))

In [10]:
# Display the DatasetDict summary again (same object that was loaded earlier in the notebook).
ds

DatasetDict({
    train: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 39270
    })
    test: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 15108
    })
    drift: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 106242
    })
    new_unseen: Dataset({
        features: ['hard_text', 'all_profession_id', 'gender', 'all_profession_name', 'gender_name', 'profession_name', 'profession_id'],
        num_rows: 94184
    })
})

In [11]:
import torch
import numpy as np
import torch.nn as nn
from tqdm import tqdm
from datasets import Dataset

def extract_embedding_and_predict(model, tokenizer, hf_dataset, text_column, label_column, layer_id, batch_size=256):
    """Classify every text of a dataset and collect one embedding vector per text.

    The texts are processed in consecutive batches of ``batch_size``; the last,
    possibly shorter, batch goes through the same loop as all the others.

    Args:
        model: Classifier called as ``model(**encoded)``. Its output must expose
            ``"logits"`` and ``"hidden_states"``, so it has to be configured to
            return hidden states. The model is switched to eval mode here.
        tokenizer: Called as ``tokenizer(texts, padding=True, truncation=True,
            return_tensors="pt")``; the result must support ``.to(device)``.
        hf_dataset: Dataset supporting ``len()`` and column access by name.
        text_column: Name of the column holding the input texts.
        label_column: Name of the column holding the ground-truth label ids.
        layer_id: Index into ``outputs["hidden_states"]`` selecting the layer
            whose activations are kept.
        batch_size: Number of texts per forward pass (default 256, the value
            that used to be hard-coded).

    Returns:
        Tuple ``(X, E, Y_original, Y_original_names, Y_predicted, Y_predicted_names)``:
        the input texts, a float64 array with one row per text holding the
        hidden state at sequence position 0 (the CLS position), the
        ground-truth label ids and names, and the predicted label ids and names.

    Raises:
        ValueError: If ``batch_size`` is not a positive integer.
        KeyError: If a label id is missing from the module-level ``id2label``.

    Note:
        Label names are resolved through the module-level ``id2label`` mapping.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    X = hf_dataset[text_column]  # Input texts
    Y_original = hf_dataset[label_column]  # Ground-truth label ids
    Y_original_names = [id2label[label] for label in Y_original]  # Ground-truth label names
    Y_predicted = []  # Predicted label ids
    Y_predicted_names = []  # Predicted label names
    cls_chunks = []  # One (batch, hidden) array per batch, concatenated once at the end

    # Send inputs to wherever the model's weights live instead of depending on
    # a global `device` variable.
    model_device = next(model.parameters()).device
    # Make sure dropout is disabled so the embeddings are deterministic.
    model.eval()

    n_samples = len(hf_dataset)
    for start in tqdm(range(0, n_samples, batch_size)):
        input_texts = X[start:start + batch_size]

        tokenized_texts = tokenizer(input_texts, padding=True, truncation=True, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**tokenized_texts.to(model_device))

        # Softmax is monotonic, so the argmax of the logits is the predicted class.
        batch_labels = torch.argmax(outputs["logits"], dim=-1).tolist()
        Y_predicted.extend(batch_labels)
        Y_predicted_names.extend(id2label[label] for label in batch_labels)

        # Keep only sequence position 0 (CLS) of the requested layer: [batch, hidden].
        layer_hidden_states = outputs["hidden_states"][layer_id]
        cls_chunks.append(layer_hidden_states[:, 0, :].detach().cpu().numpy())

    if cls_chunks:
        # A single concatenation avoids re-copying the whole matrix on every batch.
        # The result is cast to float64 because the previous implementation
        # accumulated into a float64 array.
        E = np.concatenate(cls_chunks, axis=0).astype(np.float64, copy=False)
    else:
        # Empty dataset: return an empty matrix with the model's hidden width
        # (768 if the model does not expose config.hidden_size).
        hidden_size = getattr(getattr(model, "config", None), "hidden_size", 768)
        E = np.empty((0, hidden_size))

    return X, E, Y_original, Y_original_names, Y_predicted, Y_predicted_names


In [12]:
def save_embedding(output_path, X, E, Y_original, Y_original_names, Y_predicted, Y_predicted_names):
    """Write embeddings and labels to a gzip-compressed HDF5 file.

    Datasets written: ``E``, ``Y_original``, ``Y_original_names``,
    ``Y_predicted`` and ``Y_predicted_names``. An existing file at
    ``output_path`` is overwritten (mode ``"w"``).

    Args:
        output_path: Destination ``.hdf5`` path; missing parent folders are created.
        X: Input texts. Accepted for interface compatibility but not written
            to the file.
        E: Embedding matrix.
        Y_original: Ground-truth label ids.
        Y_original_names: Ground-truth label names (strings).
        Y_predicted: Predicted label ids.
        Y_predicted_names: Predicted label names (strings).

    Returns:
        None.
    """
    # h5py.File(..., "w") does not create missing folders.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # A plain list of str becomes a NumPy fixed-width unicode ("U") array, which
    # h5py does not store. Pass an explicit variable-length UTF-8 string dtype.
    text_dtype = h5py.string_dtype(encoding="utf-8")

    # The context manager closes the file even if one of the writes fails.
    with h5py.File(output_path, "w") as fp:
        fp.create_dataset("E", data=E, compression="gzip")
        fp.create_dataset("Y_original", data=Y_original, compression="gzip")
        fp.create_dataset("Y_original_names", data=list(Y_original_names), dtype=text_dtype, compression="gzip")
        fp.create_dataset("Y_predicted", data=Y_predicted, compression="gzip")
        fp.create_dataset("Y_predicted_names", data=list(Y_predicted_names), dtype=text_dtype, compression="gzip")
    return

In [13]:
# Index into the model's hidden states used for the embeddings (-1 picks the last entry).
layer_id = -1

# Folder that receives the per-split embedding files.
embedding_dir = os.path.join(*("bias_in_bios", "opposite", "saved_embedding"))


In [14]:
# Predictions and embeddings for the test split.
(
    X_test,
    E_test,
    Y_original_test,
    Y_original_names_test,
    Y_predicted_test,
    Y_predicted_names_test,
) = extract_embedding_and_predict(
    model,
    tokenizer,
    ds["test"],
    text_column="hard_text",
    label_column="profession_id",
    layer_id=layer_id,
)


100%|██████████| 59/59 [01:24<00:00,  1.43s/it]


In [15]:
# Predictions and embeddings for the train split.
(
    X_train,
    E_train,
    Y_original_train,
    Y_original_names_train,
    Y_predicted_train,
    Y_predicted_names_train,
) = extract_embedding_and_predict(
    model,
    tokenizer,
    ds["train"],
    text_column="hard_text",
    label_column="profession_id",
    layer_id=layer_id,
)


100%|██████████| 153/153 [04:04<00:00,  1.60s/it]


In [16]:
# Predictions and embeddings for the drift split.
(
    X_drift,
    E_drift,
    Y_original_drift,
    Y_original_names_drift,
    Y_predicted_drift,
    Y_predicted_names_drift,
) = extract_embedding_and_predict(
    model,
    tokenizer,
    ds["drift"],
    text_column="hard_text",
    label_column="profession_id",
    layer_id=layer_id,
)



100%|██████████| 415/415 [12:04<00:00,  1.75s/it]


In [17]:
# Predictions and embeddings for the new_unseen split.
(
    X_new_unseen,
    E_new_unseen,
    Y_original_new_unseen,
    Y_original_names_new_unseen,
    Y_predicted_new_unseen,
    Y_predicted_names_new_unseen,
) = extract_embedding_and_predict(
    model,
    tokenizer,
    ds["new_unseen"],
    text_column="hard_text",
    label_column="profession_id",
    layer_id=layer_id,
)

100%|██████████| 367/367 [09:43<00:00,  1.59s/it]


In [18]:
# Write one HDF5 file per split, in the order train, test, drift, new_unseen.
# Each entry pairs the output file name with the six values passed to save_embedding.
for _file_name, _split_outputs in (
    (
        "train_embedding.hdf5",
        (X_train, E_train, Y_original_train, Y_original_names_train,
         Y_predicted_train, Y_predicted_names_train),
    ),
    (
        "test_embedding.hdf5",
        (X_test, E_test, Y_original_test, Y_original_names_test,
         Y_predicted_test, Y_predicted_names_test),
    ),
    (
        "drift_embedding.hdf5",
        (X_drift, E_drift, Y_original_drift, Y_original_names_drift,
         Y_predicted_drift, Y_predicted_names_drift),
    ),
    (
        "new_unseen_embedding.hdf5",
        (X_new_unseen, E_new_unseen, Y_original_new_unseen, Y_original_names_new_unseen,
         Y_predicted_new_unseen, Y_predicted_names_new_unseen),
    ),
):
    save_embedding(os.path.join(embedding_dir, _file_name), *_split_outputs)

In [19]:
# First three ground-truth profession names of the train split.
Y_original_names_train[:3]

['professor', 'professor', 'professor']

In [20]:
# First three predicted profession names of the train split.
Y_predicted_names_train[:3]

['professor', 'professor', 'professor']

In [21]:
# First three ground-truth profession names of the drift split.
Y_original_names_drift[:3]

['attorney', 'professor', 'professor']

In [22]:
# First three predicted profession names of the drift split.
Y_predicted_names_drift[:3]

['nurse', 'nurse', 'nurse']

In [23]:
from sklearn.metrics import classification_report

In [24]:
# Per-class precision / recall / F1 on the new_unseen split (classes are shown by label id).
report = classification_report(
    y_true=Y_original_new_unseen,
    y_pred=Y_predicted_new_unseen,
)
print(report)

              precision    recall  f1-score   support

           0       0.99      0.91      0.95     55754
           1       0.81      0.95      0.87     11695
           2       0.90      0.95      0.92     11036
           3       0.93      0.95      0.94      6540
           4       0.35      0.90      0.50      1007
           5       1.00      1.00      1.00      8152

    accuracy                           0.93     94184
   macro avg       0.83      0.94      0.86     94184
weighted avg       0.94      0.93      0.93     94184



In [25]:
# Per-class precision / recall / F1 on the drift split (classes are shown by label id).
# NOTE(review): the recorded output shows an overall accuracy of 0.01, far below
# chance for six classes. Confirm this is the intended effect of the drift split
# and not a label-alignment problem.
report = classification_report(
    y_true=Y_original_drift,
    y_pred=Y_predicted_drift,
)
print(report)

              precision    recall  f1-score   support

           0       0.68      0.01      0.02     53259
           1       0.03      0.00      0.00     20240
           2       0.77      0.02      0.03     12471
           3       0.78      0.02      0.04      8666
           4       0.70      0.03      0.05      9871
           5       0.00      0.04      0.00      1735

    accuracy                           0.01    106242
   macro avg       0.49      0.02      0.02    106242
weighted avg       0.56      0.01      0.02    106242



In [26]:
# Peek at the first 10 rows of the drift split (returned as a dict of column -> values).
ds["drift"][:10]

{'hard_text': ['Prior to law school, Brittni graduated magna cum laude from DePaul University in 2011 with her Bachelor’s Degree in Psychology and Spanish. In 2014, she earned her law degree from Chicago-Kent College of Law. While at Chicago-Kent, Brittni was awarded two CALI Excellence for the Future Awards in both Legal Writing and for her seminar article regarding President Obama’s executive action, Deferred Action for Childhood Arrivals.',
  'Previously, Dr. Lilly was a Research Assistant Professor in the Department of Pediatrics in the School of Medicine, also at WVU, where she worked as the biostatistician of the Coronary Artery Risk Detection in Appalachian Communities (CARDIAC) project.',
  'She received her Ph.D. in Economics from the University of California, Irvine in 2013. Her research focuses on monetary economics, search theory, and international economics, with a particular emphasis on the effects of monetary policy on payment systems and credit markets.',
  "She receive

In [27]:
# Row count of the drift split, taken from the dataset object itself.
print(len(ds["drift"]))

106242


In [28]:
# Shape of the drift embedding matrix; the recorded output has one row per drift example.
E_drift.shape

(106242, 768)

In [31]:
# Row count of the pandas copy of the drift split; the recorded output matches len(ds["drift"]).
print(len(df_drift))

106242
