# Biography Text Ablation Testing

To understand why the LLM is able to predict race so effectively, we performed an ablation analysis by replacing salient keywords with generic terms (e.g. "Ang Lee" is replaced with "PERSON", "Venezuela" is replaced with "LOCATION") and observing how model performance changes. We focus on `names` (was the model trained on these people, so it has background information from pre-training), `location` (does the model have priors about what races tend to be concentrated in what locations?), and `ethnicity` (does it recognize explicit race-related information in the bio).

In [None]:
!pip install transformers
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf
from sklearn.metrics import classification_report
from tqdm import tqdm
from google.colab import drive
drive.mount('/content/drive')

root_dir = "/content/drive/MyDrive/Undergrad/Summer 2023/Race Classification/biography" # jw10
flair = pd.read_csv(f"{root_dir}/flair_bios.csv")
flair = flair.replace(np.nan, "", regex=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
flair.sample(1)

Unnamed: 0,name,href,race,role,image,bio,bio_preprocessed,flair_ethn_bio,flair_loc_bio,flair_ppl_bio,flair_ethn+ppl_bio,flair_ethn+loc_bio,flair_loc+ppl_bio,flair_ethn+loc+ppl_bio,flair_person_only_bio,flair_person+ethn_bio,flair_person+loc_bio,flair_person+ethn+loc_bio
1121,Zadran Wali,/name/nm1731608,Asian,Actor or Actress,https://m.media-amazon.com/images/M/MV5BZTY1Nz...,"Zadran Wali is known for 12 Strong (2018), The...",zadran wali know strong kite runner charlie wi...,"Zadran Wali is known for 12 Strong ( 2018 ) , ...","Zadran Wali is known for 12 Strong ( 2018 ) , ...","PERSON is known for 12 Strong ( 2018 ) , The K...","PERSON is known for 12 Strong ( 2018 ) , The K...","Zadran Wali is known for 12 Strong ( 2018 ) , ...","PERSON is known for 12 Strong ( 2018 ) , The K...","PERSON is known for 12 Strong ( 2018 ) , The K...","PERSON is known for 12 Strong (2018), The Kite...","PERSON is known for 12 Strong ( 2018 ) , The K...","PERSON is known for 12 Strong ( 2018 ) , The K...","PERSON is known for 12 Strong ( 2018 ) , The K..."


In [None]:
def predict_probs(text):
  '''
  Returns - numpy arr with 4 prob categories
  '''
  predict_input = loaded_tokenizer.encode(text,
                                truncation=True,
                                padding=True,
                                return_tensors="tf")

  output = loaded_model(predict_input)[0]
  preds = tf.nn.softmax(output)
  return preds.numpy()

In [None]:
# NOTE: Need to have a lot of runtime for this to run all at once! Recommend breaking into pieces by column

# Predict race categories for bios in each relabeled column
for relabeled_bio in flair.loc[:, "flair_ethn_bio": "flair_person+ethn+loc_bio"].columns:
  print(relabeled_bio)
  # Conduct on 5 folds to cover the entire flair dataset
  full_dataset = []
  for i in range(5):
    print(f"Fold {i}")
    loaded_tokenizer = AutoTokenizer.from_pretrained(f"{root_dir}/BioRaceBERT/BioRaceBERT-{i}")
    loaded_model = TFAutoModelForSequenceClassification.from_pretrained(f"{root_dir}/BioRaceBERT/BioRaceBERT-{i}")

    # Merge flair bio index on test data index
    test_df = pd.read_csv(f"{root_dir}/BioRaceBERT/BioRaceBERT-test-{i}.csv")
    test_df = test_df.merge(flair, how="left", left_on="val_index", right_index=True)

    # Save prediction probabilities as numpy arr
    test_df = test_df[["val_index", "name", "href", "bio", relabeled_bio]].copy()
    test_df[f"{relabeled_bio}"] = test_df[f"{relabeled_bio}"].astype(str)
    test_df[f"{relabeled_bio}_probs"] = test_df[f"{relabeled_bio}"].apply(predict_probs)
    full_dataset.append(test_df)

  # combines 5 folds into total set of names
  full_dataset = pd.concat(full_dataset)
  full_dataset.to_csv(f"{root_dir}/{relabeled_bio}_flair_bios_probs.csv", index=None)

Finally, we manually combined columns of probabilities from individual csvs `{relabeled_bio}_flair_bios_probs.csv` to create `flair_bios_probs.csv`.


In [None]:
# I forgot to label the race so I do it here
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

root_dir = "/content/drive/MyDrive/Undergrad/Summer 2023/Race Classification/biography" # jw10
flair_probs = pd.read_csv(f"{root_dir}/flair_bios_probs.csv")
df = pd.read_csv(f"{root_dir}/BioRaceBERT-final.csv")
df = df[["href", "race", "race_cat"]].copy()
flair_probs = flair_probs.merge(df, how="left", on="href")
flair_probs.to_csv(f"{root_dir}/flair_bios_probs.csv", index=None)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# LIME (Local Interpretable Model-Agnostic Explanations)

We use LIME to try to understand what BioRaceBERT is using to make predictions.

In [None]:
!pip install transformers lime
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf
from sklearn.metrics import classification_report
from tqdm import tqdm
from google.colab import drive
drive.mount('/content/drive')

# Load data
root_dir = "/content/drive/MyDrive/Undergrad/Summer 2023/Race Classification/biography" # jw10
bios = pd.read_csv(f"{root_dir}/flair_bios_subset.csv")
bios = bios.replace(np.nan, "", regex=True)

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(f"{root_dir}/BioRaceBERT/BioRaceBERT-0")
model = TFAutoModelForSequenceClassification.from_pretrained(f"{root_dir}/BioRaceBERT/BioRaceBERT-0")

Collecting transformers
  Downloading transformers-4.33.1-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.1-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.8/294.8 kB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m68.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safeten

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at /content/drive/MyDrive/Undergrad/Summer 2023/Race Classification/biography/BioRaceBERT/BioRaceBERT-0.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [None]:
def predict(text):
  '''
  Tokenizes text and predicts model output as category beween [0,4)

  Params:
    text - preprocessed text
  '''
  predict_input = tokenizer.encode(text,
                                  truncation=True,
                                  padding=True,
                                  return_tensors="tf")

  output = model(predict_input)[0]
  prediction_value = tf.argmax(output, axis=1).numpy()[0]
  return prediction_value

def predict_probs(text):
  '''
  Returns - numpy arr with 4 prob categories
  '''
  predict_input = tokenizer.encode(text,
                                truncation=True,
                                padding=True,
                                return_tensors="tf")

  output = model(predict_input)[0]
  preds = tf.nn.softmax(output)
  print(preds)
  return preds.numpy()

In [None]:
from lime.lime_text import LimeTextExplainer

explainer = LimeTextExplainer(class_names=class_names)

text = 'Building more bypasses'

def predict_probs(text):
    '''
    Returns - numpy arr with 4 prob categories
    '''
    print(text)
    text = [str(word) for word in text]
    predict_input = tokenizer.encode(text,  # Use text_list[0] to access the first element
                                     truncation=True,
                                     padding=True,
                                     return_tensors="tf")
    print(predict_probs)
    output = model(predict_input)[0]
    preds = tf.nn.softmax(output)
    print(preds)
    return preds.numpy()

# Explain the instance using predict_probs
exp = explainer.explain_instance(
    text,  # Provide the list of text
    predict_probs,
    labels=["Asian", "Black", "Hispanic", "White"],
    num_features=20
)

# Show the explanation in the notebook
exp.show_in_notebook(text=text)


['Building more bypasses', '  bypasses', 'Building  ', ' more ', '  bypasses', 'Building  ', '  ', '  ', 'Building  ', 'Building  ', '  ', 'Building more ', '  ', '  ', 'Building more ', 'Building  ', '  ', 'Building more ', '  bypasses', '  bypasses', ' more ', '  bypasses', 'Building more ', 'Building more ', 'Building more ', '  ', 'Building more ', 'Building  bypasses', 'Building  bypasses', '  ', '  ', '  ', '  ', '  ', ' more bypasses', ' more bypasses', ' more ', ' more bypasses', '  ', '  ', '  ', '  ', '  bypasses', ' more bypasses', ' more bypasses', 'Building  ', '  ', 'Building  bypasses', ' more bypasses', '  ', ' more bypasses', '  ', '  ', ' more ', '  ', 'Building  bypasses', '  ', 'Building  bypasses', 'Building  bypasses', ' more ', ' more ', ' more bypasses', ' more bypasses', '  bypasses', '  ', 'Building  ', 'Building  bypasses', '  ', 'Building  bypasses', '  ', ' more bypasses', 'Building  ', 'Building  bypasses', 'Building  bypasses', ' more bypasses', 'Building

TypeError: ignored

In [10]:
# !pip install lime transformers
import numpy as np
import lime
import torch
import tensorflow as tf
import transformers
import torch.nn.functional as F
from lime.lime_text import LimeTextExplainer

from transformers import AutoTokenizer, AutoModelForSequenceClassification

filename_model = 'dhpollack/distilbert-dummy-sentiment'
tokenizer = AutoTokenizer.from_pretrained(filename_model)
model = AutoModelForSequenceClassification.from_pretrained(filename_model)
class_names = ['positive','negative', 'neutral']

def predictor(texts):
    outputs = model(**tokenizer(texts, return_tensors="tf", padding=True))
    tensor_logits = outputs[0]
    probas = F.softmax(tensor_logits).detach().numpy()
    return probas

text = 'Building more bypasses will help the environment by reducing pollution and traffic jams in towns and cities.'
print(tokenizer(text, return_tensors='pt', padding=True))

explainer = LimeTextExplainer(class_names=class_names)
exp = explainer.explain_instance(text, predictor, num_features=20, num_samples=2000)
exp.show_in_notebook(text=text)

{'input_ids': tensor([[3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


TypeError: ignored

In [None]:
# Predict race categories for bios in each relabeled column
for relabeled_bio in bios.loc[:, "flair_ethn_bio": "flair_person+ethn+loc_bio"].columns:
  print(relabeled_bio)
  # Conduct on 5 folds to cover the entire flair dataset
  full_dataset = []
  for i in range(5):
    print(f"Fold {i}")
    loaded_tokenizer = AutoTokenizer.from_pretrained(f"{root_dir}/BioRaceBERT/BioRaceBERT-{i}")
    loaded_model = TFAutoModelForSequenceClassification.from_pretrained(f"{root_dir}/BioRaceBERT/BioRaceBERT-{i}")

    # Merge flair bio index on test data index
    test_df = pd.read_csv(f"{root_dir}/BioRaceBERT/BioRaceBERT-test-{i}.csv")
    test_df = test_df.merge(bios, how="left", left_on="val_index", right_index=True)

    # Save prediction probabilities as numpy arr
    test_df = test_df[["val_index", "name", "href", "bio", relabeled_bio]].copy()
    test_df[f"{relabeled_bio}"] = test_df[f"{relabeled_bio}"].astype(str)



In [None]:
import transformers
import datasets
import torch
import numpy as np
import scipy as sp

# https://shap.readthedocs.io/en/latest/example_notebooks/overviews/An%20introduction%20to%20explainable%20AI%20with%20Shapley%20values.html
# load a BERT sentiment analysis model
tokenizer = transformers.DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
model = transformers.DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
).cuda()

# define a prediction function
def f(x):
    tv = torch.tensor([tokenizer.encode(v, padding='max_length', max_length=500, truncation=True) for v in x]).cuda()
    outputs = model(tv)[0].detach().cpu().numpy()
    scores = (np.exp(outputs).T / np.exp(outputs).sum(-1)).T
    val = sp.special.logit(scores[:,1]) # use one vs rest logit units
    return val

# build an explainer using a token masker
explainer = shap.Explainer(f, tokenizer)

# explain the model's predictions on IMDB reviews
imdb_train = datasets.load_dataset("imdb")["train"]
shap_values = explainer(imdb_train[:10], fixed_context=1, batch_size=2)