In [None]:
import ast
import asyncio
from collections import Counter
from datasets import load_dataset
import datetime as dt
from dotenv import load_dotenv
from huggingface_hub import login
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import nest_asyncio
import numpy as np
import os
import pandas as pd
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
from transformers import pipeline

### Installs
"""
pip install datasets
pip install ipywidgets
pip install matplotlib
"""

load_dotenv()
nest_asyncio.apply()

### Baseline: DBias
+ Import the DBias classification model
+ https://huggingface.co/d4data/bias-detection-model
+ Note: loading the model through `transformers` (as in the next cell) is the only approach that worked for me; installing from the wheel did not work.

In [None]:
tokenizer = AutoTokenizer.from_pretrained("d4data/bias-detection-model")
model = TFAutoModelForSequenceClassification.from_pretrained("d4data/bias-detection-model")

In [None]:
# Baseline model classifier: a text-classification pipeline built from the
# `model` and `tokenizer` loaded in the previous cell.
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)

In [None]:
# Preview some tests
for example in [
    "The irony, of course, is that the exhibit that invites people to throw trash at vacuuming Ivanka Trump lookalike reflects every stereotype feminists claim to stand against, oversexualizing Ivanka’s body and ignoring her hard work.",
    "Billie Eilish issues apology for mouthing an anti-Asian derogatory term in a resurfaced video.",
    "Dave walked over the hill to have a coffee at Progressive Grounds.",
    "Tara and Dave share their preferred pronouns in class.",
    "They applied to a grad program to learn advanced techniques.",
    "Trump is the 45th and 47th President of the United States.",
    "The podcast has almost exclusively male guests.",
    "Dad always makes funny jokes.",
    "Mom always makes funny jokes.",
    "White people tend to get better jobs.",
    "Black people have a harder time getting promoted.",
]:
    print(f"For example: {example}")
    print(classifier(example)[0])

### Dataset: NBias

In [None]:
# Authenticate using your token
login(os.getenv("HUGGINGFACE_TOKEN"))

In [None]:
# Store the various dataset directories in a dict
splits = {
    'bias_train': '1-Text-Classification/bias-train.csv',
    'bias_valid': '1-Text-Classification/bias-valid.csv',
    'bias_train_instruction': '1-Text-Classification/bias-train_10k-instruction-data.csv',
    'bias_valid_instruction': '1-Text-Classification/bias-valid_1k-instruction-data.csv',
    'sentiment_train': '1-Text-Classification/sentiment-train.csv',
    'sentiment_valid': '1-Text-Classification/sentiment-valid.csv',
    'toxic_train': '1-Text-Classification/toxic-train.csv',
    'toxic_valid': '1-Text-Classification/toxic-valid.csv',
    'multi_label': '1-Text-Classification/multi-label.csv'
}

In [None]:
bias_train = load_dataset("shainar/BEAD", data_files=splits["bias_train"])["train"]
bias_train_df = bias_train.to_pandas()

In [None]:
bias_train_df.head()

In [None]:
bias_train_df["label"].value_counts()

In [None]:
bias_train_df.shape

## Exploratory Data Analysis
0. Full Annotations
1. Text Classification
2. Token Classification
3. Aspects
4. Bias-Quantification-Demographics
5. Language-Generation

### 0-Full Annotations

In [None]:
# 0. Full Annotations
full_train = load_dataset("shainar/BEAD", data_files="0-Full Annotations/Full.csv")["train"]
full_train_df = full_train.to_pandas()

In [None]:
print(full_train_df.shape)
full_train_df.head()

In [None]:
# label
freq = full_train_df["label"].value_counts(dropna=False)
pct = round(freq / full_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# sentiment
freq = full_train_df["sentiment"].value_counts(dropna=False)
pct = round(freq / full_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# toxic
freq = full_train_df["toxic"].value_counts(dropna=False)
pct = round(freq / full_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# identity_mention
freq = full_train_df["identity_mention"].value_counts(dropna=False)
pct = round(freq / full_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# dimension
freq = full_train_df["dimension"].value_counts(dropna=False)
pct = round(freq / full_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# aspect
freq = full_train_df["aspect"].value_counts(dropna=False)
pct = round(freq / full_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct}).head(10)

In [None]:
# biased_words
# Each cell is parsed with ast.literal_eval and then iterated as a list of words
# (presumably the CSV stores the list as its string repr -- confirm).
# Only strings are parsed: once this cell has run, the column already holds
# parsed objects, and calling literal_eval on them again raises ValueError,
# which made the cell fail on every re-run.
full_train_df["biased_words"] = full_train_df["biased_words"].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

# Flatten all per-row lists and count how often each word was flagged
biased_words = [word for sublist in full_train_df["biased_words"] for word in sublist]
biased_words_counts = Counter(biased_words)

# Build the frequency table without in-place mutation so the cell is idempotent
biased_words_df = (
    pd.DataFrame(biased_words_counts.items(), columns=["Word", "#"])
    .set_index("Word")
    .sort_values(by="#", ascending=False)
)
# Share of all flagged-word occurrences (not of rows), as a percentage
biased_words_df["%"] = (biased_words_df["#"] / biased_words_df["#"].sum() * 100).round(5)
biased_words_df.head(10)

In [None]:
# text
upper_bound = 2000
full_train_df["text_len"] = full_train_df["text"].str.len()
limited_df = full_train_df[full_train_df["text_len"] <= upper_bound]
pct_remaining = round(limited_df.shape[0]/full_train_df.shape[0] * 100, 5)
print(f"Setting a limit of {upper_bound:,} reduced to {limited_df.shape[0]:,}/{full_train_df.shape[0]:,} ({pct_remaining}%) rows...")
plt.figure(figsize=(8, 6))
plt.hist(limited_df["text_len"], bins=10, edgecolor="black")
plt.title("Binning 'text' field character length")
plt.gca().yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{int(x):,}'))
plt.show()

In [None]:
# Measure the bias rate in text len ascending
def bin_text_len(column, increment):
    """Bucket the values of ``column`` into fixed-width, right-closed intervals.

    Bin edges start at 0 and advance by ``increment`` until they reach or pass
    the column's maximum. The first interval is open on the left, so a value of
    exactly 0 falls outside every bin and comes back as NaN.

    Args:
        column: numeric pandas Series (here: text lengths in characters).
        increment: width of each bin.

    Returns:
        A categorical Series of intervals, aligned with ``column``.
    """
    edges = np.arange(0, column.max() + increment, increment)
    return pd.cut(column, bins=edges)

increment = 200
full_train_df["bin"] = bin_text_len(full_train_df["text_len"], increment)

# "bin" is categorical. observed=False is spelled out so that empty bins keep
# appearing as zero-count rows regardless of the pandas version: recent pandas
# warns about relying on the implicit default and is changing it.
bin_label_counts = (
    full_train_df.groupby(["bin", "label"], observed=False).size().unstack(fill_value=0)
)

# Fix the column order; .copy() so the derived columns below are added to an
# independent frame rather than to a selection of another one.
bin_label_counts = bin_label_counts[["Neutral", "Slightly Biased", "Highly Biased"]].copy()
bin_label_counts["Total"] = bin_label_counts.sum(axis=1)

# Share of each length bin that is labelled biased (slightly or highly), in percent
biased_total = bin_label_counts["Slightly Biased"] + bin_label_counts["Highly Biased"]
bin_label_counts["Biased %"] = (biased_total / bin_label_counts["Total"] * 100).round(5)
bin_label_counts.head(50)

### 4-Bias-Quantification-Demographics

In [None]:
# 4. Bias-Quantification-Demographics

# Store the various dataset directories in a dict
splits = {
    "demographic_variation": "4-Bias-Quantification-Demographics/Demographic-variation.csv",
    "demographic_template": "4-Bias-Quantification-Demographics/demographic-template.csv",
    "stereotype_prompts": "4-Bias-Quantification-Demographics/stereotype_prompts.csv",
}

In [None]:
# demographic_variation
bias_quant_demo_var_train = load_dataset("shainar/BEAD", data_files=splits["demographic_variation"])["train"]
bias_quant_demo_var_train_df = bias_quant_demo_var_train.to_pandas()

In [None]:
import random

# Print one randomly chosen value from a few columns. Each draw is independent,
# so the printed examples generally come from different rows.
#
# random.randrange(n_rows) excludes n_rows, so every draw is a valid position.
# The previous random.randint(0, n_rows) is inclusive of n_rows and could make
# the lookup raise KeyError; nesting a second randint inside the first draw also
# skewed the examples toward the top of the frame.
# NOTE(review): 'gender_a_woman' is printed twice, as in the original -- possibly
# a different column was intended for the last example.
n_rows = bias_quant_demo_var_train_df.shape[0]
for column in ("Original", "gender_a_man", "gender_a_woman", "gender_a_woman"):
    row_pos = random.randrange(n_rows)
    print(f"'{column}' example: {bias_quant_demo_var_train_df[column].iloc[row_pos]}")
print("")

print(bias_quant_demo_var_train_df.shape)
bias_quant_demo_var_train_df.head()

In [None]:
# demographic_template
bias_quant_demo_templ_train = load_dataset("shainar/BEAD", data_files=splits["demographic_template"])["train"]
bias_quant_demo_templ_train_df = bias_quant_demo_templ_train.to_pandas()

In [None]:
print(bias_quant_demo_templ_train_df.shape)
bias_quant_demo_templ_train_df.head()

In [None]:
# label
freq = bias_quant_demo_templ_train_df["label"].value_counts(dropna=False)
pct = round(freq / bias_quant_demo_templ_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# sentiment
freq = bias_quant_demo_templ_train_df["sentiment"].value_counts(dropna=False)
pct = round(freq / bias_quant_demo_templ_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# identity_mention
freq = bias_quant_demo_templ_train_df["identity_mention"].value_counts(dropna=False)
pct = round(freq / bias_quant_demo_templ_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# stereotype_prompts
bias_quant_stereotype_prompts_train = load_dataset("shainar/BEAD", data_files=splits["stereotype_prompts"])["train"]
bias_quant_stereotype_prompts_train_df = bias_quant_stereotype_prompts_train.to_pandas()

In [None]:
print(bias_quant_stereotype_prompts_train_df.shape)
bias_quant_stereotype_prompts_train_df.head()

In [None]:
bias_quant_stereotype_prompts_train_df.loc[0, "user_prompt"]

In [None]:
# stereotype_topic
freq = bias_quant_stereotype_prompts_train_df["stereotype_topic"].value_counts(dropna=False)
pct = round(freq / bias_quant_stereotype_prompts_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

In [None]:
# target_group
freq = bias_quant_stereotype_prompts_train_df["target_group"].value_counts(dropna=False)
pct = round(freq / bias_quant_stereotype_prompts_train_df.shape[0] * 100, 5)
pd.DataFrame({"#": freq, "%": pct})

### 5-Language-Generation

In [None]:
# # 5. Language-Generation

# # Store the various dataset directories in a dict
# splits = {
#     "alpaca_debiasing_data": "5-Language-Generation/alpaca_debiasing_data-variation.csv",
#     "bias_debias": "5-Language-Generation/bias-debias.csv",
#     "test": "5-Language-Generation/test.csv",
# }
# lang_gen_alpaca_train = load_dataset("shainar/BEAD", data_files=splits["alpaca_debiasing_data"])["train"]
# lang_gen_alpaca_train_df = lang_gen_alpaca_train.to_pandas()

### Baseline
+ Only trust a predicted label when the model's confidence score is at least `CONFIDENCE_THRESH`; anything below that is treated as "Uncertain". Relying on the label alone gave very inconsistent results.

In [None]:
CONFIDENCE_THRESH = 0.9

In [None]:
# How many of the records are in a "confident" classification (e.g., drop Slightly biased)?
# Label matching is case-sensitive. The rest of this notebook filters on
# "Highly Biased" (capital B); the previous "Highly biased" matched no rows,
# so only the Neutral records were being counted here.
polarized_records = full_train_df[full_train_df["label"].isin(["Neutral", "Highly Biased"])]
num_polarized_records = polarized_records.shape[0]
print(f"{num_polarized_records:,}")

In [None]:
full_train_df[full_train_df["label"] == "Highly Biased"][["text", "label", "biased_words"]].sample(5)

In [None]:
# Randomly sample N biased and N unbiased records to see how DBias compares
# NOTE: Only using "Highly Biased" and ignoring "Slightly Biased" for now
sample_size = 1000
# Fixed seed so that Restart & Run All draws the same rows every time; without
# it the baseline numbers below cannot be reproduced from one run to the next.
SAMPLE_SEED = 42

sample_biased = (
    full_train_df[full_train_df["label"] == "Highly Biased"]
    .sample(sample_size, random_state=SAMPLE_SEED)
)
sample_biased_text = np.array(sample_biased["text"])
print(sample_biased_text.shape)

sample_unbiased = (
    full_train_df[full_train_df["label"] == "Neutral"]
    .sample(sample_size, random_state=SAMPLE_SEED)
)
sample_unbiased_text = np.array(sample_unbiased["text"])
print(sample_unbiased_text.shape)

In [None]:
# Model supports max of 512 tokens
MODEL_MAX = 512

# Tokenise with truncation to MODEL_MAX, then decode the ids back to text, so
# the classifier is later fed text that already fits the token limit.
# The same tokenizer settings are used for both samples.
truncation_settings = {
    "max_length": MODEL_MAX,
    "truncation": True,
    "padding": "max_length",
    "return_tensors": "tf",
}

# --- biased sample ---
sample_biased_tokens = tokenizer(list(sample_biased_text), **truncation_settings)
sample_biased_text_limited = []
for ids in sample_biased_tokens["input_ids"]:
    # skip_special_tokens is passed so special tokens are left out of the decoded text
    sample_biased_text_limited.append(tokenizer.decode(ids, skip_special_tokens=True))
print(len(sample_biased_text_limited))

# --- unbiased sample ---
sample_unbiased_tokens = tokenizer(list(sample_unbiased_text), **truncation_settings)
sample_unbiased_text_limited = []
for ids in sample_unbiased_tokens["input_ids"]:
    sample_unbiased_text_limited.append(tokenizer.decode(ids, skip_special_tokens=True))
print(len(sample_unbiased_text_limited))

In [None]:
# Seconds per record used for the projections below.
# NOTE(review): 20.6 and 14.4 look like measured seconds per 100 records for the
# two samples, averaged and divided by 100 -- confirm where they came from.
average = (20.6 + 14.4) / 2 / 100
print(average)

# A dedicated loop variable is used on purpose: the previous version reassigned
# the global `sample_size`, leaving it equal to num_polarized_records and
# silently changing N if the sampling cell was re-run afterwards.
for projected_size in (1000, 10000, 100000, 1000000, num_polarized_records):
    print(f"A sample of {projected_size:,} would take {round(projected_size*average/60, 1):,} minutes to complete")

# Derive the hour figure from the data instead of hard-coding it, so the message
# cannot drift out of sync with num_polarized_records or `average`.
projected_hours = round(num_polarized_records * average / 3600)
print(f"It would take {projected_hours:,} hours to baseline categorize the entire dataset of Highly Biased and Neutral records (ignoring Slightly Biased)")

### Parallelization

In [None]:
# General rule of thumb -- use # concurrent threads ≈ 2x to 3x the number of cores (Dave's laptop has 20 cores)
max_threads = os.cpu_count()
print("Number of CPU cores:", max_threads)

In [None]:
# Semaphore controls the number of concurrent threads
MAX_CONCURRENT = 30
semaphore = asyncio.Semaphore(MAX_CONCURRENT)

In [None]:
# Define the async function that uses the semaphore
async def process_classification(text):
    """Classify a single text and return its label, or "Uncertain".

    The module-level ``semaphore`` bounds how many classifications are in
    flight at once. The synchronous ``classifier`` call is handed to the event
    loop's default executor so it does not block the loop.

    Args:
        text: the text to classify.

    Returns:
        The label of the first prediction, or "Uncertain" when its score is
        None or below ``CONFIDENCE_THRESH``.
    """
    async with semaphore:
        event_loop = asyncio.get_running_loop()
        predictions = await event_loop.run_in_executor(None, classifier, text)

        # Only the first prediction is inspected
        top_label = predictions[0]["label"]
        top_score = predictions[0]["score"]

        # Low-confidence (or missing) scores are not trusted
        if top_score is None or top_score < CONFIDENCE_THRESH:
            return "Uncertain"
        return top_label

In [None]:
# Asynchronous function to run classifier across list of texts
async def classify_with_dbias(texts):
    """Classify every text concurrently and return the labels in input order.

    One task per text is created up front; ``process_classification`` is
    responsible for limiting how many of them actually run at the same time.
    Start/end times and the elapsed time are printed for rough benchmarking.

    Args:
        texts: iterable of texts to classify.

    Returns:
        A list with one label per input text, in the same order as ``texts``.
    """
    start_time = dt.datetime.now()
    print(f"Beginning main at {start_time.strftime('%H:%M:%S')}")
    tasks = [asyncio.create_task(process_classification(text)) for text in texts]
    print(f"Prepared {len(tasks)} tasks...")

    # Run tasks concurrently; gather returns results in the order of `tasks`
    results = await asyncio.gather(*tasks)

    end_time = dt.datetime.now()
    duration = end_time - start_time
    # `duration` is a timedelta, which formats as H:MM:SS.ffffff; convert it
    # explicitly so the value really is in the unit the message names.
    print(
        f"Completed main at {end_time.strftime('%H:%M:%S')}. "
        f"Total of {duration.total_seconds():,.1f} seconds to complete..."
    )
    return results

In [None]:
# Run the main function in an event loop ***BIASED***
# Took 3:44 to run 1,000
sample_biased_results = await classify_with_dbias(sample_biased_text_limited)

In [None]:
# Run the main function in an event loop ***UNBIASED***
# Took 3:09 to run 1,000
sample_unbiased_results = await classify_with_dbias(sample_unbiased_text_limited)

In [None]:
# Tally how often each label was predicted for the biased sample
print("Biased sample:")
unique_elements, counts = np.unique(sample_biased_results, return_counts=True)
sample_biased_value_counts = {label: count for label, count in zip(unique_elements, counts)}
print(sample_biased_value_counts)

# Same tally for the unbiased sample
print("Unbiased sample:")
unique_elements, counts = np.unique(sample_unbiased_results, return_counts=True)
sample_unbiased_value_counts = {label: count for label, count in zip(unique_elements, counts)}
print(sample_unbiased_value_counts)

In [None]:
# Label counts recorded from earlier runs of the two cells above, as
# (run name, counts for the biased sample, counts for the unbiased sample).
# Keeping the raw counts here (instead of hand-transcribed sums) lets the
# metrics below be derived the same way for every run.
recorded_runs = [
    (
        "SAMPLES #1 (100)",
        {'Biased': 54, 'Uncertain': 46},
        {'Biased': 23, 'Non-biased': 4, 'Uncertain': 73},
    ),
    (
        "SAMPLES #2 (1,000)",
        {'Biased': 456, 'Non-biased': 3, 'Uncertain': 541},
        {'Biased': 292, 'Non-biased': 28, 'Uncertain': 680},
    ),
]

for run_name, biased_counts, unbiased_counts in recorded_runs:
    # right: biased text called "Biased" + unbiased text called "Non-biased"
    right = biased_counts.get("Biased", 0) + unbiased_counts.get("Non-biased", 0)
    # wrong: biased text called "Non-biased" + unbiased text called "Biased"
    wrong = biased_counts.get("Non-biased", 0) + unbiased_counts.get("Biased", 0)
    # drop: predictions below the confidence threshold in either sample
    drop = biased_counts.get("Uncertain", 0) + unbiased_counts.get("Uncertain", 0)
    # Accuracy is measured over retained (confident) predictions only
    denom = right + wrong
    print(f"Accuracy rate of {run_name}: {round(right/denom*100, 1)}%")
    print(f"Retain rate for {run_name}: {round(denom/(denom + drop)*100, 1)}%")
    print("")

Dave forecasts it would take about 4.5 days to process all ~1.9 million records, based on the two timed runs:
+ 2,000 	Sample size
+ 3.75	First 1000
+ 3.10	Second 1000
+ 6.85	Total minutes
+ 411	Total seconds
+ 0.21	Seconds per record

Resulting in...
+ 1,886,253 	Total dataset
+ 387,624.99 	Implied processing seconds
+ 6,460.42 	Implied processing minutes
+ 107.67 	Implied processing hours

### Modeling