In [1]:
import argparse
import os
import sys
import torch
import transformers
import numpy as np
import sklearn
import scipy.io as io
from sklearn.svm import LinearSVC
from tqdm import tqdm
from bias_bench.dataset import load_inlp_data
from bias_bench.debias.inlp import compute_projection_matrix
from bias_bench.debias.inlp import debias
from bias_bench.model import models
from bias_bench.util import generate_experiment_id
from bias_bench.debias.inlp import debias

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/sc066/sc066/shunshao/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
thisdir = os.path.dirname(os.path.realpath("__file__"))
parser = argparse.ArgumentParser(description="Computes the projection matrix for INLP.")
parser.add_argument(
    "--persistent_dir",
    action="store",
    type=str,
    default=os.path.realpath(os.path.join(thisdir, "..")),
    help="Directory where all persistent data will be stored.",
)
parser.add_argument(
    "--model",
    action="store",
    type=str,
    default="BertModel",
    choices=["BertModel", "AlbertModel", "RobertaModel", "GPT2Model"],
    help="Model (e.g., BertModel) to compute the INLP projection matrix for. "
    "Typically, these correspond to a HuggingFace class.",
)
parser.add_argument(
    "--model_name_or_path",
    action="store",
    type=str,
    default="bert-base-uncased",
    choices=["bert-base-uncased", "albert-base-v2", "roberta-base", "gpt2"],
    help="HuggingFace model name or path (e.g., bert-base-uncased). Checkpoint from which a "
    "model is instantiated.",
)
parser.add_argument(
    "--bias_type",
    action="store",
    default="gender",
    choices=["gender", "race", "religion"],
    help="What type of bias to compute the INLP projection matrix for.",
)
parser.add_argument(
    "--n_classifiers",
    action="store",
    type=int,
    default=80,
    help="Number of classifiers to train when computing projection matrix.",
)
parser.add_argument("--seed", action="store", type=int, default=0, help="Seed for RNG.")

_StoreAction(option_strings=['--seed'], dest='seed', nargs=None, const=None, default=0, type=<class 'int'>, choices=None, help='Seed for RNG.', metavar=None)

In [4]:
def _extract_gender_features(
    model,
    tokenizer,
    male_sentences,
    female_sentences,
    neutral_sentences,
):
    """Encodes gender sentences to create a set of representations to train classifiers
    for INLP on.
    Notes:
        * Implementation taken from  https://github.com/pliang279/LM_bias.
    """
    model.to(device)

    male_features = []
    female_features = []
    neutral_features = []

    # Encode the sentences.
    with torch.no_grad():
        for sentence in tqdm(male_sentences, desc="Encoding male sentences"):
            input_ids = tokenizer(
                sentence, add_special_tokens=True, truncation=True, return_tensors="pt"
            ).to(device)

            outputs = model(**input_ids)["last_hidden_state"]
            outputs = torch.mean(outputs, dim=1)
            outputs = outputs.squeeze().detach().cpu().numpy()

            male_features.append(outputs)

        for sentence in tqdm(female_sentences, desc="Encoding female sentences"):
            input_ids = tokenizer(
                sentence, add_special_tokens=True, truncation=True, return_tensors="pt"
            ).to(device)

            outputs = model(**input_ids)["last_hidden_state"]
            outputs = torch.mean(outputs, dim=1)
            outputs = outputs.squeeze().detach().cpu().numpy()

            female_features.append(outputs)

        for sentence in tqdm(neutral_sentences, desc="Encoding neutral sentences"):
            input_ids = tokenizer(
                sentence, add_special_tokens=True, truncation=True, return_tensors="pt"
            ).to(device)

            outputs = model(**input_ids)["last_hidden_state"]
            outputs = torch.mean(outputs, dim=1)
            outputs = outputs.squeeze().detach().cpu().numpy()

            neutral_features.append(outputs)

    male_features = np.array(male_features)
    female_features = np.array(female_features)
    neutral_features = np.array(neutral_features)

    return male_features, female_features, neutral_features


def _extract_binary_features(model, tokenizer, bias_sentences, neutral_sentences):
    """Encodes race/religion sentences to create a set of representations to train classifiers
    for INLP on.
    Notes:
        * Sentences are split into two classes based upon if they contain *any* race/religion bias
          attribute words.
    """
    model.to(device)

    bias_features = []
    neutral_features = []

    # Encode the sentences.
    with torch.no_grad():
        for sentence in tqdm(bias_sentences, desc="Encoding bias sentences"):
            input_ids = tokenizer(
                sentence, add_special_tokens=True, truncation=True, return_tensors="pt"
            ).to(device)

            outputs = model(**input_ids)["last_hidden_state"]
            outputs = torch.mean(outputs, dim=1)
            outputs = outputs.squeeze().detach().cpu().numpy()

            bias_features.append(outputs)

        for sentence in tqdm(neutral_sentences, desc="Encoding neutral sentences"):
            input_ids = tokenizer(
                sentence, add_special_tokens=True, truncation=True, return_tensors="pt"
            ).to(device)

            outputs = model(**input_ids)["last_hidden_state"]
            outputs = torch.mean(outputs, dim=1)
            outputs = outputs.squeeze().detach().cpu().numpy()

            neutral_features.append(outputs)

    bias_features = np.array(bias_features)
    neutral_features = np.array(neutral_features)

    return bias_features, neutral_features


def _split_gender_dataset(male_feat, female_feat, neut_feat):
    np.random.seed(0)

    X = np.concatenate((male_feat, female_feat, neut_feat), axis=0)

    y_male = np.ones(male_feat.shape[0], dtype=int)
    y_female = np.zeros(female_feat.shape[0], dtype=int)
    y_neutral = -np.ones(neut_feat.shape[0], dtype=int)

    y = np.concatenate((y_male, y_female, y_neutral))

    X_train_dev, X_test, y_train_dev, Y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.3, random_state=0
    )
    X_train, X_dev, Y_train, Y_dev = sklearn.model_selection.train_test_split(
        X_train_dev, y_train_dev, test_size=0.3, random_state=0
    )

    return X_train, X_dev, X_test, Y_train, Y_dev, Y_test


def _split_binary_dataset(bias_feat, neut_feat):
    np.random.seed(0)

    X = np.concatenate((bias_feat, neut_feat), axis=0)

    y_bias = np.ones(bias_feat.shape[0], dtype=int)
    y_neutral = np.zeros(neut_feat.shape[0], dtype=int)

    y = np.concatenate((y_bias, y_neutral))

    X_train_dev, X_test, y_train_dev, Y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.3, random_state=0
    )
    X_train, X_dev, Y_train, Y_dev = sklearn.model_selection.train_test_split(
        X_train_dev, y_train_dev, test_size=0.3, random_state=0
    )

    return X_train, X_dev, X_test, Y_train, Y_dev, Y_test


def _apply_nullspace_projection(
    X_train, X_dev, X_test, Y_train, Y_dev, Y_test, n_classifiers=80
):
    classifier_parameters = {
        "fit_intercept": False,
        "class_weight": None,
        "dual": False,
        "random_state": 0,
    }

    P, rowspace_projs, Ws = debias.get_debiasing_projection(
        classifier_class=LinearSVC,
        cls_params=classifier_parameters,
        num_classifiers=n_classifiers,
        input_dim=768,
        is_autoregressive=True,
        min_accuracy=0,
        X_train=X_train,
        Y_train=Y_train,
        X_dev=X_dev,
        Y_dev=Y_dev,
        Y_train_main=None,
        Y_dev_main=None,
        by_class=False,
        dropout_rate=0,
    )

    return P, rowspace_projs, Ws

In [5]:
def create_dataset(model, tokenizer, data, bias_type, n_classifiers=80):
    """Runs INLP.
    Notes:
        * We use the same classifier hyperparameters as Liang et al.
    Args:
        model: HuggingFace model (e.g., BertModel) to compute the projection
            matrix for.
        tokenizer: HuggingFace tokenizer (e.g., BertTokenizer). Used to pre-process
            examples for the INLP classifiers.
        data (`dict`): Dictionary of sentences used to train the INLP classifiers.
        bias_type (`str`): Type of bias to compute a projection matrix for.
        n_classifiers (`int`): How many classifiers to train when computing INLP
            projection matrix.
    """
    if bias_type == "gender":
        male_sentences = data["male"]
        female_sentences = data["female"]
        neutral_sentences = data["neutral"]

        male_features, female_features, neutral_features = _extract_gender_features(
            model, tokenizer, male_sentences, female_sentences, neutral_sentences
        )

        X_train, X_dev, X_test, Y_train, Y_dev, Y_test = _split_gender_dataset(
            male_features, female_features, neutral_features
        )

    else:
        bias_sentences = data["bias"]
        neutral_sentences = data["neutral"]

        bias_features, neutral_features = _extract_binary_features(
            model, tokenizer, bias_sentences, neutral_sentences
        )

        X_train, X_dev, X_test, Y_train, Y_dev, Y_test = _split_binary_dataset(
            bias_features, neutral_features
        )

#     print("Dataset split sizes:")
#     print(
#         f"Train size: {X_train.shape[0]}; Dev size: {X_dev.shape[0]}; Test size: {X_test.shape[0]}"
#     )

#     P, rowspace_projs, Ws = _apply_nullspace_projection(
#         X_train, X_dev, X_test, Y_train, Y_dev, Y_test, n_classifiers=n_classifiers
#     )

#     P = torch.tensor(P, dtype=torch.float32)

    return X_train, X_dev, X_test, Y_train, Y_dev, Y_test

In [7]:
# all_models=["BertModel", "AlbertModel", "RobertaModel", "GPT2Model"]
# all_bias_types=["gender", "race", "religion"]
all_models=["GPT2Model"]
all_bias_types=["religion"]

model_name = "BertModel"
bias_type = "gender"


if model_name == "GPT2Model":
    n_classifiers = "10"
else:
    n_classifiers = "80"

if model_name == "BertModel":
    model_name_path = "bert-base-uncased"
elif model_name == "AlbertModel":
    model_name_path = "albert-base-v2"
elif model_name == "RobertaModel":
    model_name_path = "roberta-base"
elif model_name == "GPT2Model":
    model_name_path = "gpt2"

sys.argv = ['--model', model_name, '--model_name_or_path', model_name_path,\
           '--bias_type', bias_type, '--n_classifiers', n_classifiers, '--seed', '0']


args = parser.parse_args(sys.argv)
experiment_id = generate_experiment_id(
    name="projection",
    model=args.model,
    model_name_or_path=args.model_name_or_path,
    bias_type=args.bias_type,
    seed=args.seed,
)

print("Computing projection matrix:")
print(f" - persistent_dir: {args.persistent_dir}")
print(f" - model: {args.model}")
print(f" - model_name_or_path: {args.model_name_or_path}")
print(f" - bias_type: {args.bias_type}")
print(f" - n_classifiers: {args.n_classifiers}")
print(f" - seed: {args.seed}")

# Load data for INLP classifiers.
data = load_inlp_data(args.persistent_dir, args.bias_type, seed=args.seed)

Computing projection matrix:
 - persistent_dir: /mnt/lustre/indy2lfs/work/sc066/sc066/shunshao/code/bias-bench
 - model: BertModel
 - model_name_or_path: bert-base-uncased
 - bias_type: gender
 - n_classifiers: 80
 - seed: 0


Loading INLP data:   6%|▉             | 88640/1372632 [00:10<02:29, 8586.98it/s]

INLP dataset collected:
 - Num. male sentences: 10000
 - Num. female sentences: 10000
 - Num. neutral sentences: 10000





In [8]:
male_sentences = data["male"]
female_sentences = data["female"]
neutral_sentences = data["neutral"]

In [9]:
pwd

'/mnt/lustre/indy2lfs/work/sc066/sc066/shunshao/code/bias-bench/experiments'

In [10]:
male_sentences

['as a solution to this issue of pronouns, swift and miller suggest using \'they\' in place of a gendered pronoun, or specifically stating "he or she/his or hers" as a substitute for just the male pronoun.',
 'yoshitaka was adopted as heir by his uncle, satake yoshinobu of kubota domain in april 1626, following the disinheritance of yoshinobu’s son satake yoshinao for incompetence, and was given the',
 'he turned kameda domain over to his younger',
 'on april 27, 1626, he was presented in formal audience to tokugawa hidetada and tokugawa iemitsu, and was awarded lower',
 'yoshitaka was awarded an estate of 50,000 "koku" in november 1630 by his adoptive father, who exhibited no signs of intending to retire or relax his control over kubota',
 'he was permitted to formally enter the domain for the first time on may',
 'on december 28, 1666, his court title was promoted to “general of the left imperial guard” (左近衛少将).',
 'he died at kubota castle',
 'he dedicated his book, "géopolitique, c

In [11]:
female_sentences

['in 2012, she became noted after a fuel',
 'in addition to the spine and leg injuries that affected her',
 'she enjoyed learning, and',
 'in 1951 she began using a motorized wheelchair, saying "heaven help any bureaucrat who gets in my',
 'she plays for the asf mahdia and in the tunisian',
 "she represented tunisia at the 2013 world women's handball",
 'she was part of the team winning',
 'original member orish grinstead, twin sister of irish, died on april 20, 2008, from kidney failure at the age',
 'she can be seen as one of the original four members of 702 in the video, "this lil\' game we play" with subway and',
 'the ferry had left manly at 07:45, and when off dobroyd she',
 'the "kate" was heading about due east, and the "bellubera"in the same direction as when she left.',
 "they then began to look for a wife for frederick william and potential brides included the holy roman emperor's sister, archduchess maria amalia, and the",
 'although angelina teny disputed the election resu

In [None]:
pwd

In [None]:
all_models=["BertModel", "AlbertModel", "RobertaModel", "GPT2Model"]
all_bias_types=["gender", "race", "religion"]

for model_name in all_models:
    for bias_type in all_bias_types:
        if model_name == "GPT2Model":
            n_classifiers = "10"
        else:
            n_classifiers = "80"
        
        if model_name == "BertModel":
            model_name_path = "bert-base-uncased"
        elif model_name == "AlbertModel":
            model_name_path = "albert-base-v2"
        elif model_name == "RobertaModel":
            model_name_path = "roberta-base"
        elif model_name == "GPT2Model":
            model_name_path = "gpt2"
    
        sys.argv = ['--model', model_name, '--model_name_or_path', model_name_path,\
                   '--bias_type', bias_type, '--n_classifiers', n_classifiers, '--seed', '0']
        
        
        args = parser.parse_args(sys.argv)
        experiment_id = generate_experiment_id(
            name="projection",
            model=args.model,
            model_name_or_path=args.model_name_or_path,
            bias_type=args.bias_type,
            seed=args.seed,
        )

        print("Computing projection matrix:")
        print(f" - persistent_dir: {args.persistent_dir}")
        print(f" - model: {args.model}")
        print(f" - model_name_or_path: {args.model_name_or_path}")
        print(f" - bias_type: {args.bias_type}")
        print(f" - n_classifiers: {args.n_classifiers}")
        print(f" - seed: {args.seed}")

        # Load data for INLP classifiers.
        data = load_inlp_data(args.persistent_dir, args.bias_type, seed=args.seed)

        # Load model and tokenizer.
        model = getattr(models, args.model)(args.model_name_or_path)
        model.eval()
        tokenizer = transformers.AutoTokenizer.from_pretrained(args.model_name_or_path)
        x_train, x_dev, x_test, y_train, y_dev, y_test = create_dataset(
            model,
            tokenizer,
            data,
            bias_type=args.bias_type,
            n_classifiers=args.n_classifiers,
        )

        np.savez(f"../data/saved_dataset/{model_name}/{bias_type}.npz", \
                 x_train = x_train, x_dev = x_dev, x_test = x_test, \
                 y_p_train = y_train, y_p_dev = y_dev, y_p_test = y_test)
        
        io.savemat(f"../data/saved_dataset/{model_name}/{bias_type}.mat", \
                   mdict={'x_train': x_train, 'x_dev': x_dev, 'x_test': x_test, \
                          'y_p_train': y_train, 'y_p_dev': y_dev, 'y_p_test': y_test})