In [None]:
!pip -q install accelerate
!pip -q install bitsandbytes
!pip -q install huggingface_hub
!pip -q install contractions
!pip -q install datasets

In [None]:
%cd /content/drive/MyDrive/DSD

In [None]:
import os
import gc
import json
import re
import csv
import string
import pickle
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from tqdm import tqdm
from collections import Counter, defaultdict
import warnings
warnings.filterwarnings("ignore")

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

from scipy import spatial
from scipy.stats import ttest_rel, spearmanr, pearsonr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score

from xgboost import XGBClassifier

import contractions
from gensim.models.keyedvectors import Word2VecKeyedVectors
from gensim.models import Word2Vec

from transformers import AutoModel, AutoTokenizer

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Dataset
from tensorflow.keras.preprocessing.sequence import pad_sequences

from huggingface_hub import notebook_login
from datasets import load_dataset
notebook_login()

In [None]:
class CFG:
    """Notebook-wide configuration: model choice, hyper-parameters and file locations.

    Two more attributes are attached at runtime by later cells:
    ``CFG.subspace_dim`` and ``CFG.embedding_dim``.
    """
    model_name = "meta-llama/Meta-Llama-3-8B"  # Hugging Face checkpoint used for tokenizer and encoder
    model_type = '2L'       # selects TransformNet1L / 2L / 3L ("1L", "2L" or "3L")
    topic = 'race'          # bias topic; also picks the vocabulary JSON and the dataset subsets
    mode = 'role'           # key into the "analogy_templates" section of the vocabulary JSON
    max_len = 4             # tokens per word (padding/truncation length in WordsDataset)
    batch_size = 64         # DataLoader batch size for embedding extraction
    num_workers = 12        # DataLoader worker processes for embedding extraction
    adam_lr = 1e-5          # learning rate for the Adam optimizer in equalize_and_soften_DSD
    sgd_lr = 1e-8           # learning rate for the SGD optimizer in equalize_and_soften

    base_path = "/content/drive/MyDrive/DSD/"
    vocabPath = f"{topic}_attributes_optm.json"  # defining sets, analogy templates, eval terms
    outprefix =  model_name.replace("/", "-")+"-"+topic

    # Pickled {word: vector} dictionary used to estimate the bias subspace and fit the transforms.
    embedding_dict_data = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_reddit-l2.pkl"
    # Evaluation / downstream data files (relative to the working directory).
    stereoset_data = "stereoset.json"
    crows_data = "crows_pairs.csv"
    cross_ner_data = "cross_ner.txt"
    stanford_sentiment_treebank_data = "stanford_sentiment_treebank.csv"

    # Per-task caches of word embeddings (one pickle per downstream dataset).
    stsb_word_vectors = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_stsb.pkl"
    cross_ner_word_vectors = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_cross_ner.pkl"
    sst_word_vectors = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_sst.pkl"
    mrpc_word_vectors = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_mrpc.pkl"
    mnli_word_vectors = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_mnli.pkl"
    wnli_word_vectors = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_wnli.pkl"
    rte_word_vectors = f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_rte.pkl"

    # CSV files that accumulate one row of metrics per run.
    results_filename = "results/results_debiasing.csv"
    downstream_results_filename = "results/results_downstream.csv"

    # Use the GPU when one is visible, otherwise fall back to CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Metric accumulators for this run; each starts with the columns identifying the run.
# `results` is later handed to save_results().
results = {'model_name': CFG.model_name, 'topic': CFG.topic}

downstream_results = {'model_name': CFG.model_name, 'topic': CFG.topic}

In [None]:
def isValidWord(word):
    """Return True when every character of ``word`` is alphabetic.

    An empty string is accepted because it contains no non-alphabetic character.
    """
    for character in word:
        if not character.isalpha():
            return False
    return True

def pruneWordVecs(wordVecs):
    """Return a new dict holding only the entries whose key passes ``isValidWord``.

    The input mapping is not modified; insertion order of the kept entries is preserved.
    """
    return {token: vector for token, vector in wordVecs.items() if isValidWord(token)}

def load_words(w2v_files):
    """Collect the first whitespace-delimited token of every line in the given text files.

    Args:
        w2v_files: iterable of paths to text files whose lines start with a word
            (e.g. word2vec text format: ``word v1 v2 ...``).

    Returns:
        list[str]: the leading tokens, in file order then line order. Duplicates are kept.

    Blank or whitespace-only lines are skipped instead of raising ``IndexError``.
    """
    words = []
    for w2v_file in w2v_files:
        with open(w2v_file, 'r') as f:
            for line in f:
                # Only the first field is needed, so split at most once.
                fields = line.split(None, 1)
                if not fields:
                    continue
                words.append(fields[0])

    return words

def load_analogy_templates(json_filepath, mode):
    """Read the vocabulary JSON and return its ``analogy_templates`` entry for ``mode``.

    Raises ``KeyError`` if the section or the mode is absent.
    """
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    templates_by_mode = payload["analogy_templates"]
    return templates_by_mode[mode]

def load_test_terms(json_filepath):
    """Read the vocabulary JSON and return the value stored under ``testTerms``."""
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    return payload["testTerms"]

def load_eval_terms(json_filepath, mode):
    """Read the vocabulary JSON and return the evaluation targets and attribute sets.

    Returns:
        tuple: ``(eval_targets, attribute_sets)`` where ``attribute_sets`` is the
        ``dict.values()`` view of ``analogy_templates[mode]``.
    """
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    eval_targets = payload["eval_targets"]
    attribute_sets = payload["analogy_templates"][mode].values()
    return eval_targets, attribute_sets

def load_def_sets(json_filepath):
    """Read the vocabulary JSON and return ``definite_sets`` keyed by position (0, 1, ...)."""
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    return dict(enumerate(payload["definite_sets"]))

In [None]:
class WordsDataset(Dataset):
    """Map-style dataset that tokenizes one word per item.

    Each item is the tokenizer's output for a single word, padded/truncated to
    ``CFG.max_len`` and returned as PyTorch tensors.
    """

    def __init__(self, words, tokenizer):
        self.tokenizer = tokenizer
        self.words = words

    def __len__(self):
        return len(self.words)

    def __getitem__(self, idx):
        encoded = self.tokenizer(
            self.words[idx],
            padding='max_length',
            max_length=CFG.max_len,
            truncation=True,
            return_tensors="pt",
        )
        return encoded

In [None]:
# Tokenizer for the checkpoint named in CFG.model_name.
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)
# WordsDataset pads to a fixed length, which needs a pad token; reuse EOS when none is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, ignoring masked-out positions.

    Args:
        model_output: sequence whose first element holds the per-token embeddings.
        attention_mask: mask with one entry per token; it is broadcast over the
            embedding dimension and used as the averaging weight.

    Returns:
        Tensor of pooled embeddings (token axis, dim 1, reduced). The token count is
        clamped to 1e-9 so a fully masked sequence yields zeros rather than NaN.
    """
    token_embeddings = model_output[0]
    weights = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    weighted_sum = (token_embeddings * weights).sum(1)
    token_counts = weights.sum(1).clamp(min=1e-9)
    return weighted_sum / token_counts

class Model(torch.nn.Module):
    """Wrap a pretrained encoder and expose mean-pooled embeddings.

    The encoder is loaded through ``AutoModel.from_pretrained`` with ``load_in_8bit=True``.
    """

    def __init__(self, model_name):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name, load_in_8bit=True)

    def forward(self, input_ids, attention_mask):
        """Encode the batch and return one mask-weighted mean embedding per sequence."""
        encoder_output = self.encoder(attention_mask=attention_mask, input_ids=input_ids)
        return mean_pooling(encoder_output, attention_mask)

# Instantiate the encoder wrapper and move it to the configured device.
# NOTE(review): the encoder is loaded with load_in_8bit=True; confirm that calling .to() on the
# wrapper is supported/needed for 8-bit weights in the installed transformers/bitsandbytes versions.
lm_model = Model(CFG.model_name).to(CFG.device)

In [None]:
def get_word_embeddings(words, model):
    """Embed each word with ``model`` and return the embeddings as one NumPy array.

    Args:
        words: sequence of words; each is tokenized independently by ``WordsDataset``
            using the module-level ``tokenizer``.
        model: module called as ``model(input_ids, attention_mask)`` that returns one
            embedding per input row.

    Returns:
        np.ndarray with one row per word, in the same order as ``words``.

    The forward passes run under ``torch.no_grad()``: this is inference only, so no
    autograd graph needs to be built or kept in memory.
    """
    model.eval()
    words_dataset = WordsDataset(words, tokenizer)
    words_dataloader = DataLoader(words_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers)
    embeddings = []
    with torch.no_grad():
        for batch in tqdm(words_dataloader, total=len(words_dataloader)):
            # Each item was tokenized with return_tensors="pt", so the collated batch carries
            # an extra singleton axis at position 1; drop it.
            input_ids = batch['input_ids'].to(CFG.device).squeeze(1)
            attention_mask = batch['attention_mask'].to(CFG.device).squeeze(1)
            batch_embeddings = model(input_ids, attention_mask).detach().cpu().numpy()
            embeddings.append(batch_embeddings)
    # concatenate the embeddings into a single numpy array
    embeddings = np.concatenate(embeddings, axis=0)
    # Release the loader (and its worker processes) before returning.
    del words_dataset, words_dataloader
    gc.collect()
    return embeddings

In [None]:
# Load every section of the vocabulary JSON used for debiasing and evaluation.
analogyTemplates = load_analogy_templates(CFG.vocabPath, CFG.mode)
defSets = load_def_sets(CFG.vocabPath)
testTerms = load_test_terms(CFG.vocabPath)
evalTargets, evalAttrs = load_eval_terms(CFG.vocabPath, CFG.mode)

# Number of PCA components requested for the bias subspace.
CFG.subspace_dim = len(defSets) * len(defSets[0])

# Flatten the per-class template word lists into a single list of neutral words.
neutral_words = [word for template_words in analogyTemplates.values() for word in template_words]

neutral_word_embeddings = get_word_embeddings(neutral_words, lm_model)
neutral_embedding_dict = dict(zip(neutral_words, neutral_word_embeddings))
# The embedding width is taken from the model output rather than hard-coded.
CFG.embedding_dim = embedding_dim = neutral_word_embeddings.shape[-1]

In [None]:
# Show the defining sets (index -> list of words) loaded from the vocabulary JSON.
print(defSets)

In [None]:
def identify_bias_subspace(vocab, def_sets, subspace_dim, embedding_dim):
    """
    Estimate the bias subspace with PCA over mean-centered defining sets.

    Similar to bolukbasi's implementation at
    https://github.com/tolga-b/debiaswe/blob/master/debiaswe/debias.py

    vocab - dictionary mapping words to embeddings
    def_sets - dict of word sets that represent the extremes of the subspace
            we're interested in (e.g. man-woman, boy-girl, etc. for binary gender)
    subspace_dim - number of principal components requested
    embedding_dim - dimensions of the word embeddings (unused; kept so existing
            callers keep working)

    Returns ``pca.components_``: one row per component, one column per embedding
    dimension.

    Words missing from ``vocab`` are skipped. A defining set with no known word is
    ignored entirely (its mean would be NaN). Because skipped words reduce the number
    of samples, the component count is capped at what PCA can fit
    (``min(n_samples, n_features)``), so fewer than ``subspace_dim`` rows may come back.

    Raises ValueError when none of the defining-set words is in ``vocab``.
    """
    # Center every defining set on its own mean so PCA sees only within-set variation.
    centered_sets = []
    for set_words in def_sets.values():
        set_vectors = [vocab[w] for w in set_words if w in vocab]
        if not set_vectors:
            continue
        set_vectors = np.array(set_vectors)
        centered_sets.append(set_vectors - np.mean(set_vectors, axis=0))

    if not centered_sets:
        raise ValueError("None of the defining-set words are present in the vocabulary")

    matrix = np.concatenate(centered_sets)

    # PCA rejects n_components larger than min(n_samples, n_features).
    n_components = min(subspace_dim, matrix.shape[0], matrix.shape[1])
    pca = PCA(n_components=n_components)
    pca.fit(matrix)

    return pca.components_

# Load the cached {word: vector} dictionary.
# NOTE: pickle.load executes arbitrary code from the file; only load caches you produced yourself.
with open(CFG.embedding_dict_data, 'rb') as f:
    embedding_dict = pickle.load(f)
# Keep purely alphabetic words only.
embedding_dict = pruneWordVecs(embedding_dict)
# Principal directions of the defining sets; the slice caps the number of rows at CFG.subspace_dim.
subspace = identify_bias_subspace(embedding_dict, defSets, CFG.subspace_dim, CFG.embedding_dim)[:CFG.subspace_dim]

In [None]:
def equalize_and_soften(vocab, words, bias_subspace, embedding_dim, l=0.2, verbose=True):
    """Learn a linear "soft debiasing" transform and apply it to the whole vocabulary.

    A square matrix ``T`` (randomly initialised) is optimised with SGD
    (``CFG.sgd_lr``, 10 steps) to minimise

        || S U^T (T^T T - I) U S ||  +  l * || N^T (T^T T) B ||

    where ``U, S`` come from the SVD of the word matrix, ``N`` holds the neutral
    word vectors and ``B`` the bias directions.

    Args:
        vocab: dict mapping word -> embedding vector of length ``embedding_dim``.
        words: neutral words (all must be keys of ``vocab``).
        bias_subspace: array of bias directions, one per row, shape (k, embedding_dim);
            a single 1-D direction is accepted as well.
        embedding_dim: embedding width.
        l: weight of the neutral-word/bias-subspace term.
        verbose: print the loss after every step.

    Returns:
        (debiasedVectors, Transform): dict word -> unit-length transformed vector
        (NumPy), and the learned ``(embedding_dim, embedding_dim)`` tensor.
    """
    vocab_words = list(vocab.keys())

    # Stack through NumPy first: torch.tensor() on a sequence of arrays is very slow.
    Words = torch.from_numpy(np.asarray([vocab[w] for w in vocab_words], dtype=np.float32)).t()  # (d, N)
    Neutrals = torch.from_numpy(np.asarray([vocab[w] for w in words], dtype=np.float32)).t()      # (d, n)

    # perform SVD on W to reduce memory and computational costs
    # based on suggestions in supplementary material of Bolukbasi et al.
    u, s, _ = torch.linalg.svd(Words, full_matrices=False)
    s = torch.diag(s)

    # precompute
    t1 = s.mm(u.t())
    t2 = u.mm(s)

    Transform = torch.randn(embedding_dim, embedding_dim).float()
    # Bias directions arrive as rows (k, d); the loss needs them as columns (d, k).
    # A plain reshape(d, -1) would interleave the directions whenever k > 1, so transpose.
    BiasSpace = torch.as_tensor(np.asarray(bias_subspace, dtype=np.float32)).reshape(-1, embedding_dim).t()

    Transform.requires_grad = True

    epochs = 10
    optimizer = torch.optim.SGD([Transform], lr=CFG.sgd_lr, momentum=0.0)
    identity = torch.eye(embedding_dim)

    for i in range(0, epochs):
        TtT = torch.mm(Transform.t(), Transform)
        norm1 = (t1.mm(TtT - identity).mm(t2)).norm(p=2)
        norm2 = (Neutrals.t().mm(TtT).mm(BiasSpace)).norm(p=2)
        loss = norm1 + l * norm2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if(verbose):
            print("Loss @ Epoch #" + str(i) + ":", loss.item())

    if(verbose):
        print("Optimization Completed, normalizing vector transform")

    # Transform every word in one matrix product and scale each column to unit length.
    with torch.no_grad():
        transformed = Transform.mm(Words)
        transformed = transformed / transformed.norm(p=2, dim=0, keepdim=True)
    unit_vectors = transformed.t().contiguous().numpy()

    debiasedVectors = {word: unit_vectors[i].copy() for i, word in enumerate(vocab_words)}

    return debiasedVectors, Transform

# Fit the linear soft-debiasing transform on the full vocabulary; keep both the debiased
# vectors and the learned matrix (reused later by Soft_Debias).
soft_word_vectors, debias_matrix = equalize_and_soften(embedding_dict, neutral_words, subspace, CFG.embedding_dim)

In [None]:
class ResidualBlock(nn.Module):
    """Two linear layers with a ReLU in between, plus a skip connection.

    When the input and output widths differ the skip path is a learned linear
    projection; otherwise it is the identity.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)
        self.fc2 = nn.Linear(output_dim, output_dim)
        self.shortcut = nn.Identity() if input_dim == output_dim else nn.Linear(input_dim, output_dim)

    def forward(self, x):
        skip = self.shortcut(x)
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden) + skip

class TransformNet1L(nn.Module):
    """Debiasing transform made of a single residual block (input_dim -> output_dim)."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.resblock1 = ResidualBlock(input_dim, output_dim)

    def forward(self, x):
        return self.resblock1(x)

class TransformNet2L(nn.Module):
    """Debiasing transform of two residual blocks: input_dim -> 2*output_dim -> output_dim."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_dim = output_dim * 2
        self.resblock1 = ResidualBlock(input_dim, hidden_dim)
        self.resblock2 = ResidualBlock(hidden_dim, output_dim)

    def forward(self, x):
        return self.resblock2(self.resblock1(x))

class TransformNet3L(nn.Module):
    """Debiasing transform of three residual blocks.

    Widths: input_dim -> 2*output_dim -> 2*output_dim -> output_dim.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_dim = output_dim * 2
        self.resblock1 = ResidualBlock(input_dim, hidden_dim)
        self.resblock2 = ResidualBlock(hidden_dim, hidden_dim)
        self.resblock3 = ResidualBlock(hidden_dim, output_dim)

    def forward(self, x):
        for block in (self.resblock1, self.resblock2, self.resblock3):
            x = block(x)
        return x

# Pick the transform architecture named by CFG.model_type.
# An unknown value fails loudly here; otherwise TransformNet would stay undefined, or keep a
# stale value from an earlier run of this cell, and the error would surface much later.
_TRANSFORM_NETS = {"1L": TransformNet1L, "2L": TransformNet2L, "3L": TransformNet3L}
if CFG.model_type not in _TRANSFORM_NETS:
    raise ValueError(
        f"Unknown CFG.model_type {CFG.model_type!r}; expected one of {sorted(_TRANSFORM_NETS)}"
    )
TransformNet = _TRANSFORM_NETS[CFG.model_type]
embedding_dim = CFG.embedding_dim

In [None]:
def equalize_and_soften_DSD(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True):
    """Train a residual ``TransformNet`` as a non-linear soft-debiasing map and apply it.

    With ``X`` the transformed vocabulary (one row per word) and ``G = X^T X``, the
    network is trained with Adam (``CFG.adam_lr``, 100 full-batch steps) to minimise

        || G - I ||  +  l * || N G B ||

    where ``N`` holds the neutral word vectors (rows) and ``B`` the bias directions
    (columns).

    Args:
        vocab: dict mapping word -> embedding vector of length ``embedding_dim``.
        words: neutral words (all must be keys of ``vocab``).
        eq_sets: unused; kept so existing callers keep working.
        bias_subspace: array of bias directions, one per row, shape (k, embedding_dim);
            a single 1-D direction is accepted as well.
        embedding_dim: embedding width (input and output width of the network).
        l: weight of the neutral-word/bias-subspace term.
        verbose: print the loss every 10 steps.

    Returns:
        (debiasedVectors, Transform): dict word -> unit-length transformed vector
        (NumPy), and the trained network (on ``CFG.device``).
    """
    vocab_words = list(vocab.keys())

    # Stack through NumPy first: torch.tensor() on a sequence of arrays is very slow.
    Words = torch.from_numpy(np.asarray([vocab[w] for w in vocab_words], dtype=np.float32)).to(CFG.device)  # (N, d)
    Neutrals = torch.from_numpy(np.asarray([vocab[w] for w in words], dtype=np.float32)).to(CFG.device)      # (n, d)

    Transform = TransformNet(embedding_dim, embedding_dim).to(CFG.device)
    # Bias directions arrive as rows (k, d); the loss needs them as columns (d, k).
    # A plain reshape(d, -1) would interleave the directions whenever k > 1, so transpose.
    BiasSpace = (
        torch.as_tensor(np.asarray(bias_subspace, dtype=np.float32))
        .reshape(-1, embedding_dim)
        .t()
        .to(CFG.device)
    )

    epochs = 100
    optimizer = optim.Adam(Transform.parameters(), lr=CFG.adam_lr)

    identity_matrix = torch.eye(embedding_dim, device=CFG.device)

    for i in range(0, epochs):
        transformed_words = Transform(Words)

        # Gram matrix of the transformed vocabulary, shared by both loss terms.
        TtT = torch.matmul(transformed_words.t(), transformed_words)
        norm1 = torch.norm(TtT - identity_matrix)
        norm2 = torch.norm(torch.matmul(torch.matmul(Neutrals, TtT), BiasSpace))

        loss = norm1 + l * norm2

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if(verbose and (i%10==0)):
            print("Loss @ Epoch #" + str(i) + ":", loss.item())

    if(verbose):
        print("Optimization Completed, normalizing vector transform")

    # Transform the whole vocabulary in one pass and scale each row to unit length.
    with torch.no_grad():
        transformed = Transform(Words)
        transformed = transformed / transformed.norm(p=2, dim=1, keepdim=True)
    unit_vectors = transformed.cpu().numpy()

    debiasedVectors = {word: unit_vectors[i].copy() for i, word in enumerate(vocab_words)}

    return debiasedVectors, Transform

# Fit the residual-network debiasing transform; keep the debiased vectors and the trained
# network (reused later by DSD_Debias).
DSD_soft_word_vectors, debias_model = equalize_and_soften_DSD(embedding_dict, neutral_words, defSets.values(), subspace, embedding_dim)

In [None]:
def scoredAnalogyAnswers(a,b,x, keyedVecs, thresh=12.5):
    """Score every candidate ``y`` for the analogy "a is to b as x is to y".

    Candidates are the words whose vector lies within Euclidean distance ``thresh``
    of ``x``. The score is the cosine between ``a - b`` and ``x - y``; a zero
    denominator is replaced by 1e-6.

    Returns a list of ``(score, a, b, x, y)`` tuples sorted in descending order.
    """
    def _vec(word):
        return np.array(keyedVecs[word])

    candidates = []
    for w in keyedVecs.key_to_index.keys():
        if np.linalg.norm(_vec(w) - _vec(x)) < thresh:
            candidates.append(w)

    def _score(y):
        ab_diff = _vec(a) - _vec(b)
        xy_diff = _vec(x) - _vec(y)
        denominator = np.linalg.norm(ab_diff) * np.linalg.norm(xy_diff)
        if denominator == 0:
            denominator = 1e-6
        return ab_diff.dot(xy_diff) / denominator

    scored = [(_score(y), a, b, x, y) for y in candidates]
    scored.sort(reverse=True)
    return scored

def generateAnalogies(analogyTemplates, keyedVecs):
    """Expand the templates into analogy queries and score every candidate completion.

    For each ordered pair of distinct template keys ``(A, B)`` and each stereotype
    word listed under ``A``, the query "A is to stereotype as B is to ?" is scored
    with ``scoredAnalogyAnswers``.

    Returns:
        (analogies, outputGroups): ``analogies`` is a flat list of
        ``[score, sentence, [a, b, x, y]]`` sorted by descending score;
        ``outputGroups`` holds the same kind of entries grouped per query, in
        query order.
    """
    queries = [
        [A, stereotype, B]
        for A, stereotypes in analogyTemplates.items()
        for B in analogyTemplates
        if A != B
        for stereotype in stereotypes
    ]

    analogies = []
    outputGroups = []
    for a, b, x in queries:
        group = []
        for score, a_w, b_w, x_w, y_w in scoredAnalogyAnswers(a, b, x, keyedVecs):
            sentence = f"{a_w!s} is to {b_w!s} as {x_w!s} is to {y_w!s}"
            raw = [a_w, b_w, x_w, y_w]
            analogies.append([score, sentence, raw])
            group.append([score, sentence, raw])
        outputGroups.append(group)

    analogies = sorted(analogies, key=lambda entry: -entry[0])
    return analogies, outputGroups

def convert_legacy_to_keyvec(legacy_w2v):
    """Build a ``Word2VecKeyedVectors`` from a plain ``{word: vector}`` dict.

    The dimensionality is taken from the first entry; every other vector must have
    the same length (checked with ``assert``). An empty dict raises ``IndexError``.
    """
    first_word = list(legacy_w2v.keys())[0]
    dim = len(legacy_w2v[first_word])
    vectors = Word2VecKeyedVectors(dim)

    ws = list(legacy_w2v.keys())
    vs = list(legacy_w2v.values())
    for vect in vs:
        assert(len(vect) == dim)

    vectors.add_vectors(ws, vs, replace=True)
    return vectors

def multiclass_evaluation(embeddings, targets, attributes):
    """Mean of the ``_unary_s`` scores over every (target word, attribute set) pair.

    Returns:
        (m_score, targets_eval): the overall mean, and the list of per-pair scores
        ordered by target set, then target word, then attribute set.
    """
    targets_eval = [
        _unary_s(embeddings, target, attributeSet)
        for targetSet in targets
        for target in targetSet
        for attributeSet in attributes
    ]
    return np.mean(targets_eval), targets_eval

def _unary_s(embeddings, target, attributes):
	return np.mean([ spatial.distance.cosine(embeddings[target], embeddings[ai]) for ai in attributes ])

In [None]:
# Baseline: mean cosine distance score of the original (pruned) embeddings.
print("Biased Evaluation Results")
biasedMAC, biasedDistribution = multiclass_evaluation(embedding_dict, evalTargets, evalAttrs)
print("Biased MAC:", biasedMAC)
results['Biased MAC'] = np.round(biasedMAC, 3)

In [None]:
# Same evaluation on the linearly soft-debiased vectors.
print("SOFT Debiased Evaluation Results")
debiasedMAC, debiasedDistribution = multiclass_evaluation(soft_word_vectors, evalTargets, evalAttrs)
print("soft MAC:", debiasedMAC)

# Related-samples t-test between the per-pair scores before and after debiasing.
statistics, pvalue = ttest_rel(biasedDistribution, debiasedDistribution)
print("soft Debiased Cosine difference t-test", pvalue)

results['Soft-Debiased MAC'] = np.round(debiasedMAC, 3)
results['soft-Debiased PValue'] = pvalue

# Same evaluation on the DSD vectors; debiasedMAC / debiasedDistribution / pvalue are
# reused and now refer to the DSD run.
print("DSD Evaluation Results")
debiasedMAC, debiasedDistribution = multiclass_evaluation(DSD_soft_word_vectors, evalTargets, evalAttrs)
print("DSD MAC:", debiasedMAC)

statistics, pvalue = ttest_rel(biasedDistribution, debiasedDistribution)
print("soft Debiased DSD Cosine difference t-test", pvalue)

results['DSD MAC'] = np.round(debiasedMAC, 3)
results['DSD PValue'] = pvalue

In [None]:
def avg_feature_vector(sentence, model, num_features):
    """Average the vectors of the whitespace-separated tokens of ``sentence`` found in ``model``.

    Args:
        sentence: text, split on whitespace (no lower-casing or punctuation handling).
        model: mapping word -> vector of length ``num_features``.
        num_features: vector length.

    Returns:
        np.ndarray of shape ``(num_features,)``; all zeros when no token is known.
    """
    feature_vec = np.zeros((num_features, ), dtype='float32')
    n_words = 0
    for word in sentence.split():
        # Direct membership test: building list(model.keys()) for every token made each
        # lookup linear in the vocabulary size.
        if word in model:
            n_words += 1
            feature_vec = np.add(feature_vec, model[word])
    if (n_words > 0):
        feature_vec = np.divide(feature_vec, n_words)
    return feature_vec

In [None]:
def get_stereotype_score(word_vectors, data):
    """Percentage of items whose stereotype sentence is closer to the context than the anti-stereotype.

    Args:
        word_vectors: mapping word -> vector, used to build averaged sentence vectors.
        data: iterable of dicts with keys ``'context'``, ``'stereotype'`` and
            ``'anti-stereotype'`` (each a sentence string).

    Returns:
        float: ``100 * stereotypical / total``; NaN when ``data`` is empty.

    The comparison uses cosine *similarity*. ``scipy.spatial.distance.cosine`` returns
    the distance (1 - similarity), so comparing it with ``>`` counted the cases where
    the stereotype was *less* similar to the context.
    """
    def _cosine_similarity(u, v):
        # Undefined (NaN) when either sentence has no known word; NaN never compares
        # greater, so such items count as non-stereotypical.
        denominator = np.linalg.norm(u) * np.linalg.norm(v)
        if denominator == 0:
            return float('nan')
        return float(np.dot(u, v) / denominator)

    total_samples = 0
    stereotypical_samples = 0

    # Iterate over the data
    for item in data:
        context = item['context']
        stereo = item['stereotype']
        antistereo = item['anti-stereotype']

        # Calculate sentence embeddings
        context_vec = avg_feature_vector(context, word_vectors, num_features=CFG.embedding_dim)
        stereo_vec = avg_feature_vector(stereo, word_vectors, num_features=CFG.embedding_dim)
        antistereo_vec = avg_feature_vector(antistereo, word_vectors, num_features=CFG.embedding_dim)

        # Calculate cosine similarity (scale-invariant, so no explicit normalisation is needed)
        simstereo = _cosine_similarity(context_vec, stereo_vec)
        simantistereo = _cosine_similarity(context_vec, antistereo_vec)

        if simstereo > simantistereo:
            stereotypical_samples += 1
        total_samples += 1

    # Calculate stereotype score
    if total_samples == 0:
        stereotype_score = float('nan')
    else:
        stereotype_score = stereotypical_samples*100 / total_samples
    print('Stereotype Score:', stereotype_score)
    return stereotype_score

In [None]:
# Load the StereoSet JSON; the next cell iterates it as a list of items with a 'bias_type' field.
with open(CFG.stereoset_data) as f:
    data = json.load(f)

In [None]:
# Keep only the items of the configured bias topic.
topic_data = [item for item in data if item['bias_type']==CFG.topic]

# Flatten each item into {'context', 'stereotype', 'anti-stereotype'}; sentences carrying any
# other gold label are ignored.
stereoset_data_processed = []

for entry in topic_data:
    record = defaultdict()
    record['context'] = entry['context']
    for candidate in entry['sentences']:
        label = candidate['gold_label']
        if label in ('stereotype', 'anti-stereotype'):
            record[label] = candidate['sentence']
    stereoset_data_processed.append(record)

In [None]:
# StereoSet stereotype score for both debiasing variants.
results['Soft-Debiased SS'] = get_stereotype_score(soft_word_vectors, stereoset_data_processed)
results['DSD SS'] = get_stereotype_score(DSD_soft_word_vectors, stereoset_data_processed)

In [None]:
def read_crows_data(input_file):
    """
    Load the CrowS-Pairs CSV into a pandas DataFrame.

    The file must provide the columns ``sent_more``, ``sent_less``,
    ``stereo_antistereo`` and ``bias_type``.

    Returns a DataFrame with columns ``sent1``, ``sent2``, ``direction``,
    ``bias_type``. For rows whose direction is ``'stereo'``, ``sent1`` is
    ``sent_more`` and ``sent2`` is ``sent_less``; for every other direction the two
    are swapped.

    Rows are collected in a list and the frame is built once, instead of growing it
    row by row through the private ``DataFrame._append``.
    """
    records = []

    # newline='' is what the csv module expects so quoted fields with line breaks parse correctly.
    with open(input_file, newline='') as f:
        for row in csv.DictReader(f):
            direction = row['stereo_antistereo']
            if direction == 'stereo':
                sent1, sent2 = row['sent_more'], row['sent_less']
            else:
                sent1, sent2 = row['sent_less'], row['sent_more']

            records.append({'sent1': sent1,
                            'sent2': sent2,
                            'direction': direction,
                            'bias_type': row['bias_type']})

    return pd.DataFrame(records, columns=['sent1', 'sent2', 'direction', 'bias_type'])

def replace_words(text):
    """Collapse every word ending in "woman"/"man" to the bare suffix.

    E.g. "policeman" -> "man", "chairwoman" -> "woman". Any word ending in "man"
    is affected, including ones such as "human".
    """
    def _suffix_only(match):
        # The pattern only matches words ending in "man" or "woman".
        return 'woman' if match.group(0).endswith('woman') else 'man'

    return re.sub(r'\b\w*(man|woman)\b', _suffix_only, text)

def difference_with_repetition(list1, list2):
    """Multiset difference ``list1 - list2`` joined into a space-separated string.

    Each item of ``list1`` is kept as many times as it occurs there minus its count
    in ``list2`` (never below zero). Items are grouped by value, in order of first
    appearance in ``list1``.
    """
    leftover = Counter(list1) - Counter(list2)
    return " ".join(leftover.elements())

def common_and_uncommon_parts(s1, s2):
    """Split two sentences into their shared token sequence and their leftovers.

    The shared part is a longest common subsequence of the whitespace tokens; the
    leftovers are the multiset difference of each sentence's tokens and that shared
    sequence (see ``difference_with_repetition``).

    Returns:
        (common, uncommon1, uncommon2): three space-joined strings.
    """
    left, right = s1.split(), s2.split()
    n_left, n_right = len(left), len(right)

    # table[r][c] = LCS length of left[:r] and right[:c]
    table = [[0] * (n_right + 1) for _ in range(n_left + 1)]
    for r in range(n_left):
        for c in range(n_right):
            if left[r] == right[c]:
                table[r + 1][c + 1] = table[r][c] + 1
            else:
                table[r + 1][c + 1] = max(table[r + 1][c], table[r][c + 1])

    # Walk back from the bottom-right corner, collecting matched tokens last-to-first.
    shared_reversed = []
    r, c = n_left, n_right
    while r and c:
        current = table[r][c]
        if current == table[r - 1][c]:
            r -= 1
        elif current == table[r][c - 1]:
            c -= 1
        else:
            assert left[r - 1] == right[c - 1]
            shared_reversed.append(left[r - 1])
            r -= 1
            c -= 1

    shared_tokens = shared_reversed[::-1]
    common = ' '.join(shared_tokens)
    uncommon1 = difference_with_repetition(left, shared_tokens)
    uncommon2 = difference_with_repetition(right, shared_tokens)
    return common, uncommon1, uncommon2

crows_data = read_crows_data(CFG.crows_data)

# The CSV labels the race topic 'race-color'; every other topic name is used unchanged.
topic = 'race-color' if CFG.topic == 'race' else CFG.topic
topic_crows_data = crows_data.loc[crows_data['bias_type'] == topic].reset_index(drop=True)

In [None]:
def get_crows_score(word_vectors):
    """Score ``word_vectors`` on the topic subset of CrowS-Pairs (``topic_crows_data``).

    Each sentence pair is lower-cased, contraction-expanded and stripped of
    punctuation, then split into the shared token sequence (the "context") and the
    tokens unique to each sentence. The sentence whose unique tokens have the higher
    cosine similarity to the context is the one the embeddings prefer.

    Counting rules (``sent1``/``sent2`` as produced by ``read_crows_data``):
      * direction ``'stereo'``: a hit when ``sent1`` is preferred;
      * direction ``'antistereo'``: a hit when ``sent2`` is preferred;
      * exactly equal similarities count as neutral.

    Returns:
        (metric, stereo, antistereo): percentages rounded to 2 decimals - hits over
        all pairs, stereo hits over stereo pairs, antistereo hits over antistereo
        pairs. A ratio with a zero denominator is reported as NaN.

    Notes:
        ``scipy.spatial.distance.cosine`` returns a distance (1 - similarity); the
        comparisons here need similarity, so it is computed directly. The per-pair
        ``df_score`` frame was removed: it was never returned and only ever received
        the last pair.
    """
    punctuation_re = re.compile(f"[{re.escape(string.punctuation)}]")

    def _clean(sentence):
        return punctuation_re.sub("", contractions.fix(sentence.lower()).replace('\'s', ' is'))

    def _cosine_similarity(u, v):
        # Undefined (NaN) when either side has no known word; NaN never compares
        # greater or equal, so such pairs score no hit and are not neutral.
        denominator = np.linalg.norm(u) * np.linalg.norm(v)
        if denominator == 0:
            return float('nan')
        return float(np.dot(u, v) / denominator)

    def _percentage(count, total):
        return round(count / total * 100, 2) if total else float('nan')

    total_stereo, total_antistereo = 0, 0
    stereo_score, antistereo_score = 0, 0

    N = 0
    neutral = 0
    for row in topic_crows_data.itertuples():
        N += 1
        sent1 = _clean(row.sent1)
        sent2 = _clean(row.sent2)
        direction = row.direction

        common_sequence, remainder1, remainder2 = common_and_uncommon_parts(sent1, sent2)
        remainder1 = replace_words(remainder1)
        remainder2 = replace_words(remainder2)

        context_vec = avg_feature_vector(common_sequence, word_vectors, num_features=CFG.embedding_dim)
        more_vec = avg_feature_vector(remainder1, word_vectors, num_features=CFG.embedding_dim)
        less_vec = avg_feature_vector(remainder2, word_vectors, num_features=CFG.embedding_dim)

        # Cosine similarity of each sentence's unique tokens to the shared context.
        more_score = _cosine_similarity(context_vec, more_vec)
        less_score = _cosine_similarity(context_vec, less_vec)

        if more_score == less_score:
            neutral += 1
        elif direction == 'stereo':
            total_stereo += 1
            if more_score > less_score:
                stereo_score += 1
        elif direction == 'antistereo':
            total_antistereo += 1
            if less_score > more_score:
                antistereo_score += 1

    metric_pct = _percentage(stereo_score + antistereo_score, N)
    stereo_pct = _percentage(stereo_score, total_stereo)
    antistereo_pct = _percentage(antistereo_score, total_antistereo)

    print('=' * 100)
    print('Total examples:', N)
    print('Metric score:', metric_pct)
    print('Stereotype score:', stereo_pct)
    if antistereo_score != 0:
        print('Anti-stereotype score:', antistereo_pct)
    print("Num. neutral:", neutral, _percentage(neutral, N))
    print('=' * 100)
    print()
    return metric_pct, stereo_pct, antistereo_pct

# CrowS-Pairs metric / stereotype / anti-stereotype scores for both debiasing variants.
results['Soft-Debiased CMS'], results['Soft-Debiased CSS'], results['Soft-Debiased CAS'] = get_crows_score(soft_word_vectors)
results['DSD CMS'], results['DSD CSS'], results['DSD CAS'] = get_crows_score(DSD_soft_word_vectors)

print("Crows Results")
print(f"Soft-Debiased CMS: {results['Soft-Debiased CMS']}")
print(f"DSD CMS: {results['DSD CMS']}")

In [None]:
def save_results(results, filename):
    """Append ``results`` as one row to the CSV at ``filename``.

    Args:
        results: dict mapping column name -> value; its keys define the columns.
        filename: path of the CSV file.

    The parent directory is created when missing. A header row is written when the
    file does not exist yet or is empty. The header of an existing file is NOT
    checked against ``results``; callers must keep the columns consistent across runs.
    """
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0

    # newline='' lets the csv module control line endings (no blank rows on Windows).
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(results.keys()))

        if needs_header:
            writer.writeheader()

        writer.writerow(results)

# Append this run's debiasing metrics to the shared results CSV.
save_results(results, CFG.results_filename)

In [None]:
def DSD_Debias(Transform, new_word_dict):
    """Apply a trained transform network to every vector and rescale to unit length.

    Args:
        Transform: callable module taking a ``(1, dim)`` tensor on ``CFG.device``.
        new_word_dict: dict mapping word -> vector.

    Returns:
        dict mapping word -> 1-D NumPy array with L2 norm 1.

    Runs under ``torch.no_grad()`` because this is inference only; no autograd graph
    is recorded per word.
    """
    new_debiased_word_dict = {}
    with torch.no_grad():
        for word, w_ in new_word_dict.items():
            w_ = torch.tensor(w_).float().to(CFG.device)
            transformedVec = Transform(w_.view(1, -1))
            new_debiased_word_dict[word] = (transformedVec / transformedVec.norm(p=2)).cpu().numpy().flatten()
    return new_debiased_word_dict

def Soft_Debias(debias_matrix, word_vectors):
    """Multiply every vector by ``debias_matrix`` and rescale the result to unit length.

    Args:
        debias_matrix: 2-D CPU tensor of shape ``(dim, dim)``.
        word_vectors: dict mapping word -> vector of length ``dim``.

    Returns:
        dict mapping word -> 1-D NumPy array with L2 norm 1.

    Runs under ``torch.no_grad()``: the matrix may still have ``requires_grad=True``
    from training, and no autograd graph is needed here.
    """
    new_debiased_word_dict = {}
    with torch.no_grad():
        for word, w_ in word_vectors.items():
            w_ = torch.tensor(w_).float()
            transformedVec = torch.mm(debias_matrix, w_.view(-1, 1))
            new_debiased_word_dict[word] = (transformedVec / transformedVec.norm(p=2)).numpy().flatten()
    return new_debiased_word_dict

In [None]:
# STS-B training split: sentence pairs and their similarity labels.
stsb_dataset = load_dataset("glue", "stsb")
sentence_pairs = [(row["sentence1"], row["sentence2"]) for row in stsb_dataset["train"]]
similarity_scores = [row["label"] for row in stsb_dataset["train"]]

stop_words = set(stopwords.words('english'))

# Embed the STS-B vocabulary once and cache it; later runs load the cache instead.
if not os.path.isfile(CFG.stsb_word_vectors):
    def preprocess_text(text):
        """Lower-case and tokenize; keep alphabetic tokens that are not stop words."""
        tokens = nltk.word_tokenize(text.lower())
        filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
        return filtered_tokens

    sentences = []
    for  pair in sentence_pairs:
        sentence1, sentence2 = pair[0], pair[1]

        sentences.append(sentence1)
        sentences.append(sentence2)

    sentences = [preprocess_text(sentence) for sentence in sentences]

    stsb_words = set()
    for sentence in sentences:
        for word in sentence:
            stsb_words.add(word)

    stsb_words = list(stsb_words)
    stsb_word_embeddings = get_word_embeddings(stsb_words, lm_model)
    stsb_word_dict = {word: embedding for (word, embedding) in zip(stsb_words, stsb_word_embeddings)}
    # Context managers close the handle so the cache is flushed to disk.
    with open(CFG.stsb_word_vectors, "wb") as f:
        pickle.dump(stsb_word_dict, f)
else:
    # NOTE: pickle.load executes arbitrary code from the file; only load caches you produced yourself.
    with open(CFG.stsb_word_vectors, "rb") as f:
        stsb_word_dict = pickle.load(f)
# Debias the task vocabulary with both previously fitted transforms.
stsb_soft_debiased_word_dict = Soft_Debias(debias_matrix, stsb_word_dict)
stsb_DSD_word_dict = DSD_Debias(debias_model, stsb_word_dict)

In [None]:
class STSBModel(nn.Module):
    """Two-tower regressor for sentence-pair similarity.

    Each sentence vector goes through its own pair of linear layers
    (widths ``input_dim // 2`` then ``input_dim // 4``, no activation); the cosine
    similarity of the two projections is mapped to the output by a final 1-unit
    linear layer. All layers are lazy, so input widths are inferred on first use.
    """

    def __init__(self, input_dim):
        super().__init__()
        half, quarter = input_dim // 2, input_dim // 4
        self.fc1 = nn.LazyLinear(half)
        self.fc1_1 = nn.LazyLinear(quarter)
        self.fc2 = nn.LazyLinear(half)
        self.fc2_1 = nn.LazyLinear(quarter)
        self.fc3 = nn.LazyLinear(1)

    def forward(self, x1, x2):
        left = self.fc1_1(self.fc1(x1))
        right = self.fc2_1(self.fc2(x2))
        similarity = F.cosine_similarity(left, right).view(-1, 1)
        return self.fc3(similarity)

class SentencePairDataset(Dataset):
    """Pairs each entry of ``embeddings`` with the score at the same index."""

    def __init__(self, embeddings, scores):
        self.scores = scores
        self.embeddings = embeddings

    def __len__(self):
        return len(self.embeddings)

    def __getitem__(self, idx):
        pair = self.embeddings[idx]
        target = self.scores[idx]
        return pair, target

def get_stsb_score(word_embeddings):
    """Train a similarity regressor on sentence-pair vectors and report correlations.

    Every pair in the notebook-level `sentence_pairs` is converted into two
    vectors with `avg_feature_vector`, split 80/20 together with
    `similarity_scores`, and used to train an `STSBModel` with MSE loss for
    100 epochs. Pearson and Spearman correlations are computed on the held-out
    part.

    The split is not seeded, so separate calls evaluate on different splits.

    Args:
        word_embeddings: word-to-vector lookup passed to `avg_feature_vector`.

    Returns:
        Tuple `(pcc, srcc)` of Pearson and Spearman correlation coefficients
        between held-out scores and predictions.
    """

    def _prepare_batch(batch_embeddings, batch_scores):
        # Convert one collated batch to float32 tensors on the configured device.
        first = torch.tensor(batch_embeddings[0], dtype=torch.float32).to(CFG.device)
        second = torch.tensor(batch_embeddings[1], dtype=torch.float32).to(CFG.device)
        targets = torch.tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        return first, second, targets

    # Convert the sentence pairs to embeddings
    pair_embeddings = [
        (avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
         avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim))
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_embeddings, val_embeddings, train_scores, val_scores = train_test_split(pair_embeddings, similarity_scores, test_size=0.2)

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_embeddings, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_embeddings, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = STSBModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-4)

    # Training loop
    for epoch in range(100):
        losses = []
        for batch_embeddings, batch_scores in train_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            # The model emits (batch, 1). Drop the trailing axis so the loss
            # compares element-wise with the per-pair targets instead of
            # broadcasting both operands to (batch, batch).
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if((epoch+1)%10==0):
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_embeddings, batch_scores in val_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch losses (batches are weighted equally).
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")
    # Compute evaluation metrics
    pcc = pearsonr(y_true, y_pred)[0]
    srcc = spearmanr(y_true, y_pred)[0]
    print(f"PCC: {pcc}, SRCC: {srcc}")
    return (pcc, srcc)


# get_stsb_score returns (PCC, SRCC); store both for each embedding variant.
downstream_results['STSB Biased PCC'], downstream_results['STSB Biased SRCC'] = get_stsb_score(stsb_word_dict)
downstream_results['STSB Soft-Debiased PCC'], downstream_results['STSB Soft-Debiased SRCC'] = get_stsb_score(stsb_soft_debiased_word_dict)
# Key fixed from 'STSB DSD CC' so the Spearman result follows the same naming as the rows above.
downstream_results['STSB DSD PCC'], downstream_results['STSB DSD SRCC'] = get_stsb_score(stsb_DSD_word_dict)

In [None]:
def preprocess_text(text):
    """Tokenise lower-cased `text` and keep alphabetic tokens that are not stop words.

    Returns the surviving tokens as a list, in their original order.
    """
    kept_tokens = []
    for token in nltk.word_tokenize(text.lower()):
        # Skip punctuation, numbers and anything else that is not purely alphabetic.
        if not token.isalpha():
            continue
        # Skip entries of the notebook-level stop-word set.
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return kept_tokens

def load_data_cross_ner(file_path):
    """Read a tab-separated, one-token-per-line tagging file into a DataFrame.

    Blank lines and lines starting with ``-DOCSTART-`` end the current
    sentence. On every other line the first tab-separated field is taken as
    the word and the last field as its tag.

    Args:
        file_path: path of the text file to read.

    Returns:
        DataFrame with one row per sentence and two columns, ``words`` and
        ``tags``, each holding a list of strings.
    """
    words, tags = [], []
    sentence_words, sentence_tags = [], []

    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if len(line) == 0 or line.startswith("-DOCSTART-"):
                # Sentence boundary: store what has been collected so far.
                if len(sentence_words) > 0:
                    words.append(sentence_words)
                    tags.append(sentence_tags)
                    sentence_words, sentence_tags = [], []
            else:
                tokens = line.split('\t')
                sentence_words.append(tokens[0])
                sentence_tags.append(tokens[-1])

    # A file that does not end with a blank line still has a pending sentence;
    # without this flush the final sentence would be lost.
    if len(sentence_words) > 0:
        words.append(sentence_words)
        tags.append(sentence_tags)

    return pd.DataFrame({"words": words, "tags": tags})

data = load_data_cross_ner(CFG.cross_ner_data)
display(data.head())

# Tag sequences are kept as arrays, one per sentence.
sentence_tags = [np.array(tags) for tags in data['tags']]

# Each sentence is re-joined into a string and passed through preprocess_text.
# NOTE(review): preprocess_text drops stop words and non-alphabetic tokens while
# `sentence_tags` keeps one tag per ORIGINAL token, so position i of a processed
# sentence no longer lines up with tag i. Confirm whether tags should be
# filtered together with the tokens.
sentences = [preprocess_text(" ".join(sentence)) for sentence in data['words']]

if not os.path.isfile(CFG.cross_ner_word_vectors):
    # Cache miss: embed every distinct token that survived preprocessing.
    words_ = list({word for sentence in sentences for word in sentence})
    # assumes get_word_embeddings returns one embedding per input word, in order — TODO confirm
    cross_ner_word_embeddings = get_word_embeddings(words_, lm_model)
    cross_ner_word_dict = dict(zip(words_, cross_ner_word_embeddings))
    # Context manager guarantees the cache file is flushed and closed.
    with open(CFG.cross_ner_word_vectors, "wb") as cache_file:
        pickle.dump(cross_ner_word_dict, cache_file)
else:
    # Cache hit: reuse the vectors written by an earlier run of this cell.
    with open(CFG.cross_ner_word_vectors, "rb") as cache_file:
        cross_ner_word_dict = pickle.load(cache_file)

cross_ner_soft_debiased_word_dict = Soft_Debias(debias_matrix, cross_ner_word_dict)
cross_ner_DSD_word_dict = DSD_Debias(debias_model, cross_ner_word_dict)

In [None]:
class BiLSTM(nn.Module):
    """Bidirectional LSTM tagger: one vector of class scores per time step.

    The LSTM width comes from the notebook-level `embedding_dim`; each
    direction uses half of it as its hidden size.
    """

    def __init__(self, num_classes):
        super().__init__()
        hidden_per_direction = embedding_dim // 2
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_per_direction,
            batch_first=True,
            bidirectional=True,
        )
        # Per-step classifier applied on top of the LSTM outputs.
        self.fc = nn.LazyLinear(num_classes)

    def forward(self, x):
        """Run the LSTM over `x` and project every step to `num_classes` scores."""
        sequence_output, _state = self.lstm(x)
        return self.fc(sequence_output)

def get_cross_ner_score(word_embedding_dict, sentences, sentence_tags):
    """Train a BiLSTM tagger on the given embeddings and return its macro F1.

    Tokens are mapped to vectors (unknown tokens use the ``'<UNK>'`` entry),
    tags are label-encoded, and both are padded/truncated to 100 steps. The
    data is split 80/20 without a seed, a `BiLSTM` is trained for 10 epochs,
    and the macro-averaged F1 over all positions, padding included, is returned.

    Args:
        word_embedding_dict: mapping from token to embedding vector.
        sentences: list of token lists.
        sentence_tags: list of tag sequences, one per sentence.

    Returns:
        Macro-average F1 score (float) on the held-out split.
    """
    # Look up one vector per token, falling back to the '<UNK>' entry.
    X = [[word_embedding_dict[word] if word in word_embedding_dict else word_embedding_dict['<UNK>'] for word in sentence] for sentence in sentences]
    y = [list(tags) for tags in sentence_tags]

    # Encode the labels
    le = LabelEncoder()
    le.fit([tag for tags in y for tag in tags])
    y = [le.transform(tags) for tags in y]
    num_classes = len(le.classes_)
    # Padding positions are labelled "O"; pass the fill value as a plain scalar.
    pad_label = int(le.transform(["O"])[0])
    X = torch.tensor(pad_sequences(maxlen=100, sequences=X, padding="post", dtype='float32'))
    y = torch.tensor(pad_sequences(maxlen=100, sequences=y, padding="post", value=pad_label))

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Create DataLoaders
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32)
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=32)

    model_lstm = BiLSTM(num_classes).to(CFG.device)
    # No ignore_index is set: padded positions count towards the loss as "O".
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model_lstm.parameters())

    # Train the model
    for epoch in range(10):
        for batch_inputs, batch_labels in train_loader:
            optimizer.zero_grad()
            outputs = model_lstm(batch_inputs.to(CFG.device))
            # Flatten (batch, steps, classes) and (batch, steps) for the token-level loss.
            loss = criterion(outputs.view(-1, num_classes), batch_labels.long().view(-1).to(CFG.device))
            loss.backward()
            optimizer.step()
        if (epoch+1)%10==0:
            print("Loss @ Epoch #" + str(epoch) + ":", loss.item())

    # Make predictions on the test set and evaluate the model_lstm
    with torch.no_grad():
        all_preds, all_labels = [], []
        for batch_inputs, batch_labels in test_loader:
            outputs = model_lstm(batch_inputs.to(CFG.device))
            _, predicted = torch.max(outputs, 2)
            all_preds.extend(predicted.view(-1).detach().cpu().tolist())
            all_labels.extend(batch_labels.view(-1).tolist())

    # Passing `labels` keeps the report aligned with `target_names` even when a
    # class is absent from the random test split; otherwise sklearn raises.
    report = classification_report(
        all_labels,
        all_preds,
        labels=list(range(num_classes)),
        target_names=le.classes_,
        output_dict=True,
    )
    return report['macro avg']['f1-score']

# Score each embedding variant on the same sentences/tags and record its macro F1.
for result_key, variant_word_dict in (
    ('Cross-NER Biased F1', cross_ner_word_dict),
    ('Cross-NER Soft-Debiased F1', cross_ner_soft_debiased_word_dict),
    ('Cross-NER DSD F1', cross_ner_DSD_word_dict),
):
    downstream_results[result_key] = get_cross_ner_score(variant_word_dict, sentences, sentence_tags)

In [None]:
stop_words = set(stopwords.words('english'))
df = pd.read_csv(CFG.stanford_sentiment_treebank_data)
def preprocess_text(text):
    """Lower-case and tokenise `text`, dropping stop words (punctuation and numbers are kept)."""
    tokens = nltk.word_tokenize(text.lower())
    filtered_tokens = [word for word in tokens if word not in stop_words]
    return filtered_tokens

label_map = {'positive': 1, 'negative': 0}
df['processed_sentence'] = df['sentence'].apply(preprocess_text)
# The small offset makes an exact .5 round up; np.round alone rounds half to even.
df['hard_label'] = df['label'].apply(lambda x: int(np.round(x+0.0001)))
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df['processed_sentence'], df['hard_label'], test_size=0.2, random_state=42)

# Build (or load from cache) one embedding per distinct token in the corpus
if not os.path.isfile(CFG.sst_word_vectors):
    sentences = list(df['processed_sentence'])
    words_ = {word for sentence in sentences for word in sentence}
    sst_words = list(words_)

    # assumes get_word_embeddings returns one embedding per input word, in order — TODO confirm
    sst_word_embeddings = get_word_embeddings(sst_words, lm_model)
    sst_word_dict = dict(zip(sst_words, sst_word_embeddings))
    # Context manager guarantees the cache file is flushed and closed.
    with open(CFG.sst_word_vectors, "wb") as cache_file:
        pickle.dump(sst_word_dict, cache_file)
else:
    # Cache hit: reuse the vectors written by an earlier run of this cell.
    with open(CFG.sst_word_vectors, "rb") as cache_file:
        sst_word_dict = pickle.load(cache_file)

sst_soft_debiased_word_dict = Soft_Debias(debias_matrix, sst_word_dict)
sst_debiased_word_dict = DSD_Debias(debias_model, sst_word_dict)

In [None]:
def get_sst_score(cls_word_dict):
    """Train an XGBoost classifier on mean word vectors and return test accuracy.

    Uses the notebook-level split `X_train`/`X_test` (token lists) and
    `y_train`/`y_test` (labels). Each review is represented by the mean of its
    word vectors. A review with no tokens gets a zero vector, so the feature
    matrix stays rectangular.

    Args:
        cls_word_dict: mapping from token to embedding vector. Every token in
            the split must be present.

    Returns:
        Accuracy (float) of the classifier on `X_test`.
    """
    def _review_vector(review):
        # Mean of the review's word vectors, or zeros when the review is empty.
        vectors = [cls_word_dict[word] for word in review]
        if vectors:
            return np.mean(vectors, axis=0)
        # np.mean([]) would give a NaN scalar; use a zero vector with the
        # shape of an arbitrary dictionary entry instead.
        return np.zeros(np.shape(next(iter(cls_word_dict.values()))), dtype=np.float32)

    # Vectorize labeled reviews
    X_train_vec = np.array([_review_vector(review) for review in tqdm(X_train)])
    X_test_vec = np.array([_review_vector(review) for review in tqdm(X_test)])

    # Train an XGBoost classifier
    xgb_model = XGBClassifier(n_estimators=150, max_depth=3, learning_rate=0.05)
    xgb_model.fit(X_train_vec, y_train)

    # Make predictions
    y_pred = xgb_model.predict(X_test_vec)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    return accuracy

# Score each embedding variant on the shared SST split and record its accuracy.
for result_key, variant_word_dict in (
    ('SST Biased Accuracy', sst_word_dict),
    ('SST Soft-Debiased Accuracy', sst_soft_debiased_word_dict),
    ('SST DSD Accuracy', sst_debiased_word_dict),
):
    downstream_results[result_key] = get_sst_score(variant_word_dict)

In [None]:
splits = {'train': 'train.jsonl', 'validation': 'validation.jsonl', 'test': 'test.jsonl'}
mrpc_df = pd.read_json("hf://datasets/SetFit/mrpc/" + splits["train"], lines=True)

# Notebook-level inputs read by the scoring function defined in the next cell.
sentence_pairs = list(zip(mrpc_df["text1"], mrpc_df["text2"]))
similarity_scores = mrpc_df["label"].tolist()

stop_words = set(stopwords.words('english'))

if not os.path.isfile(CFG.mrpc_word_vectors):
    # Cache miss: embed every distinct content word found in the sentence pairs.
    def preprocess_text(text):
        """Lower-case and tokenise `text`, keeping alphabetic tokens that are not stop words."""
        tokens = nltk.word_tokenize(text.lower())
        filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
        return filtered_tokens

    # One token list per sentence, in pair order (first sentence, then second).
    sentences = [preprocess_text(text) for pair in sentence_pairs for text in pair]

    mrpc_words = list({word for sentence in sentences for word in sentence})
    # assumes get_word_embeddings returns one embedding per input word, in order — TODO confirm
    mrpc_word_embeddings = get_word_embeddings(mrpc_words, lm_model)
    mrpc_word_dict = dict(zip(mrpc_words, mrpc_word_embeddings))
    # Context manager guarantees the cache file is flushed and closed.
    with open(CFG.mrpc_word_vectors, "wb") as cache_file:
        pickle.dump(mrpc_word_dict, cache_file)
else:
    # Cache hit: reuse the vectors written by an earlier run of this cell.
    with open(CFG.mrpc_word_vectors, "rb") as cache_file:
        mrpc_word_dict = pickle.load(cache_file)
mrpc_soft_debiased_word_dict = Soft_Debias(debias_matrix, mrpc_word_dict)
mrpc_DSD_word_dict = DSD_Debias(debias_model, mrpc_word_dict)

In [None]:
def get_mrpc_score(word_embeddings):
    """Train a pair scorer on sentence-pair vectors and return macro F1.

    Every pair in the notebook-level `sentence_pairs` is converted into two
    vectors with `avg_feature_vector`, split 80/20 together with
    `similarity_scores`, and used to train an `STSBModel` with MSE loss for
    100 epochs. Held-out predictions are rounded to the nearest integer before
    the macro F1 is computed.

    The split is not seeded, so separate calls evaluate on different splits.

    Args:
        word_embeddings: word-to-vector lookup passed to `avg_feature_vector`.

    Returns:
        Macro-averaged F1 score (float) on the held-out split.
    """

    def _prepare_batch(batch_embeddings, batch_scores):
        # Convert one collated batch to float32 tensors on the configured device.
        first = torch.tensor(batch_embeddings[0], dtype=torch.float32).to(CFG.device)
        second = torch.tensor(batch_embeddings[1], dtype=torch.float32).to(CFG.device)
        targets = torch.tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        return first, second, targets

    # Convert the sentence pairs to embeddings
    pair_embeddings = [
        (avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
         avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim))
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_embeddings, val_embeddings, train_scores, val_scores = train_test_split(pair_embeddings, similarity_scores, test_size=0.2)

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_embeddings, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_embeddings, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = STSBModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-5)

    # Training loop
    for epoch in range(100):
        losses = []
        for batch_embeddings, batch_scores in train_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            # The model emits (batch, 1). Drop the trailing axis so the loss
            # compares element-wise with the per-pair targets instead of
            # broadcasting both operands to (batch, batch).
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if((epoch+1)%10==0):
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_embeddings, batch_scores in val_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch losses (batches are weighted equally).
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")
    # Compute evaluation metrics: round the regression output to a class label
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average = "macro")
    print(f"F1 Score: {f1}")
    return f1

# Score each embedding variant on the currently loaded pairs and record its macro F1.
for result_key, variant_word_dict in (
    ('MRPC Biased F1', mrpc_word_dict),
    ('MRPC Soft-Debiased F1', mrpc_soft_debiased_word_dict),
    ('MRPC DSD F1', mrpc_DSD_word_dict),
):
    downstream_results[result_key] = get_mrpc_score(variant_word_dict)

In [None]:
splits = {'train': 'train.jsonl', 'validation': 'validation_matched.jsonl', 'test': 'test_matched.jsonl'}
mnli_df = pd.read_json("hf://datasets/SetFit/mnli/" + splits["train"], lines=True)
# Work on a fixed (seeded) 1% sample of the training split.
mnli_df = mnli_df.sample(frac=0.01,random_state=42 ).reset_index(drop=True)
# Notebook-level inputs read by the scoring function defined in the next cell.
sentence_pairs = list(zip(mnli_df["text1"], mnli_df["text2"]))
similarity_scores = mnli_df["label"].tolist()

stop_words = set(stopwords.words('english'))

if not os.path.isfile(CFG.mnli_word_vectors):
    # Cache miss: embed every distinct content word found in the sentence pairs.
    def preprocess_text(text):
        """Lower-case and tokenise `text`, keeping alphabetic tokens that are not stop words."""
        tokens = nltk.word_tokenize(text.lower())
        filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
        return filtered_tokens

    # One token list per sentence, in pair order (first sentence, then second).
    sentences = [preprocess_text(text) for pair in sentence_pairs for text in pair]

    mnli_words = list({word for sentence in sentences for word in sentence})
    # assumes get_word_embeddings returns one embedding per input word, in order — TODO confirm
    mnli_word_embeddings = get_word_embeddings(mnli_words, lm_model)
    mnli_word_dict = dict(zip(mnli_words, mnli_word_embeddings))
    # Context manager guarantees the cache file is flushed and closed.
    with open(CFG.mnli_word_vectors, "wb") as cache_file:
        pickle.dump(mnli_word_dict, cache_file)
else:
    # Cache hit: reuse the vectors written by an earlier run of this cell.
    with open(CFG.mnli_word_vectors, "rb") as cache_file:
        mnli_word_dict = pickle.load(cache_file)
mnli_soft_debiased_word_dict = Soft_Debias(debias_matrix, mnli_word_dict)
mnli_DSD_word_dict = DSD_Debias(debias_model, mnli_word_dict)

In [None]:
def get_mnli_score(word_embeddings):
    """Train a pair scorer on sentence-pair vectors and return macro F1.

    Every pair in the notebook-level `sentence_pairs` is converted into two
    vectors with `avg_feature_vector`, split 80/20 together with
    `similarity_scores`, and used to train an `STSBModel` with MSE loss for
    100 epochs. Held-out predictions are rounded to the nearest integer before
    the macro F1 is computed.

    The split is not seeded, so separate calls evaluate on different splits.

    Args:
        word_embeddings: word-to-vector lookup passed to `avg_feature_vector`.

    Returns:
        Macro-averaged F1 score (float) on the held-out split.
    """

    def _prepare_batch(batch_embeddings, batch_scores):
        # Convert one collated batch to float32 tensors on the configured device.
        first = torch.tensor(batch_embeddings[0], dtype=torch.float32).to(CFG.device)
        second = torch.tensor(batch_embeddings[1], dtype=torch.float32).to(CFG.device)
        targets = torch.tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        return first, second, targets

    # Convert the sentence pairs to embeddings
    pair_embeddings = [
        (avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
         avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim))
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_embeddings, val_embeddings, train_scores, val_scores = train_test_split(pair_embeddings, similarity_scores, test_size=0.2)

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_embeddings, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_embeddings, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = STSBModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-5)

    # Training loop
    for epoch in range(100):
        losses = []
        for batch_embeddings, batch_scores in train_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            # The model emits (batch, 1). Drop the trailing axis so the loss
            # compares element-wise with the per-pair targets instead of
            # broadcasting both operands to (batch, batch).
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if((epoch+1)%10==0):
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_embeddings, batch_scores in val_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch losses (batches are weighted equally).
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")
    # Compute evaluation metrics: round the regression output to a class label
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average="macro")
    print(f"F1 Score: {f1}")
    return f1
# Score each embedding variant on the currently loaded pairs and record its macro F1.
for result_key, variant_word_dict in (
    ('MNLI Biased F1', mnli_word_dict),
    ('MNLI Soft-Debiased F1', mnli_soft_debiased_word_dict),
    ('MNLI DSD F1', mnli_DSD_word_dict),
):
    downstream_results[result_key] = get_mnli_score(variant_word_dict)

In [None]:
splits = {'train': 'train.jsonl', 'validation': 'validation.jsonl', 'test': 'test.jsonl'}
rte_df = pd.read_json("hf://datasets/SetFit/rte/" + splits["train"], lines=True)

# Notebook-level inputs read by the scoring function defined in the next cell.
sentence_pairs = list(zip(rte_df["text1"], rte_df["text2"]))
similarity_scores = rte_df["label"].tolist()

stop_words = set(stopwords.words('english'))

if not os.path.isfile(CFG.rte_word_vectors):
    # Cache miss: embed every distinct content word found in the sentence pairs.
    def preprocess_text(text):
        """Lower-case and tokenise `text`, keeping alphabetic tokens that are not stop words."""
        tokens = nltk.word_tokenize(text.lower())
        filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
        return filtered_tokens

    # One token list per sentence, in pair order (first sentence, then second).
    sentences = [preprocess_text(text) for pair in sentence_pairs for text in pair]

    rte_words = list({word for sentence in sentences for word in sentence})
    # assumes get_word_embeddings returns one embedding per input word, in order — TODO confirm
    rte_word_embeddings = get_word_embeddings(rte_words, lm_model)
    rte_word_dict = dict(zip(rte_words, rte_word_embeddings))
    # Context manager guarantees the cache file is flushed and closed.
    with open(CFG.rte_word_vectors, "wb") as cache_file:
        pickle.dump(rte_word_dict, cache_file)
else:
    # Cache hit: reuse the vectors written by an earlier run of this cell.
    with open(CFG.rte_word_vectors, "rb") as cache_file:
        rte_word_dict = pickle.load(cache_file)
rte_soft_debiased_word_dict = Soft_Debias(debias_matrix, rte_word_dict)
rte_DSD_word_dict = DSD_Debias(debias_model, rte_word_dict)

In [None]:
def get_rte_score(word_embeddings):
    """Train a pair scorer on sentence-pair vectors and return macro F1.

    Every pair in the notebook-level `sentence_pairs` is converted into two
    vectors with `avg_feature_vector`, split 80/20 together with
    `similarity_scores`, and used to train an `STSBModel` with MSE loss for
    100 epochs. Held-out predictions are rounded to the nearest integer before
    the macro F1 is computed.

    The split is not seeded, so separate calls evaluate on different splits.

    Args:
        word_embeddings: word-to-vector lookup passed to `avg_feature_vector`.

    Returns:
        Macro-averaged F1 score (float) on the held-out split.
    """

    def _prepare_batch(batch_embeddings, batch_scores):
        # Convert one collated batch to float32 tensors on the configured device.
        first = torch.tensor(batch_embeddings[0], dtype=torch.float32).to(CFG.device)
        second = torch.tensor(batch_embeddings[1], dtype=torch.float32).to(CFG.device)
        targets = torch.tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        return first, second, targets

    # Convert the sentence pairs to embeddings
    pair_embeddings = [
        (avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
         avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim))
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_embeddings, val_embeddings, train_scores, val_scores = train_test_split(pair_embeddings, similarity_scores, test_size=0.2)

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_embeddings, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_embeddings, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = STSBModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-5)

    # Training loop
    for epoch in range(100):
        losses = []
        for batch_embeddings, batch_scores in train_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            # The model emits (batch, 1). Drop the trailing axis so the loss
            # compares element-wise with the per-pair targets instead of
            # broadcasting both operands to (batch, batch).
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if((epoch+1)%10==0):
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_embeddings, batch_scores in val_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch losses (batches are weighted equally).
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")
    # Compute evaluation metrics: round the regression output to a class label
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average = "macro")
    print(f"F1 Score: {f1}")
    return f1

# Score each embedding variant on the currently loaded pairs and record its macro F1.
for result_key, variant_word_dict in (
    ('RTE Biased F1', rte_word_dict),
    ('RTE Soft-Debiased F1', rte_soft_debiased_word_dict),
    ('RTE DSD F1', rte_DSD_word_dict),
):
    downstream_results[result_key] = get_rte_score(variant_word_dict)

In [None]:
splits = {'train': 'train.jsonl', 'validation': 'validation.jsonl', 'test': 'test.jsonl'}
wnli_df = pd.read_json("hf://datasets/SetFit/wnli/" + splits["train"], lines=True)

# Notebook-level inputs read by the scoring function defined in a later cell.
sentence_pairs = list(zip(wnli_df["text1"], wnli_df["text2"]))
similarity_scores = wnli_df["label"].tolist()

stop_words = set(stopwords.words('english'))

if not os.path.isfile(CFG.wnli_word_vectors):
    # Cache miss: embed every distinct content word found in the sentence pairs.
    def preprocess_text(text):
        """Lower-case and tokenise `text`, keeping alphabetic tokens that are not stop words."""
        tokens = nltk.word_tokenize(text.lower())
        filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
        return filtered_tokens

    # One token list per sentence, in pair order (first sentence, then second).
    sentences = [preprocess_text(text) for pair in sentence_pairs for text in pair]

    wnli_words = list({word for sentence in sentences for word in sentence})
    # assumes get_word_embeddings returns one embedding per input word, in order — TODO confirm
    wnli_word_embeddings = get_word_embeddings(wnli_words, lm_model)
    wnli_word_dict = dict(zip(wnli_words, wnli_word_embeddings))
    # Context manager guarantees the cache file is flushed and closed.
    with open(CFG.wnli_word_vectors, "wb") as cache_file:
        pickle.dump(wnli_word_dict, cache_file)
else:
    # Cache hit: reuse the vectors written by an earlier run of this cell.
    with open(CFG.wnli_word_vectors, "rb") as cache_file:
        wnli_word_dict = pickle.load(cache_file)
wnli_soft_debiased_word_dict = Soft_Debias(debias_matrix, wnli_word_dict)
wnli_DSD_word_dict = DSD_Debias(debias_model, wnli_word_dict)

In [None]:
# Display how many sentence pairs are currently loaded.
len(sentence_pairs)

In [None]:
def get_wnli_score(word_embeddings):
    """Train a pair scorer on sentence-pair vectors and return macro F1.

    Every pair in the notebook-level `sentence_pairs` is converted into two
    vectors with `avg_feature_vector`, split 80/20 together with
    `similarity_scores`, and used to train an `STSBModel` with MSE loss for
    1000 epochs. Held-out predictions are rounded to the nearest integer
    before the macro F1 is computed.

    The split is not seeded, so separate calls evaluate on different splits.

    Args:
        word_embeddings: word-to-vector lookup passed to `avg_feature_vector`.

    Returns:
        Macro-averaged F1 score (float) on the held-out split.
    """

    def _prepare_batch(batch_embeddings, batch_scores):
        # Convert one collated batch to float32 tensors on the configured device.
        first = torch.tensor(batch_embeddings[0], dtype=torch.float32).to(CFG.device)
        second = torch.tensor(batch_embeddings[1], dtype=torch.float32).to(CFG.device)
        targets = torch.tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        return first, second, targets

    # Convert the sentence pairs to embeddings
    pair_embeddings = [
        (avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
         avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim))
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_embeddings, val_embeddings, train_scores, val_scores = train_test_split(pair_embeddings, similarity_scores, test_size=0.2)

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_embeddings, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_embeddings, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = STSBModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-3)

    # Training loop
    for epoch in range(1000):
        losses = []
        for batch_embeddings, batch_scores in train_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            # The model emits (batch, 1). Drop the trailing axis so the loss
            # compares element-wise with the per-pair targets instead of
            # broadcasting both operands to (batch, batch).
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if((epoch+1)%100==0):
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_embeddings, batch_scores in val_dataloader:
            first, second, targets = _prepare_batch(batch_embeddings, batch_scores)
            predictions = model(first, second).squeeze(-1)
            loss = loss_fn(predictions, targets)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch losses (batches are weighted equally).
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")
    # Compute evaluation metrics: round the regression output to a class label
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average="macro")
    print(f"F1 Score: {f1}")
    return f1

# Score each embedding variant on the currently loaded pairs and record its macro F1.
for result_key, variant_word_dict in (
    ('WNLI Biased F1', wnli_word_dict),
    ('WNLI Soft-Debiased F1', wnli_soft_debiased_word_dict),
    ('WNLI DSD F1', wnli_DSD_word_dict),
):
    downstream_results[result_key] = get_wnli_score(variant_word_dict)

In [None]:
# Hand the collected downstream metrics to save_results along with the configured output filename.
save_results(downstream_results, CFG.downstream_results_filename)