In [None]:
# !git clone https://github.com/aishik-rakshit/PyDebiaser-Embeddings.git

In [None]:
%cd /content/PyDebiaser-Embeddings
!pip install .

In [None]:
!pip -q install contractions
!pip -q install huggingface_hub
!pip -q install contractions
!pip -q install datasets

In [None]:
# !huggingface-cli login

In [None]:
import gc
import json
import csv
import numpy as np
import pandas as pd
import re
import string
from collections import Counter
from pickle import dump
from tqdm.notebook import tqdm

import nltk
from nltk.corpus import stopwords
from collections import Counter, defaultdict
import warnings
warnings.filterwarnings("ignore")

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModel
from huggingface_hub import notebook_login
from datasets import load_dataset

from xgboost import XGBClassifier

from scipy import spatial
from scipy.stats import ttest_rel, spearmanr, pearsonr
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score

import contractions
from gensim.models.keyedvectors import Word2VecKeyedVectors
from gensim.models import Word2Vec

from pydebiaser.INLP import INLP
from pydebiaser.SelfDebias import SelfDebias
from pydebiaser.SentDebias import SentDebias

In [None]:
import os
os.makedirs("saved-models", exist_ok = True)

In [None]:
class CFG:
    """Run-wide configuration: bias category, model identifiers, batching options and file paths.

    Later cells also attach ``subspace_dim`` and ``embedding_dim`` to this class at runtime.
    """
    # Bias category; used to build the vocab filename and to filter StereoSet / CrowS-Pairs rows.
    bias_type = "religion"
    # Key looked up inside the vocab file's "analogy_templates" section.
    mode = "attribute"
    # Hub identifier passed to AutoModel / AutoTokenizer and to the pydebiaser classes.
    model_name = "openai-community/gpt2-xl"
    # Model-class name string passed as the first argument to INLP and SentDebias.
    model = "GPT2Model"
    # When False the Self-Debias sections are skipped and their result columns are set to None.
    run_self_debias = True
    # Fixed tokenized length (padding/truncation) for each single word.
    max_len = 4
    batch_size = 32
    num_workers = 12
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # All data files below live under this directory.
    base_path = "/content/drive/MyDrive/DSD/"
    vocabPath = base_path + f"{bias_type}_attributes_optm.json"
    # Filesystem-safe prefix: '/' in the model name is replaced by '-'.
    outprefix =  model_name.replace("/", "-")+"-"+bias_type

    embedding_dict_data = base_path + f"word-embeddings/{model_name.replace('/','_')}_word_embeddings_reddit-l2.pkl"
    stereoset_data = base_path + "stereoset.json"
    crows_data = base_path + "crows_pairs.csv"
    cross_ner_data = base_path + "cross_ner.txt"
    stanford_sentiment_treebank_data = base_path + "stanford_sentiment_treebank.csv"

    # CSV files that save_results appends to (bias metrics / downstream-task metrics).
    results_filename = base_path + "results/other_methods_results.csv"
    ds_results_filename = base_path + "results/other_methods_ds_results.csv"

In [None]:
class WordsDataset(Dataset):
    """Dataset yielding one tokenized word per item, padded/truncated to ``CFG.max_len``."""

    def __init__(self, words, tokenizer):
        self.tokenizer = tokenizer
        self.words = words

    def __len__(self):
        return len(self.words)

    def __getitem__(self, idx):
        # Tokenize lazily so only the raw strings are held by the dataset.
        encoded = self.tokenizer(
            self.words[idx],
            padding='max_length',
            max_length=CFG.max_len,
            truncation=True,
        )
        input_ids = torch.tensor(encoded.input_ids, dtype=torch.long)
        attention_mask = torch.tensor(encoded.attention_mask, dtype=torch.long)
        return {"input_ids": input_ids, "attention_mask": attention_mask}

In [None]:
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the positions enabled in ``attention_mask``.

    ``model_output[0]`` is taken as the per-token embeddings; the mask is broadcast
    to that tensor's size, so masked positions contribute nothing to the sum.
    The divisor is clamped to 1e-9 so an all-zero mask yields zeros, not NaN.
    """
    hidden_states = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    summed = torch.sum(hidden_states * mask, 1)
    counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / counts

class Model(torch.nn.Module):
    """Wraps an encoder so that a forward pass returns mask-aware mean-pooled embeddings."""

    def __init__(self, model):
        super().__init__()
        self.encoder = model

    def forward(self, input_ids, attention_mask):
        # Run the wrapped encoder, then collapse the token axis with mean_pooling.
        encoder_output = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        return mean_pooling(encoder_output, attention_mask)

# Baseline encoder: pretrained weights for CFG.model_name wrapped in the mean-pooling Model, moved to CFG.device.
lm_model = Model(AutoModel.from_pretrained(CFG.model_name)).to(CFG.device)

In [None]:
def get_word_embeddings(words, model):
    """Embed each entry of ``words`` with ``model`` and return one row per word.

    Args:
        words: sequence of strings; each is tokenized on its own by ``WordsDataset``
            using the module-level ``tokenizer``.
        model: module called as ``model(input_ids, attention_mask)``; it is switched
            to eval mode here.

    Returns:
        A numpy array with the per-batch outputs concatenated along axis 0, in the
        same order as ``words`` (the loader does not shuffle).
    """
    model.eval()
    words_dataset = WordsDataset(words, tokenizer)
    words_dataloader = DataLoader(words_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers)
    embeddings = []
    # Inference only: without no_grad every forward pass builds an autograd graph
    # that is thrown away immediately, wasting accelerator memory.
    with torch.no_grad():
        for batch in tqdm(words_dataloader, total=len(words_dataloader)):
            input_ids = torch.squeeze(batch['input_ids'].to(CFG.device), axis=1)
            attention_mask = torch.squeeze(batch['attention_mask'].to(CFG.device), axis=1)
            batch_embeddings = model(input_ids, attention_mask).detach().cpu().numpy()
            embeddings.append(batch_embeddings)
    # concatenate the embeddings into a single numpy array
    embeddings = np.concatenate(embeddings, axis=0)
    # Drop the loader (and its worker processes) before returning.
    del words_dataset, words_dataloader
    gc.collect()
    return embeddings

In [None]:
# Vocabulary to embed: one word per line, surrounding whitespace stripped.
with open('/content/drive/MyDrive/DSD/reddit_l2_w2v_words.txt', 'r') as f:
    loaded_words = [line.strip() for line in f]

In [None]:
def isValidWord(word):
    """Return True when every character of ``word`` is alphabetic (an empty string passes)."""
    for ch in word:
        if not ch.isalpha():
            return False
    return True

def pruneWordVecs(wordVecs):
    """Return a new dict holding only the entries whose key passes ``isValidWord``."""
    return {word: vec for word, vec in wordVecs.items() if isValidWord(word)}

def load_words(w2v_files):
    """Collect the first whitespace-separated token of every line in the given files.

    Args:
        w2v_files: iterable of paths to text files whose lines start with a word
            (e.g. word2vec text format: word followed by its vector components).

    Returns:
        List of words in file order, then line order. Blank or whitespace-only
        lines are skipped instead of raising ``IndexError``.
    """
    words = []
    for w2v_file in w2v_files:
        with open(w2v_file, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # e.g. a trailing empty line at the end of the file
                    continue
                words.append(fields[0])

    return words

def load_analogy_templates(json_filepath, mode):
    """Return the ``analogy_templates[mode]`` entry of the JSON file at ``json_filepath``."""
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    return payload["analogy_templates"][mode]

def load_test_terms(json_filepath):
    """Return the ``testTerms`` entry of the JSON file at ``json_filepath``."""
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    return payload["testTerms"]

def load_eval_terms(json_filepath, mode):
    """Return ``(eval_targets, analogy_templates[mode].values())`` from the JSON file.

    The second element is a dict values view, not a list.
    """
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    targets = payload["eval_targets"]
    attribute_sets = payload["analogy_templates"][mode].values()
    return targets, attribute_sets

def load_def_sets(json_filepath):
    """Return the file's ``definite_sets`` list as a dict keyed by list position."""
    with open(json_filepath, "r") as handle:
        payload = json.load(handle)
    return dict(enumerate(payload["definite_sets"]))

In [None]:
# Bias vocabulary for the configured bias type: templates, definitional sets, test and eval terms.
analogyTemplates = load_analogy_templates(CFG.vocabPath, CFG.mode)
defSets = load_def_sets(CFG.vocabPath)
testTerms = load_test_terms(CFG.vocabPath)
evalTargets, evalAttrs = load_eval_terms(CFG.vocabPath, CFG.mode)

# (size of the first definitional set) x (number of definitional sets)
CFG.subspace_dim = len(defSets[0]) * len(defSets)

# Flatten every template's word list into one list, keeping template order.
neutral_words = [word for group in analogyTemplates.values() for word in group]

tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)
# Fixed-length padding needs a pad token; fall back to EOS when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

neutral_word_embeddings = get_word_embeddings(neutral_words, lm_model)
neutral_embedding_dict = dict(zip(neutral_words, neutral_word_embeddings))
# The last axis of the embedding matrix is the embedding width used by the scoring helpers.
embedding_dim = neutral_word_embeddings.shape[-1]
CFG.embedding_dim = embedding_dim

In [None]:
def scoredAnalogyAnswers(a, b, x, keyedVecs, thresh=12.5):
    """Score every candidate ``y`` for the analogy "a is to b as x is to y".

    Candidates are the keys of ``keyedVecs.key_to_index`` whose vector lies within
    Euclidean distance ``thresh`` of ``x``'s vector. Each candidate's score is the
    cosine between ``(a - b)`` and ``(x - y)``; a zero denominator is replaced by 1e-6.

    Args:
        a, b, x: keys present in ``keyedVecs``.
        keyedVecs: object exposing ``key_to_index`` and ``__getitem__`` returning a vector.
        thresh: maximum Euclidean distance between ``x`` and a candidate.

    Returns:
        List of ``(score, a, b, x, y)`` tuples sorted in descending order.
    """
    # These depend only on a, b and x, so compute them once, not once per candidate.
    aVec = np.array(keyedVecs[a])
    bVec = np.array(keyedVecs[b])
    xVec = np.array(keyedVecs[x])
    abDiff = aVec - bVec
    abNorm = np.linalg.norm(abDiff)

    scored = []
    for y in keyedVecs.key_to_index.keys():
        xyDiff = xVec - np.array(keyedVecs[y])
        # One norm serves both the neighbourhood filter and the cosine denominator.
        xyNorm = np.linalg.norm(xyDiff)
        if not (xyNorm < thresh):
            continue
        numerator = abDiff.dot(xyDiff)
        denominator = abNorm * xyNorm
        scored.append((numerator / (denominator if denominator != 0 else 1e-6), a, b, x, y))

    return sorted(scored, reverse=True)

def generateAnalogies(analogyTemplates, keyedVecs):
    """Expand the templates into (A, stereotype, B) triples and score every completion.

    For each ordered pair of distinct template keys (A, B) and each stereotype word
    listed under A, ``scoredAnalogyAnswers`` supplies the candidate completions.

    Returns:
        ``(analogies, outputGroups)``: all ``[score, text, raw]`` entries sorted by
        descending score, and the same entries grouped per triple in scoring order.
    """
    triples = [
        [A, stereotype, B]
        for A, stereotypes in analogyTemplates.items()
        for B in analogyTemplates
        if A != B
        for stereotype in stereotypes
    ]

    analogies = []
    outputGroups = []
    for a, b, x in triples:
        group = []
        for score, a_w, b_w, x_w, y_w in scoredAnalogyAnswers(a, b, x, keyedVecs):
            analogy = str(a_w) + " is to " + str(b_w) + " as " + str(x_w) + " is to " + str(y_w)
            analogyRaw = [a_w, b_w, x_w, y_w]
            # Two separate outer lists (flat and grouped) that share the same raw list.
            analogies.append([score, analogy, analogyRaw])
            group.append([score, analogy, analogyRaw])
        outputGroups.append(group)

    analogies.sort(key=lambda entry: -entry[0])
    return analogies, outputGroups

def convert_legacy_to_keyvec(legacy_w2v):
    """Build a ``Word2VecKeyedVectors`` from a ``{word: vector}`` mapping.

    The vector length of the first entry fixes the dimensionality; every other
    vector is asserted to have the same length before the vectors are added.
    """
    first_key = list(legacy_w2v.keys())[0]
    dim = len(legacy_w2v[first_key])
    vectors = Word2VecKeyedVectors(dim)

    ws = list(legacy_w2v.keys())
    vs = list(legacy_w2v.values())
    for vect in vs:
        assert(len(vect) == dim)

    vectors.add_vectors(ws, vs, replace=True)
    return vectors

def multiclass_evaluation(embeddings, targets, attributes):
    """Average ``_unary_s`` over every (target word, attribute set) combination.

    Returns:
        ``(m_score, targets_eval)``: the mean of all per-combination scores and the
        list of those scores, ordered by target set, then target, then attribute set.
    """
    targets_eval = [
        _unary_s(embeddings, target, attributeSet)
        for targetSet in targets
        for target in targetSet
        for attributeSet in attributes
    ]
    return np.mean(targets_eval), targets_eval

def _unary_s(embeddings, target, attributes):
	return np.mean([ spatial.distance.cosine(embeddings[target], embeddings[ai]) for ai in attributes ])

In [None]:
# One row of bias metrics for this run; later cells add a column per method and metric.
results = {
    'model': CFG.model_name,
    'bias_type': CFG.bias_type,
}

In [None]:
def avg_feature_vector(sentence, model, num_features):
    """Average the embeddings of the whitespace-separated words of ``sentence``.

    Args:
        sentence: text split on whitespace; words are looked up as-is.
        model: mapping from word to embedding vector.
        num_features: embedding width, used for the zero vector.

    Returns:
        The mean of the vectors of the words found in ``model``, or a float32 zero
        vector of length ``num_features`` when none of the words is present.
    """
    feature_vec = np.zeros((num_features, ), dtype='float32')
    n_words = 0
    for word in sentence.split():
        # Direct membership test; building list(model.keys()) per word was O(vocabulary).
        if word in model:
            n_words += 1
            feature_vec = np.add(feature_vec, model[word])
    if n_words > 0:
        feature_vec = np.divide(feature_vec, n_words)
    return feature_vec

In [None]:
def get_stereotype_score(word_vectors, data):
    """Percentage of items whose stereotype sentence "wins" against the anti-stereotype one.

    Each sentence is embedded as the average of its word vectors, L2-normalised, and
    compared with the context through ``scipy.spatial.distance.cosine``.

    Args:
        word_vectors: mapping from word to embedding vector.
        data: iterable of dicts with keys ``context``, ``stereotype`` and
            ``anti-stereotype``. Items missing either labelled sentence are skipped.

    Returns:
        The score in percent, or ``None`` when no usable item exists (for example
        when the configured bias type has no entries in the dataset).
    """
    total_samples = 0
    stereotypical_samples = 0
    skipped = 0

    for item in data:
        # An item lacking one of the two labelled sentences cannot be compared.
        if 'stereotype' not in item or 'anti-stereotype' not in item:
            skipped += 1
            continue

        context = item['context']
        stereo = item['stereotype']
        antistereo = item['anti-stereotype']

        # Sentence embeddings = average of known word vectors
        context_vec = avg_feature_vector(context, word_vectors, num_features=CFG.embedding_dim)
        stereo_vec = avg_feature_vector(stereo, word_vectors, num_features=CFG.embedding_dim)
        antistereo_vec = avg_feature_vector(antistereo, word_vectors, num_features=CFG.embedding_dim)

        # A sentence with no known word is a zero vector: the division yields NaN and
        # both comparisons below are then False (the item counts as non-stereotypical).
        context_vec = context_vec/np.linalg.norm(context_vec)
        stereo_vec = stereo_vec/np.linalg.norm(stereo_vec)
        antistereo_vec = antistereo_vec/np.linalg.norm(antistereo_vec)

        # NOTE(review): distance.cosine returns a *distance* (1 - cosine similarity), so the
        # test below counts an item when the stereotype sentence is FARTHER from the context.
        # Variable names suggest similarity was intended; kept as-is so scores stay comparable
        # with earlier runs. Confirm the intended direction.
        simstereo = spatial.distance.cosine(context_vec, stereo_vec)
        simantistereo = spatial.distance.cosine(context_vec, antistereo_vec)

        if simstereo > simantistereo:
            stereotypical_samples += 1
        total_samples += 1

    if skipped:
        print('Skipped items without both labelled sentences:', skipped)
    if total_samples == 0:
        # Previously a ZeroDivisionError; None matches how skipped metrics are recorded elsewhere.
        print('Stereotype Score:', None)
        return None

    stereotype_score = stereotypical_samples*100 / total_samples
    print('Stereotype Score:', stereotype_score)
    return stereotype_score

In [None]:
# Parse the StereoSet JSON file; the next cell filters `data` by item['bias_type'].
with open(CFG.stereoset_data) as f:
    data = json.load(f)

In [None]:
# Keep only the entries for the configured bias type.
topic_data = [item for item in data if item['bias_type'] == CFG.bias_type]

# Flatten each entry to {'context', 'stereotype', 'anti-stereotype'}; sentences with
# any other gold label are ignored, and a later sentence with the same label wins.
stereoset_data_processed = []
for item in topic_data:
    data_dict = {'context': item['context']}
    for item_ in item['sentences']:
        label = item_['gold_label']
        if label in ('stereotype', 'anti-stereotype'):
            data_dict[label] = item_['sentence']
    stereoset_data_processed.append(data_dict)

In [None]:
def read_crows_data(input_file):
    """Load a CrowS-Pairs style CSV into a DataFrame.

    The file must provide the columns ``sent_more``, ``sent_less``,
    ``stereo_antistereo`` and ``bias_type``.

    Returns:
        DataFrame with columns ``sent1``, ``sent2``, ``direction``, ``bias_type``.
        For rows whose direction is ``'stereo'``, ``sent1`` is ``sent_more`` and
        ``sent2`` is ``sent_less``; for every other direction they are swapped.
    """
    columns = ['sent1', 'sent2', 'direction', 'bias_type']
    records = []

    # newline='' is what the csv module requires so quoted fields with embedded
    # newlines are parsed correctly.
    with open(input_file, newline='') as f:
        for row in csv.DictReader(f):
            direction = row['stereo_antistereo']
            if direction == 'stereo':
                sent1, sent2 = row['sent_more'], row['sent_less']
            else:
                sent1, sent2 = row['sent_less'], row['sent_more']

            records.append({'sent1': sent1,
                            'sent2': sent2,
                            'direction': direction,
                            'bias_type': row['bias_type']})

    # Build the frame once; appending row by row (via the private DataFrame._append)
    # copied the whole frame on every row.
    return pd.DataFrame(records, columns=columns)

def replace_words(text):
    """Collapse every word ending in "woman" / "man" to the bare suffix.

    Any whole word matched by the pattern is replaced, so e.g. "policewoman"
    becomes "woman" and "fireman" (or "human") becomes "man".
    """
    def _suffix(match):
        # Test the longer suffix first: every "...woman" also ends with "man".
        return 'woman' if match.group(0).endswith('woman') else 'man'

    return re.sub(r'\b\w*(man|woman)\b', _suffix, text)

def difference_with_repetition(list1, list2):
    """Multiset difference ``list1 - list2`` rendered as a space-joined string.

    Each occurrence in ``list2`` cancels one occurrence in ``list1``; the remaining
    items are emitted grouped by item, in order of first appearance in ``list1``.
    """
    remaining = Counter(list1) - Counter(list2)
    return " ".join(remaining.elements())

def common_and_uncommon_parts(s1, s2):
    """Split two sentences into their shared token subsequence and the leftovers.

    A longest-common-subsequence table over the whitespace tokens is filled and then
    walked backwards to recover the shared tokens.

    Returns:
        ``(common, uncommon1, uncommon2)`` as space-joined strings, where the
        uncommon parts are the multiset differences of each sentence's tokens
        and the common tokens.
    """
    tokens1, tokens2 = s1.split(), s2.split()
    rows, cols = len(tokens1), len(tokens2)

    # table[i][j] = LCS length of tokens1[:i] and tokens2[:j]
    table = [[0] * (cols + 1) for _ in range(rows + 1)]
    for i in range(rows):
        for j in range(cols):
            if tokens1[i] == tokens2[j]:
                table[i + 1][j + 1] = table[i][j] + 1
            else:
                table[i + 1][j + 1] = max(table[i + 1][j], table[i][j + 1])

    # Backtrack from the bottom-right corner, collecting matches last-to-first.
    shared = []
    i, j = rows, cols
    while i != 0 and j != 0:
        if table[i][j] == table[i - 1][j]:
            i -= 1
        elif table[i][j] == table[i][j - 1]:
            j -= 1
        else:
            assert tokens1[i - 1] == tokens2[j - 1]
            shared.append(tokens1[i - 1])
            i -= 1
            j -= 1
    shared.reverse()

    common = ' '.join(shared)
    uncommon1 = difference_with_repetition(tokens1, common.split())
    uncommon2 = difference_with_repetition(tokens2, common.split())
    return common, uncommon1, uncommon2

crows_data = read_crows_data(CFG.crows_data)

# The CrowS-Pairs rows are filtered with 'race-color' when the configured type is 'race';
# every other bias type is used unchanged.
bias_type = 'race-color' if CFG.bias_type == 'race' else CFG.bias_type
bias_type_crows_data = crows_data[crows_data['bias_type'] == bias_type].reset_index(drop=True)

In [None]:
def get_crows_score(word_vectors):
    """Score the filtered CrowS-Pairs rows (``bias_type_crows_data``) with word embeddings.

    For each pair, both sentences are lower-cased, contractions expanded and
    punctuation removed. The tokens shared by the two sentences form the context;
    the remaining tokens of each sentence (after ``replace_words``) are embedded and
    compared with the context through ``scipy.spatial.distance.cosine``.

    Args:
        word_vectors: mapping from word to embedding vector.

    Returns:
        ``(metric, stereotype, anti_stereotype)`` percentages rounded to 2 decimals.
        An entry is ``None`` when its denominator is zero (no rows, or no non-neutral
        rows of that direction); the previous code raised ``ZeroDivisionError`` there.
    """
    def _percent(count, total):
        # Guarded percentage: None when there is nothing to divide by.
        return round(count / total * 100, 2) if total else None

    total_stereo, total_antistereo = 0, 0
    stereo_score, antistereo_score = 0, 0

    N = 0
    neutral = 0
    punctuation_re = re.compile(f"[{re.escape(string.punctuation)}]")

    for row in bias_type_crows_data.itertuples():
        N += 1
        sent1 = punctuation_re.sub("", contractions.fix(row.sent1.lower()).replace('\'s', ' is'))
        sent2 = punctuation_re.sub("", contractions.fix(row.sent2.lower()).replace('\'s', ' is'))
        direction = row.direction

        common_sequence, remainder1, remainder2 = common_and_uncommon_parts(sent1, sent2)
        remainder1 = replace_words(remainder1)
        remainder2 = replace_words(remainder2)

        context_vec = avg_feature_vector(common_sequence, word_vectors, num_features=CFG.embedding_dim)
        more_vec = avg_feature_vector(remainder1, word_vectors, num_features=CFG.embedding_dim)
        less_vec = avg_feature_vector(remainder2, word_vectors, num_features=CFG.embedding_dim)

        # Zero vectors (no known word) turn into NaN here; every comparison below is
        # then False, so such a pair is counted in its direction total but never scored.
        more_vec = more_vec/np.linalg.norm(more_vec)
        less_vec = less_vec/np.linalg.norm(less_vec)
        context_vec = context_vec/np.linalg.norm(context_vec)

        # NOTE(review): distance.cosine is a *distance* (1 - cosine similarity), so a larger
        # "score" means the remainder is farther from the context. Kept unchanged so results
        # stay comparable with earlier runs; confirm the intended direction.
        more_score = spatial.distance.cosine(context_vec, more_vec)
        less_score = spatial.distance.cosine(context_vec, less_vec)

        if more_score == less_score:
            neutral += 1
        elif direction == 'stereo':
            total_stereo += 1
            if more_score > less_score:
                stereo_score += 1
        elif direction == 'antistereo':
            total_antistereo += 1
            if less_score > more_score:
                antistereo_score += 1

    # The per-pair DataFrame the old code built was appended to once, after the loop,
    # and never returned or read; it has been dropped.
    metric = _percent(stereo_score + antistereo_score, N)
    stereotype = _percent(stereo_score, total_stereo)
    anti_stereotype = _percent(antistereo_score, total_antistereo)

    print('=' * 100)
    print('Total examples:', N)
    print('Metric score:', metric)
    print('Stereotype score:', stereotype)
    if total_antistereo != 0:
        print('Anti-stereotype score:', anti_stereotype)
    print("Num. neutral:", neutral, _percent(neutral, N))
    print('=' * 100)
    print()
    return metric, stereotype, anti_stereotype

Self-Debias


In [None]:
# Self-Debias evaluation: embed `loaded_words` with SelfDebias, then compute MAC,
# the StereoSet score and the CrowS-Pairs scores. Skipped when CFG.run_self_debias is False.
if CFG.run_self_debias:
    # SelfDebias is given 'race-color' instead of 'race'; other bias types pass through.
    if CFG.bias_type == "race":
        debiaser_self_debias = SelfDebias(CFG.model_name, "race-color")
    else:
        debiaser_self_debias = SelfDebias(CFG.model_name, CFG.bias_type)
    self_debias_embedding_dict = dict()
    # Embed the vocabulary in slices of CFG.batch_size words.
    for i in tqdm(range(0, len(loaded_words), CFG.batch_size)):
        batch_words = loaded_words[i : i + CFG.batch_size]
        # presumably returns one embedding per input word, in order — verify against pydebiaser
        batch_embeddings = debiaser_self_debias.generate_embeddings(batch_words)
        for word, embedding in zip(batch_words, batch_embeddings):
            self_debias_embedding_dict[word] = embedding

    self_debiasedMAC, self_debiasedDistribution = multiclass_evaluation(self_debias_embedding_dict, evalTargets, evalAttrs)
    print("Self-Debias MAC:", self_debiasedMAC)
    results['Self-Debias MAC'] = self_debiasedMAC
    # Sanity check of the embedding shape; raises KeyError if 'woman'/'man' are not in loaded_words.
    print(self_debias_embedding_dict['woman'].shape)
    print(self_debias_embedding_dict['man'].shape)
    results['Self-Debias SS'] = get_stereotype_score(self_debias_embedding_dict, stereoset_data_processed)
    results['Self-Debias CMS'], results['Self-Debias CSS'], results['Self-Debias CAS'] = get_crows_score(self_debias_embedding_dict)
else:
    # Keep the result columns present (as None) so the CSV row layout stays the same.
    print("Skipping Self-Debias")
    results['Self-Debias MAC'] = None
    results['Self-Debias SS'] = None
    results['Self-Debias CMS'], results['Self-Debias CSS'], results['Self-Debias CAS'] = None, None, None

INLP

In [None]:
# Build the INLP debiaser for the configured model and bias type, then run it.
# save=True / path='saved-models' are forwarded to the library (directory created earlier).
debiaser_inlp = INLP(CFG.model, CFG.model_name, CFG.bias_type)
inlp_model = debiaser_inlp.debias(save=True,path = 'saved-models')

In [None]:
# Wrap the INLP-debiased encoder with the same mean-pooling head as the baseline.
lm_model_inlp = Model(inlp_model).to(CFG.device)

# Reload the tokenizer (get_word_embeddings reads this module-level name) and make
# sure a pad token exists for fixed-length padding.
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Take the embedding width from the debiased model's config; the scoring helpers read CFG.embedding_dim.
CFG.embedding_dim = inlp_model.config.hidden_size

In [None]:
# INLP evaluation: embed the vocabulary with the debiased encoder, then compute MAC,
# the StereoSet score and the three CrowS-Pairs scores.
word_embeddings = get_word_embeddings(loaded_words, lm_model_inlp)
inlp_word_embedding_dict = {word: embedding for word, embedding in zip(loaded_words, word_embeddings)}
inlp_mac, inlp_distribution = multiclass_evaluation(inlp_word_embedding_dict, evalTargets, evalAttrs)

print("INLP MAC:", inlp_mac)
results['INLP MAC'] = inlp_mac
results['INLP SS'] = get_stereotype_score(inlp_word_embedding_dict, stereoset_data_processed)
# get_crows_score returns (metric, stereotype, anti-stereotype) scores, in that order.
results['INLP CMS'], results['INLP CSS'], results['INLP CAS'] = get_crows_score(inlp_word_embedding_dict)
print("INLP SS:", results['INLP SS'])

In [None]:
# Echo the three CrowS-Pairs scores recorded for INLP.
for metric_key in ("INLP CMS", "INLP CSS", "INLP CAS"):
    print(metric_key, results[metric_key])

Sent-Debias

In [None]:
# Build the SentDebias debiaser (the bias type is passed as a one-element list) and run it.
# save=True / path='saved-models' are forwarded to the library.
debiaser_sent_debias = SentDebias(CFG.model, CFG.model_name, [CFG.bias_type])
sent_debias_model = debiaser_sent_debias.debias(save=True,path = 'saved-models')

In [None]:
# Wrap the Sent-Debias encoder with the same mean-pooling head as the baseline.
lm_model_sent_debias = Model(sent_debias_model).to(CFG.device)

# Reload the tokenizer (get_word_embeddings reads this module-level name) and make
# sure a pad token exists for fixed-length padding.
tokenizer = AutoTokenizer.from_pretrained(CFG.model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Sent-Debias evaluation: embed the vocabulary with the debiased encoder, then compute MAC,
# the StereoSet score and the three CrowS-Pairs scores.
word_embeddings = get_word_embeddings(loaded_words, lm_model_sent_debias)
sent_debias_word_embdedding_dict = {word: embedding for word, embedding in zip(loaded_words, word_embeddings)}
sent_debias_mac, sent_debias_distribution = multiclass_evaluation(sent_debias_word_embdedding_dict, evalTargets, evalAttrs)
print("Sent-Debias MAC:", sent_debias_mac)
results['Sent-Debias MAC'] = sent_debias_mac
results['Sent-Debias SS'] = get_stereotype_score(sent_debias_word_embdedding_dict, stereoset_data_processed)
# get_crows_score returns (metric, stereotype, anti-stereotype) scores, in that order.
results['Sent-Debias CMS'], results['Sent-Debias CSS'], results['Sent-Debias CAS'] = get_crows_score(sent_debias_word_embdedding_dict)
print("Sent-Debias SS:", results['Sent-Debias SS'])
print("Sent-Debias CMS", results['Sent-Debias CMS'])
print("Sent-Debias CSS", results['Sent-Debias CSS'])
print("Sent-Debias CAS", results['Sent-Debias CAS'])

In [None]:
def save_results(results, filename):
    """Append one row of results to a CSV file, writing the header when needed.

    Args:
        results: mapping of column name to value; its key order defines the columns.
        filename: target CSV path. Missing parent directories are created.

    The header is written when the file does not exist yet or exists but is empty.
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    needs_header = (not os.path.isfile(filename)) or os.path.getsize(filename) == 0

    # newline='' lets the csv module control line endings (avoids blank rows on Windows).
    with open(filename, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(results.keys()))

        if needs_header:
            writer.writeheader()

        writer.writerow(results)

# Append this run's bias metrics as one row of the shared results CSV.
save_results(results, CFG.results_filename)

In [None]:
# STS-B (GLUE) training pairs and their gold similarity labels.
stsb_dataset = load_dataset("glue", "stsb")
sentence_pairs = [(row["sentence1"], row["sentence2"]) for row in stsb_dataset["train"]]
similarity_scores = [row["label"] for row in stsb_dataset["train"]]

stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Lower-case and tokenize ``text``, keeping alphabetic tokens that are not stopwords."""
    tokens = nltk.word_tokenize(text.lower())
    filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
    return filtered_tokens

# Token lists for every sentence: both sides of each pair, in pair order.
sentences = [preprocess_text(sentence) for pair in sentence_pairs for sentence in pair]

# Unique vocabulary of the task.
stsb_words = set()
for sentence in sentences:
    stsb_words.update(sentence)
stsb_words = list(stsb_words)

# Self-Debias: `self_debias_embedding_dict` from the Self-Debias section is reused as-is.
# This cell used to re-run SelfDebias over the same `loaded_words`, which only rebuilt
# that dict at the cost of a full extra pass.
# NOTE(review): unlike the three tables below, the Self-Debias table covers `loaded_words`
# rather than `stsb_words` — confirm that this vocabulary difference is intended.

# Word-embedding tables over the task vocabulary: baseline, INLP and Sent-Debias.
biased_word_embeddings = get_word_embeddings(stsb_words, lm_model)
biased_word_embedding_dict = {word: embedding for word, embedding in zip(stsb_words, biased_word_embeddings)}
stsb_word_embeddings_inlp = get_word_embeddings(stsb_words, lm_model_inlp)
stsb_word_embeddings_sent_debias = get_word_embeddings(stsb_words, lm_model_sent_debias)
stsb_word_dict_inlp = {word: embedding for word, embedding in zip(stsb_words, stsb_word_embeddings_inlp)}
stsb_word_dict_sent_debias = {word: embedding for word, embedding in zip(stsb_words, stsb_word_embeddings_sent_debias)}

In [None]:
# One row of downstream-task metrics for this run; later cells add a column per method and metric.
downstream_results = {
    'Model': CFG.model_name,
    'Bias Type': CFG.bias_type,
}

In [None]:
from torch import optim
import torch.nn.functional as F

In [None]:
class STSBModel(nn.Module):
    """Two linear towers whose cosine similarity is rescaled by a final linear layer.

    Each input goes through its own pair of linear layers (``input_dim//2`` then
    ``input_dim//4`` outputs, no activation in between). The cosine similarity of
    the two projections is reshaped to a column and fed to a 1-output linear layer.
    """

    def __init__(self, input_dim):
        super().__init__()
        half, quarter = input_dim//2, input_dim//4
        # Lazy layers infer their input width on the first forward pass.
        self.fc1 = nn.LazyLinear(half)
        self.fc1_1 = nn.LazyLinear(quarter)
        self.fc2 = nn.LazyLinear(half)
        self.fc2_1 = nn.LazyLinear(quarter)
        self.fc3 = nn.LazyLinear(1)

    def forward(self, x1, x2):
        left = self.fc1_1(self.fc1(x1))
        right = self.fc2_1(self.fc2(x2))
        similarity = F.cosine_similarity(left, right).view(-1,1)
        return self.fc3(similarity)

class SentencePairDataset(Dataset):
    """Pairs each precomputed sentence-pair embedding with its target score."""

    def __init__(self, embeddings, scores):
        self.scores = scores
        self.embeddings = embeddings

    def __len__(self):
        return len(self.embeddings)

    def __getitem__(self, idx):
        item = self.embeddings[idx]
        target = self.scores[idx]
        return item, target

def get_stsb_score(word_embeddings, seed=42):
    """Train a small similarity regressor on averaged word embeddings and report PCC / SRCC.

    Reads the module-level ``sentence_pairs`` and ``similarity_scores``. Each sentence is
    embedded with ``avg_feature_vector``; 80% of the pairs train an ``STSBModel`` for 100
    epochs (Adam, lr 1e-4, MSE loss) and the remaining 20% are used for evaluation.

    Args:
        word_embeddings: mapping from word to embedding vector.
        seed: random state of the train/validation split, so every embedding table is
            evaluated on the same held-out pairs. Model initialisation and batch
            shuffling are not seeded here.

    Returns:
        ``(pcc, srcc)``: Pearson and Spearman correlation between gold and predicted
        scores on the validation split.
    """
    # Convert the sentence pairs to embeddings
    embeddings = [(avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim), avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim)) for s1, s2 in sentence_pairs]

    # Split the data into a training set and a validation set
    train_embeddings, val_embeddings, train_scores, val_scores = train_test_split(
        embeddings, similarity_scores, test_size=0.2, random_state=seed)

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_embeddings, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_embeddings, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = STSBModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-4)

    def _to_device(batch_embeddings, batch_scores):
        # The default collate already yields tensors; move/cast them instead of
        # re-wrapping with torch.tensor(), which copies and warns.
        first = batch_embeddings[0].to(CFG.device, dtype=torch.float32)
        second = batch_embeddings[1].to(CFG.device, dtype=torch.float32)
        targets = batch_scores.to(CFG.device, dtype=torch.float32)
        return first, second, targets

    # Training loop
    for epoch in range(100):
        losses = []
        for batch_embeddings, batch_scores in train_dataloader:
            embeddings1, embeddings2, targets = _to_device(batch_embeddings, batch_scores)
            # The model returns (batch, 1) while targets are (batch,). Without the squeeze
            # MSELoss broadcasts both to (batch, batch) and compares every prediction with
            # every target.
            predictions = model(embeddings1, embeddings2).squeeze(-1)
            loss = loss_fn(predictions, targets)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if((epoch+1)%10==0):
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_embeddings, batch_scores in val_dataloader:
            embeddings1, embeddings2, targets = _to_device(batch_embeddings, batch_scores)
            predictions = model(embeddings1, embeddings2).squeeze(-1)
            loss = loss_fn(predictions, targets)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")
    # Compute evaluation metrics
    pcc = pearsonr(y_true, y_pred)[0]
    srcc = spearmanr(y_true, y_pred)[0]
    print(f"PCC: {pcc}, SRCC: {srcc}")
    return (pcc, srcc)

# STS-B scores for each embedding table; every method gets a '<name> PCC' and a '<name> SRCC' column.
downstream_results['STSB Biased PCC'], downstream_results['STSB Biased SRCC'] = get_stsb_score(biased_word_embedding_dict)
if CFG.run_self_debias:
    downstream_results['STSB Self-Debias PCC'], downstream_results['STSB Self-Debias SRCC'] = get_stsb_score(self_debias_embedding_dict)
else:
    downstream_results['STSB Self-Debias PCC'], downstream_results['STSB Self-Debias SRCC'] = None, None
downstream_results['STSB INLP PCC'], downstream_results['STSB INLP SRCC'] = get_stsb_score(stsb_word_dict_inlp)
# Key fixed: the Spearman column was written as 'STSB Sent-Debias CC'.
downstream_results['STSB Sent-Debias PCC'], downstream_results['STSB Sent-Debias SRCC'] = get_stsb_score(stsb_word_dict_sent_debias)

In [None]:
# MRPC sentence pairs and their labels.
# NOTE: this rebinds `sentence_pairs` / `similarity_scores`, which get_stsb_score also
# reads, so the STS-B scoring cell must not be re-run after this one.
mrpc_df = pd.read_parquet("/content/drive/MyDrive/DSD/mrpc.parquet")
display(mrpc_df.head())
# Read the columns directly; two separate iterrows() passes are not needed.
sentence_pairs = list(zip(mrpc_df["sentence1"], mrpc_df["sentence2"]))
similarity_scores = mrpc_df["label"].tolist()

stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Lower-case and tokenize ``text``, keeping alphabetic tokens that are not stopwords."""
    tokens = nltk.word_tokenize(text.lower())
    filtered_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
    return filtered_tokens

# Token lists for every sentence: both sides of each pair, in pair order.
sentences = [preprocess_text(sentence) for pair in sentence_pairs for sentence in pair]

# Unique vocabulary of the task.
mrpc_words = set()
for sentence in sentences:
    mrpc_words.update(sentence)
mrpc_words = list(mrpc_words)

# Self-Debias: `self_debias_embedding_dict` from the Self-Debias section is reused as-is.
# This cell used to re-run SelfDebias over the same `loaded_words`, which only rebuilt
# that dict at the cost of a full extra pass.
# NOTE(review): unlike the three tables below, the Self-Debias table covers `loaded_words`
# rather than `mrpc_words` — confirm that this vocabulary difference is intended.

# Word-embedding tables over the task vocabulary: baseline, INLP and Sent-Debias.
mrpc_biased_word_empoddings = get_word_embeddings(mrpc_words, lm_model)
mrpc_biased_word_embedding_dict = {word: embedding for word, embedding in zip(mrpc_words, mrpc_biased_word_empoddings)}
mrpc_word_embeddings_inlp = get_word_embeddings(mrpc_words, lm_model_inlp)
mrpc_word_embeddings_sent_debias = get_word_embeddings(mrpc_words, lm_model_sent_debias)
mrpc_word_dict_inlp = {word: embedding for word, embedding in zip(mrpc_words, mrpc_word_embeddings_inlp)}
mrpc_word_dict_sent_debias = {word: embedding for word, embedding in zip(mrpc_words, mrpc_word_embeddings_sent_debias)}

In [None]:
class MRPCModel(nn.Module):
    """Two linear towers whose cosine similarity is rescaled by a final linear layer.

    Each input goes through its own pair of linear layers (``input_dim//2`` then
    ``input_dim//4`` outputs, no activation in between). The cosine similarity of
    the two projections is reshaped to a column and fed to a 1-output linear layer.
    """

    def __init__(self, input_dim):
        super().__init__()
        half, quarter = input_dim//2, input_dim//4
        # Lazy layers infer their input width on the first forward pass.
        self.fc1 = nn.LazyLinear(half)
        self.fc1_1 = nn.LazyLinear(quarter)
        self.fc2 = nn.LazyLinear(half)
        self.fc2_1 = nn.LazyLinear(quarter)
        self.fc3 = nn.LazyLinear(1)

    def forward(self, x1, x2):
        left = self.fc1_1(self.fc1(x1))
        right = self.fc2_1(self.fc2(x2))
        similarity = F.cosine_similarity(left, right).view(-1,1)
        return self.fc3(similarity)

class SentencePairDataset(Dataset):
    """Pairs each precomputed sentence-pair embedding with its target label."""

    def __init__(self, embeddings, scores):
        self.scores = scores
        self.embeddings = embeddings

    def __len__(self):
        return len(self.embeddings)

    def __getitem__(self, idx):
        item = self.embeddings[idx]
        target = self.scores[idx]
        return item, target

def get_mrpc_score(word_embeddings, num_epochs=100, lr=1e-4, random_state=42):
    """Train ``MRPCModel`` on the current sentence pairs and return its macro-F1.

    Reads the module-level ``sentence_pairs`` and ``similarity_scores``, so the
    MRPC data-preparation cell must be the last one to have assigned them.

    Args:
        word_embeddings: Word -> vector lookup handed to ``avg_feature_vector``
            to build one vector per sentence.
        num_epochs: Number of passes over the training split.
        lr: Learning rate for Adam.
        random_state: Seed for the 80/20 train/validation split. Fixing it lets
            every embedding variant be scored on the same held-out pairs.

    Returns:
        Macro-averaged F1 between the validation labels and the model outputs
        rounded to the nearest integer.
    """
    # Convert the sentence pairs to (vector1, vector2) tuples
    pair_vectors = [
        (
            avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
            avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim),
        )
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_vectors, val_vectors, train_scores, val_scores = train_test_split(
        pair_vectors, similarity_scores, test_size=0.2, random_state=random_state
    )

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_vectors, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_vectors, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = MRPCModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    # `torch.optim` is reachable through the top-level `torch` import.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def run_batch(batch_vectors, batch_scores):
        """Return ``(predictions, targets, loss)`` for one collated batch."""
        first = torch.as_tensor(batch_vectors[0], dtype=torch.float32).to(CFG.device)
        second = torch.as_tensor(batch_vectors[1], dtype=torch.float32).to(CFG.device)
        targets = torch.as_tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        # The model emits (batch, 1) while there is one target per item.
        # Drop the trailing axis so MSELoss compares them element-wise instead
        # of broadcasting both into a (batch, batch) grid.
        predictions = model(first, second).squeeze(-1)
        return predictions, targets, loss_fn(predictions, targets)

    # Training loop
    for epoch in range(num_epochs):
        losses = []
        for batch_vectors, batch_scores in train_dataloader:
            _, _, loss = run_batch(batch_vectors, batch_scores)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_vectors, batch_scores in val_dataloader:
            predictions, targets, loss = run_batch(batch_vectors, batch_scores)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch mean losses.
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")

    # Compute evaluation metrics: the regression outputs are rounded to the
    # nearest integer before being compared with the labels.
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average = "macro")
    print(f"F1 Score: {f1}")
    return f1

# Score every embedding variant on MRPC and record the result under its own key.
downstream_results['MRPC Biased F1'] = get_mrpc_score(mrpc_biased_word_embedding_dict)
if CFG.run_self_debias:
    # Use the self-debias table from the preparation cell; scoring the biased
    # table a second time would only repeat the baseline under this key.
    # NOTE(review): confirm self_debias_embedding_dict covers the MRPC vocabulary.
    downstream_results['MRPC Self-Debias F1'] = get_mrpc_score(self_debias_embedding_dict)
else:
    downstream_results['MRPC Self-Debias F1'] = None
downstream_results['MRPC INLP F1'] = get_mrpc_score(mrpc_word_dict_inlp)
downstream_results['MRPC Sent-Debias F1'] = get_mrpc_score(mrpc_word_dict_sent_debias)

In [None]:
# Load MNLI from the shared data directory configured in CFG.
mnli_df = pd.read_parquet(CFG.base_path + "mnli.parquet")

# Keep a fixed 1% sample of the rows.
mnli_df = mnli_df.sample(frac=0.01, random_state=42).reset_index(drop=True)
display(mnli_df.head())

# Module-level inputs read later by get_mnli_score.
sentence_pairs = list(zip(mnli_df["premise"], mnli_df["hypothesis"]))
similarity_scores = mnli_df["label"].tolist()

stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Lower-case and tokenize ``text``, keeping alphabetic non-stopword tokens."""
    tokens = nltk.word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha() and word not in stop_words]

# One token list per sentence, premise first and hypothesis second for each pair.
sentences = [preprocess_text(sentence) for pair in sentence_pairs for sentence in pair]

# Unique tokens across the sampled MNLI sentences.
mnli_words = list({word for sentence in sentences for word in sentence})

if CFG.run_self_debias:
    # Embed the MNLI vocabulary itself. The previous loop walked `loaded_words`,
    # so this table did not cover the same words as the other three tables.
    # NOTE(review): assumes generate_embeddings accepts any list of words — confirm.
    self_debias_embedding_dict = dict()
    for i in tqdm(range(0, len(mnli_words), CFG.batch_size)):
        batch_words = mnli_words[i : i + CFG.batch_size]
        batch_embeddings = debiaser_self_debias.generate_embeddings(batch_words)
        self_debias_embedding_dict.update(zip(batch_words, batch_embeddings))

# One word -> embedding table per model variant (global names unchanged).
mnli_biased_word_empoddings = get_word_embeddings(mnli_words, lm_model)
mnli_biased_word_embedding_dict = dict(zip(mnli_words, mnli_biased_word_empoddings))
mnli_word_embeddings_inlp = get_word_embeddings(mnli_words, lm_model_inlp)
mnli_word_embeddings_sent_debias = get_word_embeddings(mnli_words, lm_model_sent_debias)
mnli_word_dict_inlp = dict(zip(mnli_words, mnli_word_embeddings_inlp))
mnli_word_dict_sent_debias = dict(zip(mnli_words, mnli_word_embeddings_sent_debias))

In [None]:
class MNLIModel(nn.Module):
    """Two-tower head that maps a premise/hypothesis vector pair to one score.

    Each input runs through its own stack of two linear layers (there is no
    activation between them). The cosine similarity of the two projections is
    then passed through a final one-unit linear layer.

    Args:
        input_dim: Reference width used to size the hidden layers
            (``input_dim // 2`` followed by ``input_dim // 4``). The real input
            width is inferred by the lazy layers on the first forward pass.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Tower for the first sentence.
        self.fc1 = nn.LazyLinear(input_dim // 2)
        self.fc1_1 = nn.LazyLinear(input_dim // 4)
        # Tower for the second sentence (separate weights).
        self.fc2 = nn.LazyLinear(input_dim // 2)
        self.fc2_1 = nn.LazyLinear(input_dim // 4)
        # Rescales the scalar similarity into the label range.
        self.fc3 = nn.LazyLinear(1)

    def forward(self, x1, x2):
        """Return one score per pair, shaped ``(batch, 1)``."""
        x1 = self.fc1_1(self.fc1(x1))
        x2 = self.fc2_1(self.fc2(x2))
        # Go through `nn.functional` rather than an `F` alias: the notebook's
        # import cell brings in `nn` but never defines `F`.
        similarity = nn.functional.cosine_similarity(x1, x2).view(-1, 1)
        return self.fc3(similarity)

class SentencePairDataset(Dataset):
    """Pairs each sentence-pair embedding with the score at the same index."""

    def __init__(self, embeddings, scores):
        # Kept by reference; nothing is copied or converted here.
        self.embeddings, self.scores = embeddings, scores

    def __len__(self):
        """Length of the embeddings sequence."""
        size = len(self.embeddings)
        return size

    def __getitem__(self, idx):
        """Return the ``(embedding pair, score)`` tuple stored at ``idx``."""
        item = (self.embeddings[idx], self.scores[idx])
        return item

def get_mnli_score(word_embeddings, num_epochs=100, lr=1e-4, random_state=42):
    """Train ``MNLIModel`` on the current sentence pairs and return its macro-F1.

    Reads the module-level ``sentence_pairs`` and ``similarity_scores``, so the
    MNLI data-preparation cell must be the last one to have assigned them.
    The labels are fitted as regression targets with an MSE loss.

    Args:
        word_embeddings: Word -> vector lookup handed to ``avg_feature_vector``
            to build one vector per sentence.
        num_epochs: Number of passes over the training split.
        lr: Learning rate for Adam.
        random_state: Seed for the 80/20 train/validation split. Fixing it lets
            every embedding variant be scored on the same held-out pairs.

    Returns:
        Macro-averaged F1 between the validation labels and the model outputs
        rounded to the nearest integer.
    """
    # Convert the sentence pairs to (vector1, vector2) tuples
    pair_vectors = [
        (
            avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
            avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim),
        )
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_vectors, val_vectors, train_scores, val_scores = train_test_split(
        pair_vectors, similarity_scores, test_size=0.2, random_state=random_state
    )

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_vectors, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_vectors, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = MNLIModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    # `torch.optim` is reachable through the top-level `torch` import.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def run_batch(batch_vectors, batch_scores):
        """Return ``(predictions, targets, loss)`` for one collated batch."""
        first = torch.as_tensor(batch_vectors[0], dtype=torch.float32).to(CFG.device)
        second = torch.as_tensor(batch_vectors[1], dtype=torch.float32).to(CFG.device)
        targets = torch.as_tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        # The model emits (batch, 1) while there is one target per item.
        # Drop the trailing axis so MSELoss compares them element-wise instead
        # of broadcasting both into a (batch, batch) grid.
        predictions = model(first, second).squeeze(-1)
        return predictions, targets, loss_fn(predictions, targets)

    # Training loop
    for epoch in range(num_epochs):
        losses = []
        for batch_vectors, batch_scores in train_dataloader:
            _, _, loss = run_batch(batch_vectors, batch_scores)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_vectors, batch_scores in val_dataloader:
            predictions, targets, loss = run_batch(batch_vectors, batch_scores)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch mean losses.
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")

    # Compute evaluation metrics: the regression outputs are rounded to the
    # nearest integer before being compared with the labels.
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average = "macro")
    print(f"F1 Score: {f1}")
    return f1

# Score every embedding variant on MNLI and record the result under its own key.
downstream_results['MNLI Biased F1'] = get_mnli_score(mnli_biased_word_embedding_dict)
if CFG.run_self_debias:
    # Use the self-debias table from the preparation cell; scoring the biased
    # table a second time would only repeat the baseline under this key.
    # NOTE(review): confirm self_debias_embedding_dict covers the MNLI vocabulary.
    downstream_results['MNLI Self-Debias F1'] = get_mnli_score(self_debias_embedding_dict)
else:
    downstream_results['MNLI Self-Debias F1'] = None
downstream_results['MNLI INLP F1'] = get_mnli_score(mnli_word_dict_inlp)
downstream_results['MNLI Sent-Debias F1'] = get_mnli_score(mnli_word_dict_sent_debias)

In [None]:
# Load RTE from the shared data directory configured in CFG.
rte_df = pd.read_parquet(CFG.base_path + "rte.parquet")

display(rte_df.head())

# Module-level inputs read later by get_rte_score.
sentence_pairs = list(zip(rte_df["sentence1"], rte_df["sentence2"]))
similarity_scores = rte_df["label"].tolist()

stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Lower-case and tokenize ``text``, keeping alphabetic non-stopword tokens."""
    tokens = nltk.word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha() and word not in stop_words]

# One token list per sentence, first and second sentence of each pair in turn.
sentences = [preprocess_text(sentence) for pair in sentence_pairs for sentence in pair]

# Unique tokens across the RTE sentences.
rte_words = list({word for sentence in sentences for word in sentence})

if CFG.run_self_debias:
    # Embed the RTE vocabulary itself. The previous loop walked `loaded_words`,
    # so this table did not cover the same words as the other three tables.
    # NOTE(review): assumes generate_embeddings accepts any list of words — confirm.
    self_debias_embedding_dict = dict()
    for i in tqdm(range(0, len(rte_words), CFG.batch_size)):
        batch_words = rte_words[i : i + CFG.batch_size]
        batch_embeddings = debiaser_self_debias.generate_embeddings(batch_words)
        self_debias_embedding_dict.update(zip(batch_words, batch_embeddings))

# One word -> embedding table per model variant (global names unchanged).
rte_biased_word_empoddings = get_word_embeddings(rte_words, lm_model)
rte_biased_word_embedding_dict = dict(zip(rte_words, rte_biased_word_empoddings))
rte_word_embeddings_inlp = get_word_embeddings(rte_words, lm_model_inlp)
rte_word_embeddings_sent_debias = get_word_embeddings(rte_words, lm_model_sent_debias)
rte_word_dict_inlp = dict(zip(rte_words, rte_word_embeddings_inlp))
rte_word_dict_sent_debias = dict(zip(rte_words, rte_word_embeddings_sent_debias))

In [None]:
class RTEModel(nn.Module):
    """Two-tower head that maps a pair of sentence vectors to a single score.

    Each input runs through its own stack of two linear layers (there is no
    activation between them). The cosine similarity of the two projections is
    then passed through a final one-unit linear layer.

    Args:
        input_dim: Reference width used to size the hidden layers
            (``input_dim // 2`` followed by ``input_dim // 4``). The real input
            width is inferred by the lazy layers on the first forward pass.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Tower for the first sentence.
        self.fc1 = nn.LazyLinear(input_dim // 2)
        self.fc1_1 = nn.LazyLinear(input_dim // 4)
        # Tower for the second sentence (separate weights).
        self.fc2 = nn.LazyLinear(input_dim // 2)
        self.fc2_1 = nn.LazyLinear(input_dim // 4)
        # Rescales the scalar similarity into the label range.
        self.fc3 = nn.LazyLinear(1)

    def forward(self, x1, x2):
        """Return one score per pair, shaped ``(batch, 1)``."""
        x1 = self.fc1_1(self.fc1(x1))
        x2 = self.fc2_1(self.fc2(x2))
        # Go through `nn.functional` rather than an `F` alias: the notebook's
        # import cell brings in `nn` but never defines `F`.
        similarity = nn.functional.cosine_similarity(x1, x2).view(-1, 1)
        return self.fc3(similarity)

class SentencePairDataset(Dataset):
    """Dataset whose item ``i`` is ``(embeddings[i], scores[i])``."""

    def __init__(self, embeddings, scores):
        # Stored untouched so indexing semantics follow the containers passed in.
        self.scores = scores
        self.embeddings = embeddings

    def __len__(self):
        """Item count, defined by the embeddings container."""
        total = len(self.embeddings)
        return total

    def __getitem__(self, idx):
        """Look up the embedding pair and its score at position ``idx``."""
        vectors = self.embeddings[idx]
        target = self.scores[idx]
        return vectors, target

def get_rte_score(word_embeddings, num_epochs=100, lr=1e-5, random_state=42):
    """Train ``RTEModel`` on the current sentence pairs and return its macro-F1.

    Reads the module-level ``sentence_pairs`` and ``similarity_scores``, so the
    RTE data-preparation cell must be the last one to have assigned them.

    Args:
        word_embeddings: Word -> vector lookup handed to ``avg_feature_vector``
            to build one vector per sentence.
        num_epochs: Number of passes over the training split.
        lr: Learning rate for Adam.
        random_state: Seed for the 80/20 train/validation split. Fixing it lets
            every embedding variant be scored on the same held-out pairs.

    Returns:
        Macro-averaged F1 between the validation labels and the model outputs
        rounded to the nearest integer.
    """
    # Convert the sentence pairs to (vector1, vector2) tuples
    pair_vectors = [
        (
            avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
            avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim),
        )
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_vectors, val_vectors, train_scores, val_scores = train_test_split(
        pair_vectors, similarity_scores, test_size=0.2, random_state=random_state
    )

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_vectors, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_vectors, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = RTEModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    # `torch.optim` is reachable through the top-level `torch` import.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def run_batch(batch_vectors, batch_scores):
        """Return ``(predictions, targets, loss)`` for one collated batch."""
        first = torch.as_tensor(batch_vectors[0], dtype=torch.float32).to(CFG.device)
        second = torch.as_tensor(batch_vectors[1], dtype=torch.float32).to(CFG.device)
        targets = torch.as_tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        # The model emits (batch, 1) while there is one target per item.
        # Drop the trailing axis so MSELoss compares them element-wise instead
        # of broadcasting both into a (batch, batch) grid.
        predictions = model(first, second).squeeze(-1)
        return predictions, targets, loss_fn(predictions, targets)

    # Training loop
    for epoch in range(num_epochs):
        losses = []
        for batch_vectors, batch_scores in train_dataloader:
            _, _, loss = run_batch(batch_vectors, batch_scores)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_vectors, batch_scores in val_dataloader:
            predictions, targets, loss = run_batch(batch_vectors, batch_scores)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch mean losses.
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")

    # Compute evaluation metrics: the regression outputs are rounded to the
    # nearest integer before being compared with the labels.
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average = "macro")
    print(f"F1 Score: {f1}")
    return f1

# Score every embedding variant on RTE and record the result under its own key.
downstream_results['RTE Biased F1'] = get_rte_score(rte_biased_word_embedding_dict)
if CFG.run_self_debias:
    # Use the self-debias table from the preparation cell; scoring the biased
    # table a second time would only repeat the baseline under this key.
    # NOTE(review): confirm self_debias_embedding_dict covers the RTE vocabulary.
    downstream_results['RTE Self-Debias F1'] = get_rte_score(self_debias_embedding_dict)
else:
    downstream_results['RTE Self-Debias F1'] = None
downstream_results['RTE INLP F1'] = get_rte_score(rte_word_dict_inlp)
downstream_results['RTE Sent-Debias F1'] = get_rte_score(rte_word_dict_sent_debias)

In [None]:
# Load WNLI from the shared data directory configured in CFG.
wnli_df = pd.read_parquet(CFG.base_path + "wnli.parquet")

display(wnli_df.head())

# Module-level inputs read later by get_wnli_score.
sentence_pairs = list(zip(wnli_df["sentence1"], wnli_df["sentence2"]))
similarity_scores = wnli_df["label"].tolist()

stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Lower-case and tokenize ``text``, keeping alphabetic non-stopword tokens."""
    tokens = nltk.word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha() and word not in stop_words]

# One token list per sentence, first and second sentence of each pair in turn.
sentences = [preprocess_text(sentence) for pair in sentence_pairs for sentence in pair]

# Unique tokens across the WNLI sentences.
wnli_words = list({word for sentence in sentences for word in sentence})

if CFG.run_self_debias:
    # Embed the WNLI vocabulary itself. The previous loop walked `loaded_words`,
    # so this table did not cover the same words as the other three tables.
    # NOTE(review): assumes generate_embeddings accepts any list of words — confirm.
    self_debias_embedding_dict = dict()
    for i in tqdm(range(0, len(wnli_words), CFG.batch_size)):
        batch_words = wnli_words[i : i + CFG.batch_size]
        batch_embeddings = debiaser_self_debias.generate_embeddings(batch_words)
        self_debias_embedding_dict.update(zip(batch_words, batch_embeddings))

# One word -> embedding table per model variant (global names unchanged).
wnli_biased_word_empoddings = get_word_embeddings(wnli_words, lm_model)
wnli_biased_word_embedding_dict = dict(zip(wnli_words, wnli_biased_word_empoddings))
wnli_word_embeddings_inlp = get_word_embeddings(wnli_words, lm_model_inlp)
wnli_word_embeddings_sent_debias = get_word_embeddings(wnli_words, lm_model_sent_debias)
wnli_word_dict_inlp = dict(zip(wnli_words, wnli_word_embeddings_inlp))
wnli_word_dict_sent_debias = dict(zip(wnli_words, wnli_word_embeddings_sent_debias))

In [None]:
class WNLIModel(nn.Module):
    """Two-tower head that maps a pair of sentence vectors to a single score.

    Each input runs through its own stack of two linear layers (there is no
    activation between them). The cosine similarity of the two projections is
    then passed through a final one-unit linear layer.

    Args:
        input_dim: Reference width used to size the hidden layers
            (``input_dim // 2`` followed by ``input_dim // 4``). The real input
            width is inferred by the lazy layers on the first forward pass.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Tower for the first sentence.
        self.fc1 = nn.LazyLinear(input_dim // 2)
        self.fc1_1 = nn.LazyLinear(input_dim // 4)
        # Tower for the second sentence (separate weights).
        self.fc2 = nn.LazyLinear(input_dim // 2)
        self.fc2_1 = nn.LazyLinear(input_dim // 4)
        # Rescales the scalar similarity into the label range.
        self.fc3 = nn.LazyLinear(1)

    def forward(self, x1, x2):
        """Return one score per pair, shaped ``(batch, 1)``."""
        x1 = self.fc1_1(self.fc1(x1))
        x2 = self.fc2_1(self.fc2(x2))
        # Go through `nn.functional` rather than an `F` alias: the notebook's
        # import cell brings in `nn` but never defines `F`.
        similarity = nn.functional.cosine_similarity(x1, x2).view(-1, 1)
        return self.fc3(similarity)

class SentencePairDataset(Dataset):
    """Serves ``(embedding pair, score)`` tuples by position."""

    def __init__(self, embeddings, scores):
        # No validation or copying: the two containers are held as passed.
        self.embeddings, self.scores = embeddings, scores

    def __len__(self):
        """Size of the embeddings container."""
        count = len(self.embeddings)
        return count

    def __getitem__(self, idx):
        """Return the entry at ``idx`` from both containers as a tuple."""
        embedding_pair = self.embeddings[idx]
        label = self.scores[idx]
        return embedding_pair, label

def get_wnli_score(word_embeddings, num_epochs=100, lr=1e-6, random_state=42):
    """Train ``WNLIModel`` on the current sentence pairs and return its macro-F1.

    Reads the module-level ``sentence_pairs`` and ``similarity_scores``, so the
    WNLI data-preparation cell must be the last one to have assigned them.

    Args:
        word_embeddings: Word -> vector lookup handed to ``avg_feature_vector``
            to build one vector per sentence.
        num_epochs: Number of passes over the training split.
        lr: Learning rate for Adam.
        random_state: Seed for the 80/20 train/validation split. Fixing it lets
            every embedding variant be scored on the same held-out pairs.

    Returns:
        Macro-averaged F1 between the validation labels and the model outputs
        rounded to the nearest integer.
    """
    # Convert the sentence pairs to (vector1, vector2) tuples
    pair_vectors = [
        (
            avg_feature_vector(s1, word_embeddings, num_features=CFG.embedding_dim),
            avg_feature_vector(s2, word_embeddings, num_features=CFG.embedding_dim),
        )
        for s1, s2 in sentence_pairs
    ]

    # Split the data into a training set and a validation set
    train_vectors, val_vectors, train_scores, val_scores = train_test_split(
        pair_vectors, similarity_scores, test_size=0.2, random_state=random_state
    )

    # Create DataLoaders
    train_dataloader = DataLoader(SentencePairDataset(train_vectors, train_scores), batch_size=32, shuffle=True)
    val_dataloader = DataLoader(SentencePairDataset(val_vectors, val_scores), batch_size=32)

    # Instantiate the model and define the loss and optimizer
    model = WNLIModel(CFG.embedding_dim).to(CFG.device)
    loss_fn = nn.MSELoss()
    # `torch.optim` is reachable through the top-level `torch` import.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    def run_batch(batch_vectors, batch_scores):
        """Return ``(predictions, targets, loss)`` for one collated batch."""
        first = torch.as_tensor(batch_vectors[0], dtype=torch.float32).to(CFG.device)
        second = torch.as_tensor(batch_vectors[1], dtype=torch.float32).to(CFG.device)
        targets = torch.as_tensor(batch_scores, dtype=torch.float32).to(CFG.device)
        # The model emits (batch, 1) while there is one target per item.
        # Drop the trailing axis so MSELoss compares them element-wise instead
        # of broadcasting both into a (batch, batch) grid.
        predictions = model(first, second).squeeze(-1)
        return predictions, targets, loss_fn(predictions, targets)

    # Training loop
    for epoch in range(num_epochs):
        losses = []
        for batch_vectors, batch_scores in train_dataloader:
            _, _, loss = run_batch(batch_vectors, batch_scores)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch}, Loss: {np.mean(losses)}")

    # Evaluation
    model.eval()
    total_loss, total_count = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for batch_vectors, batch_scores in val_dataloader:
            predictions, targets, loss = run_batch(batch_vectors, batch_scores)
            total_loss += loss.item()
            total_count += 1
            y_true.extend(targets.cpu().tolist())
            y_pred.extend(predictions.cpu().tolist())
    # Mean of the per-batch mean losses.
    avg_loss = total_loss / total_count
    print(f"Validation loss: {avg_loss}")

    # Compute evaluation metrics: the regression outputs are rounded to the
    # nearest integer before being compared with the labels.
    y_pred = [round(pred) for pred in y_pred]
    f1 = f1_score(y_true, y_pred, average = "macro")
    print(f"F1 Score: {f1}")
    return f1

# Score every embedding variant on WNLI and record the result under its own key.
downstream_results['WNLI Biased F1'] = get_wnli_score(wnli_biased_word_embedding_dict)
if CFG.run_self_debias:
    # Use the self-debias table from the preparation cell; scoring the biased
    # table a second time would only repeat the baseline under this key.
    # NOTE(review): confirm self_debias_embedding_dict covers the WNLI vocabulary.
    downstream_results['WNLI Self-Debias F1'] = get_wnli_score(self_debias_embedding_dict)
else:
    downstream_results['WNLI Self-Debias F1'] = None
downstream_results['WNLI INLP F1'] = get_wnli_score(wnli_word_dict_inlp)
downstream_results['WNLI Sent-Debias F1'] = get_wnli_score(wnli_word_dict_sent_debias)

In [None]:
# Load SST from the shared data directory configured in CFG.
sst_df = pd.read_parquet(CFG.base_path + "sst.parquet")

display(sst_df.head())
# The stale `similarity_scores = ... wnli_df ...` assignment that used to sit
# here was dropped: nothing in the SST pipeline reads it, and it made this cell
# fail unless the WNLI cell had been run first.

stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    """Lower-case and tokenize ``text``, keeping alphabetic non-stopword tokens."""
    tokens = nltk.word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha() and word not in stop_words]

sst_df['processed_sentence'] = sst_df['sentence'].apply(preprocess_text)
# Module-level split read later by get_sst_score.
X_train, X_test, y_train, y_test = train_test_split(sst_df['processed_sentence'], sst_df['label'], test_size=0.2, random_state=42)

sentences = list(sst_df['processed_sentence'])

# Unique tokens across every SST sentence (train and test).
sst_words = list({word for sentence in sentences for word in sentence})

if CFG.run_self_debias:
    # Embed the SST vocabulary itself. The previous loop walked `loaded_words`,
    # so this table did not cover the same words as the other three tables.
    # NOTE(review): assumes generate_embeddings accepts any list of words — confirm.
    sst_self_debias_embedding_dict = dict()
    for i in tqdm(range(0, len(sst_words), CFG.batch_size)):
        batch_words = sst_words[i : i + CFG.batch_size]
        batch_embeddings = debiaser_self_debias.generate_embeddings(batch_words)
        sst_self_debias_embedding_dict.update(zip(batch_words, batch_embeddings))

# One word -> embedding table per model variant (global names unchanged).
sst_biased_word_empoddings = get_word_embeddings(sst_words, lm_model)
sst_biased_word_embedding_dict = dict(zip(sst_words, sst_biased_word_empoddings))
sst_word_embeddings_inlp = get_word_embeddings(sst_words, lm_model_inlp)
sst_word_embeddings_sent_debias = get_word_embeddings(sst_words, lm_model_sent_debias)
sst_word_dict_inlp = dict(zip(sst_words, sst_word_embeddings_inlp))
sst_word_dict_sent_debias = dict(zip(sst_words, sst_word_embeddings_sent_debias))

In [None]:
# Sanity check: show the shape of one embedding from the biased SST table.
# Raises KeyError if 'man' is not one of the table's keys.
sst_biased_word_embedding_dict['man'].shape

In [None]:
# Sizes of the train split, the test split and the full SST frame.
len(X_train), len(X_test), len(sst_df)

In [None]:
def get_sst_score(cls_word_dict):
    """Fit an XGBoost classifier on averaged word vectors and return test accuracy.

    Uses the module-level ``X_train``, ``X_test``, ``y_train`` and ``y_test``
    produced by the SST preparation cell.

    Args:
        cls_word_dict: Word -> vector lookup passed to ``avg_feature_vector``.

    Returns:
        Accuracy of the fitted classifier on the test split.
    """
    def vectorize(reviews):
        # Each review is a token list; it is re-joined with spaces before
        # being handed to avg_feature_vector.
        rows = []
        for tokens in tqdm(reviews):
            rows.append(avg_feature_vector(" ".join(tokens), cls_word_dict, CFG.embedding_dim))
        return np.array(rows)

    # Vectorize labeled reviews
    train_matrix = vectorize(X_train)
    test_matrix = vectorize(X_test)

    # Train an XGBoost classifier
    classifier = XGBClassifier(n_estimators=150, max_depth=3, learning_rate=0.05)
    classifier.fit(train_matrix, y_train)

    # Make predictions and evaluate them against the held-out labels
    predicted = classifier.predict(test_matrix)
    accuracy = accuracy_score(y_test, predicted)
    print(f"Accuracy: {accuracy:.2f}")
    return accuracy

# Record SST accuracy for each embedding variant, in a fixed key order.
downstream_results['SST Biased Accuracy'] = get_sst_score(sst_biased_word_embedding_dict)
# The self-debias table only exists when CFG.run_self_debias is set; the
# conditional expression evaluates the call only in that case.
downstream_results['SST Self-Debias Accuracy'] = (
    get_sst_score(sst_self_debias_embedding_dict) if CFG.run_self_debias else None
)
downstream_results['SST INLP Accuracy'] = get_sst_score(sst_word_dict_inlp)
downstream_results['SST Sent-Debias Accuracy'] = get_sst_score(sst_word_dict_sent_debias)

In [None]:
# Hand the collected downstream scores to save_results together with the
# file name configured in CFG.ds_results_filename.
save_results(downstream_results, CFG.ds_results_filename)