

# Exploring the biases in embeddings: Word2Vec, BERT and GPT2

The goal of this workshop is to explore the concept of bias in NLP models such as Word2Vec, BERT or GPT2, through the word embeddings they produce.

References:

*   [Bolukbasi et al 2016](https://arxiv.org/abs/1607.06520)
*   [Ellis et al 2022](https://cs.carleton.edu/cs_comps/2223/replication/final-results-4/filez/Debiasewe_Final_Paper.pdf)





In [None]:
import json
import requests

import numpy as np
from gensim.models.word2vec import Word2Vec
import gensim.downloader as api
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from transformers import GPT2Model, GPT2Tokenizer, AutoModel, AutoTokenizer, BertModel, BertTokenizer
from torch import cosine_similarity, tensor

## Some useful functions

In [None]:

def load_json(url):
    """
    Download the document at ``url`` and return its body parsed as JSON.

    Args:
        url: address of the JSON document to fetch with an HTTP GET.

    Returns:
        The decoded JSON content (whatever ``json.loads`` produces for the body).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within the timeout.
        json.JSONDecodeError: if the body is not valid JSON.
    """
    # A timeout prevents the notebook from hanging forever on a stalled connection.
    resp = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of handing an error page to the JSON parser.
    resp.raise_for_status()
    return json.loads(resp.text)

def get_embedding(target_word, tokenizer, model):
    """
    Get the embedding vector of a text from a HuggingFace (transformers package) model.

    The text is encoded with the tokenizer (special tokens included), fed to the
    model as a batch of one sequence, and the first model output is averaged
    over dimension 1.

    Args:
        target_word: text to embed.
        tokenizer: object exposing ``encode(text, add_special_tokens=...)`` that
            returns a list of token ids.
        model: callable taking a tensor of token ids; only its first output is used
            (presumably the last hidden state, shaped (batch, tokens, hidden) -
            confirm for the model in use).

    Returns:
        The first model output averaged over dimension 1. The tensor does not
        track gradients.
    """
    # Local import: the file only imports `cosine_similarity` and `tensor` from torch.
    from torch import no_grad

    input_ids = tokenizer.encode(target_word, add_special_tokens=True)
    # Inference only: without no_grad every embedding would keep an autograd graph alive.
    with no_grad():
        outputs = model(tensor([input_ids]))[0]
    return outputs.mean(1)

def analogy_w2v(word_embedding, positive, negative, topn=5):
    """
    Print the nearest neighbours of an analogy query, for gensim models.

    The query is delegated to ``word_embedding.most_similar`` with the given
    ``positive`` and ``negative`` word lists; each of the ``topn`` returned
    entries is printed on its own line. Nothing is returned.
    """
    for match in word_embedding.most_similar(positive=positive, negative=negative, topn=topn):
        print(match)

def analogy(model, tokenizer, positive, negative, corpus, topn=5):
    """
    Compute analogies, i.e. operations on the embedding vectors and similarities with a corpus
    For HuggingFace models

    The target vector is ``positive[0] - negative[0] + positive[1]``. Every
    alphanumeric word of ``corpus`` is embedded with ``get_embedding`` and
    compared to the target with the cosine similarity.

    Args:
        model: HuggingFace model forwarded to ``get_embedding``.
        tokenizer: HuggingFace tokenizer forwarded to ``get_embedding``.
        positive: exactly two words whose embeddings are added.
        negative: exactly one word whose embedding is subtracted.
        corpus: iterable of candidate words; non-alphanumeric entries are skipped.
        topn: maximum number of results to return.

    Returns:
        A NumPy object array of shape ``(k, 2)`` with ``k <= topn``. Each row is
        ``[word, similarity]`` with the similarity stored as a float, and rows
        are ordered from most to least similar. The array is empty when no
        corpus word qualifies.

    Raises:
        ValueError: if ``positive`` does not hold two words or ``negative`` one word.
    """
    if len(positive) != 2:
        raise ValueError("`positive` must contain exactly two words")
    if len(negative) != 1:
        raise ValueError("`negative` must contain exactly one word")

    # Get the words
    word_1, word_2 = positive
    word_3 = negative[0]

    # Compute the embedding of word_1 - word_3 + word_2, i.e. the target
    vector = get_embedding(target_word=word_1, model=model, tokenizer=tokenizer)
    vector += get_embedding(target_word=word_2, model=model, tokenizer=tokenizer)
    vector -= get_embedding(target_word=word_3, model=model, tokenizer=tokenizer)

    # Report the operation that is actually computed above
    print(f"{word_1} - {word_3} + {word_2} = ?")

    # Loop over all of the words in a corpus, compute the embedding and compare with the target
    words = []
    scores = []
    for word in corpus:
        if word.isalnum():
            emb = get_embedding(word, tokenizer=tokenizer, model=model)
            similarity = cosine_similarity(vector, emb).mean()
            words.append(word)
            scores.append(float(similarity))

    # Rank on the numeric scores: packing words and scores into one array first
    # risks coercing the scores to strings, which would then sort lexicographically.
    order = np.argsort(scores)[::-1][:topn]
    res = np.empty((len(order), 2), dtype=object)
    res[:, 0] = [words[idx] for idx in order]
    res[:, 1] = [scores[idx] for idx in order]
    return res

# Bias in Word2Vec models

## Explore `gensim` package

The `gensim` package provides tools to train, load and query word embeddings, in particular models based on the Word2Vec neural network architecture.

https://radimrehurek.com/gensim/

In [None]:
# List the names of the datasets (corpora) exposed by the gensim downloader
print("Datasets:\n")
available_corpora = api.info().get("corpora")
for corpus_name in available_corpora:
    print(corpus_name)

In [None]:
# Enumerate the names of the pre-trained models exposed by the gensim downloader
print("Pre-trained models:\n")
pretrained_models = api.info().get("models")
for model_name in pretrained_models:
    print(model_name)

In [None]:
# Pretty-print the downloader metadata of two well-known pre-trained models:
#   Model 1: word2vec-google-news-300
#   Model 2: glove-twitter-25
for model_name in ("word2vec-google-news-300", "glove-twitter-25"):
    print(json.dumps(api.info().get("models").get(model_name), indent=4))



In [None]:
# Load pre-trained models through the gensim downloader (the loading may be long...)

# Load the "glove-twitter-25" entry; used by the GloVe examples of this notebook
glove_word_embedding = api.load("glove-twitter-25")

# Load the "word2vec-google-news-300" entry; used by the Google News examples and the PCA
news_word_embedding = api.load("word2vec-google-news-300")


In [None]:
# Display the shape of the embedding matrix stored in the `.vectors` attribute
glove_word_embedding.vectors.shape

In [None]:
# Look up and display the embedding vector of a single word
# (`word` is reused by the following most_similar cell)
word = "queen"
glove_word_embedding[word]

In [None]:
# Display the 5 vocabulary entries the model ranks as most similar to `word`
glove_word_embedding.most_similar(word, topn=5)

In [None]:
# `key_to_index` is the vocabulary as a word -> index mapping;
# draw 10 of its words at random to get a feel for its content
vocab = glove_word_embedding.key_to_index
np.random.choice(list(vocab), 10)

## Get the embeddings from pre-trained models




### Example 1: glove-twitter-25 model

In [None]:
# Analogies, i.e. arithmetic on the word vectors followed by a similarity search.
# For instance, which words are the most similar to the result of
# king - man + woman ~ ?
# Each query below is announced with its own example number.

print("Example 1: king - man + woman ~ ?")
analogy_w2v(word_embedding=glove_word_embedding, positive=["king", "woman"], negative=["man"], topn=5)

print("\n\nExample 2: doctor - man + woman ~ ?")
analogy_w2v(word_embedding=glove_word_embedding, positive=["doctor", "woman"], negative=["man"])

print("\n\nExample 3: doctor - father + mother ~ ?")
analogy_w2v(word_embedding=glove_word_embedding, positive=["doctor", "mother"], negative=["father"])

### Example 2: word2vec-google-news-300 model

In [None]:
# Same three analogy queries as for GloVe, run on the Google News Word2Vec vectors.
# Each query is announced with its own example number.
print("Example 1: man - woman ~ king - ?")
analogy_w2v(word_embedding=news_word_embedding, positive=["king", "woman"], negative=["man"], topn=5)

print("\n\nExample 2: man - woman ~ doctor - ?")
analogy_w2v(word_embedding=news_word_embedding, positive=["doctor", "woman"], negative=["man"])

print("\n\nExample 3: father - mother ~ doctor - ?")
analogy_w2v(word_embedding=news_word_embedding, positive=["doctor", "mother"], negative=["father"])

## What about Transformer models?

Let's do the same operations for Transformer models (BERT and GPT2). Unfortunately, these models are not available in the gensim package, so we use the [transformers package from HuggingFace](https://huggingface.co/docs/transformers/index).

As the vocabulary is not embedded in the pre-trained models, we choose to compute the analogies on a list of gender-specific words, as in Bolukbasi et al 2016.

Then we compute the embeddings using transformers package and compare each word to a target_word with the cosine similarity.

In [None]:
# Load the gender specific list (see Bolukbasi et al 2016)
# The JSON file is fetched from the debiaswe GitHub repository; `corpus` is the
# candidate word list passed to `analogy` in the BERT and GPT2 cells.
corpus = load_json("https://raw.githubusercontent.com/tolga-b/debiaswe/master/data/gender_specific_seed.json")

In [None]:
# BERT: load the tokenizer and the model (switched to evaluation mode), then
# rank the gender-specific words against doctor - father + mother
bert_checkpoint = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)
model = BertModel.from_pretrained(bert_checkpoint, output_hidden_states=True).eval()

analogies = analogy(
    model=model,
    tokenizer=tokenizer,
    positive=["doctor", "mother"],
    negative=["father"],
    corpus=corpus,
)
for ranked_word in analogies:
    print(ranked_word)

### GPT2

In [None]:
# GPT2: load the tokenizer and the model, then rank the gender-specific words
# against doctor - father + mother
gpt2_checkpoint = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(gpt2_checkpoint)
model = GPT2Model.from_pretrained(gpt2_checkpoint)

analogies = analogy(
    model=model,
    tokenizer=tokenizer,
    positive=["doctor", "mother"],
    negative=["father"],
    corpus=corpus,
)
for ranked_word in analogies:
    print(ranked_word)

# Identifying the gender subspace

In [None]:
def get_differences(word_embedding, text_1, text_2):
    """
    Center a pair of word vectors on their midpoint (PCA inputs, see Bolukbasi et al 2016).

    Both words are looked up in ``word_embedding``; the mean of the two vectors
    is subtracted from each of them.

    Returns:
        A 2-tuple ``(vector_1 - midpoint, vector_2 - midpoint)``.
    """
    first_vector = word_embedding[text_1]
    second_vector = word_embedding[text_2]
    midpoint = (first_vector + second_vector) / 2
    return first_vector - midpoint, second_vector - midpoint

In [None]:
# Word pairs used to estimate the gender direction: each entry is
# [female-associated word, male-associated word]
pairs = [
    ["woman", "man"],
    ["girl", "boy"],
    ["she", "he"],
    ["mother", "father"],
    ["daughter", "son"],
    ["gal", "guy"],
    ["female", "male"],
    ["her", "his"],
    ["herself", "himself"],
    ["Mary", "John"],
]

In [None]:
# Stack the midpoint-centered vectors of every pair (two rows per pair)
# into a single matrix that will feed the PCA
averages = []
for word_a, word_b in pairs:
    averages.extend(get_differences(news_word_embedding, word_a, word_b))
averages = np.array(averages)

In [None]:
# Remove the per-dimension mean only (with_std=False leaves the scale untouched)
centering = StandardScaler(with_std=False)
scaled_inputs = centering.fit_transform(averages)

# Fit a 10-component PCA and accumulate the share of variance explained
pca = PCA(n_components=10)
pca.fit(scaled_inputs)
explained_variance_ratio = pca.explained_variance_ratio_
cumulative_explained_variance_ratio = explained_variance_ratio.cumsum()

plt.plot(cumulative_explained_variance_ratio, "o-")

### Words visualization

In [None]:
# NOTE(review): this flag is not read by any of the cells visible in this notebook
from_pairs = False

In [None]:
# Vocabulary of the Google News model as a word -> index mapping
vocab = news_word_embedding.key_to_index

# Some chosen words ("brilliant" and "governor" were misspelled in the original list)
sample_words = [
    "king", "husband", "treats", "heavy", "commit", "game", "seconds", "arrival",
    "tactical", "crafts", "identity", "trimester", "tanning", "user", "parts",
    "caused", "hoped", "modeling", "beautiful", "cake", "looks", "builder",
    "sewing", "dress", "letters", "nuclear", "hay", "quit", "brilliant", "genius",
    "divorce", "cocky", "yard", "journeyman", "dancers", "thighs", "lust",
    "seeking", "ties", "guru", "salon", "buddy", "sassy", "frost", "bride",
    "groom", "governor", "sharply", "homemaker", "dancer", "roses", "pal",
    "daughter", "brass", "buddies", "burly", "feminist", "babe", "folks",
    "friend", "priest", "mate", "boyfriend", "boyfriends", "beard", "witch",
    "witches", "dads", "boys", "cousin", "chap", "boyhood", "actresses", "gals",
    "fiance", "wives", "son", "lad", "queen", "niece", "girlfriends", "guys",
    "girlfriend", "nephew", "grandmother", "wife", "ladies", "fiancee",
]
# Keep only the words the model knows, so that the index lookup below cannot fail
sample_words = [word for word in sample_words if word in vocab]

# Row of each remaining word in the embedding matrix
indexes = [vocab[word] for word in sample_words]

In [None]:
# Pick the embedding rows of the sample words (same order as `sample_words`)
vectors = news_word_embedding.vectors[indexes]

In [None]:
# Project the sample word vectors onto the principal components fitted on the word pairs
proj = pca.transform(vectors)

In [None]:
# Plot limits: range of the first two projected components plus a margin
padding = 0.5
first_component, second_component = proj[:, 0], proj[:, 1]
min_x, max_x = first_component.min() - padding, first_component.max() + padding
min_y, max_y = second_component.min() - padding, second_component.max() + padding

fig, ax = plt.subplots(figsize=(12, 12))

# Faint horizontal and vertical lines through the origin
ax.plot([min_x, max_x], [0, 0], "b-", alpha=0.35)
ax.plot([0, 0], [min_y, max_y], "b-", alpha=0.35)

# Write each word at its (component 1, component 2) coordinates
for word, x_coord, y_coord in zip(sample_words, first_component, second_component):
    ax.text(x=x_coord, y=y_coord, s=word)

ax.set_xlim(min_x, max_x)
ax.set_ylim(min_y, max_y)
ax.set_aspect("equal")

## Train Word2Vec using gensim package

In [None]:
# Pretty-print the downloader metadata of the "text8" corpus
print(json.dumps(api.info().get("corpora").get("text8"), indent=4))

In [None]:
# Fetch the text8 corpus through the gensim downloader
# (this rebinds `corpus`, previously the gender-specific word list)
corpus = api.load("text8")

# Fit a Word2Vec model with 100-dimensional vectors on that corpus
model = Word2Vec(sentences=corpus, vector_size=100)
print(f"Finished after {model.epochs} epochs")

# The trained word vectors live in the `wv` attribute
word_embedding = model.wv

In [None]:
# Nearest neighbours of "car" in the freshly trained embedding
word_embedding.most_similar("car")

In [None]:
# Nearest neighbours of "nurse" in the freshly trained embedding
word_embedding.most_similar("nurse")

In [None]:
# Nearest neighbours of "engineer" in the freshly trained embedding
word_embedding.most_similar("engineer")

In [None]:
# Nearest neighbours of "surgeon" in the freshly trained embedding
word_embedding.most_similar("surgeon")

In [None]:
# Embedding matrix of the trained model and its dimensions
vectors = word_embedding.vectors
print("Embedding vectors dimensions:", vectors.shape)

# Vocabulary as a word -> index mapping; show 10 words drawn at random
vocab_dict = word_embedding.key_to_index
print("Vocabulary sample:")
print(np.random.choice(list(vocab_dict), 10))