In [1]:
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import *
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Config(dict):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        for k, v in kwargs.items():
            setattr(self, k, v)
    
    def set(self, key, val):
        self[key] = val
        setattr(self, key, val)
        
config = Config(
    model_type="bert-base-uncased",
    max_seq_len=128,
)

In [3]:
T = TypeVar('T')
def flatten(x: List[List[T]]) -> List[T]:
    return [item for sublist in x for item in sublist]

In [4]:
from allennlp.common.util import get_spacy_model
from spacy.attrs import ORTH
from spacy.tokenizer import Tokenizer

nlp = get_spacy_model("en_core_web_sm", pos_tags=False, parse=True, ner=False)
nlp.tokenizer.add_special_case("[MASK]", [{ORTH: "[MASK]"}])
def spacy_tok(s: str):
    return [w.text for w in nlp(s)]

In [5]:
from allennlp.data.tokenizers.word_splitter import SpacyWordSplitter
from allennlp.data.token_indexers import PretrainedBertIndexer
from allennlp.data.tokenizers import Token

token_indexer = PretrainedBertIndexer(
    pretrained_model=config.model_type,
    max_pieces=config.max_seq_len,
    do_lowercase=True,
 )o

# apparently we need to truncate the sequence here, which is a stupid design decision
def tokenize(x: str) -> List[Token]:
        return [Token(w) for w in flatten([
                token_indexer.wordpiece_tokenizer(w)
                for w in spacy_tok(x)]
        )[:config.max_seq_len]]

In [43]:
dir(token_indexer)

['__abstractmethods__',
 '__annotations__',
 '__args__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__extra__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next_in_mro__',
 '__orig_bases__',
 '__origin__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__tree_hash__',
 '__weakref__',
 '_abc_cache',
 '_abc_generic_negative_cache',
 '_abc_generic_negative_cache_version',
 '_abc_registry',
 '_add_encoding_to_vocabulary',
 '_add_start_and_end',
 '_added_to_vocabulary',
 '_do_lowercase',
 '_end_piece_ids',
 '_extend',
 '_gorg',
 '_namespace',
 '_never_lowercase',
 '_registry',
 '_separator_ids',
 '_start_piece_ids',
 '_token_min_padding_length',
 '_truncate_long_sequences',
 '_warn_about_truncation',
 '_warned_about_truncation',
 'as_

In [6]:
from pytorch_pretrained_bert import BertConfig, BertForMaskedLM
model = BertForMaskedLM.from_pretrained(config.model_type)
model

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
   

In [51]:
from allennlp.data import Vocabulary

vocab = Vocabulary()
token_indexer._add_encoding_to_vocabulary(vocab)

['__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_extend',
 '_index_to_token',
 '_non_padded_namespaces',
 '_oov_token',
 '_padding_token',
 '_registry',
 '_retained_counter',
 '_token_to_index',
 'add_token_to_namespace',
 'add_tokens_to_namespace',
 'by_name',
 'default_implementation',
 'extend_from_instances',
 'from_files',
 'from_instances',
 'from_params',
 'get_index_to_token_vocabulary',
 'get_token_from_index',
 'get_token_index',
 'get_token_to_index_vocabulary',
 'get_vocab_size',
 'is_padded',
 'list_available',
 'print_statistics',
 'register',
 'save_to_files',
 'set_from_file']

In [53]:
def get_logits(input_sentence: str) -> torch.Tensor:
    input_toks = tokenize(input_sentence)
    batch = token_indexer.tokens_to_indices(input_toks, vocab, "tokens")
    token_ids = torch.LongTensor(batch["tokens"]).unsqueeze(0)
    with torch.no_grad():
        out_logits = model(token_ids).squeeze(0)
    return out_logits.detach().cpu().numpy()

In [57]:
vocab.get_token_to_index_vocabulary('hay')

{'@@PADDING@@': 0, '@@UNKNOWN@@': 1}

In [58]:
input_toks = tokenize("[MASK] be shakin it the house.")
batch = token_indexer.tokens_to_indices(input_toks, vocab, "tokens")
print("batch:", batch)
token_ids = torch.LongTensor(batch["tokens"]).unsqueeze(0)
print(token_ids)

batch: {'tokens': [101, 103, 2022, 21146, 4939, 2009, 1996, 2160, 1012, 102], 'tokens-offsets': [1, 2, 3, 4, 5, 6, 7, 8], 'tokens-type-ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'mask': [1, 1, 1, 1, 1, 1, 1, 1]}
tensor([[  101,   103,  2022, 21146,  4939,  2009,  1996,  2160,  1012,   102]])


In [60]:
model(token_ids).shape

torch.Size([1, 10, 30522])

In [10]:
mytensor=model(token_ids).squeeze()

In [61]:
myembedding = mytensor.detach().cpu().numpy()

In [12]:
get_logits("I am a [MASK] man.").argmax(1)

array([1012,  100, 2572, 1037, 2204, 2158, 1012, 1012], dtype=int64)

In [13]:
full_vocab = {v:k for k, v in token_indexer.vocab.items()}

def indices_to_words(indices: Iterable[int]) -> List[str]:
    return [full_vocab[x] for x in indices]

In [14]:
indices_to_words(get_logits("he is very [MASK].").argmax(1))

['.', 'he', 'is', 'very', '[', '.', '.']

In [15]:
indices_to_words(get_logits("he is very [MASK].")[3])

KeyError: -11.363832

In [16]:
token_indexer.vocab['hay']

10974

In [17]:
indices_to_words(get_logits("he is [MASK].").argmax(1))

['.', 'he', 'is', '[', '.', '.']

In [18]:
indices_to_words(get_logits("she is [MASK].").argmax(1))

['.', 'she', 'is', '[', '.', '.']

In [19]:
indices_to_words(get_logits("she is very [MASK].").argmax(1))

['.', 'she', 'is', 'very', '[', '.', '.']

In [20]:
get_logits("I [MASK] there.").argmax(1)

array([1012,  100, 2003, 2045, 1012, 1012], dtype=int64)

The usual stuff

In [26]:
indices_to_words(get_logits("[MASK] is a programmer.").argmax(1))

['.', 'he', 'is', 'a', 'programmer', '.', '.']

In [27]:
indices_to_words(get_logits("he likes [MASK].").argmax(1))

['.', 'he', 'likes', '[', '.', '.']

In [23]:
indices_to_words(get_logits("[MASK] is a nurse.").argmax(1))

['.', 'she', 'is', 'a', 'nurse', '.', '.']

In [41]:
np.sort(get_logits("[MASK] is a programmer.")[1])[::-1]

0

Measuring difference

In [199]:
male_logits = get_logits("he is very [MASK].")[4, :]
female_logits = get_logits("she is very [MASK].")[4, :]

array([-7.7857866, -7.69565  , -7.7401123, ..., -6.6526732, -7.4256597,
       -3.9204094], dtype=float32)

In [198]:
np.sort(male_logits)[::-1]

array([ 15.263277,  11.297821,   9.394203, ..., -14.027637, -15.018905,
       -15.808717], dtype=float32)

In [19]:
def softmax(x, axis=0, eps=1e-9):
    e = np.exp(x)
    return e / (e.sum(axis, keepdims=True) + eps)

In [20]:
male_probs = softmax(male_logits)
female_probs = softmax(female_logits)

In [21]:
male_probs

array([1.13422584e-10, 1.09587665e-10, 8.14181014e-11, ...,
       4.68224848e-10, 5.64034479e-11, 2.31260189e-09], dtype=float32)

In [22]:
msk = ((male_probs >= 1e-6) & (female_probs >= 1e-6))
male_probs = male_probs[msk]
female_probs = female_probs[msk]

In [23]:
[(pos + 1, full_vocab[i]) for i, pos in enumerate((male_probs / female_probs).argsort()) if pos < 10]

[(3, '[unused5]'),
 (6, '[unused6]'),
 (7, '[unused9]'),
 (4, '[unused18]'),
 (1, '[unused32]'),
 (8, '[unused38]'),
 (9, '[unused47]'),
 (5, '[unused51]'),
 (2, '[unused54]'),
 (10, '[unused69]')]

In [24]:
[(pos + 1, full_vocab[i]) for i, pos in enumerate((female_probs / male_probs).argsort()) if pos < 10]

[(10, '[unused7]'),
 (2, '[unused22]'),
 (5, '[unused25]'),
 (9, '[unused29]'),
 (8, '[unused38]'),
 (1, '[unused44]'),
 (4, '[unused58]'),
 (7, '[unused67]'),
 (6, '[unused70]'),
 (3, '[unused71]')]

# Construct measure of bias

In [25]:
input_sentence = "[MASK] is intelligent"

In [26]:
def _get_mask_index(toks: Iterable[Token]) -> int:
    for i, t in enumerate(toks):
        if t.text == "[MASK]":
            return i + 1 # take the [CLS] token into account
    raise ValueError("No [MASK] token found")

In [27]:
def get_logits(input_sentence: str, n_calc: int=10) -> np.ndarray:
    """
    n_calc: Since the logits are non-deterministic, 
    computing the logits multiple times might be better
    """
    input_toks = tokenize(input_sentence)
    batch = token_indexer.tokens_to_indices(input_toks, vocab, "tokens")
    token_ids = torch.LongTensor(batch["tokens"]).unsqueeze(0)
    
    logits = None
    for _ in range(n_calc):
        with torch.no_grad():
            out_logits = model(token_ids).squeeze(0)
        if logits is None: logits = np.zeros(out_logits.shape)
        logits += out_logits.detach().cpu().numpy()
    return logits / n_calc

In [28]:
def get_logit_scores(input_sentence: str, words: int) -> Dict[str, float]:
    out_logits = get_logits(input_sentence)
    input_toks = tokenize(input_sentence)
    i = _get_mask_index(input_toks)
    return {w: out_logits[i, token_indexer.vocab[w]] for w in words}

def get_log_odds(input_sentence: str, word1: str, word2: str) -> float:
    scores = get_logit_scores(input_sentence, (word1, word2))
    return scores[word1] - scores[word2]

In [29]:
get_logit_scores("[MASK] is intelligent.", ["she", "he"])

{'she': 9.103129959106445, 'he': 9.641182708740235}

In [30]:
get_log_odds("[MASK] is intelligent.", "she", "he")

-0.5490983009338368

Surprisingly, marriage is more strongly associated with he than she

In [31]:
get_log_odds("[MASK] is married.", "she", "he")

-1.9561370372772213

In [32]:
get_log_odds("[MASK] is alive.", "she", "he")

-0.3699408054351796

In [33]:
get_log_odds("[MASK] is a person.", "she", "he")

-0.17259473800659286

In [34]:
get_log_odds("[MASK] is a doctor.", "she", "he")

-0.8532533645629883

In [35]:
get_log_odds("[MASK] is my mother.", "she", "he")

4.602996683120728

In [36]:
get_log_odds("[MASK] is my father.", "she", "he")

-3.791827392578125

This is strange...

In [37]:
get_log_odds("[MASK] is female.", "she", "he")

-1.9437612771987904

In [38]:
get_log_odds("[MASK] is ugly.", "she", "he")

-0.5187945365905762

This is strange too...

In [39]:
get_log_odds("[MASK] is male.", "she", "he")

0.39930248260498047

In [40]:
get_log_odds("[MASK] is a housewife", "she", "he")

1.9183058738708505

In [41]:
get_log_odds("[MASK] is a girl", "she", "he")

1.2518535137176512

In [2]:
from transformers import BertTokenizer, BertForMaskedLM
import torch

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')

input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)  # Batch size 1
outputs = model(input_ids, masked_lm_labels=input_ids)

loss, prediction_scores = outputs[:2]

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




In [3]:
loss

tensor(6.6588, grad_fn=<NllLossBackward>)

In [4]:
prediction_scores

tensor([[[ -7.8962,  -7.8105,  -7.7903,  ...,  -7.0694,  -7.1693,  -4.3590],
         [ -8.4461,  -8.4401,  -8.5044,  ...,  -8.0625,  -7.9909,  -5.7160],
         [-15.2953, -15.4727, -15.5865,  ..., -12.9857, -11.7038, -11.4293],
         ...,
         [-14.0628, -14.2535, -14.3645,  ..., -12.7151, -11.1621, -10.2317],
         [-10.6576, -10.7892, -11.0402,  ..., -10.3233, -10.1578,  -3.7721],
         [-11.3383, -11.4590, -11.1767,  ...,  -9.2152,  -9.5209,  -9.5571]]],
       grad_fn=<AddBackward0>)