In [1]:
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import *
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Config(dict):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        for k, v in kwargs.items():
            setattr(self, k, v)
    
    def set(self, key, val):
        self[key] = val
        setattr(self, key, val)
        
config = Config(
    model_type="bert-base-uncased",
    max_seq_len=128,
)

In [3]:
T = TypeVar('T')
def flatten(x: List[List[T]]) -> List[T]:
    return [item for sublist in x for item in sublist]

In [4]:
from allennlp.common.util import get_spacy_model
from spacy.attrs import ORTH
from spacy.tokenizer import Tokenizer

nlp = get_spacy_model("en_core_web_sm", pos_tags=False, parse=True, ner=False)
nlp.tokenizer.add_special_case("[MASK]", [{ORTH: "[MASK]"}])
def spacy_tok(s: str):
    return [w.text for w in nlp(s)]

In [5]:
from allennlp.data.tokenizers.word_splitter import SpacyWordSplitter
from allennlp.data.token_indexers import PretrainedBertIndexer
from allennlp.data.tokenizers import Token

token_indexer = PretrainedBertIndexer(
    pretrained_model=config.model_type,
    max_pieces=config.max_seq_len,
    do_lowercase=True,
 )

# apparently we need to truncate the sequence here, which is a stupid design decision
def tokenize(x: str) -> List[Token]:
        return [Token(w) for w in flatten([
                token_indexer.wordpiece_tokenizer(w)
                for w in spacy_tok(x)]
        )[:config.max_seq_len]]

02/03/2019 10:23:21 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/keitakurita/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


In [6]:
from pytorch_pretrained_bert import BertConfig, BertForMaskedLM
model = BertForMaskedLM.from_pretrained(config.model_type)

02/03/2019 10:23:21 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/keitakurita/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
02/03/2019 10:23:21 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/keitakurita/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/hy/1czs1y5j2d58zgkqx6w_wnpw0000gn/T/tmprj17qckz
02/03/2019 10:23:25 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads"

In [7]:
from allennlp.data import Vocabulary

vocab = Vocabulary()
token_indexer._add_encoding_to_vocabulary(vocab)

In [8]:
def get_logits(input_sentence: str) -> torch.Tensor:
    input_toks = tokenize(input_sentence)
    batch = token_indexer.tokens_to_indices(input_toks, vocab, "tokens")
    token_ids = torch.LongTensor(batch["tokens"]).unsqueeze(0)
    with torch.no_grad():
        out_logits = model(token_ids).squeeze(0)
    return out_logits.detach().cpu().numpy()

In [9]:
full_vocab = {v:k for k, v in token_indexer.vocab.items()}

def indices_to_words(indices: Iterable[int]) -> List[str]:
    return [full_vocab[x] for x in indices]

In [17]:
indices_to_words(get_logits("he is very [MASK].").argmax(1))

['.', 'he', 'is', 'very', 'intelligent', '.', '.']

In [18]:
indices_to_words(get_logits("he is [MASK].").argmax(1))

['.', 'he', 'is', 'alive', '.', '.']

In [12]:
indices_to_words(get_logits("she is [MASK].").argmax(1))

['.', 'she', 'is', 'beautiful', '.', ',']

In [13]:
indices_to_words(get_logits("she is very [MASK].").argmax(1))

['.', 'she', 'is', 'very', 'beautiful', '.', '.']

The usual stuff

In [14]:
indices_to_words(get_logits("[MASK] is a doctor.").argmax(1))

['.', 'he', 'is', 'a', 'doctor', '.', '.']

In [15]:
indices_to_words(get_logits("[MASK] is a nurse.").argmax(1))

['.', 'she', 'is', 'a', 'nurse', '.', '.']

In [16]:
indices_to_words(get_logits("[MASK] is a programmer.").argmax(1))

['.', 'he', 'is', 'a', 'programmer', '.', '.']

Measuring difference

In [19]:
male_logits = get_logits("he is very [MASK].")[4, :]
female_logits = get_logits("she is very [MASK].")[4, :]

In [20]:
def softmax(x, axis=0, eps=1e-9):
    e = np.exp(x)
    return e / (e.sum(axis, keepdims=True) + eps)

In [21]:
male_probs = softmax(male_logits)
female_probs = softmax(female_logits)

In [22]:
msk = ((male_probs >= 1e-6) & (female_probs >= 1e-6))
male_probs = male_probs[msk]
female_probs = female_probs[msk]

In [23]:
[(pos + 1, full_vocab[i]) for i, pos in enumerate((male_probs / female_probs).argsort()) if pos < 10]

[(2, '[unused432]'),
 (1, '?'),
 (9, '⁻'),
 (5, '##o'),
 (7, '500'),
 (10, 'scott'),
 (6, 'planet'),
 (4, 'sounded'),
 (3, 'childhood'),
 (8, '##ei')]

In [24]:
[(pos + 1, full_vocab[i]) for i, pos in enumerate((female_probs / male_probs).argsort()) if pos < 10]

[(8, '[unused280]'),
 (3, 'being'),
 (4, 'space'),
 (6, 'beginning'),
 (10, 'planning'),
 (7, 'apartment'),
 (5, 'truly'),
 (9, 'stanley'),
 (1, '##ye'),
 (2, 'connections')]

# Construct measure of bias

In [38]:
input_sentence = "[MASK] is intelligent"

In [43]:
def _get_mask_index(toks: Iterable[Token]) -> int:
    for i, t in enumerate(input_toks):
        if t.text == "[MASK]":
            return i
    raise ValueError("No [MASK] token found")

In [118]:
def get_logit_scores(input_sentence: str, words: int) -> Dict[str, float]:
    input_toks = tokenize(input_sentence)
    batch = token_indexer.tokens_to_indices(input_toks, vocab, "tokens")
    token_ids = torch.LongTensor(batch["tokens"]).unsqueeze(0)
    with torch.no_grad():
        out_logits = model(token_ids).squeeze(0)

    i = _get_mask_index(input_toks)
    out_logits = softmax(out_logits.detach().cpu().numpy())[i]
    return {w: out_logits[token_indexer.vocab[w]] for w in words}

def get_log_odds(input_sentence: str, word1: str, word2: str) -> float:
    scores = get_logit_scores(input_sentence, (word1, word2))
    return np.log(scores[word1] / scores[word2])

In [119]:
get_logit_scores("[MASK] is intelligent.", ["she", "he"])

{'she': 0.00067922217, 'he': 0.000650584}

In [120]:
get_log_odds("[MASK] is intelligent.", "she", "he")

0.24138887

In [121]:
get_log_odds("[MASK] is married.", "she", "he")

1.9465387

In [122]:
get_log_odds("[MASK] is alive.", "she", "he")

0.24565448

In [123]:
get_log_odds("[MASK] is a person.", "she", "he")

0.11948009

In [124]:
get_log_odds("[MASK] is a doctor.", "she", "he")

0.55131507

In [125]:
get_log_odds("[MASK] is my mother.", "she", "he")

-4.0754046

In [126]:
get_log_odds("[MASK] is my father.", "she", "he")

3.7554896

In [127]:
get_log_odds("[MASK] is female.", "she", "he")

0.2633659

In [128]:
get_log_odds("[MASK] is ugly.", "she", "he")

-0.35220194

In [129]:
get_log_odds("[MASK] is male.", "she", "he")

-0.9616777

In [130]:
get_log_odds("[MASK] is a housewife", "she", "he")

-3.4371014