<a href="https://colab.research.google.com/github/dr-irani/cs682-final-project/blob/master/notebooks/logprob_scores.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
!pip install pytorch_pretrained_bert
from pytorch_pretrained_bert import BertTokenizer, BertForMaskedLM, BertModel
import torch
import pandas as pd
import numpy as np
from collections import defaultdict
from typing import Iterable, Dict, List, TypeVar

# Relative path to a BERT models directory (presumably a local cache;
# not referenced by any other cell visible in this notebook).
BERT_MODELS = "./bert_models/"



In [35]:
!pip install allennlp
from allennlp.common.util import get_spacy_model
from spacy.attrs import ORTH
from spacy.tokenizer import Tokenizer
from allennlp.data.tokenizers.word_splitter import SpacyWordSplitter
from allennlp.data.token_indexers import PretrainedBertIndexer
from allennlp.data.tokenizers import Token
from allennlp.data import Vocabulary

# Generic element type used in the annotations of the list helpers below.
T = TypeVar("T")

# spaCy English pipeline used to split sentences into words before
# wordpiece tokenization.
nlp = get_spacy_model(
    "en_core_web_sm",
    pos_tags=False,
    parse=True,
    ner=False,
)

# Register "[MASK]" as a tokenizer special case so spaCy emits it as one
# token instead of splitting it on the brackets.
nlp.tokenizer.add_special_case("[MASK]", [{ORTH: "[MASK]"}])



In [3]:
# Mount Google Drive at /content/drive; the CSV files read by later cells
# live under this mount point. Requires interactive authorization in Colab.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
class Config(dict):
    """Dictionary whose entries are also readable as attributes.

    Every keyword passed to the constructor is stored both as a dict item
    and as an instance attribute, so ``cfg["x"]`` and ``cfg.x`` agree.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Mirror each dict entry onto the instance as an attribute.
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def set(self, key, val):
        """Store ``val`` under ``key`` as both a dict item and an attribute."""
        self[key] = val
        setattr(self, key, val)

class BertPreprocessor:
    def __init__(self, model_type: str, max_seq_len: int=128):
        self.model_type = model_type
        self.max_seq_len = max_seq_len
        self.token_indexer = PretrainedBertIndexer(
            pretrained_model=self.model_type,
            max_pieces=self.max_seq_len,
            do_lowercase=True,
        )
        self.vocab = Vocabulary()
        self.token_indexer._add_encoding_to_vocabulary(self.vocab)
        self.full_vocab = {v:k for k, v in self.token_indexer.vocab.items()}

    def tokenize(self, x: str) -> List[Token]:
        return [Token(w) for w in flatten([
                self.token_indexer.wordpiece_tokenizer(w)
                for w in spacy_tok(x)]
        )[:self.max_seq_len]]

    def index_to_token(self, idx: int) -> str:
        return self.full_vocab[idx]

    def indices_to_tokens(self, indices: Iterable[int]) -> List[str]:
        return [self.index_to_word(x) for x in indices]

    def token_to_index(self, token: str,
                      accept_wordpiece: bool=False,
                      ) -> int:
        wordpieces = self.tokenize(token)
        if len(wordpieces) > 1 and not accept_wordpiece:
            raise TokenizationError(f"{token} is not a single wordpiece")
        else: token = wordpieces[0].text
        return self.token_indexer.vocab[token]

    def get_index(self, sentence: str,
                  word: str,
                  accept_wordpiece: bool=False,
                  last: bool=False) -> int:
        toks = self.tokenize(sentence)
        wordpieces = self.tokenize(word)
        if len(wordpieces) > 1 and not accept_wordpiece:
            raise TokenizationError(f"{word} is not a single wordpiece")
        else: word = wordpieces[0].text # use first wordpiece

        if not last:
            for i, t in enumerate(toks):
                if t.text == word:
                    return i + 1 # take the [CLS] token into account
        else:
            for i, t in enumerate(reversed(toks)):
                if t.text == word:
                    return len(toks) - 1 - i
        raise ValueError(f"No {word} tokenn tokens {toks} found")

    def to_bert_model_input(self, input_sentence: str) -> np.ndarray:
        input_toks = self.tokenize(input_sentence)
        batch = self.token_indexer.tokens_to_indices(input_toks, self.vocab, "tokens")
        token_ids = torch.LongTensor(batch["tokens"]).unsqueeze(0)
        return token_ids

def flatten(x: List[List[T]]) -> List[T]:
    """Concatenate the inner lists of ``x`` into one flat list, preserving order."""
    flat = []
    for sublist in x:
        flat.extend(sublist)
    return flat

def spacy_tok(s: str) -> List[str]:
    """Split ``s`` with the module-level spaCy pipeline and return each token's text."""
    doc = nlp(s)
    return [token.text for token in doc]

# Model name and sequence-length limit shared by the preprocessor and the model.
config = Config(model_type="bert-base-uncased", max_seq_len=128)

# Single preprocessor instance used by the helper functions in later cells.
processor = BertPreprocessor(config["model_type"], config["max_seq_len"])

In [49]:
# Load the pretrained masked-language-model head for the configured BERT model.
model = BertForMaskedLM.from_pretrained(config.model_type)
# Switch to evaluation mode; as the cell's last expression this also
# displays the module structure.
model.eval()

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
   

In [0]:
# Load the Equity Evaluation Corpus CSV from the mounted Drive.
df = pd.read_csv('/content/drive/My Drive/Final Projects/Deep Learning/Equity-Evaluation-Corpus.csv')

# Rows labelled with one of the two emotions used in this notebook.
is_fear_or_joy = df.Emotion.isin(['fear', 'joy'])

# Person/race pairs that occur in fear or joy sentences.
names = df.loc[is_fear_or_joy, ['Person', 'Race']]
european_names = df.loc[df.Race == 'European', 'Person'].drop_duplicates()
african_american_names = df.loc[df.Race == 'African-American', 'Person'].drop_duplicates()

# Distinct emotion words from every template that has an emotion-word slot.
# NOTE(review): the original first assigned the fear/joy rows to `emotions`
# and then overwrote it with this selection over the whole corpus; if the
# fear/joy restriction was intended here as well, combine it with this mask.
emotions = df.loc[df.Template.str.contains('<emotion word>'), 'Emotion word'].drop_duplicates()

# Fear/joy templates that have both a person slot and an emotion-word slot,
# with the person slot replaced by BERT's mask token.
templates = df.loc[is_fear_or_joy, 'Template'].drop_duplicates()
templates = [
    template.replace('<person subject>', '[MASK]')
    for template in templates
    if '<person subject>' in template and '<emotion word>' in template
]

# Keep only the de-duplicated fear/joy rows whose person appears in `names`.
df = df.loc[is_fear_or_joy, :].drop_duplicates()
df = df.loc[df.Person.isin(names.Person), :]

In [0]:
# First word of every entry in the white male names file, de-duplicated and
# sorted by np.unique, capped at 3000 names.
# Note: this file's column header has a leading space (' first name').
white_names = (
    pd.read_csv('/content/drive/My Drive/Final Projects/Deep Learning/White-Male-Names.csv')
    .rename(columns={' first name': 'first_name'})
)
white_names = np.unique(
    list(white_names.apply(lambda row: row.first_name.split()[0], axis=1))
)[:3000]

# Same processing for the black male names file (header has no leading space).
black_names = (
    pd.read_csv('/content/drive/My Drive/Final Projects/Deep Learning/Black-Male-Names.csv')
    .rename(columns={'first name': 'first_name'})
)
black_names = np.unique(
    list(black_names.apply(lambda row: row.first_name.split()[0], axis=1))
)[:3000]

In [0]:
def get_logits(sentence: str) -> np.ndarray:
    """Run the masked-LM on ``sentence`` and return its output scores as a NumPy array.

    The first (only) batch element of the model output is returned, indexed
    as ``[position, vocabulary id]`` by the caller.
    """
    token_ids = processor.to_bert_model_input(sentence)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        scores = model(token_ids)
    return scores[0, :, :].cpu().detach().numpy()

def softmax(arr, axis=1):
    """Return the softmax of ``arr`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating. This
    leaves the result mathematically unchanged but prevents ``exp`` from
    overflowing (and producing NaN) on large scores.

    Args:
        arr: array-like of scores.
        axis: axis along which the probabilities sum to one (default 1).

    Returns:
        Array of the same shape as ``arr``.
    """
    arr = np.asarray(arr)
    if arr.size == 0:
        # Nothing to normalize; a max-reduction over an empty axis would raise.
        return np.exp(arr)
    shifted = arr - arr.max(axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=axis, keepdims=True)

def get_mask_fill_logits(sentence: str, words: Iterable[str],
                         use_last_mask=False, apply_softmax=False) -> Dict[str, float]:
    """Score each candidate in ``words`` as a filler for a [MASK] in ``sentence``.

    Args:
        sentence: text containing at least one "[MASK]" token.
        words: candidate fillers; each is looked up with
            ``processor.token_to_index``.
        use_last_mask: read the scores at the last "[MASK]" instead of the first.
        apply_softmax: convert the model scores to probabilities over the
            vocabulary before reading them.

    Returns:
        Mapping from each candidate word to its score at the mask position.
    """
    mask_i = processor.get_index(sentence, "[MASK]", last=use_last_mask)
    out_logits = get_logits(sentence)
    if apply_softmax:
        out_logits = softmax(out_logits)
    return {w: out_logits[mask_i, processor.token_to_index(w)] for w in words}

Templates (step 1)

In [0]:
# Sentence pattern with a slot for the target name and one for the attribute.
templates = ["[TARGET] is a [ATTRIBUTE]"]
# Target names; a later cell unpacks these as (black_name, white_name).
targets = ["Jamal", "Harry"]
# Attribute words substituted into the [ATTRIBUTE] slot.
attributes = ["lawyer", "athlete"]

Compute $P_{target} = P(\text{[MASK]} = \text{[TARGET]} \mid \text{template})$ (step 2)

In [55]:
template = templates[0]
black_name, white_name = targets
attribute = attributes[0]

# Score of each name filling the masked target slot, given the attribute.
subject_fill_logits = get_mask_fill_logits(
    template.replace('[ATTRIBUTE]', attribute).replace('[TARGET]', '[MASK]'),
    targets, use_last_mask=False,
)
subject_fill_bias = subject_fill_logits[white_name] - subject_fill_logits[black_name]

# Prior: the same scores with the attribute masked as well. The target slot
# is the first [MASK] in the template, so the scores must be read there; the
# original read them at the last [MASK], i.e. the attribute slot.
subject_fill_prior_logits = get_mask_fill_logits(
    template.replace("[TARGET]", "[MASK]").replace("[ATTRIBUTE]", "[MASK]"),
    targets, use_last_mask=False,
)
subject_fill_bias_prior_correction = subject_fill_prior_logits[white_name] - subject_fill_prior_logits[black_name]

# Probability of the attribute filling its masked slot, given each name.
white_name_fill_prob = get_mask_fill_logits(
    template.replace("[TARGET]", white_name).replace("[ATTRIBUTE]", "[MASK]"), [attribute],
    apply_softmax=True,
)[attribute]

black_name_fill_prob = get_mask_fill_logits(
    template.replace("[TARGET]", black_name).replace("[ATTRIBUTE]", "[MASK]"), [attribute],
    apply_softmax=True,
)[attribute]

# Log-ratio of the two attribute probabilities.
tgt_fill_bias = np.log(white_name_fill_prob / black_name_fill_prob)

# Last expression of the cell: displayed as the cell's result.
{"race_fill_bias": subject_fill_bias,
 "race_fill_prior_correction": subject_fill_bias_prior_correction,
 "race_fill_bias_prior_corrected": subject_fill_bias - subject_fill_bias_prior_correction,
 "target_fill_bias": tgt_fill_bias,
}

{'Jamal': -0.81307364, 'Harry': -0.81307364}


{'race_fill_bias': 0.0,
 'race_fill_bias_prior_corrected': 0.0,
 'race_fill_prior_correction': 0.0,
 'target_fill_bias': 0.0}