<a href="https://colab.research.google.com/github/deniskapel/2021_HACK_RSG/blob/main/blimp/blimp_elmo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Initialize

In [None]:
!pip install --upgrade simple_elmo

In [3]:
import json
import logging
import glob 

import numpy as np

import simple_elmo
import tensorflow as tf
from tensorflow import Graph

from torch.utils.data import Dataset, DataLoader
from scipy.special import softmax

from spacy.lang.en import English
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt

In [None]:
%%capture
%%bash
git clone https://github.com/alexwarstadt/blimp

mkdir models
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1px_asSvNUCe33oMk3Q9f2ZTguA46_NPr' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1px_asSvNUCe33oMk3Q9f2ZTguA46_NPr" -O models/enwiki30.zip && rm -rf /tmp/cookies.txt
unzip models/enwiki30.zip -d models

In [None]:
BATCH_SIZE = 1

In [None]:
elmo_model = simple_elmo.ElmoModel()
elmo_model.load('models/enwiki30', max_batch_size=BATCH_SIZE, full=True)

In [None]:
matrix_questions = [
               ("Had Bruce ever played ?", "Bruce had ever played ."), 
               ("Had Patrick ever answered ?", "Patrick had ever answered ."),
               ("Do some carts ever accelerate ?", "Some carts do ever accelerate ."), 
               ("Will Dawn ever know Jeffrey ?", "Dawn will ever know Jeffrey .")]

goods = [pair[0] for pair in matrix_questions]
bads = [pair[1] for pair in matrix_questions]
goods, bads

(['Had Bruce ever played ?',
  'Had Patrick ever answered ?',
  'Do some carts ever accelerate ?',
  'Will Dawn ever know Jeffrey ?'],
 ['Bruce had ever played .',
  'Patrick had ever answered .',
  'Some carts do ever accelerate .',
  'Dawn will ever know Jeffrey .'])

In [None]:
# warm up
tokenizer=English().tokenizer
goods = elmo_model.get_elmo_substitutes(goods, topn=30_003)
bads = elmo_model.get_elmo_substitutes(bads, topn=30_003)

2022-03-10 15:48:25,971 : INFO : Calculating language model predictions...
2022-03-10 15:48:26,755 : INFO : Calculating language model predictions...


In [None]:
def get_token_logp(token: dict, vocab_size=30_003) -> tuple:
    """ returns token logp from forward and backward lstm """
    vocab_forward = dict(zip(token['forward']['candidate_words'], softmax(token['forward']['logp'])))
    vocab_backward = dict(zip(token['backward']['candidate_words'], softmax(token['backward']['logp'])))
    forward_logp = vocab_forward.get(token['word'], vocab_forward['<UNK>'])
    backward_logp = vocab_backward.get(token['word'], vocab_backward['<UNK>'])
    
    return forward_logp, backward_logp

In [None]:
for right, wrong  in zip(goods, bads):
    for r, w in zip(right, wrong):
        print(get_token_logp(r), get_token_logp(w))
        print('\n')


In [None]:
def load(filename):
    pairs = []
    with open(filename) as f:
        for line in f.readlines():
            pairs.append(json.loads(line))
    return pairs

In [None]:
def get_ppl(sentence, direction='forward', vocab_size=30_003):
    
    err_message = "Direction must be either 'forward', 'backward' or 'bidirectional'"
    assert direction in ['forward', 'backward', 'bidirectional'], err_message
    log_p = [get_token_logp(token, vocab_size) for token in sentence]

    if direction == 'forward':
        log_p = [f for f, b in log_p]
    elif direction == 'backward':
        log_p = [b for f, b in log_p]
    else:
        log_p = [np.mean([f,b]) for f, b in log_p]

    ppl = np.sum(log_p)
    
    return ppl

In [None]:
def get_token_logp(token: dict, vocab_size=30_003) -> tuple:
    """ returns token logp from forward and backward lstm """
    vocab_forward = dict(zip(token['forward']['candidate_words'], softmax(token['forward']['logp'])))
    vocab_backward = dict(zip(token['backward']['candidate_words'], softmax(token['backward']['logp'])))
    forward_logp = vocab_forward.get(token['word'], vocab_forward['<UNK>'])
    backward_logp = vocab_backward.get(token['word'], vocab_backward['<UNK>'])

    return forward_logp, backward_logp

In [None]:
test_sentences = [
               ("Had Bruce ever played ?", "Bruce had ever played ."), 
               ("Had Patrick ever answered ?", "Patrick had ever answered ."),
               ("Do some carts ever accelerate ?", "Some carts do ever accelerate ."), 
               ("Will Dawn ever know Jeffrey ?", "Dawn will ever know Jeffrey .")]

print('forward')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([shorter.lower()], topn=30_003)[0])
    b = get_ppl(elmo_model.get_elmo_substitutes([longer.lower()], topn=30_003)[0])
    print(g,b, g>b, g-b)
    print('\n')

print('bidirectional')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([shorter.lower()], topn=30_003)[0], 'bidirectional')
    b = get_ppl(elmo_model.get_elmo_substitutes([longer.lower()], topn=30_003)[0], 'bidirectional')
    print(g,b, g>b, g-b)
    print('\n')

2022-03-10 18:10:45,437 : INFO : Calculating language model predictions...


forward


2022-03-10 18:10:45,721 : INFO : Calculating language model predictions...
2022-03-10 18:10:46,028 : INFO : Calculating language model predictions...


0.16437553350921608 0.1195200082790187 True 0.04485552523019738




2022-03-10 18:10:46,320 : INFO : Calculating language model predictions...
2022-03-10 18:10:46,601 : INFO : Calculating language model predictions...


0.16428846108889222 0.12262685037309673 True 0.041661610715795494




2022-03-10 18:10:46,936 : INFO : Calculating language model predictions...
2022-03-10 18:10:47,276 : INFO : Calculating language model predictions...


0.09545830796698825 0.1066859040724685 False -0.011227596105480256




2022-03-10 18:10:47,613 : INFO : Calculating language model predictions...
2022-03-10 18:10:47,950 : INFO : Calculating language model predictions...


0.029927070367573205 0.04392757967435819 False -0.014000509306784984


bidirectional


2022-03-10 18:10:48,244 : INFO : Calculating language model predictions...
2022-03-10 18:10:48,525 : INFO : Calculating language model predictions...


0.15470646383184627 0.45075030763439544 False -0.29604384380254917




2022-03-10 18:10:48,827 : INFO : Calculating language model predictions...
2022-03-10 18:10:49,120 : INFO : Calculating language model predictions...


0.15491644303892807 0.45253118880933063 False -0.2976147457704026




2022-03-10 18:10:49,480 : INFO : Calculating language model predictions...
2022-03-10 18:10:49,800 : INFO : Calculating language model predictions...


0.12131106700078226 0.5812269781733692 False -0.45991591117258696




2022-03-10 18:10:50,173 : INFO : Calculating language model predictions...


0.16876912439629832 0.7938641885822277 False -0.6250950641859294




## test if difference in a number of words matters

In [None]:
import re

test_sentences = [
               ("Had Bruce ever played ?", "Bruce had ever played ."), 
               ("Had Patrick ever answered ?", "Patrick had ever answered ."),
               ("Do some carts ever accelerate ?", "Some carts do ever accelerate ."), 
               ("Will Dawn ever know Jeffrey ?", "Dawn will ever know Jeffrey .")]

print('forward')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([re.sub(r'[^a-zA-Z\s]', "", shorter.lower())], topn=30_003)[0])
    b = get_ppl(elmo_model.get_elmo_substitutes([re.sub(r'[^a-zA-Z\s]', "", longer.lower())], topn=30_003)[0])
    print(g,b, g>b, g-b)
    print('\n')

print('bidirectional')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([re.sub(r'[^a-zA-Z\s]', "", shorter.lower())], topn=30_003)[0], 'bidirectional')
    b = get_ppl(elmo_model.get_elmo_substitutes([re.sub(r'[^a-zA-Z\s]', "", longer.lower())], topn=30_003)[0], 'bidirectional')
    print(g,b, g>b, g-b)
    print('\n')

2022-03-10 18:10:50,551 : INFO : Calculating language model predictions...


forward


2022-03-10 18:10:50,791 : INFO : Calculating language model predictions...
2022-03-10 18:10:51,010 : INFO : Calculating language model predictions...


0.16437536290911647 0.11866272626140305 True 0.045712636647713425




2022-03-10 18:10:51,252 : INFO : Calculating language model predictions...
2022-03-10 18:10:51,497 : INFO : Calculating language model predictions...


0.164223353806299 0.1186172918880896 True 0.04560606191820939




2022-03-10 18:10:51,780 : INFO : Calculating language model predictions...
2022-03-10 18:10:52,048 : INFO : Calculating language model predictions...


0.0954365405615123 0.09530133998696193 True 0.0001352005745503676




2022-03-10 18:10:52,327 : INFO : Calculating language model predictions...
2022-03-10 18:10:52,621 : INFO : Calculating language model predictions...


0.029887177768082793 0.030133554344813438 False -0.0002463765767306454


bidirectional


2022-03-10 18:10:52,854 : INFO : Calculating language model predictions...
2022-03-10 18:10:53,089 : INFO : Calculating language model predictions...


0.1534296774707198 0.1068706055678993 True 0.046559071902820506




2022-03-10 18:10:53,312 : INFO : Calculating language model predictions...
2022-03-10 18:10:53,551 : INFO : Calculating language model predictions...


0.15360889542608147 0.10707430151956245 True 0.046534593906519026




2022-03-10 18:10:53,825 : INFO : Calculating language model predictions...
2022-03-10 18:10:54,106 : INFO : Calculating language model predictions...


0.12002962359740861 0.23208253914882487 False -0.11205291555141626




2022-03-10 18:10:54,387 : INFO : Calculating language model predictions...


0.04716232892395271 0.04730898949940333 False -0.00014666057545061756




In [None]:
test_sentences = [
                  ('I like football .', 'I likes football .'),
                  ('This is a terrible mistake .', 'Is this a terrible mistake .'),
                  ('An island is an isolated piece of habitat that is surrounded by a dramatically different habitat, such as water.', 
                   'An island are an isolated piece of habitat that is surrounded by a dramatically different habitat, such as water.')
]

print('forward')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([shorter], topn=30_003)[0])
    b = get_ppl(elmo_model.get_elmo_substitutes([longer], topn=30_003)[0])
    print(g,b, g>b)
    print('\n')

print('bidirectional')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([shorter], topn=30_003)[0], 'bidirectional')
    b = get_ppl(elmo_model.get_elmo_substitutes([longer], topn=30_003)[0], 'bidirectional')
    print(g,b, g>b)
    print('\n')


2022-03-10 18:10:54,700 : INFO : Calculating language model predictions...


forward


2022-03-10 18:10:54,942 : INFO : Calculating language model predictions...
2022-03-10 18:10:55,165 : INFO : Calculating language model predictions...


0.01872809877039581 0.018797948863132317 False




2022-03-10 18:10:55,519 : INFO : Calculating language model predictions...
2022-03-10 18:10:55,837 : INFO : Calculating language model predictions...


0.5442768717844441 0.2129859411493574 True




2022-03-10 18:10:56,830 : INFO : Calculating language model predictions...
2022-03-10 18:10:57,764 : INFO : Calculating language model predictions...


1.8315671717644126 1.8089452909609534 True


bidirectional


2022-03-10 18:10:58,034 : INFO : Calculating language model predictions...
2022-03-10 18:10:58,252 : INFO : Calculating language model predictions...


0.353788376937596 0.3531010093399832 True




2022-03-10 18:10:58,605 : INFO : Calculating language model predictions...
2022-03-10 18:10:58,920 : INFO : Calculating language model predictions...


0.791774241260399 0.6023794926238621 True




2022-03-10 18:10:59,899 : INFO : Calculating language model predictions...


1.2995529715476661 1.267862116147716 True




In [None]:
import re
test_sentences = [("A lady has remembered who the actors conceal .", 
                   "A lady has remembered that the actors conceal ."),
                  ("Teenagers know what all ladies haven't examined .", 
                   "Teenagers know that all ladies haven't examined ."), 
                  ("Dancers hadn't figured out who the actress didn't reveal .", 
                   "Dancers hadn't figured out that the actress didn't reveal .")]

print('forward')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([shorter], topn=30_003)[0])
    b = get_ppl(elmo_model.get_elmo_substitutes([longer], topn=30_003)[0])
    print(g,b, g>b, g-b)
    print('\n')

print('bidirectional')
for shorter, longer in test_sentences:
    g = get_ppl(elmo_model.get_elmo_substitutes([shorter], topn=30_003)[0], 'bidirectional')
    b = get_ppl(elmo_model.get_elmo_substitutes([longer], topn=30_003)[0], 'bidirectional')
    print(g,b, g>b, g-b)
    print('\n')

2022-03-10 18:11:00,881 : INFO : Calculating language model predictions...


forward


2022-03-10 18:11:01,354 : INFO : Calculating language model predictions...
2022-03-10 18:11:01,835 : INFO : Calculating language model predictions...


0.017549096011822852 0.08018740551680818 False -0.06263830950498533




2022-03-10 18:11:02,274 : INFO : Calculating language model predictions...
2022-03-10 18:11:02,709 : INFO : Calculating language model predictions...


0.2061682052595249 0.20497023917624319 True 0.0011979660832817096




2022-03-10 18:11:03,231 : INFO : Calculating language model predictions...
2022-03-10 18:11:03,770 : INFO : Calculating language model predictions...


0.4764760406224498 0.5355690402469858 False -0.05909299962453601


bidirectional


2022-03-10 18:11:04,247 : INFO : Calculating language model predictions...
2022-03-10 18:11:04,741 : INFO : Calculating language model predictions...


0.45708886903124013 0.4889540304607064 False -0.03186516142946627




2022-03-10 18:11:05,162 : INFO : Calculating language model predictions...
2022-03-10 18:11:05,579 : INFO : Calculating language model predictions...


0.5304380817457375 0.5193736179559064 True 0.011064463789831058




2022-03-10 18:11:06,129 : INFO : Calculating language model predictions...


1.130657409387294 1.160599370688191 False -0.02994196130089688




In [None]:
def run(model, dataloader, direction):
    correct = 0
    
    vocab_size = 30_003

    for good, bad in tqdm(dataloader):
        good = model.get_elmo_substitutes(good, topn=vocab_size)
        bad = model.get_elmo_substitutes(bad, topn=vocab_size)
        
        for good_sent, bad_sent in zip(good, bad):
        
            good_ppl = get_ppl(good_sent, direction, vocab_size=vocab_size)
            bad_ppl = get_ppl(bad_sent, direction, vocab_size=vocab_size)

            if good_ppl > bad_ppl:
                correct += 1

    return correct / len(dataloader.dataset)

In [None]:
class Blimp:
    def __init__(self):
        self.phenomena = {}
    
    def add_result(self, phenomenon, uid, accuracy):
        if phenomenon not in self.phenomena:
            self.phenomena[phenomenon] = {}
        self.phenomena[phenomenon][uid] = accuracy
    
    def __str__(self):
        def iterator():
            for phenomenon_key in sorted(self.phenomena.keys()):
                phenomenon = self.phenomena[phenomenon_key]
                for uid_key in sorted(phenomenon.keys()):
                    yield f"{phenomenon_key},{uid_key},{phenomenon[uid_key]}"
        return '\n'.join(iterator())

In [None]:
class BlimpDataset(Dataset):
    
    """ customized Dataset class from torch """
    
    def __init__(self, data: list, tokenizer):
        self.data = data
        self.tokenizer = tokenizer
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        """ shape each sample into a proper """
        pair = self.data[index]
        good = " ".join([token.text for token in self.tokenizer(pair["sentence_good"])])
        bad = " ".join([token.text for token in self.tokenizer(pair["sentence_bad"])])
        
        return good, bad

def collate_fn(batch) -> tuple:
    goods, bads = list(), list()
    
    for good, bad in batch:
        goods.append(good)
        bads.append(bad)

    return goods, bads

## BLiMP on Elmo's forward only

In [None]:
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

blimp = Blimp()
tokenizer = English().tokenizer

for dataset in tqdm(glob.glob(f"blimp/data/*.jsonl")[0:3]):
    dataset = load(dataset)
    loader = DataLoader(
        BlimpDataset(dataset, tokenizer),
        batch_size=BATCH_SIZE, shuffle=False, 
        collate_fn=collate_fn)
    
    accuracy = run(elmo_model, loader, direction='forward')
    blimp.add_result(dataset[0]["linguistics_term"], dataset[0]["UID"], accuracy)

print(blimp)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

argument_structure,passive_1,0.067
control_raising,expletive_it_object_raising,0.045
subject_verb_agreement,irregular_plural_subject_verb_agreement_1,0.142


## BLiMP on Elmo's both directions averaged





In [None]:
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

blimp = Blimp()
tokenizer = English().tokenizer

for dataset in tqdm(glob.glob(f"blimp/data/*.jsonl")[0:3]):
    dataset = load(dataset)
    loader = DataLoader(
        BlimpDataset(dataset, tokenizer),
        batch_size=BATCH_SIZE, shuffle=False, 
        collate_fn=collate_fn)
    
    accuracy = run(elmo_model, loader, direction='bidirectional')
    blimp.add_result(dataset[0]["linguistics_term"], dataset[0]["UID"], accuracy)

print(blimp)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

argument_structure,passive_1,0.268
control_raising,expletive_it_object_raising,0.289
subject_verb_agreement,irregular_plural_subject_verb_agreement_1,0.48




## WSD_eval


In [None]:
%%bash
wget https://raw.githubusercontent.com/deniskapel/2021_HACK_RSG/main/blimp/wsd_eval.py
wget https://rusvectores.org/static/testsets/senseval3.tsv

In [None]:
%%bash
mkdir models
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=17EorqgpFj-d77JyJFWB9hLr-La4Pcn_g' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=17EorqgpFj-d77JyJFWB9hLr-La4Pcn_g" -O models/BNC.zip && rm -rf /tmp/cookies.txt
unzip models/BNC.zip -d models

In [None]:
%%bash
python wsd_eval.py -i senseval3.tsv -e models/BNC/8/