<a href="https://colab.research.google.com/github/deniskapel/2021_HACK_RSG/blob/main/blimp/blimp_elmo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Initialize

In [1]:
!pip install --upgrade simple_elmo

Collecting simple_elmo
  Downloading simple_elmo-0.9.0-py3-none-any.whl (46 kB)
[?25l[K     |███████                         | 10 kB 21.8 MB/s eta 0:00:01[K     |██████████████▏                 | 20 kB 14.4 MB/s eta 0:00:01[K     |█████████████████████▎          | 30 kB 10.1 MB/s eta 0:00:01[K     |████████████████████████████▍   | 40 kB 9.1 MB/s eta 0:00:01[K     |████████████████████████████████| 46 kB 3.0 MB/s 
Installing collected packages: simple-elmo
Successfully installed simple-elmo-0.9.0


In [2]:
import json
import logging
import glob 

import numpy as np

import simple_elmo
import tensorflow as tf
from tensorflow import Graph

from torch.utils.data import Dataset, DataLoader

from spacy.lang.en import English
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt

In [3]:
%%capture
%%bash
git clone https://github.com/alexwarstadt/blimp

mkdir models
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1px_asSvNUCe33oMk3Q9f2ZTguA46_NPr' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1px_asSvNUCe33oMk3Q9f2ZTguA46_NPr" -O models/enwiki30.zip && rm -rf /tmp/cookies.txt
unzip models/enwiki30.zip -d models

In [4]:
BATCH_SIZE = 1

In [5]:
elmo_model = simple_elmo.ElmoModel()
elmo_model.load('models/enwiki30', max_batch_size=BATCH_SIZE, full=True)

2022-02-06 10:51:12,158 : INFO : Loading model from models/enwiki30...
2022-02-06 10:51:12,164 : INFO : Loading from models/enwiki30/model.ckpt-450000...
2022-02-06 10:51:12,170 : INFO : Invalid number of characters in the options.json file: 262.
2022-02-06 10:51:12,174 : INFO : Setting it to 261 for using the model as LM
2022-02-06 10:51:12,223 : INFO : We will cache the vocabulary of 30003 tokens.
USING SKIP CONNECTIONS
  proj_clip=proj_clip,
  partitioner=maybe_partitioner)
  initializer=initializer)
  partitioner=maybe_proj_partitioner)


'The model is now loaded.'

In [7]:
# warm up
tokenizer=English().tokenizer

sents = ['It was produced by Molecular Films and Wynne / Pike Productions .',
         'It were produced by Molecular Films and Wynne / Pike Productions .']
sents = [" ".join([token.text for token in tokenizer(s)]) for s in sents]
test_subs = elmo_model.get_elmo_substitutes(sents, topn=30_003)

2022-02-06 10:51:31,285 : INFO : Calculating language model predictions...


In [8]:
def get_token_logp(token: dict, vocab_size=30_003) -> tuple:
    """ returns token logp from forward and backward lstm """
    vocab_forward = dict(zip(token['forward']['candidate_words'], token['forward']['logp']))
    vocab_backward = dict(zip(token['backward']['candidate_words'], token['backward']['logp']))
    forward_logp = vocab_forward.get(token['word'], vocab_forward['<UNK>'])
    backward_logp = vocab_backward.get(token['word'], vocab_backward['<UNK>'])
    
    return forward_logp, backward_logp

In [9]:
for token in test_subs[0]:
    print(get_token_logp(token))

del test_subs

(9.0443, 3.0867)
(12.6795, 10.5953)
(6.2388, 3.638)
(13.7775, 7.5204)
(-2.7566, 2.0431)
(3.1803, 2.2528)
(11.2575, 9.2415)
(1.4924, -0.1546)
(8.0831, 4.3826)
(0.8896, 2.846)
(5.6171, 3.8793)
(12.7655, 13.2287)


In [None]:
def load(filename):
    pairs = []
    with open(filename) as f:
        for line in f.readlines():
            pairs.append(json.loads(line))
    return pairs

In [10]:
def get_ppl(sentence, direction='forward', vocab_size=30_003):
    
    err_message = "Direction must be either 'forward', 'backward' or 'bidirectional'"
    assert direction in ['forward', 'backward', 'bidirectional'], err_message
    log_p = [get_token_logp(token, vocab_size) for token in sentence]

    if direction == 'forward':
        log_p = [f for f, b in log_p]
    elif direction == 'backward':
        log_p = [b for f, b in log_p]
    else:
        log_p = [np.mean([f,b]) for f, b in log_p]

    ppl = np.sum(log_p)
    
    return ppl

## test if difference in a number of words matters

In [20]:
test_sentences = [
                  ('I like football .', 'I green like football .'),
                  ('This is a terrible mistake .', 'green This is a terrible mistake .'),
                  ('An island is an isolated piece of habitat that is surrounded by a dramatically different habitat, such as water.', 
                   'An island is an isolated piece of habitat ideas that is surrounded by a dramatically different habitat, such as water.')
]

print('forward')
for shorter, longer in test_sentences:
    print(get_ppl(elmo_model.get_elmo_substitutes([shorter], topn=30_003)[0]),
          get_ppl(elmo_model.get_elmo_substitutes([longer], topn=30_003)[0]))

print('bidirectional')
for shorter, longer in test_sentences:
    print(get_ppl(elmo_model.get_elmo_substitutes([shorter], topn=30_003)[0], 'bidirectional'),
          get_ppl(elmo_model.get_elmo_substitutes([longer], topn=30_003)[0], 'bidirectional'))


2022-02-06 11:08:07,366 : INFO : Calculating language model predictions...


forward


2022-02-06 11:08:07,742 : INFO : Calculating language model predictions...
2022-02-06 11:08:08,171 : INFO : Calculating language model predictions...


26.3016 24.2077


2022-02-06 11:08:08,671 : INFO : Calculating language model predictions...
2022-02-06 11:08:09,249 : INFO : Calculating language model predictions...


56.8789 46.798100000000005


2022-02-06 11:08:10,672 : INFO : Calculating language model predictions...
2022-02-06 11:08:12,165 : INFO : Calculating language model predictions...


163.96269999999998 168.87519999999998
bidirectional


2022-02-06 11:08:12,521 : INFO : Calculating language model predictions...
2022-02-06 11:08:12,947 : INFO : Calculating language model predictions...


25.898649999999996 22.912399999999998


2022-02-06 11:08:13,426 : INFO : Calculating language model predictions...
2022-02-06 11:08:14,005 : INFO : Calculating language model predictions...


54.396100000000004 49.132850000000005


2022-02-06 11:08:15,397 : INFO : Calculating language model predictions...


160.23125 163.2301


In [None]:
def run(model, dataloader, direction):
    correct = 0
    
    vocab_size = 30_003

    for good, bad in tqdm(dataloader):
        good = model.get_elmo_substitutes(good, topn=vocab_size)
        bad = model.get_elmo_substitutes(bad, topn=vocab_size)
        
        for good_sent, bad_sent in zip(good, bad):
        
            good_ppl = get_ppl(good_sent, direction, vocab_size=vocab_size)
            bad_ppl = get_ppl(bad_sent, direction, vocab_size=vocab_size)

            if good_ppl > bad_ppl:
                correct += 1

    return correct / len(dataloader.dataset)

In [None]:
class Blimp:
    def __init__(self):
        self.phenomena = {}
    
    def add_result(self, phenomenon, uid, accuracy):
        if phenomenon not in self.phenomena:
            self.phenomena[phenomenon] = {}
        self.phenomena[phenomenon][uid] = accuracy
    
    def __str__(self):
        def iterator():
            for phenomenon_key in sorted(self.phenomena.keys()):
                phenomenon = self.phenomena[phenomenon_key]
                for uid_key in sorted(phenomenon.keys()):
                    yield f"{phenomenon_key},{uid_key},{phenomenon[uid_key]}"
        return '\n'.join(iterator())

In [None]:
class BlimpDataset(Dataset):
    
    """ customized Dataset class from torch """
    
    def __init__(self, data: list, tokenizer):
        self.data = data
        self.tokenizer = tokenizer
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        """ shape each sample into a proper """
        pair = self.data[index]
        good = " ".join([token.text for token in self.tokenizer(pair["sentence_good"])])
        bad = " ".join([token.text for token in self.tokenizer(pair["sentence_bad"])])
        
        return good, bad

def collate_fn(batch) -> tuple:
    goods, bads = list(), list()
    
    for good, bad in batch:
        goods.append(good)
        bads.append(bad)

    return goods, bads

## BLiMP on Elmo's forward only

In [None]:
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

blimp = Blimp()
tokenizer = English().tokenizer

for dataset in tqdm(glob.glob(f"blimp/data/*.jsonl")[0:3]):
    dataset = load(dataset)
    loader = DataLoader(
        BlimpDataset(dataset, tokenizer),
        batch_size=BATCH_SIZE, shuffle=False, 
        collate_fn=collate_fn)
    
    accuracy = run(elmo_model, loader, direction='forward')
    blimp.add_result(dataset[0]["linguistics_term"], dataset[0]["UID"], accuracy)

print(blimp)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

argument_structure,passive_1,0.067
control_raising,expletive_it_object_raising,0.045
subject_verb_agreement,irregular_plural_subject_verb_agreement_1,0.142


## BLiMP on Elmo's both directions averaged





In [None]:
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

blimp = Blimp()
tokenizer = English().tokenizer

for dataset in tqdm(glob.glob(f"blimp/data/*.jsonl")[0:3]):
    dataset = load(dataset)
    loader = DataLoader(
        BlimpDataset(dataset, tokenizer),
        batch_size=BATCH_SIZE, shuffle=False, 
        collate_fn=collate_fn)
    
    accuracy = run(elmo_model, loader, direction='bidirectional')
    blimp.add_result(dataset[0]["linguistics_term"], dataset[0]["UID"], accuracy)

print(blimp)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

argument_structure,passive_1,0.268
control_raising,expletive_it_object_raising,0.289
subject_verb_agreement,irregular_plural_subject_verb_agreement_1,0.48
