In [81]:
%load_ext autoreload
%autoreload 2

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
# Cells below that list several bare names rely on this to show all of them.
InteractiveShell.ast_node_interactivity = 'all'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [82]:
import os
import json
import itertools
from itertools import product, chain

from pytorch_pretrained_bert import tokenization, BertTokenizer, BertModel, BertForMaskedLM, BertForPreTraining, BertConfig

In [83]:
# Model configuration file name; not referenced by the other cells visible in this notebook.
CONFIG_NAME = 'bert_config.json'
# Directory expected to contain vocab.txt. The default is the original NAS location;
# set the BERT_DIR environment variable to run the notebook on a machine without that mount.
BERT_DIR = os.environ.get('BERT_DIR', '/nas/pretrain-bert/pretrain-tensorflow/uncased_L-12_H-768_A-12/')
# Tokenizer built from the vocabulary file; make_sentences reads tokenizer.vocab to
# validate that entity names are present in the vocabulary.
tokenizer = BertTokenizer.from_pretrained(os.path.join(BERT_DIR, 'vocab.txt'))

01/25/2019 15:32:33 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file /nas/pretrain-bert/pretrain-tensorflow/uncased_L-12_H-768_A-12/vocab.txt


In [84]:
def reverse(l):
    """Return a new list holding the items of sequence ``l`` in reverse order.

    The input is left untouched; the result is always a ``list`` regardless of
    the input's own type.
    """
    flipped = []
    flipped.extend(reversed(l))
    return flipped

In [85]:
def mask(ent_str):
    """Wrap the entity word of ``ent_str`` in square brackets.

    Accepted inputs (after stripping and whitespace-splitting):
      * one word            -> ``"[word]"``
      * ``"the"`` + a word  -> ``"the [word]"``

    Anything else (no words, more than two words, or a two-word string whose
    first word is not ``"the"``) fails an assertion carrying ``ent_str``.
    """
    words = ent_str.strip().split()
    count = len(words)
    # Only bare entities or "the <entity>" are supported.
    assert count in (1, 2), ent_str
    if count == 1:
        return '[%s]' % words[0]
    article, noun = words
    assert article == 'the', ent_str
    return '%s [%s]' % (article, noun)

In [86]:
# Clause A: "<dt> <ent0> <rel> <dt> <ent1> <rel_suffix>" -- a relation stated between the two entities.
A_template = "{dt} {ent0} {rel} {dt} {ent1} {rel_suffix}"
# Clause B: "<dt> <ent> <pred>" -- make_sentences passes the entity through mask(),
# so it is rendered inside [brackets].
B_template = "{dt} {ent} {pred}"

# For both template tables, make_sentences selects the inner list at index
# int(reverse_causal): [0] when reverse_causal is False, [1] when it is True.
# The trailing comments on each row keep an alternative clause order that is currently disabled.
causal_templates = [["{A} because {B}."],# "{B} so {A}."], 
                    ["{A} so {B}."],# "{B} because {A}."]
                   ]
turning_templates = [["{A} although {B}."],# "{B} but {A}."], 
                     ["{A} but {B}."],# "{B} although {A}."]
                    ]

In [87]:
def make_sentences(A_template, B_template, causal_templates, turning_templates,
                   index=-1, orig_sentence='', entities=["John", "Mary"], entity_substitutes=None, determiner="", 
                   packed_relations=["rel/~rel", "rev_rel/~rev_rel"], packed_relation_substitutes=None, relation_suffix="",
                   packed_predicates=["pred0/~pred0", "pred1/~pred1"], predicate_substitutes=None,
                   predicate_dichotomy=True, reverse_causal=False):
    assert entities[0].lower() in tokenizer.vocab , entities[0]
    assert entities[1].lower() in tokenizer.vocab , entities[1]
    
    relations, neg_relations = zip(*[rel.split("/") for rel in packed_relations])
    relations, neg_relations = list(relations), list(neg_relations)
    predicates, neg_predicates = zip(*[pred.split("/") for pred in packed_predicates])
    predicates, neg_predicates = list(predicates), list(neg_predicates)
        
    As = [A_template.format(dt=determiner, ent0=ent0, ent1=ent1, rel=rel, rel_suffix=relation_suffix) 
          for ent0, ent1, rel in [entities + relations[:1], reverse(entities) + reverse(relations)[:1]]]
    negAs = [A_template.format(dt=determiner, ent0=ent0, ent1=ent1, rel=rel, rel_suffix=relation_suffix) 
             for ent0, ent1, rel in [entities + neg_relations[:1], reverse(entities) + reverse(neg_relations)[:1]]]

    Bs = [B_template.format(dt=determiner, ent=mask(ent), pred=pred) for ent, pred in zip(entities, predicates)]
    negBs = [B_template.format(dt=determiner, ent=mask(ent), pred=pred) for ent, pred in zip(entities, neg_predicates)]
    if predicate_dichotomy:
        Bs += [B_template.format(dt=determiner, ent=mask(ent), pred=pred) for ent, pred in zip(entities, reversed(neg_predicates))]
        negBs += [B_template.format(dt=determiner, ent=mask(ent), pred=pred) for ent, pred in zip(entities, reversed(predicates))]

    def form_sentences(sentence_template, As, Bs):
        return [" ".join(sentence_template.format(A=A, B=B).split()) for A, B in product(As, Bs)]

    causal_sentences = []
    for causal_template in causal_templates[int(reverse_causal)]:
        for A, B in [(As, Bs), (negAs, negBs)]:
            causal_sentences.extend(form_sentences(causal_template, A, B))

    turning_sentences = []
    for turning_template in turning_templates[int(reverse_causal)]:
        for A, B in [(As, negBs), (negAs, Bs)]:
            turning_sentences.extend(form_sentences(turning_template, A, B))
            
    sentences = causal_sentences + turning_sentences
    substituted_sentences = sentences
    
    if packed_relation_substitutes is not None:
        packed_relation_substitutes = list(itertools.product(packed_relations[:1] + packed_relation_substitutes[0], 
                                                             packed_relations[1:] + packed_relation_substitutes[1]))
        substituted_sentences = []
        for packed_sub_relations in packed_relation_substitutes:
            sub_relations, sub_neg_relations = zip(*[rel.split("/") for rel in packed_sub_relations])
            substituted_sentences += [sent.replace(relations[0], sub_relations[0]).replace(relations[1], sub_relations[1])
                                      .replace(neg_relations[0], sub_neg_relations[0]).replace(neg_relations[1], sub_neg_relations[1]) 
                                      for sent in sentences]
        substituted_sentences = list(set(substituted_sentences))
        
    if entity_substitutes is not None:
        for sub in entity_substitutes:
            for ent in sub:
                assert ent.lower() in tokenizer.vocab , ent + " not in BERT vocab"
        assert len(set(chain.from_iterable(entity_substitutes))) == 4, entity_substitutes
        assert len(set(chain.from_iterable(entity_substitutes)).union(set(entities))) == 6 
        
        entity_substitutes = list(itertools.product(entities[:1] + entity_substitutes[0], entities[1:] + entity_substitutes[1]))
        substituted_sentences = [sent.replace(entities[0], sub[0]).replace(entities[1], sub[1]) 
                                 for sent in substituted_sentences for sub in entity_substitutes]
    return causal_sentences, turning_sentences, substituted_sentences

In [158]:
# Hand-written sentence frames. Each dict is splatted into make_sentences(**frame),
# so every key must be the name of one of its keyword parameters.
frames = \
[
    {
        "index": 2,
        "orig_sentence": "The trophy doesn't fit into the brown suitcase because [it] is too large/small.",
        "entities": ["trophy", "suitcase"],
        # Key was misspelled "entitity_substitutes", which make_sentences rejects with a TypeError.
        "entity_substitutes": [["ball", "toy"], ["bag", "box"]],
        "determiner": "the",
        "packed_relations": ["doesn't fit into/can fit into", "doesn't hold/can hold"],
        "packed_relation_substitutes": [["can't be put into/can be put into"], ["doesn't have enough room for/has enough room for"]],
        "relation_suffix": "",
        "packed_predicates": ["is large/isn't large", "is small/isn't small"],
        "predicate_dichotomy": True,
        "reverse_causal": False
    },
    {
        "index": 4,
        "orig_sentence": "Joan made sure to thank Susan for all the help [she] had recieved/given.",
        "entities": ["John", "Susan"],
        "entity_substitutes": [["David", "Michael"], ["Mary", "Linda"]],
        "determiner": "",
        # NOTE(review): "didn't good care of" looks like it is missing the verb "take" -- confirm.
        "packed_relations": ["thanked/didn't thank", "took good care of/didn't good care of"],
        "packed_relation_substitutes": [["felt grateful to/didn't feel grateful to"], ["was appreciated by/wasn't appreciated by"]],
        "relation_suffix": "",
        "packed_predicates": ["had received a lot of help/hadn't received a lot of help", "had given a lot of help/hadn't given a lot of help"],
        "predicate_dichotomy": False,
        "reverse_causal": False
    },
    {
        "index": 4000,
        "orig_sentence": "John gave a lot of money to Susan because [he] was very rich/poor.",
        "entities": ["John", "Susan"],
        "entity_substitutes": [["David", "Michael"], ["Mary", "Linda"]],
        "determiner": "",
        "packed_relations": ["gave a lot of money to/didn't give a lot of money to", "received a lot of money from/didn't receive a lot of money from"],
        "packed_relation_substitutes": [["subsidized/didn't subsidize"], ["borrowed a lot of money from/didn't borrow any money from"]],
        "relation_suffix": "",
        "packed_predicates": ["was rich/wasn't rich", "was poor/wasn't poor"],
        "predicate_dichotomy": True,
        "reverse_causal": False
    },
    {
        "index": 10,
        "orig_sentence": "The delivery truck zoomed by the school bus because [it] was going so fast/slow.",
        "entities": ["truck", "bus"],
        "entity_substitutes": [["car", "ambulance"], ["bicycle", "tram"]],
        "determiner": "the",
        "packed_relations": ["overtook/couldn't overtake", "fell far behind/didn't fall far behind"],
        "packed_relation_substitutes": [["zoomed by/didn't pass"], ["was left behind/wasn't left far behind"]],
        "relation_suffix": "",
        "packed_predicates": ["was going fast/wasn't going fast", "was going slow/wasn't going slow"],
        "predicate_dichotomy": True,
        "reverse_causal": False
    },
    {
        "index": 12,
        "orig_sentence": "Frank felt vindicated/crushed when his longtime rival Bill revealed that [he] was the winner of the competition.",
        "entities": ["John", "Susan"],
        "entity_substitutes": [["David", "Michael"], ["Mary", "Linda"]],
        "determiner": "",
        "packed_relations": ["beat/didn't beat", "lost to/didn't lose to"],
        # NOTE(review): "didn't be defeated by" is ungrammatical ("wasn't defeated by"?) -- confirm.
        "packed_relation_substitutes": [["defeated/didn't defeat"], ["was defeated by/didn't be defeated by"]],
        "relation_suffix": "in the game",
        "packed_predicates": ["was happy/wasn't happy", "was sad/wasn't sad"],
        "predicate_dichotomy": True,
        "reverse_causal": True
    },
    {
        "index": 16,
        "orig_sentence": "The large ball crashed right through the table because [it] was made of steel/styrofoam.",
        "entities": ["ball", "board"],
        "entity_substitutes": [["bullet", "arrow"], ["shield", "disk"]],
        "determiner": "the",
        "packed_relations": ["crashed right through/didn't crash through", "failed to block/blocked"],
        "packed_relation_substitutes": [["penetrated through/didn't penetrate through"], ["failed to stop/stopped"]],
        "relation_suffix": "",
        "packed_predicates": ["was hard/wasn't hard", "was soft/wasn't soft"],
        "predicate_dichotomy": True,
        "reverse_causal": True
    },
    {
        "index": 18,
        "orig_sentence": "John couldn't see the stage with Billy in front of him because [he] is so short.",
        "entities": ["John", "Susan"],
        "entity_substitutes": [["David", "Edward"], ["Betty", "Donna"]],
        "determiner": "",
        # NOTE(review): "the stage which behind" reads as broken English -- confirm intended wording.
        "packed_relations": ["couldn't see the stage which behind/could see the stage which behind", "blocked the view of/couldn't block the view of"],
        "packed_relation_substitutes": [["couldn't find the stage which behind/could find the stage which behind"], ["obstructed the view of/couldn't obstruct the view of"]],
        "relation_suffix": "",
        "packed_predicates": ["is short/isn't short", "is tall/isn't tall"],
        "predicate_dichotomy": True,
        "reverse_causal": True
    },
    {
        "index": 20,
        "orig_sentence": "Tom threw his schoolbag down to Ray after [he] reached the top of the stairs.",
        "entities": ["Brian", "Amy"],
        "entity_substitutes": [["Charles", "Paul"], ["Emma", "Linda"]],
        "determiner": "",
        # NOTE(review): "through" is presumably "threw" and "buttom" presumably "bottom" -- confirm
        # before regenerating sentences, since the text is emitted verbatim.
        "packed_relations": ["through the schoolbag down to/through the schoolbag up to", "caught the schoolbag thrown down by/caught the schoolbag thrown up by"],
        "packed_relation_substitutes": [["cast the schoolbag down to/cast the schoolbag up to"], ["took the schoolbag thrown down by/took the schoolbag thrown up by"]],
        "relation_suffix": "",
        "packed_predicates": ["is on the top/isn't on the top", "is at the buttom/isn't at the buttom"],
        "predicate_dichotomy": True,
        "reverse_causal": True
    },
    {
        # Index corrected from a duplicated 20: this orig_sentence is example 22 in `examples`.
        "index": 22,
        "orig_sentence": "Although they ran at about the same speed, Sue beat Sally because [she] had such a good start.",
        # TODO: entities, relations and predicates below are identical to the schoolbag frame
        # above and do not match this sentence yet.
        "entities": ["Brian", "Amy"],
        "entity_substitutes": [["Charles", "Paul"], ["Emma", "Linda"]],
        "determiner": "",
        "packed_relations": ["through the schoolbag down to/through the schoolbag up to", "caught the schoolbag thrown down by/caught the schoolbag thrown up by"],
        "packed_relation_substitutes": [["cast the schoolbag down to/cast the schoolbag up to"], ["took the schoolbag thrown down by/took the schoolbag thrown up by"]],
        "relation_suffix": "",
        "packed_predicates": ["is on the top/isn't on the top", "is at the buttom/isn't at the buttom"],
        "predicate_dichotomy": True,
        "reverse_causal": True
    },
]

In [156]:
# Generate sentences for the most recently added frame; the frame dict supplies
# the keyword arguments of make_sentences.
causal_sentences, turning_sentences, substituted_sentences = \
    make_sentences(A_template, B_template, causal_templates, turning_templates, **frames[-1])

In [157]:
# Inspect all three results; each bare name is displayed because
# ast_node_interactivity is set to 'all' in the first cell.
causal_sentences
turning_sentences
substituted_sentences

['Brian through the schoolbag down to Amy because [Brian] is on the top.',
 'Brian through the schoolbag down to Amy because [Amy] is at the buttom.',
 'Amy caught the schoolbag thrown down by Brian because [Brian] is on the top.',
 'Amy caught the schoolbag thrown down by Brian because [Amy] is at the buttom.',
 "Brian through the schoolbag up to Amy because [Brian] isn't on the top.",
 "Brian through the schoolbag up to Amy because [Amy] isn't at the buttom.",
 "Amy caught the schoolbag thrown up by Brian because [Brian] isn't on the top.",
 "Amy caught the schoolbag thrown up by Brian because [Amy] isn't at the buttom."]

["Brian through the schoolbag down to Amy although [Brian] isn't on the top.",
 "Brian through the schoolbag down to Amy although [Amy] isn't at the buttom.",
 "Amy caught the schoolbag thrown down by Brian although [Brian] isn't on the top.",
 "Amy caught the schoolbag thrown down by Brian although [Amy] isn't at the buttom.",
 'Brian through the schoolbag up to Amy although [Brian] is on the top.',
 'Brian through the schoolbag up to Amy although [Amy] is at the buttom.',
 'Amy caught the schoolbag thrown up by Brian although [Brian] is on the top.',
 'Amy caught the schoolbag thrown up by Brian although [Amy] is at the buttom.']

["Brian through the schoolbag up to Amy because [Amy] isn't at the buttom.",
 "Brian through the schoolbag up to Emma because [Emma] isn't at the buttom.",
 "Brian through the schoolbag up to Linda because [Linda] isn't at the buttom.",
 "Charles through the schoolbag up to Amy because [Amy] isn't at the buttom.",
 "Charles through the schoolbag up to Emma because [Emma] isn't at the buttom.",
 "Charles through the schoolbag up to Linda because [Linda] isn't at the buttom.",
 "Paul through the schoolbag up to Amy because [Amy] isn't at the buttom.",
 "Paul through the schoolbag up to Emma because [Emma] isn't at the buttom.",
 "Paul through the schoolbag up to Linda because [Linda] isn't at the buttom.",
 "Amy caught the schoolbag thrown up by Brian because [Amy] isn't at the buttom.",
 "Emma caught the schoolbag thrown up by Brian because [Emma] isn't at the buttom.",
 "Linda caught the schoolbag thrown up by Brian because [Linda] isn't at the buttom.",
 "Amy caught the schoolbag thro

In [40]:
# Each entry is (index, sentence with one pronoun marked in [brackets], short tag string).
examples = [
 (22,
  'Although they ran at about the same speed, Sue beat Sally because [she] had such a good start.',
  'beat:good/bad'),
 (26,
  "Sam's drawing was hung just above Tina's and [it] did look much better with another one below it.",
  'above/below'),
 (28,
  'Anna did a lot better than her good friend Lucy on the test because [she] had studied so hard.',
  'better/worse:study hard'),
 (30,
  'The firemen arrived after the police because [they] were coming from so far away.',
  'after/before:far away'),
 (32,
  "Frank was upset with Tom because the toaster [he] had bought from him didn't work.",
  'be upset with:buy from not work/sell not work'),
 (36,
  'The sack of potatoes had been placed above the bag of flour, so [it] had to be moved first.',
  'above/below:moved first'),
 (38,
  'Pete envies Martin although [he] is very successful.',
  'although/because'),
 (42,
  'I poured water from the bottle into the cup until [it] was empty.',
  'pour:empty/full'),
 (46,
  "Sid explained his theory to Mark but [he] couldn't convince him.",
  'explain:convince/understand'),
 (48,
  "Susan knew that Ann's son had been in a car accident, so [she] told her about it.",
  '?know tell:so/because'),
 (50,
  "Joe's uncle can still beat him at tennis, even though [he] is 30 years younger.",
  'beat:younger/older'),
 (64,
  'In the middle of the outdoor concert, the rain started falling, but [it] continued until 10.',
  'but/and'),
 (68,
  'Ann asked Mary what time the library closes, because [she] had forgotten.',
  'because/but'),
 (84,
  'If the con artist has succeeded in fooling Sam, [he] would have gotten a lot of money.',
  'fool:get/lose'),
 (92,
  'Alice tried frantically to stop her daughter from chatting at the party, leaving us to wonder why [she] was behaving so strangely.',
  '?stop normal/stop abnormal:strange'),
 (98,
  "I was trying to open the lock with the key, but someone had filled the  keyhole with chewing gum, and I couldn't get [it] in.",
  'put ... into filled with ... :get in/get out'),
 (100,
  'The dog chased the cat, which ran up a tree. [It] waited at the bottom.',
  'up:at the bottom/at the top'),
 (106,
  'John was doing research in the library when he heard a man humming and  whistling. [He] was very annoyed.',
  'hear ... humming and whistling:annoyed/annoying'),
 (108,
  'John was jogging through the park when he saw a man juggling watermelons. [He] was very impressed.',
  'see ... juggling watermelons:impressed/impressive'),
 (132,
  'Jane knocked on the door, and Susan answered it. [She] invited her to come out.',
  'visit:invite come out/invite come in'),
 (150,
  'Jackson was greatly influenced by Arnold, though [he] lived two centuries later.',
  'influence:later/earlier'),
 (160,
  'The actress used to be named Terpsichore, but she changed it to Tina a  few years ago, because she figured [it] was too hard to pronounce.',
  'change:hard/easy'),
 (166,
  'Fred is the only man still alive who remembers my great-grandfather. [He] is a remarkable man.',
  'alive:is/was'),
 (170,
  "In July, Kamtchatka declared war on Yakutsk. Since Yakutsk's army was much  better equipped and ten times larger, [they] were defeated within weeks.",
  'better equipped and large:defeated/victorious'),
 (186,
  'When the sponsors of the bill got to the town hall, they were surprised to find that the room was full of opponents. [They] were very much in the minority.',
  'be full of:minority/majority'),
 (188,
  'Everyone really loved the oatmeal cookies; only a few people liked the  chocolate chip cookies. Next time, we should make more of [them] .',
  'like over:more/fewer'),
 (190,
  'We had hoped to place copies of our newsletter on all the chairs in the auditorium, but there were simply not enough of [them] .',
  'place on all:not enough/too many'),
 (196,
  "Steve follows Fred's example in everything. [He] admires him hugely.",
  'follow:admire/influence'),
 (198,
  "The table won't fit through the doorway because [it] is too wide.",
  'fit through:wide/narrow'),
 (200,
  'Grace was happy to trade me her sweater for my jacket. She thinks [it] looks dowdy on her.',
  'trade:dowdy/great'),
 (202,
  'John hired Bill to take care of [him] .',
  'hire/hire oneself to:take care of'),
 (204,
  'John promised Bill to leave, so an hour later [he] left.',
  'promise/order'),
 (210,
  "Jane knocked on Susan's door but [she] did not get an answer.",
  'knock:get an answer/answer'),
 (212,
  'Joe paid the detective after [he] received the final report on the case.',
  'pay:receive/deliver'),
 (226,
  'Bill passed the half-empty plate to John because [he] was full.',
  'pass the plate:full/hungry'),
 (252,
  'George got free tickets to the play, but he gave them to Eric, even though [he] was particularly eager to see it.',
  'even though/because/not'),
 (255,
  "Jane gave Joan candy because [she] wasn't hungry.",
  'give:not hungry/hungry'),
 (259,
  'James asked Robert for a favor but [he] was refused.',
  # A stray trailing backtick was removed from this tag.
  'ask for a favor:refuse/be refused'),
 (261,
  'Kirilov ceded the presidency to Shatov because [he] was less popular.',
  'cede:less popular/more popular'),
 (263,
  'Emma did not pass the ball to Janie although [she] saw that she was open.',
  'not pass although:see open/open')]

In [77]:
# Number of entries currently in `examples`.
len(examples)

47