In [1]:
# Detect whether this code runs under IPython/Jupyter or as a plain script.
# The bare name `__IPYTHON__` raises NameError outside IPython, which is what
# selects the fallback branch.  The autoreload magics are issued through
# `get_ipython().run_line_magic(...)` instead of `%`-syntax: `%load_ext` is not
# valid Python, so with the `%` form the file cannot even be parsed by a plain
# interpreter and the NameError fallback would never get a chance to run.
try:
    __IPYTHON__
    USING_IPYTHON = True
    # Same effect as `%load_ext autoreload` followed by `%autoreload 2`.
    get_ipython().run_line_magic('load_ext', 'autoreload')  # noqa: F821
    get_ipython().run_line_magic('autoreload', '2')  # noqa: F821
except NameError:
    USING_IPYTHON = False

#### Argparse

In [2]:
import argparse

# Command-line interface.  `project_root` is the only required argument; every
# other option has a default.  The directory/file options are joined onto
# `project_root` with os.path.join by the cells further down.
ap = argparse.ArgumentParser()
ap.add_argument('project_root', help='')
ap.add_argument('--mrp-data-dir', default='data', help='')
ap.add_argument('--mrp-test-dir', default='src/tests', help='')
ap.add_argument('--tests-fixtures-file', default='fixtures/test.jsonl', help='')

ap.add_argument('--graphviz-sub-dir', default='visualization/graphviz', help='')
ap.add_argument('--train-sub-dir', default='training', help='')
ap.add_argument('--companion-sub-dir', default='companion')
ap.add_argument('--jamr-alignment-file', default='jamr.mrp')

ap.add_argument('--test-input-file', default='evaluation/input.mrp', help='')
ap.add_argument('--test-companion-file', default='evaluation/udpipe.mrp', help='')
ap.add_argument('--allennlp-mrp-json-file-template', default='allennlp-mrp-json-small-{}.jsonl', help='')


ap.add_argument('--mrp-file-extension', default='.mrp')
ap.add_argument('--companion-file-extension', default='.conllu')
ap.add_argument('--graphviz-file-template', default='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.mrp/{}.png')
ap.add_argument('--parse-plot-file-template', default='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.png')

# Arguments used when running inside the notebook, where there is no real
# command line.  NOTE(review): the path below is machine-specific.
arg_string = """
    /data/proj29_ds1/home/slai/mrp2019
"""
# Tokenize on any run of whitespace, newlines included.  The previous code first
# split on r'\\n' (a literal backslash-backslash-n, which never occurs in the
# text) and only produced the right result because the inner .split() did the
# real work.
arguments = arg_string.split()

In [3]:
# Under IPython, feed argparse the in-notebook argument list; otherwise pass
# None, which makes argparse fall back to the process command line.
args = ap.parse_args(arguments if USING_IPYTHON else None)

In [4]:
# Display the parsed namespace to check which paths and templates are in effect.
args

Namespace(allennlp_mrp_json_file_template='allennlp-mrp-json-small-{}.jsonl', companion_file_extension='.conllu', companion_sub_dir='companion', graphviz_file_template='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.mrp/{}.png', graphviz_sub_dir='visualization/graphviz', jamr_alignment_file='jamr.mrp', mrp_data_dir='data', mrp_file_extension='.mrp', mrp_test_dir='src/tests', parse_plot_file_template='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.png', project_root='/data/proj29_ds1/home/slai/mrp2019', test_companion_file='evaluation/udpipe.mrp', test_input_file='evaluation/input.mrp', tests_fixtures_file='fixtures/test.jsonl', train_sub_dir='training')

#### Library imports

In [5]:
import json
import logging
import os
import pprint
import re
import string
from collections import Counter, defaultdict, deque

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import plot_util
import torch
from action_state import mrp_json2parser_states, _generate_parser_action_states
from action_state import ERROR, APPEND, RESOLVE, IGNORE
from preprocessing import (CompanionParseDataset, MrpDataset, JamrAlignmentDataset,
                           read_companion_parse_json_file, read_mrp_json_file, parse2parse_json)            
from torch import nn
from tqdm import tqdm

#### ipython notebook specific imports

In [6]:
if USING_IPYTHON:
    # matplotlib config: same effect as `%matplotlib inline`, written as a
    # regular call so the file still parses when run by plain Python (the
    # `%` form is IPython-only syntax and defeats the USING_IPYTHON guard).
    get_ipython().run_line_magic('matplotlib', 'inline')  # noqa: F821

In [7]:
# Root logging setup: a single stream handler whose lines look like
# "<LEVEL>   [<logger name>:<line number>] <message>", with the root threshold
# requested as DEBUG.
formatter = logging.Formatter('%(levelname)-8s [%(name)s:%(lineno)d] %(message)s')
sh = logging.StreamHandler()
sh.setFormatter(formatter)
logging.basicConfig(handlers=[sh], level=logging.DEBUG)

# The logger of this module itself only reports INFO and above.
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
logger.setLevel(logging.INFO)

### Constants

In [8]:
# Presumably a sentinel for unknown entries; it is not used in the visible part
# of the notebook.  NOTE(review): both the name and the value are spelled
# "UNKWOWN" (sic).  Left unchanged because other cells or stored data may
# depend on the exact spelling -- confirm before renaming.
UNKWOWN = 'UNKWOWN'

### Load data

In [9]:
# Training data directory: <project_root>/<mrp_data_dir>/<train_sub_dir>.
train_dir = os.path.join(args.project_root, args.mrp_data_dir, args.train_sub_dir)

In [10]:
# Dataset object from the local `preprocessing` module; used below to load the
# training graphs.
mrp_dataset = MrpDataset()

In [11]:
# Load the training graphs from `train_dir`, passing the configured MRP file
# extension.  The second result is indexed further down as
# [framework][dataset][position].
frameworks, framework2dataset2mrp_jsons = mrp_dataset.load_mrp_json_dir(
    train_dir, args.mrp_file_extension)

frameworks:   0%|          | 0/5 [00:00<?, ?it/s]
dataset_name:   0%|          | 0/2 [00:00<?, ?it/s][A
dataset_name:  50%|█████     | 1/2 [00:00<00:00,  2.73it/s][A
frameworks:  20%|██        | 1/5 [00:00<00:02,  1.49it/s]s][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
frameworks:  40%|████      | 2/5 [00:04<00:04,  1.50s/it]t][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
frameworks:  60%|██████    | 3/5 [00:08<00:04,  2.35s/it]t][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
frameworks:  80%|████████  | 4/5 [00:12<00:03,  3.01s/it]t][A
dataset_name:   0%|          | 0/14 [00:00<?, ?it/s][A
dataset_name:  43%|████▎     | 6/14 [00:00<00:00, 21.83it/s][A
dataset_name:  57%|█████▋    | 8/14 [00:00<00:00, 18.61it/s][A
dataset_name:  71%|███████▏  | 10/14 [00:01<00:00,  6.66it/s][A
dataset_name:  79%|███████▊  | 11/14 [00:01<00:00,  6.00it/s][A
frameworks: 100%|██████████| 5/5 [00:14<00:00,  2.54s/it]t/s][A


### Companion data preprocessing

In [12]:
# Companion parse directory: <project_root>/<mrp_data_dir>/<companion_sub_dir>.
companion_dir = os.path.join(args.project_root, args.mrp_data_dir, args.companion_sub_dir)

In [13]:
# Dataset object from the local `preprocessing` module; used below to load and
# convert the companion parses.
cparse_dataset = CompanionParseDataset()

In [14]:
# Load the companion parses from `companion_dir`, passing the configured file
# extension.  The result is keyed by dataset name; each dataset entry supports
# membership tests by id string (see the cells below).
dataset2cid2parse = cparse_dataset.load_companion_parse_dir(companion_dir, args.companion_file_extension)

INFO     [preprocessing:168] framework amr found
dataset: 100%|██████████| 13/13 [00:01<00:00,  9.95it/s]
INFO     [preprocessing:168] framework dm found
dataset: 100%|██████████| 5/5 [00:03<00:00,  1.20it/s]
INFO     [preprocessing:168] framework ucca found
dataset: 100%|██████████| 6/6 [00:00<00:00, 34.45it/s]


In [15]:
# JSON form of the loaded parses; indexed further down as [dataset][cid], and
# each entry is read through its 'nodes' list.
dataset2cid2parse_json = cparse_dataset.convert_parse2parse_json()

In [16]:
# Display the dataset names for which companion parses were loaded.
dataset2cid2parse.keys()

dict_keys(['amr-guidelines', 'bolt', 'cctv', 'dfa', 'dfb', 'fables', 'lorelei', 'mt09sdl', 'proxy', 'rte', 'wb', 'wiki', 'xinhua', 'wsj', 'ewt'])

In [17]:
# Some data is missing: for example, id '20003001' has no companion parse in
# the 'wsj' dataset, so this membership test evaluates to False.
'20003001' in dataset2cid2parse['wsj']

False

### Load JAMR alignment data

In [18]:
# Dataset object from the local `preprocessing` module; used below to load the
# JAMR alignment file.
jalignment_dataset = JamrAlignmentDataset()

In [19]:
# The JAMR alignment file sits inside the companion directory:
# <project_root>/<mrp_data_dir>/<companion_sub_dir>/<jamr_alignment_file>.
jamr_alignment_path = os.path.join(
    args.project_root, args.mrp_data_dir, args.companion_sub_dir, args.jamr_alignment_file)
cid2alignment = jalignment_dataset.load_jamr_alignment_file(jamr_alignment_path)

### Load testing data

In [20]:
# Evaluation files, both resolved relative to <project_root>/<mrp_data_dir>:
# the test input graphs and their companion parses.
test_input_filename = os.path.join(args.project_root, args.mrp_data_dir, args.test_input_file)
test_companion_filename = os.path.join(args.project_root, args.mrp_data_dir, args.test_companion_file)

In [21]:
# Read the evaluation inputs with the helpers from the local `preprocessing`
# module.  `test_parse_jsons` is indexed by id string in the next cell.
test_mrp_jsons = read_mrp_json_file(test_input_filename)
test_parse_jsons = read_companion_parse_json_file(test_companion_filename)

In [22]:
# Pick one evaluation companion parse by id.
# NOTE(review): `parse_json` is reassigned a few cells below before this value
# is used anywhere in the visible part of the notebook.
parse_json = test_parse_jsons['102990']

In [23]:
# Pick the second 'psd' graph of the 'wsj' dataset.
# NOTE(review): `mrp_json` is reassigned a few cells below before this value is
# used anywhere in the visible part of the notebook.
mrp_json = framework2dataset2mrp_jsons['psd']['wsj'][1]

In [24]:
# Framework/dataset pair inspected by the cells below.
framework = 'ucca'
dataset = 'wiki'

# Alternative selection, kept for quick switching:
# framework = 'dm'
# dataset = 'wsj'

In [25]:
# Use the second id (index 1) among the chosen dataset's companion parses as
# the example sentence for the cells below.
cid = list(dataset2cid2parse_json[dataset].keys())[1]

In [26]:
# Find the first graph of the selected framework/dataset whose 'id' equals
# `cid`, together with its position in the list.  A lazy generator with `next`
# stops at the first hit instead of scanning the whole list, and the loop
# variables no longer reuse the names being assigned.
_matching_graphs = (
    (position, candidate)
    for position, candidate in enumerate(framework2dataset2mrp_jsons[framework][dataset])
    if candidate.get('id') == cid
)
try:
    idx, mrp_json = next(_matching_graphs)
except StopIteration:
    # Still an IndexError, as with the former `[...][0]`, but now with context.
    raise IndexError(
        'no {}/{} graph with id {!r}'.format(framework, dataset, cid)) from None
idx

70

In [27]:
# Companion parse of the example sentence selected by `cid`.
parse_json = dataset2cid2parse_json[dataset][cid]

In [28]:
# Raw sentence text of the selected graph (its 'input' field).
doc = mrp_json['input']

In [29]:
# Display the sentence text.
doc

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [30]:
# Walk the companion tokens left to right through `doc`.  For each token this
# records its (start, end) character span in `anchors`, and for each character
# consumed so far the index of the token it is attributed to.
# NOTE(review): assumes every token label has exactly the length of its
# surface form in `doc` and that tokens are separated only by ' ' -- confirm.
token_pos = 0
anchors = []
char_pos2tokenized_parse_node_id = []

for node_id, node in enumerate(parse_json.get('nodes')):
    label = node.get('label')
    label_size = len(label)
    # Blanks in front of a token are attributed to that following token.
    while doc[token_pos] == ' ':
        char_pos2tokenized_parse_node_id.append(node_id)
        token_pos += 1
    token_end = token_pos + label_size
    anchors.append((token_pos, token_end))
    char_pos2tokenized_parse_node_id += [node_id] * label_size
    # Progress line: token index, covered text, span, characters mapped so far.
    print(node_id, doc[token_pos:token_end], anchors[-1], len(char_pos2tokenized_parse_node_id))
    token_pos = token_end

0 In (0, 2) 2
1 the (3, 6) 6
2 final (7, 12) 12
3 minute (13, 19) 19
4 of (20, 22) 22
5 the (23, 26) 26
6 game (27, 31) 31
7 , (31, 32) 32
8 Johnson (33, 40) 40
9 had (41, 44) 44
10 the (45, 48) 48
11 ball (49, 53) 53
12 stolen (54, 60) 60
13 by (61, 63) 63
14 Celtics (64, 71) 71
15 center (72, 78) 78
16 Robert (79, 85) 85
17 Parish (86, 92) 92
18 , (92, 93) 93
19 and (94, 97) 97
20 then (98, 102) 102
21 missed (103, 109) 109
22 two (110, 113) 113
23 free (114, 118) 118
24 throws (119, 125) 125
25 that (126, 130) 130
26 could (131, 136) 136
27 have (137, 141) 141
28 won (142, 145) 145
29 the (146, 149) 149
30 game (150, 154) 154
31 . (154, 155) 155


In [31]:
# Display the sentence text again, for comparison with the spans printed above.
doc

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [32]:
# Number of character positions that were mapped to a token index.
len(char_pos2tokenized_parse_node_id)

155

In [33]:
# Re-read the sentence text from the selected graph (same assignment as in an
# earlier cell).
doc = mrp_json['input']

In [34]:
# Display the 'tops' field of the selected graph.
mrp_json['tops']

[34]

In [35]:
# Convert the selected graph into parser states plus metadata, passing the
# companion tokens as `tokenized_parse_nodes`.
# NOTE(review): with the root log level at DEBUG this call emits a very large
# amount of log output from `action_state`.
mrp_parser_states, mrp_meta_data = mrp_json2parser_states(
    mrp_json, 
    tokenized_parse_nodes=parse_json['nodes'],
)

DEBUG    [action_state:60] ('remote 1', 3)
DEBUG    [action_state:60] ('remote 1', 10)
DEBUG    [action_state:60] ('remote 1', 11)
DEBUG    [action_state:207] {34}
DEBUG    [action_state:207] {33, 39, 17, 18, 19}
DEBUG    [action_state:207] {32, 35, 36, 38, 7, 8, 9, 40, 12, 24}
DEBUG    [action_state:207] {4, 5, 6, 37, 41, 10, 11, 13, 16, 20, 21, 22, 23, 25, 26, 27, 31}
DEBUG    [action_state:207] {0, 1, 2, 3, 14, 15, 28, 29, 30}
DEBUG    [action_state:85] ('remote 2', 3)
DEBUG    [action_state:85] ('remote 2', 10)
DEBUG    [action_state:85] ('remote 2', 11)
DEBUG    [action_state:415] ('prev anchors', 0)
DEBUG    [action_state:428] ('anchors', 0, 2, 0, 1)
DEBUG    [action_state:434] ('curr_node_id', 0)
DEBUG    [action_state:457] (0, [], True, True, True, True)
DEBUG    [action_state:503] (0, 0, [(0, 0, [(0, 0, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 0, 'anchors': [{'from': 0, 'to': 2}], 'label': 'In'}, [[]]))]
DEBUG    [action_state:517] (0, 31, 31, {1, 2, 3}

DEBUG    [action_state:503] (32,
 32,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])])])
DEBUG    [action_state:504] [(1, (4, {'id': 32}, [[{'source': 32, 'target': 31, 'label': 'E', 'id': 42, 'parent': 32, 'child': 31}], [{'source': 32, 'target': 4, 'label': 'R', 'id': 0, 'parent': 32, 'child': 4}], [{'source': 32, 'target': 5, 'label': 'E', 'id': 5, 'parent': 32, 'child': 5}], [{'source': 32, 'target': 6, 'label': 'C', 'id': 18, 'parent': 32, 'child': 6}]]))]
DEBUG    [action_state:517] (32, 41, 33, {35, 36, 7, 8, 9, 12})
DEBUG    [action_state:577] ('token stack',
 [(32,
   '',
   [(31,
     '',
     [(0, 'In', [(0, 'In', 'In')]),
      (1, 'the', [(1, 'the', 'the')]),
      (2, 'final', [(2, 'final', 'final')]),
      (3, 'minute', [(3, 'minute', 'minute')])]),
    (4, 'of', [(4, 'of', 'of')]),


DEBUG    [action_state:503] (11,
 11,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, [(9, 9, None)]),
  (10, 10, [(10, 10, None)]),
  (11, 11, [(11, 11, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 11, 'anchors': [{'from': 49, 'to': 53}], 'label': 'ball'}, [[]]))]
DEBUG    [action_state:517] (11, 9, 35, set())
DEBUG    [action_state:577] ('token stack',
 [(32,
   '',
   [(31,
     '',
     [(0, 'In', [(0, 'In', 'In')]),
      (1, 'the', [(1, 'the', 'the')]),
      (2, 'final', [(2, 'final', 'final')]),
      (3, 'minute', [(3, 'minute', 'minute')])]),
    (4, 'of', [(4, 'of', 'of')]),
    (5, 'the', [(5, 'the', 'the')]),
    (6, 'game', [(6, 'game', 'game')])]),
  (7, ',', [(7, ',', ',')]),
  (8, 'Johnson', [(8, 'Joh

DEBUG    [action_state:503] (14,
 14,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, [(9, 9, None)]),
  (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
  (12, 12, [(12, 12, None)]),
  (13, 13, [(13, 13, None)]),
  (14, 14, [(14, 14, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 14, 'anchors': [{'from': 64, 'to': 71}], 'label': 'Celtics'}, [[]]))]
DEBUG    [action_state:517] (14, 26, 37, {15})
DEBUG    [action_state:577] ('token stack',
 [(32,
   '',
   [(31,
     '',
     [(0, 'In', [(0, 'In', 'In')]),
      (1, 'the', [(1, 'the', 'the')]),
      (2, 'final', [(2, 'final', 'final')]),
      (3, 'minute', [(3, 'minute', 'minute')])]),
    (4, 'of', [(4, 'of', 'of')]),
    (5, 'the', [(5, 'the', 'the

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 31, 32, 35, 36, 37}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 31, 32, 35, 37}, {32, 35, 36, 37, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 31, 32, 35, 37})
DEBUG    [action_state:434] ('curr_node_id', 36)
DEBUG    [action_state:457] (36,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, [(9, 9, None)]),
  (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
  (12, 12, [(12, 12, None)]),
  (13, 13, [(13, 13, None)]),
  (37, 37, [(14, 14, [(14, 14, None)]), (15, 15, [(15, 15, None)])]),
  (16, 16, [(16, 16, None)])],
 False,
 True,
 False,
 True)
DEBUG    [action_state:503] (36,
 36,


DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 35, 36, 37}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 35, 36, 37}, {32, 33, 35, 36, 37, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 35, 36, 37})
DEBUG    [action_state:415] ('prev anchors', 19)
DEBUG    [action_state:428] ('anchors', 94, 97, 19, 20)
DEBUG    [action_state:434] ('curr_node_id', 18)
DEBUG    [action_state:457] (18,
 [(33,
   33,
   [(32,
     32,
     [(31,
       31,
       [(0, 0, [(0, 0, None)]),
        (1, 1, [(1, 1, None)]),
        (2, 2, [(2, 2, None)]),
        (3, 3, [(3, 3, None)])]),
      (4, 4, [(4, 4, None)]),
      (5, 5, [(5, 5, None)]),
      (6, 6, [(6, 6, None)])]),
    (7, 7, [(7, 7, None)]),
    (8, 8, [(8, 8, None)]),
    (9, 9, [(9, 9, None)]),
    (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
    (12, 12, [(12, 12, None)]),
    (36,
   

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 20, 'anchors': [{'from': 103, 'to': 109}], 'label': 'missed'}, [[]]))]
DEBUG    [action_state:517] (20, 7, 38, {21, 22, 23})
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15,

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 32, 33, 35, 36, 37}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 32, 33, 35, 36, 37}, {32, 33, 35, 36, 37, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 32, 33, 35, 36, 37})
DEBUG    [action_state:415] ('prev anchors', 24)
DEBUG    [action_state:428] ('anchors', 119, 125, 24, 25)
DEBUG    [action_state:434] ('curr_node_id', 23)
DEBUG    [action_state:457] (23,
 [(33,
   33,
   [(32,
     32,
     [(31,
       31,
       [(0, 0, [(0, 0, None)]),
        (1, 1, [(1, 1, None)]),
        (2, 2, [(2, 2, None)]),
        (3, 3, [(3, 3, None)])]),
      (4, 4, [(4, 4, None)]),
      (5, 5, [(5, 5, None)]),
      (6, 6, [(6, 6, None)])]),
    (7, 7, [(7, 7, None)]),
    (8, 8, [(8, 8, None)]),
    (9, 9, [(9, 9, None)]),
    (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(1

DEBUG    [action_state:503] (24,
 24,
 [(33,
   33,
   [(32,
     32,
     [(31,
       31,
       [(0, 0, [(0, 0, None)]),
        (1, 1, [(1, 1, None)]),
        (2, 2, [(2, 2, None)]),
        (3, 3, [(3, 3, None)])]),
      (4, 4, [(4, 4, None)]),
      (5, 5, [(5, 5, None)]),
      (6, 6, [(6, 6, None)])]),
    (7, 7, [(7, 7, None)]),
    (8, 8, [(8, 8, None)]),
    (9, 9, [(9, 9, None)]),
    (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
    (12, 12, [(12, 12, None)]),
    (36,
     36,
     [(13, 13, [(13, 13, None)]),
      (37, 37, [(14, 14, [(14, 14, None)]), (15, 15, [(15, 15, None)])]),
      (16, 16, [(16, 16, None)])])]),
  (17, 17, [(17, 17, None)]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),
  (38,
   38,
   [(20, 20, [(20, 20, None)]),
    (21, 21, [(21, 21, None)]),
    (22, 22, [(22, 22, None)]),
    (23, 23, [(23, 23, None)])]),
  (24, 24, [(24, 24, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 24, 'anchors':

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 26, 'anchors': [{'from': 137, 'to': 141}], 'label': 'have'}, [[]]))]
DEBUG    [action_state:517] (26, 32, 40, {41, 27})
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15, 'cen

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 28, 'anchors': [{'from': 146, 'to': 149}], 'label': 'the'}, [[]]))]
DEBUG    [action_state:517] (28, 15, 41, {29, 30})
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15, 'cent

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 30, 'anchors': [{'from': 154, 'to': 155}], 'label': '.'}, [[]]))]
DEBUG    [action_state:517] (30, 17, 41, set())
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15, 'center', 

DEBUG    [action_state:504] [(1, (4, {'id': 40}, [[{'source': 40, 'target': 25, 'label': 'D', 'id': 40, 'parent': 40, 'child': 25}], [{'source': 40, 'target': 26, 'label': 'F', 'id': 32, 'parent': 40, 'child': 26}], [{'source': 40, 'target': 27, 'label': 'P', 'id': 2, 'parent': 40, 'child': 27}], [{'source': 40, 'target': 41, 'label': 'A', 'id': 16, 'parent': 40, 'child': 41}]]))]
DEBUG    [action_state:517] (40, 43, 39, set())
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]

DEBUG    [action_state:503] (34,
 34,
 [(34,
   34,
   [(33,
     33,
     [(32,
       32,
       [(31,
         31,
         [(0, 0, [(0, 0, None)]),
          (1, 1, [(1, 1, None)]),
          (2, 2, [(2, 2, None)]),
          (3, 3, [(3, 3, None)])]),
        (4, 4, [(4, 4, None)]),
        (5, 5, [(5, 5, None)]),
        (6, 6, [(6, 6, None)])]),
      (7, 7, [(7, 7, None)]),
      (8, 8, [(8, 8, None)]),
      (9, 9, [(9, 9, None)]),
      (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
      (12, 12, [(12, 12, None)]),
      (36,
       36,
       [(13, 13, [(13, 13, None)]),
        (37, 37, [(14, 14, [(14, 14, None)]), (15, 15, [(15, 15, None)])]),
        (16, 16, [(16, 16, None)])])]),
    (17, 17, [(17, 17, None)]),
    (18, 18, [(18, 18, None)]),
    (19, 19, [(19, 19, None)]),
    (39,
     39,
     [(38,
       38,
       [(20, 20, [(20, 20, None)]),
        (21, 21, [(21, 21, None)]),
        (22, 22, [(22, 22, None)]),
        (23, 23, [(23, 23, Non

In [36]:
# Display the last element of the metadata returned by the conversion above.
mrp_meta_data[-1]

[(0, None),
 (1, (1, {'id': 0, 'anchors': [{'from': 0, 'to': 2}], 'label': 'In'}, [[]])),
 (0, None),
 (1, (1, {'id': 1, 'anchors': [{'from': 3, 'to': 6}], 'label': 'the'}, [[]])),
 (0, None),
 (1,
  (1, {'id': 2, 'anchors': [{'from': 7, 'to': 12}], 'label': 'final'}, [[]])),
 (0, None),
 (1,
  (1,
   {'id': 3, 'anchors': [{'from': 13, 'to': 19}], 'label': 'minute'},
   [[]])),
 (1,
  (4,
   {'id': 31},
   [[{'source': 31,
      'target': 0,
      'label': 'R',
      'id': 31,
      'parent': 31,
      'child': 0}],
    [{'source': 31,
      'target': 1,
      'label': 'E',
      'id': 28,
      'parent': 31,
      'child': 1}],
    [{'source': 31,
      'target': 2,
      'label': 'E',
      'id': 21,
      'parent': 31,
      'child': 2}],
    [{'source': 31,
      'target': 3,
      'label': 'C',
      'id': 14,
      'parent': 31,
      'child': 3}]])),
 (0, None),
 (1, (1, {'id': 4, 'anchors': [{'from': 20, 'to': 22}], 'label': 'of'}, [[]])),
 (0, None),
 (1,
  (1, {'id': 5, 'anch

In [37]:
# Run the same conversion on the companion parse itself (`parse_json` takes the
# place of the graph), additionally supplying the raw sentence text as
# `mrp_doc`.
companion_parser_states, companion_meta_data = mrp_json2parser_states(
    parse_json,
    mrp_doc=doc,
    tokenized_parse_nodes=parse_json['nodes'],
)

DEBUG    [action_state:207] {9}
DEBUG    [action_state:207] {3, 7, 8, 12, 18, 21, 31}
DEBUG    [action_state:207] {0, 1, 2, 6, 11, 17, 19, 20, 24}
DEBUG    [action_state:207] {4, 5, 10, 13, 14, 15, 16, 22, 23, 28}
DEBUG    [action_state:207] {25, 26, 27, 30}
DEBUG    [action_state:207] {29}
DEBUG    [action_state:434] ('curr_node_id', 0)
DEBUG    [action_state:457] (0, [], True, True, True, True)
DEBUG    [action_state:503] (0, 0, [(0, 0, [(0, 0, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 0, 'label': 'In', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['in', 'ADP', 'IN']}, [[]]))]
DEBUG    [action_state:517] (0, 0, 3, {1, 2, 6})
DEBUG    [action_state:577] ('token stack', [(0, 'In', [(0, 'In', 'In')])])
DEBUG    [action_state:579] ('visited states', {0}, {0}, set(), {0})
DEBUG    [action_state:434] ('curr_node_id', 1)
DEBUG    [action_state:457] (1, [(0, 0, [(0, 0, None)])], True, True, True, True)
DEBUG    [action_state:503] (1, 1, [(0, 0, [(0, 0, None)]), (

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {3, 6}, {0, 1, 2, 3, 4, 5, 6})
DEBUG    [action_state:434] ('curr_node_id', 7)
DEBUG    [action_state:457] (7,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])])],
 True,
 True,
 True,
 True)
DEBUG    [action_state:503] (7,
 7,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 7, 'label': ',', 'properties': ['lemma', 'upos', 'xpos'], 'values': [',', 'PUNCT', ',']}, [[]]))]
DEBUG    [action_state:517] (7, 7, 9, {8, 12, 18, 21, 31})
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0, 'In', 'In')]),
    (1, 'th

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, {11, 3, 6}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11})
DEBUG    [action_state:434] ('curr_node_id', 12)
DEBUG    [action_state:457] (12,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)])],
 False,
 False,
 True,
 True)
DEBUG    [action_state:503] (12,
 12,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
  (12, 12, None)])
DEBUG    [action_s

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 16, 'label': 'Robert', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['Robert', 'PROPN', 'NNP']}, [[]]))]
DEBUG    [action_state:517] (16, 15, 17, set())
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0, 'In', 'In')]),
    (1, 'the', [(1, 'the', 'the')]),
    (2, 'final', [(2, 'final', 'final')]),
    (3, 'minute', 'minute'),
    (6,
     'game',
     [(4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', 'game')])]),
  (7, ',', [(7, ',', ',')]),
  (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
  (9, 'had', 'had'),
  (11, 'ball', [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', 'ball')]),
  (12, 'stolen', 'stolen'),
  (13, 'by', [(13, 'by', 'by')]),
  (14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
  (15, 'center', [(15, 'center', 'center')]),
  (16, 'Robert', [(16, 'Robert', 'Robert')])])
DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}, {3, 6, 11, 12, 17}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17})
DEBUG    [action_state:434] ('curr_node_id', 18)
DEBUG    [action_state:457] (18,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])])],
 True,
 True,
 True,
 True)
DEBUG    [action_state:503] (18,
 18,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, No

DEBUG    [action_state:434] ('curr_node_id', 21)
DEBUG    [action_state:457] (21,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),
  (20, 20, [(20, 20, None)])],
 False,
 False,
 True,
 True)
DEBUG    [action_state:503] (21,
 21,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, {3, 6, 11, 12, 17}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23})
DEBUG    [action_state:434] ('curr_node_id', 24)
DEBUG    [action_state:457] (24,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),


DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 26, 'label': 'could', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['could', 'AUX', 'MD']}, [[]]))]
DEBUG    [action_state:517] (26, 25, 28, {27, 30})
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0, 'In', 'In')]),
    (1, 'the', [(1, 'the', 'the')]),
    (2, 'final', [(2, 'final', 'final')]),
    (3, 'minute', 'minute'),
    (6,
     'game',
     [(4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', 'game')])]),
  (7, ',', [(7, ',', ',')]),
  (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
  (9, 'had', 'had'),
  (12,
   'stolen',
   [(11, 'ball', [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', 'ball')]),
    (12, 'stolen', 'stolen'),
    (17,
     'Parish',
     [(13, 'by', [(13, 'by', 'by')]),
      (14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
      (15, 'center', [(15, 'center', 'center')]),
      (16, 'Robert', [(16, 'Robert', 'Robert')]),
      (17, 'Pari

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28}, {3, 6, 11, 12, 17}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27})
DEBUG    [action_state:434] ('curr_node_id', 29)
DEBUG    [action_state:457] (29,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 1

DEBUG    [action_state:503] (28,
 28,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),
  (20, 20, [(20, 20, None)]),
  (21, 21, None),
  (22, 22, [(22, 22, None)]),
  (23, 23, [(23, 23, None)]),
  (24, 24, None),
  (28,
   28,
   [(25, 25, [(25, 25, None)]),
    (26, 26, [(26, 26, None)]),
    (27, 27, [(27, 27, None)]),
    (28, 28, None),
    (30, 30, [(29, 29, [(29, 29, None)]), (30, 30, None)])])])
DEBUG    [action_state:504] [(1, (5, {'id': 28, '

DEBUG    [action_state:503] (21,
 21,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (21,
   21,
   [(19, 19, [(19, 19, None)]),
    (20, 20, [(20, 20, None)]),
    (21, 21, None),
    (24,
     24,
     [(22, 22, [(22, 22, None)]),
      (23, 23, [(23, 23, None)]),
      (24, 24, None),
      (28,
       28,
       [(25, 25, [(25, 25, None)]),
        (26, 26, [(26, 26, None)]),
        (27, 27, [(27, 27, None)]),
        (28, 28, None),
        (30, 30, [(29, 29, [(29, 29, N

DEBUG    [action_state:503] (31,
 31,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (21,
   21,
   [(19, 19, [(19, 19, None)]),
    (20, 20, [(20, 20, None)]),
    (21, 21, None),
    (24,
     24,
     [(22, 22, [(22, 22, None)]),
      (23, 23, [(23, 23, None)]),
      (24, 24, None),
      (28,
       28,
       [(25, 25, [(25, 25, None)]),
        (26, 26, [(26, 26, None)]),
        (27, 27, [(27, 27, None)]),
        (28, 28, None),
        (30, 30, [(29, 29, [(29, 29, N

In [38]:
# Build the URL of the rendered graphviz image for the graph identified by
# (framework, dataset, cid), then log it so it can be opened from the output.
graphviz_url = args.graphviz_file_template.format(framework, dataset, cid)
logger.info(graphviz_url)

INFO     [__main__:2] http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/ucca/wiki.mrp/470004.png


In [39]:
# Display the raw input sentence stored on the MRP graph currently bound to `mrp_json`.
mrp_json['input']

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [40]:
# Dump the parser states currently bound to `mrp_parser_states` (the output is very long).
mrp_parser_states

[(0,
  [(0, None),
   (1,
    (1, {'id': 0, 'anchors': [{'from': 0, 'to': 2}], 'label': 'In'}, [[]]))],
  [],
  [],
  [],
  [(0, 0, [(0, 0, None)])],
  [(0, 'In', [(0, 'In', 'In')])]),
 (1,
  [(0, None),
   (1,
    (1, {'id': 1, 'anchors': [{'from': 3, 'to': 6}], 'label': 'the'}, [[]]))],
  [],
  [],
  [],
  [(0, 0, [(0, 0, None)]), (1, 1, [(1, 1, None)])],
  [(0, 'In', [(0, 'In', 'In')]), (1, 'the', [(1, 'the', 'the')])]),
 (2,
  [(0, None),
   (1,
    (1,
     {'id': 2, 'anchors': [{'from': 7, 'to': 12}], 'label': 'final'},
     [[]]))],
  [],
  [],
  [],
  [(0, 0, [(0, 0, None)]), (1, 1, [(1, 1, None)]), (2, 2, [(2, 2, None)])],
  [(0, 'In', [(0, 'In', 'In')]),
   (1, 'the', [(1, 'the', 'the')]),
   (2, 'final', [(2, 'final', 'final')])]),
 (3,
  [(0, None),
   (1,
    (1,
     {'id': 3, 'anchors': [{'from': 13, 'to': 19}], 'label': 'minute'},
     [[]]))],
  [14],
  [],
  [31],
  [(0, 0, [(0, 0, None)]),
   (1, 1, [(1, 1, None)]),
   (2, 2, [(2, 2, None)]),
   (3, 3, [(3, 3, None)]

In [41]:
# (id, label) for every node of the MRP graph; .get() yields None for nodes that have no 'label' key.
[(mrp_node['id'], mrp_node.get('label')) for mrp_node in mrp_json['nodes']]

[(0, 'In'),
 (1, 'the'),
 (2, 'final'),
 (3, 'minute'),
 (4, 'of'),
 (5, 'the'),
 (6, 'game'),
 (7, ','),
 (8, 'Johnson'),
 (9, 'had'),
 (10, 'the'),
 (11, 'ball'),
 (12, 'stolen'),
 (13, 'by'),
 (14, 'Celtics'),
 (15, 'center'),
 (16, 'RobertParish'),
 (17, ','),
 (18, 'and'),
 (19, 'then'),
 (20, 'missed'),
 (21, 'two'),
 (22, 'free'),
 (23, 'throws'),
 (24, 'that'),
 (25, 'could'),
 (26, 'have'),
 (27, 'won'),
 (28, 'the'),
 (29, 'game'),
 (30, '.'),
 (31, None),
 (32, None),
 (33, None),
 (34, None),
 (35, None),
 (36, None),
 (37, None),
 (38, None),
 (39, None),
 (40, None),
 (41, None)]

In [42]:
# Display the sentence text currently bound to `doc`.
doc

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [43]:
# Display the node records of the parse bound to `parse_json`
# (each record carries 'id', 'label', 'properties' and 'values').
parse_json['nodes']

[{'id': 0,
  'label': 'In',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['in', 'ADP', 'IN']},
 {'id': 1,
  'label': 'the',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['the', 'DET', 'DT']},
 {'id': 2,
  'label': 'final',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['final', 'ADJ', 'JJ']},
 {'id': 3,
  'label': 'minute',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['minute', 'NOUN', 'NN']},
 {'id': 4,
  'label': 'of',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['of', 'ADP', 'IN']},
 {'id': 5,
  'label': 'the',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['the', 'DET', 'DT']},
 {'id': 6,
  'label': 'game',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['game', 'NOUN', 'NN']},
 {'id': 7,
  'label': ',',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': [',', 'PUNCT', ',']},
 {'id': 8,
  'label': 'Johnson',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['Johnson', 'PROPN', 'NNP']},
 {'id': 9,
  'label

In [44]:
# (id, label) for every node of the parse; plain indexing is used, so a node without 'label' raises KeyError.
[(parse_node['id'], parse_node['label']) for parse_node in parse_json['nodes']]

[(0, 'In'),
 (1, 'the'),
 (2, 'final'),
 (3, 'minute'),
 (4, 'of'),
 (5, 'the'),
 (6, 'game'),
 (7, ','),
 (8, 'Johnson'),
 (9, 'had'),
 (10, 'the'),
 (11, 'ball'),
 (12, 'stolen'),
 (13, 'by'),
 (14, 'Celtics'),
 (15, 'center'),
 (16, 'Robert'),
 (17, 'Parish'),
 (18, ','),
 (19, 'and'),
 (20, 'then'),
 (21, 'missed'),
 (22, 'two'),
 (23, 'free'),
 (24, 'throws'),
 (25, 'that'),
 (26, 'could'),
 (27, 'have'),
 (28, 'won'),
 (29, 'the'),
 (30, 'game'),
 (31, '.')]

In [45]:
# Display `anchors`, a list of (from, to) integer pairs.
# NOTE(review): presumably character spans of the tokens in the sentence; confirm where it is computed.
anchors

[(0, 2),
 (3, 6),
 (7, 12),
 (13, 19),
 (20, 22),
 (23, 26),
 (27, 31),
 (31, 32),
 (33, 40),
 (41, 44),
 (45, 48),
 (49, 53),
 (54, 60),
 (61, 63),
 (64, 71),
 (72, 78),
 (79, 85),
 (86, 92),
 (92, 93),
 (94, 97),
 (98, 102),
 (103, 109),
 (110, 113),
 (114, 118),
 (119, 125),
 (126, 130),
 (131, 136),
 (137, 141),
 (142, 145),
 (146, 149),
 (150, 154),
 (154, 155)]

### Create training instance

In [46]:
# Notebook-level tallies, both starting from zero.
# `with_parse_count` is incremented by the fixture-writing cell for every graph that has a parse.
total_count = with_parse_count = 0

# NOTE(review): presumably caps how many instances are generated per dataset; confirm against the consuming cell.
data_size_limit = 10

# NOTE(review): presumably the frameworks to leave out when generating instances; confirm against the consuming cell.
ignore_framework_set = {
    'amr',
    'dm',
    'psd',
    'eds',
}

In [47]:
# Output locations, all resolved against the project root.

# Fixture file consumed by the tests: <project_root>/<mrp_test_dir>/<tests_fixtures_file>.
allennlp_tests_fixtures_output_file = os.path.join(
    args.project_root, args.mrp_test_dir, args.tests_fixtures_file)

# Train/test JSONL files share one file-name template; only the split name differs.
allennlp_train_output_file, allennlp_test_output_file = (
    os.path.join(
        args.project_root, args.allennlp_mrp_json_file_template.format(split_name))
    for split_name in ('train', 'test')
)

In [48]:
# Create the tests fixture JSONL file.
#
# Each entry of `fixture_combinations` is (framework, dataset, idx), where idx
# selects one MRP graph from framework2dataset2mrp_jsons[framework][dataset].
# For every selected graph that has a parse, one JSON object is written per
# line containing the graph, its parse, and the parser states / meta data
# derived from both.
fixture_combinations = [
    ('ucca', 'wiki', 70)
]

with open(allennlp_tests_fixtures_output_file, 'w') as wf:
    for framework, dataset, idx in fixture_combinations:
        mrp_json = framework2dataset2mrp_jsons[framework][dataset][idx]
        cid = mrp_json.get('id')
        doc = mrp_json.get('input')

        # Only AMR graphs get an alignment; every other framework passes an empty one.
        alignment = {}
        if framework == 'amr':
            alignment = cid2alignment[cid]
        parse_json = dataset2cid2parse_json.get(dataset, {}).get(cid, {})

        if not parse_json:
            # This case used to be skipped silently, which could leave an empty
            # (or short) fixture file with no hint as to why.
            logger.warning(
                'No parse found for %s/%s id=%s; skipping fixture instance',
                framework, dataset, cid)
            continue

        # NOTE(review): this bumps the notebook-level counter initialised in the
        # cell above, so re-running this cell keeps increasing it; confirm that
        # fixture graphs are meant to be included in that tally.
        with_parse_count += 1

        # Parser states for the target graph, tokenised with the parse nodes.
        mrp_parser_states, mrp_meta_data = mrp_json2parser_states(
            mrp_json,
            tokenized_parse_nodes=parse_json['nodes'],
            alignment=alignment,
        )
        # Parser states for the parse itself; it is given the sentence text explicitly.
        companion_parser_states, companion_meta_data = mrp_json2parser_states(
            parse_json,
            mrp_doc=doc,
            tokenized_parse_nodes=parse_json['nodes'],
        )

        data_instance = {
            'mrp_json': mrp_json,
            'parse_json': parse_json,
            'mrp_parser_states': mrp_parser_states,
            'mrp_meta_data': mrp_meta_data,
            'companion_parser_states': companion_parser_states,
            'companion_meta_data': companion_meta_data,
        }
        # One JSON document per line (JSONL).
        json_encoded_instance = json.dumps(data_instance)
        wf.write(json_encoded_instance + '\n')

DEBUG    [action_state:60] ('remote 1', 3)
DEBUG    [action_state:60] ('remote 1', 10)
DEBUG    [action_state:60] ('remote 1', 11)
DEBUG    [action_state:207] {34}
DEBUG    [action_state:207] {33, 39, 17, 18, 19}
DEBUG    [action_state:207] {32, 35, 36, 38, 7, 8, 9, 40, 12, 24}
DEBUG    [action_state:207] {4, 5, 6, 37, 41, 10, 11, 13, 16, 20, 21, 22, 23, 25, 26, 27, 31}
DEBUG    [action_state:207] {0, 1, 2, 3, 14, 15, 28, 29, 30}
DEBUG    [action_state:85] ('remote 2', 3)
DEBUG    [action_state:85] ('remote 2', 10)
DEBUG    [action_state:85] ('remote 2', 11)
DEBUG    [action_state:415] ('prev anchors', 0)
DEBUG    [action_state:428] ('anchors', 0, 2, 0, 1)
DEBUG    [action_state:434] ('curr_node_id', 0)
DEBUG    [action_state:457] (0, [], True, True, True, True)
DEBUG    [action_state:503] (0, 0, [(0, 0, [(0, 0, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 0, 'anchors': [{'from': 0, 'to': 2}], 'label': 'In'}, [[]]))]
DEBUG    [action_state:517] (0, 31, 31, {1, 2, 3}

DEBUG    [action_state:503] (32,
 32,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])])])
DEBUG    [action_state:504] [(1, (4, {'id': 32}, [[{'source': 32, 'target': 31, 'label': 'E', 'id': 42, 'parent': 32, 'child': 31}], [{'source': 32, 'target': 4, 'label': 'R', 'id': 0, 'parent': 32, 'child': 4}], [{'source': 32, 'target': 5, 'label': 'E', 'id': 5, 'parent': 32, 'child': 5}], [{'source': 32, 'target': 6, 'label': 'C', 'id': 18, 'parent': 32, 'child': 6}]]))]
DEBUG    [action_state:517] (32, 41, 33, {35, 36, 7, 8, 9, 12})
DEBUG    [action_state:577] ('token stack',
 [(32,
   '',
   [(31,
     '',
     [(0, 'In', [(0, 'In', 'In')]),
      (1, 'the', [(1, 'the', 'the')]),
      (2, 'final', [(2, 'final', 'final')]),
      (3, 'minute', [(3, 'minute', 'minute')])]),
    (4, 'of', [(4, 'of', 'of')]),


DEBUG    [action_state:503] (11,
 11,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, [(9, 9, None)]),
  (10, 10, [(10, 10, None)]),
  (11, 11, [(11, 11, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 11, 'anchors': [{'from': 49, 'to': 53}], 'label': 'ball'}, [[]]))]
DEBUG    [action_state:517] (11, 9, 35, set())
DEBUG    [action_state:577] ('token stack',
 [(32,
   '',
   [(31,
     '',
     [(0, 'In', [(0, 'In', 'In')]),
      (1, 'the', [(1, 'the', 'the')]),
      (2, 'final', [(2, 'final', 'final')]),
      (3, 'minute', [(3, 'minute', 'minute')])]),
    (4, 'of', [(4, 'of', 'of')]),
    (5, 'the', [(5, 'the', 'the')]),
    (6, 'game', [(6, 'game', 'game')])]),
  (7, ',', [(7, ',', ',')]),
  (8, 'Johnson', [(8, 'Joh

DEBUG    [action_state:503] (14,
 14,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, [(9, 9, None)]),
  (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
  (12, 12, [(12, 12, None)]),
  (13, 13, [(13, 13, None)]),
  (14, 14, [(14, 14, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 14, 'anchors': [{'from': 64, 'to': 71}], 'label': 'Celtics'}, [[]]))]
DEBUG    [action_state:517] (14, 26, 37, {15})
DEBUG    [action_state:577] ('token stack',
 [(32,
   '',
   [(31,
     '',
     [(0, 'In', [(0, 'In', 'In')]),
      (1, 'the', [(1, 'the', 'the')]),
      (2, 'final', [(2, 'final', 'final')]),
      (3, 'minute', [(3, 'minute', 'minute')])]),
    (4, 'of', [(4, 'of', 'of')]),
    (5, 'the', [(5, 'the', 'the

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 31, 32, 35, 36, 37}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 31, 32, 35, 37}, {32, 35, 36, 37, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 31, 32, 35, 37})
DEBUG    [action_state:434] ('curr_node_id', 36)
DEBUG    [action_state:457] (36,
 [(32,
   32,
   [(31,
     31,
     [(0, 0, [(0, 0, None)]),
      (1, 1, [(1, 1, None)]),
      (2, 2, [(2, 2, None)]),
      (3, 3, [(3, 3, None)])]),
    (4, 4, [(4, 4, None)]),
    (5, 5, [(5, 5, None)]),
    (6, 6, [(6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, [(9, 9, None)]),
  (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
  (12, 12, [(12, 12, None)]),
  (13, 13, [(13, 13, None)]),
  (37, 37, [(14, 14, [(14, 14, None)]), (15, 15, [(15, 15, None)])]),
  (16, 16, [(16, 16, None)])],
 False,
 True,
 False,
 True)
DEBUG    [action_state:503] (36,
 36,


DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 35, 36, 37}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 35, 36, 37}, {32, 33, 35, 36, 37, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 31, 32, 33, 35, 36, 37})
DEBUG    [action_state:415] ('prev anchors', 19)
DEBUG    [action_state:428] ('anchors', 94, 97, 19, 20)
DEBUG    [action_state:434] ('curr_node_id', 18)
DEBUG    [action_state:457] (18,
 [(33,
   33,
   [(32,
     32,
     [(31,
       31,
       [(0, 0, [(0, 0, None)]),
        (1, 1, [(1, 1, None)]),
        (2, 2, [(2, 2, None)]),
        (3, 3, [(3, 3, None)])]),
      (4, 4, [(4, 4, None)]),
      (5, 5, [(5, 5, None)]),
      (6, 6, [(6, 6, None)])]),
    (7, 7, [(7, 7, None)]),
    (8, 8, [(8, 8, None)]),
    (9, 9, [(9, 9, None)]),
    (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
    (12, 12, [(12, 12, None)]),
    (36,
   

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 20, 'anchors': [{'from': 103, 'to': 109}], 'label': 'missed'}, [[]]))]
DEBUG    [action_state:517] (20, 7, 38, {21, 22, 23})
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15,

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 32, 33, 35, 36, 37}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 32, 33, 35, 36, 37}, {32, 33, 35, 36, 37, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 32, 33, 35, 36, 37})
DEBUG    [action_state:415] ('prev anchors', 24)
DEBUG    [action_state:428] ('anchors', 119, 125, 24, 25)
DEBUG    [action_state:434] ('curr_node_id', 23)
DEBUG    [action_state:457] (23,
 [(33,
   33,
   [(32,
     32,
     [(31,
       31,
       [(0, 0, [(0, 0, None)]),
        (1, 1, [(1, 1, None)]),
        (2, 2, [(2, 2, None)]),
        (3, 3, [(3, 3, None)])]),
      (4, 4, [(4, 4, None)]),
      (5, 5, [(5, 5, None)]),
      (6, 6, [(6, 6, None)])]),
    (7, 7, [(7, 7, None)]),
    (8, 8, [(8, 8, None)]),
    (9, 9, [(9, 9, None)]),
    (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(1

DEBUG    [action_state:503] (24,
 24,
 [(33,
   33,
   [(32,
     32,
     [(31,
       31,
       [(0, 0, [(0, 0, None)]),
        (1, 1, [(1, 1, None)]),
        (2, 2, [(2, 2, None)]),
        (3, 3, [(3, 3, None)])]),
      (4, 4, [(4, 4, None)]),
      (5, 5, [(5, 5, None)]),
      (6, 6, [(6, 6, None)])]),
    (7, 7, [(7, 7, None)]),
    (8, 8, [(8, 8, None)]),
    (9, 9, [(9, 9, None)]),
    (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
    (12, 12, [(12, 12, None)]),
    (36,
     36,
     [(13, 13, [(13, 13, None)]),
      (37, 37, [(14, 14, [(14, 14, None)]), (15, 15, [(15, 15, None)])]),
      (16, 16, [(16, 16, None)])])]),
  (17, 17, [(17, 17, None)]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),
  (38,
   38,
   [(20, 20, [(20, 20, None)]),
    (21, 21, [(21, 21, None)]),
    (22, 22, [(22, 22, None)]),
    (23, 23, [(23, 23, None)])]),
  (24, 24, [(24, 24, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 24, 'anchors':

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 26, 'anchors': [{'from': 137, 'to': 141}], 'label': 'have'}, [[]]))]
DEBUG    [action_state:517] (26, 32, 40, {41, 27})
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15, 'cen

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 28, 'anchors': [{'from': 146, 'to': 149}], 'label': 'the'}, [[]]))]
DEBUG    [action_state:517] (28, 15, 41, {29, 30})
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15, 'cent

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 30, 'anchors': [{'from': 154, 'to': 155}], 'label': '.'}, [[]]))]
DEBUG    [action_state:517] (30, 17, 41, set())
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', [(11, 'ball', 'ball')])]),
    (12, 'stolen', [(12, 'stolen', 'stolen')]),
    (36,
     '',
     [(13, 'by', [(13, 'by', 'by')]),
      (37,
       '',
       [(14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
        (15, 'center', [(15, 'center', 

DEBUG    [action_state:504] [(1, (4, {'id': 40}, [[{'source': 40, 'target': 25, 'label': 'D', 'id': 40, 'parent': 40, 'child': 25}], [{'source': 40, 'target': 26, 'label': 'F', 'id': 32, 'parent': 40, 'child': 26}], [{'source': 40, 'target': 27, 'label': 'P', 'id': 2, 'parent': 40, 'child': 27}], [{'source': 40, 'target': 41, 'label': 'A', 'id': 16, 'parent': 40, 'child': 41}]]))]
DEBUG    [action_state:517] (40, 43, 39, set())
DEBUG    [action_state:577] ('token stack',
 [(33,
   '',
   [(32,
     '',
     [(31,
       '',
       [(0, 'In', [(0, 'In', 'In')]),
        (1, 'the', [(1, 'the', 'the')]),
        (2, 'final', [(2, 'final', 'final')]),
        (3, 'minute', [(3, 'minute', 'minute')])]),
      (4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', [(6, 'game', 'game')])]),
    (7, ',', [(7, ',', ',')]),
    (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
    (9, 'had', [(9, 'had', 'had')]),
    (35,
     '',
     [(10, 'the', [(10, 'the', 'the')]

DEBUG    [action_state:503] (34,
 34,
 [(34,
   34,
   [(33,
     33,
     [(32,
       32,
       [(31,
         31,
         [(0, 0, [(0, 0, None)]),
          (1, 1, [(1, 1, None)]),
          (2, 2, [(2, 2, None)]),
          (3, 3, [(3, 3, None)])]),
        (4, 4, [(4, 4, None)]),
        (5, 5, [(5, 5, None)]),
        (6, 6, [(6, 6, None)])]),
      (7, 7, [(7, 7, None)]),
      (8, 8, [(8, 8, None)]),
      (9, 9, [(9, 9, None)]),
      (35, 35, [(10, 10, [(10, 10, None)]), (11, 11, [(11, 11, None)])]),
      (12, 12, [(12, 12, None)]),
      (36,
       36,
       [(13, 13, [(13, 13, None)]),
        (37, 37, [(14, 14, [(14, 14, None)]), (15, 15, [(15, 15, None)])]),
        (16, 16, [(16, 16, None)])])]),
    (17, 17, [(17, 17, None)]),
    (18, 18, [(18, 18, None)]),
    (19, 19, [(19, 19, None)]),
    (39,
     39,
     [(38,
       38,
       [(20, 20, [(20, 20, None)]),
        (21, 21, [(21, 21, None)]),
        (22, 22, [(22, 22, None)]),
        (23, 23, [(23, 23, Non

DEBUG    [action_state:517] (5, 5, 6, set())
DEBUG    [action_state:577] ('token stack',
 [(0, 'In', [(0, 'In', 'In')]),
  (1, 'the', [(1, 'the', 'the')]),
  (2, 'final', [(2, 'final', 'final')]),
  (3, 'minute', 'minute'),
  (4, 'of', [(4, 'of', 'of')]),
  (5, 'the', [(5, 'the', 'the')])])
DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5}, {6}, {0, 1, 2, 4, 5})
DEBUG    [action_state:434] ('curr_node_id', 6)
DEBUG    [action_state:457] (6,
 [(0, 0, [(0, 0, None)]),
  (1, 1, [(1, 1, None)]),
  (2, 2, [(2, 2, None)]),
  (3, 3, None),
  (4, 4, [(4, 4, None)]),
  (5, 5, [(5, 5, None)])],
 False,
 True,
 False,
 True)
DEBUG    [action_state:503] (6,
 6,
 [(0, 0, [(0, 0, None)]),
  (1, 1, [(1, 1, None)]),
  (2, 2, [(2, 2, None)]),
  (3, 3, None),
  (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])])
DEBUG    [action_state:504] [(0, None), (1, (3, {'id': 6, 'label': 'game', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['game',

DEBUG    [action_state:503] (10,
 10,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (10, 10, [(10, 10, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 10, 'label': 'the', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['the', 'DET', 'DT']}, [[]]))]
DEBUG    [action_state:517] (10, 9, 11, set())
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0, 'In', 'In')]),
    (1, 'the', [(1, 'the', 'the')]),
    (2, 'final', [(2, 'final', 'final')]),
    (3, 'minute', 'minute'),
    (6,
     'game',
     [(4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', 'game')])]),
  (7, ',', [(7, ',', ',')]),
  (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
  (9, 'had', 'had'),
  (10, 'the', [(10, 'the', 'the')])])

DEBUG    [action_state:503] (14,
 14,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
  (12, 12, None),
  (13, 13, [(13, 13, None)]),
  (14, 14, [(14, 14, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 14, 'label': 'Celtics', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['Celtics', 'PROPN', 'NNPS']}, [[]]))]
DEBUG    [action_state:517] (14, 13, 17, {16, 15})
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0, 'In', 'In')]),
    (1, 'the', [(1, 'the', 'the')]),
    (2, 'final', [(2, 'final', 'final')]),
    (3, 'minute', 'minute'),
    (6,
     'game',
     [(4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', 'game')])]),
  (7, ',', [

DEBUG    [action_state:434] ('curr_node_id', 12)
DEBUG    [action_state:457] (12,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
  (12, 12, None),
  (17,
   17,
   [(13, 13, [(13, 13, None)]),
    (14, 14, [(14, 14, None)]),
    (15, 15, [(15, 15, None)]),
    (16, 16, [(16, 16, None)]),
    (17, 17, None)])],
 False,
 True,
 False,
 True)
DEBUG    [action_state:503] (12,
 12,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, No

DEBUG    [action_state:503] (19,
 19,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 19, 'label': 'and', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['and', 'CONJ', 'CC']}, [[]]))]
DEBUG    [action_state:517] (19, 18, 21, {24, 20})
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0, 'In', 'In')]),
    (1, 'the', [(1, 'the', 'the')]),
    (2, 'final', [(2, 'f

DEBUG    [action_state:503] (22,
 22,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),
  (20, 20, [(20, 20, None)]),
  (21, 21, None),
  (22, 22, [(22, 22, None)])])
DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 22, 'label': 'two', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['two', 'NUM', 'CD']}, [[]]))]
DEBUG    [action_state:517] (22, 21, 24, {28, 23})
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0,

DEBUG    [action_state:434] ('curr_node_id', 25)
DEBUG    [action_state:457] (25,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),
  (20, 20, [(20, 20, None)]),
  (21, 21, None),
  (22, 22, [(22, 22, None)]),
  (23, 23, [(23, 23, None)]),
  (24, 24, None)],
 True,
 True,
 True,
 True)
DEBUG    [action_state:503] (25,
 25,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4

DEBUG    [action_state:504] [(0, None), (1, (1, {'id': 27, 'label': 'have', 'properties': ['lemma', 'upos', 'xpos'], 'values': ['have', 'AUX', 'VB']}, [[]]))]
DEBUG    [action_state:517] (27, 26, 28, {30})
DEBUG    [action_state:577] ('token stack',
 [(3,
   'minute',
   [(0, 'In', [(0, 'In', 'In')]),
    (1, 'the', [(1, 'the', 'the')]),
    (2, 'final', [(2, 'final', 'final')]),
    (3, 'minute', 'minute'),
    (6,
     'game',
     [(4, 'of', [(4, 'of', 'of')]),
      (5, 'the', [(5, 'the', 'the')]),
      (6, 'game', 'game')])]),
  (7, ',', [(7, ',', ',')]),
  (8, 'Johnson', [(8, 'Johnson', 'Johnson')]),
  (9, 'had', 'had'),
  (12,
   'stolen',
   [(11, 'ball', [(10, 'the', [(10, 'the', 'the')]), (11, 'ball', 'ball')]),
    (12, 'stolen', 'stolen'),
    (17,
     'Parish',
     [(13, 'by', [(13, 'by', 'by')]),
      (14, 'Celtics', [(14, 'Celtics', 'Celtics')]),
      (15, 'center', [(15, 'center', 'center')]),
      (16, 'Robert', [(16, 'Robert', 'Robert')]),
      (17, 'Parish', '

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29}, {3, 6, 11, 12, 17, 30}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 25, 26, 27, 29})
DEBUG    [action_state:434] ('curr_node_id', 30)
DEBUG    [action_state:457] (30,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17,

DEBUG    [action_state:434] ('curr_node_id', 24)
DEBUG    [action_state:457] (24,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [(16, 16, None)]),
      (17, 17, None)])]),
  (18, 18, [(18, 18, None)]),
  (19, 19, [(19, 19, None)]),
  (20, 20, [(20, 20, None)]),
  (21, 21, None),
  (22, 22, [(22, 22, None)]),
  (23, 23, [(23, 23, None)]),
  (24, 24, None),
  (28,
   28,
   [(25, 25, [(25, 25, None)]),
    (26, 26, [(26, 26, None)]),
    (27, 27, [(27, 27, None)]),
    (28, 28, None),
    (30, 30, [(29, 29, [(29, 29, None)]), (30, 30, None)])])],
 Fals

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}, {3, 6, 11, 12, 17, 21, 24, 28, 30}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30})
DEBUG    [action_state:434] ('curr_node_id', 30)
DEBUG    [action_state:457] (30,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
      (16, 16, [

DEBUG    [action_state:579] ('visited states', {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, {3, 6, 9, 11, 12, 17, 21, 24, 28, 30}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31})
DEBUG    [action_state:434] ('curr_node_id', 9)
DEBUG    [action_state:457] (9,
 [(3,
   3,
   [(0, 0, [(0, 0, None)]),
    (1, 1, [(1, 1, None)]),
    (2, 2, [(2, 2, None)]),
    (3, 3, None),
    (6, 6, [(4, 4, [(4, 4, None)]), (5, 5, [(5, 5, None)]), (6, 6, None)])]),
  (7, 7, [(7, 7, None)]),
  (8, 8, [(8, 8, None)]),
  (9, 9, None),
  (12,
   12,
   [(11, 11, [(10, 10, [(10, 10, None)]), (11, 11, None)]),
    (12, 12, None),
    (17,
     17,
     [(13, 13, [(13, 13, None)]),
      (14, 14, [(14, 14, None)]),
      (15, 15, [(15, 15, None)]),
   

In [49]:
# Inspect `doc`: the raw sentence string of the example being debugged.
doc

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [50]:
# Inspect the parse dict for the same example ('id', 'tops', and one entry in
# 'nodes' per token, each with 'lemma'/'upos'/'xpos' properties).
parse_json

{'id': '470004',
 'tops': [9],
 'nodes': [{'id': 0,
   'label': 'In',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['in', 'ADP', 'IN']},
  {'id': 1,
   'label': 'the',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['the', 'DET', 'DT']},
  {'id': 2,
   'label': 'final',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['final', 'ADJ', 'JJ']},
  {'id': 3,
   'label': 'minute',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['minute', 'NOUN', 'NN']},
  {'id': 4,
   'label': 'of',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['of', 'ADP', 'IN']},
  {'id': 5,
   'label': 'the',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['the', 'DET', 'DT']},
  {'id': 6,
   'label': 'game',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['game', 'NOUN', 'NN']},
  {'id': 7,
   'label': ',',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': [',', 'PUNCT', ',']},
  {'id': 8,
   'label': 'Johnson',
   'properties': ['lemma', 'up

In [51]:
# Each node's 'properties' are ['lemma', 'upos', 'xpos'], so position 2 of
# 'values' is the xpos tag; list it for every token of the parse.
[token_node['values'][2] for token_node in parse_json['nodes']]

['IN',
 'DT',
 'JJ',
 'NN',
 'IN',
 'DT',
 'NN',
 ',',
 'NNP',
 'VBD',
 'DT',
 'NN',
 'VBN',
 'IN',
 'NNPS',
 'NN',
 'NNP',
 'NNP',
 ',',
 'CC',
 'RB',
 'VBD',
 'CD',
 'JJ',
 'NNS',
 'WDT',
 'MD',
 'VB',
 'VBN',
 'DT',
 'NN',
 '.']

In [52]:
# Create train jsonl: one JSON object per line, pairing each MRP graph with its
# companion parse and the parser states derived from both. Generation is
# skipped entirely when the output file already exists.
if not os.path.isfile(allennlp_train_output_file):
    data_size = 0
    with open(allennlp_train_output_file, 'w') as wf:
        instance_stream = tqdm(mrp_dataset.mrp_json_generator(
            ignore_framework_set=ignore_framework_set
        ))
        for _, dataset, mrp_json in instance_stream:
            total_count += 1
            # Stop once `data_size_limit` instances have been written.
            if data_size >= data_size_limit:
                break

            cid = mrp_json.get('id')
            framework = mrp_json.get('framework')
            # The alignment table is consulted only for AMR graphs.
            alignment = cid2alignment[cid] if framework == 'amr' else {}
            parse_json = dataset2cid2parse_json.get(dataset, {}).get(cid, {})
            if not parse_json:
                # No companion parse for this graph: nothing is written.
                continue

            data_size += 1
            with_parse_count += 1
            mrp_parser_states, mrp_meta_data = mrp_json2parser_states(
                mrp_json, alignment
            )
            companion_parser_states, companion_meta_data = mrp_json2parser_states(
                parse_json, {}
            )

            data_instance = dict(
                mrp_json=mrp_json,
                parse_json=parse_json,
                mrp_parser_states=mrp_parser_states,
                mrp_meta_data=mrp_meta_data,
                companion_parser_states=companion_parser_states,
                companion_meta_data=companion_meta_data,
            )
            json_encoded_instance = json.dumps(data_instance)
            wf.write(json_encoded_instance + '\n')
else:
    logger.info('allennlp_train_output_file found, stop generation')

                
# Create test jsonl: one JSON object per line, pairing each evaluation input
# with its companion parse and the parser states derived from that parse.
# Generation is skipped entirely when the output file already exists.
if os.path.isfile(allennlp_test_output_file):
    logger.info('allennlp_test_output_file found, stop generation')
else:
    # Counts instances actually written, exactly like the train loop does.
    data_size = 0
    with open(allennlp_test_output_file, 'w') as wf:
        # Not read in this loop; kept so the notebook-level name stays defined
        # with the same value as before.
        alignment = {}
        for mrp_json in tqdm(test_mrp_jsons):
            # Check the budget before doing any work. Previously the counter
            # was bumped first, so at most `data_size_limit - 1` lines were
            # written and inputs skipped below still consumed the budget.
            if data_size >= data_size_limit:
                break
            cid = mrp_json.get('id', '')
            framework = mrp_json.get('framework', '')
            if framework in ignore_framework_set:
                continue
            # Indexing (not .get) on purpose: a missing companion parse for a
            # test input raises instead of being silently dropped.
            parse_json = test_parse_jsons[cid]
            companion_parser_states, companion_meta_data = mrp_json2parser_states(parse_json, {})
            data_instance = {
                'mrp_json': mrp_json,
                'parse_json': parse_json,
                'companion_parser_states': companion_parser_states,
                'companion_meta_data': companion_meta_data,
            }
            json_encoded_instance = json.dumps(data_instance)
            wf.write(json_encoded_instance + '\n')
            data_size += 1

INFO     [__main__:3] allennlp_train_output_file found, stop generation
INFO     [__main__:40] allennlp_test_output_file found, stop generation


### Test the AllenNLP dataset reader

In [53]:
import torch.optim as optim

from mrp_library.dataset_readers.mrp_jsons import MRPDatasetReader
from allennlp.common.file_utils import cached_path
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders.embedding import Embedding
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.modules.feedforward import FeedForward

from allennlp.training.metrics import CategoricalAccuracy

from allennlp.data.iterators import BucketIterator
from allennlp.training.trainer import Trainer

import json
import logging
from typing import Dict

from allennlp.common.file_utils import cached_path
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.fields import LabelField, TextField
from allennlp.data.instance import Instance
from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer
from allennlp.data.tokenizers import Token, Tokenizer, WordTokenizer
from allennlp.models import Model
from overrides import overrides

INFO     [pytorch_pretrained_bert.modeling:230] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>


In [54]:
from mrp_library.dataset_readers.mrp_jsons import MRPDatasetReader

In [55]:
# Instantiate the project's dataset reader with its default arguments.
reader = MRPDatasetReader()

In [56]:
# Read the training jsonl file into instances.
train_dataset = reader.read(cached_path(allennlp_train_output_file))

0it [00:00, ?it/s]INFO     [mrp_library.dataset_readers.mrp_jsons:45] Reading instances from lines in file at: /data/proj29_ds1/home/slai/mrp2019/allennlp-mrp-json-small-train.jsonl
99it [00:00, 379.19it/s]


In [57]:
# Read the test jsonl file into instances.
test_dataset = reader.read(cached_path(allennlp_test_output_file))

0it [00:00, ?it/s]INFO     [mrp_library.dataset_readers.mrp_jsons:45] Reading instances from lines in file at: /data/proj29_ds1/home/slai/mrp2019/allennlp-mrp-json-small-test.jsonl
99it [00:00, 2094.58it/s]


In [58]:
# Read the unit-test fixture jsonl file into instances.
tests_fixtures_dataset = reader.read(cached_path(allennlp_tests_fixtures_output_file))

0it [00:00, ?it/s]INFO     [mrp_library.dataset_readers.mrp_jsons:45] Reading instances from lines in file at: /data/proj29_ds1/home/slai/mrp2019/src/tests/fixtures/test.jsonl
1it [00:00, 155.03it/s]


In [59]:
# Fit a single vocabulary over the concatenation of all three datasets
# (train + test + fixtures).
vocab = Vocabulary.from_instances(train_dataset + test_dataset + tests_fixtures_dataset)

INFO     [allennlp.data.vocabulary:396] Fitting token dictionary from dataset.
100%|██████████| 199/199 [00:00<00:00, 19737.67it/s]


In [60]:
# Size of the 'word' namespace (used below to size the word embedding).
vocab.get_vocab_size('word')

1718

In [61]:
# Size of the 'pos' namespace (used below to size the POS embedding).
vocab.get_vocab_size('pos')

60

In [62]:
# Size of the 'label' namespace, i.e. the number of target classes.
vocab.get_vocab_size('label')

2

In [63]:
# Sizes shared by the model cells below: EMBEDDING_DIM is the width of both
# embeddings and the LSTM input; HIDDEN_DIM is the LSTM hidden size.
EMBEDDING_DIM = 6
HIDDEN_DIM = 5

### Test model

In [64]:
from mrp_library.models.generalizer import Generalizer
from allennlp.nn import InitializerApplicator, RegularizerApplicator, util
from allennlp.nn.activations import Activation
from allennlp.common.params import Params
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.modules.seq2vec_encoders.pytorch_seq2vec_wrapper import PytorchSeq2VecWrapper

In [65]:
# Embedding table with one row per entry of the 'word' namespace, exposed
# through a text-field embedder under the "word" key.
word_embedding = Embedding(
    num_embeddings=vocab.get_vocab_size('word'),
    embedding_dim=EMBEDDING_DIM,
)
word_embedder = BasicTextFieldEmbedder(dict(word=word_embedding))

In [66]:
# Embedding table with one row per entry of the 'pos' namespace, exposed
# through a text-field embedder under the "pos" key.
pos_embedding = Embedding(
    num_embeddings=vocab.get_vocab_size('pos'),
    embedding_dim=EMBEDDING_DIM,
)
pos_embedder = BasicTextFieldEmbedder(dict(pos=pos_embedding))

In [67]:
# Batch-first LSTM (EMBEDDING_DIM -> HIDDEN_DIM) wrapped as a seq2vec encoder:
# each input sequence is reduced to a single HIDDEN_DIM-sized vector.
encoder = PytorchSeq2VecWrapper(torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))

In [68]:
# Two-layer classifier head. Its input is four encoder outputs concatenated
# (see `encoded_features` below), hence 4 * HIDDEN_DIM; the final layer has two
# units, one per label.
classifier_params = Params(dict(
    input_dim=HIDDEN_DIM * 4,
    num_layers=2,
    hidden_dims=[6, 2],
    activations=["sigmoid", "linear"],
    dropout=[0.2, 0.0],
))

In [69]:
# Materialise the feed-forward classifier from the params defined above.
classifier_feedforward = FeedForward.from_params(classifier_params)

INFO     [allennlp.common.from_params:340] instantiating class <class 'allennlp.modules.feedforward.FeedForward'> from params {'input_dim': 20, 'num_layers': 2, 'hidden_dims': [6, 2], 'activations': ['sigmoid', 'linear'], 'dropout': [0.2, 0.0]} and extras set()
INFO     [allennlp.common.params:252] input_dim = 20
INFO     [allennlp.common.params:252] num_layers = 2
INFO     [allennlp.common.params:252] hidden_dims = [6, 2]
INFO     [allennlp.common.params:252] hidden_dims = [6, 2]
INFO     [allennlp.common.params:252] activations = ['sigmoid', 'linear']
INFO     [allennlp.common.from_params:340] instantiating class <class 'allennlp.nn.activations.Activation'> from params ['sigmoid', 'linear'] and extras set()
INFO     [allennlp.common.params:252] activations = ['sigmoid', 'linear']
INFO     [allennlp.common.from_params:340] instantiating class <class 'allennlp.nn.activations.Activation'> from params sigmoid and extras set()
INFO     [allennlp.common.params:252] type = sigmoid
INFO     

In [70]:
# Hand-written toy batch: two sequences of seven 'word' indices each, used to
# push tensors through the embedder / encoder / classifier by hand.
toy_word_ids = torch.LongTensor([
    [ 487,  488,  247,    7,  248,   39,    4],
    [  27,  256,    5,  219,  110, 1267,    4],
])
parse_label = {'word': toy_word_ids}
embedded_parse_label = word_embedder(parse_label)

In [71]:
# Derive the mask for the toy batch with AllenNLP's text-field mask helper.
feature_mask = util.get_text_field_mask(parse_label)

In [72]:
# Encode the embedded toy batch into one vector per sequence.
encoded_feature = encoder(embedded_parse_label, feature_mask)

In [73]:
# Reuse the same encoded tensor four times so the concatenated width matches
# the classifier's input_dim (HIDDEN_DIM * 4).
encoded_features = [encoded_feature] * 4

In [74]:
# Sanity check: concatenating along the last dim should give (batch, 4 * HIDDEN_DIM).
torch.cat(encoded_features, dim=-1).shape

torch.Size([2, 20])

In [75]:
# Run the concatenated features through the classifier head.
logits = classifier_feedforward(torch.cat(encoded_features, dim=-1))

In [76]:
# Sanity check: one row per sequence, one column per label.
logits.shape

torch.Size([2, 2])

In [77]:
# Hand-written gold class indices for the two toy sequences.
label = torch.tensor([1, 0])

In [78]:
# Cross-entropy between the raw logits and the toy labels (CrossEntropyLoss
# applies log-softmax itself, so the logits are passed unnormalised).
loss_func = torch.nn.CrossEntropyLoss()
loss = loss_func(logits, label)

In [79]:
# Assemble the project's Generalizer model from the components built above.
# The same `encoder` and `classifier_feedforward` instances that were exercised
# by hand in the previous cells are handed to the model.
model = Generalizer(
    vocab=vocab,
    word_embedder=word_embedder,
    pos_embedder=pos_embedder,
    encoder=encoder,
    classifier_feedforward=classifier_feedforward
)

DEBUG    [mrp_library.models.generalizer:37] ('word_embedder', BasicTextFieldEmbedder(
  (token_embedder_word): Embedding()
))
DEBUG    [mrp_library.models.generalizer:39] ('pos_embedder', BasicTextFieldEmbedder(
  (token_embedder_pos): Embedding()
))
INFO     [allennlp.nn.initializers:293] Initializing parameters
INFO     [allennlp.nn.initializers:309] Done initializing parameters; the following parameters are using their default initialization from their code
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.0.bias
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.0.weight
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.1.bias
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.1.weight
INFO     [allennlp.nn.initializers:314]    encoder._module.bias_hh_l0
INFO     [allennlp.nn.initializers:314]    encoder._module.bias_ih_l0
INFO     [allennlp.nn.initializers:314

In [80]:
# Plain SGD over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.1)
# Passed to the Trainer below; -1 is AllenNLP's convention for running on CPU.
cuda_device = -1

In [81]:
# Batches of two instances, bucketed by the token count of the 'parse_label'
# field; the iterator needs the vocabulary to index instances.
iterator = BucketIterator(batch_size=2, sorting_keys=[("parse_label", "num_tokens")])
iterator.index_with(vocab)

In [82]:
# Wire model, optimizer, iterator and data into an AllenNLP Trainer.
# NOTE(review): validation runs on `train_dataset` itself, so the validation
# metrics say nothing about generalisation -- presumably acceptable for a
# smoke test; confirm before relying on these numbers.
# NOTE(review): patience equals num_epochs, so early stopping looks unable to
# trigger before the final epoch -- confirm this is intended.
trainer = Trainer(
    model=model,
    optimizer=optimizer,
    iterator=iterator,
    train_dataset=train_dataset,
    validation_dataset=train_dataset,
    patience=10,
    num_epochs=10,
    cuda_device=cuda_device
)

In [83]:
# Run the training loop. With DEBUG logging enabled this emits several log
# lines per batch, which is what floods the cell output.
trainer.train()

INFO     [allennlp.training.trainer:465] Beginning training.
INFO     [allennlp.training.trainer:281] Epoch 0/9
INFO     [allennlp.training.trainer:283] Peak CPU memory usage MB: 5556.168
INFO     [allennlp.training.trainer:287] GPU 0 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 1 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 2 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 3 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 4 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 5 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 6 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 7 memory usage MB: 10
INFO     [allennlp.training.trainer:311] Training
  0%|          | 0/50 [00:00<?, ?it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 78, 'num_tokens': 78}, 'parse_lemma': {'word_length': 78, 'num_tokens': 78}, 'pa

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 31, 'num_tokens': 31}, 'parse_lemma': {'word_length': 31, 'num_tokens': 31}, 'parse_upos': {'pos_length': 31, 'num_tokens': 31}, 'parse_xpos': {'pos_length': 31, 'num_tokens': 31}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
loss: 0.7126 ||:  36%|███▌      | 18/50 [00:01<00:02, 13.18it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 20, 'num_tokens': 20}, 'parse_lemma': {'word_length': 20, 'num_tokens': 20}, 'parse_upos': {'pos_length': 20, 'num_tokens': 20}, 'parse_xpos': {'pos_length': 20, 'num_tokens': 20}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_l

DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.7037 ||:  72%|███████▏  | 36/50 [00:02<00:00, 18.60it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 45, 'num_tokens': 45}, 'parse_lemma': {'word_length': 45, 'num_tokens': 45}, 'parse_upos': {'pos_length': 45, 'num_tokens': 45}, 'parse_xpos': {'pos_length': 45, 'num_tokens': 45}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [a

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 12, 'num_tokens': 12}, 'parse_lemma': {'word_length': 12, 'num_tokens': 12}, 'parse_upos': {'pos_length': 12, 'num_tokens': 12}, 'parse_xpos': {'pos_length': 12, 'num_tokens': 12}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 15, 'num_tokens': 15}, 'parse_lemma': {'word_length': 15, 'num_tokens': 15}, 'parse_upos': {'pos_length': 15, 'num_tokens': 15}, 'parse_xpos': {'pos_length': 15, 'num_tokens': 15}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 16, 'num_tokens': 16}, 'parse_lemma': 

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 27, 'num_tokens': 27}, 'parse_lemma': {'word_length': 27, 'num_tokens': 27}, 'parse_upos': {'pos_length': 27, 'num_tokens': 27}, 'parse_xpos': {'pos_length': 27, 'num_tokens': 27}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 24, 'num_tokens': 24}, 'parse_lemma': {'word_length': 24, 'num_tokens': 24}, 'parse_upos': {'pos_length': 24, 'num_tokens': 24}, 'parse_xpos': {'pos_length': 24, 'num_tokens': 24}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
loss: 0.

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 46, 'num_tokens': 46}, 'parse_lemma': {'word_length': 46, 'num_tokens': 46}, 'parse_upos': {'pos_length': 46, 'num_tokens': 46}, 'parse_xpos': {'pos_length': 46, 'num_tokens': 46}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 53, 'num_tokens': 53}, 'parse_lemma': {'word_length': 53, 'num_tokens': 53}, 'parse_upos': {'pos_length': 53, 'num_tokens': 53}, 'parse_xpos': {'pos_length': 53, 'num_tokens': 53}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG   

DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.6938 ||:  12%|█▏        | 6/50 [00:00<00:04,  9.52it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 12, 'num_tokens': 12}, 'parse_lemma': {'word_length': 12, 'num_tokens': 12}, 'parse_upos': {'pos_length': 12, 'num_tokens': 12}, 'parse_xpos': {'pos_length': 12, 'num_tokens': 12}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [al

loss: 0.7025 ||:  48%|████▊     | 24/50 [00:01<00:01, 17.04it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 55, 'num_tokens': 55}, 'parse_lemma': {'word_length': 55, 'num_tokens': 55}, 'parse_upos': {'pos_length': 55, 'num_tokens': 55}, 'parse_xpos': {'pos_length': 55, 'num_tokens': 55}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 16, 'num_tokens': 16}, 'parse_lemma': {'word_length': 16, 'num_tokens': 16}, 'parse_upos': {'pos_length': 16, 'num_tokens': 16}, 'parse_xpos': {'pos_length': 16, 'num_tokens': 16}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.7026 ||:  52%|█████▏    | 26/50 [00:01<00:01, 16.62it/s]DEBUG    [allennlp

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 53, 'num_tokens': 53}, 'parse_lemma': {'word_length': 53, 'num_tokens': 53}, 'parse_upos': {'pos_length': 53, 'num_tokens': 53}, 'parse_xpos': {'pos_length': 53, 'num_tokens': 53}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.6906 ||:  86%|████████▌ | 43/50 [00:02<00:00, 17.68it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 20, 'num_tokens': 20}, 'parse_lemma': {'word_length': 20, 'num_tokens': 20}, 'parse_upos': {'pos_length': 20, 'num_tokens': 20}, 'parse_xpos': {'pos_length': 20, 'num_tokens': 20}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 20, 'num_tokens': 20}, 'parse_lemma': {'word_length': 20, 'num_tokens': 20}, 'parse_upos': {'pos_length': 20, 'num_tokens': 20}, 'parse_xpos': {'pos_length': 20, 'num_tokens': 20}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 20, 'num_tokens': 20}, 'parse_lemma': {'word_length': 20, 'num_tokens': 20}, 'parse_upos': {'pos_length': 20, 'num_tokens': 20}, 'parse_xpos': {'pos_length': 20, 'num_tokens': 20}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 31, 'num_tokens': 31}, 'parse_lemma': {'word_length': 31, 'num_tokens': 31}, 'parse_upos': {'pos_length': 31, 'num_tokens': 31}, 'parse_xpos': {'pos_length': 31, 'num_tokens': 31}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 28, 'num_tokens': 28}, 'parse_lemma': {'word_length': 28, 'num_tokens': 28}, 'parse_upos': {'pos_length': 28, 'num_tokens': 28}, 'parse_xpos': {'pos_length': 28, 'num_tokens': 28}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 78, 'num_tokens': 78}, 'parse_lemma': {'word_length': 78, 'num_tokens': 78}, 'parse_upos': {'pos_length': 78, 'num_tokens': 78}, 'parse_xpos': {'pos_length': 78, 'num_tokens': 78}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 1
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1]))
loss: 0.6836 ||: 100%|██████████| 50/50 [00:00<00:00, 83.15it/s]
INFO     [allennlp.training.tensorboard_writer:161]                     Training |  Validation
INFO     [allennlp.training.tensorboard_writer:178] gpu_3_memory_MB |    10.000  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] cpu_memory_MB   |  5568.476  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] gpu_5_memory_MB |    10.000  |       N/A

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.6431 ||:  26%|██▌       | 13/50 [00:00<00:02, 18.37it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 31, 'num_tokens': 31}, 'parse_lemma': {'word_length': 31, 'num_tokens': 31}, 'parse_upos': {'pos_length': 31, 'num_tokens': 31}, 'parse_xpos': {'pos_length': 31, 'num_tokens': 31}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 22, 'num_tokens': 22}, 'parse_lemma': {'word_length': 22, 'num_tokens': 22}, 'parse_upos': {'pos_length': 22, 'num_tokens': 22}, 'parse_xpos': {'pos_length': 22, 'num_tokens': 22}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_l

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 37, 'num_tokens': 37}, 'parse_lemma': {'word_length': 37, 'num_tokens': 37}, 'parse_upos': {'pos_length': 37, 'num_tokens': 37}, 'parse_xpos': {'pos_length': 37, 'num_tokens': 37}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 16, 'num_tokens': 16}, 'parse_lemma': {'word_length': 16, 'num_tokens': 16}, 'parse_upos': {'pos_length': 16, 'num_tokens': 16}, 'parse_xpos': {'pos_length': 16, 'num_tokens': 16}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.6797 ||:  66%|██████▌   | 33/50 [00:01<00:00, 21.20it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.6959 ||: 100%|██████████| 50/50 [00:02<00:00, 19.90it/s]
INFO     [allennlp.training.trainer:404] Validating
  0%|          | 0/50 [00:00<?, ?it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 7, 'num_tokens': 7}, 'parse_lemma': {'word_length': 7, 'num_tokens': 7}, 'parse_upos': {'pos_length': 7, 'num_tokens': 7}, 'parse_xpos': {'pos_length': 7, 'num_tokens': 7}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 10, 'num_tokens': 10}, 'parse_lemma': {'word_length': 10, 'num_tokens': 10}, 'parse_upos': {'pos_length': 10, 'num_tokens': 10}, 'parse_xpos': {'pos_length': 10, 'num_tokens': 10}}

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 24, 'num_tokens': 24}, 'parse_lemma': {'word_length': 24, 'num_tokens': 24}, 'parse_upos': {'pos_length': 24, 'num_tokens': 24}, 'parse_xpos': {'pos_length': 24, 'num_tokens': 24}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 24, 'num_tokens': 24}, 'parse_lemma': {'word_length': 24, 'num_tokens': 24}, 'parse_upos': {'pos_length': 24, 'num_tokens': 24}, 'parse_xpos': {'pos_length': 24, 'num_tokens': 24}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 40, 'num_tokens': 40}, 'parse_lemma': {'word_length': 40, 'num_tokens': 40}, 'parse_upos': {'pos_length': 40, 'num_tokens': 40}, 'parse_xpos': {'pos_length': 40, 'num_tokens': 40}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
loss: 0.7017 ||:  78%|███████▊  | 39/50 [00:00<00:00, 89.71it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 38, 'num_tokens': 38}, 'parse_lemma': {'word_length': 38, 'num_tokens': 38}, 'parse_upos': {'pos_length': 38, 'num_tokens': 38}, 'parse_xpos': {'pos_length': 38, 'num_tokens': 38}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_l

loss: 0.7829 ||:   4%|▍         | 2/50 [00:00<00:03, 15.65it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 16, 'num_tokens': 16}, 'parse_lemma': {'word_length': 16, 'num_tokens': 16}, 'parse_upos': {'pos_length': 16, 'num_tokens': 16}, 'parse_xpos': {'pos_length': 16, 'num_tokens': 16}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 55, 'num_tokens': 55}, 'parse_lemma': {'word_length': 55, 'num_tokens': 55}, 'parse_upos': {'pos_length': 55, 'num_tokens': 55}, 'parse_xpos': {'pos_length': 55, 'num_tokens': 55}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
loss: 0.7702 ||:   8%|▊         | 4/50 [00:00<00:03, 15.07it/s]DEBUG    [allennlp.d

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 21, 'num_tokens': 21}, 'parse_lemma': {'word_length': 21, 'num_tokens': 21}, 'parse_upos': {'pos_length': 21, 'num_tokens': 21}, 'parse_xpos': {'pos_length': 21, 'num_tokens': 21}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 12, 'num_tokens': 12}, 'parse_lemma': {'word_length': 12, 'num_tokens': 12}, 'parse_upos': {'pos_length': 12, 'num_tokens': 12}, 'parse_xpos': {'pos_length': 12, 'num_tokens': 12}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.

loss: 0.7468 ||:  78%|███████▊  | 39/50 [00:02<00:00, 16.52it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 40, 'num_tokens': 40}, 'parse_lemma': {'word_length': 40, 'num_tokens': 40}, 'parse_upos': {'pos_length': 40, 'num_tokens': 40}, 'parse_xpos': {'pos_length': 40, 'num_tokens': 40}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 28, 'num_tokens': 28}, 'parse_lemma': {'word_length': 28, 'num_tokens': 28}, 'parse_upos': {'pos_length': 28, 'num_tokens': 28}, 'parse_xpos': {'pos_length': 28, 'num_tokens': 28}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 28, 'num_tokens': 28}, 'parse_lemma': {'word_length': 28, 'num_tokens': 28}, 'parse_upos': {'pos_length': 28, 'num_tokens': 28}, 'parse_xpos': {'pos_length': 28, 'num_tokens': 28}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 53, 'num_tokens': 53}, 'parse_lemma': {'word_length': 53, 'num_tokens': 53}, 'parse_upos': {'pos_length': 53, 'num_tokens': 53}, 'parse_xpos': {'pos_length': 53, 'num_tokens': 53}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 69, 'num_tokens': 69}, 'parse_lemma': {'word_length': 69, 'num_tokens': 69}, 'parse_upos': {'pos_length': 69, 'num_tokens': 69}, 'parse_xpos': {'pos_length': 69, 'num_tokens': 69}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
loss: 0.7237 ||:  20%|██        | 10/50 [00:00<00:02, 16.56it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 19, 'num_tokens': 19}, 'parse_lemma': {'word_length': 19, 'num_tokens': 19}, 'parse_upos': {'pos_length': 19, 'num_tokens': 19}, 'parse_xpos': {'pos_length': 19, 'num_tokens': 19}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 22, 'num_tokens': 22}, 'parse_lemma': {'word_length': 22, 'num_tokens': 22}, 'parse_upos': {'pos_length': 22, 'num_tokens': 22}, 'parse_xpos': {'pos_length': 22, 'num_tokens': 22}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_l

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.7019 ||:  58%|█████▊    | 29/50 [00:01<00:01, 20.62it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 44, 'num_tokens': 44}, 'parse_lemma': {'word_length': 44, 'num_tokens': 44}, 'parse_upos': {'pos_length': 44, 'num_tokens': 44}, 'parse_xpos': {'pos_length': 44, 'num_tokens': 44}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 78, 'num_tokens': 78}, 'parse_lemma': {'word_length': 78, 'num_tokens': 78}, 'parse_upos': {'pos_length': 78, 'num_tokens': 78}, 'parse_xpos': {'pos_length': 78, 'num_tokens': 78}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.6995 ||:  96%|█████████▌| 48/50 [00:02<00:00, 16.19it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 55, 'num_tokens': 55}, 'parse_lemma': {'word_length': 55, 'num_tokens': 55}, 'parse_upos': {'pos_length': 55, 'num_tokens': 55}, 'parse_xpos': {'pos_length': 55, 'num_tokens': 55}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_l

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 20, 'num_tokens': 20}, 'parse_lemma': {'word_length': 20, 'num_tokens': 20}, 'parse_upos': {'pos_length': 20, 'num_tokens': 20}, 'parse_xpos': {'pos_length': 20, 'num_tokens': 20}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 32, 'num_tokens': 32}, 'parse_lemma': {'word_length': 32, 'num_tokens': 32}, 'parse_upos': {'pos_length': 32, 'num_tokens': 32}, 'parse_xpos': {'pos_length': 32, 'num_tokens': 32}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 39, 'num_tokens': 39}, 'parse_lemma': {'word_length': 39, 'num_tokens': 39}, 'parse_upos': {'pos_length': 39, 'num_tokens': 39}, 'parse_xpos': {'pos_length': 39, 'num_tokens': 39}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG   

INFO     [allennlp.training.trainer:287] GPU 3 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 4 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 5 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 6 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 7 memory usage MB: 10
INFO     [allennlp.training.trainer:311] Training
  0%|          | 0/50 [00:00<?, ?it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 38, 'num_tokens': 38}, 'parse_lemma': {'word_length': 38, 'num_tokens': 38}, 'parse_upos': {'pos_length': 38, 'num_tokens': 38}, 'parse_xpos': {'pos_length': 38, 'num_tokens': 38}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.7033 ||:   2%|▏         | 1/50 [00:00<00:05,  9.05it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_lab

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 31, 'num_tokens': 31}, 'parse_lemma': {'word_length': 31, 'num_tokens': 31}, 'parse_upos': {'pos_length': 31, 'num_tokens': 31}, 'parse_xpos': {'pos_length': 31, 'num_tokens': 31}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 40, 'num_tokens': 40}, 'parse_lemma': {'word_length': 40, 'num_tokens': 40}, 'parse_upos': {'pos_length': 40, 'num_tokens': 40}, 'parse_xpos': {'pos_length': 40, 'num_tokens': 40}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 39, 'num_tokens': 39}, 'parse_lemma': {'word_length': 39, 'num_tokens': 39}, 'parse_upos': {'pos_length': 39, 'num_tokens': 39}, 'parse_xpos': {'pos_length': 39, 'num_tokens': 39}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 15, 'num_tokens': 15}, 'parse_lemma': {'word_length': 15, 'num_tokens': 15}, 'parse_upos': {'pos_length': 15, 'num_tokens': 15}, 'parse_xpos': {'pos_length': 15, 'num_tokens': 15}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.7091 ||:  76%|███████▌  | 38/50 [00:01<00:00, 21.52it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 15, 'num_tokens': 15}, 'parse_lemma': {'word_length': 15, 'num_tokens': 15}, 'parse_upos': {'pos_length': 15, 'num_tokens': 15}, 'parse_xpos': {'pos_length': 15, 'num_tokens': 15}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 16, 'num_tokens': 16}, 'parse_lemma': {'word_length': 16, 'num_tokens': 16}, 'parse_upos': {'pos_length': 16, 'num_tokens': 16}, 'parse_xpos': {'pos_length': 16, 'num_tokens': 16}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 25, 'num_tokens': 25}, 'parse_lemma': {'word_length': 25, 'num_tokens': 25}, 'parse_upos': {'pos_length': 25, 'num_tokens': 25}, 'parse_xpos': {'pos_length': 25, 'num_tokens': 25}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 27, 'num_tokens': 27}, 'parse_lemma': {'word_length': 27, 'num_tokens': 27}, 'parse_upos': {'pos_length': 27, 'num_tokens': 27}, 'parse_xpos': {'pos_length': 27, 'num_tokens': 27}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 50, 'num_tokens': 50}, 'parse_lemma': {'word_length': 50, 'num_tokens': 50}, 'parse_upos': {'pos_length': 50, 'num_tokens': 50}, 'parse_xpos': {'pos_length': 50, 'num_tokens': 50}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 55, 'num_tokens': 55}, 'parse_lemma': {'word_length': 55, 'num_tokens': 55}, 'parse_upos': {'pos_length': 55, 'num_tokens': 55}, 'parse_xpos': {'pos_length': 55, 'num_tokens': 55}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
loss: 0.

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 45, 'num_tokens': 45}, 'parse_lemma': {'word_length': 45, 'num_tokens': 45}, 'parse_upos': {'pos_length': 45, 'num_tokens': 45}, 'parse_xpos': {'pos_length': 45, 'num_tokens': 45}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.5895 ||:  16%|█▌        | 8/50 [00:00<00:02, 16.17it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_li

DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 36, 'num_tokens': 36}, 'parse_lemma': {'word_length': 36, 'num_tokens': 36}, 'parse_upos': {'pos_length': 36, 'num_tokens': 36}, 'parse_xpos': {'pos_length': 36, 'num_tokens': 36}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.6445 ||:  52%|█████▏    | 26/50 [00:01<00:01, 19.04it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 78, 'num_tokens': 78}, 'parse_lemma': {'word_length': 78, 'num_tokens': 78}, 'parse_upos': {'pos_length': 78, 'num_tokens': 78}, 'parse_xpos': {'pos_length': 78, 'num_tokens': 78}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 1
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1]))
DEBUG    [alle

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 27, 'num_tokens': 27}, 'parse_lemma': {'word_length': 27, 'num_tokens': 27}, 'parse_upos': {'pos_length': 27, 'num_tokens': 27}, 'parse_xpos': {'pos_length': 27, 'num_tokens': 27}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
loss: 0.6830 ||:  90%|█████████ | 45/50 [00:02<00:00, 22.13it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 20, 'num_tokens': 20}, 'parse_lemma': {'word_length': 20, 'num_tokens': 20}, 'parse_upos': {'pos_length': 20, 'num_tokens': 20}, 'parse_xpos': {'pos_length': 20, 'num_tokens': 20}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 20, 'num_tokens': 20}, 'parse_lemma': {'word_length': 20, 'num_tokens': 20}, 'parse_upos': {'pos_length': 20, 'num_tokens': 20}, 'parse_xpos': {'pos_length': 20, 'num_tokens': 20}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 30, 'num_tokens': 30}, 'parse_lemma': {'word_length': 30, 'num_tokens': 30}, 'parse_upos': {'pos_length': 30, 'num_tokens': 30}, 'parse_xpos': {'pos_length': 30, 'num_tokens': 30}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 28, 'num_tokens': 28}, 'parse_lemma': {'word_length': 28, 'num_tokens': 28}, 'parse_upos': {'pos_length': 28, 'num_tokens': 28}, 'parse_xpos': {'pos_length': 28, 'num_tokens': 28}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 1
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1]))
loss: 0.6890 ||: 100%|██████████| 50/50 [00:00<00:00, 100.93it/s]
INFO     [allennlp.training.tensorboard_writer:161]                     Training |  Validation
INFO     [allennlp.training.tensorboard_writer:178] gpu_3_memory_MB |    10.000  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] cpu_memory_MB   |  5568.668  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] gpu_5_memory_MB |    10.000  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] gpu_7_memory_MB |    10.000  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] gpu_2_memory_MB |    11.000  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] gpu_0_memory_MB |    10.000  |       N/A
INFO     [allennlp.training.tensorboard_writer:178] gpu_6_memory_MB |    11.000  |       N/A
INFO     [allennlp.training.tensorboard_writer:174] l

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 44, 'num_tokens': 44}, 'parse_lemma': {'word_length': 44, 'num_tokens': 44}, 'parse_upos': {'pos_length': 44, 'num_tokens': 44}, 'parse_xpos': {'pos_length': 44, 'num_tokens': 44}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
loss: 0.7183 ||:  30%|███       | 15/50 [00:00<00:01, 18.54it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 27, 'num_tokens': 27}, 'parse_lemma': {'word_length': 27, 'num_tokens': 27}, 'parse_upos': {'pos_length': 27, 'num_tokens': 27}, 'parse_xpos': {'pos_length': 27, 'num_tokens': 27}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_l

loss: 0.7168 ||:  64%|██████▍   | 32/50 [00:01<00:01, 17.62it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 16, 'num_tokens': 16}, 'parse_lemma': {'word_length': 16, 'num_tokens': 16}, 'parse_upos': {'pos_length': 16, 'num_tokens': 16}, 'parse_xpos': {'pos_length': 16, 'num_tokens': 16}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

  0%|          | 0/50 [00:00<?, ?it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 7, 'num_tokens': 7}, 'parse_lemma': {'word_length': 7, 'num_tokens': 7}, 'parse_upos': {'pos_length': 7, 'num_tokens': 7}, 'parse_xpos': {'pos_length': 7, 'num_tokens': 7}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 10, 'num_tokens': 10}, 'parse_lemma': {'word_length': 10, 'num_tokens': 10}, 'parse_upos': {'pos_length': 10, 'num_tokens': 10}, 'parse_xpos': {'pos_length': 10, 'num_tokens': 10}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 12, 'num

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 25, 'num_tokens': 25}, 'parse_lemma': {'word_length': 25, 'num_tokens': 25}, 'parse_upos': {'pos_length': 25, 'num_tokens': 25}, 'parse_xpos': {'pos_length': 25, 'num_tokens': 25}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 24, 'num_tokens': 24}, 'parse_lemma': 

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 40, 'num_tokens': 40}, 'parse_lemma': {'word_length': 40, 'num_tokens': 40}, 'parse_upos': {'pos_length': 40, 'num_tokens': 40}, 'parse_xpos': {'pos_length': 40, 'num_tokens': 40}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 42, 'num_tokens': 42}, 'parse_lemma': {'word_length': 42, 'num_tokens': 42}, 'parse_upos': {'pos_length': 42, 'num_tokens': 42}, 'parse_xpos': {'pos_length': 42, 'num_tokens': 42}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
loss: 0.6782 ||:  80%|████████  | 40/50 [00:00<00:00, 90.07it/s] DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'par

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
loss: 0.7007 ||:   6%|▌         | 3/50 [00:00<00:01, 25.94it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 44, 'num_tokens': 44}, 'parse_lemma': {'word_length': 44, 'num_tokens': 44}, 'parse_upos': {'pos_length': 44, 'num_tokens': 44}, 'parse_xpos': {'pos_length': 44, 'num_tokens': 44}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 28, 'num_tokens': 28}, 'parse_lemma': {'word_length': 28, 'num_tokens': 28}, 'parse_upos': {'pos_length': 28, 'num_tokens': 28}, 'parse_xpos': {'pos_length': 28, 'num_tokens': 28}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_li

DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 12, 'num_tokens': 12}, 'parse_lemma': {'word_length': 12, 'num_tokens': 12}, 'parse_upos': {'pos_length': 12, 'num_tokens': 12}, 'parse_xpos': {'pos_length': 12, 'num_tokens': 12}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.6964 ||:  44%|████▍     | 22/50 [00:01<00:01, 21.92it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 37, 'num_tokens': 37}, 'parse_lemma': {'word_length': 37, 'num_tokens': 37}, 'parse_upos': {'pos_length': 37, 'num_tokens': 37}, 'parse_xpos': {'pos_length': 37, 'num_tokens': 37}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [a

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.7009 ||:  80%|████████  | 40/50 [00:02<00:00, 17.62it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 21, 'num_tokens': 21}, 'parse_lemma': {'word_length': 21, 'num_tokens': 21}, 'parse_upos': {'pos_length': 21, 'num_tokens': 21}, 'parse_xpos': {'pos_length': 21, 'num_tokens': 21}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 7, 'num_tokens': 7}, 'parse_lemma': {'word_length': 7, 'num_tokens': 7}, 'parse_upos': {'pos_length': 7, 'num_tokens': 7}, 'parse_xpos': {'pos_length': 7, 'num_tokens': 7}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.m

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 18, 'num_tokens': 18}, 'parse_lemma': {'word_length': 18, 'num_tokens': 18}, 'parse_upos': {'pos_length': 18, 'num_tokens': 18}, 'parse_xpos': {'pos_length': 18, 'num_tokens': 18}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.6587 ||:  20%|██        | 10/50 [00:00<00:00, 97.85it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 27, 'num_tokens': 27}, 'parse_lemma': {'word_length': 27, 'num_tokens': 27}, 'parse_upos': {'pos_length': 27, 'num_tokens': 27}, 'parse_xpos': {'pos_length': 27, 'num_tokens': 27}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 31, 'num_tokens': 31}, 'parse_lemma': {'word_length': 31, 'num_tokens': 31}, 'parse_upos': {'pos_length': 31, 'num_tokens': 31}, 'parse_xpos': {'pos_length': 31, 'num_tokens': 31}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
loss: 0.6838 ||:  58%|█████▊    | 29/50 [00:00<00:00, 90.19it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 55, 'num_tokens': 55}, 'parse_lemma': {'word_length': 55, 'num_tokens': 55}, 'parse_upos': {'pos_length': 55, 'num_tokens': 55}, 'parse_xpos': {'pos_length': 55, 'num_tokens': 55}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 69, 'num_tokens': 69}, 'parse_lemma': {'word_length': 69, 'num_tokens': 69}, 'parse_upos': {'pos_length': 69, 'num_tokens': 69}, 'parse_xpos': {'pos_length': 69, 'num_tokens': 69}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths:

DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.6668 ||:  24%|██▍       | 12/50 [00:00<00:02, 15.88it/s]DEBUG    [a

DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 55, 'num_tokens': 55}, 'parse_lemma': {'word_length': 55, 'num_tokens': 55}, 'parse_upos': {'pos_length': 55, 'num_tokens': 55}, 'parse_xpos': {'pos_length': 55, 'num_tokens': 55}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
loss: 0.7028 ||:  58%|█████▊    | 29/50 [00:01<00:01, 15.88it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 16, 'num_tokens': 16}, 'parse_lemma': {'word_length': 16, 'num_tokens': 16}, 'parse_upos': {'pos_length': 16, 'num_tokens': 16}, 'parse_xpos': {'pos_length': 16, 'num_tokens': 16}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'pars

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 17, 'num_tokens': 17}, 'parse_lemma': {'word_length': 17, 'num_tokens': 17}, 'parse_upos': {'pos_length': 17, 'num_tokens': 17}, 'parse_xpos': {'pos_length': 17, 'num_tokens': 17}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
loss: 0.7058 ||:  96%|█████████▌| 48/50 [00:02<00:00, 17.74it/s]DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 21, 'num_tokens': 21}, 'parse_lemma': {'word_length': 21, 'num_tokens': 21}, 'parse_upos': {'pos_length': 21, 'num_tokens': 21}, 'parse_xpos': {'pos_length': 21, 'num_tokens': 21}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_l

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 21, 'num_tokens': 21}, 'parse_lemma': {'word_length': 21, 'num_tokens': 21}, 'parse_upos': {'pos_length': 21, 'num_tokens': 21}, 'parse_xpos': {'pos_length': 21, 'num_tokens': 21}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 23, 'num_tokens': 23}, 'parse_lemma': {'word_length': 23, 'num_tokens': 23}, 'parse_upos': {'pos_length': 23, 'num_tokens': 23}, 'parse_xpos': {'pos_length': 23, 'num_tokens': 23}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG   

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 0]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 36, 'num_tokens': 36}, 'parse_lemma': {'word_length': 36, 'num_tokens': 36}, 'parse_upos': {'pos_length': 36, 'num_tokens': 36}, 'parse_xpos': {'pos_length': 36, 'num_tokens': 36}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([0, 1]))
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_label': {'word_length': 37, 'num_tokens': 37}, 'parse_lemma': {'word_length': 37, 'num_tokens': 37}, 'parse_upos': {'pos_length': 37, 'num_tokens': 37}, 'parse_xpos': {'pos_length': 37, 'num_tokens': 37}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 2
DEBUG    [mrp_library.models.generalizer:86] ('label', tensor([1, 0]))
DEBUG   

{'best_epoch': 8,
 'peak_cpu_memory_MB': 5569.592,
 'peak_gpu_0_memory_MB': 10,
 'peak_gpu_1_memory_MB': 11,
 'peak_gpu_2_memory_MB': 11,
 'peak_gpu_3_memory_MB': 10,
 'peak_gpu_4_memory_MB': 11,
 'peak_gpu_5_memory_MB': 10,
 'peak_gpu_6_memory_MB': 11,
 'peak_gpu_7_memory_MB': 10,
 'training_duration': '0:00:34.543248',
 'training_start_epoch': 0,
 'training_epochs': 9,
 'epoch': 9,
 'training_loss': 0.7053492307662964,
 'training_cpu_memory_MB': 5569.592,
 'training_gpu_0_memory_MB': 10,
 'training_gpu_1_memory_MB': 11,
 'training_gpu_2_memory_MB': 11,
 'training_gpu_3_memory_MB': 10,
 'training_gpu_4_memory_MB': 11,
 'training_gpu_5_memory_MB': 10,
 'training_gpu_6_memory_MB': 11,
 'training_gpu_7_memory_MB': 10,
 'validation_loss': 0.6876568341255188,
 'best_validation_loss': 0.6833285081386566}

In [66]:
class LstmTagger(Model):
    """Token-level tagger: embed the input, encode it, and score every tag per position.

    The output layer projects from the encoder's output dimension to the size
    of the vocabulary's 'labels' namespace. Accuracy over the predictions is
    tracked with ``CategoricalAccuracy``.
    """

    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 vocab: Vocabulary) -> None:
        """Store the embedder and encoder and build the tag projection layer."""
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        encoder_dim = encoder.get_output_dim()
        num_tags = vocab.get_vocab_size('labels')
        self.hidden2tag = torch.nn.Linear(in_features=encoder_dim,
                                          out_features=num_tags)
        self.accuracy = CategoricalAccuracy()

    def forward(self,
                sentence: Dict[str, torch.Tensor],
                labels: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        """Compute tag logits and, when gold labels are supplied, the loss.

        Returns a dict with "tag_logits"; if ``labels`` is given, the accuracy
        metric is updated and a "loss" entry is added as well.
        """
        mask = get_text_field_mask(sentence)
        encoded = self.encoder(self.word_embeddings(sentence), mask)
        tag_logits = self.hidden2tag(encoded)

        # Prediction-only call: nothing to score against.
        if labels is None:
            return {"tag_logits": tag_logits}

        self.accuracy(tag_logits, labels, mask)
        loss = sequence_cross_entropy_with_logits(tag_logits, labels, mask)
        return {"tag_logits": tag_logits, "loss": loss}

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        """Report the tracked accuracy, optionally resetting the accumulator."""
        accuracy_value = self.accuracy.get_metric(reset)
        return {"accuracy": accuracy_value}

In [67]:
# Wrap a batch-first LSTM as the seq2seq encoder and build the tagger around it.
lstm = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(input_size=EMBEDDING_DIM, hidden_size=HIDDEN_DIM, batch_first=True)
)
model = LstmTagger(word_embeddings=word_embeddings, encoder=lstm, vocab=vocab)

In [71]:
# Build the optimizer, batch iterator and trainer for the tagger.
cuda_device = -1  # -1 selects the CPU in AllenNLP
optimizer = optim.SGD(model.parameters(), lr=0.1)

# NOTE(review): the sorting key must name a field that exists on the dataset
# instances. The recorded run of this notebook failed with KeyError: 'sentence',
# so the instances in `train_dataset` apparently have no 'sentence' field.
iterator = BucketIterator(batch_size=2, sorting_keys=[("sentence", "num_tokens")])
iterator.index_with(vocab)

# The former `test_datase=test_dataset` line was removed: the keyword was
# misspelled, it lacked a trailing comma (SyntaxError), and Trainer has no
# test-dataset argument. Evaluate on held-out test data separately after training.
# NOTE(review): validation runs on the training set, so `patience` early
# stopping is driven by training loss — confirm this is intended.
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train_dataset,
                  validation_dataset=train_dataset,
                  patience=10,
                  num_epochs=1000,
                  cuda_device=cuda_device)

In [72]:
trainer.train()

allennlp.training.trainer - INFO - Beginning training.
allennlp.training.trainer - INFO - Epoch 0/999
allennlp.training.trainer - INFO - Peak CPU memory usage MB: 5777.16
allennlp.training.trainer - INFO - GPU 0 memory usage MB: 0
allennlp.training.trainer - INFO - GPU 1 memory usage MB: 0
allennlp.training.trainer - INFO - GPU 2 memory usage MB: 0
allennlp.training.trainer - INFO - GPU 3 memory usage MB: 0
allennlp.training.trainer - INFO - GPU 4 memory usage MB: 0
allennlp.training.trainer - INFO - GPU 5 memory usage MB: 0
allennlp.training.trainer - INFO - GPU 6 memory usage MB: 0
allennlp.training.trainer - INFO - GPU 7 memory usage MB: 0
allennlp.training.trainer - INFO - Training
  0%|          | 0/25903 [00:00<?, ?it/s]

KeyError: 'sentence'

In [None]:
dataset2cid2parse_json['wiki']['502000']

In [None]:
mrp_json

In [None]:
mrp_parser_states[0]

In [None]:
with_parse_count, total_count

In [None]:
class Resolver(nn.Module):
    """Work-in-progress transition model that picks a parser action per token.

    Holds one embedding table shared by word forms and lemmas, one embedding
    table shared by UPOS and XPOS tags, and a small amount of per-sentence
    parser state (see ``reset``).

    NOTE(review): this is still a sketch. ``choose_action`` and the RESOLVE
    branch of ``forward`` are not implemented yet and raise
    ``NotImplementedError`` instead of failing on undefined names.
    """

    def __init__(self, vocab_size, vocab_embed_dim):
        """Create the embedding tables and initialise the parser state.

        Args:
            vocab_size: number of rows of the word/lemma embedding table.
            vocab_embed_dim: width of the word/lemma embeddings.
        """
        super(Resolver, self).__init__()
        self.vocab_embeds = nn.Embedding(vocab_size, vocab_embed_dim)
        # NOTE(review): pos_size / pos_embed_dim are not constructor arguments;
        # they are looked up as notebook globals and must be defined beforehand.
        self.pos_embeds = nn.Embedding(pos_size, pos_embed_dim)
        self.reset()

    def reset(self):
        """Clear the per-sentence state: token counter, token stack, property map."""
        self.curr_token_id = 0
        self.token_stack = []
        self.token_id2property_dict = {}

    def choose_action(self, features):
        """Score the candidate actions for the current token.

        Args:
            features: tuple of (token, lemma, upos, xpos) embeddings built in
                ``forward``.

        Raises:
            NotImplementedError: always; the scoring network has not been written.
        """
        raise NotImplementedError('Resolver.choose_action is not implemented yet')

    def forward(self, token_tensor, lemma_tensor, upos_tensor, xpos_tensor):
        """Process one token and return the chosen action.

        Args:
            token_tensor, lemma_tensor: inputs to the word/lemma embedding table
                (presumably index tensors — TODO confirm).
            upos_tensor, xpos_tensor: inputs to the POS embedding table
                (presumably index tensors — TODO confirm).

        Returns:
            ``(action_type, params)``; ``params`` is ``None`` for every action
            that is currently implemented.

        Raises:
            NotImplementedError: from ``choose_action``, and for RESOLVE actions.
        """
        token_id = self.curr_token_id
        token_embeds = self.vocab_embeds(token_tensor)

        lemma_embeds = self.vocab_embeds(lemma_tensor)
        upos_embeds = self.pos_embeds(upos_tensor)
        xpos_embeds = self.pos_embeds(xpos_tensor)

        # Fixed: the tuple previously referenced undefined singular names
        # (token_embed, lemma_embed, ...), which raised NameError.
        features = (token_embeds, lemma_embeds, upos_embeds, xpos_embeds)

        action_prob = self.choose_action(features)
        # The original assignment had no right-hand side (SyntaxError).
        # NOTE(review): assumes action_prob holds one score per action type and
        # that APPEND / RESOLVE are the matching integer ids — TODO confirm.
        action_type = int(torch.argmax(action_prob))

        if action_type == APPEND:
            # The original pushed an undefined name `token`.
            # NOTE(review): pushing the token id is a guess — confirm the stack
            # is meant to hold ids rather than tensors.
            self.token_stack.append(token_id)
            params = None
        elif action_type == RESOLVE:
            # Intended payload per the original sketch:
            # (num_pop, resolve_node, resolved_edges); none of these is computed yet.
            raise NotImplementedError('RESOLVE parameters are not implemented yet')
        else:
            params = None
        self.curr_token_id += 1
        return action_type, params
        

In [None]:
torch.LongTensor([12])

In [None]:
parse_json

In [None]:
# NOTE(review): Resolver.__init__ is declared with two required arguments
# (vocab_size, vocab_embed_dim); this call passes none and will raise TypeError
# as written. Supply the vocabulary size and embedding width here.
resolver = Resolver()

In [None]:
mrp_parser_states

In [None]:
# Draw the parse graph via plot_util.draw_mrp_graphviz and log the URLs built
# from the graphviz / parse-plot templates.
# NOTE(review): `cid` and `framework` are read from `mrp_json`, while the graph
# that gets drawn is `parse_json` — confirm both refer to the same sentence.
cid = mrp_json.get('id')
framework = mrp_json.get('framework')

# `dataset` is not assigned in this cell; it must already exist in the kernel.
dataset_dir = os.path.join(args.project_root, args.graphviz_sub_dir, dataset)
plot_util.draw_mrp_graphviz(parse_json, dataset_dir)

logger.info(args.graphviz_file_template.format(framework, dataset, cid))
logger.info(args.parse_plot_file_template.format(dataset, cid))

In [None]:
# Convert the parse graph into parser states, passing an empty alignment dict.
parser_states, meta_data = mrp_json2parser_states(parse_json, {})
# NOTE(review): this takes the LAST element of meta_data as the actions, but the
# cell in this notebook that unpacks meta_data into 16 names lists `actions`
# third from the end (before anchor2token_id and parent_child_id2edge_id_set).
# Confirm which position is current.
actions = meta_data[-1]

In [None]:
actions

In [None]:
# Log every framework together with the dataset names available for it.
# Note: the loop variable rebinds the kernel-level name `framework`; after this
# cell it holds the last key iterated, not a previously selected framework.
for framework in framework2dataset2mrp_jsons:
    logger.info(framework)
    logger.info(list(framework2dataset2mrp_jsons[framework].keys()))

### Test module

In [None]:
from action_state import mrp_json2parser_states, _generate_parser_action_states

In [None]:
from action_state import sentence_spliter

In [None]:
# Pick the (framework, dataset) pair to inspect; change the trailing index to
# switch between the combinations listed below.
framework, dataset = [
    ('dm', 'wsj'),
    ('psd', 'wsj'),
    ('eds', 'wsj'),
    ('ucca', 'wiki'),
    ('amr', 'wsj'),
    ('amr', 'wiki'),
][0]

# Graphs for the selected pair; the bare tuple on the last line is displayed as
# the cell output.
mrp_jsons = framework2dataset2mrp_jsons[framework][dataset]
framework, dataset

In [None]:
mrp_json = mrp_jsons[0]

In [None]:
# mrp_json = [mrp_json for mrp_json in mrp_jsons if mrp_json.get('id') == '20209013'][0]

In [None]:
mrp_json['input']

In [None]:
# Only AMR graphs get an alignment looked up by graph id; every other framework
# keeps the empty dict.
alignment = {}
if framework == 'amr':
    cid = mrp_json.get('id', '')
    # Raises KeyError when the graph id has no entry in cid2alignment.
    alignment = cid2alignment[cid]

In [None]:
logger.info(args.graphviz_file_template.format(
    framework, dataset, mrp_json.get('id')))

In [None]:
# Convert the selected graph into parser states plus the meta_data tuple.
# NOTE(review): a later cell in this notebook calls
# mrp_json2parser_states(mrp_json, framework, alignment) with three arguments;
# confirm which call matches the current signature.
parser_states, meta_data = mrp_json2parser_states(mrp_json, alignment)

In [None]:
import allennlp
from allennlp.data import Instance
from allennlp.data.tokenizers import Tokenizer, WordTokenizer
from allennlp.modules import Seq2SeqEncoder, TimeDistributed, TextFieldEmbedder
from allennlp.data.fields import LabelField, TextField
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer

In [None]:
wt = WordTokenizer()

In [None]:
tokenized_doc = wt.tokenize(mrp_json['input'])

In [None]:
token_indexers = {"tokens": SingleIdTokenIndexer()}

In [None]:
doc_field = TextField(tokenized_doc, token_indexers)

In [None]:
instance_fields = {'doc': doc_field}

In [None]:
instance = Instance(instance_fields)

In [None]:
# NOTE(review): TextField.as_tensor appears to expect a dict of padding lengths
# (e.g. the result of doc_field.get_padding_lengths(), after the field has been
# indexed against a vocabulary), not a bare int — confirm against the AllenNLP
# Field API before relying on this cell.
doc_field.as_tensor(padding_lengths=2)

In [None]:
doc_field._indexer_name_to_indexed_token

In [None]:
tokenized_doc[1].text_id

In [None]:
allennlp.data.tokenizers.token.Token('Apple').text

In [None]:
type(tokenized_doc[0])

In [None]:
mrp_json['input']

In [None]:
# Unpack the meta_data tuple returned by mrp_json2parser_states into named
# values. The unpacking is positional, so it raises ValueError if meta_data
# does not contain exactly these 16 items.
(
    doc,
    nodes,
    node_id2node,
    edge_id2edge,
    top_oriented_edges,
    token_nodes,
    abstract_node_id_set,
    parent_id2indegree,
    parent_id2child_id_set,
    child_id2parent_id_set,
    child_id2edge_id_set,
    parent_id2edge_id_set,
    token_node_id_set,
    actions,
    anchor2token_id,
    parent_child_id2edge_id_set,
) = meta_data

In [None]:
# Every edge's 'id' must equal its position in top_oriented_edges.
assert all([
    edge.get('id') == position
    for position, edge in enumerate(top_oriented_edges)
])

In [None]:
abstract_node_id_set

In [None]:
parent_id2edge_id_set

In [None]:
parent_id2child_id_set

In [None]:
# node_id2node

In [None]:
top_oriented_edges[27]

In [None]:
top_oriented_edges[5]

In [None]:
tokenized_doc

In [None]:
nodes

In [None]:
# Walk the parser states, pretty-print a summary of each one, and collect the
# node and edge ids they mention.
parser_node_id_set = set()
parser_edge_id_set = set()
# Note: the loop rebinds the kernel-level name `actions` on every iteration.
for (node_id, actions, edge_state, abstract_node_state, 
     complete_node_state, node_state, token_stack, pending_token_stack) in parser_states:
    parser_node_id_set.add(node_id)
    for edge_id in edge_state:
        parser_edge_id_set.add(edge_id)
    
    node = node_id2node[node_id]
    node_edges = [edge_id2edge[edge_id] for edge_id in edge_state]
    pprint.pprint((
        node.get('id'),
        actions, 
        node.get('label'), 
        [edge.get('label') for edge in node_edges], 
#         abstract_node_state,
        complete_node_state,
        node_state,
        token_stack,
        pending_token_stack,
    ))
    
# Print the graph's node ids / edge positions that no parser state mentioned
# (empty sets when nothing is missing), then assert the collected counts match
# the number of nodes and edges in the graph.
print({node.get('id', -1) for node in mrp_json.get('nodes')} - parser_node_id_set)
assert len(parser_node_id_set) == len(mrp_json.get('nodes'))
print({edge_id for edge_id, edge in enumerate(mrp_json.get('edges'))} - parser_edge_id_set)
assert len(parser_edge_id_set) == len(mrp_json.get('edges'))

In [None]:
# The second field of each parser state holds that state's actions.
action_states = [
    parser_state[1]
    for parser_state in parser_states
]

In [None]:
# Compute a (start, end) character span for every token yielded by
# sentence_spliter(doc); `end` is exclusive.
token_poss = []
prev_token_pos = 0
for token in sentence_spliter(doc):
    token_poss.append((prev_token_pos, prev_token_pos + len(token)))
    # Assumes consecutive tokens are separated by exactly one character.
    prev_token_pos += len(token) + 1

In [None]:
list(sentence_spliter(doc))

In [None]:
token_poss

In [None]:
[n['anchors'] for n in nodes]

In [None]:
nodes

In [None]:
nodes = mrp_json['nodes']

In [None]:
logger.setLevel(logging.INFO)

In [None]:
num_pops = []
error_num = 0
for i, mrp_json in tqdm(enumerate(mrp_jsons)):
#     print(i)
    parser_states, meta_data = mrp_json2parser_states(mrp_json, framework, alignment)
    if not parser_states:
        logger.info(i)
        error_num += 1
        continue
    action_states = [s[1] for s in parser_states]
    for action_state in action_states:
        for action in action_state:
            action_type, arg = action
            if action_type == RESOLVE:
                num_pop = arg
                num_pops.append(num_pop)