In [60]:
# Detect whether the code runs under IPython/Jupyter or as a plain script.
# IPython injects `__IPYTHON__` and `get_ipython` into builtins; a plain
# interpreter raises NameError on the lookup instead.
try:
    __IPYTHON__
except NameError:
    USING_IPYTHON = False
else:
    USING_IPYTHON = True
    # Function-call spelling of `%load_ext autoreload` / `%autoreload 2`.
    # Raw `%` magics are a SyntaxError outside IPython, which would make the
    # script branch above unreachable.
    get_ipython().run_line_magic('load_ext', 'autoreload')
    get_ipython().run_line_magic('autoreload', '2')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### Argparse

In [61]:
import argparse
# Command-line interface: one positional project root plus optional settings
# that all carry defaults.
ap = argparse.ArgumentParser()
ap.add_argument('project_root', help='')
ap.add_argument('--mrp-data-dir', default='data', help='')
ap.add_argument('--mrp-test-dir', default='src/tests', help='')
ap.add_argument('--tests-fixtures-file-template', default='fixtures/{}-test.jsonl', help='')

ap.add_argument('--graphviz-sub-dir', default='visualization/graphviz', help='')
ap.add_argument('--train-sub-dir', default='training', help='')
ap.add_argument('--companion-sub-dir', default='companion')
ap.add_argument('--jamr-alignment-file', default='jamr.mrp')

ap.add_argument('--test-input-file', default='evaluation/input.mrp', help='')
ap.add_argument('--test-companion-file', default='evaluation/udpipe.mrp', help='')
ap.add_argument('--allennlp-mrp-json-file-template', default='allennlp-mrp-json-small-{}-{}.jsonl', help='')
ap.add_argument('--data-size-limit', type=int, default=100, help='')

ap.add_argument('--mrp-file-extension', default='.mrp')
ap.add_argument('--companion-file-extension', default='.conllu')
ap.add_argument('--graphviz-file-template', default='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.mrp/{}.png')
ap.add_argument('--parse-plot-file-template', default='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.png')

# Inline argument list used when running inside IPython, where there is no
# real command line to parse.
arg_string = """
    /data/proj29_ds1/home/slai/mrp2019
"""
# str.split() with no separator splits on any run of whitespace, newlines
# included. The previous split(r'\\n') looked for a literal backslash-backslash-n
# that never occurs, so it was a no-op.
arguments = arg_string.split()

In [3]:
# In a notebook, parse the inline argument list; as a script, pass None so
# argparse falls back to the process command line.
args = ap.parse_args(arguments if USING_IPYTHON else None)

In [4]:
args

Namespace(allennlp_mrp_json_file_template='allennlp-mrp-json-small-{}-{}.jsonl', companion_file_extension='.conllu', companion_sub_dir='companion', data_size_limit=100, graphviz_file_template='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.mrp/{}.png', graphviz_sub_dir='visualization/graphviz', jamr_alignment_file='jamr.mrp', mrp_data_dir='data', mrp_file_extension='.mrp', mrp_test_dir='src/tests', parse_plot_file_template='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.png', project_root='/data/proj29_ds1/home/slai/mrp2019', test_companion_file='evaluation/udpipe.mrp', test_input_file='evaluation/input.mrp', tests_fixtures_file_template='fixtures/{}-test.jsonl', train_sub_dir='training')

#### Library imports

In [5]:
import json
import logging
import os
import pprint
import re
import string
from collections import Counter, defaultdict, deque

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import plot_util
import torch
from action_state import mrp_json2parser_states, _generate_parser_action_states
from action_state import ERROR, APPEND, RESOLVE, IGNORE
from preprocessing import (CompanionParseDataset, MrpDataset, JamrAlignmentDataset,
                           read_companion_parse_json_file, read_mrp_json_file, parse2parse_json)            
from torch import nn
from tqdm import tqdm

#### IPython notebook-specific imports

In [6]:
if USING_IPYTHON:
    # matplotlib config: function-call spelling of `%matplotlib inline`, so the
    # file still parses when it is run as a plain Python script.
    get_ipython().run_line_magic('matplotlib', 'inline')

In [7]:
# Root logging: one stream handler with a compact "LEVEL [logger:line] message"
# layout, root level DEBUG.
formatter = logging.Formatter('%(levelname)-8s [%(name)s:%(lineno)d] %(message)s')
sh = logging.StreamHandler()
sh.setFormatter(formatter)
logging.basicConfig(handlers=[sh], level=logging.DEBUG)

# This module's own logger is limited to INFO and above.
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
logger.setLevel(logging.INFO)

### Constants

In [8]:
# NOTE(review): both the name and the value are spelled "UNKWOWN" (presumably
# "UNKNOWN"). Left unchanged because the string may already appear in
# generated data or other modules; confirm before renaming.
UNKWOWN = 'UNKWOWN'

### Load data

In [82]:
# Training data directory: <project_root>/<mrp_data_dir>/<train_sub_dir>
train_dir = os.path.join(
    args.project_root,
    args.mrp_data_dir,
    args.train_sub_dir,
)

In [83]:
# Dataset helper from the project's `preprocessing` module; it is used below to
# load the training directory and later to generate the train/test instances.
mrp_dataset = MrpDataset()

In [84]:
# Load every file with the configured MRP extension under train_dir.
# Yields the framework names plus a nested mapping that later cells index as
# framework2dataset2mrp_jsons[framework][dataset][idx].
frameworks, framework2dataset2mrp_jsons = mrp_dataset.load_mrp_json_dir(
    train_dir, args.mrp_file_extension)

frameworks:   0%|          | 0/5 [00:00<?, ?it/s]
dataset_name:   0%|          | 0/2 [00:00<?, ?it/s][A
dataset_name:  50%|█████     | 1/2 [00:00<00:00,  3.92it/s][A
frameworks:  20%|██        | 1/5 [00:00<00:02,  1.85it/s]s][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
frameworks:  40%|████      | 2/5 [00:02<00:02,  1.04it/s]t][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
frameworks:  60%|██████    | 3/5 [00:10<00:06,  3.06s/it]t][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
frameworks:  80%|████████  | 4/5 [00:18<00:04,  4.64s/it]t][A
dataset_name:   0%|          | 0/14 [00:00<?, ?it/s][A
dataset_name:  36%|███▌      | 5/14 [00:00<00:00, 47.06it/s][A
dataset_name:  50%|█████     | 7/14 [00:00<00:00, 21.95it/s][A
dataset_name:  64%|██████▍   | 9/14 [00:00<00:00, 18.49it/s][A
dataset_name:  79%|███████▊  | 11/14 [00:01<00:00,  5.14it/s][A
frameworks: 100%|██████████| 5/5 [00:20<00:00,  3.72s/it]t/s][A


In [85]:
framework2dataset2mrp_jsons.keys()

dict_keys(['ucca', 'psd', 'eds', 'dm', 'amr'])

### Preprocess companion data

In [86]:
# Companion parse directory: <project_root>/<mrp_data_dir>/<companion_sub_dir>
companion_dir = os.path.join(
    args.project_root,
    args.mrp_data_dir,
    args.companion_sub_dir,
)

In [87]:
# Companion-parse helper from the project's `preprocessing` module.
cparse_dataset = CompanionParseDataset()

In [88]:
# Load the companion parses with the configured extension from companion_dir.
# Later cells index the result as dataset2cid2parse[dataset] and test ids for
# membership in it.
dataset2cid2parse = cparse_dataset.load_companion_parse_dir(companion_dir, args.companion_file_extension)

INFO     [preprocessing:179] framework amr found
dataset: 100%|██████████| 13/13 [00:01<00:00,  8.99it/s]
INFO     [preprocessing:179] framework dm found
dataset: 100%|██████████| 5/5 [00:01<00:00,  3.39it/s]
INFO     [preprocessing:179] framework ucca found
dataset: 100%|██████████| 6/6 [00:00<00:00, 35.32it/s]


In [89]:
# JSON form of the loaded companion parses; later cells index it as
# dataset2cid2parse_json[dataset][cid] and read its 'nodes' entry.
dataset2cid2parse_json = cparse_dataset.convert_parse2parse_json()

In [90]:
dataset2cid2parse.keys()

dict_keys(['amr-guidelines', 'bolt', 'cctv', 'dfa', 'dfb', 'fables', 'lorelei', 'mt09sdl', 'proxy', 'rte', 'wb', 'wiki', 'xinhua', 'wsj', 'ewt'])

In [91]:
# Some companion data is missing: for example, id '20003001' has no entry in
# the 'wsj' companion parses.
'20003001' in dataset2cid2parse['wsj']

False

### Load JAMR alignment data

In [92]:
# JAMR alignment helper from the project's `preprocessing` module.
jalignment_dataset = JamrAlignmentDataset()

In [93]:
# Alignment file lives next to the companion data:
# <project_root>/<mrp_data_dir>/<companion_sub_dir>/<jamr_alignment_file>
cid2alignment = jalignment_dataset.load_jamr_alignment_file(
    os.path.join(args.project_root, args.mrp_data_dir,
                 args.companion_sub_dir, args.jamr_alignment_file)
)

### Load testing data

In [94]:
# Evaluation input and its companion parse, both under <project_root>/<mrp_data_dir>.
test_input_filename = os.path.join(
    args.project_root, args.mrp_data_dir, args.test_input_file,
)
test_companion_filename = os.path.join(
    args.project_root, args.mrp_data_dir, args.test_companion_file,
)

In [95]:
# Read the evaluation graphs and their companion parses; the parses are
# looked up by id string in the next cell.
test_mrp_jsons = read_mrp_json_file(test_input_filename)
test_parse_jsons = read_companion_parse_json_file(test_companion_filename)

In [96]:
# Spot-check one evaluation parse by a hard-coded id.
# NOTE(review): `parse_json` is rebound a few cells below to the parse of the
# configured training example, so this value is only useful for inspection.
parse_json = test_parse_jsons['102990']

In [97]:
# Spot-check one PSD/wsj graph.
# NOTE(review): `mrp_json` is rebound a few cells below from `test_configs`,
# so this value is only useful for inspection.
mrp_json = framework2dataset2mrp_jsons['psd']['wsj'][1]

In [98]:
# (framework, dataset, index) triples for manual inspection; the first one is
# the example walked through in the following cells.
test_configs = [('ucca', 'wiki', 70)]
framework, dataset, idx = test_configs[0]

In [99]:
# Pick the configured example graph and remember its id, which is the key
# into the companion parses.
mrp_json = framework2dataset2mrp_jsons[framework][dataset][idx]
cid = mrp_json.get('id')

In [100]:
# Companion parse of the same sentence; raises KeyError when the dataset or
# id has no companion parse.
parse_json = dataset2cid2parse_json[dataset][cid]

In [101]:
# Raw sentence text of the selected graph.
doc = mrp_json['input']

In [102]:
doc

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [103]:
# Align every companion token to its character span in `doc`.
#   anchors[i]                          -> (start, end) span of token i
#   char_pos2tokenized_parse_node_id[p] -> token id covering character p
# Assumes the token labels occur in `doc` verbatim and in order, separated
# only by spaces. TODO confirm for tokenizers that normalise characters.
token_pos = 0
anchors = []
char_pos2tokenized_parse_node_id = []

for node_id, node in enumerate(parse_json.get('nodes')):
    label = node.get('label')
    label_size = len(label)
    # Skip the spaces in front of the token and attribute them to this token.
    # The bounds check prevents an IndexError once `doc` is exhausted.
    while token_pos < len(doc) and doc[token_pos] == ' ':
        token_pos += 1
        char_pos2tokenized_parse_node_id.append(node_id)
    anchors.append((token_pos, token_pos + label_size))
    char_pos2tokenized_parse_node_id.extend([node_id] * (label_size))
    print(node_id, doc[token_pos: token_pos + label_size], anchors[-1], len(char_pos2tokenized_parse_node_id))
    token_pos += label_size

0 In (0, 2) 2
1 the (3, 6) 6
2 final (7, 12) 12
3 minute (13, 19) 19
4 of (20, 22) 22
5 the (23, 26) 26
6 game (27, 31) 31
7 , (31, 32) 32
8 Johnson (33, 40) 40
9 had (41, 44) 44
10 the (45, 48) 48
11 ball (49, 53) 53
12 stolen (54, 60) 60
13 by (61, 63) 63
14 Celtics (64, 71) 71
15 center (72, 78) 78
16 Robert (79, 85) 85
17 Parish (86, 92) 92
18 , (92, 93) 93
19 and (94, 97) 97
20 then (98, 102) 102
21 missed (103, 109) 109
22 two (110, 113) 113
23 free (114, 118) 118
24 throws (119, 125) 125
25 that (126, 130) 130
26 could (131, 136) 136
27 have (137, 141) 141
28 won (142, 145) 145
29 the (146, 149) 149
30 game (150, 154) 154
31 . (154, 155) 155


In [104]:
doc

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [105]:
len(char_pos2tokenized_parse_node_id)

155

In [106]:
# Re-read the raw sentence text (same assignment as in the earlier cell).
doc = mrp_json['input']

In [107]:
mrp_json['tops']

[34]

In [108]:
# Convert the graph into parser states, using the companion tokens as the
# tokenisation. The call returns a pair: the states and a meta data sequence
# that the following cells unpack.
mrp_parser_states, mrp_meta_data = mrp_json2parser_states(
    mrp_json, 
    tokenized_parse_nodes=parse_json['nodes'],
)

In [109]:
# Unpack all twelve fields of the meta data by position. The commented-out
# names are not unpacked. Note that this rebinds `doc` and several other
# notebook-level names.
(
    doc,
    nodes,
    node_id2node,
    edge_id2edge,
    top_oriented_edges,
    token_nodes,
    # abstract_node_id_set,
    parent_id2indegree,
    # parent_id2child_id_set,
    # child_id2parent_id_set,
    # child_id2edge_id_set,
    # parent_id2edge_id_set,
    # parent_child_id2edge_id_set,
    parse_nodes_anchors,
    char_pos2tokenized_node_id,
    curr_node_ids,
    token_states,
    actions,
) = mrp_meta_data

In [110]:
# The last three meta data fields, taken with a single slice.
curr_node_ids, token_states, actions = mrp_meta_data[-3:]

In [111]:
# Star-unpacking form of the same extraction: keep only the last three
# fields and collect everything before them into `_`.
*_, curr_node_ids, token_states, actions = mrp_meta_data

In [112]:
actions[:4]

[(0, None),
 (1,
  (1,
   0,
   {'id': 0,
    'anchors': [{'from': 0, 'to': 2}],
    'label': 'In',
    'propagate_label': 'R'},
   [[]])),
 (0, None),
 (1,
  (1,
   0,
   {'id': 1,
    'anchors': [{'from': 3, 'to': 6}],
    'label': 'the',
    'propagate_label': 'E'},
   [[]]))]

In [113]:
# For every step, show the current node id, the action type (first element of
# the action pair) and the first four fields of each token group in the state
# recorded for that step.
for curr_node_id, action, token_state in zip(curr_node_ids, actions, token_states):
    pprint.pprint((curr_node_id, action[0], [token_group[:4] for token_group in token_state]))

(0, 0, [(0, False, 'In', [])])
(1, 1, [(0, True, 'R', [(0, False, 'In', [])])])
(1, 0, [(0, True, 'R', [(0, False, 'In', [])]), (1, False, 'the', [])])
(2,
 1,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])])])
(2,
 0,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, False, 'final', [])])
(3,
 1,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, True, 'E', [(2, False, 'final', [])])])
(3,
 0,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, True, 'E', [(2, False, 'final', [])]),
  (3, False, 'minute', [])])
(4,
 1,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, True, 'E', [(2, False, 'final', [])]),
  (3, True, 'C', [(3, False, 'minute', [])])])
(4,
 1,
 [(32,
   True,
   'E',
   [(0, True, 'R', [(0, False, 'In', [])]),
    (1, True, 'E', [(1, False, 'the', [])]),
    (2, True, 'E', [(

   True,
   'H',
   [(33,
     True,
     'T',
     [(32,
       True,
       'E',
       [(0, True, 'R', [(0, False, 'In', [])]),
        (1, True, 'E', [(1, False, 'the', [])]),
        (2, True, 'E', [(2, False, 'final', [])]),
        (3, True, 'C', [(3, False, 'minute', [])])]),
      (4, True, 'R', [(4, False, 'of', [])]),
      (5, True, 'E', [(5, False, 'the', [])]),
      (6, True, 'C', [(6, False, 'game', [])])]),
    (7, True, 'U', [(7, False, ',', [])]),
    (8, True, 'A', [(8, False, 'Johnson', [])]),
    (9, True, 'F', [(9, False, 'had', [])]),
    (34,
     True,
     'A',
     [(10, True, 'E', [(10, False, 'the', [])]),
      (11, True, 'C', [(11, False, 'ball', [])])]),
    (12, True, 'P', [(12, False, 'stolen', [])]),
    (36,
     True,
     'A',
     [(13, True, 'R', [(13, False, 'by', [])]),
      (35,
       True,
       'E',
       [(14, True, 'A', [(14, False, 'Celtics', [])]),
        (15, True, 'S', [(15, False, 'center', [])])]),
      (16, True, 'C', [(16, F

In [114]:
# Same dump, but with the states shifted by one step: an empty state is
# prepended so each action is paired with the state recorded one step earlier.
for curr_node_id, action, token_state in zip(curr_node_ids, actions, [[]] + token_states):
    pprint.pprint((curr_node_id, action[0], [token_group[:4] for token_group in token_state]))

(0, 0, [])
(1, 1, [(0, False, 'In', [])])
(1, 0, [(0, True, 'R', [(0, False, 'In', [])])])
(2, 1, [(0, True, 'R', [(0, False, 'In', [])]), (1, False, 'the', [])])
(2,
 0,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])])])
(3,
 1,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, False, 'final', [])])
(3,
 0,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, True, 'E', [(2, False, 'final', [])])])
(4,
 1,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, True, 'E', [(2, False, 'final', [])]),
  (3, False, 'minute', [])])
(4,
 1,
 [(0, True, 'R', [(0, False, 'In', [])]),
  (1, True, 'E', [(1, False, 'the', [])]),
  (2, True, 'E', [(2, False, 'final', [])]),
  (3, True, 'C', [(3, False, 'minute', [])])])
(4,
 0,
 [(32,
   True,
   'E',
   [(0, True, 'R', [(0, False, 'In', [])]),
    (1, True, 'E', [(1, False, 'the', [])]),
    (2, Tr

       True,
       'E',
       [(14, True, 'A', [(14, False, 'Celtics', [])]),
        (15, True, 'S', [(15, False, 'center', [])])]),
      (16, True, 'C', [(16, False, 'Robert', [])])])]),
  (18, True, 'U', [(18, False, ',', [])]),
  (19, True, 'L', [(19, False, 'and', [])]),
  (20, True, 'L', [(20, False, 'then', [])]),
  (38,
   True,
   'H',
   [(21, True, 'D', [(21, False, 'missed', [])]),
    (22, True, 'D', [(22, False, 'two', [])]),
    (23, True, 'D', [(23, False, 'free', [])]),
    (24, True, 'P', [(24, False, 'throws', [])])]),
  (25, True, 'L', [(25, False, 'that', [])]),
  (26, False, 'could', [])])
(27,
 0,
 [(37,
   True,
   'H',
   [(33,
     True,
     'T',
     [(32,
       True,
       'E',
       [(0, True, 'R', [(0, False, 'In', [])]),
        (1, True, 'E', [(1, False, 'the', [])]),
        (2, True, 'E', [(2, False, 'final', [])]),
        (3, True, 'C', [(3, False, 'minute', [])])]),
      (4, True, 'R', [(4, False, 'of', [])]),
      (5, True, 'E', [(5, False

In [115]:
actions

[(0, None),
 (1,
  (1,
   0,
   {'id': 0,
    'anchors': [{'from': 0, 'to': 2}],
    'label': 'In',
    'propagate_label': 'R'},
   [[]])),
 (0, None),
 (1,
  (1,
   0,
   {'id': 1,
    'anchors': [{'from': 3, 'to': 6}],
    'label': 'the',
    'propagate_label': 'E'},
   [[]])),
 (0, None),
 (1,
  (1,
   0,
   {'id': 2,
    'anchors': [{'from': 7, 'to': 12}],
    'label': 'final',
    'propagate_label': 'E'},
   [[]])),
 (0, None),
 (1,
  (1,
   0,
   {'id': 3,
    'anchors': [{'from': 13, 'to': 19}],
    'label': 'minute',
    'propagate_label': 'C'},
   [[]])),
 (1,
  (4,
   3,
   {'id': 31, 'propagate_label': 'E'},
   [[{'source': 31,
      'target': 0,
      'label': 'R',
      'id': 31,
      'parent': 31,
      'child': 0}],
    [{'source': 31,
      'target': 1,
      'label': 'E',
      'id': 28,
      'parent': 31,
      'child': 1}],
    [{'source': 31,
      'target': 2,
      'label': 'E',
      'id': 21,
      'parent': 31,
      'child': 2}],
    [{'source': 31,
      't

In [116]:
token_states[1]

[(0, True, 'R', [(0, False, 'In', [])])]

In [117]:
[n['label'] for n in parse_json['nodes']]

['In',
 'the',
 'final',
 'minute',
 'of',
 'the',
 'game',
 ',',
 'Johnson',
 'had',
 'the',
 'ball',
 'stolen',
 'by',
 'Celtics',
 'center',
 'Robert',
 'Parish',
 ',',
 'and',
 'then',
 'missed',
 'two',
 'free',
 'throws',
 'that',
 'could',
 'have',
 'won',
 'the',
 'game',
 '.']

In [118]:
token_states[-1]

[(42,
  True,
  '<UCCA-TOP-NODE>',
  [(37,
    True,
    'H',
    [(33,
      True,
      'T',
      [(32,
        True,
        'E',
        [(0, True, 'R', [(0, False, 'In', [])]),
         (1, True, 'E', [(1, False, 'the', [])]),
         (2, True, 'E', [(2, False, 'final', [])]),
         (3, True, 'C', [(3, False, 'minute', [])])]),
       (4, True, 'R', [(4, False, 'of', [])]),
       (5, True, 'E', [(5, False, 'the', [])]),
       (6, True, 'C', [(6, False, 'game', [])])]),
     (7, True, 'U', [(7, False, ',', [])]),
     (8, True, 'A', [(8, False, 'Johnson', [])]),
     (9, True, 'F', [(9, False, 'had', [])]),
     (34,
      True,
      'A',
      [(10, True, 'E', [(10, False, 'the', [])]),
       (11, True, 'C', [(11, False, 'ball', [])])]),
     (12, True, 'P', [(12, False, 'stolen', [])]),
     (36,
      True,
      'A',
      [(13, True, 'R', [(13, False, 'by', [])]),
       (35,
        True,
        'E',
        [(14, True, 'A', [(14, False, 'Celtics', [])]),
         (

In [119]:
# Run the same conversion on the companion parse itself. The sentence text is
# passed explicitly through `mrp_doc`, and the parse's own nodes serve as the
# tokenisation.
companion_parser_states, companion_meta_data = mrp_json2parser_states(
    parse_json,
    mrp_doc=doc,
    tokenized_parse_nodes=parse_json['nodes'],
)

In [120]:
# Log the URL of the rendered graph for this example; the template takes
# framework, dataset and id in that order.
logger.info(args.graphviz_file_template.format(
    framework, dataset, cid))

INFO     [__main__:2] http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/ucca/wiki.mrp/470004.png


In [121]:
mrp_json['input']

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [122]:
mrp_parser_states

[(0,
  [(0, None),
   (1,
    (1,
     0,
     {'id': 0,
      'anchors': [{'from': 0, 'to': 2}],
      'label': 'In',
      'propagate_label': 'R'},
     [[]]))],
  [],
  [],
  [],
  [(0, 0, [(0, 0, None)])],
  [(0, True, 'R', [(0, False, 'In', 'In')])]),
 (1,
  [(0, None),
   (1,
    (1,
     0,
     {'id': 1,
      'anchors': [{'from': 3, 'to': 6}],
      'label': 'the',
      'propagate_label': 'E'},
     [[]]))],
  [],
  [],
  [],
  [(0, 0, [(0, 0, None)]), (1, 1, [(1, 1, None)])],
  [(0, True, 'R', [(0, False, 'In', 'In')]),
   (1, True, 'E', [(1, False, 'the', 'the')])]),
 (2,
  [(0, None),
   (1,
    (1,
     0,
     {'id': 2,
      'anchors': [{'from': 7, 'to': 12}],
      'label': 'final',
      'propagate_label': 'E'},
     [[]]))],
  [],
  [],
  [],
  [(0, 0, [(0, 0, None)]), (1, 1, [(1, 1, None)]), (2, 2, [(2, 2, None)])],
  [(0, True, 'R', [(0, False, 'In', 'In')]),
   (1, True, 'E', [(1, False, 'the', 'the')]),
   (2, True, 'E', [(2, False, 'final', 'final')])]),
 (3,
  

In [123]:
[(node['id'], node.get('label')) for node in mrp_json['nodes']]

[(0, 'In'),
 (1, 'the'),
 (2, 'final'),
 (3, 'minute'),
 (4, 'of'),
 (5, 'the'),
 (6, 'game'),
 (7, ','),
 (8, 'Johnson'),
 (9, 'had'),
 (10, 'the'),
 (11, 'ball'),
 (12, 'stolen'),
 (13, 'by'),
 (14, 'Celtics'),
 (15, 'center'),
 (16, 'RobertParish'),
 (17, ','),
 (18, 'and'),
 (19, 'then'),
 (20, 'missed'),
 (21, 'two'),
 (22, 'free'),
 (23, 'throws'),
 (24, 'that'),
 (25, 'could'),
 (26, 'have'),
 (27, 'won'),
 (28, 'the'),
 (29, 'game'),
 (30, '.'),
 (31, None),
 (32, None),
 (33, None),
 (34, None),
 (35, None),
 (36, None),
 (37, None),
 (38, None),
 (39, None),
 (40, None),
 (41, None)]

In [124]:
doc

'In the final minute of the game, Johnson had the ball stolen by Celtics center Robert Parish, and then missed two free throws that could have won the game.'

In [125]:
parse_json['nodes']

[{'id': 0,
  'label': 'In',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['in', 'ADP', 'IN']},
 {'id': 1,
  'label': 'the',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['the', 'DET', 'DT']},
 {'id': 2,
  'label': 'final',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['final', 'ADJ', 'JJ']},
 {'id': 3,
  'label': 'minute',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['minute', 'NOUN', 'NN']},
 {'id': 4,
  'label': 'of',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['of', 'ADP', 'IN']},
 {'id': 5,
  'label': 'the',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['the', 'DET', 'DT']},
 {'id': 6,
  'label': 'game',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['game', 'NOUN', 'NN']},
 {'id': 7,
  'label': ',',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': [',', 'PUNCT', ',']},
 {'id': 8,
  'label': 'Johnson',
  'properties': ['lemma', 'upos', 'xpos'],
  'values': ['Johnson', 'PROPN', 'NNP']},
 {'id': 9,
  'label

In [126]:
[(node['id'], node['label']) for node in parse_json['nodes']]

[(0, 'In'),
 (1, 'the'),
 (2, 'final'),
 (3, 'minute'),
 (4, 'of'),
 (5, 'the'),
 (6, 'game'),
 (7, ','),
 (8, 'Johnson'),
 (9, 'had'),
 (10, 'the'),
 (11, 'ball'),
 (12, 'stolen'),
 (13, 'by'),
 (14, 'Celtics'),
 (15, 'center'),
 (16, 'Robert'),
 (17, 'Parish'),
 (18, ','),
 (19, 'and'),
 (20, 'then'),
 (21, 'missed'),
 (22, 'two'),
 (23, 'free'),
 (24, 'throws'),
 (25, 'that'),
 (26, 'could'),
 (27, 'have'),
 (28, 'won'),
 (29, 'the'),
 (30, 'game'),
 (31, '.')]

In [127]:
anchors

[(0, 2),
 (3, 6),
 (7, 12),
 (13, 19),
 (20, 22),
 (23, 26),
 (27, 31),
 (31, 32),
 (33, 40),
 (41, 44),
 (45, 48),
 (49, 53),
 (54, 60),
 (61, 63),
 (64, 71),
 (72, 78),
 (79, 85),
 (86, 92),
 (92, 93),
 (94, 97),
 (98, 102),
 (103, 109),
 (110, 113),
 (114, 118),
 (119, 125),
 (126, 130),
 (131, 136),
 (137, 141),
 (142, 145),
 (146, 149),
 (150, 154),
 (154, 155)]

### Create training instance

In [128]:
# Reset the running counters and take the size limit from the CLI arguments.
total_count = with_parse_count = 0
data_size_limit = args.data_size_limit

In [129]:
# Selection of what to export. Frameworks and datasets named in the ignore
# sets are skipped by the generator; everything else is kept.
# Alternatives tried earlier:
#   UCCA only: ignore_framework_set = {'amr', 'dm', 'psd', 'eds'}, dataset = 'wiki'
#   DM only:   ignore_framework_set = {'amr', 'ucca', 'psd', 'eds'}, dataset = 'wsj'
framework = 'ucca'
ignore_framework_set = {'amr', 'psd', 'eds'}
dataset = 'wiki'
# `{}` would create an empty dict; set() gives the empty set the name promises.
ignore_dataset_set = set()

In [130]:
frameworks

['ucca', 'psd', 'eds', 'dm', 'amr']

In [131]:
# Dash-joined names of the frameworks that are not ignored, in loading order;
# used to label the output files.
framework_names = '-'.join(
    name for name in frameworks if name not in ignore_framework_set
)
framework_names

'ucca-dm'

In [132]:
# Fixture file: <project_root>/<mrp_test_dir>/<fixtures template>
allennlp_tests_fixtures_output_file = os.path.join(
    args.project_root,
    args.mrp_test_dir,
    args.tests_fixtures_file_template.format(framework_names),
)

# Train / test files sit directly under the project root.
allennlp_framework_train_output_file = os.path.join(
    args.project_root,
    args.allennlp_mrp_json_file_template.format(framework_names, 'train'),
)
allennlp_framework_test_output_file = os.path.join(
    args.project_root,
    args.allennlp_mrp_json_file_template.format(framework_names, 'test'),
)

In [133]:
# Create tests fixture jsonl: two hand-picked examples, each repeated five
# times, written as one JSON object per line.
# Note: the loop rebinds the notebook-level `framework`, `dataset` and `idx`.
fixture_combinations = [
    ('ucca', 'wiki', 70),
    ('dm', 'wsj', 3)
] * 5

with open(allennlp_tests_fixtures_output_file, 'w') as wf:
    for framework, dataset, idx in fixture_combinations:
        mrp_json = framework2dataset2mrp_jsons[framework][dataset][idx]
        cid = mrp_json.get('id')
        doc = mrp_json.get('input')

        # Only AMR graphs are given a JAMR alignment.
        alignment = {}
        if framework == 'amr':
            alignment = cid2alignment[cid]
        parse_json = dataset2cid2parse_json.get(dataset, {}).get(cid, {})

        # Without a companion parse there is no tokenisation to convert with.
        if not parse_json:
            continue

        with_parse_count += 1
        mrp_parser_states, mrp_meta_data = mrp_json2parser_states(
            mrp_json,
            tokenized_parse_nodes=parse_json['nodes'],
            alignment=alignment,
        )
        # Same guard as the train/test export: an empty result marks a failed
        # conversion and must not end up in the fixtures.
        if not mrp_parser_states:
            logger.warning('no parser states for %s/%s id %s, skipped', framework, dataset, cid)
            continue

        companion_parser_states, companion_meta_data = mrp_json2parser_states(
            parse_json,
            mrp_doc=doc,
            tokenized_parse_nodes=parse_json['nodes'],
        )

        data_instance = {
            'mrp_json': mrp_json,
            'parse_json': parse_json,
            'mrp_parser_states': mrp_parser_states,
            'mrp_meta_data': mrp_meta_data,
            'companion_parser_states': companion_parser_states,
            'companion_meta_data': companion_meta_data,
        }
        json_encoded_instance = json.dumps(data_instance)
        wf.write(json_encoded_instance + '\n')

In [134]:
# Print which of the first 20 graphs of the current framework/dataset have a
# companion parse.
for idx in range(20):
    mrp_json = framework2dataset2mrp_jsons[framework][dataset][idx]
    cid = mrp_json.get('id')
    if cid not in dataset2cid2parse[dataset]:
        continue
    print(idx)

3
8
13
18


In [135]:
with_parse_count

10

In [136]:
[state[-1] for state in mrp_parser_states]

[[(0, True, 'the', [(0, False, 'the', 'The')])],
 [(0, True, 'the', [(0, False, 'the', 'The')]),
  (1, True, 'asbestos', [(1, False, 'asbestos', 'asbestos')])],
 [(0, True, 'the', [(0, False, 'the', 'The')]),
  (1, True, 'asbestos', [(1, False, 'asbestos', 'asbestos')]),
  (2, False, 'fiber', 'fiber')],
 [(0, True, 'the', [(0, False, 'the', 'The')]),
  (1, True, 'asbestos', [(1, False, 'asbestos', 'asbestos')]),
  (2, False, 'fiber', 'fiber'),
  (4, True, 'crocidolite', [(4, False, 'crocidolite', 'crocidolite')])],
 [(2,
   True,
   'fiber',
   [(0, True, 'the', [(0, False, 'the', 'The')]),
    (1, True, 'asbestos', [(1, False, 'asbestos', 'asbestos')]),
    (2, False, 'fiber', 'fiber'),
    (4, True, 'crocidolite', [(4, False, 'crocidolite', 'crocidolite')])])],
 [(2,
   True,
   'fiber',
   [(0, True, 'the', [(0, False, 'the', 'The')]),
    (1, True, 'asbestos', [(1, False, 'asbestos', 'asbestos')]),
    (2, False, 'fiber', 'fiber'),
    (4, True, 'crocidolite', [(4, False, 'crocidol

In [137]:
mrp_meta_data[-1]

[(0, None),
 (1,
  (1,
   0,
   {'id': 0,
    'label': 'the',
    'properties': ['pos', 'frame'],
    'values': ['DT', 'q:i-h-h'],
    'anchors': [{'from': 0, 'to': 3}]},
   [[]])),
 (0, None),
 (1,
  (1,
   0,
   {'id': 1,
    'label': 'asbestos',
    'properties': ['pos', 'frame'],
    'values': ['NN', 'n:x'],
    'anchors': [{'from': 4, 'to': 12}]},
   [[]])),
 (0, None),
 (2, None),
 (0, None),
 (1,
  (1,
   0,
   {'id': 4,
    'label': 'crocidolite',
    'properties': ['pos', 'frame'],
    'values': ['NN', 'n:x'],
    'anchors': [{'from': 20, 'to': 31}]},
   [[]])),
 (1,
  (4,
   2,
   {'id': 2,
    'label': 'fiber',
    'properties': ['pos', 'frame'],
    'values': ['NN', 'n:x'],
    'anchors': [{'from': 13, 'to': 18}]},
   [[{'source': 0,
      'target': 2,
      'label': 'BV',
      'id': 2,
      'parent': 2,
      'child': 0}],
    [{'source': 1,
      'target': 2,
      'label': 'compound',
      'id': 1,
      'parent': 2,
      'child': 1}],
    [],
    [{'source': 4,
    

In [138]:
doc

'The asbestos fiber, crocidolite, is unusually resilient once it enters the lungs, with even brief exposures to it causing symptoms that show up decades later, researchers said.'

In [139]:
parse_json

{'id': '20003002',
 'tops': [30],
 'nodes': [{'id': 0,
   'label': 'The',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['the', 'DET', 'DT']},
  {'id': 1,
   'label': 'asbestos',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['asbestos', 'NOUN', 'NN']},
  {'id': 2,
   'label': 'fiber',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['fiber', 'NOUN', 'NN']},
  {'id': 3,
   'label': ',',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': [',', 'PUNCT', ',']},
  {'id': 4,
   'label': 'crocidolite',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['crocidolite', 'NOUN', 'NN']},
  {'id': 5,
   'label': ',',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': [',', 'PUNCT', ',']},
  {'id': 6,
   'label': 'is',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['be', 'VERB', 'VBZ']},
  {'id': 7,
   'label': 'unusually',
   'properties': ['lemma', 'upos', 'xpos'],
   'values': ['unusually', 'ADV', 'RB']},
  {'id': 8,
   'label': 'resil

In [140]:
[n['values'][2] for n in parse_json['nodes']]

['DT',
 'NN',
 'NN',
 ',',
 'NN',
 ',',
 'VBZ',
 'RB',
 'JJ',
 'IN',
 'PRP',
 'VBZ',
 'DT',
 'NNS',
 ',',
 'IN',
 'RB',
 'JJ',
 'NNS',
 'TO',
 'PRP',
 'VBG',
 'NNS',
 'WDT',
 'VBP',
 'RP',
 'NNS',
 'RB',
 ',',
 'NNS',
 'VBD',
 '.']

In [141]:
# Create train jsonl
if os.path.isfile(allennlp_framework_train_output_file) and os.path.isfile(
    allennlp_framework_train_output_file):
    logger.info('allennlp_train_output_file found, stop generation')
else:
    pass
if 1==1:
    data_size = 0
    with open(allennlp_framework_train_output_file, 'w') as train_wf:
        with open(allennlp_framework_test_output_file, 'w') as test_wf:
            for _, dataset, idx, mrp_json in tqdm(mrp_dataset.mrp_json_generator(
                ignore_framework_set=ignore_framework_set,
                ignore_dataset_set=ignore_dataset_set,
                data_size_limit=data_size_limit * 2
            )):
                total_count += 1
                cid = mrp_json.get('id')
                doc = mrp_json.get('input')

                framework = mrp_json.get('framework')
                alignment = {}
                if framework == 'amr':
                    alignment = cid2alignment[cid]  
                parse_json = dataset2cid2parse_json.get(dataset, {}).get(cid, {})

                if parse_json:
                    mrp_parser_states, mrp_meta_data = mrp_json2parser_states(
                        mrp_json, 
                        tokenized_parse_nodes=parse_json['nodes'],
                        alignment=alignment,
                    )
                    companion_parser_states, companion_meta_data = mrp_json2parser_states(
                        parse_json, 
                        mrp_doc=doc,
                        tokenized_parse_nodes=parse_json['nodes'],
                    )

                    # Continue if error
                    if not mrp_parser_states:
                        continue

                    data_instance = {
                        'mrp_json': mrp_json,
                        'parse_json': parse_json,
                        'mrp_parser_states': mrp_parser_states,
                        'mrp_meta_data': mrp_meta_data,
                        'companion_parser_states': companion_parser_states,
                        'companion_meta_data': companion_meta_data,
                    }
                    json_encoded_instance = json.dumps(data_instance)
                    if idx <= data_size_limit:
                        train_wf.write(json_encoded_instance + '\n')
                    else:
                        test_wf.write(json_encoded_instance + '\n')

INFO     [__main__:4] allennlp_train_output_file found, stop generation
0it [00:00, ?it/s]INFO     [__main__:42] 0
1it [00:00,  6.93it/s]INFO     [__main__:42] 0
2it [00:00,  6.23it/s]INFO     [__main__:42] 0
INFO     [__main__:42] 0
6it [00:01,  4.59it/s]INFO     [__main__:42] 0
INFO     [__main__:42] 0
8it [00:01,  5.77it/s]INFO     [__main__:42] 0
10it [00:01,  5.07it/s]INFO     [__main__:42] 0
12it [00:02,  4.27it/s]INFO     [__main__:42] 0
INFO     [__main__:42] 0
INFO     [__main__:42] 0
17it [00:03,  4.86it/s]INFO     [__main__:42] 0
INFO     [__main__:42] 0
19it [00:03,  6.12it/s]INFO     [__main__:42] 0
21it [00:03,  7.38it/s]INFO     [__main__:42] 0
25it [00:04,  7.61it/s]INFO     [__main__:42] 0
29it [00:05,  5.00it/s]INFO     [__main__:42] 0
31it [00:05,  6.37it/s]INFO     [__main__:42] 0
INFO     [__main__:42] 0
33it [00:05,  7.00it/s]INFO     [__main__:42] 0
35it [00:06,  5.09it/s]INFO     [__main__:42] 0
INFO     [__main__:42] 0
38it [00:06,  6.61it/s]INFO     [__main__:

### Test AllenNLP dataset reader

In [142]:
import torch.optim as optim

from mrp_library.dataset_readers.mrp_jsons import MRPDatasetReader
from allennlp.common.file_utils import cached_path
from allennlp.data.vocabulary import Vocabulary
from allennlp.modules.text_field_embedders import TextFieldEmbedder, BasicTextFieldEmbedder
from allennlp.modules.token_embedders.embedding import Embedding
from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
from allennlp.modules.feedforward import FeedForward

from allennlp.training.metrics import CategoricalAccuracy

from allennlp.data.iterators import BucketIterator
from allennlp.training.trainer import Trainer

import json
import logging
from typing import Dict

from allennlp.common.file_utils import cached_path
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.fields import LabelField, TextField
from allennlp.data.instance import Instance
from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer
from allennlp.data.tokenizers import Token, Tokenizer, WordTokenizer
from allennlp.models import Model
from overrides import overrides

INFO     [pytorch_pretrained_bert.modeling:230] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
DEBUG    [allennlp.common.registrable:56] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>


In [143]:
from mrp_library.dataset_readers.mrp_jsons_actions import MRPDatasetActionReader

In [144]:
# Instantiate the action-level MRP dataset reader with its default arguments.
reader = MRPDatasetActionReader()

In [145]:
# Read the training instances from the framework-specific train JSONL file.
# cached_path turns a URL-or-local-path into a local file path before reading.
train_dataset = reader.read(cached_path(allennlp_framework_train_output_file))

0it [00:00, ?it/s]INFO     [mrp_library.dataset_readers.mrp_jsons_actions:113] Reading instances from lines in file at: /data/proj29_ds1/home/slai/mrp2019/allennlp-mrp-json-small-ucca-dm-train.jsonl
3369it [00:09, 370.34it/s] 


In [146]:
# Read the held-out instances; they are passed to the Trainer as the validation set.
# The commented line is an alternative that reads `allennlp_train_output_file` instead.
# test_dataset = reader.read(cached_path(allennlp_train_output_file))
test_dataset = reader.read(cached_path(allennlp_framework_test_output_file))

0it [00:00, ?it/s]INFO     [mrp_library.dataset_readers.mrp_jsons_actions:113] Reading instances from lines in file at: /data/proj29_ds1/home/slai/mrp2019/allennlp-mrp-json-small-ucca-dm-test.jsonl
2868it [00:00, 5953.52it/s]


In [147]:
# Read the test-fixtures split; it is added to the vocabulary-fitting data in the next cell.
tests_fixtures_dataset = reader.read(cached_path(allennlp_tests_fixtures_output_file))

0it [00:00, ?it/s]INFO     [mrp_library.dataset_readers.mrp_jsons_actions:113] Reading instances from lines in file at: /data/proj29_ds1/home/slai/mrp2019/src/tests/fixtures/ucca-dm-test.jsonl
640it [00:00, 4258.45it/s]


In [148]:
# Fit the vocabulary on the concatenation of all three splits.
# NOTE(review): this includes the validation and fixture instances, so none of their
# tokens are out-of-vocabulary at evaluation time — confirm this is intended.
vocab = Vocabulary.from_instances(train_dataset + test_dataset + tests_fixtures_dataset)

INFO     [allennlp.data.vocabulary:396] Fitting token dictionary from dataset.
100%|██████████| 6877/6877 [00:00<00:00, 11118.05it/s]


In [149]:
# Print per-namespace token statistics (most frequent / longest / shortest tokens).
vocab.print_statistics()

INFO     [allennlp.data.vocabulary:664] Printed vocabulary statistics are only for the part of the vocabulary generated from instances. If vocabulary is constructed by extending saved vocabulary with dataset instances, the directly loaded portion won't be considered here.




----Vocabulary Statistics----


Top 10 most frequent tokens in namespace 'word':
	Token: <START-WORD>		Frequency: 68770
	Token: <END-WORD>		Frequency: 68770
	Token: ,		Frequency: 23118
	Token: the		Frequency: 22562
	Token: .		Frequency: 13754
	Token: in		Frequency: 9141
	Token: and		Frequency: 8094
	Token: to		Frequency: 7853
	Token: a		Frequency: 6129
	Token: of		Frequency: 5854

Top 10 longest tokens in namespace 'word':
	Token: dollar-denominated		length: 18	Frequency: 50
	Token: asbestos-related		length: 16	Frequency: 60
	Token: confrontational		length: 15	Frequency: 148
	Token: reorganization		length: 14	Frequency: 114
	Token: collaboration		length: 13	Frequency: 136
	Token: collaborative		length: 13	Frequency: 118
	Token: contributions		length: 13	Frequency: 59
	Token: circumstances		length: 13	Frequency: 46
	Token: <START-WORD>		length: 12	Frequency: 68770
	Token: Merseysiders		length: 12	Frequency: 150

Top 10 shortest tokens in namespace 'word':
	Token: –		length: 1	Frequenc

In [150]:
# Size of the 'token_node_label' namespace (used as num_embeddings for that field type).
vocab.get_vocab_size('token_node_label')

1163

In [151]:
# Size of the 'word' namespace (used as num_embeddings for that field type).
vocab.get_vocab_size('word')

1410

In [152]:
# Size of the 'pos' namespace (used as num_embeddings for that field type).
vocab.get_vocab_size('pos')

59

In [153]:
# Size of the 'label' namespace.
# NOTE(review): this reports 2 entries while the classifier's last layer emits 3 scores
# (hidden_dims [50, 3]) — confirm which namespace the classifier actually predicts over.
vocab.get_vocab_size('label')

2

In [154]:
# Width of every token embedding, and therefore the input size of each LSTM encoder.
EMBEDDING_DIM = 100
# Hidden size of each LSTM encoder; the classifier input is HIDDEN_DIM * 3.
HIDDEN_DIM = 50

### Test model

In [198]:
from mrp_library.models.generalizer import ActionGeneralizer
from mrp_library.iterators.same_representation_iterator import SameRepresentationIterator

from allennlp.nn import InitializerApplicator, RegularizerApplicator, util
from allennlp.nn.activations import Activation
from allennlp.common.params import Params
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
from allennlp.modules.seq2vec_encoders.pytorch_seq2vec_wrapper import PytorchSeq2VecWrapper

In [199]:
# Per-field building blocks: every field type gets its own embedder plus a seq2vec
# and a seq2seq LSTM encoder, each stored in a dict keyed by the field type.
field_types = ['word', 'pos', 'resolved', 'token_node_label', 'token_node_prev_action']
field_type2embedder, field_type2seq2vec_encoder, field_type2seq2seq_encoder = {}, {}, {}

for field_type in field_types:
    # The embedding table is sized by the vocabulary namespace named after the field type.
    embedding = Embedding(
        num_embeddings=vocab.get_vocab_size(field_type),
        embedding_dim=EMBEDDING_DIM,
    )
    embedder = BasicTextFieldEmbedder({field_type: embedding})
    field_type2embedder[field_type] = embedder

    # Two independent LSTMs with identical hyper-parameters: one wrapped to return a
    # single vector per sequence, the other wrapped to return one vector per position.
    field_type2seq2vec_encoder[field_type] = PytorchSeq2VecWrapper(
        torch.nn.LSTM(input_size=EMBEDDING_DIM, hidden_size=HIDDEN_DIM, batch_first=True)
    )
    field_type2seq2seq_encoder[field_type] = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(input_size=EMBEDDING_DIM, hidden_size=HIDDEN_DIM, batch_first=True)
    )

In [200]:
# word_embedding = Embedding(num_embeddings=vocab.get_vocab_size('word'),
#                             embedding_dim=EMBEDDING_DIM)
# pos_embedding = Embedding(num_embeddings=vocab.get_vocab_size('pos'),
#                             embedding_dim=EMBEDDING_DIM)

# word_embedder = BasicTextFieldEmbedder({
#     "word": word_embedding,
#     "pos": pos_embedding,
# })
# parse_label = {
#     'word': torch.LongTensor(
#         [
#             [ 1,  0,  3,  7,  2,  9,  4],
#             [ 0,  0,  5,  0,  0,  0,  4]
#         ]
#     ),
#     'pos': torch.LongTensor(
#         [
#             [ 1,  0,  3,  7,  2,  9,  4],
#             [ 0,  0,  5,  0,  0,  0,  4]
#         ]
#     )
# }

In [201]:
# embedded_parse_label = word_embedder(parse_label)

In [202]:
# embedded_parse_label.shape

In [203]:
# Specification of the feed-forward classifier: two layers (50 units with sigmoid,
# then 3 linear outputs), no dropout. The input width is three HIDDEN_DIM-sized
# encodings side by side.
classifier_params = Params(dict(
    input_dim=HIDDEN_DIM * 3,
    num_layers=2,
    hidden_dims=[50, 3],
    activations=["sigmoid", "linear"],
    dropout=[0.0, 0.0],
))

In [204]:
# Build the FeedForward classifier module from the parameter specification.
classifier_feedforward = FeedForward.from_params(classifier_params)

INFO     [allennlp.common.from_params:340] instantiating class <class 'allennlp.modules.feedforward.FeedForward'> from params {'input_dim': 150, 'num_layers': 2, 'hidden_dims': [50, 3], 'activations': ['sigmoid', 'linear'], 'dropout': [0.0, 0.0]} and extras set()
INFO     [allennlp.common.params:252] input_dim = 150
INFO     [allennlp.common.params:252] num_layers = 2
INFO     [allennlp.common.params:252] hidden_dims = [50, 3]
INFO     [allennlp.common.params:252] hidden_dims = [50, 3]
INFO     [allennlp.common.params:252] activations = ['sigmoid', 'linear']
INFO     [allennlp.common.from_params:340] instantiating class <class 'allennlp.nn.activations.Activation'> from params ['sigmoid', 'linear'] and extras set()
INFO     [allennlp.common.params:252] activations = ['sigmoid', 'linear']
INFO     [allennlp.common.from_params:340] instantiating class <class 'allennlp.nn.activations.Activation'> from params sigmoid and extras set()
INFO     [allennlp.common.params:252] type = sigmoid
INFO

In [205]:
# Pick one field type for the manual forward-pass checks in the following cells.
# This rebinds `field_type`, which was also the loop variable of the construction cell.
field_type = 'word'

In [206]:
# Toy batch of two id sequences (length 7) keyed by the selected field type, pushed
# through that field type's embedder.
parse_label = {
    field_type: torch.LongTensor([
        [1, 0, 3, 7, 2, 9, 4],
        [0, 0, 5, 0, 0, 0, 4],
    ])
}
embedded_parse_label = field_type2embedder[field_type](parse_label)

In [207]:
# Derive the padding mask for the toy batch from its token ids.
# NOTE(review): AllenNLP treats id 0 as padding, so the zeros inside the toy rows are
# presumably masked out as well — confirm if the exact mask matters.
feature_mask = util.get_text_field_mask(parse_label)

In [208]:
# Select the seq2vec encoder that was built for the chosen field type.
seq2vec_encoder = field_type2seq2vec_encoder[field_type]

In [209]:
# Encode each embedded, masked sequence into a single vector per batch row.
encoded_feature = seq2vec_encoder(embedded_parse_label, feature_mask)

In [210]:
# Repeat the same encoding three times so the concatenation matches the classifier's
# input_dim of HIDDEN_DIM * 3 (presumably a stand-in for three distinct features).
encoded_features = [encoded_feature] * 3

In [211]:
# Shape check of the concatenated features: (batch, HIDDEN_DIM * 3).
torch.cat(encoded_features, dim=-1).shape

torch.Size([2, 150])

In [212]:
# Run the concatenated features through the classifier to get per-class scores.
logits = classifier_feedforward(torch.cat(encoded_features, dim=-1))

In [213]:
# Shape check: (batch, 3), where 3 is the last entry of the classifier's hidden_dims.
logits.shape

torch.Size([2, 3])

In [214]:
# Dummy class ids, one per row of the toy batch; referenced by the commented-out
# cross-entropy check in the next cell.
label = torch.tensor([1, 0])

In [215]:
# loss_func = torch.nn.CrossEntropyLoss()
# loss = loss_func(logits, label)

In [238]:
# Clear the current binding before the class is imported again in the next cell.
# NOTE(review): presumably a workaround to pick up edits to the module; autoreload is
# already enabled in the first cell — confirm whether this reset is still needed.
ActionGeneralizer = None

In [282]:
from mrp_library.models.generalizer import ActionGeneralizer
from mrp_library.iterators.same_representation_iterator import SameRepresentationIterator

In [283]:
# Display the name to confirm it is bound to the class again after the re-import.
ActionGeneralizer

mrp_library.models.generalizer.ActionGeneralizer

In [284]:
# Device index handed to the Trainer; -1 selects the CPU in AllenNLP's convention.
cuda_device = -1

# Assemble the model from the vocabulary, the per-field components and the classifier.
model = ActionGeneralizer(
    vocab=vocab,
    classifier_feedforward=classifier_feedforward,
    field_type2embedder=field_type2embedder,
    field_type2seq2vec_encoder=field_type2seq2vec_encoder,
    field_type2seq2seq_encoder=field_type2seq2seq_encoder,
)

# Batches of 20 instances, sorted by the token count of 'token_node_resolveds';
# the iterator needs the vocabulary to index the instances.
iterator = SameRepresentationIterator(
    sorting_keys=[("token_node_resolveds", "num_tokens")],
    batch_size=20,
)
iterator.index_with(vocab)

# Plain SGD over every model parameter.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

INFO     [allennlp.nn.initializers:293] Initializing parameters
INFO     [allennlp.nn.initializers:309] Done initializing parameters; the following parameters are using their default initialization from their code
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.0.bias
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.0.weight
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.1.bias
INFO     [allennlp.nn.initializers:314]    classifier_feedforward._linear_layers.1.weight
INFO     [allennlp.nn.initializers:314]    field_type2embedder.pos.token_embedder_pos.weight
INFO     [allennlp.nn.initializers:314]    field_type2embedder.resolved.token_embedder_resolved.weight
INFO     [allennlp.nn.initializers:314]    field_type2embedder.token_node_label.token_embedder_token_node_label.weight
INFO     [allennlp.nn.initializers:314]    field_type2embedder.token_node_prev_action.token_embedder_token_node_pre

In [285]:
# list(model.named_parameters())

In [286]:
# Train for up to 20 epochs with a patience of 10, validating on the held-out split.
# Alternative kept from earlier experiments: pass train_dataset as both
# train_dataset and validation_dataset.
trainer = Trainer(
    model=model, optimizer=optimizer, iterator=iterator,
    train_dataset=train_dataset, validation_dataset=test_dataset,
    num_epochs=20, patience=10,
    cuda_device=cuda_device,
)

In [287]:
# Hand-pasted scores (20 rows x 3 classes) used to sanity-check the argmax / equality
# logic in the next cells. This rebinds `logits` from the earlier classifier check.
logits = torch.tensor([
    [-2.2126, 2.6022, -1.1655],
    [4.7340, -1.9992, -3.4521],
    [-1.9665, 2.4100, -1.2047],
    [-2.1353, 2.4847, -1.1260],
    [4.7492, -2.0234, -3.4460],
    [-1.4369, 1.9822, -1.2885],
    [1.0337, 0.3599, -2.0420],
    [5.0974, -2.3380, -3.4647],
    [5.4187, -2.4720, -3.6469],
    [-3.4045, 2.4903, 0.0773],
    [0.6384, 0.6764, -1.9942],
    [-2.2904, 2.7170, -1.2016],
    [4.6333, -1.9474, -3.4113],
    [-2.0811, 2.5367, -1.2174],
    [5.1840, -2.7536, -3.1499],
    [4.7421, -2.0138, -3.4485],
    [5.1121, -2.2290, -3.5999],
    [0.1843, 0.8324, -1.6990],
    [5.3854, -2.4593, -3.6309],
    [0.0324, 0.6715, -1.4173],
])

In [288]:
# Row-wise maximum over dimension 1: `values` holds the top score of each row and
# `indices` the column (class id) where it occurs.
values, indices = logits.max(1)

In [289]:
# Flag the rows whose predicted class id equals 2.
# Uses the out-of-place `eq` so that `indices` keeps the argmax values and the cell
# can be re-run safely; the in-place `eq_` overwrote `indices` with the comparison
# result, destroying the predictions after the first run.
indices.eq(torch.tensor(2))

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [290]:
# Run the training loop; progress bars and log records are emitted as cell output.
trainer.train()

INFO     [allennlp.training.trainer:465] Beginning training.
INFO     [allennlp.training.trainer:281] Epoch 0/19
INFO     [allennlp.training.trainer:283] Peak CPU memory usage MB: 10268.444
INFO     [allennlp.training.trainer:287] GPU 0 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 1 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 2 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 3 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 4 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 5 memory usage MB: 10
INFO     [allennlp.training.trainer:287] GPU 6 memory usage MB: 11
INFO     [allennlp.training.trainer:287] GPU 7 memory usage MB: 10
INFO     [allennlp.training.trainer:311] Training



  0%|          | 0/169 [00:00<?, ?it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 55, 'num_tokens': 55}, 'parse_node_lemmas': {'word_length':

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8542, loss: 0.3363 ||:   7%|▋         | 12/169 [00:01<00:19,  7.94it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 15, 'num_tokens': 15}, 'token_node_labels': {'token_node_label_length': 15, 'num_tokens': 15}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 56, 'num_tokens': 56}, 'parse_node_lemmas': {'word_length': 56, 'num_tokens': 56}, 'parse_node_uposs': {'pos_length': 56,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 49, 'num_tokens': 49}, 'parse_node_lemmas': {'word_length': 49, 'num_tokens': 49}, 'parse_node_uposs': {'pos_length': 49, 'num_tokens': 49}, 'parse_node_xposs': {'pos_length': 49, 'num_tokens': 49}, 'token_node_resolveds': {'resolved_length': 22, 'num_tokens': 22}, 'token_node_labels': {'token_node_label_length': 22, 'num_tokens': 22}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8615, loss: 0.3192 ||:  15%|█▌        | 26/169 [00:02<00:12, 11.59it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8553, loss: 0.3336 ||:  22%|██▏       | 38/169 [00:03<00:11, 11.80it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 19, 'num_tokens': 19}, 'token_node_labels': {'token_node_label_length': 19, 'num_tokens': 19}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 13, 'num_tokens': 13}, 'token_node_labels': {'token_node_label_length': 13, 'num_tokens': 13}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8510, loss: 0.3492 ||:  31%|███       | 52/169 [00:04<00:08, 13.52it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 56, 'num_tokens': 56}, 'parse_node_lemmas': {'word_length': 56, 'num_tokens': 56}, 'parse_node_uposs': {'pos_length': 56,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8523, loss: 0.3557 ||:  38%|███▊      | 64/169 [00:05<00:08, 13.12it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 14, 'num_tokens': 14}, 'token_node_labels': {'token_node_label_length': 14, 'num_tokens': 14}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 18, 'num_tokens': 18}, 'token_node_labels': {'token_node_label_length': 18, 'num_tokens': 18}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8513, loss: 0.3563 ||:  46%|████▌     | 78/169 [00:06<00:07, 11.97it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 56, 'num_tokens': 56}, 'parse_node_lemmas': {'word_length': 56, 'num_tokens': 56}, 'parse_node_uposs': {'pos_length': 56,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8511, loss: 0.3597 ||:  53%|█████▎    | 90/169 [00:07<00:05, 13.35it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 22, 'num_tokens': 22}, 'token_node_labels': {'token_node_label_length': 22, 'num_tokens': 22}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 56, 'num_tokens': 56}, 'parse_node_lemmas': {'word_length': 56, 'num_tokens': 56}, 'parse_node_uposs': {'pos_length': 56, 'num_tokens': 56}, 'parse_node_xposs': {'pos_length': 56, 'num_tokens': 56}, 'token_node_resolveds': {'resolved_length': 16, 'num_tokens': 16}, 'token_node_labels': {'token_node_label_length': 16, 'num_tokens': 16}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8524, loss: 0.3559 ||:  62%|██████▏   | 104/169 [00:08<00:05, 12.23it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 56, 'num_tokens': 56}, 'parse_node_lemmas': {'word_length': 56, 'num_tokens': 56}, 'parse_node_uposs': {'pos_length': 56

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8509, loss: 0.3608 ||:  69%|██████▊   | 116/169 [00:09<00:03, 13.38it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 19, 'num_tokens': 19}, 'token_node_labels': {'token_node_label_length': 19, 'num_tokens': 19}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 60, 'num_tokens': 60}, 'parse_node_lemmas': {'word_length': 60, 'num_tokens': 60}, 'parse_node_uposs': {'pos_length': 60, 'num_tokens': 60}, 'parse_node_xposs': {'pos_length': 60, 'num_tokens': 60}, 'token_node_resolveds': {'resolved_length': 25, 'num_tokens': 25}, 'token_node_labels': {'token_node_label_length': 25, 'num_tokens': 25}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8515, loss: 0.3578 ||:  77%|███████▋  | 130/169 [00:10<00:03, 11.50it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 56, 'num_tokens': 56}, 'parse_node_lemmas': {'word_length': 56, 'num_tokens': 56}, 'parse_node_uposs': {'pos_length': 56

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8435, loss: 0.3872 ||:  84%|████████▍ | 142/169 [00:11<00:01, 14.35it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52, 'num_tokens': 52}, 'parse_node_xposs': {'pos_length': 52, 'num_tokens': 52}, 'token_node_resolveds': {'resolved_length': 12, 'num_tokens': 12}, 'token_node_labels': {'token_node_label_length': 12, 'num_tokens': 12}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52, 'num_tokens': 52}, 'parse_node_xposs': {'pos_length': 52, 'num_tokens': 52}, 'token_node_resolveds': {'resolved_length': 10, 'num_tokens': 10}, 'token_node_labels': {'token_node_label_length': 10, 'num_tokens': 10}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8238, loss: 0.4313 ||:  92%|█████████▏| 156/169 [00:12<00:00, 14.34it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8173, loss: 0.4509 ||:  99%|█████████▉| 168/169 [00:13<00:00, 13.30it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52, 'num_tokens': 52}, 'parse_node_xposs': {'pos_length': 52, 'num_tokens': 52}, 'token_node_resolveds': {'resolved_length': 19, 'num_tokens': 19}, 'token_node_labels': {'token_node_label_length': 19, 'num_tokens': 19}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 39, 'num_tokens': 39}, 'parse_node_lemmas': {'word_length': 39, 'num_tokens': 39}, 'parse_node_uposs': {'pos_length': 39, 'num_tokens': 39}, 'parse_node_xposs': {'pos_length': 39, 'num_tokens': 39}, 'token_node_resolveds': {'resolved_length': 16, 'num_tokens': 16}, 'token_node_labels': {'token_node_label_length': 16, 'num_tokens': 16}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 25, 'num_tokens': 25}, 'parse_node_lemmas': {'word_length': 25, 'num_tokens': 25}, 'parse_node_uposs': {'pos_length': 25, 'num_tokens': 25}, 'parse_node_xposs': {'pos_length': 25, 'num_tokens': 25}, 'token_node_resol

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.7960, loss: 0.5222 ||:  17%|█▋        | 25/144 [00:00<00:03, 39.22it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 43, 'num_tokens': 43}, 'parse_node_lemmas': {'word_length': 43, 'num_tokens': 43}, 'parse_node_uposs': {'pos_length': 43, 'num_tokens': 43}, 'parse_node_xposs': {'pos_length': 43, 'num_tokens': 43}, 'token_node_resolveds': {'resolved_length': 10, 'num_tokens': 10}, 'token_node_labels': {'token_node_label_length': 10, 'num_tokens': 10}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 43, 'num_tokens': 43}, 'parse_node_lemmas': {'word_length': 43, 'num_tokens': 43}, 'parse_node_uposs': {'pos_length': 43,

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 31, 'num_tokens': 31}, 'parse_node_lemmas': {'word_length': 31, 'num_tokens': 31}, 'parse_node_uposs': {'pos_length': 31, 'num_tokens': 31}, 'parse_node_xposs': {'pos_length': 31, 'num_tokens': 31}, 'token_node_resolveds': {'resolved_length': 10, 'num_tokens': 10}, 'token_node_labels': {'token_node_label_length': 10, 'num_tokens': 10}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 31, 'num_tokens': 31}, 'parse_node_lemmas': {'word_length': 31, 'num_tokens': 31}, 'parse_node_uposs': {'pos_length': 31, 'num_tokens': 31}, 'parse_node_xposs': {'pos_length': 31, 'num_tokens': 31}, 'token_node_resol

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 48, 'num_tokens': 48}, 'parse_node_lemmas': {'word_length': 48, 'num_tokens': 48}, 'parse_node_uposs': {'pos_length': 48, 'num_tokens': 48}, 'parse_node_xposs': {'pos_length': 48, 'num_tokens': 48}, 'token_node_resolveds': {'resolved_length': 21, 'num_tokens': 21}, 'token_node_labels': {'token_node_label_length': 21, 'num_tokens': 21}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 48, 'num_tokens': 48}, 'parse_node_lemmas': {'word_length': 48, 'num_tokens': 48}, 'parse_node_uposs': {'pos_length': 48, 'num_tokens': 48}, 'parse_node_xposs': {'pos_length': 48, 'num_tokens': 48}, 'token_node_resol

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8104, loss: 0.5004 ||:  47%|████▋     | 67/144 [00:01<00:02, 30.79it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 41, 'num_tokens': 41}, 'parse_node_lemmas': {'word_length': 41, 'num_tokens': 41}, 'parse_node_uposs': {'pos_length': 41, 'num_tokens': 41}, 'parse_node_xposs': {'pos_length': 41, 'num_tokens': 41}, 'token_node_resolveds': {'resolved_length': 9, 'num_tokens': 9}, 'token_node_labels': {'token_node_label_length': 9, 'num_tokens': 9}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 41, 'num_tokens': 41}, 'parse_node_lemmas': {'word_length': 41, 'num_tokens': 41}, 'parse_node_uposs': {'pos_length': 41, 'nu

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 20, 'num_tokens': 20}, 'parse_node_lemmas': {'word_length': 20, 'num_tokens': 20}, 'parse_node_uposs': {'pos_length': 20, 'num_tokens': 20}, 'parse_node_xposs': {'pos_length': 20, 'num_tokens': 20}, 'token_node_resolveds': {'resolved_length': 6, 'num_tokens': 6}, 'token_node_labels': {'token_node_label_length': 6, 'num_tokens': 6}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 32, 'num_tokens': 32}, 'parse_node_lemmas': {'word_length': 32, 'num_tokens': 32}, 'parse_node_uposs': {'pos_length': 32, 'num_tokens': 32}, 'parse_node_xposs': {'pos_length': 32, 'num_tokens': 32}, 'token_node_resolveds

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 62, 'num_tokens': 62}, 'parse_node_lemmas': {'word_length': 62, 'num_tokens': 62}, 'parse_node_uposs': {'pos_length': 62, 'num_tokens': 62}, 'parse_node_xposs': {'pos_length': 62, 'num_tokens': 62}, 'token_node_resolveds': {'resolved_length': 25, 'num_tokens': 25}, 'token_node_labels': {'token_node_label_length': 25, 'num_tokens': 25}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 62, 'num_tokens': 62}, 'parse_node_lemmas': {'word_length': 62, 'num_tokens': 62}, 'parse_node_uposs': {'pos_length': 62, 'num_tokens': 62}, 'parse_node_xposs': {'pos_length': 62, 'num_tokens': 62}, 'token_node_resol

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.8124, loss: 0.5027 ||:  76%|███████▌  | 109/144 [00:02<00:01, 33.57it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 31, 'num_tokens': 31}, 'parse_node_lemmas': {'word_length': 31, 'num_tokens': 31}, 'parse_node_uposs': {'pos_length': 31, 'num_tokens': 31}, 'parse_node_xposs': {'pos_length': 31, 'num_tokens': 31}, 'token_node_resolveds': {'resolved_length': 11, 'num_tokens': 11}, 'token_node_labels': {'token_node_label_length': 11, 'num_tokens': 11}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 31, 'num_tokens': 31}, 'parse_node_lemmas': {'word_length': 31, 'num_tokens': 31}, 'parse_node_uposs': {'pos_length': 31

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 46, 'num_tokens': 46}, 'parse_node_lemmas': {'word_length': 46, 'num_tokens': 46}, 'parse_node_uposs': {'pos_length': 46, 'num_tokens': 46}, 'parse_node_xposs': {'pos_length': 46, 'num_tokens': 46}, 'token_node_resolveds': {'resolved_length': 10, 'num_tokens': 10}, 'token_node_labels': {'token_node_label_length': 10, 'num_tokens': 10}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 37, 'num_tokens': 37}, 'parse_node_lemmas': {'word_length': 37, 'num_tokens': 37}, 'parse_node_uposs': {'pos_length': 37, 'num_tokens': 37}, 'parse_node_xposs': {'pos_length': 37, 'num_tokens': 37}, 'token_node_resol

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 45, 'num_tokens': 45}, 'parse_node_lemmas': {'word_length': 45, 'num_tokens': 45}, 'parse_node_uposs': {'pos_length': 45, 'num_tokens': 45}, 'parse_node_xposs': {'pos_length': 45, 'num_tokens': 45}, 'token_node_resolveds': {'resolved_length': 15, 'num_tokens': 15}, 'token_node_labels': {'token_node_label_length': 15, 'num_tokens': 15}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 45, 'num_tokens': 45}, 'parse_node_lemmas': {'word_length': 45, 'num_tokens': 45}, 'parse_node_uposs': {'pos_length': 45, 'num_tokens': 45}, 'parse_node_xposs': {'pos_length': 45, 'num_tokens': 45}, 'token_node_resol

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.7167, loss: 0.7681 ||:   2%|▏         | 3/169 [00:00<00:41,  4.04it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52, 'num_tokens': 52}, 'parse_node_xposs': {'pos_length': 52, 'num_tokens': 52}, 'token_node_resolveds': {'resolved_length': 9, 'num_tokens': 9}, 'token_node_labels': {'token_node_label_length': 9, 'num_tokens': 9}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 42, 'num_tokens': 42}, 'parse_node_lemmas': {'word_length': 42, 'num_tokens': 42}, 'parse_node_uposs': {'pos_length': 42, 'num

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 42, 'num_tokens': 42}, 'parse_node_lemmas': {'word_length': 42, 'num_tokens': 42}, 'parse_node_uposs': {'pos_length': 42, 'num_tokens': 42}, 'parse_node_xposs': {'pos_length': 42, 'num_tokens': 42}, 'token_node_resolveds': {'resolved_length': 8, 'num_tokens': 8}, 'token_node_labels': {'token_node_label_length': 8, 'num_tokens': 8}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.6882, loss: 0.7717 ||:  10%|█         | 17/169 [00:01<00:12, 12.20it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52, 'nu

DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20



accuracy: 0.6879, loss: 0.7697 ||:  17%|█▋        | 29/169 [00:02<00:10, 12.84it/s][A[A[ADEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52, 'num_tokens': 52}, 'parse_node_xposs': {'pos_length': 52, 'num_tokens': 52}, 'token_node_resolveds': {'resolved_length': 12, 'num_tokens': 12}, 'token_node_labels': {'token_node_label_length': 12, 'num_tokens': 12}, 'token_node_prev_actions': {'token_node_prev_action_length': 5, 'num_tokens': 5}}
DEBUG    [allennlp.data.iterators.data_iterator:152] Batch size: 20
DEBUG    [allennlp.data.iterators.data_iterator:151] Batch padding lengths: {'parse_node_labels': {'word_length': 52, 'num_tokens': 52}, 'parse_node_lemmas': {'word_length': 52, 'num_tokens': 52}, 'parse_node_uposs': {'pos_length': 52,

KeyboardInterrupt: 

In [None]:
# Hand-written snapshot of a nested token/node structure for the sentence
# "The Lakers advanced through the 1982 playoffs and faced Philadelphia for
# the second time in three years".
# Every entry has the shape [id, flag, label_or_word, children]; entries with
# flag False carry the surface word and an empty children list.
# NOTE(review): the flag presumably means "resolved" and the one-letter labels
# look like edge/category labels -- confirm against the dataset reader.
token_state = [
    [26, True, 'H', [
        [23, True, 'A', [
            [0, True, 'E', [[0, False, 'The', []]]],
            [1, True, 'C', [[1, False, 'Lakers', []]]],
        ]],
        [2, True, 'P', [[2, False, 'advanced', []]]],
        [25, True, 'A', [
            [3, True, 'R', [[3, False, 'through', []]]],
            [4, True, 'E', [[4, False, 'the', []]]],
            [24, True, 'P', [
                [5, True, 'T', [[5, False, '1982', []]]],
                [6, True, 'C', [[6, False, 'playoffs', []]]],
            ]],
        ]],
    ]],
    [7, True, 'L', [[7, False, 'and', []]]],
    [8, True, 'P', [[8, False, 'faced', []]]],
    [9, True, 'A', [[9, False, 'Philadelphia', []]]],
    [27, True, 'D', [
        [10, True, 'R', [[10, False, 'for', []]]],
        [11, True, 'E', [[11, False, 'the', []]]],
        [12, True, 'Q', [[12, False, 'second', []]]],
        [13, True, 'C', [[13, False, 'time', []]]],
    ]],
    [14, True, 'R', [[14, False, 'in', []]]],
    [15, True, 'Q', [[15, False, 'three', []]]],
    [16, False, 'years', []],
]

In [None]:
# Pretty-print the nested `token_state` list so its tree structure is readable.
# NOTE(review): relies on `pprint` having been imported in an earlier cell.
pprint.pprint(token_state)

In [None]:
# Show which token is stored at index 0 of the 'labels' namespace.
# NOTE(review): `vocab` is defined in an earlier cell; presumably an AllenNLP
# Vocabulary -- confirm before relying on this lookup.
vocab.get_token_from_index(0, namespace='labels')

In [None]:
# Show which token is stored at index 3 of the 'resolved' namespace.
# NOTE(review): `vocab` is defined in an earlier cell; presumably an AllenNLP
# Vocabulary -- confirm before relying on this lookup.
vocab.get_token_from_index(3, namespace='resolved')

1275it [00:40, 42.06it/s]