In [1]:
# Detect whether this code runs inside IPython/Jupyter: the interactive shell
# injects the builtin name `__IPYTHON__`, a plain interpreter does not.
try:
    __IPYTHON__
except NameError:
    USING_IPYTHON = False
else:
    USING_IPYTHON = True

#### Argparse

In [2]:
# Command-line interface, first variant: one positional data directory plus
# optional sub-directory / file-extension overrides.
# NOTE(review): when the cells run in order, the following cell assigns `ap`
# and `arguments` again before `ap.parse_args` is called, so the values
# built here are overwritten.
import argparse
ap = argparse.ArgumentParser()
ap.add_argument('mrp_data_dir', help='')
ap.add_argument('--train-sub-dir', default='../data/training', help='')
ap.add_argument('--graphviz-dir', default='../graphviz', help='')
ap.add_argument('--companion-sub-dir', default='../data/companion')
ap.add_argument('--evaluation-sub-dir', default='../data/evaluation')
ap.add_argument('--mrp-file-extension', default='.mrp')
ap.add_argument('--jamr-alignment-file', default='jamr.mrp')
ap.add_argument('--companion-file-extension', default='.conllu')
# Arguments to use in place of sys.argv when running inside the notebook.
arg_string = """
    ../data/
"""
# NOTE(review): the raw-string separator is a literal backslash sequence, not a
# newline, so it never occurs in `arg_string` and the outer split is a no-op;
# the inner whitespace `split()` does all of the tokenizing.
arguments = [arg for arg_line in arg_string.split(r'\\n') for arg in arg_line.split()]

In [3]:
# Command-line interface for the notebook / script.
# Positional: project_root. Every other option is a path component, file
# extension, file-name template or size limit with a default.
import argparse
ap = argparse.ArgumentParser()
ap.add_argument('project_root', help='')
ap.add_argument('--mrp-data-dir', default='data', help='')
ap.add_argument('--mrp-test-dir', default='src/tests', help='')
ap.add_argument('--tests-fixtures-file', default='fixtures/test.jsonl', help='')

ap.add_argument('--graphviz-sub-dir', default='visualization/graphviz', help='')
ap.add_argument('--train-sub-dir', default='training', help='')
ap.add_argument('--companion-sub-dir', default='companion')
ap.add_argument('--evaluation-sub-dir', default='evaluation')
ap.add_argument('--erg-sub-dir', default='erg')
ap.add_argument('--czEngVallex-sub-dir', default='czEngVallex')
ap.add_argument('--jamr-alignment-file', default='jamr.mrp')

ap.add_argument('--test-input-file', default='evaluation/input.mrp', help='')
ap.add_argument('--test-companion-file', default='evaluation/udpipe.mrp', help='')
ap.add_argument('--allennlp-mrp-json-file-template', default='allennlp-mrp-json-small-{}.jsonl', help='')
ap.add_argument('--data-size-limit', type=int, default=100, help='')

ap.add_argument('--mrp-file-extension', default='.mrp')
ap.add_argument('--companion-file-extension', default='.conllu')
ap.add_argument('--erg-file-extension', default='.smi')
ap.add_argument('--czEngVallex-file-extension', default='.xml')
# NOTE(review): these two defaults point at a specific local server and home
# directory; override them on other machines.
ap.add_argument('--graphviz-file-template', default='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.mrp/{}.png')
ap.add_argument('--parse-plot-file-template', default='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.png')

# Arguments to use in place of sys.argv when running inside the notebook.
arg_string = """
    ..
"""
# Tokenize on any whitespace (including newlines). The previous code first
# split on a raw-string backslash sequence that never occurs in `arg_string`,
# which was a no-op; a single whitespace split yields the same tokens.
arguments = arg_string.split()

In [4]:
# Inside IPython use the notebook's argument list; otherwise pass None so that
# argparse falls back to the real command line (sys.argv[1:]).
args = ap.parse_args(arguments if USING_IPYTHON else None)

In [5]:
# Echo the parsed namespace so the effective configuration shows in the cell output.
args

Namespace(allennlp_mrp_json_file_template='allennlp-mrp-json-small-{}.jsonl', companion_file_extension='.conllu', companion_sub_dir='companion', czEngVallex_file_extension='.xml', czEngVallex_sub_dir='czEngVallex', data_size_limit=100, erg_file_extension='.smi', erg_sub_dir='erg', evaluation_sub_dir='evaluation', graphviz_file_template='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.mrp/{}.png', graphviz_sub_dir='visualization/graphviz', jamr_alignment_file='jamr.mrp', mrp_data_dir='data', mrp_file_extension='.mrp', mrp_test_dir='src/tests', parse_plot_file_template='http://localhost:8000/files/proj29_ds1/home/slai/mrp2019/visualization/graphviz/{}/{}.png', project_root='..', test_companion_file='evaluation/udpipe.mrp', test_input_file='evaluation/input.mrp', tests_fixtures_file='fixtures/test.jsonl', train_sub_dir='training')

#### Library imports

In [6]:
import json
import logging
import os
import random
import copy
import re

from PIL import Image
from matplotlib.pyplot import figure
from networkx.drawing.nx_agraph import to_agraph
from tqdm import tqdm
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import plotly.graph_objs as go
from pprint import pprint
import string
from IPython.display import Image

#### IPython notebook-specific imports

In [7]:
# Pick the plotting backend depending on where the code runs.
# NOTE(review): the `%matplotlib` line below is IPython magic syntax, so this
# cell is not valid plain Python as written — confirm how the non-IPython
# path is meant to be executed.
if USING_IPYTHON:
    # matplotlib config
    %matplotlib inline
    
    # ipython notebook plotly config
    from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
    init_notebook_mode(connected=True)
else:
    # NOTE(review): `plotly.plotly` was moved to the separate `chart_studio`
    # package in plotly 4 — confirm against the installed plotly version.
    from plotly.plotly import plot, iplot

In [8]:
# Configure the root logger at INFO level with a single stream handler, then
# create this module's logger and set it to INFO as well.
logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler()])
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
logger.setLevel(logging.INFO)

### Constants

In [9]:
# Placeholder string constant.
# NOTE(review): 'UNKWOWN' looks like a typo for 'UNKNOWN'; the name and value
# are left unchanged here because other code may reference either — confirm
# before renaming.
UNKWOWN = 'UNKWOWN'

### Load data

In [10]:
# Training data directory: <project_root>/<mrp_data_dir>/<train_sub_dir>.
train_dir = os.path.join(args.project_root, args.mrp_data_dir, args.train_sub_dir)

In [11]:
# Each sub-directory of the training directory is treated as one framework;
# plain files in that directory are ignored.
frameworks = list(filter(
    lambda entry: os.path.isdir(os.path.join(train_dir, entry)),
    os.listdir(train_dir),
))
frameworks

['amr', 'eds', 'psd', 'ucca', 'dm']

In [12]:
# Load every training graph and index it two ways per framework:
#   framework2dataset2mrp_jsons[framework][dataset] -> list of graphs in file order
#   framework2id2mrp_jsons[framework][graph_id]     -> graph
# Each data file is read as one JSON object per line.
framework2dataset2mrp_jsons = {}
framework2id2mrp_jsons = {}
# Ids starting with this prefix are indexed without it.
# NOTE(review): presumably so they match ids used elsewhere — confirm.
reviews_id_prefix = "reviews-"
for framework in tqdm(frameworks, desc='frameworks'):
    dataset2mrp_jsons = {}
    id2mrp_jsons_perFram = {}
    framework_dir = os.path.join(train_dir, framework)
    dataset_names = os.listdir(framework_dir)

    for dataset_name in tqdm(dataset_names, desc='dataset_name'):
        if not dataset_name.endswith(args.mrp_file_extension):
            continue
        mrp_jsons = []
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(os.path.join(framework_dir, dataset_name), encoding='utf-8') as rf:
            for line in rf:
                line = line.strip()
                if not line:
                    # Blank lines carry no record and would make json.loads raise.
                    continue
                mrp_json = json.loads(line)
                if framework == 'ucca' and 'nodes' in mrp_json and 'input' in mrp_json:
                    # For 'ucca' graphs, set the label of every anchored node
                    # to the concatenation of its anchored input substrings.
                    input_text = mrp_json['input']
                    for node in mrp_json['nodes']:
                        if 'anchors' not in node:
                            continue
                        node['label'] = ''.join(
                            input_text[anchor.get('from', -1): anchor.get('to', -1)]
                            for anchor in node['anchors']
                        )

                mrp_jsons.append(mrp_json)
                graph_id = mrp_json['id']
                if graph_id.startswith(reviews_id_prefix):
                    graph_id = graph_id[len(reviews_id_prefix):]
                id2mrp_jsons_perFram[graph_id] = mrp_json
        # Key the dataset by the file name up to its first dot.
        dataset_name = dataset_name.split('.')[0]
        dataset2mrp_jsons[dataset_name] = mrp_jsons
    framework2dataset2mrp_jsons[framework] = dataset2mrp_jsons
    framework2id2mrp_jsons[framework] = id2mrp_jsons_perFram

frameworks:   0%|          | 0/5 [00:00<?, ?it/s]
dataset_name:   0%|          | 0/14 [00:00<?, ?it/s][A
dataset_name:   7%|▋         | 1/14 [00:00<00:02,  4.82it/s][A
dataset_name:  50%|█████     | 7/14 [00:00<00:01,  6.59it/s][A
dataset_name:  64%|██████▍   | 9/14 [00:00<00:00,  6.03it/s][A
dataset_name:  79%|███████▊  | 11/14 [00:01<00:00,  3.37it/s][A
dataset_name:  86%|████████▌ | 12/14 [00:02<00:00,  2.83it/s][A
frameworks:  20%|██        | 1/5 [00:02<00:09,  2.47s/it]t/s][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
dataset_name: 100%|██████████| 1/1 [00:06<00:00,  6.50s/it][A
frameworks:  40%|████      | 2/5 [00:08<00:11,  3.68s/it]
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
dataset_name: 100%|██████████| 1/1 [00:03<00:00,  3.96s/it][A
frameworks:  60%|██████    | 3/5 [00:12<00:07,  3.77s/it]
dataset_name:   0%|          | 0/2 [00:00<?, ?it/s][A
dataset_name:  50%|█████     | 1/2 [00:00<00:00,  3.94it/s][A
dataset_name: 100%|██████████| 2/2 [0

In [13]:
# Log each framework name followed by the dataset names loaded for it.
for framework in framework2dataset2mrp_jsons.keys():
    logger.info(framework)
    logger.info([dataset_key for dataset_key in framework2dataset2mrp_jsons[framework]])

INFO:__main__:amr
INFO:__main__:['lorelei', 'bolt', 'wiki', 'mt09sdl', 'rte', 'cctv', 'xinhua', 'amr-guidelines', 'proxy', 'wsj', 'dfb', 'dfa', 'fables', 'wb']
INFO:__main__:eds
INFO:__main__:['wsj']
INFO:__main__:psd
INFO:__main__:['wsj']
INFO:__main__:ucca
INFO:__main__:['wiki', 'ewt']
INFO:__main__:dm
INFO:__main__:['wsj']


### Load companion

In [14]:
def _read_companion_file(path):
    """Read one companion file into a dict mapping sentence id -> token rows.

    The file is processed line by line after stripping whitespace:
    a line starting with '#' sets the current id (the text after the '#'),
    a blank line ends the current sentence, and any other line is split on
    tabs and appended to the current sentence's rows.

    Unlike a blank-line-only flush, the last sentence is also stored when the
    file does not end with a blank line, and runs of blank lines do not create
    an empty entry under the '' key.
    """
    cid2rows = {}
    cid, rows = '', []
    with open(path, encoding='utf-8') as rf:
        for line in rf:
            line = line.strip()
            if not line:
                if cid or rows:
                    cid2rows[cid] = rows
                cid, rows = '', []
            elif line.startswith('#'):
                cid = line[1:]
            else:
                rows.append(line.split('\t'))
    # Flush a trailing sentence that was not followed by a blank line.
    if cid or rows:
        cid2rows[cid] = rows
    return cid2rows


# Index the companion parses three ways:
#   dataset2cid2parse[dataset]     -> {cid: rows}  (numbered shards merged)
#   framework2cid2parse[framework] -> {cid: rows}
#   cid2parse                      -> {cid: rows}  (everything)
dataset2cid2parse = {}
framework2cid2parse = {}
cid2parse = {}
companion_dir = os.path.join(args.project_root, args.mrp_data_dir, args.companion_sub_dir)
for framework in os.listdir(companion_dir):
    print(framework)
    framework_dir = os.path.join(companion_dir, framework)
    if not os.path.isdir(framework_dir):
        continue
    cid2parse_perFram = {}
    for dataset in tqdm(os.listdir(framework_dir), desc='dataset'):
        if not dataset.endswith(args.companion_file_extension):
            continue
        # Trailing digits are stripped, so several files can share one dataset
        # name (e.g. 'wsj00.conllu' and 'wsj01.conllu' both give 'wsj').
        dataset_name = dataset.split('.')[0].rstrip(string.digits)
        print(dataset)
        cid2parse_perFile = _read_companion_file(os.path.join(framework_dir, dataset))
        # Merge into the dataset's existing dict instead of replacing it;
        # otherwise only the last file of a dataset would survive.
        cid2parse_perDataset = dataset2cid2parse.setdefault(dataset_name, {})
        cid2parse_perDataset.update(cid2parse_perFile)
        cid2parse_perFram.update(cid2parse_perFile)
        cid2parse.update(cid2parse_perFile)
    framework2cid2parse[framework] = cid2parse_perFram
    if framework=='dm':
        # 'psd' and 'eds' reuse the very same dict object as 'dm'.
        framework2cid2parse['psd'] = cid2parse_perFram
        framework2cid2parse['eds'] = cid2parse_perFram

dataset:   0%|          | 0/13 [00:00<?, ?it/s]

amr
rte.conllu
fables.conllu
amr-guidelines.conllu
cctv.conllu
wb.conllu
dfa.conllu


dataset:  46%|████▌     | 6/13 [00:00<00:00, 18.09it/s]

wiki.conllu
bolt.conllu
xinhua.conllu
dfb.conllu


dataset:  77%|███████▋  | 10/13 [00:01<00:00,  7.63it/s]

proxy.conllu


dataset: 100%|██████████| 13/13 [00:01<00:00,  7.38it/s]
dataset:   0%|          | 0/6 [00:00<?, ?it/s]

mt09sdl.conllu
lorelei.conllu
.DS_Store
ucca
ewt04.conllu
ewt00.conllu
ewt02.conllu


dataset: 100%|██████████| 6/6 [00:01<00:00,  3.35it/s]
dataset:   0%|          | 0/5 [00:00<?, ?it/s]

wiki.conllu
ewt01.conllu
ewt03.conllu
Makefile
udpipe.mrp
jamr.mrp
dm
wsj04.conllu


dataset:  20%|██        | 1/5 [00:00<00:00,  4.19it/s]

wsj02.conllu


dataset:  40%|████      | 2/5 [00:00<00:00,  4.14it/s]

wsj00.conllu


dataset:  60%|██████    | 3/5 [00:00<00:00,  4.09it/s]

wsj03.conllu


dataset:  80%|████████  | 4/5 [00:00<00:00,  4.07it/s]

wsj01.conllu


dataset: 100%|██████████| 5/5 [00:01<00:00,  4.09it/s]

README.txt
isi.mrp





### Load JAMR

In [15]:
def load_jamr_alignment_file(alignment_filename):
    """Load a JSON-lines alignment file into a dict keyed by record id.

    Args:
        alignment_filename: path of a text file holding one JSON object per line.

    Returns:
        dict mapping each record's 'id' value ('' when the key is absent) to
        the decoded JSON object. When an id occurs more than once, the last
        record wins.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    cid2alignment = {}
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(alignment_filename, encoding='utf-8') as rf:
        for line in rf:
            line = line.strip()
            if not line:
                # Blank lines carry no record and would make json.loads raise.
                continue
            alignment_json = json.loads(line)
            cid = alignment_json.get('id', '')
            cid2alignment[cid] = alignment_json
    return cid2alignment

# The alignment file sits inside the companion directory:
# <project_root>/<mrp_data_dir>/<companion_sub_dir>/<jamr_alignment_file>.
cid2alignment = load_jamr_alignment_file(
    alignment_filename=os.path.join(args.project_root, args.mrp_data_dir, args.companion_sub_dir, args.jamr_alignment_file)
)

In [16]:
# Spot check: pretty-print every alignment whose id contains this fragment.
for key, alignment in cid2alignment.items():
    if 'DF_000170_20100514_C000209GX_0001.28' not in key:
        continue
    pprint(alignment)

{'flavor': 2,
 'framework': 'alignment',
 'id': 'ENG_DF_000170_20100514_C000209GX_0001.28',
 'nodes': [{'id': 0, 'label': [4]},
           {'id': 1, 'label': [1]},
           {'id': 2, 'label': [0]},
           {'id': 3, 'label': [7]},
           {'id': 4, 'label': [6]},
           {'id': 5, 'label': [8]},
           {'id': 6, 'label': [10]},
           {'id': 7, 'label': [12]},
           {'id': 9, 'label': [18]}],
 'time': '2019-06-24',
 'version': 1.0}


### Load evaluation

In [17]:
# Index the evaluation inputs by target framework and then by record id:
#   eval_framework2id2data[framework][id] -> evaluation record
# Example record:
#   {"id": "264993-0001", "version": 1.0, "time": "2019-06-23", "source": "ewt", "targets": ["ucca"], "input": "An Hour Of Prego Bliss!"}
eval_framework2id2data = {'dm':{},'psd':{},'eds':{},'ucca':{},'amr':{}}
# JSON text is UTF-8; do not depend on the platform's locale encoding.
with open(os.path.join(args.project_root,args.mrp_data_dir,args.evaluation_sub_dir, "input.mrp"), encoding='utf-8') as rf:
    for line in rf:
        line = line.strip()
        if not line:
            # Blank lines carry no record and would make json.loads raise.
            continue
        eval_json = json.loads(line)
        for target in eval_json['targets']:
            # setdefault: a target outside the five pre-seeded frameworks gets
            # its own bucket instead of raising KeyError.
            eval_framework2id2data.setdefault(target, {})[eval_json['id']] = eval_json


### Load evaluation companion

In [18]:
# Convert each evaluation companion record into a list of 10-field tuples,
# keyed by record id.
# NOTE(review): the field layout looks like CoNLL-U columns — confirm.
eval_id2parse = {}
with open(os.path.join(args.project_root,args.mrp_data_dir,args.evaluation_sub_dir, "udpipe.mrp")) as rf:
    for line in rf:
        line = line.strip()
        companion_json = json.loads(line)
        data_id = companion_json['id']
        nodes = companion_json['nodes']

        conv_nodes = []
        for node in nodes:
            # Leading fields: node id, label and the first three 'values'.
            head_fields = (node['id'], node['label'], node['values'][0], node['values'][1], node['values'][2])
            # Only the first anchor of a node is used for the token range.
            first_anchor = node['anchors'][0]
            token_range = 'TokenRange={}:{}'.format(first_anchor['from'], first_anchor['to'])
            conv_node = head_fields + ("_", "n/a", "n/a", "_", token_range)
            conv_nodes.append(conv_node)
        eval_id2parse[data_id] = conv_nodes

In [19]:
def _frame_to_dm_args(frame):
    """Join with '-' every lowercase character of ``frame`` that has no lowercase neighbour.

    Neighbours are bounds-checked: the first character has no left neighbour
    and the last has no right neighbour, so there is no wrap-around to the end
    of the string and no IndexError on a trailing lowercase character.
    """
    arg_list = []
    last_idx = len(frame) - 1
    for idx, char in enumerate(frame):
        if not char.islower():
            continue
        prev_is_lower = idx > 0 and frame[idx - 1].islower()
        next_is_lower = idx < last_idx and frame[idx + 1].islower()
        if not (prev_is_lower or next_is_lower):
            arg_list.append(char)
    return "-".join(arg_list)


def _parse_surface_smi(lines):
    """Parse the lines of 'surface.smi'; return (lemma -> list of frames, counted lines).

    Only stripped lines starting with '_' are parsed, as ``pred : frame``.
    ``pred`` is split on '_': field 1 is the lemma and field 2 the sense, with
    a non-numeric field 3 appended to the sense as '<field2>_<field3>'.
    Predicates with more than four '_'-separated fields are skipped and, as
    before, not counted.
    """
    lemma2frames = {}
    counted = 0
    for line in lines:
        line = line.strip()
        if line.startswith('_'):
            pred, frame = line.split(":")
            pred_list = pred.strip().split("_")
            if len(pred_list) > 4:
                continue
            if len(pred_list) > 3 and not pred_list[3].isnumeric():
                sense = "_".join(pred_list[2:4])
            else:
                sense = pred_list[2]
            dm_lemma = pred_list[1]
            dm_frame = ":".join([sense, _frame_to_dm_args(frame.strip())])
            lemma2frames.setdefault(dm_lemma, []).append(dm_frame)
        counted += 1
    return lemma2frames, counted


def _parse_abstract_smi(lines):
    """Parse the lines of 'abstract.smi'; return a list of '<pred>:<args>' strings.

    Only lines that start with two spaces are parsed, as ``pred : frame``.
    """
    frames = []
    for line in lines:
        if not line.startswith("  "):
            continue
        pred, frame = line.strip().split(":")
        frames.append(":".join([pred.strip(), _frame_to_dm_args(frame.strip())]))
    return frames


def parse_erg(erg_path):
    """Parse 'surface.smi' and 'abstract.smi' found in ``erg_path``.

    Only files whose name ends with ``args.erg_file_extension`` are considered.
    The directory listing and the number of counted surface lines are printed.

    Args:
        erg_path: directory containing the .smi files.

    Returns:
        (surface_dmlemma2frame, abstract_frame):
        a dict mapping lemma -> list of '<sense>:<args>' strings, and a list of
        '<pred>:<args>' strings. Each is empty when its file is absent; the
        previous code raised UnboundLocalError in that case.

    Raises:
        ValueError: if a parsed line does not contain exactly one ':'.
    """
    surface_dmlemma2frame = {}
    abstract_frame = []
    file_names = os.listdir(erg_path)
    print(file_names)
    for file_name in tqdm(file_names, desc='file_name'):
        if not file_name.endswith(args.erg_file_extension):
            continue
        if file_name == 'surface.smi':
            with open(os.path.join(erg_path, file_name)) as rf:
                surface_dmlemma2frame, p_cnt = _parse_surface_smi(rf)
            print(p_cnt)
        elif file_name == 'abstract.smi':
            with open(os.path.join(erg_path, file_name)) as rf:
                abstract_frame = _parse_abstract_smi(rf)
    return surface_dmlemma2frame,abstract_frame

In [20]:
# The ERG files live directly under the project root: <project_root>/<erg_sub_dir>.
erg_path = os.path.join(args.project_root, args.erg_sub_dir)
# Both results are kept as module-level names.
surface_dmlemma2frame,abstract_frame = parse_erg(erg_path)

file_name: 100%|██████████| 2/2 [00:00<00:00, 16.51it/s]

['surface.smi', 'abstract.smi']
24976





### Load czEngVallex

In [21]:
def parse_czEngVallex(czEngVallex_path):
    """Print the directory listing and the first line of 'frames_pairs.xml'.

    Only files ending with ``args.czEngVallex_file_extension`` are opened.

    NOTE(review): nothing is parsed here. The returned names
    ``surface_dmlemma2frame`` and ``abstract_frame`` are never assigned inside
    this function, so they resolve to the module-level variables of the same
    names (NameError if those do not exist yet). This looks like an unfinished
    stub — confirm the intended return value.
    """
    file_names = os.listdir(czEngVallex_path)
    print (file_names)
    for file_name in tqdm(file_names, desc='file_name'):
        if not file_name.endswith(args.czEngVallex_file_extension):
            continue
        with open(os.path.join(czEngVallex_path, file_name)) as rf:
            if(file_name == 'frames_pairs.xml'):
                for line in rf:
                    line = line.strip()
                    print (line)
                    # Only the first line is printed.
                    break
    # Module-level globals, not values computed in this function (see docstring).
    return surface_dmlemma2frame,abstract_frame

In [22]:
# Compare, position by position, the frame lists returned by parse_czEngVallex
# with those in surface_dmlemma2frame, and print every equal pair.
czEngVallex_path = os.path.join(args.project_root, args.czEngVallex_sub_dir)
frame = parse_czEngVallex(czEngVallex_path)
print(list(frame[0].keys())[0])

# Upper bound on the number of compared positions (same value as before).
compare_limit = 16263
# Materialize the value lists once instead of rebuilding list(keys()) twice
# per iteration; zip() stops at the shorter list, so tables with fewer than
# `compare_limit` entries no longer raise IndexError.
czEngVallex_values = list(frame[0].values())[:compare_limit]
surface_values = list(surface_dmlemma2frame.values())[:compare_limit]
for a, m in zip(czEngVallex_values, surface_values):
    if a == m:
        print(a, m)

file_name: 100%|██████████| 3/3 [00:00<00:00, 1166.38it/s]

['frames_pairs.xml', 'vallex_cz.xml', 'vallex_en.xml']
<?xml version="1.0" encoding="utf-8"?>
!=
['v:e-i-p'] ['v:e-i-p']
['a:e-h'] ['a:e-h']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e', 'q:i-h-h', 'x_deg:e-u'] ['a:e-e', 'q:i-h-h', 'x_deg:e-u']
['a:e-i'] ['a:e-i']
['a:e-p'] ['a:e-p']
['a:e-e'] ['a:e-e']
['x_deg:e-u'] ['x_deg:e-u']
['a:e-e', 'x:e-u'] ['a:e-e', 'x:e-u']
['a:e-p'] ['a:e-p']
['a:e-e', 'x:e-u'] ['a:e-e', 'x:e-u']
['q:e-i'] ['q:e-i']
['x:e-u'] ['x:e-u']
['p:e-u-i'] ['p:e-u-i']
['a:e-e', 'x:e-u'] ['a:e-e', 'x:e-u']
['a:e-e', 'q:i-h-h', 'x_deg:e-u'] ['a:e-e', 'q:i-h-h', 'x_deg:e-u']
['a:e'] ['a:e']
['a:e-e', 'x:e-u'] ['a:e-e', 'x:e-u']
['x_deg:e-u'] ['x_deg:e-u']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['a:e-e', 'x_deg:e-u'] ['a:e-e', 'x_deg:e-u']
['a:e-e'] ['a:e-e']
['a:e-e'] ['a:e-e']
['a:e-e', 'x_much:e-u'] ['a:e-e', 'x_much:e-u']
['n:x




 ['n_of:x-i']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v:e-i-p', 'v_for:e-i-i'] ['v:e-i-p', 'v_for:e-i-i']
['n:x'] ['n:x']
['p:e-i-u'] ['p:e-i-u']
['v:e-i'] ['v:e-i']
['n:x'] ['n:x']
['v:e-i-h', 'v_on:e-i-i', 'v_on:e-p', 'v_to:e-i-i', 'v_upon:e-i-i', 'v_with:e-i-h-i'] ['v:e-i-h', 'v_on:e-i-i', 'v_on:e-p', 'v_to:e-i-i', 'v_upon:e-i-i', 'v_with:e-i-h-i']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['a:e-p'] ['a:e-p']
['n:x-h'] ['n:x-h']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-i'] ['a:e-i']
['a:e-h'] ['a:e-h']
['p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i'] ['p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i']
['p:e-i'] ['p:e-i']
['a:e-h'] ['a:e-h']
['n_i:x'] ['n_i:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_at:e-i-p-i', 'v_at:e-i-i', 'v_for:e-i-i'] ['n:x', 'v:e-i-p', 'v_at:e-i-p-i', 'v_at:e-i-i', 'v_for:e-i-i']
['a:e-p'] ['a:e-p']


['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x', 'v:e-i', 'v_cause:e-i-p'] ['n:x', 'v:e-i', 'v_cause:e-i-p']
['n:x'] ['n:x']
['a:e-p', 'n:x', 'v:e-i', 'v_cause:e-i-p'] ['a:e-p', 'n:x', 'v:e-i', 'v_cause:e-i-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['n_of:x', 'n_of:x-i'] ['n_of:x', 'n_of:x-i']
['n:x'] ['n:x']
['n:x', 'n:x'] ['n:x', 'n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['v_about:e-i-i', 'v_for:e-i-i', 'v_over:e-i-i', 'v_with:e-i-p-i'] ['v_about:e-i-i

['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e', 'a_of:e-p-i', 'v:e-i-p'] ['a:e-e', 'a_of:e-p-i', 'v:e-i-p']
['a:e-p'] ['a:e-p']
['x_deg:e-u'] ['x_deg:e-u']
['n:x', 'v:e-i-p', 'v_over:e-i-i'] ['n:x', 'v:e-i-p', 'v_over:e-i-i']
['n:x'] ['n:x']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n_to:x-i'] ['n_to:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p-h'] ['n:x', 'v:e-i-p-h']
['n:x'] ['n:x']
['v:e-i'] ['v:e-i']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p', 'x:e-u'] ['a:e-p', 'x:e-u']
['x_deg:e-u'] ['

['n:x'] ['n:x']
['a:e-h', 'n:x'] ['a:e-h', 'n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p-h', 'v:e-i-i-h'] ['n:x', 'v:e-i-p-h', 'v:e-i-i-h']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v_to:e-i-h-i'] ['v_to:e-i-h-i']
['n:x', 'v:e-i', 'v_cause:e-i-p'] ['n:x', 'v:e-i', 'v_cause:e-i-p']
['n:x'] ['n:x']
['n_i:x'] ['n_i:x']
['n:x', 'v:e-i', 'v_cause:e-i-p'] ['n:x', 'v:e-i', 'v_cause:e-i-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['n_of:x-i', 'v:e-i', 'v_off:e-i', 'v_out:e-i'] ['n_of:x-i', 'v:e-i', 'v_off:e-i', 'v_out:e-i']
['a:e-p'] ['a:e-p']
['n_of:x-i', 'n_of:x', 'v:e-i-i-i', 'v:e-i-p'] ['n_of:x-i', 'n_of:x', 'v:e-i-i-i', 'v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['a:e-p', 'a:e-i'] ['a:e-p', 'a:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-i', 'a:e-p', 'n:x', 'v:e-i-p'] ['a:e-i', 'a:e-p', 'n:x', 'v:e-i-p']
['a:e-h'] ['a:e-h']
['a:e-p'] ['a:e-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p'

['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i', 'v:e-i'] ['n_of:x-i', 'v:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['p:e-u-i'] ['p:e-u-i']
['n:x', 'n_i:x', 'v:e-i-h', 'v_about:e-i-i', 'v_for:e-i-i'] ['n:x', 'n_i:x', 'v:e-i-h', 'v_about:e-i-i', 'v_for:e-i-i']
['v:e-i'] ['v:e-i']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['a:e-p'] ['a:e-p']
['a_with-about-of:e-p-i', 'a_with-about-of:e-i-h'] ['a_with-about-of:e-p-i', 'a_with-about-of:e-i-h']
['a_with-about-of:e-p-i', 'a_with-about-of:e-e'] ['a_with-about-of:e-p-i', 'a_with-about-of:e-e']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 

['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x', 'n_of:x', 'n_of:x-i', 'v:e-i-p', 'v_in:e-i-i', 'v_into:e-i-p-i', 'v_into:e-i-i', 'v_on:e-i-i', 'v_out:e-i-i', 'v_out-of:e-i-i', 'v_through:e-i-i', 'v_up-on:e-i-i'] ['n:x', 'n_of:x', 'n_of:x-i', 'v:e-i-p', 'v_in:e-i-i', 'v_into:e-i-p-i', 'v_into:e-i-i', 'v_on:e-i-i', 'v_out:e-i-i', 'v_out-of:e-i-i', 'v_through:e-i-i', 'v_up-on:e-i-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_on:e-i-i', 'v_up:e-i-i'] ['n:x', 'v:e-i-p', 'v_on:e-i-i', 'v_up:e-i-i']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['v:e-i'] ['v:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e', 'a:e-p', 'n:x', 'n:x'] ['a:e-e', 'a:e-p', 'n:x', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-p', 'v_as:e-i-i-h', 'v_as:e-i-p-i'] ['v:

['a_about:e-p-i', 'a_about:e-i-h', 'a_with:e-p-i'] ['a_about:e-p-i', 'a_about:e-i-h', 'a_with:e-p-i']
['p:e-u-i'] ['p:e-u-i']
['n:x', 'n:x'] ['n:x', 'n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x-h'] ['n:x-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v:e-i-p-i'] ['v:e-i-p-i']
['n_of:x-i', 'n_of:x-h'] ['n_of:x-i', 'n_of:x-h']
['a:e-p'] ['a:e-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-e', 'a:e-p', 'n:x'] ['a:e-e', 'a:e-p', 'n:x']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['v_as:e-i-p-i', 'v_as:e-i-i-h', 'v_to:e-i-p-i'] ['v_as:e-i-p-i', 'v_as:e-i-i-h', 'v_to:e-i-p-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['n:x-h', 'v:e-i-p', 'v:e-i-i-h', 'v_on:e-i-p-i'] ['n:x-h', 'v:e-i-p', 'v:e-i-i-h', 'v_on:e-i-p-i']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i'] ['v:e-i']
['a_to:e-p-i'] ['a_to:e-p-i']
['n:x', 

['n:x', 'v_i:e-i-i'] ['n:x', 'v_i:e-i-i']
['a:e-p'] ['a:e-p']
['v:e-i'] ['v:e-i']
['n_of:x-i', 'v:e-i-p-h', 'v_down:e-i-i', 'v_out:e-i-i'] ['n_of:x-i', 'v:e-i-p-h', 'v_down:e-i-i', 'v_out:e-i-i']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:i-i', 'n:x'] ['a:i-i', 'n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n_of:x-i', 'v:e-i-p', 'v:e-i'] ['n_of:x-i', 'v:e-i-p', 'v:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:i-i', 'n:x'] ['a:i-i', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['a:e-i'] ['a:e-i']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x', 'n:x'] ['n:x', 'n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:e-e', 'a:e-p', 'v:e-i-p', 'v_for:e-i

['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i', 'v_cause:e-i-p'] ['n:x', 'v:e-i', 'v_cause:e-i-p']
['n:x', 'v_to:e-i-p-i'] ['n:x', 'v_to:e-i-p-i']
['v_to:e-i-p-i'] ['v_to:e-i-p-i']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p', 'v_from:e-i-h-i'] ['v:e-i-p', 'v_from:e-i-h-i']
['v_from:e-i-p-i'] ['v_from:e-i-p-i']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v:e-i-i-h'] ['v:e-i-i-h']
['a:e-i', 'a:e-p', 'x_deg:e-u'] ['a:e-i', 'a:e-p', 'x_deg:e-u']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['a:e-e'] ['a:e-e']
['n:x'] ['n:x']
['n:x', 'v:e-i', 'v_on:e-i-i'] ['n:x', 'v:e-i', 'v_on:e-i-i']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v_agai

['n_of:x-i'] ['n_of:x-i']
['a:e-p'] ['a:e-p']
['n_at-with:x-i'] ['n_at-with:x-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['a:e-p'] ['a:e-p']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['n_of:x-i'] ['n_of:x-i']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['v_from:e-i-i-h', 'v_from:e-i-p'] ['v_from:e-i-i-h', 'v_from:e-i-p']
['a:e-p'] ['a:e-p']
['n:x', 'n:x', 'v_from:e-i-p-i'] ['n:x', 'n:x', 'v_from:e-i-p-i']
['a:e-p'] ['a:e-p']
['a:e-i-h', 'a:e-p'] ['a:e-i-h', 'a:e-p']
['v:e-i-p'] ['v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['v_between:e-i-i', 'v_from:e-i-p-i'] ['v_between:e-i-i', 'v_from:e-i-p-i']
['a:e-p'] ['a:e-p']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['v_from:e-i-p-i'] ['v_from:e-i-p-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:e-p'] ['a:e-p']


['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['v:e-i-i'] ['v:e-i-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['v:e-i'] ['v:e-i']
['a:e-p'] ['a:e-p']
['x_deg:e-u'] ['x_deg:e-u']
['n_of:x', 'n_of:x-i'] ['n_of:x', 'n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n_on:x-i'] ['n_on:x-i']
['v:e-h-h', 'v_to:e-i-p-i'] ['v:e-h-h', 'v_to:e-i-p-i']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i-p', 'v_as:e-i-p-i'] ['n:x', 'v:e-i-p', 'v_as:e-i-p-i']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-p', 'v:e-i-i-h'] ['v:e-i-p', 'v:e-i-i-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-i', 'a:e-p', 'v:e-i-p-h'] ['a:e-i', 'a:e-p', 'v:e-i-p-h']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['a:e-e'] ['a:e-e']
['p:e-i'] ['p:e-i']
['v:e-i-p', 'v:e-i-i-h'] ['v:e-i-p', 'v:e-i-i-h']
['v:e-i-p'] ['v:e-i-p']
['v:e-i-p'] ['v:

['a_of:e-p-i'] ['a_of:e-p-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e'] ['a:e-e']
['n:x', 'p:e-u-i'] ['n:x', 'p:e-u-i']
['v:e-i-p'] ['v:e-i-p']
['a:e-e', 'a_same-as:e-i', 'v:e-i-p', 'v_from:e-i-p-i'] ['a:e-e', 'a_same-as:e-i', 'v:e-i-p', 'v_from:e-i-p-i']
['a:e-p'] ['a:e-p']
['a:e-h', 'x_deg:e-u'] ['a:e-h', 'x_deg:e-u']
['v:e-i-p'] ['v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['a:e-e'] ['a:e-e']
['v:e-i'] ['v:e-i']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['p:e-u-i'] ['p:e-u-i']
['c:i-i-i'] ['c:i-i-i']
['x:e-h-h'] ['x:e-h-h']
['c:i-i-i', 'p:e-u-u', 'v:e-i-p', 'x_h:e-h-h'] ['c:i-i-i', 'p:e-u-u', 'v:e-i-p', 'x_h:e-h-h']
['p:e-u-i'] ['p:e-u-i']
['n_to:x-i'] ['n_t

['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['a:e-i'] ['a:e-i']
['a:e-i'] ['a:e-i']
['x:e-u'] ['x:e-u']
['n:x'] ['n:x']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x', 'n_of:x-i', 'v:e-i-p'] ['n_of:x', 'n_of:x-i', 'v:e-i-p']
['n:x'] ['n:x']
['a:e-p', 'a:e-i'] ['a:e-p', 'a:e-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-e'] ['a:e-e']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-h', 'n_of:x-i'] ['a:e-h', 'n_of:x-i']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_back:e-i', 'v_off:e-i-i'] ['n:x', 'v:e-i-p', 'v_back:e-i', 'v_off:e-i-i']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x', 'n:x', 'v:e-i-p', 'v_in:e-i-i', 'v_into:e-i-p-i', 'v_out:e-i-h', 'v_out:e-i-i'] ['n:x', 'n:x', 'v:e-i-p', 'v_in:e-i-i', 'v_into:e-i-p-i', 'v_out:e-i-h', 'v_out:e-i-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of

['n_of:x-i', 'n_of:x-i', 'v:e-i', 'v_cause:e-i-p'] ['n_of:x-i', 'n_of:x-i', 'v:e-i', 'v_cause:e-i-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['a:e-p'] ['a:e-p']
['a:e-e', 'a:e-p', 'n:x'] ['a:e-e', 'a:e-p', 'n:x']
['c:i-i-i', 'x_deg:e-u'] ['c:i-i-i', 'x_deg:e-u']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n_of:x-i', 'n_of:x-h', 'n_of:x-i'] ['n_of:x-i', 'n_of:x-h', 'n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['p_dir:e-i'] ['p_dir:e-i']
['a:e-p'] ['a:e-p']
['a:e-e'] ['a:e-e']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['a:e-e', 'n:x'] ['a:e-e', 'n:x']
['n:x', 'n:x'] ['n:x', 'n:x']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-i-h'] ['a:e-p', 'a:e-i-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i'] ['p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i']
['a:e-p', 'n:x

['v:e-i-h', 'v_about:e-i-i', 'v_cause:e-i-p'] ['v:e-i-h', 'v_about:e-i-i', 'v_cause:e-i-p']
['x:e-u'] ['x:e-u']
['v:e-i-p'] ['v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p', 'a:e-i', 'v:e-i-p'] ['a:e-p', 'a:e-i', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a_to:e-p-i']

['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-h'] ['n:x', 'v:e-i-h']
['n_of:x-i', 'v:e-i'] ['n_of:x-i', 'v:e-i']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['n:x', 'v:e-i-p', 'v_out:e-i-i'] ['n:x', 'v:e-i-p', 'v_out:e-i-i']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-h'] ['a:e-h']
['a:e-h'] ['a:e-h']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['v_to:e-i-p-i'] ['v_to:e-i-p-i']
['n:x', 'v:e-i-p', 'v_up:e-i-i'] ['n:x', 'v:e-i-p', 'v_up:e-i-i']
['a:e-i'] ['a:e-i']
['n:x'] ['n:x']
['v_qmodal:e-h'] ['v_qmodal:e-h']
['v_qmodal:e-h'] ['v_qmodal:e-h']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-h'] ['a:e-h']
['a:e-h'] ['a:e-h']
['n:x', 'v:e-i-p', 'v_as:e-i-i-h', 'v_as:e-i-p-i', 'v_from:e-i-i'] ['n:x', 'v:e-i-p', 'v_as:e-i-i-h', 'v_as:e-i-p-i', 'v_from:e-

['n:x', 'v:e-i-p', 'v_down:e-i-i'] ['n:x', 'v:e-i-p', 'v_down:e-i-i']
['n:x'] ['n:x']
['a:e-h'] ['a:e-h']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n_of-to:x-i', 'n_of-to:x-i', 'v:e-i-p'] ['n_of-to:x-i', 'n_of-to:x-i', 'v:e-i-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-i', 'a:e-p', 'v_up:e-i'] ['a:e-i', 'a:e-p', 'v_up:e-i']
['n:x-i', 'n:x'] ['n:x-i', 'n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x', 'n_of:x-i', 'v:e-i-p'] ['n:x', 'n_of:x-i', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i'] ['v:e-i']
['n:x'] ['n:x']
['x:e-h-u'] ['x:e-h-u']
['x:e-h'] ['x:e-h']
['x:e-h-h'] ['x:e-h-h']
['a:e-p'] ['a:e-p']
['a:e-h'] ['a:e-h']
['n:x', 'v:e-i-p', 'v_at:e-i-i']

['p:e-u-i'] ['p:e-u-i']
['a:e-p', 'p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i'] ['a:e-p', 'p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i']
['a:e-e'] ['a:e-e']
['p:e-u-i'] ['p:e-u-i']
['x:e-h-h'] ['x:e-h-h']
['a_of:e-p-i'] ['a_of:e-p-i']
['p:e-i'] ['p:e-i']
['a:e-e'] ['a:e-e']
['x_deg:e-u'] ['x_deg:e-u']
['a:e-e'] ['a:e-e']
['a_of:e-p-i'] ['a_of:e-p-i']
['p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i'] ['p:e-u-i', 'p_dir:e-u-i', 'p_state:e-u-i']
['p:e-i'] ['p:e-i']
['a:e-i'] ['a:e-i']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['p:e-u-i'] ['p:e-u-i']
['x:e-h-h'] ['x:e-h-h']
['x_deg:e-u'] ['x_deg:e-u']
['p:e-i', 'x_deg:e-u'] ['p:e-i', 'x_deg:e-u']
['a:e-e'] ['a:e-e']
['p:e-u-i'] ['p:e-u-i']
['a:e-e'] ['a:e-e']
['p:e-u-i'] ['p:e-u-i']
['p:e-u-i'] ['p:e-u-i']
['a:e-e'] ['a:e-e']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['x:e-h-h'] ['x:e-h-h']
['p:e-u-i'] ['p:e-u-i']
['a:e-e'] ['a:e-e']
['x:e-h-h'] ['x:e-h-h']
['x:e-h-h'] ['x:e-h-h']
['a:e-i'] ['a:e-i']
['p:e-i'] ['p:e-i']
['p:e-u-i'] ['p:e-u-i']


['n:x'] ['n:x']
['a:e-e', 'n:x'] ['a:e-e', 'n:x']
['n:x', 'v:e-i', 'v_cause:e-i-p'] ['n:x', 'v:e-i', 'v_cause:e-i-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['v:e-i-p', 'v_in:e-i-i', 'v_in:e-i-p-i', 'v_with:e-i-p-i'] ['v:e-i-p', 'v_in:e-i-i', 'v_in:e-i-p-i', 'v_with:e-i-p-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a_to:e-p-i'] ['a_to:e-p-i']
['n_of:x-i', 'n_of:x-h'] ['n_of:x-i', 'n_of:x-h']
['v_in:e-i-i', 'v_out:e-i-i', 'v_to:e-i-i-h', 'v_to:e-i-p-i'] ['v_in:e-i-i', 'v_out:e-i-i', 'v_to:e-i-i-h', 'v_to:e-i-p-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n_for:x-i', 'v:e-i-p'] ['n_for:x-i', 'v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['v:e-i-p', 'v_in:e-i-p-i'] ['v:e-i-p', 'v_in:e-i-p-i']
['a:e-p'] ['a:e-p']
['n_in:x-i'] ['n_in:x-i']
['a_to:e-p-i'] ['a_to:e-p-i']
['a:e-p', 'a:e-e', 'a:e-x'] ['a:e-p', 'a:e-e', 'a:e-x']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x'

['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-i', 'n:x'] ['a:e-i', 'n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x-h'] ['n:x-h']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['a:i-i', 'n:x'] ['a:i-i', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-i-i', 'v:e-i-p', 'v_to:e-i-p-i'] ['v:e-i-i-i', 'v:e-i-p', 'v_to:e-i-p-i']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-i'] ['a:e-p', 'a:e-i']
['a:e-p', 'a:e-i'] ['a:e-p', 'a:e-i']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p-u', 'p:e-u-i', 'x:e-u'] ['a:e-p-u', 'p:e-u-i', 'x:e-u']
['a:e-i'] ['a:e-i']
['n:x'] ['n:x']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['x:e-h-h'] ['x:e-h-h']
['c:i-i-i'] ['c:i-i-i']
['v:e-i-i', 'v:e-i-h

['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['a:e-h', 'n:x', 'n:x', 'v:e-i-p'] ['a:e-h', 'n:x', 'n:x', 'v:e-i-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['v:e-i-p', 'v_cope:e-i'] ['v:e-i-p', 'v_cope:e-i']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n_of:x', 'n_of:x-i'] ['n_of:x', 'n_of:x-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x-h', 'v:e-i-p'] ['n:x-h', 'v:e-i-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p-h'] ['n:x', 'v:e-i-p-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p', 'n:x', 'v:e-i-p'] ['a:e-p', 'n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']


['n:x'] ['n:x']
['a:e-p', 'n_of:x-i'] ['a:e-p', 'n_of:x-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['p:e-x'] ['p:e-x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-x'] ['a:e-x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['a:e-p', 'a:e-e', 'x:e-u'] ['a:e-p', 'a:e-e', 'x:e-u']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v_modal:e-h', 'v_modal:e-h'] ['n:x', 'v_modal:e-h', 'v_modal:e-h']
['a:e-i', 'a:e-p', 'x_deg:e-u'] ['a:e-i', 'a:e-p', 'x_deg:e-u']
['v:e-i'] ['v:e-i']
['n:x'] ['n:x']
['a:e-p', 'a:e-i'] ['a:e-p', 'a:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']


['n:x'] ['n:x']
['n:x', 'v:e-u-p', 'v_along:e-i', 'v_through:e-i', 'v_up:e-i-i'] ['n:x', 'v:e-u-p', 'v_along:e-i', 'v_through:e-i', 'v_up:e-i-i']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['v:e-i-p', 'v_over:e-i-i'] ['v:e-i-p', 'v_over:e-i-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'a:e-i', 'n:x'] ['a:e-p', 'a:e-i', 'n:x']
['n:x'] ['n:x']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x', 'v:e-i-p-i'] ['n:x', 'v:e-i-p-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-p', 'v_on:e-i-i'] ['v:e-i-p', 'v_on:e-i-i']
['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']


['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n_of:x-i', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-p']
['n:x'] ['n:x']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-i'] ['a:e-i']
['n:x'] ['n:x']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'n_of:x-i'] ['n:x', 'n_of:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x', 'v:e-i-h-i', 'v_to:e-i-i'] ['n:x', 'v:e-i-h-i', 'v_to:e-i-i']
['n_to:x-i'] ['n_to:x-i']
['a:e-p', 'a:e-e', 'n:x-h'] ['a:e-p', 'a:e-e', 'n:x-h']
['v:e-i-i-h'] ['v:e-i-i-h']
['n:x-h'] ['n:x-h']
['a:e-p'] ['a:e-p']
['v:e-i-i-h', 'v:e-i-p'] ['v:e-i-i

['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-p'] ['a:e-p']
['n_of:x-i', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['n:x', 'v:e-i-p-h'] ['n:x', 'v:e-i-p-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x', 'v:e-h', 'v_down:e-i-i', 'v_for:e-i-p-u-i', 'v_for:e-i-i-i', 'v_off:e-i-i', 'v_out:e-i-i', 'v_up:e-i'] ['n:x', 'v:e-h', 'v_down:e-i-i', 'v_for:e-i-p-u-i', 'v_for:e-i-i-i', 'v_off:e-i-i', 'v_out:e-i-i', 'v_up:e-i']
['a_to:e-p-i'] ['a_to:e-p-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['a:

['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p', 'n:x-h'] ['a:e-p', 'n:x-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i', 'v_on:e-i-i'] ['n:x', 'v:e-i', 'v_on:e-i-i']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x', 'n_cur:x', 'v:e-i-i-h', 'v:e-i-p'] ['n:x', 'n_cur:x', 'v:e-i-i-h', 'v:e-i-p']
['n:x'] ['n:x']
['v:e-i-p', 'v:e-i-i-i', 'v_down:e-i', 'v_in:e-i-i', 'v_off:e-i-i', 'v_out:e-i-i'] ['v:e-i-p', 'v:e-i-i-i', 'v_down:e-i', 'v_in:e-i-i', 'v_off:e-i-i', 'v_out:e-i-i']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x', 'n:x-h', 'v:e-i-p', 'v_down:e-i-i', 'v_up:e-i-i'] ['n:x', 'n:x-h', 'v:e-i-p', 'v_down:e-i-i', 'v_up:e-i-i']
['n:x'] ['n:x']
['a:e-p', 'a:e-e', 'n:x'] ['a:e-p', 'a:e-e', 'n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['x_deg:e-u'

['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i', 'n_temp:x', 'v:e-i-p'] ['n_of:x-i', 'n_temp:x', 'v:e-i-p']
['a:e-p', 'a:e-e', 'n:x'] ['a:e-p', 'a:e-e', 'n:x']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['n:x'] ['n:x']
['n_of:x', 'n_of:x-i'] ['n_of:x', 'n_of:x-i']
['a:e-i', 'a:e-p', 'n:x', 'v:e-i-p'] ['a:e-i', 'a:e-p', 'n:x', 'v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['v:e-u-p'] ['v:e-u-p']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n_for:x-i', 'n_for:x-h'] ['n_for:x-i', 'n_for:x-h']
['n_about:x-h', 'n_about:x', 'n_about:x-i', 'v:e-u-p', 'v_about:e-i-p-i'] ['n_about:x-h', 'n_about:x', 'n_about:x-i', 'v:e-u-p', 'v_about:e-i-p-i']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_up:e-i-i'] ['n:x', 'v:e-i-p', 'v_up:e-i-i']
['n:x', 'v_with:e-i-h-i'] ['n:x', 'v_with:e-i-h-i']
['a:e-p', 'a:e-i-h', 'n:x'] ['a:e-p', 'a:e-i-h', 'n:x']
['v:e-i-p'] ['v:e-i-p']
['a:e-

['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['v:e-i-p'] ['v:e-i-p']
['v:e-i-p'] ['v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i-p', 'v:e-i-i-i', 'v_out:e-i-i-i'] ['n:x', 'v:e-i-p', 'v:e-i-i-i', 'v_out:e-i-i-i']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_abb:x'] ['n_abb:x']
['n_of:x-i', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-p']
['n:x'] ['n:x']
['v:e-i-p', 'v:e-i-i-i'] ['v:e-i-p', 'v:e-i-i-i']
['n_of:x-i'] ['n_of:x-i']
['n_of:x-i', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:e-e'] ['a:e-e']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['a:e-p'] ['a:e-p']
['v_with:e-i-p-i'] ['v_with:e-i-p-i']
['n_of:x-i'] ['n_of:x-i']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n_of:x-i', 'v_to:e-i-h-i'] ['n_of:x-i', 'v_to:e-i-

['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'n:x', 'n:x', 'v:e-i-p-h'] ['n:x', 'n:x', 'n:x', 'v:e-i-p-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'n:x'] ['n:x', 'n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n:x', 'n_pair:x'] ['n:x', 'n_pair:x']
['n:x'] ['n:x']
['v_at:e-i-i'] ['v_at:e-i-i']
['v:e-i-p'] ['v:e-i-p']
['n:x', 'v:e-i-p-h', 'v_out:e-i-i', 'v_up:e-i-i'] ['n:x', 'v:e-i-p-h', 'v_out:e-i-i', 'v_up:e-i-i']
['v:e-i-p-h'] ['v:e-i-p-h']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['n:x', 'n_of:x-i', 'v:e-i-p'] ['n:x', 'n_of:x-i', 'v:e-i-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['a:e-e', 'a_of:e-p-i'] ['a:e-e', 'a_of:e-p-i']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p', 'n:x', 'v:e-i-p'] ['a:e-p', 'n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['v:e-i-p', 'v:e-i-i-h'] ['v:e-i-p', 'v:e-i-i-h']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_for:e-i-i'

['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a_of:e-p-i'] ['a_of:e-p-i']
['a_of:e-p-i', 'n:x'] ['a_of:e-p-i', 'n:x']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['p:e-i', 'p:e-p'] ['p:e-i', 'p:e-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x', 'v_with:e-i-i'] ['n:x', 'v_with:e-i-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p', 'p:e-i', 'p_dir:e-i', 'p_state:e-i'] ['a:e-p', 'p:e-i', 'p_dir:e-i', 'p_state:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i'] ['v:e-i']
['n:x'] ['n:x']
['a:i-i'] ['a:i-i']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x', 'v_to:e-i-h-i'] ['n:x', 'v_to:e-i-h-i']
['v:e-i'] ['v:e-i']
['n_of:x-i', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['a_for:e-p', 'n:x'] ['a_for:e-p', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-h', 'n_of:x-i', 'v:e-i-p', 'v_off:e-i', 'v_on:e-i', 'v_up:e-i-i'] ['n_of:x-h', 'n_of:x-i', 'v:e-i-p', 'v_off:e-

['n:x', 'v:e-i-p', 'v_off:e-i-i', 'v_up:e-i-i', 'v_up-with:e-i-i'] ['n:x', 'v:e-i-p', 'v_off:e-i-i', 'v_up:e-i-i', 'v_up-with:e-i-i']
['n:x'] ['n:x']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['v:e-u-p'] ['v:e-u-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_off-of:e-i-u'] ['n:x', 'v:e-i-p', 'v_off-of:e-i-u']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['n_of:x-i', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x', 'v:e-i', 'v_cause:e-i-p'] ['n:x', 'v:e-i', 'v_cause:e-i-p']
['n:x', 'v:e-i-p-h'] ['n:x', 'v:e-i-p-h']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-p-h'] ['v:e-i-p-h']
['n_of:x-i', 'v:e-i-i-h', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-i-h', 'v:e-i-p']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['n_of:x-i'] ['n_of:x-i']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p'

['n:x', 'n_of:x-i'] ['n:x', 'n_of:x-i']
['n_of:x-i'] ['n_of:x-i']
['v:e-i'] ['v:e-i']
['a:e-e'] ['a:e-e']
['a:e-p'] ['a:e-p']
['n:x', 'n:x'] ['n:x', 'n:x']
['v_forth:e-i-i', 'v_to:e-i-p-i', 'v_up:e-i-i'] ['v_forth:e-i-i', 'v_to:e-i-p-i', 'v_up:e-i-i']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['v:e-i'] ['v:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n_pair:x'] ['n_pair:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p', 'x_deg:e-u'] ['a:e-p', 'x_deg:e-u']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x'] ['n:x']
['a:e-p', 'n:x'] [

['a:e-p'] ['a:e-p']
['n:x', 'v_as:e-i-p-i', 'v_as:e-i-i-h'] ['n:x', 'v_as:e-i-p-i', 'v_as:e-i-i-h']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n_of:x-i'] ['n_of:x-i']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['v:e-i-i-h', 'v:e-i-p-h', 'v:e-i-i-i'] ['v:e-i-i-h', 'v:e-i-p-h', 'v:e-i-i-i']
['n_of:x-i'] ['n_of:x-i']
['a:e-i'] ['a:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:i-i', 'n:x', 'n:x'] ['a:i-i', 'n:x', 'n:x']
['n_of:x', 'n_of:x-i', 'v_up:e-i'] ['n_of:x', 'n_of:x-i', 'v_up:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i', 'v_apart:e-i-i', 'v_cause:e-i-p', 'v_off:e-i-i', 'v_open:e-i-i', 'v_up:e-i-i'] ['n:x', 'v:e-i', 'v_apart:e-i-i', 'v_cause:e-i-p', 'v_off:e-i-i', 'v_open:e-i-i', 'v_up:e-i-i']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_apart:e-i-i', 'v_open:e-i-i', 'v_out:e-i-i'] ['n:x', 'v:e-i-p', 'v_apart:e-i-i', 'v_open:e-i-i', 'v_out:e-i-i']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-e', 'a:e-p

['n:x'] ['n:x']
['a:e-e', 'a:e-p'] ['a:e-e', 'a:e-p']
['n:x', 'v:e-i-p', 'v_off:e-i'] ['n:x', 'v:e-i-p', 'v_off:e-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i', 'v:e-i-i-h', 'v:e-i-p'] ['n_of:x-i', 'v:e-i-i-h', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i'] ['n:x', 'v:e-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['v:e-i-p'] ['v:e-i-p']
['a:e-p'] ['a:e-p']
['v_into:e-i-p-i'] ['v_into:e-i-p-i']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['n_of:x-i', 'v:e-i-p', 'v_from-to:e-i-p-i-i'] ['n_of:x-i', 'v:e-i-p', 'v_from-to:e-i-p-i-i']
['n:x', 'v_into:e-i-p-i'] ['n:x', 'v_into:e-i-p-i']
['n_of:x-i'] ['n_of:x-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['a:e-e'] ['a:e-e']
['v:e-i-p', 'v_into:e-i-p-i', 'v_into:e-i-i'

['a_for:e-p-i', 'a_for:e-e', 'a_for:e-h'] ['a_for:e-p-i', 'a_for:e-e', 'a_for:e-h']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['v:e-i-p'] ['v:e-i-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-i-h'] ['a:e-p', 'a:e-i-h']
['a:e-p'] ['a:e-p']
['a:e-e'] ['a:e-e']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['a:e-p'] ['a:e-p']
['a:e-e'] ['a:e-e']
['a:e-e', 'a_for:e-p'] ['a:e-e', 'a_for:e-p']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['a_for:e-p-i'] ['a_for:e-p-i']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a_to:e-p-i'] ['a_to:e-p-i']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']


['a:e-p', 'a:e-i'] ['a:e-p', 'a:e-i']
['p:e-u-u'] ['p:e-u-u']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n_of:x-i'] ['n_of:x-i']
['v:e-i-p'] ['v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['a_of:e-p-i', 'n:x', 'v:e-i-p'] ['a_of:e-p-i', 'n:x', 'v:e-i-p']
['a:e-p', 'n:x'] ['a:e-p', 'n:x']
['n:x'] ['n:x']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['n:x', 'n:x'] ['n:x', 'n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n_of:x-i'] ['n_of:x-i']
['n_of:x-i'] ['n_of:x-i']
['a:e-p'] ['a:e-p']
['a:e-p'] ['a:e-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x', 'v:e-i', 'v_up:e-i-i'] ['n:x', 'v:e-i', 'v_up:e-i-i']
['n:x-h', 'v:e-i-i-i', 'v:e-i-p', 'v_against:e-i-i', 'v_down:e-i-i', 'v_for:e-i-i'] ['n:x-h', 'v:e-i-i-i', 'v:e-i-p', 'v_against:e-i-i', 'v_down:e-i-i', 'v_for:e-i-i']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p', 'v_to:e-i-h-i'] ['n:x', 'v:e-i-p', 

['n_of:x-i'] ['n_of:x-i']
['a:e-i', 'a:e-p'] ['a:e-i', 'a:e-p']
['a:e-p', 'a:e-e'] ['a:e-p', 'a:e-e']
['n:x'] ['n:x']
['n:x'] ['n:x']
['n:x', 'v:e-i-p-h'] ['n:x', 'v:e-i-p-h']
['a:e-i-h', 'a_about:e-p-i', 'a_about:e-e'] ['a:e-i-h', 'a_about:e-p-i', 'a_about:e-e']
['a_for:e-p-i'] ['a_for:e-p-i']
['n:x-h', 'v:e-i-p', 'v_about:e-i-i', 'v_about:e-i-h-i'] ['n:x-h', 'v:e-i-p', 'v_about:e-i-i', 'v_about:e-i-h-i']
['a:e-p'] ['a:e-p']
['a:e-e'] ['a:e-e']
['v:e-i', 'v_cause:e-i-p'] ['v:e-i', 'v_cause:e-i-p']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['n:x'] ['n:x']
['a:e-e'] ['a:e-e']
['a:e-e'] ['a:e-e']
['a:e-p'] ['a:e-p']
['n:x'] ['n:x']
['a:i-u-u', 'n_of:x-i'] ['a:i-u-u', 'n_of:x-i']
['n:x', 'n:x-h'] ['n:x', 'n:x-h']
['a_to:e-p-i'] ['a_to:e-p-i']
['a:e-u-h'] ['a:e-u-h']
['a_of:e-i', 'a_of:e-p-i'] ['a_of:e-i', 'a_of:e-p-i']
['a:e-p'] ['a:e-p']
['v_modal:e-h', 'v_modal:e-h'] ['v_modal:e-h', 'v_modal:e-h']
['n:x', 'v:e-i-p'] ['n:x', 'v:e-i-p']
['n:x'] ['n:x']
['a:e-h', 'n:x', 'v:e-i-

In [23]:
# Display, as a tuple, the number of entries in surface_dmlemma2frame and in abstract_frame.
len(surface_dmlemma2frame),len(abstract_frame)

(16263, 195)

### Export node-to-edge counts for DM, PSD and EDS as JSON

In [24]:
def _dm_psd_node_key(node):
    """Return ``(key, pos)`` for a DM/PSD node.

    ``pos`` is the first entry of ``node['values']``. ``key`` is
    ``"label||pos||frame"``, where the label is lower-cased unless ``pos`` is
    ``'NNP'`` and the frame is the second entry of ``node['values']`` when
    there are exactly two values, otherwise the string ``'none'``.
    """
    label, pos = node['label'], node['values'][0]
    if pos != 'NNP':
        label = label.lower()
    frame = node['values'][1] if len(node['values']) == 2 else 'none'
    return "||".join([label, pos, frame]), pos


def _increment(counts, *path):
    """Add one to the counter stored in ``counts`` under the nested keys ``path``.

    Intermediate dictionaries are created on demand; only plain ``dict``
    objects are used so the result stays directly JSON-serialisable.
    """
    for key in path[:-1]:
        counts = counts.setdefault(key, dict())
    leaf = path[-1]
    counts[leaf] = counts.get(leaf, 0) + 1


def record_node2edge(framework, id2mrp_json=None):
    """Count edge labels per node, separately for incoming and outgoing edges.

    Parameters
    ----------
    framework : str
        One of ``'dm'``, ``'psd'`` or ``'eds'``.
    id2mrp_json : mapping, optional
        Maps a graph id to an MRP graph dict with ``'nodes'`` and ``'edges'``
        entries. When omitted, ``framework2id2mrp_jsons[framework]`` from the
        notebook namespace is used (the original behaviour).

    Returns
    -------
    (node2inEdge_cnt, node2outEdge_cnt) : tuple of dict
        For ``'dm'``/``'psd'`` the layout is
        ``{node_key: {pos_of_other_endpoint: {edge_label: count}}}`` with
        ``node_key`` built as ``"label||pos||frame"``.
        For ``'eds'`` the layout is ``{node_label: {edge_label: count}}``.
        The "in" table is keyed by the edge target, the "out" table by the
        edge source.

    Raises
    ------
    ValueError
        If ``framework`` is not one of the supported names. (Previously the
        function silently returned ``None``, which only surfaced later as an
        unpacking error at the call site.)
    """
    if framework not in ('dm', 'psd', 'eds'):
        raise ValueError(
            "unsupported framework {!r}; expected 'dm', 'psd' or 'eds'".format(framework)
        )
    if id2mrp_json is None:
        id2mrp_json = framework2id2mrp_jsons[framework]

    # DM and PSD nodes carry a 'values' list; EDS nodes are keyed by label only.
    keyed_by_pos = framework != 'eds'
    node2inEdge_cnt = dict()
    node2outEdge_cnt = dict()

    for graph_id in id2mrp_json:
        mrp = id2mrp_json[graph_id]
        nodes_by_id = {node['id']: node for node in mrp['nodes']}
        for edge in mrp['edges']:
            edge_label = edge['label']
            source = nodes_by_id[edge['source']]
            target = nodes_by_id[edge['target']]
            if keyed_by_pos:
                outkey, s_pos = _dm_psd_node_key(source)
                inkey, t_pos = _dm_psd_node_key(target)
                # Outgoing counts are bucketed by the target's POS,
                # incoming counts by the source's POS.
                _increment(node2outEdge_cnt, outkey, t_pos, edge_label)
                _increment(node2inEdge_cnt, inkey, s_pos, edge_label)
            else:
                _increment(node2outEdge_cnt, source['label'], edge_label)
                _increment(node2inEdge_cnt, target['label'], edge_label)

    return node2inEdge_cnt, node2outEdge_cnt

In [None]:
# Count DM edge labels per node, then write the incoming and outgoing tables to JSON.
dm_node2inEdge_cnt, dm_node2outEdge_cnt = record_node2edge('dm')
for out_path, edge_counts in (
    ("../node2edge_cnt/dm_node2inEdge_cnt.json", dm_node2inEdge_cnt),
    ("../node2edge_cnt/dm_node2outEdge_cnt.json", dm_node2outEdge_cnt),
):
    with open(out_path, 'w') as json_file:
        json.dump(edge_counts, json_file)

In [None]:
# Pretty-print the DM outgoing-edge counts produced by the preceding export cell.
# `node2outEdge_cnt` is only a local variable inside record_node2edge, so the
# notebook-level name to inspect here is `dm_node2outEdge_cnt`.
pprint (dm_node2outEdge_cnt)

In [None]:
# Count PSD edge labels per node, then write the incoming and outgoing tables to JSON.
psd_node2inEdge_cnt, psd_node2outEdge_cnt = record_node2edge('psd')
for out_path, edge_counts in (
    ("../node2edge_cnt/psd_node2inEdge_cnt.json", psd_node2inEdge_cnt),
    ("../node2edge_cnt/psd_node2outEdge_cnt.json", psd_node2outEdge_cnt),
):
    with open(out_path, 'w') as json_file:
        json.dump(edge_counts, json_file)

In [None]:
# Display the PSD outgoing-edge count table returned by record_node2edge('psd').
psd_node2outEdge_cnt

In [None]:
# Count EDS edge labels per node, then write the incoming and outgoing tables to JSON.
eds_node2inEdge_cnt, eds_node2outEdge_cnt = record_node2edge('eds')
for out_path, edge_counts in (
    ("../node2edge_cnt/eds_node2inEdge_cnt.json", eds_node2inEdge_cnt),
    ("../node2edge_cnt/eds_node2outEdge_cnt.json", eds_node2outEdge_cnt),
):
    with open(out_path, 'w') as json_file:
        json.dump(edge_counts, json_file)

### Counting token-to-node mappings (DM, PSD, EDS) on the training data

In [52]:
def count_mapping(framework):
    
    if framework == 'dm':
        token2dm_lemma = {
                  "isn’t":"not","aren’t":"not","wasn’t":"not","weren’t":"not",
                  "ain’t":"not","haven’t":"not","hasn’t":"not","hadn’t":"not","won’t":"not",
                  "doesn’t":"not","don’t":"not","didn’t":"not","can’t":"not","shalln’t":"not",
                  "shouldn’t":"not","couldn’t":"not","wouldn’t":"not","mustn’t":"not","mightn’t":"not","mayn’t":"not"
                 }
        token2labels = dict()
        label2tokens = dict()
        dm_label2frame_cnt = dict()
        #cmp_label2frame = dict()
        dm_singlenot_label2pos_cnt = dict()
        dm_doublenot_label2pos1_cnt = dict()
        dm_doublenot_label2pos2_cnt = dict()
        dm_singlenot_label2frame_cnt = dict()
        dm_doublenot_label2frame1_cnt = dict()
        dm_doublenot_label2frame2_cnt = dict()
        cnt = 0
        f = framework
        f2i2m = framework2id2mrp_jsons
        f2c2p = framework2cid2parse
        dm_num_cor_label = 0
        dm_num_cor_pos = 0
        dm_num_nodes = 0
        cnt_double = 0
        cnt_single = 0

        for key in f2i2m[f]:
            if key != '20003020':
                pass
            #print ('key: ',key)
            parse = f2c2p[f][key]
            parse_an2node = dict()
            for parse_node in parse:
                parse_token = parse_node[1]
                parse_lemma = parse_node[2]
                parse_pos = parse_node[4]
                parse_an = re.search(r"(\d+):(\d+)", parse_node[-1])
                parse_an_from = parse_an.group(1)
                parse_an_to = parse_an.group(2)
                parse_an2node[":".join([parse_an_from,parse_an_to])] = (parse_token,parse_lemma,parse_pos)
            #print (parse_an2node)
            #print ("\n")


            mrp_text = f2i2m[f][key]['input']
            #print (mrp_text)
            #print ("\n")
            mrp_nodes = f2i2m[f][key]['nodes']
            #print (mrp_nodes)
            #print ("\n")
            #break
            for idx,mrp_node in enumerate(mrp_nodes):
                #print (mrp_node)
                dm_num_nodes += 1
                mrp_an_from = str(mrp_node['anchors'][0]['from'])
                mrp_an_to = str(mrp_node['anchors'][0]['to'])
                mrp_label = mrp_node['label']
                mrp_token = mrp_text[int(mrp_an_from):int(mrp_an_to)]
                mrp_pos = mrp_node['values'][0]
                if "in+addition+to" in mrp_label:
                    print (key,mrp_node)
                try:
                    mrp_frame = mrp_node['values'][1]
                    dm_label = mrp_label
                    if not dm_label in dm_label2frame_cnt:
                        dm_label2frame_cnt[dm_label] = dict()
                        dm_label2frame_cnt[dm_label][mrp_frame] = 1
                    else:
                        if not mrp_frame in dm_label2frame_cnt[dm_label]:
                            dm_label2frame_cnt[dm_label][mrp_frame] = 1
                        else:
                            dm_label2frame_cnt[dm_label][mrp_frame] += 1
                except:
                    pass
                    #print (mrp_node)
                    #return

                try:
                    if mrp_nodes[idx+1]['label'] == mrp_label and mrp_nodes[idx+1]['label'] in token2dm_lemma:
                        #print (mrp_nodes[idx])
                        #print (mrp_nodes[idx+1])
                        #record pos1,2 cnt
                        mrp_label2 = mrp_nodes[idx+1]['label']
                        mrp_pos2 = mrp_nodes[idx+1]['values'][0]
                        if not mrp_label in dm_doublenot_label2pos1_cnt:
                            dm_doublenot_label2pos1_cnt[mrp_label] = dict()
                            dm_doublenot_label2pos1_cnt[mrp_label][mrp_pos] = 1
                        else:
                            if not mrp_pos in dm_doublenot_label2pos1_cnt[mrp_label]:
                                dm_doublenot_label2pos1_cnt[mrp_label][mrp_pos] = 1
                            else:
                                dm_doublenot_label2pos1_cnt[mrp_label][mrp_pos] += 1

                        if not mrp_label2 in dm_doublenot_label2pos2_cnt:
                            dm_doublenot_label2pos2_cnt[mrp_label2] = dict()
                            dm_doublenot_label2pos2_cnt[mrp_label2][mrp_pos2] = 1
                        else:
                            if not mrp_pos2 in dm_doublenot_label2pos2_cnt[mrp_label2]:
                                dm_doublenot_label2pos2_cnt[mrp_label2][mrp_pos2] = 1
                            else:
                                dm_doublenot_label2pos2_cnt[mrp_label2][mrp_pos2] += 1

                        #record frame1,2 cnt
                        mrp_frame2 = mrp_nodes[idx+1]['values'][1]
                        if not mrp_label in dm_doublenot_label2frame1_cnt:
                            dm_doublenot_label2frame1_cnt[mrp_label] = dict()
                            dm_doublenot_label2frame1_cnt[mrp_label][mrp_frame] = 1
                        else:
                            if not mrp_frame in dm_doublenot_label2frame1_cnt[mrp_label]:
                                dm_doublenot_label2frame1_cnt[mrp_label][mrp_frame] = 1
                            else:
                                dm_doublenot_label2frame1_cnt[mrp_label][mrp_frame] += 1

                        if not mrp_label2 in dm_doublenot_label2frame2_cnt:
                            dm_doublenot_label2frame2_cnt[mrp_label2] = dict()
                            dm_doublenot_label2frame2_cnt[mrp_label2][mrp_frame2] = 1
                        else:
                            if not mrp_frame2 in dm_doublenot_label2frame2_cnt[mrp_label2]:
                                dm_doublenot_label2frame2_cnt[mrp_label2][mrp_frame2] = 1
                            else:
                                dm_doublenot_label2frame2_cnt[mrp_label2][mrp_frame2] += 1
                        cnt_double += 1

                    elif (mrp_nodes[idx]['label'] in token2dm_lemma and 
                          mrp_nodes[idx-1]['label'] != mrp_nodes[idx]['label']
                          and mrp_nodes[idx+1]['label'] != mrp_nodes[idx]['label']) :
                        #print (mrp_nodes[idx])

                        #record pos cnt
                        if not mrp_label in dm_singlenot_label2pos_cnt:
                            dm_singlenot_label2pos_cnt[mrp_label] = dict()
                            dm_singlenot_label2pos_cnt[mrp_label][mrp_pos] = 1
                        else:
                            if not mrp_pos in dm_singlenot_label2pos_cnt[mrp_label]:
                                dm_singlenot_label2pos_cnt[mrp_label][mrp_pos] = 1
                            else:
                                dm_singlenot_label2pos_cnt[mrp_label][mrp_pos] += 1

                        #record frame cnt
                        if not mrp_label in dm_singlenot_label2frame_cnt:
                            dm_singlenot_label2frame_cnt[mrp_label] = dict()
                            dm_singlenot_label2frame_cnt[mrp_label][mrp_frame] = 1
                        else:
                            if not mrp_frame in dm_singlenot_label2frame_cnt[mrp_label]:
                                dm_singlenot_label2frame_cnt[mrp_label][mrp_frame] = 1
                            else:
                                dm_singlenot_label2frame_cnt[mrp_label][mrp_frame] += 1                        
                        cnt_single += 1
                except:
                    pass

                #print (parse_an2node)
                try:
                    parse_lemma = parse_an2node[":".join([mrp_an_from,mrp_an_to])][1]
                    parse_pos = parse_an2node[":".join([mrp_an_from,mrp_an_to])][2]
                    #print (parse_lemma,mrp_an_from,mrp_an_to)
                except Exception as e:
    #                 print (e)
    #                 print (mrp_an_from,mrp_an_to)
    #                 print (mrp_text)
    #                 print (parse_an2node)
    #                 if cnt > 17884:
    #                     return
                    pass

                if mrp_label != parse_lemma:
                    #print (mrp_label,parse_lemma)
                    pass
                else:
                    dm_num_cor_label += 1
                if mrp_pos != parse_pos:
                    #print (mrp_pos,parse_pos)
                    pass
                else:
                    dm_num_cor_pos += 1

                #count frame
                #if not mrp_token lemma2frame[]

                #count label
                if not mrp_token in token2labels:
                    token2labels[mrp_token] = dict()
    #             if not token in label2tokens:
    #                 label2tokens[label] = dict()

                if not mrp_label in token2labels[mrp_token]:
                    token2labels[mrp_token][mrp_label] = 1
                else:
                    token2labels[mrp_token][mrp_label] += 1

    #             if not token in label2tokens[label]:
    #                 label2tokens[label][token] = 1
    #             else:
    #                 label2tokens[label][token] += 1
            #print ("--------------------------------------------------------------\n")    
            if cnt > 340:
                pass
            cnt += 1
            #print (cnt)
        print (cnt_double)
        print (cnt_single)
    #     pprint ([
    #             dm_singlenot_label2pos_cnt,dm_doublenot_label2pos1_cnt,dm_doublenot_label2pos2_cnt,
    #             dm_singlenot_label2frame_cnt,dm_doublenot_label2frame1_cnt,dm_doublenot_label2frame2_cnt])
        print (framework)
        print (cnt)
        print ("label correct: ",dm_num_cor_label, dm_num_cor_label/dm_num_nodes)
        print ("pos correct: ",dm_num_cor_pos, dm_num_cor_pos/dm_num_nodes)
        print ("total: ",dm_num_nodes)

        return [dm_label2frame_cnt,
                dm_singlenot_label2pos_cnt,dm_doublenot_label2pos1_cnt,dm_doublenot_label2pos2_cnt,
                dm_singlenot_label2frame_cnt,dm_doublenot_label2frame1_cnt,dm_doublenot_label2frame2_cnt]
    
    
    if framework == 'psd':
        psd_label2frame_cnt = dict()
        psd_cmp2anposframe = dict()
        psd_token2abstract_cnt = dict()
        psd_tokenlemma2label = dict()
        cnt = 0
        cnt_wrong_label = 0
        cnt_wrong_pos = 0
        cnt_total_pos = 0
        cnt_total_label = 0
        psd_num_nodes = 0
        f = framework
        f2i2m = framework2id2mrp_jsons
        f2c2p = framework2cid2parse
        for key in f2i2m[f]:
            parse = f2c2p[f][key]
            parse_an2node = dict()
            for parse_node in parse:
                parse_token = parse_node[1]
                parse_lemma = parse_node[2]
                parse_sim_pos = parse_node[3]
                parse_pos = parse_node[4]
                parse_an = re.search(r"(\d+):(\d+)", parse_node[-1])
                parse_an_from = parse_an.group(1)
                parse_an_to = parse_an.group(2)
                parse_an2node[":".join([parse_an_from,parse_an_to])] = (parse_token,parse_lemma,parse_sim_pos,parse_pos)
            #pprint (parse_an2node)
            #print ("\n")


            mrp_text = f2i2m[f][key]['input']
            #print (mrp_text)
            #print ("\n")
            mrp_nodes = f2i2m[f][key]['nodes']
            #pprint (mrp_nodes)
            #print ("\n")
            #print (mrp_text)
            for idx,mrp_node in enumerate(mrp_nodes):
                #print (mrp_node)
                psd_num_nodes += 1
                mrp_an_from = str(mrp_node['anchors'][0]['from'])
                mrp_an_to = str(mrp_node['anchors'][0]['to'])
                mrp_label = mrp_node['label'].lower()
                mrp_token = mrp_text[int(mrp_an_from):int(mrp_an_to)].lower()
                mrp_pos = mrp_node['values'][0]
                
                #frame
                try:
                    mrp_frame = mrp_node['values'][1]
                    psd_label = mrp_label
                    if not psd_label in psd_label2frame_cnt:
                        psd_label2frame_cnt[psd_label] = dict()
                        psd_label2frame_cnt[psd_label][mrp_frame] = 1
                    else:
                        if not mrp_frame in psd_label2frame_cnt[psd_label]:
                            psd_label2frame_cnt[psd_label][mrp_frame] = 1
                        else:
                            psd_label2frame_cnt[psd_label][mrp_frame] += 1
                except:
                    #not verb
                    mrp_frame = None
                    pass
                    #print (mrp_node)
                    #return
                
                try:
                    parse_lemma = parse_an2node[":".join([mrp_an_from,mrp_an_to])][1]
                    parse_pos = parse_an2node[":".join([mrp_an_from,mrp_an_to])][3]
                    #print (parse_lemma,mrp_an_from,mrp_an_to)
                except Exception as e:
                    #ignore ...!
                    parse_lemma == None
                #/, -> _
#                 if ('/' in parse_lemma):
#                     print (mrp_text)
#                     print (parse_lemma,mrp_label)
#                     #print ()
                #tokenlemma 2 label
                if (not mrp_label.startswith('#')):
                    if parse_lemma != None:
                        if(mrp_token+","+parse_lemma+","+parse_pos in psd_tokenlemma2label and mrp_label != psd_tokenlemma2label[mrp_token+","+parse_lemma+","+parse_pos]):
                            print ((mrp_token,parse_lemma,parse_pos),psd_tokenlemma2label[mrp_token+","+parse_lemma+","+parse_pos],mrp_label)
                        psd_tokenlemma2label[mrp_token+","+parse_lemma+","+parse_pos] = mrp_label
                    splited = mrp_label.split("_")
                    #such_as
                    if len(splited)>=2:
                        if (len(splited) == 6):
                            print (mrp_label)
                        #print (len(splited),"bbb")
                        is_word = [True if word.isalpha() else False for word in splited ]
                        if all(is_word):
                            #is_cmp
                            psd_label = "_".join(splited)
                            #print (psd_label,mrp_frame)
                            anchored_lemma = parse_an2node[":".join([mrp_an_from,mrp_an_to])][1]
                            if not psd_label in psd_cmp2anposframe:
                                psd_cmp2anposframe[psd_label] = dict()
                                psd_cmp2anposframe[psd_label]["POS:"+mrp_pos] = 1
                                psd_cmp2anposframe[psd_label]["AN:"+anchored_lemma] = 1 #!!!!!
                                if mrp_frame != None:
                                    psd_cmp2anposframe[psd_label]["FRAME:"+mrp_frame] = 1
                            else:
                                if not "POS:"+mrp_pos in psd_cmp2anposframe[psd_label]:
                                    psd_cmp2anposframe[psd_label]["POS:"+mrp_pos] = 1
                                else:
                                    psd_cmp2anposframe[psd_label]["POS:"+mrp_pos] += 1
                                if not "AN:"+mrp_token in psd_cmp2anposframe[psd_label]:
                                    psd_cmp2anposframe[psd_label]["AN:"+anchored_lemma] = 1
                                else:
                                    psd_cmp2anposframe[psd_label]["AN:"+anchored_lemma] += 1
                                if mrp_frame != None:
                                    if not "FRAME:"+mrp_frame in psd_cmp2anposframe[psd_label]:
                                        psd_cmp2anposframe[psd_label]["FRAME:"+mrp_frame] = 1
                                    else:
                                        psd_cmp2anposframe[psd_label]["FRAME:"+mrp_frame] += 1
                        else:
                            pass
                        
                #abstract label
                else:
                    #print (mrp_label, mrp_token)
                    if not mrp_token+","+parse_pos in psd_token2abstract_cnt:
                        psd_token2abstract_cnt[mrp_token+","+parse_pos] = dict()
                        psd_token2abstract_cnt[mrp_token+","+parse_pos][mrp_label+","+mrp_pos] = 1
                    else:
                        if not mrp_label+","+mrp_pos in psd_token2abstract_cnt[mrp_token+","+parse_pos]:
                            psd_token2abstract_cnt[mrp_token+","+parse_pos][mrp_label+","+mrp_pos] = 1
                        else:
                            psd_token2abstract_cnt[mrp_token+","+parse_pos][mrp_label+","+mrp_pos] += 1
                
                #show stat
                if (not mrp_label.startswith('#')):
                    cnt_total_label += 1
                    
                if mrp_label.lower() != parse_lemma.lower():
                    if (not mrp_label.startswith('#')):
                        #print (mrp_label,parse_lemma)
                        cnt_wrong_label += 1
                        pass
                else:
                    pass
                
                if mrp_pos != parse_pos:
                    #print (mrp_pos,parse_pos)
                    cnt_wrong_pos += 1
                    pass
                else:
                    pass
                if mrp_label.lower() != parse_lemma.lower():
                    if (not mrp_label.startswith('#')):
                        #print (mrp_label.lower(),parse_lemma.lower())
                        pass
            cnt += 1
            if cnt>500:
                #break
                pass
        print (len(psd_label2frame_cnt))
        print ("label:",1-cnt_wrong_label/cnt_total_label)
        print ("pos:",1-cnt_wrong_pos/psd_num_nodes)
        #pprint (psd_label2frame_cnt)
        return psd_label2frame_cnt,psd_cmp2anposframe,psd_token2abstract_cnt,psd_tokenlemma2label
    if framework == 'eds':
        concept_label_set = set()
        abstract_label_set = set()
        node2outEdge_cnt = dict()
        node2inEdge_cnt = dict()
        eds_lemmapos2surface_cnt = dict()
        eds_token2decor_cnt = dict()
        eds_token2concept_cnt = dict()
        eds_abstract2noedge_cnt = dict()
        eds_abscon_node_set = set()
        eds_decor_cnt = dict()
        eds_q_cnt = dict()
        eds_label2abstracttwobutone_cnt = dict()
        eds_label2abstractone_cnt = dict()
        cnt = 0
        cnt_wrong_label = 0
        cnt_wrong_pos = 0
        cnt_total_pos = 0
        cnt_total_label = 0
        eds_num_nodes = 0
        f = framework
        f2i2m = framework2id2mrp_jsons
        f2c2p = framework2cid2parse
        
        #retrieve _surface, abstracted and decor and gen id2eds_star_mrp_jsons
        id2eds_star_mrp_jsons = dict()
        for key in f2i2m[f]:
            #print ()
            #print (key)
            
            mrp_text = f2i2m[f][key]['input']
            mrp_nodes = f2i2m[f][key]['nodes']
            mrp_id2nodes = {node['id']: node for node in mrp_nodes}
            mrp_edges = f2i2m[f][key]['edges']
            
            mrp_nodeid2type = dict()
            #record edges of each node
#             mrp_node2outedges = dict()
#             mrp_node2inedges = dict()
            mrp_node2outnodes = {node['id']: set() for node in mrp_nodes}
            mrp_node2innodes = {node['id']: set() for node in mrp_nodes}
            for edge in mrp_edges:
                edge_label,edge_source,edge_target = edge['label'],edge['source'],edge['target']
#                 if not edge_source in mrp_node2outedges:
#                     mrp_node2outedges[edge_source] = dict()
#                 mrp_node2outedges[edge_source][edge_label] = edge_target
#                 if not edge_target in mrp_node2inedges:
#                     mrp_node2inedges[edge_target] = dict()
#                 mrp_node2inedges[edge_target][edge_label] = edge_source
                if not edge_source in mrp_node2outnodes:
                    mrp_node2outnodes[edge_source] = set()
                mrp_node2outnodes[edge_source].add(edge_target)
                if not edge_target in mrp_node2innodes:
                    mrp_node2innodes[edge_target] = set()
                mrp_node2innodes[edge_target].add(edge_source)
                    
#             #iterate through node
#             while (len(mrp_nodeid2type) < len(mrp_nodes)):
            for mrp_id in mrp_id2nodes:
                if (mrp_id in mrp_nodeid2type):
                    continue
                eds_num_nodes += 1
                mrp_node = mrp_id2nodes[mrp_id]
                mrp_an_from = str(mrp_node['anchors'][0]['from'])
                mrp_an_to = str(mrp_node['anchors'][0]['to'])
                mrp_label = mrp_node['label'].lower()
#                 if mrp_an_from,mrp_an_to
                mrp_token = mrp_text[int(mrp_an_from):int(mrp_an_to)].lower()

                try:
                    mrp_pos = mrp_node['values'][0]
                except:
                    mrp_pos = None

                #Classify node type
                if (mrp_label[0]=='_'):
                    mrp_nodeid2type[mrp_id] = "surface"
                    eds_lemmapos2surface_cnt = []
                elif (mrp_label.endswith('_q')):
                    mrp_nodeid2type[mrp_id] = "decor"
                else:
                    mrp_nodeid2type[mrp_id] = "other"
                    eds_abscon_node_set.add(mrp_label)
                    
                if mrp_label.endswith('_q'):
                    if not mrp_label in eds_q_cnt:
                        eds_q_cnt[mrp_label] = 0
                    eds_q_cnt[mrp_label] += 1
                #eds_decor_cnt
                elif not mrp_label.startswith('_'):
                    if not mrp_label in eds_decor_cnt:
                        eds_decor_cnt[mrp_label] = 0
                    eds_decor_cnt[mrp_label] += 1
                #eds_token2decor_cnt
#                 if not mrp_token in eds_token2decor_cnt:
#                     eds_token2decor_cnt[mrp_token] = 0
                         
            #conv to eds star step1
            #remove BV and its node
            rm_node_id = set()
            rm_edge_id = set()
            new_nodes = []
            for mrp_node_id in mrp_nodeid2type:
                if mrp_nodeid2type[mrp_node_id] == 'decor':
                    rm_node_id.add(mrp_node_id)
                if mrp_nodeid2type[mrp_node_id] == 'other':
                    #print (mrp_id2nodes[mrp_node_id]['label'],mrp_nodeid2type[mrp_node_id])
                    pass
                
            new_rm_node_id = None
            while new_rm_node_id != rm_node_id:
                if new_rm_node_id == None:
                    new_rm_node_id = rm_node_id.copy()
                rm_node_id = new_rm_node_id
                for mrp_node_id in mrp_id2nodes:
                    if not mrp_node_id in rm_node_id:
                        for out_node in mrp_node2outnodes[mrp_node_id]:
                            if out_node in rm_node_id:
                                new_rm_node_id.add(mrp_node_id)
                                break
            
            new_nodes = [mrp_id2nodes[node_id] for node_id in mrp_id2nodes if node_id not in new_rm_node_id ]
            new_edges = [edge for edge in mrp_edges if not edge['source'] in rm_node_id and not edge['target'] in rm_node_id]
            
            #print (new_rm_node_id)
            #pprint (new_edges)
            
            eds_star_mrp = copy.deepcopy(f2i2m[f][key])
            eds_star_mrp['nodes']=new_nodes
            eds_star_mrp['edges']=new_edges
            
            #pprint (eds_star_mrp)
            #count abstract no of edge
            #separate abscon into abstract and concept
            node_id2no_edge = {node['id']: 0 for node in new_nodes}
            
            for edge in new_edges:
                if mrp_nodeid2type[edge['source']] == 'other':
                    node_id2no_edge[edge['source']] += 1
            for node_id in node_id2no_edge:
                if node_id2no_edge[node_id] > 0:
                    if not mrp_id2nodes[node_id]['label'] in eds_abstract2noedge_cnt:
                        eds_abstract2noedge_cnt[mrp_id2nodes[node_id]['label']] = node_id2no_edge[node_id]
                    else:
                        if eds_abstract2noedge_cnt[mrp_id2nodes[node_id]['label']] < node_id2no_edge[node_id]:
                            eds_abstract2noedge_cnt[mrp_id2nodes[node_id]['label']] = node_id2no_edge[node_id]
                    
            
            #for mrp_node2outedges[]
            #eds_star_mrp
            id2eds_star_mrp_jsons[key] = copy.deepcopy(eds_star_mrp)
            
        concept_label_set = eds_abscon_node_set-set(eds_abstract2noedge_cnt.keys())
        
        #conv to eds star step2
        cnt_k = 0
        cnt_usable = 0
        plott = False
        eds_star_mrp_jsons_usable = dict()
        for key in tqdm(id2eds_star_mrp_jsons):
            try:
#                 if key != '20004014':
#                     continue
#             if(True):
                parse = f2c2p[f][key]
                parse_an2node = dict()
                for parse_node in parse:
                    parse_id = parse_node[0]
                    parse_token = parse_node[1]
                    parse_lemma = parse_node[2]
                    parse_sim_pos = parse_node[3]
                    parse_pos = parse_node[4]
                    parse_an = re.search(r"(\d+):(\d+)", parse_node[-1])
                    parse_an_from = parse_an.group(1)
                    parse_an_to = parse_an.group(2)
                    parse_an2node[":".join([parse_an_from,parse_an_to])] = (parse_token,parse_lemma,parse_sim_pos,parse_pos,parse_id)

                cnt_k += 1
                eds_star_mrp = copy.deepcopy(id2eds_star_mrp_jsons[key])
                if plott:
                    plot_mrp_json(f2i2m['eds'][key],'eds',False,"0")
                    pprint (f2i2m['eds'][key])
                if plott:
                    plot_mrp_json(eds_star_mrp,'eds',False,"1")
                #print ("eds_star_mrp:",eds_star_mrp)

                mrp_id2nodes = {node['id']: node for node in eds_star_mrp['nodes']}
                #find node2no_edge
                abs_node_id2no_edge = dict()
                node_id2edge_list = {node_id: set() for node_id in mrp_id2nodes}
                node_id2inedge_list = {node_id: set() for node_id in mrp_id2nodes}
                new_edges = set()
                for edge in eds_star_mrp['edges']:
                    l,s,t = edge['label'],edge['source'],edge['target']
                    new_edges.add((l,s,t))
                    node_id2edge_list[s].add((l,t))
                    node_id2inedge_list[t].add((l,s))

                eds_star_mrp2 = copy.deepcopy(eds_star_mrp)
                #print ("eds_star_mrp2:",eds_star_mrp2)
                #do edge transform if cnt>1

                new_nodes = []
                for node_id in mrp_id2nodes:
                    if plott:
                        plot_mrp_json(eds_star_mrp2,'eds',False,node_id)
                    #max out edge =2
                    edge_list = node_id2edge_list[node_id]
                    if mrp_id2nodes[node_id]['label'] in eds_abstract2noedge_cnt and eds_abstract2noedge_cnt[mrp_id2nodes[node_id]['label']] == 2:
                        #actual 2out
                        if len(edge_list) == 2:
                            edges = [edge for edge in edge_list]
                            if (edges[0][0][-1] == '1' and edges[1][0][-1] == '2') or (edges[0][0][0] == 'L' and edges[1][0][0] == 'R'):
                                edge_source,edge_target = edges[0][1],edges[1][1]
                                edge_to_source_label,edge_to_target_label = edges[0][0],edges[1][0]
                            elif (edges[1][0][-1] == '1' and edges[0][0][-1] == '2') or (edges[1][0][0] == 'L' and edges[0][0][0] == 'R'):
                                edge_source,edge_target = edges[1][1],edges[0][1]
                                edge_to_source_label,edge_to_target_label = edges[1][0],edges[0][0]
                            #1+ in: relocate in edge to left(edge_source)
                            if len(node_id2inedge_list[node_id]) > 0:
                                #del old in edge
                                in_edge_list = node_id2inedge_list[node_id].copy()
                                for l,s in in_edge_list:
                                    new_edges.remove((l,s,node_id))
                                    node_id2edge_list[s].remove((l,node_id))
                                    node_id2inedge_list[node_id].remove((l,s))
                                #relocate in edge
                                for l,s in in_edge_list:
                                    new_edges.add((l,s,edge_source))
                                    node_id2edge_list[s].add((l,edge_source))
                                    node_id2inedge_list[edge_source].add((l,s))
                            #remove outedge
                            new_edges.remove((edge_to_source_label,node_id,edge_source))
                            node_id2edge_list[node_id].remove((edge_to_source_label,edge_source))
                            node_id2inedge_list[edge_source].remove((edge_to_source_label,node_id))
                            new_edges.remove((edge_to_target_label,node_id,edge_target))
                            node_id2edge_list[node_id].remove((edge_to_target_label,edge_target))
                            node_id2inedge_list[edge_target].remove((edge_to_target_label,node_id))
                            #add new edge
                            new_edges.add((mrp_id2nodes[node_id]['label'],edge_source,edge_target))
                            node_id2edge_list[edge_source].add((mrp_id2nodes[node_id]['label'],edge_target))
                            node_id2inedge_list[edge_target].add((mrp_id2nodes[node_id]['label'],edge_source))

                        #record max2 abs node for cur only 1 outedge  (need modify! key!)
                        elif len(edge_list) == 1:
                            edges = [edge for edge in edge_list]
                            l,t = edges[0]
                            #0 in: remove outedge
                            if len(node_id2inedge_list[node_id]) == 0:
                                #del out edge
                                new_edges.remove((l,node_id,t))
                                node_id2edge_list[node_id].remove((l,t))
                                node_id2inedge_list[t].remove((l,node_id))
                            else:
                                new_nodes.append(mrp_id2nodes[node_id])
    #                         edge_key = "("+edge[0][0]+","+edge[0][1]+")"
    #                         if not mrp_id2nodes[node_id]['label'] in eds_label2abstracttwobutone_cnt:
    #                             eds_label2abstracttwobutone_cnt[mrp_id2nodes[node_id]] = dict()
    #                         if not edge_key in eds_label2abstracttwobutone_cnt[mrp_id2nodes[node_id]]:
    #                             eds_label2abstracttwobutone_cnt[mrp_id2nodes[node_id]][edge_key] = 0
    #                         eds_label2abstracttwobutone_cnt[mrp_id2nodes[node_id]] += 1
                        else:
                            print ("max 2 but 0!",key)
                            new_nodes.append(mrp_id2nodes[node_id])

                    #max out edge =1  
                    elif mrp_id2nodes[node_id]['label'] in eds_abstract2noedge_cnt and eds_abstract2noedge_cnt[mrp_id2nodes[node_id]['label']] == 1:
                        edge_list = node_id2edge_list[node_id]
                        if len(edge_list) == 1:
                            edges = [edge for edge in edge_list]
                            l,t = edges[0]
                            #0 in: remove outedge
                            if len(node_id2inedge_list[node_id]) == 0:
                                #del out edge
                                new_edges.remove((l,node_id,t))
                                node_id2edge_list[node_id].remove((l,t))
                                node_id2inedge_list[t].remove((l,node_id))
                            else:
                                new_nodes.append(mrp_id2nodes[node_id])
                            #0 in: no edge need to add
    #                         edge_key = "("+edge[0][0]+","+edge[0][1]+")"
    #                         if not mrp_id2nodes[node_id]['label'] in eds_label2abstractone_cnt:
    #                             eds_label2abstractone_cnt[mrp_id2nodes[node_id]] = dict()
    #                         if not edge_key in eds_label2abstractone_cnt[mrp_id2nodes[node_id]]:
    #                             eds_label2abstractone_cnt[mrp_id2nodes[node_id]][edge_key] = 0
    #                         eds_label2abstractone_cnt[mrp_id2nodes[node_id]][edge_key] += 1
                        else:
                            print ("max 1 but 0!",key)
                            new_nodes.append(mrp_id2nodes[node_id])
                            #break
                    else:
                        if mrp_id2nodes[node_id]['label'] in eds_abstract2noedge_cnt and eds_abstract2noedge_cnt[mrp_id2nodes[node_id]['label']] == 3:
                            print ("max 3! ignore!",key)
                        else:
                            pass
                            #non-abstract node (surface or concept)
                            #record eds_token2concept_cnt or eds_token2surface_cnt
    #                         mrp_node = mrp_id2nodes[node_id]
    #                         mrp_an_from = mrp_node['anchors'][0]['from']
    #                         mrp_an_to = mrp_node['anchors'][0]['to']
    #                         try:
    #                             mrp_token = mrp_text[int(mrp_an_from):int(mrp_an_to)].lower()
    #                         except:
    #                             print ()
    #                         if not mrp_id2nodes[node_id]['l'] in eds_token2concept_cnt:
    #                             eds_token2concept_cnt[mrp_id2nodes[node_id]['label']
                        new_nodes.append(mrp_id2nodes[node_id])

                new_edges_list = []
                for l,s,t in new_edges:
                    new_edges_list.append({
                        "label": l,
                        "source": s,
                        "target": t
                    })
                eds_star_mrp2['edges'] = new_edges_list
                eds_star_mrp2['nodes'] = new_nodes

                #pprint (parse_an2node)
                #pprint (eds_star_mrp2)
                #gen usable eds_star_mrp2
                eds_usable_nodes = []
                parse_an_list = []
                for parse_an in parse_an2node:
                    parse_an_from,parse_an_to = parse_an.split(":")
                    parse_an_list.append((int(parse_an_from),int(parse_an_to)))
                eds_an_list = [(node['anchors'][0]['from'],node['anchors'][0]['to']) for node in eds_star_mrp2['nodes']]

                eds_idx = 0
                eds_last_an_to = 0
                parse_idx = 0
                while eds_idx < len(eds_an_list):
                    if parse_an_list[parse_idx][0]==eds_an_list[eds_idx][0]:
                        if parse_an_list[parse_idx][1]==eds_an_list[eds_idx][1]:
                            usable_node = eds_star_mrp2['nodes'][eds_idx].copy()
                            usable_node['ignore'] = False
                            eds_usable_nodes.append(usable_node)
                            parse_idx += 1
                            eds_last_an_to = eds_an_list[eds_idx][1]
                            eds_idx += 1
                        elif eds_an_list[eds_idx][1] - parse_an_list[parse_idx][1] == 1:
                            fr = eds_an_list[eds_idx][0]
                            too = parse_an_list[parse_idx][1]
                            usable_node = eds_star_mrp2['nodes'][eds_idx].copy()
                            usable_node['ignore'] = False
                            usable_node['anchors'] = [{'from': fr, 'to': too}]
                            eds_usable_nodes.append(usable_node)
                            eds_idx += 1
                            parse_idx += 1
                            eds_last_an_to = too
                        else:
                #                             print ("case1")
                #                             print (parse_an_list)
                #                             print (eds_an_list)
                            break
                    elif parse_an_list[parse_idx][0] < eds_an_list[eds_idx][0]:
                        if parse_an_list[parse_idx][1] < eds_an_list[eds_idx][1]:
                            fr = parse_an_list[parse_idx][0]
                            too = parse_an_list[parse_idx][1]
                            eds_usable_nodes.append(
                                {'anchors': [{'from': fr, 'to': too}],
                                 'id': "<id>",
                                 'label': "<dummy>",
                                 'ignore': True}
                            )
                            parse_idx += 1
                            eds_last_an_to = too
                        elif parse_an_list[parse_idx][1] == eds_an_list[eds_idx][1]:
                            usable_node = eds_star_mrp2['nodes'][eds_idx].copy()
                            usable_node['ignore'] = False
                            eds_usable_nodes.append(usable_node)
                            parse_idx += 1
                            eds_last_an_to = eds_an_list[eds_idx][1]
                            eds_idx += 1
                        elif eds_an_list[eds_idx][1] - parse_an_list[parse_idx][1] == 1:
                            fr = eds_an_list[eds_idx][0]
                            too = parse_an_list[parse_idx][1]
                            usable_node = eds_star_mrp2['nodes'][eds_idx].copy()
                            usable_node['ignore'] = False
                            usable_node['anchors'] = [{'from': fr, 'to': too}]
                            eds_usable_nodes.append(usable_node)
                            eds_last_an_to = too
                            eds_idx += 1
                            parse_idx += 1
                        else:
#                             print (parse_an_list)
#                             print (eds_an_list)
#                             print ("case2")
                            break
                    else:
#                         print (parse_an_list)
#                         print (eds_an_list)
#                         print ("case3")
                        break
#                 print ("b",parse_an_list)
#                 print ("c",eds_usable_nodes)

                if parse_idx == len(parse_an_list):
                    eds_star_mrp_jsons_usable[key] = copy.deepcopy(eds_star_mrp2)
                    eds_star_mrp_jsons_usable[key]['nodes'] = eds_usable_nodes.copy()
                elif parse_idx < len(parse_an_list):
                    for idx in range(parse_idx,len(parse_an_list)):
                        fr,too = parse_an_list[idx][0],parse_an_list[idx][1]
                        eds_usable_nodes.append(
                            {'anchors': [{'from': fr, 'to': too}],
                             'id': "<id>",
                             'label': "<dummy>",
                             'ignore': True}
                        )
                    eds_star_mrp_jsons_usable[key] = copy.deepcopy(eds_star_mrp2)
                    eds_star_mrp_jsons_usable[key]['nodes'] = copy.deepcopy(eds_usable_nodes)
                else:
                    pass
                #return
                #print (eds_star_mrp_jsons_usable)
            except Exception as e:
                print (e.__doc__)
                #print (e.message)
                pass
#             if(cnt_k == 30):
#                 return mrp_nodeid2type,id2eds_star_mrp_jsons,eds_abstract2noedge_cnt,concept_label_set,eds_star_mrp_jsons_usable
                #             #return
#                     else:
#                         #if xxx -BV-> node
#                         if 'BV' in mrp_node2inedges[mrp_id]:
# #                              node must be concept (mofy, yofc, named, pron)
# #                              record in token2concept_cnt
# #                                  discover pattern
#                              mrp_nodeid2type[mrp_id] = "concept"
#                              if not mrp_token in eds_token2concept_cnt:
#                                  eds_token2concept_cnt[mrp_token] = dict()
#                              if not mrp_label in eds_token2concept_cnt[mrp_token]:
#                                  eds_token2concept_cnt[mrp_token][mrp_label] = 0
#                              eds_token2concept_cnt[mrp_token][mrp_label] += 1
#                         elif
#                                  if surface -> node
#          node must be concept
                    #eds_q_cnt
                
#             # count in out edge for each label
#             for edge in mrp_edges:
#                 if (mrp_id2nodes[])
#                 if (mrp_id2nodes[edge['source']]['label'].endswith('_q')):
#                     print (key,edge,mrp_id2nodes[edge['source']]['label'],mrp_id2nodes[edge['target']]['label'])

#                 edge_label,edge_source,edge_target = edge['label'],edge['source'],edge['target']
#                     s = mrp_nodes[edge_source]
#                     t = mrp_nodes[edge_target]
#                     s_label = s['label']
#                     t_label = t['label']
#                     outkey = s_label
#                     inkey = t_label
#                     #print (outkey,inkey)
#                     if not outkey in node2outEdge_cnt:
#                         node2outEdge_cnt[outkey] = dict()
#                     if not edge_label in node2outEdge_cnt[outkey]:
#                         node2outEdge_cnt[outkey][edge_label] = 0
#                     node2outEdge_cnt[outkey][edge_label] += 1

#                     if not inkey in node2inEdge_cnt:
#                         node2inEdge_cnt[inkey] = dict()
#                     if not edge_label in node2inEdge_cnt[inkey]:
#                         node2inEdge_cnt[inkey][edge_label] = 0
#                     node2inEdge_cnt[inkey][edge_label] += 1



        
        return eds_q_cnt,eds_decor_cnt,mrp_nodeid2type,id2eds_star_mrp_jsons,eds_abstract2noedge_cnt,concept_label_set,eds_star_mrp_jsons_usable
#,token2labels


In [679]:
# Number of entries (one per graph id) currently held in eds_star_mrp_jsons_usable.
# NOTE(review): this name is only assigned by the EDS count_mapping('eds') cell that
# appears later in the notebook, so on a fresh kernel that cell has to run first.
len(eds_star_mrp_jsons_usable)

35467

In [None]:
# DM count mapping: unpack the seven tables returned by count_mapping('dm'),
# one target per line so the positional order is easy to audit.
(
    dm_label2frame_cnt,
    dm_singlenot_label2pos_cnt,
    dm_doublenot_label2pos1_cnt,
    dm_doublenot_label2pos2_cnt,
    dm_singlenot_label2frame_cnt,
    dm_doublenot_label2frame1_cnt,
    dm_doublenot_label2frame2_cnt,
) = count_mapping('dm')

In [None]:
# PSD count mapping: unpack the four tables returned by count_mapping('psd').
(
    psd_label2frame_cnt,
    psd_cmp2anposframe,
    psd_token2abstract_cnt,
    psd_tokenlemma2label,
) = count_mapping('psd')

In [None]:
# EDS count mapping: unpack the seven values returned by count_mapping('eds').
# NOTE(review): the fourth value is bound to `eds_star_mrp` here, whereas the
# `return` statement of the function defined above (presumably count_mapping's
# EDS branch) calls it `id2eds_star_mrp_jsons` -- confirm the rename is intended.
(
    eds_q_cnt,
    eds_decor_cnt,
    mrp_nodeid2type,
    eds_star_mrp,
    eds_abstract2noedge_cnt,
    concept_label_set,
    eds_star_mrp_jsons_usable,
) = count_mapping('eds')


In [675]:
# Preview only the first entry instead of echoing the whole dict: it holds tens of
# thousands of graphs, and the full repr bloats the saved notebook without adding
# information. The result keeps the same {graph_id: graph} shape as the full dict,
# and is simply {} when the dict is empty.
{
    graph_id: eds_star_mrp_jsons_usable[graph_id]
    for graph_id in list(eds_star_mrp_jsons_usable)[:1]
}

{'20001001': {'id': '20001001',
  'flavor': 1,
  'framework': 'eds',
  'version': 0.9,
  'time': '2019-04-10 (20:21)',
  'input': 'Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov. 29.',
  'tops': [10],
  'nodes': [{'id': 3,
    'label': 'named',
    'properties': ['carg'],
    'values': ['Pierre'],
    'anchors': [{'from': 0, 'to': 6}],
    'ignore': False},
   {'id': 4,
    'label': 'named',
    'properties': ['carg'],
    'values': ['Vinken'],
    'anchors': [{'from': 7, 'to': 13}],
    'ignore': False},
   {'anchors': [{'from': 13, 'to': 14}],
    'id': '<id>',
    'label': '<dummy>',
    'ignore': True},
   {'anchors': [{'from': 15, 'to': 17}],
    'id': '<id>',
    'label': '<dummy>',
    'ignore': True},
   {'id': 8,
    'label': '_year_n_1',
    'anchors': [{'from': 18, 'to': 23}],
    'ignore': False},
   {'id': 9,
    'label': '_old_a_1',
    'anchors': [{'from': 24, 'to': 27}],
    'ignore': False},
   {'anchors': [{'from': 27, 'to': 28}],
    

In [None]:
# list(zip(sorted(eds_q_cnt,key=eds_q_cnt.get,reverse=True),sorted(eds_q_cnt.values(),reverse=True)))


In [None]:
# list(zip(sorted(eds_decor_cnt,key=eds_decor_cnt.get,reverse=True),sorted(eds_decor_cnt.values(),reverse=True)))


### Generating node mapping for data (train/eval) (dm, psd)

In [None]:
# #DM mapping
# surface:
# 1. keep is/am/are/was/be, our/his/her/their, an, in own form
# 2. keep couldn't, didn't
# 3. according_to instead of accord,to , e.g. _in+principle_a_1 : ARG0 e, ARG1 e.
# 4. separate hyphen, e.g. high-performance ->performance
# 5. keep capital_letter, plural if not found in erg 
# 6. lemmatize/change_of_form of other words, e.g. mixed -> mix, selling -> sell, adv -> adj
# 7. punctuation has no frame

# frame:
#     surface:
#         1. search with lemma/reduced form (isamare->be,)
#     abstract: e.g. them (pron:x),
#         1. end with n't or not neg:e-h
#         2. 

In [None]:
# EDS:
# if token in eds_token2surface_cnt
#         convert (include ('_a_q', 17493),('_some_q', 1502) ...)
#     else:
#         pass
# if token in eds_token2decor_cnt:
#         convert
#     else:
     
    
    

# if _xxx
#     -> surface:
#         record in eds_token2surface_cnt
# else xxx_q:
#     -> decor:
#         [('udef_q', 133584),
#          ('proper_q', 62815),
#          ('pronoun_q', 21676),
#          ('def_explicit_q', 15818),
#          ('def_implicit_q', 4782),
#          ('number_q', 2432),
#             ...
#         record in eds_token2decor_cnt
# else:
#     if node1 -BV-> node2
#          node2 must be concept (mofy, yofc, named, pron, neg)
#          record in token2concept_cnt
#              discover pattern
#          node1 must be decor
# #     if surface -> node
# #          node must be concept

#     else if node -> surface/concept
#         node must be abstract (compound, subord)
#          discover pattern:
#              comp: 
#             (40, '_low_a_1', 'lower'),
#              (41, 'comp', 'lower'),
#             superl:
#              superl -ARG1-> hot for hottest (both node anchored)
#             poss:
#                 (12, 'poss', 'its'),
#                  (13, 'pronoun_q', 'its'),
#                  (14, 'pron', 'its'),
#                 pronoun_q -BV-> pron
#                 poss -ARG2-> pron
#             compound:
#                 use stat determine
#             neg:
#              neg -ARG1 -> increase for "not increasing", anchored separately
         
#     if a token only anchored once then that node must be abstracted
    
#     without node -BV-> xxx must be decor (compound, subord)
#     abstract --> surface if:

# concept->surface/abstract
# surface -> surface/concept/
# decor -> surface/concept
# abstract -> surface/concept/abstract

In [None]:
def gen_mapping(framework,parse,dm_info,psd_info):
    if framework == 'eds':
        #not finished ...
        #eds_info 
        
        parse_an2node = dict()
        predicted_nodes_mrp = []
        look_aheaded_idx = []
        for idx,parse_node in enumerate(parse):
            #print (idx,parse_node)
            ignore = False
            if idx in look_aheaded_idx:
                #print ("skip!")
                continue
            parse_token = parse_node[1]
            parse_lemma = parse_node[2]
            parse_sim_pos = parse_node[3]
            parse_pos = parse_node[4]
            if (parse_pos != 'NNP'):
                parse_token = parse_token.lower()
            parse_an = re.search(r"(\d+):(\d+)", parse_node[-1])
            parse_an_from = parse_an.group(1)
            parse_an_to = parse_an.group(2)
            parse_an2node[":".join([parse_an_from,parse_an_to])] = (parse_token,parse_lemma,parse_pos)
            #print (parse_an2node)
            #print ("\n")
            try:
                parse_next_token = parse[idx+1][1]
                parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
                parse_next_an_from = parse_next_an.group(1)
                parse_next_an_to = parse_next_an.group(2)
                parse_next_pos = parse[idx+1][3]
            except:
                parse_next_token = None
#                 parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
#                 parse_next_an_from = parse_next_an.group(1)
#                 parse_next_an_to = parse_next_an.group(2)
            try:
                parse_next2_token = parse[idx+2][1]
                parse_next2_an = re.search(r"(\d+):(\d+)", parse[idx+2][-1])
                parse_next2_an_from = parse_next2_an.group(1)
                parse_next2_an_to = parse_next2_an.group(2)
                parse_next2_pos = parse[idx+2][3]
            except:
                parse_next2_token = None
#                 parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
#                 parse_next_an_from = parse_next_an.group(1)
#                 parse_next_an_to = parse_next_an.group(2)
            
            predicted_node_info_list = []
            handled = False
            #not finished ...
            
            
    if framework == 'dm':
        [dm_label2frame_cnt,
            dm_singlenot_label2pos_cnt,dm_doublenot_label2pos1_cnt,dm_doublenot_label2pos2_cnt,
            dm_singlenot_label2frame_cnt,dm_doublenot_label2frame1_cnt,dm_doublenot_label2frame2_cnt,
            surface_dmlemma2frame,abstract_frame] = dm_info
        token2dm_lemma = {'is':'be','are':'be','was':'be','were':'be','am':'be',
                          'an':'a',"isn’t":"not","aren’t":"not","wasn’t":"not","weren’t":"not",
                          "ain’t":"not","haven’t":"not","hasn’t":"not","hadn’t":"not","won’t":"not",
                          "doesn’t":"not","don’t":"not","didn’t":"not","can’t":"not","shalln’t":"not",
                          "shouldn’t":"not","couldn’t":"not","wouldn’t":"not","mustn’t":"not","mightn’t":"not","mayn’t":"not"
                         }
        dohavewill = {"do","did","does","have","had","has","might","may","can","could","wo","would","should","must","is","am","are","ai","was","were"}
        md_double = {"shouldn’t","couldn’t","wouldn’t","mustn’t","mightn’t","mayn’t"}
        #extract erg
        #
        #extract compound (_+_)
#         cnt_cmp = 0
#         cmp_label2frame = dict()
#         for dmlemma in surface_dmlemma2frame:
#             if "+" in dmlemma:
#                 if not dmlemma in cmp_label2frame:
#                     cmp_label2frame[dmlemma] = dict()
#                     cmp_label2frame[dmlemma][surface_dmlemma2frame[dmlemma]] = 
#                 print (dmlemma)
#                 cnt_cmp += 1
#         print (cnt_cmp)
        
        
        #extract parse token, lemma and pos for all nodes into parse_an2node
        #for each parse_lemma
        parse_an2node = dict()
        predicted_nodes_mrp = []
        look_aheaded_idx = []
        for idx,parse_node in enumerate(parse):
            #print (idx,parse_node)
            ignore = False
            if idx in look_aheaded_idx:
                #print ("skip!")
                continue
            parse_token = parse_node[1]
            parse_lemma = parse_node[2]
            parse_sim_pos = parse_node[3]
            parse_pos = parse_node[4]
            if (parse_pos != 'NNP'):
                parse_token = parse_token.lower()
            parse_an = re.search(r"(\d+):(\d+)", parse_node[-1])
            parse_an_from = parse_an.group(1)
            parse_an_to = parse_an.group(2)
            parse_an2node[":".join([parse_an_from,parse_an_to])] = (parse_token,parse_lemma,parse_pos)
            #print (parse_an2node)
            #print ("\n")
            try:
                parse_next_token = parse[idx+1][1]
                parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
                parse_next_an_from = parse_next_an.group(1)
                parse_next_an_to = parse_next_an.group(2)
                parse_next_pos = parse[idx+1][3]
            except:
                parse_next_token = None
#                 parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
#                 parse_next_an_from = parse_next_an.group(1)
#                 parse_next_an_to = parse_next_an.group(2)
            try:
                parse_next2_token = parse[idx+2][1]
                parse_next2_an = re.search(r"(\d+):(\d+)", parse[idx+2][-1])
                parse_next2_an_from = parse_next2_an.group(1)
                parse_next2_an_to = parse_next2_an.group(2)
                parse_next2_pos = parse[idx+2][3]
            except:
                parse_next2_token = None
#                 parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
#                 parse_next_an_from = parse_next_an.group(1)
#                 parse_next_an_to = parse_next_an.group(2)
            
            predicted_node_info_list = []
            handled = False
            #if it is capitalized (any chracter) -> NNP
            if (parse_pos == 'NNP'):
                #print ("name!!!")
                #frame = named_<determined_by...>
                #if seen in training data
                    #use most frequent frame
                predict_an_from = parse_an_from
                predict_an_to = parse_an_to
                predict_id = idx
                predict_label = parse_lemma
                predict_pos = parse_pos
                if parse_lemma in dm_label2frame_cnt:
                    predict_frame = max(dm_label2frame_cnt[parse_lemma], key=dm_label2frame_cnt[parse_lemma].get)
                #else
                else:
                    #named_<any>
                    predict_frame = 'named:x-c'
                predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                handled = True
                #print (predicted_node_info_list)
            #if lemma1,2,3, then lemma1,2 (in lemma form) if appear in erg compound list
                 #if yes
                    #create two node
                        #pos1 = pos1, frame = that frame
                        #pos2 = pos2, frame = that frame
                    #skip next(1/2) lemma
            if (parse_next2_token!=None and handled == False):
                #lemma123
                predict_an_from = parse_an_from
                predict_an_to = parse_an_to
                predict_id = idx
                predict_pos = parse_pos
                lemma123 = "+".join([parse_token,parse_next_token,parse_next2_token])
                predict_label = lemma123
                if lemma123 in dm_label2frame_cnt:
                    predict_frame = max(dm_label2frame_cnt[lemma123], key=dm_label2frame_cnt[lemma123].get)
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    predicted_node_info_list.append((parse_next_an_from,parse_next_an_to,predict_id+1,predict_label,parse_next_pos,predict_frame,ignore))
                    predicted_node_info_list.append((parse_next2_an_from,parse_next2_an_to,predict_id+2,predict_label,parse_next2_pos,predict_frame,ignore))
                    look_aheaded_idx.append(idx+1)
                    look_aheaded_idx.append(idx+2)
                    handled = True
                elif lemma123 in surface_dmlemma2frame:
                    predict_frame = surface_dmlemma2frame[lemma123][0]
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    predicted_node_info_list.append((parse_next_an_from,parse_next_an_to,predict_id+1,predict_label,parse_next_pos,predict_frame,ignore))
                    predicted_node_info_list.append((parse_next2_an_from,parse_next2_an_to,predict_id+2,predict_label,parse_next2_pos,predict_frame,ignore))
                    look_aheaded_idx.append(idx+1)
                    look_aheaded_idx.append(idx+2)
                    handled = True
                else:
                    if(parse_next_token!=None):
                        #lemma12
                        lemma12 = "+".join([parse_token,parse_next_token])
                        predict_label = lemma12
                        try:
                            if lemma12 in dm_label2frame_cnt:
                                predict_frame = max(dm_label2frame_cnt[lemma12], key=dm_label2frame_cnt[lemma12].get)
                                predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                                predicted_node_info_list.append((parse_next_an_from,parse_next_an_to,predict_id+1,predict_label,parse_next_pos,predict_frame,ignore))
                                look_aheaded_idx.append(idx+1)
                                handled = True
                            elif lemma12 in surface_dmlemma2frame:
                                predict_frame = surface_dmlemma2frame[lemma12][0]
                                predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                                predicted_node_info_list.append((parse_next_an_from,parse_next_an_to,predict_id+1,predict_label,parse_next_pos,predict_frame,ignore))
                                look_aheaded_idx.append(idx+1)
                                handled = True
                        except Exception as e:
                            print (e)
                            print (lemma12)
                    else:
                        #print ("not +!")
                        pass
            #else if it is "do,have,be"
            if (parse_token in dohavewill and handled == False):
                #if next token is n't
                if parse_next_token == "n’t":
                    #merge e.g. do n't -> don't
                    predict_label = parse_token + "n’t"
                    #pos 
                    if(predict_label in md_double):
                        #double node
                        #first node
                        predict_an_from = parse_an_from
                        predict_an_to = parse_an_to
                        predict_id = idx
                        predict_pos = 'MD'
                        predict_frame = 'neg:e-h'
                        predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                        #second node
                        predict_an_from = parse_next_an_from
                        predict_an_to = parse_next_an_to
                        predict_id = idx+1
                        if predict_label in dm_doublenot_label2pos2_cnt:
                            predict_pos = max(dm_doublenot_label2pos2_cnt[predict_label], key=dm_doublenot_label2pos2_cnt[predict_label].get)
                        #else
                        else:
                            #named_<any>
                            predict_pos = 'VB'
                        predict_frame = 'neg:e-h'
                        predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))

                    else:
                        #single node (add dummy node for first parse token)
                        predict_an_from = parse_an_from
                        predict_an_to = parse_an_to
                        predict_id = idx
                        predict_pos = 'N/A'
                        predict_frame = 'N/A'
                        predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,True))
                    
                        predict_an_from = parse_next_an_from
                        predict_an_to = parse_next_an_to
                        predict_id = idx+1
                        if predict_label in dm_singlenot_label2frame_cnt:
                            predict_pos = max(dm_singlenot_label2pos_cnt[predict_label], key=dm_singlenot_label2pos_cnt[predict_label].get)
                        else:
                            predict_pos = 'RB'
                        predict_frame = 'neg:e-h'
                        predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    #skip next lemma
                    look_aheaded_idx.append(idx+1)
                    handled = True
                else:
                    predict_an_from = parse_an_from
                    predict_an_to = parse_an_to
                    predict_id = idx
                    predict_pos = parse_pos
                    predict_label = parse_token
                    if predict_label == 'had' or predict_label == 'has':
                        predict_label = 'have'
                    if predict_label == 'did' or predict_label == 'does':
                        predict_label = 'do'
                    #if appeared in training
                    if predict_label in dm_label2frame_cnt:
                        #use most frequent frame
                        predict_frame = max(dm_label2frame_cnt[predict_label], key=dm_label2frame_cnt[predict_label].get)
                    else:
                        predict_frame = "unknown"
                        print (parse_node)
                        print ("not found?!")
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
            #else if it is verb
            if parse_sim_pos == 'VERB' and handled == False:
                predict_an_from = parse_an_from
                predict_an_to = parse_an_to
                predict_id = idx
                predict_pos = parse_pos
                #if found in training
                if parse_token in dm_label2frame_cnt:
                    predict_label = parse_token
                    predict_frame = max(dm_label2frame_cnt[predict_label], key=dm_label2frame_cnt[predict_label].get)
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
                #if found in erg
                elif parse_token in surface_dmlemma2frame:
                    predict_label = parse_token
                    predict_frame =  surface_dmlemma2frame[parse_token][0]
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
                #else use lemma
                #frame = most frequent one if appear in training
                    #else frame = retrieve from erg (any)
                elif parse_lemma in dm_label2frame_cnt:
                    predict_label = parse_lemma
                    predict_frame = max(dm_label2frame_cnt[predict_label], key=dm_label2frame_cnt[predict_label].get)
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
                elif parse_lemma in surface_dmlemma2frame:
                    predict_label = parse_lemma
                    predict_frame =  surface_dmlemma2frame[predict_label][0]
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
                else:
                    print ("error2")
                    #print (parse_node)
            #if it is adv
            if parse_sim_pos == 'ADV' and parse_token[-2:]=='ly' and handled == False:
                predict_an_from = parse_an_from
                predict_an_to = parse_an_to
                predict_id = idx
                predict_pos = parse_pos
                predict_label = parse_token
                #if found in training
                if parse_token in dm_label2frame_cnt:
                    predict_frame = max(dm_label2frame_cnt[predict_label], key=dm_label2frame_cnt[predict_label].get)
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
                #if found in erg
                elif parse_token in surface_dmlemma2frame:
                    predict_frame =  surface_dmlemma2frame[parse_token][0]
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
                #change to adj and find in erg
                elif(parse_token[:-2] in surface_dmlemma2frame):
                    predict_label = parse_token[:-2]
                    predict_frame = surface_dmlemma2frame[predict_label][0]
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
                else:
                    print ("error3")
                    #print (node)
            #else
            if handled == False:
                predict_an_from = parse_an_from
                predict_an_to = parse_an_to
                predict_id = idx
                predict_pos = parse_pos
                predict_label = parse_token
                #if appeared in training
                if predict_label in dm_label2frame_cnt:
                    #use most frequent frame
                    predict_frame = max(dm_label2frame_cnt[predict_label], key=dm_label2frame_cnt[predict_label].get)
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                elif predict_label in surface_dmlemma2frame:
                    predict_frame =  surface_dmlemma2frame[predict_label][0]
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                #search with lemma
                elif parse_lemma in dm_label2frame_cnt:
                    #use most frequent frame
                    if (parse_sim_pos == 'NOUN'):
                        predict_label = parse_lemma
                    predict_frame = max(dm_label2frame_cnt[parse_lemma], key=dm_label2frame_cnt[parse_lemma].get)
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                elif parse_lemma in surface_dmlemma2frame:
                    if (parse_sim_pos == 'NOUN'):
                        predict_label = parse_lemma
                    predict_frame =  surface_dmlemma2frame[parse_lemma][0]
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                #else
                else:
                    #ignore punctuation (add dummy node)
                    if (not parse_sim_pos == "PUNCT"):
                        predict_frame = 'n:x'
                        #break hyphen
                        if ("-" in parse_token):
                            hyphen = re.search(r"(\w+)-(\w+)", parse_token)
                            try:
                                suffix_token = hyphen.group(2)
                                predict_label = suffix_token
                                if predict_label in dm_label2frame_cnt:
                                    #use most frequent frame
                                    predict_frame = max(dm_label2frame_cnt[predict_label], key=dm_label2frame_cnt[predict_label].get)
                                elif predict_label in surface_dmlemma2frame:
                                    predict_frame =  surface_dmlemma2frame[predict_label][0]
                            except:
                                pass
                        else:
                            #use any
                            predict_frame = 'n:x'
                            #print ("error4")
                            #print (parse_node)
                        predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                        handled = True
                    else:
                        predict_an_from = parse_an_from
                        predict_an_to = parse_an_to
                        predict_id = idx
                        predict_pos = '(PUNCT)'
                        predict_frame = 'N/A'
                        predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,True))
                    
            #print ("b:",predicted_node_info_list)
            if predicted_node_info_list != []:
                for (predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore) in predicted_node_info_list:
                    predicted_nodes_mrp.append(
                        {'anchors': [{'from': int(predict_an_from), 'to': int(predict_an_to)}],
                         'id': predict_id,
                         'label': predict_label,
                         'properties': ['pos', 'frame'],
                         'values': [predict_pos, predict_frame],
                         'ignore': ignore})
        #pprint (predicted_nodes_mrp)
        #print ("\n")
        return predicted_nodes_mrp
    
    elif framework == 'psd':
        [psd_label2frame_cnt,
         psd_cmp2anposframe,
         psd_token2abstract_cnt,
         psd_tokenlemma2label] = psd_info
        look_aheaded_idx = []
        predicted_nodes_mrp = []
        parse_an2node = dict()
        for idx,parse_node in enumerate(parse):
            #print (idx,parse_node)
            ignore = False
            if idx in look_aheaded_idx:
                #print ("skip!")
                continue
            parse_token = parse_node[1].lower()
            parse_lemma = parse_node[2].lower()
            parse_sim_pos = parse_node[3]
            parse_pos = parse_node[4]
            parse_an = re.search(r"(\d+):(\d+)", parse_node[-1])
            parse_an_from = parse_an.group(1)
            parse_an_to = parse_an.group(2)
            parse_an2node[":".join([parse_an_from,parse_an_to])] = (parse_token,parse_lemma,parse_pos)
            #print (parse_an2node)
            #print ("\n")
            try:
                parse_next_token = parse[idx+1][1]
                parse_next_lemma = parse[idx+1][2].lower()
                parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
                parse_next_an_from = parse_next_an.group(1)
                parse_next_an_to = parse_next_an.group(2)
                parse_next_pos = parse[idx+1][3]
            except:
                parse_next_token = None
#                 parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
#                 parse_next_an_from = parse_next_an.group(1)
#                 parse_next_an_to = parse_next_an.group(2)
            try:
                parse_next2_token = parse[idx+2][1]
                parse_next2_lemma = parse[idx+2][2].lower()
                parse_next2_an = re.search(r"(\d+):(\d+)", parse[idx+2][-1])
                parse_next2_an_from = parse_next2_an.group(1)
                parse_next2_an_to = parse_next2_an.group(2)
                parse_next2_pos = parse[idx+2][3]
            except:
                parse_next2_token = None
#                 parse_next_an = re.search(r"(\d+):(\d+)", parse[idx+1][-1])
#                 parse_next_an_from = parse_next_an.group(1)
#                 parse_next_an_to = parse_next_an.group(2)
            
            predicted_node_info_list = []
            handled = False
            
            #if lemma1,2,3, then lemma1,2 (in lemma form) if appear in erg compound list
                 #if yes
                    #create node
                        #pos1 = pos1, frame = that frame
                    #skip next(1/2) lemma
            if (parse_next2_token!=None and handled == False):
                #lemma123
                lemma123 = "_".join([parse_lemma,parse_next_lemma,parse_next2_lemma])
                predict_label = lemma123
                if lemma123 in psd_cmp2anposframe:
                    pos_choice = {anposframe[4:]:psd_cmp2anposframe[lemma123][anposframe] for anposframe in psd_cmp2anposframe[lemma123] if anposframe.startswith("POS")}
                    frame_choice = {anposframe[6:]:psd_cmp2anposframe[lemma123][anposframe] for anposframe in psd_cmp2anposframe[lemma123] if anposframe.startswith("FRAME")}
                    an_choice = {anposframe[3:]:psd_cmp2anposframe[lemma123][anposframe] for anposframe in psd_cmp2anposframe[lemma123] if anposframe.startswith("AN")}
                    predict_pos = max(pos_choice, key=pos_choice.get)
                    if frame_choice:
                        predict_frame = max(frame_choice, key=frame_choice.get)
                    else:
                        predict_frame = None
                    predict_an = max(an_choice, key=an_choice.get)
                    #process an
                    ignore1 = True
                    ignore2 = True
                    ignore3 = True
                    if predict_an==parse_lemma:
                        predict_id = idx
                        predict_an_from = parse_an_from
                        predict_an_to = parse_an_to
                        ignore1 = False
                    elif predict_an==parse_next_lemma:
                        predict_id = idx+1
                        predict_an_from = parse_next_an_from
                        predict_an_to = parse_next_an_to
                        ignore2 = False
                    elif predict_an==parse_next2_lemma:
                        predict_id = idx+2
                        predict_an_from = parse_next2_an_from
                        predict_an_to = parse_next2_an_to
                        ignore3 = False
                    predicted_node_info_list.append((parse_an_from,parse_an_to,idx,predict_label,predict_pos,predict_frame,ignore1))
                    predicted_node_info_list.append((parse_next_an_from,parse_next_an_to,idx+1,predict_label,predict_pos,predict_frame,ignore2))
                    predicted_node_info_list.append((parse_next2_an_from,parse_next2_an_to,idx+2,predict_label,predict_pos,predict_frame,ignore3))
                    look_aheaded_idx.append(idx+1)
                    look_aheaded_idx.append(idx+2)
                    handled = True
                else:
                    if(parse_next_token!=None):
                        #lemma12
                        lemma12 = "_".join([parse_lemma,parse_next_lemma])
                        predict_label = lemma12
                        if lemma12 in psd_cmp2anposframe:
                            pos_choice = {anposframe[4:]:psd_cmp2anposframe[lemma12][anposframe] for anposframe in psd_cmp2anposframe[lemma12] if anposframe.startswith("POS")}
                            frame_choice = {anposframe[6:]:psd_cmp2anposframe[lemma12][anposframe] for anposframe in psd_cmp2anposframe[lemma12] if anposframe.startswith("FRAME")}
                            an_choice = {anposframe[3:]:psd_cmp2anposframe[lemma12][anposframe] for anposframe in psd_cmp2anposframe[lemma12] if anposframe.startswith("AN")}
                            predict_pos = max(pos_choice, key=pos_choice.get)
                            if frame_choice:
                                predict_frame = max(frame_choice, key=frame_choice.get)
                            else:
                                predict_frame = None
                            predict_an = max(an_choice, key=an_choice.get)
                            #process anchor
                            ignore1 = True
                            ignore2 = True
                            if predict_an==parse_lemma:
                                predict_id = idx
                                predict_an_from = parse_an_from
                                predict_an_to = parse_an_to
                                ignore1 = False
                            elif predict_an==parse_next_lemma:
                                predict_id = idx+1
                                predict_an_from = parse_next_an_from
                                predict_an_to = parse_next_an_to
                                ignore2 = False
                            predicted_node_info_list.append((parse_an_from,parse_an_to,idx,predict_label,predict_pos,predict_frame,ignore1))
                            predicted_node_info_list.append((parse_next_an_from,parse_next_an_to,idx+1,predict_label,predict_pos,predict_frame,ignore2))
                            look_aheaded_idx.append(idx+1)
                            handled = True
                    else:
                        #print ("not +!")
                        pass
            #if abstract
            if (handled == False):
                if(parse_token+","+parse_pos in psd_token2abstract_cnt):
                    predict_id = idx
                    predict_label,predict_pos = max(psd_token2abstract_cnt[parse_token+","+parse_pos], key=psd_token2abstract_cnt[parse_token+","+parse_pos].get).split(",",1)
#                     predict_label_predict_pos
                    predict_an_from = parse_an_from
                    predict_an_to = parse_an_to
                    predict_frame = None
                    predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
                    handled = True
            #if appeared in 
            if (handled == False):
                predict_id = idx
                predict_pos = parse_pos
                predict_an_from = parse_an_from
                predict_an_to = parse_an_to
                if(parse_token+","+parse_lemma in psd_tokenlemma2label):
                    predict_label = psd_tokenlemma2label[parse_token+","+parse_lemma]
                    if predict_label in psd_label2frame_cnt:
                        predict_frame = max(psd_label2frame_cnt[predict_label], key=psd_label2frame_cnt[predict_label].get)
                    else:
                        predict_frame = None

                    #lemma as label
                else:
                    predict_label = parse_lemma
                    if predict_label in psd_label2frame_cnt:
                        predict_frame = max(psd_label2frame_cnt[predict_label], key=psd_label2frame_cnt[predict_label].get)
                    else:
                        predict_frame = None
                predicted_node_info_list.append((predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore))
            
            if predicted_node_info_list != []:
                #print (predicted_node_info_list)
                for (predict_an_from,predict_an_to,predict_id,predict_label,predict_pos,predict_frame,ignore) in predicted_node_info_list:
                    if predict_frame!=None:
                        predicted_nodes_mrp.append(
                            {'anchors': [{'from': int(predict_an_from), 'to': int(predict_an_to)}],
                             'id': predict_id,
                             'label': predict_label,
                             'properties': ['pos', 'frame'],
                             'values': [predict_pos, predict_frame],
                             'ignore': ignore
                            })
                    else:
                        predicted_nodes_mrp.append(
                            {'anchors': [{'from': int(predict_an_from), 'to': int(predict_an_to)}],
                             'id': predict_id,
                             'label': predict_label,
                             'properties': ['pos'],
                             'values': [predict_pos],
                             'ignore': ignore
                            })
        #pprint (predicted_nodes_mrp)
        #print ("\n")
        return predicted_nodes_mrp
                
                
                    

### Benchmarking of node generation on training data

In [None]:
# Benchmark: compare the nodes produced by gen_mapping with the gold DM training nodes.
#
# Two ratios are printed at the end, both divided by the total number of gold nodes:
#   1. the number of (gold, generated) node pairs that share the same first anchor and
#      agree on every gold field other than 'anchors' and 'id';
#   2. the number of (gold, generated) node pairs that share the same first anchor
#      where the generated node is not flagged 'ignore'.

# Lookup tables handed to gen_mapping; they are identical for every graph, so build them once.
dm_tables = [dm_label2frame_cnt,
             dm_singlenot_label2pos_cnt, dm_doublenot_label2pos1_cnt, dm_doublenot_label2pos2_cnt,
             dm_singlenot_label2frame_cnt, dm_doublenot_label2frame1_cnt, dm_doublenot_label2frame2_cnt,
             surface_dmlemma2frame, abstract_frame]
psd_tables = [psd_label2frame_cnt,
              psd_cmp2anposframe,
              psd_token2abstract_cnt,
              psd_tokenlemma2label]

total_num_nodes = 0
total_num_cor_nodes = 0
total_num_cor_an = 0
for data_id in framework2id2mrp_jsons['dm']:
    true_nodes = framework2id2mrp_jsons['dm'][data_id]['nodes']
    gen_nodes = gen_mapping('dm', framework2cid2parse['dm'][data_id], dm_tables, psd_tables)

    num_nodes = 0
    num_cor_nodes = 0
    num_cor_an = 0
    for mrp_node in true_nodes:
        num_nodes += 1
        for gen_node in gen_nodes:
            # Only the first anchor of each node is compared.
            mrp_anchor = mrp_node['anchors'][0]
            gen_anchor = gen_node['anchors'][0]
            same_span = (mrp_anchor['from'] == gen_anchor['from']
                         and mrp_anchor['to'] == gen_anchor['to'])
            if not same_span:
                # A generated node on a different span never counts as correct.
                continue
            if not gen_node['ignore']:
                num_cor_an += 1
            # Every gold field except the anchors and the id must be reproduced exactly.
            if all(mrp_node[key] == gen_node[key]
                   for key in mrp_node if key not in ('anchors', 'id')):
                num_cor_nodes += 1

    if num_nodes == 0:
        # Gold graph without nodes: show both sides for manual inspection.
        print (gen_nodes,true_nodes)

    total_num_nodes += num_nodes
    total_num_cor_nodes += num_cor_nodes
    total_num_cor_an += num_cor_an

# Guard the ratios: with no gold nodes at all the divisions would raise ZeroDivisionError.
if total_num_nodes:
    print(total_num_cor_nodes/total_num_nodes)
    print(total_num_cor_an/total_num_nodes)
else:
    print("no gold nodes to evaluate")



In [None]:
# Benchmark: compare the nodes produced by gen_mapping with the gold PSD training nodes.
#
# Two ratios are printed at the end, both divided by the total number of gold nodes:
#   1. the number of (gold, generated) node pairs that share the same first anchor and
#      agree on every gold field other than 'anchors' and 'id';
#   2. the number of (gold, generated) node pairs that share the same first anchor
#      where the generated node is not flagged 'ignore'.
framework_cmp = 'psd'

# Lookup tables handed to gen_mapping; they are identical for every graph, so build them once.
dm_tables = [dm_label2frame_cnt,
             dm_singlenot_label2pos_cnt, dm_doublenot_label2pos1_cnt, dm_doublenot_label2pos2_cnt,
             dm_singlenot_label2frame_cnt, dm_doublenot_label2frame1_cnt, dm_doublenot_label2frame2_cnt,
             surface_dmlemma2frame, abstract_frame]
psd_tables = [psd_label2frame_cnt,
              psd_cmp2anposframe,
              psd_token2abstract_cnt,
              psd_tokenlemma2label]

total_num_nodes = 0
total_num_cor_nodes = 0
total_num_cor_an = 0
for data_id in framework2id2mrp_jsons[framework_cmp]:
    true_nodes = framework2id2mrp_jsons[framework_cmp][data_id]['nodes']
    gen_nodes = gen_mapping(framework_cmp, framework2cid2parse[framework_cmp][data_id],
                            dm_tables, psd_tables)

    num_nodes = 0
    num_cor_nodes = 0
    num_cor_an = 0
    for mrp_node in true_nodes:
        num_nodes += 1
        for gen_node in gen_nodes:
            # Only the first anchor of each node is compared.
            mrp_anchor = mrp_node['anchors'][0]
            gen_anchor = gen_node['anchors'][0]
            same_span = (mrp_anchor['from'] == gen_anchor['from']
                         and mrp_anchor['to'] == gen_anchor['to'])
            if not same_span:
                # A generated node on a different span never counts as correct.
                continue
            if not gen_node['ignore']:
                num_cor_an += 1
            # Every gold field except the anchors and the id must be reproduced exactly.
            if all(mrp_node[key] == gen_node[key]
                   for key in mrp_node if key not in ('anchors', 'id')):
                num_cor_nodes += 1

    if num_nodes == 0:
        # Gold graph without nodes: show both sides for manual inspection.
        print (gen_nodes,true_nodes)

    total_num_nodes += num_nodes
    total_num_cor_nodes += num_cor_nodes
    total_num_cor_an += num_cor_an

# Guard the ratios: with no gold nodes at all the divisions would raise ZeroDivisionError.
if total_num_nodes:
    print(total_num_cor_nodes/total_num_nodes)
    print(total_num_cor_an/total_num_nodes)
else:
    print("no gold nodes to evaluate")



### Predict node mappings for the training (dm, psd, eds) and evaluation (dm, psd) data and export them as JSON

In [None]:
# Predict DM nodes for every training parse and dump the id -> nodes mapping as JSON.
# The DM lookup tables are the same for every call, so the list is assembled once.
dm_lookup_tables = [
    dm_label2frame_cnt,
    dm_singlenot_label2pos_cnt, dm_doublenot_label2pos1_cnt, dm_doublenot_label2pos2_cnt,
    dm_singlenot_label2frame_cnt, dm_doublenot_label2frame1_cnt, dm_doublenot_label2frame2_cnt,
    surface_dmlemma2frame, abstract_frame,
]

train_dm_id2predict_nodes = {}
for data_id in framework2cid2parse['dm']:
    predict_nodes = gen_mapping('dm', framework2cid2parse['dm'][data_id], dm_lookup_tables, [])

    # Anchor spans of all nodes that are not flagged 'ignore'.
    kept_spans = [
        (node['anchors'][0]['from'], node['anchors'][0]['to'])
        for node in predict_nodes
        if node['ignore']==False
    ]
    # Sanity check: two kept nodes on the same span. Dump the evidence and stop;
    # whatever was collected before this point is still written to disk below.
    if len(set(kept_spans)) < len(kept_spans):
        for evidence in (predict_nodes, kept_spans, set(kept_spans)):
            pprint (evidence)
        break

    train_dm_id2predict_nodes[data_id] = predict_nodes

with open("../nodes_prediction/train_dm_id2predict_nodes.json", 'w') as out_file:
    json.dump(train_dm_id2predict_nodes, out_file)

In [None]:
# Number of entries in train_dm_id2predict_nodes (one per data_id that was stored).
len(train_dm_id2predict_nodes)

In [None]:
# Predict DM nodes for every evaluation sentence and dump the id -> nodes mapping as JSON.
# The DM lookup tables are the same for every call, so the list is assembled once.
dm_lookup_tables = [
    dm_label2frame_cnt,
    dm_singlenot_label2pos_cnt, dm_doublenot_label2pos1_cnt, dm_doublenot_label2pos2_cnt,
    dm_singlenot_label2frame_cnt, dm_doublenot_label2frame1_cnt, dm_doublenot_label2frame2_cnt,
    surface_dmlemma2frame, abstract_frame,
]

eval_dm_id2predict_nodes = {}
for eval_id in eval_framework2id2data['dm']:
    # Evaluation parses are looked up by id in eval_id2parse; no PSD tables are passed.
    eval_dm_id2predict_nodes[eval_id] = gen_mapping(
        'dm', eval_id2parse[eval_id], dm_lookup_tables, [])

with open("../nodes_prediction/eval_dm_id2predict_nodes.json", 'w') as out_file:
    json.dump(eval_dm_id2predict_nodes, out_file)

In [None]:
# Predict PSD nodes for every training parse and dump the id -> nodes mapping as JSON.
# Both table lists are the same for every call, so they are assembled once.
dm_lookup_tables = [
    dm_label2frame_cnt,
    dm_singlenot_label2pos_cnt, dm_doublenot_label2pos1_cnt, dm_doublenot_label2pos2_cnt,
    dm_singlenot_label2frame_cnt, dm_doublenot_label2frame1_cnt, dm_doublenot_label2frame2_cnt,
    surface_dmlemma2frame, abstract_frame,
]
psd_lookup_tables = [
    psd_label2frame_cnt,
    psd_cmp2anposframe,
    psd_token2abstract_cnt,
    psd_tokenlemma2label,
]

train_psd_id2predict_nodes = {}
for data_id in framework2cid2parse['psd']:
    train_psd_id2predict_nodes[data_id] = gen_mapping(
        'psd', framework2cid2parse['psd'][data_id], dm_lookup_tables, psd_lookup_tables)

with open("../nodes_prediction/train_psd_id2predict_nodes.json", 'w') as out_file:
    json.dump(train_psd_id2predict_nodes, out_file)

In [None]:
# Predict PSD nodes for every evaluation sentence and dump the id -> nodes mapping as JSON.
# Both table lists are the same for every call, so they are assembled once.
dm_lookup_tables = [
    dm_label2frame_cnt,
    dm_singlenot_label2pos_cnt, dm_doublenot_label2pos1_cnt, dm_doublenot_label2pos2_cnt,
    dm_singlenot_label2frame_cnt, dm_doublenot_label2frame1_cnt, dm_doublenot_label2frame2_cnt,
    surface_dmlemma2frame, abstract_frame,
]
psd_lookup_tables = [
    psd_label2frame_cnt,
    psd_cmp2anposframe,
    psd_token2abstract_cnt,
    psd_tokenlemma2label,
]

eval_psd_id2predict_nodes = {}
for eval_id in eval_framework2id2data['psd']:
    # Evaluation parses are looked up by id in eval_id2parse.
    eval_psd_id2predict_nodes[eval_id] = gen_mapping(
        'psd', eval_id2parse[eval_id], dm_lookup_tables, psd_lookup_tables)

with open("../nodes_prediction/eval_psd_id2predict_nodes.json", 'w') as out_file:
    json.dump(eval_psd_id2predict_nodes, out_file)

In [680]:
# Write the EDS training data next to the DM/PSD node predictions.
# NOTE(review): this cell carries an execution count while its neighbours are unexecuted;
# confirm eds_star_mrp_jsons_usable is defined before this cell on a fresh Restart & Run All.
with open("../nodes_prediction/train_eds_id2predict_nodes.json", 'w') as eds_out_file:
    json.dump(eds_star_mrp_jsons_usable, eds_out_file)
    

### Summary statistics for EDS

In [None]:
def count_eds_stat(id2mrp_json=None, cid2parse=None, concept_labels=frozenset(),
                   show_progress=True):
    """Count which EDS node labels co-occur with which companion-parse tokens.

    Each graph node is aligned to a companion token through its FIRST anchor:
    the node matches the token whose ``from:to`` digits (searched for in the
    last field of the token row) equal the anchor's ``from`` and ``to``.
    Nodes without anchors, without a label, or whose anchor matches no token
    are skipped.

    An aligned node is classified by its lower-cased label; the first matching
    rule wins:

    * label starts with ``_``        -> counted in the first table under
      ``lemma + "||" + pos``
    * label ends with ``_q``         -> counted in the second table under ``pos``
    * label is in ``concept_labels`` -> counted in the third table under the
      token form

    Args:
        id2mrp_json: mapping graph id -> graph dict; only its ``'nodes'`` list
            is read. Defaults to the notebook global
            ``framework2id2mrp_jsons['eds']``.
        cid2parse: mapping graph id -> sequence of token rows. Row fields 1, 2
            and 4 are used as token form, lemma and POS tag, and the last field
            is searched for ``<digits>:<digits>`` (presumably CoNLL-U-style
            rows with a TokenRange — confirm against the loader). Defaults to
            the notebook global ``framework2cid2parse['eds']``.
        concept_labels: lower-cased labels to count as concepts. The default is
            empty, which matches the previous behaviour where the concept set
            was never filled and the third table therefore stayed empty.
        show_progress: wrap the graph loop in ``tqdm`` (default) or not.

    Returns:
        A 3-tuple of plain ``dict`` objects, each ``{key: {label: count}}``:
        ``(eds_lemmapos2surface_cnt, eds_token2decor_cnt,
        eds_token2concept_cnt)``.

        NOTE(review): despite its name, ``eds_token2decor_cnt`` is keyed by the
        POS tag (row field 4), not by the token. This is kept as-is because
        downstream consumers are not visible here — confirm the intent.

    Raises:
        KeyError: if a graph id has no entry in ``cid2parse``, or a graph dict
            has no ``'nodes'`` entry.
    """
    if id2mrp_json is None:
        id2mrp_json = framework2id2mrp_jsons['eds']
    if cid2parse is None:
        cid2parse = framework2cid2parse['eds']

    eds_lemmapos2surface_cnt = dict()
    eds_token2decor_cnt = dict()
    eds_token2concept_cnt = dict()

    def bump(table, key, label):
        # table[key][label] += 1, creating the nested entries on first use.
        per_key = table.setdefault(key, dict())
        per_key[label] = per_key.get(label, 0) + 1

    graph_ids = tqdm(id2mrp_json) if show_progress else id2mrp_json
    for graph_id in graph_ids:
        # "from:to" character span -> (token, lemma, pos); if two rows share a
        # span, the later row wins.
        span2token = dict()
        for row in cid2parse[graph_id]:
            span = re.search(r"(\d+):(\d+)", row[-1])
            if span is None:
                # A row without a character range cannot be aligned to a node.
                continue
            span2token[span.group(1) + ":" + span.group(2)] = (row[1], row[2], row[4])

        # Index by node id first so a repeated id is counted once (last wins).
        id2node = {node['id']: node for node in id2mrp_json[graph_id]['nodes']}
        for node in id2node.values():
            anchors = node.get('anchors')
            label = (node.get('label') or '').lower()
            if not anchors or not label:
                continue

            span_key = str(anchors[0]['from']) + ":" + str(anchors[0]['to'])
            token_info = span2token.get(span_key)
            if token_info is None:
                # The anchor does not coincide with exactly one token span.
                continue
            token, lemma, pos = token_info

            if label.startswith('_'):
                bump(eds_lemmapos2surface_cnt, lemma + "||" + pos, label)
            elif label.endswith('_q'):
                bump(eds_token2decor_cnt, pos, label)
            elif label in concept_labels:
                bump(eds_token2concept_cnt, token, label)

    return eds_lemmapos2surface_cnt, eds_token2decor_cnt, eds_token2concept_cnt
        

# Run the EDS statistics pass and keep its three returned count tables as
# notebook-level variables.
eds_lemmapos2surface_cnt,eds_token2decor_cnt,eds_token2concept_cnt = count_eds_stat()