In [1]:
try:
    __IPYTHON__
    USING_IPYTHON = True
except NameError:
    USING_IPYTHON = False

#### Argparse

In [2]:
import argparse
ap = argparse.ArgumentParser()
ap.add_argument('mrp_data_dir', help='')
ap.add_argument('--train-sub-dir', default='training', help='')
ap.add_argument('--graphviz-dir', default='../graphviz', help='')
ap.add_argument('--companion-sub-dir', default='../mrp-companion/2019/companion')
ap.add_argument('--mrp-file-extension', default='.mrp')
ap.add_argument('--companion-file-extension', default='.conllu')
arg_string = """
    ../data/
"""
arguments = [arg for arg_line in arg_string.split(r'\\n') for arg in arg_line.split()]

In [3]:
if USING_IPYTHON:
    args = ap.parse_args(arguments)
else:
    args = ap.parse_args()

In [4]:
args

Namespace(companion_file_extension='.conllu', companion_sub_dir='../mrp-companion/2019/companion', graphviz_dir='../graphviz', mrp_data_dir='../data/', mrp_file_extension='.mrp', train_sub_dir='training')

#### Library imports

In [5]:
import json
import logging
import os

from PIL import Image
from matplotlib.pyplot import figure
from networkx.drawing.nx_agraph import to_agraph
from tqdm import tqdm
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import plotly.graph_objs as go
from pprint import pprint
import string

#### ipython notebook specific imports

In [6]:
if USING_IPYTHON:
    # matplotlib config
    %matplotlib inline
    
    # ipython notebook plotly config
    from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
    init_notebook_mode(connected=True)
else:
    from plotly.plotly import plot, iplot

In [7]:
logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler()])
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
logger.setLevel(logging.INFO)

### Constants

In [8]:
UNKWOWN = 'UNKWOWN'

### Load data

In [9]:
train_dir = os.path.join(args.mrp_data_dir, args.train_sub_dir)
frameworks = [sub_dir for sub_dir in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, sub_dir))]
frameworks

['amr', 'eds', 'psd', 'ucca', 'dm']

In [10]:
framework2dataset2mrp_jsons = {}
framework2id2mrp_jsons = {}
#id2mrp_jsons = {}
for framework in tqdm(frameworks, desc='frameworks'):
    dataset2mrp_jsons = {}
    id2mrp_jsons_perFram = {}
    framework_dir = os.path.join(train_dir, framework)
    dataset_names = os.listdir(framework_dir)
    
    for dataset_name in tqdm(dataset_names, desc='dataset_name'):
        mrp_jsons = []
        if not dataset_name.endswith(args.mrp_file_extension):
            continue
        with open(os.path.join(framework_dir, dataset_name)) as rf:
            for line in rf:
                mrp_json = json.loads(line.strip())
                if framework == 'ucca' and 'nodes' in mrp_json and 'input' in mrp_json:
                    input_text = mrp_json['input']
                    nodes = mrp_json['nodes']
                    for i, node in enumerate(nodes):
                        if 'anchors' not in node:
                            continue
                        text_segments = []
                        for anchor in node['anchors']:
                            text_segments.append(input_text[anchor.get('from', -1): anchor.get('to', -1)])
                        mrp_json['nodes'][i]['label'] = ''.join(text_segments)
                        
                mrp_jsons.append(mrp_json)
                if mrp_json['id'].startswith("reviews-"):
                    id2mrp_jsons_perFram[mrp_json['id'][8:]] = mrp_json
                else:
                    id2mrp_jsons_perFram[mrp_json['id']] = mrp_json
                #id2mrp_jsons[mrp_json['id']] = mrp_json
        dataset_name = dataset_name.split('.')[0]
        dataset2mrp_jsons[dataset_name] = mrp_jsons
    framework2dataset2mrp_jsons[framework] = dataset2mrp_jsons
    framework2id2mrp_jsons[framework] = id2mrp_jsons_perFram

frameworks:   0%|          | 0/5 [00:00<?, ?it/s]
dataset_name:   0%|          | 0/14 [00:00<?, ?it/s][A
dataset_name:   7%|▋         | 1/14 [00:00<00:03,  3.91it/s][A
dataset_name:  36%|███▌      | 5/14 [00:00<00:01,  5.31it/s][A
dataset_name:  57%|█████▋    | 8/14 [00:00<00:00,  6.87it/s][A
dataset_name:  71%|███████▏  | 10/14 [00:00<00:00,  6.67it/s][A
dataset_name:  86%|████████▌ | 12/14 [00:02<00:00,  2.84it/s][A
frameworks:  20%|██        | 1/5 [00:02<00:10,  2.54s/it]t/s][A
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
dataset_name: 100%|██████████| 1/1 [00:05<00:00,  5.84s/it][A
frameworks:  40%|████      | 2/5 [00:08<00:10,  3.54s/it]
dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A
dataset_name: 100%|██████████| 1/1 [00:03<00:00,  3.79s/it][A
frameworks:  60%|██████    | 3/5 [00:12<00:07,  3.62s/it]
dataset_name:   0%|          | 0/2 [00:00<?, ?it/s][A
dataset_name:  50%|█████     | 1/2 [00:00<00:00,  5.15it/s][A
dataset_name: 100%|██████████| 2/2 [0

In [11]:
for framework in framework2dataset2mrp_jsons:
    logger.info(framework)
    logger.info(list(framework2dataset2mrp_jsons[framework].keys()))

INFO:__main__:amr
INFO:__main__:['lorelei', 'bolt', 'wiki', 'mt09sdl', 'rte', 'cctv', 'xinhua', 'amr-guidelines', 'proxy', 'wsj', 'dfb', 'dfa', 'fables', 'wb']
INFO:__main__:eds
INFO:__main__:['wsj']
INFO:__main__:psd
INFO:__main__:['wsj']
INFO:__main__:ucca
INFO:__main__:['wiki', 'ewt']
INFO:__main__:dm
INFO:__main__:['wsj']


### Load companion

In [12]:
dataset2cid2parse = {}
framework2cid2parse = {}
cid2parse = {}
for framework in os.listdir(args.companion_sub_dir):
    print (framework)
    framework_dir = os.path.join(args.companion_sub_dir, framework)
    if not os.path.isdir(framework_dir):
        continue
    cid2parse_perFram = {}
    for dataset in tqdm(os.listdir(framework_dir), desc='dataset'):
        if not dataset.endswith(args.companion_file_extension):
            continue
        dataset_name = dataset.split('.')[0].rstrip(string.digits)
        cid2parse_perDataset = {}
        print (dataset)
        with open(os.path.join(framework_dir, dataset)) as rf:
            parse = []
            for line in rf:
                line = line.strip()
                if not line:
                    cid2parse_perDataset[cid] = parse
                    cid2parse_perFram[cid] = parse
                    cid2parse[cid] = parse
                    parse = []
                    cid = ''
                elif line.startswith('#'):
                    cid = line[1:]
                else:
                    parse.append(line.split('\t'))
        dataset2cid2parse[dataset_name] = cid2parse_perDataset
    framework2cid2parse[framework] = cid2parse_perFram
    if framework=='dm':
        framework2cid2parse['psd'] = cid2parse_perFram
        framework2cid2parse['eds'] = cid2parse_perFram

dataset:  38%|███▊      | 5/13 [00:00<00:00, 48.03it/s]

amr
rte.conllu
fables.conllu
amr-guidelines.conllu
cctv.conllu
wb.conllu
dfa.conllu


dataset:  54%|█████▍    | 7/13 [00:00<00:00, 19.63it/s]

wiki.conllu
bolt.conllu
xinhua.conllu
dfb.conllu


dataset:  85%|████████▍ | 11/13 [00:01<00:00,  6.96it/s]

proxy.conllu
mt09sdl.conllu


dataset: 100%|██████████| 13/13 [00:01<00:00,  8.05it/s]
dataset:   0%|          | 0/6 [00:00<?, ?it/s]

lorelei.conllu
.DS_Store
ucca
ewt04.conllu
ewt00.conllu
ewt02.conllu
wiki.conllu


dataset: 100%|██████████| 6/6 [00:01<00:00,  3.62it/s]
dataset:   0%|          | 0/5 [00:00<?, ?it/s]

ewt01.conllu
ewt03.conllu
Makefile
udpipe.mrp
jamr.mrp
dm
wsj04.conllu


dataset:  20%|██        | 1/5 [00:00<00:01,  3.87it/s]

wsj02.conllu


dataset:  40%|████      | 2/5 [00:00<00:00,  3.85it/s]

wsj00.conllu


dataset:  60%|██████    | 3/5 [00:00<00:00,  3.88it/s]

wsj03.conllu


dataset:  80%|████████  | 4/5 [00:01<00:00,  3.87it/s]

wsj01.conllu


dataset: 100%|██████████| 5/5 [00:01<00:00,  3.87it/s]

README.txt
isi.mrp





In [13]:
framework2dataset2mrp_jsons.keys()

dict_keys(['amr', 'eds', 'psd', 'ucca', 'dm'])

In [14]:
dataset = 'wsj'
framework = 'dm'
mrp_index = 128
parse = None
while not parse:
    mrp_index += 1
    cid = framework2dataset2mrp_jsons[framework][dataset][mrp_index]['id']
    parse = dataset2cid2parse[dataset].get(cid)
    
(mrp_index, ' '.join([word_record[1] for word_record in parse]), parse)

(129,
 'Last year Commonwealth Edison had to refund $ 72.7 million for poor performance of its LaSalle I nuclear plant .',
 [['1', 'Last', 'last', 'ADJ', 'JJ', '_', '2', 'amod', '_', 'TokenRange=0:4'],
  ['2',
   'year',
   'year',
   'NOUN',
   'NN',
   '_',
   '5',
   'obl:tmod',
   '_',
   'TokenRange=5:9'],
  ['3',
   'Commonwealth',
   'Commonwealth',
   'PROPN',
   'NNP',
   '_',
   '4',
   'compound',
   '_',
   'TokenRange=10:22'],
  ['4',
   'Edison',
   'Edison',
   'PROPN',
   'NNP',
   '_',
   '5',
   'nsubj',
   '_',
   'TokenRange=23:29'],
  ['5',
   'had',
   'have',
   'VERB',
   'VBD',
   '_',
   '0',
   'root',
   '_',
   'TokenRange=30:33'],
  ['6', 'to', 'to', 'PART', 'TO', '_', '7', 'mark', '_', 'TokenRange=34:36'],
  ['7',
   'refund',
   'refund',
   'VERB',
   'VB',
   '_',
   '5',
   'xcomp',
   '_',
   'TokenRange=37:43'],
  ['8', '$', '$', 'SYM', '$', '_', '7', 'obj', '_', 'TokenRange=44:45'],
  ['9',
   '72.7',
   '72.7',
   'NUM',
   'CD',
   '_',
   '10',


In [15]:
cid

'20015024'

In [16]:
framework2dataset2mrp_jsons[framework][dataset][mrp_index]

{'id': '20015024',
 'flavor': 0,
 'framework': 'dm',
 'version': 0.9,
 'time': '2019-04-10 (20:16)',
 'input': 'Last year Commonwealth Edison had to refund $72.7 million for poor performance of its LaSalle I nuclear plant.',
 'tops': [4],
 'nodes': [{'id': 0,
   'label': 'last',
   'properties': ['pos', 'frame'],
   'values': ['JJ', 'q:i-h-h'],
   'anchors': [{'from': 0, 'to': 4}]},
  {'id': 1,
   'label': 'year',
   'properties': ['pos', 'frame'],
   'values': ['NN', 'n:x'],
   'anchors': [{'from': 5, 'to': 9}]},
  {'id': 2,
   'label': 'Commonwealth',
   'properties': ['pos', 'frame'],
   'values': ['NNP', 'named:x-c'],
   'anchors': [{'from': 10, 'to': 22}]},
  {'id': 3,
   'label': 'Edison',
   'properties': ['pos', 'frame'],
   'values': ['NNP', 'named:x-c'],
   'anchors': [{'from': 23, 'to': 29}]},
  {'id': 4,
   'label': 'have',
   'properties': ['pos', 'frame'],
   'values': ['VBD', 'v_qmodal:e-h'],
   'anchors': [{'from': 30, 'to': 33}]},
  {'id': 6,
   'label': 'refund',
   '

In [17]:
print(' '.join([
    node.get('label', '') 
    for node in sorted(framework2dataset2mrp_jsons[framework][dataset][mrp_index]['nodes'], key=lambda x:x['id'])
]))
print('http://localhost:8000/tree/proj29_ds1/home/slai/mrp/graphviz/{}/{}.mrp/{}.png'.format(framework, dataset, cid))

last year Commonwealth Edison have refund $ 72.7 million for poor performance of its LaSalle I nuclear plant
http://localhost:8000/tree/proj29_ds1/home/slai/mrp/graphviz/dm/wsj.mrp/20015024.png


In [18]:
for key in framework2id2mrp_jsons:
    print (key)
    print (len(framework2id2mrp_jsons[key].keys()))
    

amr
56236
eds
35656
psd
35656
ucca
4939
dm
35656


In [19]:
for key in framework2cid2parse:
    print (key)
    print (len(framework2cid2parse[key].keys()))
    

amr
56149
ucca
6485
dm
35656
psd
35656
eds
35656


In [20]:
print (key)
print (len(set(framework2cid2parse['ucca'].keys())-set(framework2id2mrp_jsons['ucca'].keys())))
print (set(framework2cid2parse['ucca'].keys())-set(framework2id2mrp_jsons['ucca'].keys()))

eds
1546
{'652018', '606001', '125000', '357217-0003', '642015', '354474-0002', '085009-0004', '152281-0001', '363428-0003', '138249-0001', '175434-0001', '616000', '396046-0001', '141001', '114009', '130647-0002', '656005', '325330-0003', '604015', '143009', '287360-0006', '618011', '607011', '652007', '229100-0002', '604016', '193257-0002', '038523-0003', '148016', '653004', '652008', '148006', '651005', '142006', '125004', '257735-0002', '655007', '129012', '314938-0001', '148011', '616007', '488003', '128011', '210153-0004', '143001', '146008', '647006', '264993-0002', '538012', '118668-0004', '486000', '607002', '059655-0001', '235190-0003', '649007', '189007', '131000', '648012', '137000', '392826-0003', '491005', '536006', '275595-0003', '217747-0002', '616003', '630007', '488008', '652013', '143011', '190001', '059685-0001', '122514-0002', '536008', '652014', '654000', '140014', '335490-0001', '343813-0003', '644008', '487011', '124005', '150004', '035993-0001', '263909-0001', 

In [21]:
framework2cid2parse['ucca']['140302-0001']

[['1', '5', '5', 'NUM', 'CD', '_', '2', 'nummod', '_', 'TokenRange=0:1'],
 ['2',
  'star',
  'star',
  'NOUN',
  'NN',
  '_',
  '4',
  'compound',
  '_',
  'TokenRange=2:6'],
 ['3',
  'detail',
  'detail',
  'NOUN',
  'NN',
  '_',
  '4',
  'compound',
  '_',
  'TokenRange=7:13'],
 ['4', 'job', 'job', 'NOUN', 'NN', '_', '0', 'root', '_', 'TokenRange=14:17']]

In [22]:
framework2id2mrp_jsons['ucca']["140302-0001"]


{'id': 'reviews-140302-0001',
 'flavor': 1,
 'framework': 'ucca',
 'version': 0.9,
 'time': '2019-04-11 (22:03)',
 'input': '5 star detail job',
 'tops': [5],
 'nodes': [{'id': 0, 'anchors': [{'from': 0, 'to': 1}], 'label': '5'},
  {'id': 1, 'anchors': [{'from': 2, 'to': 6}], 'label': 'star'},
  {'id': 2, 'anchors': [{'from': 7, 'to': 13}], 'label': 'detail'},
  {'id': 3, 'anchors': [{'from': 14, 'to': 17}], 'label': 'job'},
  {'id': 4},
  {'id': 5},
  {'id': 6}],
 'edges': [{'source': 6, 'target': 2, 'label': 'D'},
  {'source': 5, 'target': 6, 'label': 'H'},
  {'source': 4, 'target': 0, 'label': 'Q'},
  {'source': 6, 'target': 3, 'label': 'P'},
  {'source': 4, 'target': 1, 'label': 'C'},
  {'source': 6, 'target': 4, 'label': 'D'}]}

In [23]:
for key in framework2id2mrp_jsons['ucca']:
    if "Hepburn" in framework2id2mrp_jsons['ucca'][key]['input']:
        print (framework2id2mrp_jsons['ucca'][key]['input'])
        print ("found!")
        break

In [24]:
for key in framework2id2mrp_jsons:
    print (key)
    print (len(set(framework2id2mrp_jsons[key].keys())-set(framework2cid2parse[key].keys())))
    print (set(framework2id2mrp_jsons[key].keys())-set(framework2cid2parse[key].keys()))

amr
87
{'20009002', '20003030', '20010002', '20005003', '20004001', '20005002', '20009001', '20010007', '20007002', '20003023', '20008001', '20003027', '20008002', '20004002', '20003001', '20010015', '20004015', '20003010', '20012002', '20003019', '20003017', '20008004', '20004010', '20008003', '20010011', '20010012', '20004011', '20003009', '20004005', '20004004', '20012005', '20003002', '20007003', '20010016', '20004006', '20003012', '20011004', '20003020', '20009003', '20003011', '20004017', '20003005', '20003007', '20008006', '20011005', '20003003', '20004016', '20003016', '20003025', '20011002', '20010020', '20010017', '20010006', '20003014', '20010003', '20003026', '20009004', '20011007', '20003021', '20010001', '20011006', '20011001', '20005001', '20001001', '20004007', '20003015', '20003018', '20010010', '20010018', '20004009', '20003013', '20003022', '20010008', '20003029', '20007004', '20010013', '20004014', '20003028', '20011008', '20004008', '20003024', '20003008', '2001200

### Visualization

In [25]:
HIDE_FIELD_SET = set(['anchors', 'source', 'target', 'label'])

In [26]:
def add_nodes_to_directed_graph(nodes, dg):
    for node in nodes:
        node_id = node.get('id', -1)
        dg.add_node(node_id)
        info_texts = [node.get('label', '')] + [str((key[:3], value)) for key, value in node.items() if key not in HIDE_FIELD_SET]
        dg.nodes[node_id]['label'] = '\n'.join(info_texts)
#         for key, value in node.items():
#             dg.nodes[node_id][key] = value

In [27]:
def add_edges_to_directed_graph(edges, dg):
    for edge in edges:
        edge_source = edge.get('source', -1)
        edge_target = edge.get('target', -1)
        dg.add_edge(edge_source, edge_target)
        info_texts = [edge.get('label', '')] + [str((key[:3], value)) for key, value in edge.items() if key not in HIDE_FIELD_SET]
        dg[edge_source][edge_target]['label'] = '\n'.join(info_texts)
#         for key, value in edge.items():
#             dg[edge_source][edge_target][key] = value

In [28]:
def mrp_json_to_directed_graph(mrp_json):
    dg = nx.DiGraph()
    nodes = mrp_json.get('nodes', [])
    edges = mrp_json.get('edges', [])
    add_nodes_to_directed_graph(nodes, dg)
    add_edges_to_directed_graph(edges, dg)
    return dg

In [29]:
figure(num=None, figsize=(4, 4), dpi=500, facecolor='w', edgecolor='k')

<Figure size 2000x2000 with 0 Axes>

<Figure size 2000x2000 with 0 Axes>

In [30]:
def draw_graphviz(mrp_json, dataset_dir):
    mrp_id = mrp_json.get('id', UNKWOWN)
    dg = mrp_json_to_directed_graph(mrp_json)
    save_name = os.path.join(dataset_dir, mrp_id)
    dg2graphviz_image(dg, save_name)

In [31]:
def dg2graphviz_image(dg, save_name, layout='dot', verbose=0):
    ag = to_agraph(dg)
    ag.layout(layout)
    image_name = '{}.png'.format(save_name)
    ag.draw(image_name)
    if verbose and USING_IPYTHON:
        pil_im = Image.open(image_name, 'r')
        plt.imshow(pil_im)

In [32]:
# draw_graphviz(mrp_json, './')

In [33]:
# for framework in tqdm(frameworks, desc='frameworks'):
#     framework_dir = os.path.join(train_dir, framework)
#     dataset_names = os.listdir(framework_dir)
#     graphviz_framework_dir = os.path.join(args.graphviz_dir, framework)
#     os.makedirs(graphviz_framework_dir, exist_ok=True)
    
#     dataset2mrp_jsons = framework2dataset2mrp_jsons[framework]
#     for dataset, mrp_jsons in tqdm(dataset2mrp_jsons.items(), desc='dataset_name'):
#         graphviz_dataset_dir = os.path.join(graphviz_framework_dir, dataset)
#         os.makedirs(graphviz_dataset_dir, exist_ok=True)
#         for mrp_json in mrp_jsons:
#             draw_graphviz(mrp_json, graphviz_dataset_dir)

# Compare parseTree and mrpjson

In [41]:
framework = 'dm'
data_id = '20001002'


[['1',
  'Mr.',
  'Mr.',
  'PROPN',
  'NNP',
  '_',
  '2',
  'compound',
  '_',
  'TokenRange=0:3'],
 ['2',
  'Vinken',
  'Vinken',
  'PROPN',
  'NNP',
  '_',
  '4',
  'nsubj',
  '_',
  'TokenRange=4:10'],
 ['3', 'is', 'be', 'VERB', 'VBZ', '_', '4', 'cop', '_', 'TokenRange=11:13'],
 ['4',
  'chairman',
  'chairman',
  'NOUN',
  'NN',
  '_',
  '0',
  'root',
  '_',
  'TokenRange=14:22'],
 ['5', 'of', 'of', 'ADP', 'IN', '_', '7', 'case', '_', 'TokenRange=23:25'],
 ['6',
  'Elsevier',
  'Elsevier',
  'PROPN',
  'NNP',
  '_',
  '7',
  'compound',
  '_',
  'TokenRange=26:34'],
 ['7',
  'N.V.',
  'N.V.',
  'PROPN',
  'NNP',
  '_',
  '4',
  'nmod',
  '_',
  'TokenRange=35:39'],
 ['8', ',', ',', 'PUNCT', ',', '_', '7', 'punct', '_', 'TokenRange=39:40'],
 ['9', 'the', 'the', 'DET', 'DT', '_', '12', 'det', '_', 'TokenRange=41:44'],
 ['10',
  'Dutch',
  'dutch',
  'ADJ',
  'JJ',
  '_',
  '12',
  'amod',
  '_',
  'TokenRange=45:50'],
 ['11',
  'publishing',
  'publishing',
  'NOUN',
  'NN',
  '_',

In [53]:
mrp_json = framework2id2mrp_jsons[framework][data_id]


In [54]:
parse_tree = framework2cid2parse[framework][data_id]

In [55]:
def plot_mrp_json(mrp_json):
    dg = mrp_json_to_directed_graph(mrp_json)
    ag = to_agraph(dg)
    ag.layout('dot')
    image_name = './tmp_mrp_json.png'
    ag.draw(image_name)
# if USING_IPYTHON:
#     pil_im = Image.open(image_name, 'r')
#     plt.figure(figsize=(50,50)) 
#     plt.imshow(pil_im)

In [56]:
def plot_parse_tree(parse_tree):
    """plot parse and print png filename"""
    parse_dg = plot_util.parse_to_directed_graph(parse_tree)
#     dataset_dir = os.path.join(args.graphviz_dir, dataset)
    image_path = './tmp_parse_tree.png'
    #os.makedirs(dataset_dir, exist_ok=True)
    directed_graph_to_graphviz_image(parse_dg, image_path)
    #return os.path.join('http://localhost:8000/files/proj29_ds1/home/slai/mrp2019', image_path)

In [57]:
plot_mrp_json(mrp_json)

In [58]:
plot_parse_tree(parse_tree)

NameError: name 'plot_util' is not defined

<img src='./tmp_mrp_json.png'>

<img src='./tmp_parse_tree.png'>

In [40]:
dataset = 'wiki'
framework = 'dm'
framework = 'eds'
framework = 'psd'
framework = 'ucca'
# framework = 'amr'
mrp_index = 10

parse = None
while not parse:
    mrp_index += 1
    cid = framework2dataset2mrp_jsons[framework][dataset][mrp_index]['id']
    parse = dataset2cid2parse[dataset].get(cid)
    
# print sentence
print(mrp_index, ' '.join([word_record[1] for word_record in parse]))

# print corresponding mrp png link
for framework in frameworks:
    if dataset not in framework2dataset2mrp_jsons[framework]:
        continue
    if len(framework2dataset2mrp_jsons[framework][dataset]) <= mrp_index:
        continue
    if framework2dataset2mrp_jsons[framework][dataset][mrp_index]['id'] == cid:
        print(args.graphviz_file_template.format(framework, dataset, cid))

# 
def plot_parse(parse, args, dataset, dataset_dir, cid):
    """plot parse and print png filename"""
    parse_dg = plot_util.parse_to_directed_graph(parse)
    dataset_dir = os.path.join(args.graphviz_dir, dataset)
    image_path = os.path.join(dataset_dir, cid + '.png')
    os.makedirs(dataset_dir, exist_ok=True)
    plot_util.directed_graph_to_graphviz_image(parse_dg, image_path)
    return os.path.join('http://localhost:8000/files/proj29_ds1/home/slai/mrp2019', image_path)
    
parse_plot_filename = plot_parse(parse, args, dataset, dataset_dir, cid)

print(parse_plot_filename)

11 One of the most popular musicians of the 20th century , he is often referred to as the “ King of Rock and Roll ” or “ the King”. Born in Tupelo , Mississippi , Presley moved to Memphis , Tennessee , with his family at the age of 13 .


AttributeError: 'Namespace' object has no attribute 'graphviz_file_template'

![title](./tmp.png)

In [None]:
\


### Interactive drawing using plotly

In [None]:
# prog list: neato, dot, twopi, circo, fdp, nop, wc, acyclic, gvpr, gvcolor, ccomps, sccmap, tred, sfdp, unflatten
poss = nx.nx_agraph.graphviz_layout(dg, prog='dot')

In [None]:
def info_dict2plotly_text(info_dict):
    key_value_texts = []
    for key, value in info_dict.items():
        key_value_texts.append('{}: {}'.format(key, value))
    return '<br />'.join(key_value_texts)

In [None]:
# Draw Edge
edge_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    line=dict(width=0.5,color='#888'),
    hoverinfo='none',
    mode='lines')

for source, target in dg.edges():
    x0, y0 = poss[source]
    x1, y1 = poss[target]
    edge_trace['x'] += tuple([x0, x1, None])
    edge_trace['y'] += tuple([y0, y1, None])
    
# Draw Edge info node
edge_info_node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    hovertext=[],
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        color=[],
        size=10,
        opacity=0.01,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right',
        ),
        line=dict(width=2)))

for source, target in dg.edges():
    x0, y0 = poss[source]
    x1, y1 = poss[target]
    edge_info_node_trace['x'] += tuple([(x0 + x1) / 2])
    edge_info_node_trace['y'] += tuple([(y0 + y1) / 2])
    edge_info_node_trace['text'] += tuple([dg[source][target].get('label')])
    edge_info_node_trace['hovertext'] += tuple([info_dict2plotly_text(dg[source][target])])

    
# Draw Node
node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    hovertext=[],
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=0.5,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=2)))

for node in dg.nodes():
    x, y = poss[node]
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])
    node_trace['text'] += tuple(['{} ({})'.format(dg.nodes[node].get('label'), dg.nodes[node].get('values', [''])[0])])
    node_trace['hovertext'] += tuple([info_dict2plotly_text(dg.nodes[node])])
    
# for node, adjacencies in enumerate(dg.adjacency()):
#     node_trace['marker']['color']+=tuple([len(adjacencies[1])])
#     node_info = '# of connections: '+str(len(adjacencies[1]))
#     node_trace['text']+=tuple([node_info])

In [None]:
fig = go.Figure(
    data=[edge_trace, node_trace, edge_info_node_trace],
    layout=go.Layout(
    title='<br>Network graph made with Python',
    titlefont=dict(size=16),
    showlegend=False,
    hovermode='closest',
    margin=dict(b=20,l=5,r=5,t=40),
    annotations=[ dict(
        text="""
        Python code: <a href='https://plot.ly/ipython-notebooks/network-graphs/'> 
        https://plot.ly/ipython-notebooks/network-graphs/</a>""",
        showarrow=False,
        xref="paper", yref="paper",
        x=0.005, y=-0.002 ) ],
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

iplot(fig, filename='networkx')
print(mrp_json.get('input'))