In [1]:
# Probe for the ``__IPYTHON__`` name: looking it up raises NameError when it
# is not defined, which is taken to mean "not running under IPython".
try:
    __IPYTHON__
except NameError:
    USING_IPYTHON = False
else:
    USING_IPYTHON = True

#### Argparse

In [296]:
import argparse

# Command-line interface of the notebook/script.
ap = argparse.ArgumentParser()
ap.add_argument('mrp_data_dir', help='root directory of the MRP data')
ap.add_argument('--train-sub-dir', default='training',
                help='sub-directory of mrp_data_dir that holds one folder per framework')
ap.add_argument('--graphviz-dir', default='graphviz',
                help='directory the rendered graph images are written to')

# Canned argument list used when running inside a notebook, where there is no
# real command line; one or more arguments per line.
arg_string = """
    ./data/
"""
# str.split() without a separator splits on any run of whitespace (newlines
# included) and drops empty strings. The former extra split on r'\\n' searched
# for a literal backslash-backslash-n and therefore never matched anything.
arguments = arg_string.split()

In [297]:
# Inside a notebook, parse the canned ``arguments`` list; as a script, pass
# None so that argparse falls back to the process command line.
args = ap.parse_args(arguments if USING_IPYTHON else None)

In [298]:
# Echo the parsed arguments as the cell output.
args

Namespace(graphviz_dir='graphviz', mrp_data_dir='./data/', train_sub_dir='training')

#### Library imports

In [286]:
import json
import logging
import os

from PIL import Image
from matplotlib.pyplot import figure
from networkx.drawing.nx_agraph import to_agraph
from tqdm import tqdm
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import plotly.graph_objs as go
import pprint

#### IPython notebook-specific imports

In [270]:
# Choose the plotly entry points depending on the runtime: offline rendering
# inside a notebook, the ``plotly.plotly`` module otherwise.
# NOTE(review): the ``%matplotlib`` line is an IPython magic, so this cell is
# not valid plain Python as written — confirm how the script variant is produced.
if USING_IPYTHON:
    # matplotlib config
    %matplotlib inline
    
    # ipython notebook plotly config
    from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
    init_notebook_mode(connected=True)
else:
    # NOTE(review): ``plotly.plotly`` was moved out of plotly in version 4
    # (to the separate chart_studio package) — confirm the pinned plotly version.
    from plotly.plotly import plot, iplot

In [271]:
# Configure the root logger with a single stream handler at INFO level, then
# create this module's logger and set it to INFO as well.
logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler()])
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
logger.setLevel(logging.INFO)

### Constants

In [272]:
# Fallback identifier used by draw_graphviz for graphs that have no 'id' field.
# NOTE(review): name and value both misspell "UNKNOWN"; left unchanged because
# the name is referenced by other cells and the value ends up in output file names.
UNKWOWN = 'UNKWOWN'

### Load data

In [273]:
# Every sub-directory of the training directory is treated as one framework.
train_dir = os.path.join(args.mrp_data_dir, args.train_sub_dir)
frameworks = list(filter(
    lambda entry: os.path.isdir(os.path.join(train_dir, entry)),
    os.listdir(train_dir),
))
frameworks

['ucca', 'psd', 'eds', 'dm', 'amr']

In [344]:
def _ucca_anchor_text(input_text, anchors):
    """Return the concatenation of the ``input_text`` slices named by ``anchors``.

    Each anchor is a dict whose 'from'/'to' entries are used as slice bounds;
    a missing bound falls back to -1.
    """
    return ''.join(
        input_text[anchor.get('from', -1): anchor.get('to', -1)]
        for anchor in anchors
    )


def _read_mrp_file(path, framework):
    """Parse a file holding one JSON object per line into a list of dicts.

    For the 'ucca' framework, every node that has 'anchors' gets a 'label'
    built from the anchored text of the graph's 'input' string.

    Args:
        path: path of the file to read.
        framework: framework name the file belongs to.

    Returns:
        list of the decoded JSON objects, in file order.
    """
    mrp_jsons = []
    # Explicit encoding: JSON text is UTF-8, while open()'s default follows
    # the platform locale.
    with open(path, encoding='utf-8') as rf:
        for line in rf:
            line = line.strip()
            if not line:
                # json.loads('') raises, so blank lines are skipped.
                continue
            mrp_json = json.loads(line)
            if framework == 'ucca' and 'nodes' in mrp_json and 'input' in mrp_json:
                input_text = mrp_json['input']
                for node in mrp_json['nodes']:
                    if 'anchors' in node:
                        node['label'] = _ucca_anchor_text(input_text, node['anchors'])
            mrp_jsons.append(mrp_json)
    return mrp_jsons


# framework name -> dataset file name -> list of parsed graphs
framework2dataset2mrp_jsons = {}
for framework in tqdm(frameworks, desc='frameworks'):
    framework_dir = os.path.join(train_dir, framework)
    dataset_names = os.listdir(framework_dir)

    dataset2mrp_jsons = {}
    for dataset_name in tqdm(dataset_names, desc='dataset_name'):
        dataset_path = os.path.join(framework_dir, dataset_name)
        dataset2mrp_jsons[dataset_name] = _read_mrp_file(dataset_path, framework)

    framework2dataset2mrp_jsons[framework] = dataset2mrp_jsons









frameworks:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A








dataset_name:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A








dataset_name:  50%|█████     | 1/2 [00:00<00:00,  3.56it/s][A[A[A[A[A[A[A[A[A








dataset_name: 100%|██████████| 2/2 [00:00<00:00,  3.66it/s][A[A[A[A[A[A[A[A[A







frameworks:  20%|██        | 1/5 [00:00<00:02,  1.84it/s][A[A[A[A[A[A[A[A








dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A








dataset_name: 100%|██████████| 1/1 [00:09<00:00,  9.27s/it][A[A[A[A[A[A[A[A[A







frameworks:  40%|████      | 2/5 [00:09<00:09,  3.16s/it][A[A[A[A[A[A[A[A








dataset_name:   0%|          | 0/1 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A








dataset_name: 100%|██████████| 1/1 [00:02<00:00,  2.77s/it][A[A[A[A[A[A[A[A[A







frameworks:  60%|██████    | 3/5 [00:12<00:06,  3.05s/it][A[A[A[A[A[A[A[A








da

In [318]:
# Log each framework name followed by the dataset files loaded for it.
for framework in framework2dataset2mrp_jsons.keys():
    dataset_files = list(framework2dataset2mrp_jsons[framework])
    logger.info(framework)
    logger.info(dataset_files)

INFO:__main__:ucca
INFO:__main__:['wiki.mrp', 'ewt.mrp']
INFO:__main__:psd
INFO:__main__:['wsj.mrp']
INFO:__main__:eds
INFO:__main__:['wsj.mrp']
INFO:__main__:dm
INFO:__main__:['wsj.mrp']
INFO:__main__:amr
INFO:__main__:['xinhua.mrp', 'wsj.mrp', 'wiki.mrp', 'wb.mrp', 'rte.mrp', 'proxy.mrp', 'mt09sdl.mrp', 'lorelei.mrp', 'fables.mrp', 'dfb.mrp', 'dfa.mrp', 'cctv.mrp', 'bolt.mrp', 'amr-guidelines.mrp']


### Visualization

In [497]:
# Node/edge keys that are left out of the extra "(key, value)" lines of a
# rendered label.
HIDE_FIELD_SET = {'anchors', 'source', 'target', 'label'}

In [498]:
def add_nodes_to_directed_graph(nodes, dg):
    """Add every entry of ``nodes`` to ``dg`` and attach a display label.

    The label is the node's own 'label' (empty string when absent) followed
    by one ``(key[:3], value)`` line per remaining field that is not listed
    in HIDE_FIELD_SET. Nodes without an 'id' are stored under -1.
    """
    for node in nodes:
        node_id = node.get('id', -1)
        dg.add_node(node_id)

        label_lines = [node.get('label', '')]
        for key, value in node.items():
            if key in HIDE_FIELD_SET:
                continue
            label_lines.append(str((key[:3], value)))

        dg.nodes[node_id]['label'] = '\n'.join(label_lines)
#         for key, value in node.items():
#             dg.nodes[node_id][key] = value

In [499]:
def add_edges_to_directed_graph(edges, dg):
    """Add every entry of ``edges`` to ``dg`` and attach a display label.

    The label is the edge's own 'label' (empty string when absent) followed
    by one ``(key[:3], value)`` line per remaining field that is not listed
    in HIDE_FIELD_SET. A missing 'source' or 'target' is replaced by -1.
    """
    for edge in edges:
        src = edge.get('source', -1)
        dst = edge.get('target', -1)
        dg.add_edge(src, dst)

        label_lines = [edge.get('label', '')]
        for key, value in edge.items():
            if key in HIDE_FIELD_SET:
                continue
            label_lines.append(str((key[:3], value)))

        # NOTE(review): presumably a second edge between the same pair
        # overwrites this label when dg is a plain DiGraph — confirm.
        dg[src][dst]['label'] = '\n'.join(label_lines)
#         for key, value in edge.items():
#             dg[edge_source][edge_target][key] = value

In [500]:
def mrp_json_to_directed_graph(mrp_json):
    """Build a ``networkx.DiGraph`` from the 'nodes' and 'edges' of ``mrp_json``.

    Either key may be missing, in which case it is treated as an empty list.
    """
    graph = nx.DiGraph()
    node_list = mrp_json.get('nodes', [])
    edge_list = mrp_json.get('edges', [])
    add_nodes_to_directed_graph(node_list, graph)
    add_edges_to_directed_graph(edge_list, graph)
    return graph

In [501]:
# Open a 4x4 inch matplotlib figure at 500 dpi (2000x2000 px) with a white
# face and black edge colour.
# NOTE(review): nothing is drawn on this figure in this cell; presumably it
# is meant as the canvas for later plt.imshow calls — confirm it is needed.
figure(num=None, figsize=(4, 4), dpi=500, facecolor='w', edgecolor='k')

<Figure size 2000x2000 with 0 Axes>

<Figure size 2000x2000 with 0 Axes>

In [502]:
def draw_graphviz(mrp_json, dataset_dir):
    """Render ``mrp_json`` as an image inside ``dataset_dir``.

    The file name stem is the graph's 'id', or UNKWOWN when the graph has
    none; dg2graphviz_image appends the extension.
    """
    identifier = mrp_json.get('id', UNKWOWN)
    graph = mrp_json_to_directed_graph(mrp_json)
    dg2graphviz_image(graph, os.path.join(dataset_dir, identifier))

In [503]:
def dg2graphviz_image(dg, save_name, layout='dot', verbose=0):
    """Lay out ``dg`` with Graphviz and write it to ``<save_name>.png``.

    Args:
        dg: networkx graph to render.
        save_name: output path without the ``.png`` extension.
        layout: value passed to the AGraph ``layout`` call.
        verbose: when truthy and running under IPython, the written image is
            also shown through ``plt.imshow``.
    """
    ag = to_agraph(dg)
    ag.layout(layout)
    image_name = '{}.png'.format(save_name)
    ag.draw(image_name)
    if verbose and USING_IPYTHON:
        # The context manager closes the image file once matplotlib has
        # taken the pixel data, instead of leaving the handle open.
        with Image.open(image_name, 'r') as pil_im:
            plt.imshow(pil_im)

In [505]:
# draw_graphviz(mrp_json, './')

In [None]:
# Render every loaded graph into <graphviz_dir>/<framework>/<dataset>/.
# The datasets come from the already loaded framework2dataset2mrp_jsons, so
# the training directory does not need to be listed again here.
for framework in tqdm(frameworks, desc='frameworks'):
    graphviz_framework_dir = os.path.join(args.graphviz_dir, framework)
    os.makedirs(graphviz_framework_dir, exist_ok=True)

    dataset2mrp_jsons = framework2dataset2mrp_jsons[framework]
    for dataset, mrp_jsons in tqdm(dataset2mrp_jsons.items(), desc='dataset_name'):
        graphviz_dataset_dir = os.path.join(graphviz_framework_dir, dataset)
        os.makedirs(graphviz_dataset_dir, exist_ok=True)
        for mrp_json in mrp_jsons:
            draw_graphviz(mrp_json, graphviz_dataset_dir)




















frameworks:   0%|          | 0/5 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A



















dataset_name:   0%|          | 0/2 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A

In [350]:
# Show which frameworks have been loaded.
framework2dataset2mrp_jsons.keys()

dict_keys(['ucca', 'psd', 'eds', 'dm', 'amr'])

In [351]:
# Select the second graph of the 'wiki.mrp' dataset of the 'ucca' framework
# as the example graph.
mrp_json = framework2dataset2mrp_jsons['ucca']['wiki.mrp'][1]

### Interactive drawing using plotly

In [253]:
# prog list: neato, dot, twopi, circo, fdp, nop, wc, acyclic, gvpr, gvcolor, ccomps, sccmap, tred, sfdp, unflatten
# Build the graph for the selected ``mrp_json`` here: no cell defines ``dg``
# at notebook level, so on a fresh kernel this cell would otherwise fail
# with a NameError.
dg = mrp_json_to_directed_graph(mrp_json)
poss = nx.nx_agraph.graphviz_layout(dg, prog='dot')

In [254]:
def info_dict2plotly_text(info_dict):
    """Format ``info_dict`` as ``key: value`` entries joined by ``<br />``.

    Entries keep the dict's iteration order; an empty dict gives ''.
    """
    return '<br />'.join(
        '{}: {}'.format(field, content) for field, content in info_dict.items()
    )

In [255]:
# Draw Edge
# Collect the coordinates in plain lists first and hand them to the trace
# once, instead of rebuilding the trace's tuples on every iteration.
# Each edge contributes (start, end, None); the None entry separates it from
# the next edge.
edge_line_x = []
edge_line_y = []
for source, target in dg.edges():
    x0, y0 = poss[source]
    x1, y1 = poss[target]
    edge_line_x.extend([x0, x1, None])
    edge_line_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_line_x,
    y=edge_line_y,
    text=[],
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')
    
# Draw Edge info node
# One nearly transparent marker at the midpoint of every edge carries the
# edge label as text and the edge attributes as hover text. The values are
# collected in lists and passed to the trace once, instead of rebuilding the
# trace's tuples on every iteration.
edge_mid_x = []
edge_mid_y = []
edge_labels = []
edge_hovertexts = []
for source, target in dg.edges():
    x0, y0 = poss[source]
    x1, y1 = poss[target]
    edge_attrs = dg[source][target]
    edge_mid_x.append((x0 + x1) / 2)
    edge_mid_y.append((y0 + y1) / 2)
    edge_labels.append(edge_attrs.get('label'))
    edge_hovertexts.append(info_dict2plotly_text(edge_attrs))

edge_info_node_trace = go.Scatter(
    x=edge_mid_x,
    y=edge_mid_y,
    text=edge_labels,
    hovertext=edge_hovertexts,
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        color=[],
        size=10,
        opacity=0.01,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            # NOTE(review): newer plotly releases spell this as title.side —
            # confirm against the installed version.
            titleside='right',
        ),
        line=dict(width=2)))

    
# Draw Node
# The values are collected in lists and passed to the trace once, instead of
# rebuilding the trace's tuples on every iteration.
node_x = []
node_y = []
node_texts = []
node_hovertexts = []
for node in dg.nodes():
    x, y = poss[node]
    node_attrs = dg.nodes[node]
    # ``or ['']`` also covers an empty 'values' list, which used to raise
    # IndexError on the [0] lookup.
    first_value = (node_attrs.get('values') or [''])[0]
    node_x.append(x)
    node_y.append(y)
    node_texts.append('{} ({})'.format(node_attrs.get('label'), first_value))
    node_hovertexts.append(info_dict2plotly_text(node_attrs))

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    text=node_texts,
    hovertext=node_hovertexts,
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=0.5,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            # NOTE(review): newer plotly releases spell this as title.side —
            # confirm against the installed version.
            titleside='right'
        ),
        line=dict(width=2)))
    
# for node, adjacencies in enumerate(dg.adjacency()):
#     node_trace['marker']['color']+=tuple([len(adjacencies[1])])
#     node_info = '# of connections: '+str(len(adjacencies[1]))
#     node_trace['text']+=tuple([node_info])

In [256]:
# Footer annotation crediting the plotly example this figure is based on.
credit_annotation = dict(
    text="""
        Python code: <a href='https://plot.ly/ipython-notebooks/network-graphs/'> 
        https://plot.ly/ipython-notebooks/network-graphs/</a>""",
    showarrow=False,
    xref="paper", yref="paper",
    x=0.005, y=-0.002)

# Layout without legend, grid, zero line or tick labels.
network_layout = go.Layout(
    title='<br>Network graph made with Python',
    titlefont=dict(size=16),
    showlegend=False,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=40),
    annotations=[credit_annotation],
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))

# Trace order: edge lines, then nodes, then the edge-midpoint markers.
fig = go.Figure(
    data=[edge_trace, node_trace, edge_info_node_trace],
    layout=network_layout)

iplot(fig, filename='networkx')
# Print the 'input' field of the selected graph below the figure.
print(mrp_json.get('input'))

Like all pitcher plants, it is carnivorous and uses its nectar to attract insects that drown in the pitcher and are digested by the plant.
