In [1]:
import yaml
import os
import networkx as nx
from wos_db_studies.utils_json import apply_mapper, process_document_top, parse_edges
from collections import defaultdict

from itertools import product
from pprint import pprint

"""
Plot database schematics with Graphviz.

Graphviz attribute references:

https://renenyffenegger.ch/notes/tools/Graphviz/attributes/index
https://rsms.me/graphviz/
https://graphviz.readthedocs.io/en/stable/examples.html
https://graphviz.org/doc/info/attrs.html

Example node attributes:
    color='red', style='filled', fillcolor='blue', shape='square'

To keep a group of nodes on the same rank:
    level_one = [node1, node2]
    sg_one = ag.add_subgraph(level_one, rank='same')
"""

# Pastel fill colours referenced by the colour map below.
fillcolor_palette = {
    "violet": "#DDD0E5",
    "green": "#BEDFC8",
    "blue": "#B7D1DF",
    "red": "#EBA59E",
}

# Graphviz "shape" attribute for each node "type".
map_type2shape = {
    "table": "box",
    "vcollection": "ellipse",
    "index": "polygon",
    "field": "octagon",
    "blank": "box",
}

# Graphviz "color" attribute for each node "type".
# NOTE(review): "def_field" has a colour but no entry in map_type2shape —
# confirm whether that type is still in use.
map_type2color = {
    "table": fillcolor_palette["blue"],
    "vcollection": fillcolor_palette["green"],
    "index": "orange",
    "def_field": fillcolor_palette["red"],
    "field": fillcolor_palette["red"],
    "blank": "white",
}

# Edge line style, looked up by the "type" of the node an edge starts from.
edge_status = {"vcollection": "dashed", "table": "solid"}

config_path = "../"
figgpath = "../figs/schema"
# Every drawing cell writes a PDF into figgpath; create it up front so a fresh
# checkout does not fail when the first figure is drawn.
os.makedirs(figgpath, exist_ok=True)

# fname = os.path.join(config_path, "./conf/wos_json.yaml")
fname = os.path.join(config_path, "./conf/wos_json_simple.yaml")
# NOTE(review): yaml.FullLoader is meant for trusted input only; switch to
# yaml.safe_load if this config can ever come from an untrusted source.
# The encoding is explicit so the file reads the same on every platform.
with open(fname, "r", encoding="utf-8") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

In [2]:
### fields/indexes
# Build a directed graph with one node per vertex collection, one node per
# collection field, and an edge from each collection to each of its fields.
g = nx.DiGraph()
nodes = []
edges = []
for vc_name, vc_props in config["vertex_collections"].items():
    nodes.append((vc_name, {"type": "vcollection"}))
    for field in vc_props["fields"]:
        # Field node ids are prefixed with the collection name; the bare
        # field name is kept as the displayed label.
        field_id = f"{vc_name}:{field}"
        nodes.append((field_id, {"type": "field", "label": field}))
        edges.append((vc_name, field_id))

g.add_nodes_from(nodes)
g.add_edges_from(edges)

# Translate the node "type" into Graphviz drawing attributes.
for node_id in g.nodes():
    attrs = g.nodes[node_id]
    decoration = {}
    if "type" in attrs:
        decoration["shape"] = map_type2shape[attrs["type"]]
        decoration["color"] = map_type2color[attrs["type"]]
    if "label" in attrs:
        decoration["forcelabel"] = True
    decoration["style"] = "filled"
    attrs.update(decoration)

# All collection -> field edges share the same look.
for src, dst in g.edges():
    g.edges[src, dst].update({"style": "solid", "arrowhead": "vee"})

ag = nx.nx_agraph.to_agraph(g)

# Put the index fields of every collection into their own cluster subgraph.
for vc_name, vc_props in config["vertex_collections"].items():
    index_node_ids = [f"{vc_name}:{item}" for item in vc_props["index"]]
    index_subgraph = ag.add_subgraph(index_node_ids, name=f"cluster_{vc_name}:def")
    index_subgraph.node_attr["style"] = "filled"
    # NOTE(review): this sets the default *node* label inside the cluster;
    # if the intent was to caption the cluster itself, graph_attr is the
    # place for it — confirm against the rendered figure.
    index_subgraph.node_attr["label"] = "definition"

ag.draw(
    os.path.join(figgpath, "./json_simple_vc2fields.pdf"),
    format="pdf",
    prog="dot",
)

In [3]:
def parse_branch(croot, acc, nc):
    """
    Walk a nested mapping definition and collect parent -> child edges.

    Nodes are ``(name, type)`` tuples. A dict with a ``"maps"`` key is a
    branching level: if it also has ``"descend_key"`` it becomes a
    ``(descend_key, "blank")`` node, otherwise it is transparent and its
    children attach to the enclosing node ``nc``. A dict with a ``"name"``
    key (and no ``"maps"``) is a leaf ``(name, "vcollection")`` node.
    Anything else (a dict with neither key, or a non-dict value) contributes
    no node and no edge.

    :param croot: current subtree of the definition
    :param acc: list of ``(parent_node, child_node)`` edges collected so
        far; it is extended in place and also returned
    :param nc: node of the enclosing level, or None at the root
    :return: tuple ``(acc, node)`` where ``node`` is the node representing
        ``croot`` (``nc`` when ``croot`` adds no node of its own)
    """
    if not isinstance(croot, dict):
        # Previously fell off the end and returned None, which broke the
        # caller's tuple unpacking.
        return acc, nc

    if "maps" in croot:
        if "descend_key" in croot:
            nleft = (croot["descend_key"], "blank")
        else:
            nleft = nc
        for m in croot["maps"]:
            acc, cnode = parse_branch(m, acc, nleft)
            # Skip self-links (transparent or unrecognised children report
            # nleft back) and edges that would hang off a missing parent.
            if nleft is not None and cnode != nleft:
                acc.append((nleft, cnode))
        return acc, nleft

    if "name" in croot:
        return acc, (croot["name"], "vcollection")

    # Unrecognised dict: report the enclosing node so no edge is created.
    # (The old code returned a list here, yielding a malformed edge.)
    return acc, nc

In [4]:
# Draw the nesting structure of the JSON mapping: intermediate "blank"
# levels and the vertex collections they lead to.
g = nx.DiGraph()
acc = []
edges_, _ = parse_branch(config["json"], acc, None)

# Graph node ids are "<name>_<type>"; name and type are kept as attributes.
nodes = [
    ("_".join(endpoint), {"type": endpoint[1], "name": endpoint[0]})
    for edge in edges_
    for endpoint in edge
]
edges = [("_".join(parent), "_".join(child)) for parent, child in edges_]

for node_id, node_attrs in nodes:
    g.add_node(node_id, **node_attrs)
g.add_edges_from(edges)

# Translate name/type into Graphviz drawing attributes.
for node_id in g.nodes():
    attrs = g.nodes[node_id]
    attrs.update(
        {
            "label": attrs["name"],
            "shape": map_type2shape[attrs["type"]],
            "color": map_type2color[attrs["type"]],
            "style": "filled",
        }
    )

# Edges keep the Graphviz defaults here: edge_status has no entry for the
# "blank" node type, so the per-type edge styling of the other figures is
# not applied.

ag = nx.nx_agraph.to_agraph(g)
ag.draw(
    os.path.join(figgpath, "./json_simple_structure.pdf"),
    format="pdf",
    prog="dot",
)

In [5]:
# Peek at the first two (parent, child) edges that parse_branch appended to acc.
acc[:2]

[(('REC', 'blank'), ('publication', 'vcollection')),
 (('edition', 'blank'), ('edition', 'vcollection'))]

In [6]:
# Draw the edges between vertex collections declared in the JSON mapping.
edge_def, excl_fields = parse_edges(config["json"], [], defaultdict(list))

# Only the first two items of every edge definition (the endpoints) are used.
edges = [definition[:2] for definition in edge_def]
nodes = [
    (vc_name, {"type": "vcollection"})
    for vc_name in config["vertex_collections"]
]

g = nx.DiGraph()
g.add_nodes_from(nodes)
g.add_edges_from(edges)

# Translate the node "type" into Graphviz drawing attributes.
for node_id in g.nodes():
    node_type = g.nodes[node_id]["type"]
    g.nodes[node_id].update(
        {
            "shape": map_type2shape[node_type],
            "color": map_type2color[node_type],
            "style": "filled",
        }
    )

# The line style of an edge follows the type of the node it starts from.
for src, dst in g.edges():
    src_type = g.nodes[src]["type"]
    g.edges[src, dst].update(
        {"style": edge_status[src_type], "arrowhead": "vee"}
    )

ag = nx.nx_agraph.to_agraph(g)
ag.draw(
    os.path.join(figgpath, "./json_simple_vc2vc.pdf"),
    format="pdf",
    prog="dot",
)