# flow chart

In [1]:
from my_happy_graphviz import pydot
from my_happy_jupyter_utils import (
    image_utils
)


# Directed graph that all nodes and edges of the flow chart are attached to.
G = pydot.Dot(graph_type='digraph')

# Data-source nodes plus the NLP pre-processing stage; all are drawn as
# 'box3d' and registered on the graph in the order listed below.
sd_node = pydot.Node("Structured\nData\n(ODBC, CSV, XLS)")
qsd_node = pydot.Node("Quasistructured\nData\n(PACS, RIS, JSON)")
sds_node = pydot.Node("Streaming\nData Sources\n(MQ, Kafka, IoT)")
usd_node = pydot.Node("Unstructured\nData\n(Text, DOCX)")
nlp_node = pydot.Node("NLP Processing\n(Word2Vec,\nBag of Words,\nParseNet)")

for boxed_node in (sd_node, qsd_node, sds_node, usd_node, nlp_node):
    boxed_node.set_shape('box3d')
    G.add_node(boxed_node)

# Central analytics engine; no explicit shape is set on this node.
riq_node = pydot.Node("4Quant\nAnalytics\nEngine")
G.add_node(riq_node)


# Ingestion edges, as (tail, head, label, style) in the order they are added
# to the graph.  A style of None means no style attribute is set on the edge.
# The two usd_node -> nlp_node rows are deliberate parallel edges carrying
# different labels.
ingestion_edges = (
    (sd_node, riq_node, 'Tables', None),
    (qsd_node, riq_node, 'Key-Value\nStores', None),
    (usd_node, nlp_node, 'Keyword\nExtraction', None),
    (usd_node, nlp_node, 'Category', None),
    (nlp_node, riq_node, 'Key-Value\nStore', None),
    (sds_node, riq_node, 'Minibatch\nDatasets', 'dashed'),
)

for tail, head, caption, line_style in ingestion_edges:
    iedge = pydot.Edge(tail, head)
    iedge.set_label(caption)
    if line_style is not None:
        iedge.set_style(line_style)
    G.add_edge(iedge)

# Processing tier: the Apache Spark node fed by the analytics engine.
asp_node = pydot.Node('Apache Spark')
asp_node.set_shape('square')
G.add_node(asp_node)


# Storage tier: the HDFS node that Spark writes to.
hadoop_node = pydot.Node('Distributed\nHadoop\nFilesystem (HDFS)')
hadoop_node.set_shape('box3d')
G.add_node(hadoop_node)


# Two parallel edges from the engine to Spark: a solid one for batch
# datasets and a dashed one for streaming datasets.  The labels were
# previously misspelled ('Redudant' / 'Reduntant'); the spelling is fixed
# here because the text is rendered verbatim in the chart.
# NOTE(review): Spark's RDD stands for "Resilient Distributed Dataset" --
# confirm whether 'Redundant' is really the intended wording.
iedge = pydot.Edge(riq_node, asp_node)
iedge.set_label('Redundant\nDistributed\nDatasets')
G.add_edge(iedge)

iedge = pydot.Edge(riq_node, asp_node)
iedge.set_label('Streaming\nRedundant\nDistributed\nDatasets')
iedge.set_style('dashed')
G.add_edge(iedge)


# Spark -> HDFS; penwidth 3 is the thickest line used in this chart.
iedge = pydot.Edge(asp_node, hadoop_node)
iedge.set_label('Parquet\nDistributed\nColumn\nStore')
iedge.set_penwidth(3)
G.add_edge(iedge)


# Downstream consumers of the Spark output.  The order of add_node/add_edge
# calls is kept exactly as before (ODBC node, Excel node, two edges, SAP
# node, final edge) in case it influences the emitted DOT text.
odbc_node = pydot.Node('ODBC-Hive\nData View')
xls_node = pydot.Node('Excel Workbook\nSummaries')

for consumer_node, outline in ((odbc_node, 'box3d'), (xls_node, 'box')):
    consumer_node.set_shape(outline)
    G.add_node(consumer_node)

# Spark -> ODBC view -> Excel summaries, both drawn with penwidth 2.
for tail, head in ((asp_node, odbc_node), (odbc_node, xls_node)):
    iedge = pydot.Edge(tail, head)
    iedge.set_penwidth(2)
    G.add_edge(iedge)

# SAP HANA is rendered with shape 'none' and linked from the ODBC view
# with a penwidth-1 edge.
sap_node = pydot.Node('SAP HANA')
sap_node.set_shape('none')
G.add_node(sap_node)

iedge = pydot.Edge(odbc_node, sap_node)
iedge.set_penwidth(1)
G.add_edge(iedge)


# Bot nodes attached to the analytics engine, as (label, has_return_edge).
# Every bot gets a dashed engine -> bot edge; bots flagged True also get a
# dashed bot -> engine edge.
# NOTE(review): 'Genomics\nBot' has no return edge, unlike the other two
# bots -- reproduced as-is; confirm whether that asymmetry is intentional.
bot_specs = (
    ('Image\nBot', True),
    ('Genomics\nBot', False),
    ('DxRx\nBot', True),
)

for bot_label, has_return_edge in bot_specs:
    ibot_node = pydot.Node(bot_label)
    ibot_node.set_shape('triangle')
    G.add_node(ibot_node)

    endpoints = [(riq_node, ibot_node)]
    if has_return_edge:
        endpoints.append((ibot_node, riq_node))

    for tail, head in endpoints:
        iedge = pydot.Edge(tail, head)
        iedge.set_penwidth(1)
        iedge.set_style('dashed')
        G.add_edge(iedge)

# Dump the generated DOT source so it appears in the cell output.
dot_source = G.to_string()
print(dot_source)

# Destination of the rendered chart, joined with '/' as a relative path.
output_dir = 'data/output/images'
svg_name = '0501_0101_flow_chart.svg'
file_name = '/'.join((output_dir, svg_name))

# NOTE(review): `draw` comes from the project's pydot wrapper; presumably it
# renders the graph to `file_name` -- confirm against my_happy_graphviz.
G.draw(file_name)

# Kept as the last expression of the cell so its result is still displayed.
image_utils.show_image_with_title_by_url(
    dict(file_path=file_name, title='flow chart')
)

digraph G {
"Structured\nData\n(ODBC, CSV, XLS)" [shape=box3d];
"Quasistructured\nData\n(PACS, RIS, JSON)" [shape=box3d];
"Streaming\nData Sources\n(MQ, Kafka, IoT)" [shape=box3d];
"Unstructured\nData\n(Text, DOCX)" [shape=box3d];
"NLP Processing\n(Word2Vec,\nBag of Words,\nParseNet)" [shape=box3d];
"4Quant\nAnalytics\nEngine";
"Structured\nData\n(ODBC, CSV, XLS)" -> "4Quant\nAnalytics\nEngine"  [label=Tables];
"Quasistructured\nData\n(PACS, RIS, JSON)" -> "4Quant\nAnalytics\nEngine"  [label="Key-Value\nStores"];
"Unstructured\nData\n(Text, DOCX)" -> "NLP Processing\n(Word2Vec,\nBag of Words,\nParseNet)"  [label="Keyword\nExtraction"];
"Unstructured\nData\n(Text, DOCX)" -> "NLP Processing\n(Word2Vec,\nBag of Words,\nParseNet)"  [label=Category];
"NLP Processing\n(Word2Vec,\nBag of Words,\nParseNet)" -> "4Quant\nAnalytics\nEngine"  [label="Key-Value\nStore"];
"Streaming\nData Sources\n(MQ, Kafka, IoT)" -> "4Quant\nAnalytics\nEngine"  [label="Minibatch\nDatasets", style=dashed];
"Apache 