# Make a one-generation graph

## Pseudocode

### Display a graph for a character node

- show direct parents
- show direct children

Reference: [Bellman–Ford algorithm](https://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm)

## Library Imports

In [10]:
import pandas as pd
import os, inspect, graphviz

## Peek at data

In [4]:
# List the raw-data directory to see which files are available to load.
os.listdir('../data/raw/')

['WikiCCD_2021-07-18.csv']

In [6]:
# Load the WikiCCD snapshot. The functions in this notebook read this
# DataFrame through the module-level name `df` rather than taking it as an
# argument, so this cell has to run before any of them is called.
df = pd.read_csv('../data/raw/WikiCCD_2021-07-18.csv')

In [218]:
def add_node_and_nontrivial_edges(idx:int, dot:graphviz.Digraph):
    """
    Add the character at row ``idx`` of ``df`` to ``dot``, plus edges from its
    components.

    A node is always created for the row's ``Component``. When the left
    component equals the component itself the row is treated as having no
    decomposition and no edges are added. Otherwise an edge
    ``sub-component -> component`` is added for each side that
    ``is_relevant`` accepts.

    Parameters
    ----------
    idx : row label in the module-level DataFrame ``df``.
    dot : graph that is mutated in place.

    Returns
    -------
    None
    """
    c, lc, rc = df.loc[idx, ['Component', 'LeftComponent', 'RightComponent']].tolist()
    dot.node(c)
    if c == lc:
        return None
    # Use edge(tail, head) instead of edges(["<tail><head>"]): the latter
    # unpacks each string into exactly two characters, so it only works when
    # both node names are a single character long.
    if is_relevant(idx, 'LeftComponent'):
        dot.edge(lc, c)
    if is_relevant(idx, 'RightComponent'):
        dot.edge(rc, c)
    return None

In [285]:
def get_component_indices(i):
    """
    Look up the rows that describe the left and right sub-components of row ``i``.

    Returns a dict keyed by ``'LeftComponent'`` and ``'RightComponent'``. Each
    value is whatever ``find_index`` returns for that sub-component, or
    ``None`` when the sub-component is the same as the row's own ``Component``.
    """
    whole = df.loc[i, 'Component']
    found = {}
    for side in ('LeftComponent', 'RightComponent'):
        part = df.loc[i, side]
        # A side that merely repeats the component itself is not looked up.
        found[side] = find_index(part) if part != whole else None
    return found

In [267]:
def find_index(c:str) -> int:
    """
    Return the index label of the first row of ``df`` whose ``Component``
    equals ``c``, or ``None`` when no row matches.
    """
    hits = df.loc[df.Component == c].index
    if len(hits) == 0:
        return None
    return hits[0]

In [310]:
def get_all_ancestors(idx):
    """
    Collect the character at row ``idx`` together with every component it
    decomposes into, directly or transitively.

    The table is walked breadth-first: each row is expanded once through
    ``get_component_indices`` and every sub-component that resolves to a row
    is recorded and queued for expansion in turn.

    Parameters
    ----------
    idx : row label in the module-level DataFrame ``df`` to start from.

    Returns
    -------
    dict
        ``{dataframe_index: character}`` in order of first discovery, starting
        with ``idx`` itself.
    """
    from collections import deque

    # all-ancestors dict; {dataframe_index: character}. It doubles as the
    # visited set, so a row is never expanded twice and cyclic data cannot
    # loop forever.
    aa_dict = {idx: df.loc[idx, 'Component']}
    # rows whose sub-components still have to be looked up (FIFO => breadth-first)
    pending = deque([idx])
    while pending:
        i = pending.popleft()
        for k, sub_idx in get_component_indices(i).items():
            # Compare against None explicitly: row index 0 is a valid hit.
            if sub_idx is None or sub_idx in aa_dict:
                continue
            # Keep the whole cell value, not just its first character.
            aa_dict[sub_idx] = df.loc[i, k]
            pending.append(sub_idx)

    return aa_dict

In [311]:
# Spot-check on a single row: show the row as a one-line table, then print the
# {dataframe_index: character} map that get_all_ancestors builds for it.
n = 6000
display(df.loc[n].to_frame().T)
print(get_all_ancestors(n))

Unnamed: 0,Component,Strokes,CompositionType,LeftComponent,LeftStrokes,RightComponent,RightStrokes,Signature,Notes,Section
6000,敯,13,吅,昬,9,攴,4,RAYE,/,攵


{6000: '敯', 6189: '昬', 5941: '攴', 7698: '民', 6118: '日', 1372: '卜', 1480: '又'}


In [331]:
# Start from an empty directed graph. Rebinding `dot` already discards any
# graph left over from an earlier run; an explicit `del dot` would raise
# NameError when the notebook is executed on a fresh kernel.
dot = graphviz.Digraph(comment='graph 99 chars')
dot.source

'// graph 99 chars\ndigraph {\n}\n'

In [332]:
# Draw the first character and everything it decomposes into.
mychar1 = '愛'
n1 = find_index(mychar1)
aa1 = get_all_ancestors(n1)
for i in aa1:
    add_node_and_nontrivial_edges(i, dot)

# Draw the second character into the same graph.
mychar2 = '潘'
n2 = find_index(mychar2)
aa2 = get_all_ancestors(n2)
for i in aa2:
    add_node_and_nontrivial_edges(i, dot)

In [333]:
# Render the graph to a PDF file. No file name is passed, so graphviz picks
# one; the call returns the path of the file it wrote.
dot.render(format='pdf')

'Digraph.gv.pdf'

In [334]:
# Open the rendered PDF in the evince viewer (IPython shell escape).
!evince Digraph.gv.pdf