# Make a one-generation graph

## Pseudocode

### Display a graph for a character node

- show direct parents
- show direct children

Reference: [Bellman–Ford algorithm](https://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm)

## Setup

In [1]:
import pandas as pd
import os, inspect, graphviz

In [2]:
# List the raw-data directory to see which input files are available.
os.listdir('../data/raw/')

['WikiCCD_2021-07-18.csv']

In [25]:
# Load the raw component table, keep every row except three duplicate rows
# (identified by their row labels), then key the table by component.
df = (
    pd.read_csv('../data/raw/WikiCCD_2021-07-18.csv')
    .loc[lambda frame: frame.index.difference([3618, 21004, 21100])]  # remove some duplicates
    .set_index('Component')
)

## get and graph vertical family trees

### get all ancestors

In [94]:
def get_all_ancestors(comp: str) -> list[str]:
    """
        Collect a component and every ancestor that has its own record.

        The ancestry is walked breadth-first via
        `get_parents_that_have_a_record`. Each component is listed (and
        expanded) only once, so an ancestor shared by several branches does
        not produce duplicates and a cyclic record cannot loop forever.

        INPUTS: character component as str
        OUTPUTS: list starting with `comp` itself, followed by all ancestor
            components in the order they are first reached
    """
    from collections import deque  # local import keeps this cell self-contained

    # components whose parents still have to be looked up
    aa_queue = deque([comp])
    # everything already listed, for O(1) duplicate checks
    seen = {comp}
    aa_list = [comp]

    while aa_queue:
        c = aa_queue.popleft()

        parents = get_parents_that_have_a_record(c)
        for parent in parents.values():
            # None marks "no parent record"; skip those and repeats
            if parent and parent not in seen:
                seen.add(parent)
                aa_queue.append(parent)
                aa_list.append(parent)
    return aa_list

#### helper functions

In [92]:
def get_parents_that_have_a_record(comp:str) -> dict:
    """
        Look up the left and right sub-/parent-components of `comp` in `df`.

        For each side, the dict value is the parent component when
            - the parent is different from `comp`, and
            - `check_for_parents` finds a record for it;
        otherwise the value is None.

        OUTPUTS: dict with the keys 'LeftComponent' and 'RightComponent'
    """
    left, right = df.loc[comp][['LeftComponent','RightComponent']].to_list()

    # a component listed as its own parent is never looked up
    left_parent = None if left == comp else check_for_parents(left)
    right_parent = None if right == comp else check_for_parents(right)

    return {'LeftComponent': left_parent, 'RightComponent': right_parent}

In [49]:
def check_for_parents(comp: str) -> 'str | None':
    """
        If a string is a component with its own record of parents in `df`,
            then return that record's index label.
        Else,
            return None.

        INPUTS: candidate component
        OUTPUTS: the index label of the matching row (`df` is indexed by
            component, so this is the component itself), or None when the
            lookup raises KeyError
    """
    # only the lookup itself is guarded, so unrelated errors are not hidden
    try:
        record = df.loc[comp]
    except KeyError:
        return None
    return record.name

### get kids 

In [67]:
def get_all_descendents(comp: str) -> list[str]:
    """
        Collect a component and all of its descendants from the global
        children lookup `kdb`.

        The walk is breadth-first. Each component is listed (and expanded)
        only once, so a child reachable along several paths is not repeated
        and a component recorded as its own child cannot loop forever.

        INPUTS: target component
        OUTPUTS: list starting with `comp` itself, followed by all descendant
            components in the order they are first reached
        RAISES: KeyError if `comp` is not a key of `kdb`
    """
    from collections import deque  # local import keeps this cell self-contained

    # components whose kids still have to be looked up
    ad_queue = deque([comp])
    # everything already listed, for O(1) duplicate checks
    seen = {comp}
    ad_list = [comp]

    while ad_queue:
        c = ad_queue.popleft()
        for kid in kdb[c]:
            if kid not in seen:
                seen.add(kid)
                ad_queue.append(kid)
                ad_list.append(kid)

    return ad_list

#### helper functions

In [64]:
def get_kids_db() -> dict:
    """
    Create database of immediate children-components, for quick searching in that direction.

    OUTPUTS: dict mapping every component in `df.index` to the list of
        components that name it as their LeftComponent or RightComponent.
        A child that uses the same parent on both sides is listed twice.
        A component that names itself as its parent is not recorded as its
        own child, mirroring the self-reference check in the parent lookup.
    """
    kids = {c: [] for c in df.index}

    parent_columns = df[['LeftComponent', 'RightComponent']]
    # name=None yields plain (index, left, right) tuples
    for c, lc, rc in parent_columns.itertuples(name=None):
        for xc in (lc, rc):
            # skip self-references; a component is not its own child
            if xc == c:
                continue
            # parents without their own record (or missing values) have no
            # entry in `kids` and are ignored
            if xc in kids:
                kids[xc].append(c)
    return kids

### graph

In [367]:
def vertical_family_tree(comp: str = '雲', directory: str = '../data/processed/'):
    """
        Build and render the vertical family tree of one component.

        The graph contains `comp`, all ancestors returned by
        `get_all_ancestors` (with the edges from their recorded parents),
        and the direct children of `comp`.

        INPUTS:
            comp: component at the centre of the tree
            directory: folder the PDF is rendered into
        OUTPUTS: the graphviz.Digraph that was rendered
    """
    dot = graphviz.Digraph(comment='vertical family tree')

    for c in get_all_ancestors(comp):
        add_nodes_and_edges_parents(c, dot)

    # `add_edges_kids` is not defined in this notebook; the kids helper is
    # `add_nodes_and_edges_kids`
    add_nodes_and_edges_kids(comp, dot)

    # Drop a stale PDF from an earlier run before rendering. os.path.join
    # also works when `directory` has no trailing separator.
    try:
        os.remove(os.path.join(directory, 'Digraph.gv.pdf'))
    except FileNotFoundError:
        pass  # nothing to clean up
    dot.render(format='pdf', directory=directory)

    return dot

In [310]:
def add_nodes_and_edges_parents(comp: str, dot: graphviz.Digraph = None):
    """
        Add `comp` as a node and draw an edge from each recorded parent.

        INPUTS:
            comp: component to add; must have a record in `df`
            dot: graph to draw into. When omitted, the notebook-level `dot`
                is looked up at call time. (A `dot=dot` default would be
                evaluated when the function is defined, which fails if no
                graph exists yet and otherwise pins a stale graph.)
        OUTPUTS: None; `dot` is modified in place
        RAISES: ValueError if `dot` is omitted and no notebook-level `dot`
            exists
    """
    if dot is None:
        try:
            dot = globals()['dot']
        except KeyError:
            raise ValueError(
                "no graph passed and no module-level `dot` is defined"
            ) from None

    lc, rc = df.loc[comp, ['LeftComponent', 'RightComponent']].tolist()

    dot.node(comp)

    # a component that is its own left component gets no parent edges
    if comp == lc:
        return None

    parents = get_parents_that_have_a_record(comp)
    # edge(tail, head) takes the names separately, so it also works for
    # names longer than one character (a joined string passed to edges()
    # only unpacks into a pair when both names are single characters)
    if parents['LeftComponent']:
        dot.edge(lc, comp)
    if parents['RightComponent']:
        dot.edge(rc, comp)
    return None

In [311]:
def add_nodes_and_edges_kids(comp: str, dot: graphviz.Digraph):
    """
        Add the direct children of `comp` to the graph, with an edge from
        `comp` to each of them.

        The children are read from the notebook-level lookup `kdb` (the
        result of `get_kids_db()`).

        INPUTS:
            comp: component whose children are drawn
            dot: graph to draw into
        OUTPUTS: None; `dot` is modified in place
        RAISES: KeyError if `comp` is not a key of `kdb`
    """
    kids = kdb[comp]

    # Only declare nodes whose name does not already appear in the graph
    # body. A substring test also works for names longer than one character.
    dot_body_joined = ''.join(dot.body)
    for c in kids + [comp]:
        if c not in dot_body_joined:
            dot.node(c)

    for c in kids:
        # no self-loop if the lookup lists a component as its own child
        if c == comp:
            continue
        # edge(tail, head) takes the names separately, so multi-character
        # names work too
        dot.edge(comp, c)

### get_and_graph

In [364]:
# Build the component -> immediate-children lookup once; get_all_descendents
# reads it as the global `kdb`.
kdb = get_kids_db()

In [370]:
# Render the family tree with the function's default component and output
# directory, and keep the returned graph object.
dot = vertical_family_tree()

In [371]:
# Open the rendered PDF with the `evince` command (IPython shell escape).
!evince ../data/processed/Digraph.gv.pdf

## connect two trees

(this will be the next step)