In [4]:
import os
import math

In [61]:
def get_label(longname):
    """
    Build the short display label for a long node name.

    Only the last '/'-separated component of longname is considered.  A
    component of at most 10 characters is returned as is; a longer one is
    cut down to its first 7 characters followed by "...".
    """
    basename = longname.rsplit('/', 1)[-1]
    return basename if len(basename) <= 10 else basename[:7] + "..."

def get_rel_paths(here_path, root_path):
    """
    Given the current full path and the root path, return the
    relative path to the directory above, and the relative path
    to this full path.

    Both paths are expected to use '/' as the separator, and here_path is
    expected to be root_path itself or something beneath it.  Trailing
    slashes on either argument are ignored, so '/a/b' and '/a/b/' name the
    same root.

    Returns a (rel_path_to_parent, rel_path_to_here) tuple of strings:
      * ('.', '.') when here_path is the root,
      * ('.', name) for a direct child of the root,
      * (parent_rel, parent_rel + '/' + name) for anything deeper.
    """
    # Drop trailing slashes first: otherwise split() yields an extra empty
    # component and the component counts used below are off by one.
    here = here_path.rstrip("/")
    root = root_path.rstrip("/")

    if here == root:
        return '.', '.'

    # Everything after the root's own components is the relative path.
    root_depth = len(root.split("/"))
    rel_parts = here.split("/")[root_depth:]

    rel_path_to_here = '/'.join(rel_parts)
    # A direct child of the root has no intermediate parts; its parent is '.'.
    rel_path_to_parent = '/'.join(rel_parts[:-1]) or '.'
    return rel_path_to_parent, rel_path_to_here

def remove_excluded_subdirs(dir_list):
    """
    Prune, in place, the sub-directories that os.walk() should not visit.

    With os.walk() we can restrict the branches that get traversed by
    editing the list it yields as 'subdirs'.  The edit has to happen on
    that very list object (rebinding the name to a new list has no effect
    on the walk), which is why the result is written back through a slice
    assignment instead of being returned.

    This function removes 'reveal.js', '.git', and anything starting with
    '_' from dir_list.  Every occurrence is removed and the relative order
    of the remaining names is kept.  Returns None.
    """
    # Build the filtered list first, then overwrite the contents in one
    # step; this avoids editing the list while looping over it.
    dir_list[:] = [
        nm for nm in dir_list
        if nm not in ('reveal.js', '.git') and not nm.startswith('_')
    ]

def show_this_leaf(nm):
    """
    Tell whether the leaf node called nm should be drawn.

    False is returned for names beginning with '.' or '_' and for editor
    backup files (names ending in '~', or names that both start and end
    with '#').  Every other name yields True.
    """
    is_hidden = nm.startswith(('.', '_'))
    is_editor_backup = nm.endswith('~') or (nm.startswith('#') and nm.endswith('#'))
    return not (is_hidden or is_editor_backup)

class Node:
    """
    One node of the tree that gets written out as Graphviz DOT code.

    Attributes:
        parent: the parent Node, or None for the root.
        name: the long, tree-unique name; used as the DOT node identifier.
        label: the short display label derived from name by get_label().
        kids: child Nodes, in the order they were created.
        descendant_count: number of descendants recorded so far through
            add_descendant_to_all_ancestors().
    """

    def __init__(self, parent_node, name):
        """
        parent_node should be the Node instance of the parent, or None for
        the root of the tree.

        name is a long name, like a full relative path.  It needs to be
        unique for all nodes in the tree.

        Note how we make a connection to the parent node when this node is
        created.
        """
        self.parent = parent_node
        if self.parent is not None:
            self.parent._add_kid(self)
        self.name = name
        self.label = get_label(self.name)
        self.kids = []
        self.descendant_count = 0

    def _add_kid(self, kid_node):
        """Register kid_node as a child of this node."""
        self.kids.append(kid_node)

    def add_descendant_to_all_ancestors(self):
        """
        Record this node as a descendant of every one of its ancestors.

        Walks up the parent links from this node's parent to the root and
        increments descendant_count on each node along the way.  This
        node's own count is left untouched.
        """
        current = self.parent
        while current is not None:
            current.descendant_count += 1
            current = current.parent

    @staticmethod
    def _dot_escape(text):
        """Escape double quotes so text can sit inside a DOT quoted string."""
        return text.replace('"', '\\"')

    def write_node(self, indent=0):
        """
        Print the DOT statement that defines this node.

        Leaf nodes (no kids) are drawn as rectangles, all others as
        ellipses.  Names ending in '.png', '.jpg' or '.svg' are filled
        blue, everything else light grey.  'indent' is the number of
        leading spaces.
        """
        shape = "ellipse" if self.kids else "rect"

        # Check file extension for color fill
        if self.name.endswith(('.png', '.jpg', '.svg')):
            fillcolor = "blue"
        else:
            fillcolor = "lightgrey"

        # Name and label end up between double quotes in the output, so any
        # double quote they contain has to be escaped to keep the DOT valid.
        pad = indent * " "
        name = self._dot_escape(self.name)
        label = self._dot_escape(self.label)
        print(f'{pad}"{name}" [label="{label}", shape={shape}, style="filled", fillcolor={fillcolor}];')

    def traverse_node_defs(self, indent=0):
        """
        Write the DOT code that defines this Node and all its descendants.

        Here and in traverse_edge_defs(), 'indent' just helps with formatting
        when writing out the DOT code.
        """
        self.write_node(indent=indent)
        for kid in self.kids:
            kid.traverse_node_defs(indent+4)

    def write_incoming_edge(self, this_parent, indent=0):
        """
        Print the DOT statement for the edge from this_parent to this node.

        The pen width is the square root of this node's descendant_count,
        but never less than 1.
        """
        penwidth = max(math.sqrt(self.descendant_count), 1)

        pad = indent * " "
        tail = self._dot_escape(this_parent.name)
        head = self._dot_escape(self.name)
        print(f'{pad}"{tail}" -> "{head}" [penwidth={penwidth}];')

    def traverse_edge_defs(self, indent=0):
        """
        Write the DOT code that defines the incoming edges for this Node's
        kids and, recursively, for all their descendants.

        Here and in traverse_node_defs(), 'indent' just helps with formatting
        when writing out the DOT code.
        """
        for kid in self.kids:
            kid.write_incoming_edge(self, indent=indent+4)
            kid.traverse_edge_defs(indent+4)


# Registry of every Node, keyed by its path relative to the root, so that a
# directory's Node can be looked up when its children are created.  The
# root itself is registered under '.'.
root_path = '/Users/77wu/desktop/docs'  # path to the docs
root_node = Node(None, '.')
nodes = {'.': root_node}

# Walk the tree, creating one Node per directory and per visible file
for dirname, subdirs, files in os.walk(root_path):
    # Prune in place so that the walk skips the excluded branches
    remove_excluded_subdirs(subdirs)
    rel_dir_path, rel_path = get_rel_paths(dirname, root_path)
    if rel_path not in nodes:
        # Every directory except the root: hang it under its parent's Node
        assert rel_dir_path in nodes
        nodes[rel_path] = Node(nodes[rel_dir_path], rel_path)
    dir_node = nodes[rel_path]

    # Add nodes for all the children of this dir
    for file in files:
        if not show_this_leaf(file):
            continue
        full_path = os.path.join(dirname, file)
        ignore_this, rel_path = get_rel_paths(full_path, root_path)
        this_node = Node(dir_node, rel_path)
        nodes[rel_path] = this_node

# Let every node report itself to all of its ancestors
for node in nodes:
    nodes[node].add_descendant_to_all_ancestors()

# Write out the Dot code: all node definitions first, then all edges
print("digraph {")
root_node.traverse_node_defs()
root_node.traverse_edge_defs()
print("}")


digraph {
"." [label=".", shape=ellipse, style="filled", fillcolor=lightgrey];
    "assignment_matplotlib.html" [label="assignm...", shape=rect, style="filled", fillcolor=lightgrey];
    "assignment_maps_body.md" [label="assignm...", shape=rect, style="filled", fillcolor=lightgrey];
    "idioms_for_statistics.html" [label="idioms_...", shape=rect, style="filled", fillcolor=lightgrey];
    "assignment_ipywidgets_body.md" [label="assignm...", shape=rect, style="filled", fillcolor=lightgrey];
    "assignment_ggplot.html" [label="assignm...", shape=rect, style="filled", fillcolor=lightgrey];
    "graphs_with_nodes_and_edges.html" [label="graphs_...", shape=rect, style="filled", fillcolor=lightgrey];
    "overview.md" [label="overvie...", shape=rect, style="filled", fillcolor=lightgrey];
    "git_and_github.html" [label="git_and...", shape=rect, style="filled", fillcolor=lightgrey];
    "ipywidgets_intro.html" [label="ipywidg...", shape=rect, style="filled", fillcolor=lightgrey];
    "dashb