# Directory Tree Visualization

> Generate tree visualizations for nbdev project structure

In [None]:
#| default_exp tree

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from __future__ import annotations
from pathlib import Path
from typing import List, Optional, Dict, Any
from nbdev.config import get_config
from cjm_nbdev_overview.core import *
import re

## Basic Tree Generation

First, let's implement the basic tree generation without descriptions:

In [None]:
#| export
# Extra columns added to the longest entry line when computing the column
# to which notebook lines are padded before their `# description` comment.
ALIGNMENT_BUFFER = 1

In [None]:
#| export
def strip_markdown_links(
    text: str  # Text that may contain Markdown links of the form [label](target)
) -> str:  # The same text with every link replaced by its label
    "Strip Markdown links from text, keeping only the link text"
    # Pattern matches [link text](target).
    # Inside the target, a balanced `(...)` group is consumed as a unit so that
    # URLs such as https://en.wikipedia.org/wiki/Tree_(data_structure) are removed
    # completely instead of leaving a stray `)` behind. Any other character except
    # `)` is accepted one at a time, which keeps the previous behaviour for
    # targets without parentheses (including `url "title"` forms).
    pattern = r'\[([^\]]+)\]\((?:\([^()]*\)|[^)])+\)'
    return re.sub(pattern, r'\1', text)

In [None]:
#| export
def generate_tree_lines(path: Path,                         # Directory to visualize
                       prefix: str = "",                    # Line prefix for tree structure
                       is_last: bool = True,                # Is this the last item in parent
                       show_notebooks_only: bool = False,   # Only show notebooks, not directories
                       max_depth: Optional[int] = None,     # Maximum depth to traverse
                       current_depth: int = 0,              # Current depth in traversal
                       exclude_index: bool = True           # Exclude index.ipynb from tree
                       ) -> List[str]:                      # Lines of tree output
    "Build the connector-prefixed tree lines for everything directly or indirectly below `path`"
    # Nothing is listed once the requested depth has been reached
    if max_depth is not None and current_depth >= max_depth:
        return []

    # Directories come from a plain directory listing; notebooks from the project helper
    child_dirs = sorted(entry for entry in path.iterdir() if entry.is_dir())
    nb_files = get_notebook_files(path, recursive=False)
    if exclude_index:
        nb_files = [nb for nb in nb_files if nb.name not in ['index.ipynb', '00_index.ipynb']]

    # Each entry is (path, is_directory); directories are listed ahead of notebooks
    entries = [(nb, False) for nb in nb_files]
    if not show_notebooks_only:
        entries = [(folder, True) for folder in child_dirs] + entries

    out = []
    final_pos = len(entries) - 1
    for pos, (entry, is_directory) in enumerate(entries):
        closing = pos == final_pos
        branch = "└── " if closing else "├── "
        label = f"{prefix}{branch}{entry.name}"

        if not is_directory:
            out.append(label)
            continue

        # Directories get a trailing slash and their contents indented one level deeper;
        # a vertical bar is kept in the indent while siblings still follow.
        out.append(f"{label}/")
        deeper_prefix = prefix + ("    " if closing else "│   ")
        out.extend(generate_tree_lines(
            entry, deeper_prefix, closing, show_notebooks_only,
            max_depth, current_depth + 1, exclude_index
        ))

    return out

In [None]:
#| export
def generate_tree(path: Path = None,                    # Directory to visualize (defaults to nbs_path)
                 show_notebooks_only: bool = False,     # Only show notebooks, not directories
                 max_depth: Optional[int] = None,       # Maximum depth to traverse
                 exclude_index: bool = True             # Exclude index.ipynb from tree
                 ) -> str:                              # Tree visualization as string
    "Render a directory as a newline-joined tree, headed by the directory's own name"
    # Fall back to the notebooks directory from the nbdev config
    root = get_config().nbs_path if path is None else path

    # The first line is the root directory itself
    header = f"{root.name}/"

    # Everything below the root, starting at depth 0 with an empty prefix
    body = generate_tree_lines(root, "", True, show_notebooks_only, max_depth, 0, exclude_index)

    return '\n'.join([header, *body])

## Testing Basic Tree

Let's test the basic tree generation on our project:

In [None]:
# Render the configured notebooks directory (path defaults to nbs_path) as a plain tree,
# listing notebooks only
print(generate_tree(show_notebooks_only=True))

nbs/
├── 00_core.ipynb
├── 01_parsers.ipynb
├── 02_tree.ipynb
├── 03_api_docs.ipynb
├── 04_dependencies.ipynb
├── 05_generators.ipynb
└── 06_cli.ipynb


## Parsing Notebook Information

Now let's add a function that extracts the title, description, and export module from a notebook:

In [None]:
#| export
def extract_notebook_info(path: Path                    # Path to notebook file
                         ) -> NotebookInfo:             # Notebook information
    "Read a notebook and collect its export module, H1 title and blockquote description"
    info = NotebookInfo(path=path, name=path.stem)

    try:
        cells = read_notebook(path).get('cells', [])
        if not cells:
            return info

        # The `#| default_exp` directive is only searched for in the first five cells;
        # when several of them carry one, the last match wins.
        for cell in cells[:5]:
            if cell.get('cell_type') != 'code':
                continue
            code = get_cell_source(cell)
            if '#| default_exp' not in code:
                continue
            found = re.search(r'#\|\s*default_exp\s+(\S+)', code)
            if found:
                info.export_module = found.group(1)

        # Scan markdown cells in order until one of them yields a title
        for cell in cells:
            if cell.get('cell_type') != 'markdown':
                continue

            md_lines = [raw.strip() for raw in get_cell_source(cell).strip().split('\n')]

            for idx, text in enumerate(md_lines):
                if not text.startswith('# '):
                    continue

                # First H1 line of the cell becomes the title
                info.title = text[2:].strip()

                # The description is the first `> ` line after the title; blank lines
                # and other `>`-prefixed lines are skipped, anything else ends the search.
                for follower in md_lines[idx + 1:]:
                    if follower.startswith('> '):
                        info.description = follower[2:].strip()
                        break
                    if follower and not follower.startswith('>'):
                        break
                break

            if info.title:
                break

    except Exception:
        # Best effort: an unreadable notebook still yields the basic path/name info
        pass

    return info

In [None]:
# Extract the title, description, and export module from a single notebook and print them.
# NOTE(review): the tree listing earlier in this notebook shows `00_core.ipynb`; confirm that
# "core.ipynb" still resolves, because extract_notebook_info swallows read errors and would
# then return only the basic path/name info.
nb_info = extract_notebook_info(Path("core.ipynb"))
print(f"Title: {nb_info.title}")
print(f"Description: {nb_info.description}")
print(f"Export module: {nb_info.export_module}")

Title: Core Utilities
Description: Core utilities and data models for nbdev project overview generation
Export module: core


## Tree Generation with Descriptions

Now let's create the enhanced tree generation that includes descriptions:

In [None]:
#| export
def generate_tree_with_descriptions(path: Path = None,              # Directory to visualize
                                   show_counts: bool = True,        # Show notebook counts for directories
                                   max_depth: Optional[int] = None, # Maximum depth to traverse
                                   exclude_index: bool = True       # Exclude index.ipynb from tree
                                   ) -> str:                        # Tree with descriptions
    "Render a directory tree whose notebook lines carry an aligned `# description` comment"
    if path is None:
        path = get_config().nbs_path

    subdirs = get_subdirectories(path, recursive=False)
    root_line = f"{path.name}/"

    # Nested layout: directories (optionally with counts) and notebooks are handled recursively
    if len(subdirs) != 0:
        nested = _generate_nested_tree_lines(path, "", show_counts, max_depth, 0, exclude_index)
        return '\n'.join([root_line, *nested])

    # Flat layout: only notebooks directly inside `path`
    notebooks = get_notebook_files(path, recursive=False)
    if exclude_index:
        notebooks = [nb for nb in notebooks if nb.name not in ['index.ipynb', '00_index.ipynb']]

    # Pass 1: build each undecorated line together with its cleaned description (or None)
    rows = []
    final_pos = len(notebooks) - 1
    for pos, nb_path in enumerate(notebooks):
        branch = "└── " if pos == final_pos else "├── "
        summary = extract_notebook_info(nb_path).description
        note = strip_markdown_links(summary) if summary else None
        rows.append((f"{branch}{nb_path.name}", note))

    # Pass 2: pad every described line to the widest entry plus the buffer
    column = max((len(stem) for stem, _ in rows), default=0) + ALIGNMENT_BUFFER

    rendered = [root_line]
    for stem, note in rows:
        if note:
            rendered.append(stem.ljust(column) + f"# {note}")
        else:
            rendered.append(stem)

    return '\n'.join(rendered)

In [None]:
#| export
def _generate_nested_tree_lines(path: Path,                         # Directory to process
                               prefix: str = "",                    # Line prefix
                               show_counts: bool = True,            # Show notebook counts
                               max_depth: Optional[int] = None,     # Maximum depth
                               current_depth: int = 0,              # Current depth
                               exclude_index: bool = True           # Exclude index.ipynb from tree
                               ) -> List[str]:                      # Tree lines
    "Build annotated tree lines for `path`: directories with notebook counts, notebooks with descriptions"
    # Stop descending once the depth limit is reached
    if max_depth is not None and current_depth >= max_depth:
        return []

    child_dirs = get_subdirectories(path, recursive=False)
    nb_files = get_notebook_files(path, recursive=False)
    if exclude_index:
        nb_files = [nb for nb in nb_files if nb.name not in ['index.ipynb', '00_index.ipynb']]

    # Entries are (path, is_directory, annotation); directories precede notebooks
    entries = []

    for child in child_dirs:
        # The count covers every notebook below the directory, at any depth
        nested_nbs = get_notebook_files(child, recursive=True)
        if exclude_index:
            nested_nbs = [nb for nb in nested_nbs if nb.name not in ['index.ipynb', '00_index.ipynb']]
        total = len(nested_nbs)
        entries.append((child, True, f"({total})" if show_counts and total > 0 else ""))

    for nb_path in nb_files:
        summary = extract_notebook_info(nb_path).description
        note = f"# {strip_markdown_links(summary)}" if summary else ""
        entries.append((nb_path, False, note))

    final_pos = len(entries) - 1

    def branch(pos):
        # The last entry closes the branch; every other entry continues it
        return "└── " if pos == final_pos else "├── "

    # Only notebook lines take part in the alignment width
    widest = max(
        (len(f"{prefix}{branch(pos)}{entry.name}")
         for pos, (entry, is_directory, _) in enumerate(entries) if not is_directory),
        default=0,
    )
    column = widest + ALIGNMENT_BUFFER if widest > 0 else 0

    out = []
    for pos, (entry, is_directory, note) in enumerate(entries):
        head = f"{prefix}{branch(pos)}{entry.name}"

        if not is_directory:
            # Notebook: pad to the shared column before appending the description
            if note and column > 0:
                head = head.ljust(column) + f" {note}"
            out.append(head)
            continue

        # Directory: trailing slash, optional count, then its children one level deeper
        out.append(f"{head}/ {note}" if note else f"{head}/")
        deeper_prefix = prefix + ("    " if pos == final_pos else "│   ")
        out.extend(_generate_nested_tree_lines(
            entry, deeper_prefix, show_counts, max_depth,
            current_depth + 1, exclude_index
        ))

    return out

## Testing Tree with Descriptions

Let's test the enhanced tree on our project, starting with the link-stripping helper it relies on:

### Testing Markdown Link Stripping

Let's test that Markdown links are properly stripped from descriptions:

In [None]:
# Exercise strip_markdown_links on representative descriptions:
# a trailing link, no link at all, several links, and a link at the very start.
test_cases = [
    "Buttons allow the user to take actions or make choices. [daisyUI docs](https://daisyui.com/components/button/)",
    "A simple description with no links",
    "Multiple [link one](http://example.com) and [link two](http://example.com/2) in text",
    "[Link at start](http://example.com) of description"
]

# Print each input next to its stripped form for visual comparison
print("Testing strip_markdown_links function:")
for test in test_cases:
    result = strip_markdown_links(test)
    print(f"\nOriginal: {test}")
    print(f"Stripped: {result}")

Testing strip_markdown_links function:

Original: Buttons allow the user to take actions or make choices. [daisyUI docs](https://daisyui.com/components/button/)
Stripped: Buttons allow the user to take actions or make choices. daisyUI docs

Original: A simple description with no links
Stripped: A simple description with no links

Original: Multiple [link one](http://example.com) and [link two](http://example.com/2) in text
Stripped: Multiple link one and link two in text

Original: [Link at start](http://example.com) of description
Stripped: Link at start of description


In [None]:
# Render the configured notebooks directory with each notebook's description
# appended as an aligned trailing `#` comment
print(generate_tree_with_descriptions())

nbs/
├── 00_core.ipynb         # Core utilities and data models for nbdev project overview generation
├── 01_parsers.ipynb      # Parse notebook metadata, content, and extract function/class signatures with docments
├── 02_tree.ipynb         # Generate tree visualizations for nbdev project structure
├── 03_api_docs.ipynb     # Generate module overviews with formatted signatures for nbdev projects
├── 04_dependencies.ipynb # Analyze cross-notebook imports and generate Mermaid.js dependency diagrams
├── 05_generators.ipynb   # Auto-generate folder_name.ipynb notebooks for nbdev project organization
└── 06_cli.ipynb          # CLI commands for nbdev project overview generation and analysis


## Subdirectory Tree Visualization

Let's also add a function to visualize a specific subdirectory with its notebooks:

In [None]:
#| export
def generate_subdirectory_tree(subdir_path: Path,               # Path to subdirectory
                              show_descriptions: bool = True    # Include notebook descriptions
                              ) -> str:                         # Tree visualization
    "Render one subdirectory as a tree of its directories and notebooks, sorted by name"
    tree = [f"{subdir_path.name}/"]

    # Entries are (path, is_directory); directories and notebooks are interleaved by
    # case-insensitive name.
    entries = [(folder, True) for folder in get_subdirectories(subdir_path, recursive=False)]
    entries += [(nb, False) for nb in get_notebook_files(subdir_path, recursive=False)]
    entries.sort(key=lambda pair: pair[0].name.lower())

    final_pos = len(entries) - 1

    # Description column: widest top-level notebook line plus a four-column gap,
    # or 0 when descriptions are off or there are no top-level notebooks.
    column = 0
    if show_descriptions:
        column = max(
            (len(f"{'└── ' if pos == final_pos else '├── '}{entry.name}")
             for pos, (entry, is_directory) in enumerate(entries) if not is_directory),
            default=0,
        )
        if column > 0:
            column += 4

    # Each top-level entry renders itself (and, for directories, its contents)
    for pos, (entry, is_directory) in enumerate(entries):
        tree.extend(_generate_subdirectory_lines(
            entry, "", pos == final_pos, is_directory, show_descriptions, 0, column
        ))

    return '\n'.join(tree)

In [None]:
#| export
def _generate_subdirectory_lines(item: Path,                    # Item to process
                                prefix: str,                    # Line prefix
                                is_last: bool,                  # Is last item
                                is_dir: bool,                   # Is directory
                                show_descriptions: bool,        # Show descriptions
                                depth: int,                     # Current depth
                                max_length: int = 0             # Max length for alignment (calculated externally)
                                ) -> List[str]:                 # Tree lines
    "Generate tree lines for subdirectory visualization"
    connector = "└── " if is_last else "├── "
    base_line = f"{prefix}{connector}{item.name}"

    if not is_dir:
        # Notebook entry: the bare line, or the line padded to `max_length` plus description
        if not show_descriptions:
            return [base_line]

        nb_info = extract_notebook_info(item)
        if not (nb_info.description and max_length > 0):
            return [base_line]

        # Strip Markdown links from description
        clean_description = strip_markdown_links(nb_info.description)
        padded = base_line.ljust(max_length)
        if len(base_line) >= max_length:
            # ljust adds nothing when the line already fills the column; keep one
            # space so the description is never glued to the filename.
            padded += " "
        return [f"{padded}# {clean_description}"]

    # Directory entry, followed by its contents one level deeper
    lines = [f"{base_line}/"]
    extension = "    " if is_last else "│   "
    child_prefix = prefix + extension

    # Nested subdirectories and notebooks, interleaved by case-insensitive name
    sub_items = [(subdir, True) for subdir in get_subdirectories(item, recursive=False)]
    sub_items += [(notebook, False) for notebook in get_notebook_files(item, recursive=False)]
    sub_items.sort(key=lambda x: x[0].name.lower())

    # Reuse the inherited column while every notebook at this level still fits in it.
    # Deeper levels carry a longer prefix, so an inherited width can be too small; in
    # that case (and when no width was inherited) derive one from this level's widest
    # notebook line plus a four-column gap. Both connectors have the same width, so
    # either one can be used for measuring.
    local_max_length = max_length
    if show_descriptions:
        widest = max(
            (len(f"{child_prefix}├── {sub_item.name}")
             for sub_item, sub_is_dir in sub_items if not sub_is_dir),
            default=0,
        )
        if widest > 0 and widest >= local_max_length:
            local_max_length = widest + 4

    # Generate lines for sub-items
    last_index = len(sub_items) - 1
    for j, (sub_item, sub_is_dir) in enumerate(sub_items):
        lines.extend(_generate_subdirectory_lines(
            sub_item, child_prefix, j == last_index,
            sub_is_dir, show_descriptions, depth + 1, local_max_length
        ))

    return lines

## Summary Statistics

Let's add a function to generate summary statistics:

In [None]:
#| export
def get_tree_summary(path: Path = None              # Directory to analyze (defaults to nbs_path)
                    ) -> str:                       # Summary string
    "Get summary statistics for notebooks in directory tree"
    if path is None:
        path = get_config().nbs_path

    # Count notebooks at any depth, skipping both index notebook names that the
    # tree generators exclude so the total agrees with the rendered tree.
    index_names = ('index.ipynb', '00_index.ipynb')
    total_notebooks = sum(
        1 for nb in get_notebook_files(path, recursive=True) if nb.name not in index_names
    )

    # Count only the immediate subdirectories
    total_dirs = len(get_subdirectories(path, recursive=False))

    summary = f"Total: {total_notebooks} notebook{'s' if total_notebooks != 1 else ''}"
    if total_dirs == 0:
        return summary
    return f"{summary} across {total_dirs} director{'ies' if total_dirs != 1 else 'y'}"

In [None]:
# Print the one-line notebook/directory totals for the configured notebooks directory
print(get_tree_summary())

Total: 7 notebooks


In [None]:
#| hide
# Run nbdev's export step for this project (presumably writes the `#| export` cells
# to the module named by `#| default_exp` — confirm against the nbdev docs)
import nbdev; nbdev.nbdev_export()