In [None]:
import pkgutil
import importlib
import inspect
from typing import Optional, List, Dict, Set, Tuple
import ast
from pathlib import Path
from IPython.display import HTML, display

In [None]:
class CallDependencyVisitor(ast.NodeVisitor):
    """AST visitor to find dependencies between classes and functions."""
    def __init__(self):
        self.module_name = None
        self.current_function = None
        self.current_class = None
        self.call_deps = {
            'class_calls': {},    # class -> {function: module_location}
            'function_calls': {}  # function -> {function: module_location}
        }
        self.imported_names = {}  # maps imported names to their modules

    def visit_ImportFrom(self, node):
        """Track imported names and their modules."""
        for alias in node.names:
            if node.module:
                self.imported_names[alias.name] = node.module
        self.generic_visit(node)

    def visit_Import(self, node):
        """Track direct imports."""
        for alias in node.names:
            self.imported_names[alias.name.split('.')[-1]] = alias.name
        self.generic_visit(node)

    def visit_ClassDef(self, node):
        """Visit a class definition."""
        prev_class = self.current_class
        self.current_class = node.name
        if self.current_class not in self.call_deps['class_calls']:
            self.call_deps['class_calls'][self.current_class] = {}
        self.generic_visit(node)
        self.current_class = prev_class

    def visit_FunctionDef(self, node):
        """Visit a function definition."""
        prev_function = self.current_function
        self.current_function = node.name
        if not self.current_class:  # Only track top-level functions
            if self.current_function not in self.call_deps['function_calls']:
                self.call_deps['function_calls'][self.current_function] = {}
        self.generic_visit(node)
        self.current_function = prev_function

    def visit_Call(self, node):
        """Track function calls."""
        func_name = None
        module_path = self.module_name

        # Get function name and module for different types of calls
        if isinstance(node.func, ast.Name):
            func_name = node.func.id
            if func_name in self.imported_names:
                module_path = self.imported_names[func_name]
        elif isinstance(node.func, ast.Attribute):
            if isinstance(node.func.value, ast.Name):
                if node.func.value.id in self.imported_names:
                    module_path = self.imported_names[node.func.value.id]
                func_name = f"{node.func.value.id}.{node.func.attr}"
            else:
                func_name = node.func.attr

        if func_name:
            if self.current_class:
                self.call_deps['class_calls'][self.current_class][func_name] = module_path
            elif self.current_function:
                self.call_deps['function_calls'][self.current_function][func_name] = module_path

        self.generic_visit(node)

def analyze_package(package_name: str, max_depth: int = 3) -> Dict:
    """Analyze a Python package to create a hierarchical overview of its structure."""
    def get_module_info(module) -> Dict:
        """Extract relevant information from a module."""
        info = {
            'classes': {},
            'functions': [],
            'submodules': {},
            'call_dependencies': {}
        }
        
        try:
            module_file = inspect.getsourcefile(module)
            if module_file:
                with open(module_file, 'r', encoding='utf-8') as f:
                    source = f.read()
                
                tree = ast.parse(source)
                visitor = CallDependencyVisitor(module.__name__)
                visitor.visit(tree)
                info['call_dependencies'] = visitor.call_deps
        except Exception as e:
            info['call_dependencies']['error'] = str(e)
        
        for name, obj in inspect.getmembers(module):
            if name.startswith('_'):
                continue
                
            if inspect.isclass(obj) and obj.__module__ == module.__name__:
                methods = []
                for method_name, method in inspect.getmembers(obj, inspect.isfunction):
                    if not method_name.startswith('_'):
                        methods.append(method_name)
                info['classes'][name] = methods
                
            elif inspect.isfunction(obj) and obj.__module__ == module.__name__:
                info['functions'].append(name)
                
        return info

    def analyze_recursively(module_name: str, current_depth: int = 0) -> Dict:
        """Recursively analyze package structure."""
        if current_depth >= max_depth:
            return {}
            
        try:
            module = importlib.import_module(module_name)
            structure = get_module_info(module)
            
            if hasattr(module, '__path__'):
                for finder, name, ispkg in pkgutil.iter_modules(module.__path__):
                    full_name = f"{module_name}.{name}"
                    structure['submodules'][name] = analyze_recursively(full_name, current_depth + 1)
                
            return structure
            
        except (ImportError, AttributeError) as e:
            return {'error': str(e)}

    return {package_name: analyze_recursively(package_name)}

def generate_structure_html(struct: Dict) -> str:
    """Generate HTML structure from the package dictionary."""
    def _generate_html(d: Dict, level: int = 0) -> List[str]:
        indent = "    " * level
        parts = []
        
        for key, value in d.items():
            if isinstance(value, dict):
                # Package header
                parts.append(f'{indent}<li><span class="caret package">📦 {key}</span>')
                parts.append(f'{indent}<ul class="nested">')
                
                # Classes section
                if 'classes' in value and value['classes']:
                    parts.append(f'{indent}    <li><span class="caret">📘 Classes</span>')
                    parts.append(f'{indent}    <ul class="nested">')
                    
                    for class_name, methods in value['classes'].items():
                        parts.append(f'{indent}        <li><span class="caret class">🔹 {class_name}</span>')
                        parts.append(f'{indent}        <ul class="nested">')
                        
                        # Call dependencies
                        if ('call_dependencies' in value and 
                            'class_calls' in value['call_dependencies'] and 
                            class_name in value['call_dependencies']['class_calls']):
                            calls = value['call_dependencies']['class_calls'][class_name]
                            if calls:
                                parts.append(f'{indent}            <li><span class="caret">📞 Calls</span>')
                                parts.append(f'{indent}            <ul class="nested">')
                                for func, module in calls.items():
                                    parts.append(f'{indent}                <li>→ {func} '
                                              f'<span class="module">(in {module})</span></li>')
                                parts.append(f'{indent}            </ul></li>')
                        
                        # Methods
                        for method in methods:
                            parts.append(f'{indent}            <li><span class="method">⚡ {method}</span></li>')
                        
                        parts.append(f'{indent}        </ul></li>')
                    
                    parts.append(f'{indent}    </ul></li>')
                
                # Functions section
                if 'functions' in value and value['functions']:
                    parts.append(f'{indent}    <li><span class="caret">📗 Functions</span>')
                    parts.append(f'{indent}    <ul class="nested">')
                    
                    for func in value['functions']:
                        parts.append(f'{indent}        <li><span class="caret function">⚡ {func}</span>')
                        
                        # Call dependencies
                        if ('call_dependencies' in value and 
                            'function_calls' in value['call_dependencies'] and 
                            func in value['call_dependencies']['function_calls']):
                            calls = value['call_dependencies']['function_calls'][func]
                            if calls:
                                parts.append(f'{indent}        <ul class="nested">')
                                parts.append(f'{indent}            <li><span class="caret">📞 Calls</span>')
                                parts.append(f'{indent}            <ul class="nested">')
                                for called_func, module in calls.items():
                                    parts.append(f'{indent}                <li>→ {called_func} '
                                              f'<span class="module">(in {module})</span></li>')
                                parts.append(f'{indent}            </ul></li>')
                                parts.append(f'{indent}        </ul>')
                        
                        parts.append(f'{indent}        </li>')
                    
                    parts.append(f'{indent}    </ul></li>')
                
                # Submodules section
                if 'submodules' in value and value['submodules']:
                    parts.append(f'{indent}    <li><span class="caret">📂 Submodules</span>')
                    parts.append(f'{indent}    <ul class="nested">')
                    parts.extend(_generate_html(value['submodules'], level + 3))
                    parts.append(f'{indent}    </ul></li>')
                
                parts.append(f'{indent}</ul></li>')
        
        return parts
    
    return '\n'.join(_generate_html(struct))

In [15]:
def save_package_analysis(package_name: str, output_file: str = "package_analysis.html"):
    """
    Save package analysis with properly scoped JavaScript and CSS.
    """
    structure = analyze_package(package_name)
    mermaid_dag = generate_mermaid_dag(structure)
    
    html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Package Analysis: {package_name}</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/mermaid/10.6.1/mermaid.min.js"></script>
    <style>
        .package-container {{
            display: flex;
            gap: 20px;
            padding: 20px;
            font-family: Arial, sans-serif;
        }}
        .view-panel {{
            flex: 1;
            border: 1px solid #ddd;
            padding: 20px;
            border-radius: 4px;
        }}
        .tree-list {{
            list-style: none;
            padding-left: 20px;
            margin: 0;
        }}
        .tree-item {{
            margin: 5px 0;
            padding: 2px;
        }}
        .tree-caret {{
            cursor: pointer;
            user-select: none;
            padding: 3px;
        }}
        .tree-caret::before {{
            content: "▶";
            color: black;
            display: inline-block;
            margin-right: 6px;
        }}
        .tree-caret.open::before {{
            content: "▼";
        }}
        .tree-content {{
            display: none;
            margin-left: 20px;
        }}
        .tree-content.show {{
            display: block;
        }}
        .package-name {{ color: #2A4858; font-weight: bold; }}
        .class-name {{ color: #1A5F7A; }}
        .function-name {{ color: #2E7D32; }}
        .method-name {{ color: #455A64; }}
        .calls-section {{ color: #5C6BC0; }}
        .module-info {{ color: #78909C; font-style: italic; }}
        .controls {{
            margin-bottom: 15px;
        }}
        .control-button {{
            padding: 5px 10px;
            margin-right: 10px;
            border: 1px solid #ddd;
            border-radius: 4px;
            background: white;
            cursor: pointer;
        }}
        .control-button:hover {{
            background: #f0f0f0;
        }}
    </style>
</head>
<body>
    <div class="package-container">
        <!-- Tree View -->
        <div class="view-panel">
            <h2>Hierarchical View</h2>
            <div class="controls">
                <button class="control-button" id="expandAllBtn">Expand All</button>
                <button class="control-button" id="collapseAllBtn">Collapse All</button>
            </div>
            <div class="tree-container">
                {generate_structure_html(structure)}
            </div>
        </div>
        
        <!-- DAG View -->
        <div class="view-panel">
            <h2>Dependency Graph</h2>
            <pre class="mermaid">
                {mermaid_dag}
            </pre>
        </div>
    </div>

    <script>
        // Initialize Mermaid first
        mermaid.initialize({{ 
            startOnLoad: true,
            securityLevel: 'loose',
            theme: 'default',
            flowchart: {{
                curve: 'basis',
                padding: 15
            }}
        }});

        // Wait for DOM to be fully loaded
        document.addEventListener('DOMContentLoaded', function() {{
            // Initialize tree view functionality
            function initializeTreeView() {{
                const treeItems = document.querySelectorAll('.tree-caret');
                
                treeItems.forEach(item => {{
                    item.addEventListener('click', function() {{
                        this.classList.toggle('open');
                        const content = this.nextElementSibling;
                        if (content) {{
                            content.classList.toggle('show');
                        }}
                    }});
                }});
            }}

            // Initialize expand/collapse functionality
            function initializeControls() {{
                const expandAllBtn = document.getElementById('expandAllBtn');
                const collapseAllBtn = document.getElementById('collapseAllBtn');
                
                if (expandAllBtn) {{
                    expandAllBtn.addEventListener('click', function() {{
                        document.querySelectorAll('.tree-caret').forEach(caret => {{
                            caret.classList.add('open');
                        }});
                        document.querySelectorAll('.tree-content').forEach(content => {{
                            content.classList.add('show');
                        }});
                    }});
                }}
                
                if (collapseAllBtn) {{
                    collapseAllBtn.addEventListener('click', function() {{
                        document.querySelectorAll('.tree-caret').forEach(caret => {{
                            caret.classList.remove('open');
                        }});
                        document.querySelectorAll('.tree-content').forEach(content => {{
                            content.classList.remove('show');
                        }});
                    }});
                }}
            }}

            // Initialize everything
            initializeTreeView();
            initializeControls();

            // Expand first level by default
            document.querySelectorAll('.tree-container > .tree-list > .tree-item > .tree-caret').forEach(caret => {{
                caret.classList.add('open');
                const content = caret.nextElementSibling;
                if (content) {{
                    content.classList.add('show');
                }}
            }});

            // Force Mermaid to render
            try {{
                mermaid.init(undefined, document.querySelectorAll('.mermaid'));
            }} catch (error) {{
                console.error('Mermaid initialization error:', error);
            }}
        }});
    </script>
</body>
</html>
"""

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    
    print(f"Package analysis saved to {output_file}")

# Update generate_structure_html to use new class names
def generate_structure_html(struct: Dict) -> str:
    def _generate_html(d: Dict, level: int = 0) -> List[str]:
        parts = []
        for key, value in d.items():
            if isinstance(value, dict):
                parts.append(f'<li class="tree-item">')
                parts.append(f'<span class="tree-caret package-name">📦 {key}</span>')
                parts.append(f'<div class="tree-content">')
                parts.append(f'<ul class="tree-list">')
                
                if 'classes' in value and value['classes']:
                    parts.append(f'<li class="tree-item">')
                    parts.append(f'<span class="tree-caret">📘 Classes</span>')
                    parts.append(f'<div class="tree-content">')
                    parts.append(f'<ul class="tree-list">')
                    
                    for class_name, methods in value['classes'].items():
                        parts.append(f'<li class="tree-item">')
                        parts.append(f'<span class="tree-caret class-name">🔹 {class_name}</span>')
                        parts.append(f'<div class="tree-content">')
                        
                        # Add dependencies
                        if ('call_dependencies' in value and 
                            'class_calls' in value['call_dependencies'] and 
                            class_name in value['call_dependencies']['class_calls']):
                            
                            calls = value['call_dependencies']['class_calls'][class_name]
                            if calls:
                                parts.append(f'<span class="tree-caret calls-section">📞 Calls</span>')
                                parts.append(f'<div class="tree-content">')
                                for func, module in calls.items():
                                    parts.append(f'<div>→ {func} <span class="module-info">(in {module})</span></div>')
                                parts.append('</div>')
                        
                        # Add methods
                        for method in methods:
                            parts.append(f'<div class="method-name">⚡ {method}</div>')
                        
                        parts.append('</div></li>')
                    
                    parts.append('</ul></div></li>')
                
                if 'functions' in value and value['functions']:
                    parts.append(f'<li class="tree-item">')
                    parts.append(f'<span class="tree-caret">📗 Functions</span>')
                    parts.append(f'<div class="tree-content">')
                    parts.append(f'<ul class="tree-list">')
                    
                    for func in value['functions']:
                        parts.append(f'<li class="tree-item">')
                        parts.append(f'<span class="tree-caret function-name">⚡ {func}</span>')
                        
                        if ('call_dependencies' in value and 
                            'function_calls' in value['call_dependencies'] and 
                            func in value['call_dependencies']['function_calls']):
                            
                            calls = value['call_dependencies']['function_calls'][func]
                            if calls:
                                parts.append(f'<div class="tree-content">')
                                parts.append(f'<span class="tree-caret calls-section">📞 Calls</span>')
                                parts.append(f'<div class="tree-content">')
                                for called_func, module in calls.items():
                                    parts.append(f'<div>→ {called_func} <span class="module-info">(in {module})</span></div>')
                                parts.append('</div></div>')
                        
                        parts.append('</li>')
                    
                    parts.append('</ul></div></li>')
                
                if 'submodules' in value and value['submodules']:
                    parts.append(f'<li class="tree-item">')
                    parts.append(f'<span class="tree-caret">📂 Submodules</span>')
                    parts.append(f'<div class="tree-content">')
                    parts.append(f'<ul class="tree-list">')
                    parts.extend(_generate_html(value['submodules']))
                    parts.append('</ul></div></li>')
                
                parts.append('</ul></div></li>')
        
        return parts
    
    return '\n'.join(['<ul class="tree-list">'] + _generate_html(struct) + ['</ul>'])

In [22]:
def analyze_and_save(package_name: str, output_dir: str = ".", display_in_notebook: bool = False):
    """
    Analyze package and provide flexible output options.
    
    Args:
        package_name: Name of the package to analyze
        output_dir: Directory to save files
        display_in_notebook: If True, also displays SVG directly in notebook
    """
    from pathlib import Path
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    
    # Analyze package
    structure = analyze_package(package_name)
    
    # Generate and save graph
    dot = generate_dependency_graph(structure, str(output_dir / "dependencies"))
    
    # Save HTML tree view
    html_file = output_dir / "package_structure.html"
    save_package_analysis(package_name, str(html_file))
    
    print(f"\nFiles saved in: {output_dir.absolute()}")
    print(f"- Tree view: package_structure.html")
    print(f"- Dependency graph: dependencies.svg")
    
    if display_in_notebook:
        try:
            from IPython.display import SVG, display, HTML
            # Display the graph in the notebook
            display(SVG(filename=str(output_dir / "dependencies.svg")))
            print("\nGraph displayed above. You can also download the files for viewing on your local machine.")
        except Exception as e:
            print(f"\nCouldn't display in notebook: {e}")
            print("Files are still saved and can be downloaded.")

# Usage examples:
"""
# For viewing in notebook:
analyze_and_save('your_package_name', display_in_notebook=True)

# For downloading to Windows:
analyze_and_save('your_package_name', output_dir='your_package_analysis')
"""

"\n# For viewing in notebook:\nanalyze_and_save('your_package_name', display_in_notebook=True)\n\n# For downloading to Windows:\nanalyze_and_save('your_package_name', output_dir='your_package_analysis')\n"

In [36]:
def generate_graph(structure: Dict) -> graphviz.Digraph:
    """Generate dependency graph with explicit debug printing."""
    dot = graphviz.Digraph('package_dependencies', 
                          comment='Package Dependencies',
                          format='svg')
    
    # Graph styling
    dot.attr(rankdir='LR')
    dot.attr('node', shape='box', style='rounded,filled', 
            fillcolor='aliceblue', fontname='Arial')
    dot.attr('edge', fontname='Arial', fontsize='10')
    
    def process_module(module_data: Dict, module_name: str):
        """Process a module's dependencies with debug output."""
        print(f"\nProcessing module: {module_name}")
        
        if 'call_dependencies' in module_data:
            deps = module_data['call_dependencies']
            
            # Process class calls
            if 'class_calls' in deps:
                print("Found class calls:")
                for class_name, calls in deps['class_calls'].items():
                    print(f"  Class: {class_name}")
                    print(f"  Calls: {calls}")
                    if calls:
                        # Add class node
                        class_id = class_name.replace('.', '_')
                        dot.node(class_id, class_name, fillcolor='lightblue')
                        
                        # Add called function nodes and edges
                        for func_name, module in calls.items():
                            func_id = f"{func_name}_{module}".replace('.', '_')
                            dot.node(func_id, f"{func_name}\n({module})", fillcolor='lightyellow')
                            dot.edge(class_id, func_id)
                            print(f"    Added edge: {class_name} -> {func_name}")
            
            # Process function calls
            if 'function_calls' in deps:
                print("Found function calls:")
                for func_name, calls in deps['function_calls'].items():
                    print(f"  Function: {func_name}")
                    print(f"  Calls: {calls}")
                    if calls:
                        # Add function node
                        func_id = func_name.replace('.', '_')
                        dot.node(func_id, func_name, fillcolor='lightgreen')
                        
                        # Add called function nodes and edges
                        for called_func, module in calls.items():
                            called_id = f"{called_func}_{module}".replace('.', '_')
                            dot.node(called_id, f"{called_func}\n({module})", fillcolor='lightyellow')
                            dot.edge(func_id, called_id)
                            print(f"    Added edge: {func_name} -> {called_func}")
        
        # Process submodules
        if 'submodules' in module_data:
            for submod_name, submod_data in module_data['submodules'].items():
                process_module(submod_data, f"{module_name}.{submod_name}")
    
    # Process the package
    for pkg_name, pkg_data in structure.items():
        process_module(pkg_data, pkg_name)
        
    # Debug: print all nodes and edges
    print("\nFinal graph contents:")
    for line in dot.body:
        print(f"  {line}")
    
    return dot

def analyze_and_save(package_name: str, output_dir: str = "."):
    """Analyze package and save visualizations with detailed debugging."""
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    
    print(f"Analyzing package: {package_name}")
    structure = analyze_package(package_name)
    
    print("\nGenerating dependency graph...")
    graph_path = output_dir / "dependencies"
    dot = generate_graph(structure)
    
    # Save files
    dot.render(str(graph_path), view=False, cleanup=True)
    html_file = output_dir / "package_structure.html"
    save_package_analysis(package_name, str(html_file))
    
    svg_path = Path(str(graph_path) + '.svg')
    print(f"\nFiles generated in: {output_dir.absolute()}")
    print(f"- Tree view (HTML): {html_file.name} ({os.path.getsize(html_file)} bytes)")
    print(f"- Dependency graph (SVG): {svg_path.name} ({os.path.getsize(svg_path)} bytes)")

# Run with detailed debugging
analyze_and_save('dhlab', 'graph_struct')

Analyzing package: dhlab

Generating dependency graph...

Processing module: dhlab

Processing module: dhlab.api

Processing module: dhlab.api.dhlab_api

Processing module: dhlab.api.nb_ngram_api

Processing module: dhlab.api.nb_search_api

Processing module: dhlab.constants

Processing module: dhlab.future

Processing module: dhlab.future.corpus_conc_coll

Processing module: dhlab.future.ngram

Processing module: dhlab.graph_networkx_louvain

Processing module: dhlab.images

Processing module: dhlab.images.nbpictures

Processing module: dhlab.legacy

Processing module: dhlab.legacy.graph_networkx_louvain

Processing module: dhlab.legacy.module_update

Processing module: dhlab.legacy.nb_external_files

Processing module: dhlab.legacy.nbpictures

Processing module: dhlab.legacy.nbtext

Processing module: dhlab.legacy.token_map

Processing module: dhlab.metadata

Processing module: dhlab.metadata.metadata

Processing module: dhlab.metadata.natbib

Processing module: dhlab.metadata.natbib

In [34]:
analyze_and_save('dhlab', output_dir='graph_struct')

Analyzing package: dhlab
Generating dependency graph...

Debug information:
Graph nodes: 3
Package analysis saved to graph_struct/package_structure.html

Files generated successfully in: /mnt/disk1/Github/DH-lab hierarchy/graph_struct
- Tree view (HTML): package_structure.html (96567 bytes)
- Dependency graph (SVG): dependencies.svg (597 bytes)

First few lines of SVG file:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Generated by graphviz version 2.43.0 (0)
 -->
<!-- Title: package_dependencies Pages: 1 -->
<svg width="8pt" height="8pt"
 viewBox="0.00 0.00 8.00 8.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 4)">
<title>package_dependencies</title>


Color coding in the dependency graph:
- Light blue: Classes
- Light green: Functions
- Light yellow: Called functio

In [5]:
dhlabhtml = save_package_structure('dhlab', 'dhlab_structure.html')

Package structure saved to dhlab_structure.html


In [16]:
save_package_analysis('dhlab', 'dhlab_structure_mermaid.html')

Package analysis saved to dhlab_structure_mermaid.html


In [32]:
#analyze_and_save('dhlab', display_in_notebook=True)