diff --git a/docs/images/blast-radius.png b/docs/images/blast-radius.png new file mode 100644 index 000000000..31535e212 Binary files /dev/null and b/docs/images/blast-radius.png differ diff --git a/docs/mint.json b/docs/mint.json index 3546fbd79..1f9fd103b 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -73,6 +73,7 @@ "pages": [ "tutorials/at-a-glance", "tutorials/migrating-apis", + "tutorials/codebase-visualization", "tutorials/organize-your-codebase", "tutorials/modularity", "tutorials/deleting-dead-code", diff --git a/docs/tutorials/codebase-visualization.mdx b/docs/tutorials/codebase-visualization.mdx new file mode 100644 index 000000000..93c0e5003 --- /dev/null +++ b/docs/tutorials/codebase-visualization.mdx @@ -0,0 +1,383 @@ +--- +title: "Codebase Visualization" +sidebarTitle: "Codebase Visualization" +description: "This guide will show you how to create codebase visualizations using [codegen](/introduction/overview)." +icon: "flashlight" +iconType: "solid" +--- + + + + + +## Overview + +To demonstrate the visualization capabilities of the codegen we will generate three different visualizations of PostHog's open source [repository](https://github.com/PostHog/posthog). + - [Call Trace Visualization](#call-trace-visualization) + - [Function Dependency Graph](#function-dependency-graph) + - [Blast Radius Visualization](#blast-radius-visualization) + + +## Call Trace Visualization + +Visualizing the call trace of a function is a great way to understand the flow of a function and for debugging. In this tutorial we will create a call trace visualization of the `patch` method of the `SharingConfigurationViewSet` class. View the source code [here](https://github.com/PostHog/posthog/blob/c2986d9ac7502aa107a4afbe31b3633848be6582/posthog/api/sharing.py#L163). + + +### Basic Setup +First, we'll set up our codebase, graph and configure some basic parameters: + +```python +import networkx as nx +from codegen import Codebase + +# Initialize codebase +codebase = Codebase("path/to/posthog/") + +# Create a directed graph for representing call relationships +G = nx.DiGraph() + +# Configuration flags +IGNORE_EXTERNAL_MODULE_CALLS = True # Skip calls to external modules +IGNORE_CLASS_CALLS = False # Include class definition calls +MAX_DEPTH = 10 + +COLOR_PALETTE = { + "StartFunction": "#9cdcfe", # Light blue - Start Function + "PyFunction": "#a277ff", # Soft purple/periwinkle - PyFunction + "PyClass": "#ffca85", # Warm peach/orange - PyClass + "ExternalModule": "#f694ff" # Bright magenta/pink - ExternalModule +} +``` + +### Building the Visualization +We'll create a function that will recursively traverse the call trace of a function and add nodes and edges to the graph: + +```python +def create_downstream_call_trace(src_func: Function, depth: int = 0): + """Creates call graph by recursively traversing function calls + + Args: + src_func (Function): Starting function for call graph + depth (int): Current recursion depth + """ + # Prevent infinite recursion + if MAX_DEPTH <= depth: + return + + # External modules are not functions + if isinstance(src_func, ExternalModule): + return + + # Process each function call + for call in src_func.function_calls: + # Skip self-recursive calls + if call.name == src_func.name: + continue + + # Get called function definition + func = call.function_definition + if not func: + continue + + # Apply configured filters + if isinstance(func, ExternalModule) and IGNORE_EXTERNAL_MODULE_CALLS: + continue + if isinstance(func, Class) and IGNORE_CLASS_CALLS: + continue + + # Generate display name (include class for methods) + if isinstance(func, Class) or isinstance(func, ExternalModule): + func_name = func.name + elif isinstance(func, Function): + func_name = f"{func.parent_class.name}.{func.name}" if func.is_method else func.name + + # Add node and edge with metadata + G.add_node(func, name=func_name, + color=COLOR_PALETTE.get(func.__class__.__name__)) + G.add_edge(src_func, func, **generate_edge_meta(call)) + + # Recurse for regular functions + if isinstance(func, Function): + create_downstream_call_trace(func, depth + 1) +``` + +### Adding Edge Metadata +We can enrich our edges with metadata about the function calls: + +```python +def generate_edge_meta(call: FunctionCall) -> dict: + """Generate metadata for call graph edges + + Args: + call (FunctionCall): Function call information + + Returns: + dict: Edge metadata including name and location + """ + return { + "name": call.name, + "file_path": call.filepath, + "start_point": call.start_point, + "end_point": call.end_point, + "symbol_name": "FunctionCall" + } +``` +### Visualizing the Graph +Finally, we can visualize our call graph starting from a specific function: +```python +# Get target function to analyze +target_class = codebase.get_class('SharingConfigurationViewSet') +target_method = target_class.get_method('patch') + +# Add root node +G.add_node(target_method, + name=f"{target_class.name}.{target_method.name}", + color=COLOR_PALETTE["StartFunction"]) + +# Build the call graph +create_downstream_call_trace(target_method) + +# Render the visualization +codebase.visualize(G) +``` + +### Common Use Cases +The call graph visualization is particularly useful for: + - Understanding complex codebases + - Planning refactoring efforts + - Identifying tightly coupled components + - Analyzing critical paths + - Documenting system architecture + +### Take a look + + +## Function Dependency Graph + +Understanding symbol dependencies is crucial for maintaining and refactoring code. This tutorial will show you how to create visual dependency graphs using Codegen and NetworkX. We will be creating a dependency graph of the `get_query_runner` function. View the source code [here](https://github.com/PostHog/posthog/blob/c2986d9ac7502aa107a4afbe31b3633848be6582/posthog/hogql_queries/query_runner.py#L152). + +### Basic Setup + +We'll use the same basic setup as the [Call Trace Visualization](/tutorials/codebase-visualization#call-trace-visualization) tutorial. + + +### Building the Dependency Graph +The core function for building our dependency graph: +```python +def create_dependencies_visualization(symbol: Symbol, depth: int = 0): + """Creates visualization of symbol dependencies + + Args: + symbol (Symbol): Starting symbol to analyze + depth (int): Current recursion depth + """ + # Prevent excessive recursion + if depth >= MAX_DEPTH: + return + + # Process each dependency + for dep in symbol.dependencies: + dep_symbol = None + + # Handle different dependency types + if isinstance(dep, Symbol): + # Direct symbol reference + dep_symbol = dep + elif isinstance(dep, Import): + # Import statement - get resolved symbol + dep_symbol = dep.resolved_symbol if dep.resolved_symbol else None + + if dep_symbol: + # Add node with appropriate styling + G.add_node(dep_symbol, + color=COLOR_PALETTE.get(dep_symbol.__class__.__name__, + "#f694ff")) + + # Add dependency relationship + G.add_edge(symbol, dep_symbol) + + # Recurse unless it's a class (avoid complexity) + if not isinstance(dep_symbol, PyClass): + create_dependencies_visualization(dep_symbol, depth + 1) +``` + +### Visualizing the Graph +Finally, we can visualize our dependency graph starting from a specific symbol: +```python +# Get target symbol +target_func = codebase.get_function("get_query_runner") + +# Add root node +G.add_node(target_func, color=COLOR_PALETTE["StartFunction"]) + +# Generate dependency graph +create_dependencies_visualization(target_func) + +# Render visualization +codebase.visualize(G) +``` + +### Take a look + + +## Blast Radius visualization + +Understanding the impact of code changes is crucial for safe refactoring. A blast radius visualization shows how changes to one function might affect other parts of the codebase by tracing usage relationships. In this tutorial we will create a blast radius visualization of the `export_asset` function. View the source code [here](https://github.com/PostHog/posthog/blob/c2986d9ac7502aa107a4afbe31b3633848be6582/posthog/tasks/exporter.py#L57). + +### Basic Setup + +We'll use the same basic setup as the [Call Trace Visualization](/tutorials/codebase-visualization#call-trace-visualization) tutorial. + + +### Helper Functions +We'll create some utility functions to help build our visualization: +```python +# List of HTTP methods to highlight +HTTP_METHODS = ["get", "put", "patch", "post", "head", "delete"] + +def generate_edge_meta(usage: Usage) -> dict: + """Generate metadata for graph edges + + Args: + usage (Usage): Usage relationship information + + Returns: + dict: Edge metadata including name and location + """ + return { + "name": usage.match.source, + "file_path": usage.match.filepath, + "start_point": usage.match.start_point, + "end_point": usage.match.end_point, + "symbol_name": usage.match.__class__.__name__ + } + +def is_http_method(symbol: PySymbol) -> bool: + """Check if a symbol is an HTTP endpoint method + + Args: + symbol (PySymbol): Symbol to check + + Returns: + bool: True if symbol is an HTTP method + """ + if isinstance(symbol, PyFunction) and symbol.is_method: + return symbol.name in HTTP_METHODS + return False +``` + +### Building the Blast Radius Visualization +The main function for creating our blast radius visualization: +```python +def create_blast_radius_visualization(symbol: PySymbol, depth: int = 0): + """Create visualization of symbol usage relationships + + Args: + symbol (PySymbol): Starting symbol to analyze + depth (int): Current recursion depth + """ + # Prevent excessive recursion + if depth >= MAX_DEPTH: + return + + # Process each usage of the symbol + for usage in symbol.usages: + usage_symbol = usage.usage_symbol + + # Determine node color based on type + if is_http_method(usage_symbol): + color = COLOR_PALETTE.get("HTTP_METHOD") + else: + color = COLOR_PALETTE.get(usage_symbol.__class__.__name__, "#f694ff") + + # Add node and edge to graph + G.add_node(usage_symbol, color=color) + G.add_edge(symbol, usage_symbol, **generate_edge_meta(usage)) + + # Recursively process usage symbol + create_blast_radius_visualization(usage_symbol, depth + 1) +``` + +### Visualizing the Graph +Finally, we can create our blast radius visualization: +```python +# Get target function to analyze +target_func = codebase.get_function('export_asset') + +# Add root node +G.add_node(target_func, color=COLOR_PALETTE.get("StartFunction")) + +# Build the visualization +create_blast_radius_visualization(target_func) + +# Render reversed graph to show impact flow +codebase.visualize(G.reverse()) +``` + +### Take a look + + +## What's Next? + + + + Learn how to use Codegen to create modular codebases. + + + Learn how to use Codegen to delete dead code. + + + Learn how to use Codegen to increase type coverage. + + + Explore the complete API documentation for all Codegen classes and methods. + + \ No newline at end of file