From c95d5b36f4a5ff19ffbf8341a2538ec0a720793e Mon Sep 17 00:00:00 2001 From: Avikalp Kumar Gupta Date: Fri, 6 Jun 2025 16:43:07 -0400 Subject: [PATCH 1/7] Implement exponential backoff retry logic for OpenAI API calls in CodeAnalysisAgent - Added a decorator to handle rate limit errors with exponential backoff and jitter. - Refactored agent analysis execution to use the new retry logic. - Enhanced dependency matching for components to allow more flexible name comparisons. - Added functionality to track dependents in component relationships. --- diffgraph/ai_analysis.py | 54 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/diffgraph/ai_analysis.py b/diffgraph/ai_analysis.py index 44bbb48..3d028d0 100644 --- a/diffgraph/ai_analysis.py +++ b/diffgraph/ai_analysis.py @@ -3,6 +3,9 @@ import os from pydantic import BaseModel from .graph_manager import GraphManager, FileStatus, ChangeType, ComponentNode +import time +import random +import openai class FileChange(BaseModel): """Model representing a file change.""" @@ -15,6 +18,28 @@ class DiffAnalysis(BaseModel): summary: str mermaid_diagram: str +def exponential_backoff_retry(func): + """Decorator to implement exponential backoff retry logic.""" + def wrapper(*args, **kwargs): + max_retries = 5 + base_delay = 1 # Start with 1 second + max_delay = 60 # Maximum delay of 60 seconds + + for attempt in range(max_retries): + try: + return func(*args, **kwargs) + except openai.RateLimitError as e: + if attempt == max_retries - 1: # Last attempt + raise # Re-raise the exception if all retries failed + + # Calculate delay with exponential backoff and jitter + delay = min(base_delay * (2 ** attempt) + random.uniform(0, 1), max_delay) + print(f"Rate limit hit. 
Retrying in {delay:.2f} seconds...") + time.sleep(delay) + except Exception as e: + raise # Re-raise other exceptions immediately + return wrapper + class CodeAnalysisAgent: """Agent for analyzing code changes using OpenAI's Agents SDK.""" @@ -66,6 +91,12 @@ def _determine_change_type(self, status: str) -> ChangeType: else: return ChangeType.MODIFIED + @exponential_backoff_retry + def _run_agent_analysis(self, prompt: str) -> str: + """Run the agent analysis with retry logic.""" + result = Runner.run_sync(self.agent, prompt) + return result.final_output + def analyze_changes(self, files_with_content: List[Dict[str, str]]) -> DiffAnalysis: """ Analyze code changes using the OpenAI agent, processing files incrementally. @@ -114,9 +145,8 @@ def analyze_changes(self, files_with_content: List[Dict[str, str]]) -> DiffAnaly for comp in processed_components: prompt += f"- {comp.name}: {comp.summary}\n" - # Run the agent - result = Runner.run_sync(self.agent, prompt) - response_text = result.final_output + # Run the agent with retry logic + response_text = self._run_agent_analysis(prompt) # Parse the response summary = "" @@ -166,12 +196,28 @@ def analyze_changes(self, files_with_content: List[Dict[str, str]]) -> DiffAnaly for dep in comp.get("dependencies", []): # Try to find the dependency in other components for other_comp in self.graph_manager.component_nodes.values(): - if other_comp.name == dep: + # More flexible matching - check if the dependency name is contained in the component name + # or if the component name is contained in the dependency name + if (dep.lower() in other_comp.name.lower() or + other_comp.name.lower() in dep.lower()): self.graph_manager.add_component_dependency( f"{current_file}::{comp['name']}", f"{other_comp.file_path}::{other_comp.name}" ) + # Add dependents + for dep in comp.get("dependents", []): + # Try to find the dependent in other components + for other_comp in self.graph_manager.component_nodes.values(): + # More flexible matching - 
check if the dependent name is contained in the component name + # or if the component name is contained in the dependent name + if (dep.lower() in other_comp.name.lower() or + other_comp.name.lower() in dep.lower()): + self.graph_manager.add_component_dependency( + f"{other_comp.file_path}::{other_comp.name}", + f"{current_file}::{comp['name']}" + ) + # Mark file as processed self.graph_manager.mark_processed(current_file, summary, components) From f9dac4d9ca3ee2e4002564072e715dbe46347b9a Mon Sep 17 00:00:00 2001 From: Avikalp Kumar Gupta Date: Sun, 8 Jun 2025 15:15:35 -0400 Subject: [PATCH 2/7] - add roadmap to properly implement agentic flow for diffgraph creation - all roadmap documents in the docs folder. --- docs/Roadmap-v0-demo.md | 202 ++++++++++++++++++++ Roadmap.md => docs/Roadmap-v0-initialize.md | 0 2 files changed, 202 insertions(+) create mode 100644 docs/Roadmap-v0-demo.md rename Roadmap.md => docs/Roadmap-v0-initialize.md (100%) diff --git a/docs/Roadmap-v0-demo.md b/docs/Roadmap-v0-demo.md new file mode 100644 index 0000000..23ae2bf --- /dev/null +++ b/docs/Roadmap-v0-demo.md @@ -0,0 +1,202 @@ +# 📅 Roadmap: Modular Multi-Agent Architecture for diffgraph-ai + +This roadmap introduces a modular re-architecture of `diffgraph-ai`, aimed at improving the accuracy, extensibility, and quality of visual diff representations. The goal is to break up the existing monolithic agent prompt into smaller, specialized AI agents using the OpenAI Agents SDK. + +Each step is: + +* ✅ Independently testable +* 🔁 Minimal, logical step in the pipeline +* 📗 Documented with clarity for even junior developers or AI assistants to follow + +--- + +## ✅ Step 1: Introduce Component-Level Visualization + +### 🌟 Objective: + +Update the `GraphManager` and Mermaid generation to render function- and class-level nodes instead of only file-level nodes. 
+ +### 🔹 Definition of Done: + +The output HTML graph should show component nodes (e.g., `validateUser()`, `AuthService`) with their change type (added/modified/deleted). + +### ⚡ Implementation Plan: + +* Update `GraphManager.add_component` to always record components +* Modify `get_mermaid_diagram()` to include and render component-level nodes with styles +* Use stub data if needed (before real extraction logic works) + +### 🔧 Implementation Details: + +* Use unique node IDs: `file_path::component_name` +* Use the `ChangeType` color scheme already defined +* Show 1–2 lines of the component summary (if available) +* Link dependencies using edges (even hardcoded for now) + +### 📌 Status: `Not Started` + +--- + +## ✅ Step 2: Split Agent Prompts into Two Specialized Roles + +### 🌟 Objective: + +Replace the monolithic analysis prompt with two focused prompts: + +1. **Component Extractor** +2. **Dependency Mapper** + +### 🔹 Definition of Done: + +You should be able to call each of these prompts independently and get JSON output conforming to a shared schema. + +### ⚡ Implementation Plan: + +* Create prompt templates (can live as Python strings or YAML files) +* Use `openai.ChatCompletion.create` to run each prompt manually first +* Define a shared `Component` schema with: + + * `name`, `type`, `summary`, `dependencies`, `dependents`, `file_path`, `change_type` + +### 🔧 Implementation Details: + +* Component Extractor receives: file path + content + change type +* Dependency Mapper receives: file path + list of components + code snippets +* Each tool returns JSON with a list of `Component` objects +* Validate that outputs can round-trip through `pydantic.BaseModel` + +### 📌 Status: `Not Started` + +--- + +## ✅ Step 3: Add Chunking System for Large Files + +### 🌟 Objective: + +Break large files into chunks so component extraction doesn’t fail due to context limits. 
+ +### 🔹 Definition of Done: + +The system should: + +* Automatically detect long files (>1000 lines or >10KB) +* Split by top-level class or function boundaries +* Pass each chunk individually to the Extractor agent + +### ⚡ Implementation Plan: + +* Use `tree-sitter` or regex to identify safe split points +* Assign each chunk a metadata block: + + * `chunk_id`, `start_line`, `end_line`, `file_path`, `text` +* Feed each chunk separately into agent and recombine component outputs + +### 🔧 Implementation Details: + +* Store `ChunkResult` structs with raw outputs + parsed `Component` list +* Keep a map of chunk → original file for traceability +* If any chunk fails, include fallback: "Chunk X could not be parsed." + +### 📌 Status: `Not Started` + +--- + +## ✅ Step 4: Set Up OpenAI Agents SDK Planner and Tools + +### 🌟 Objective: + +Create an orchestrator agent using the OpenAI Agents SDK that delegates to tools: + +* `extract_components` +* `map_dependencies` +* `visualize_graph` + +### 🔹 Definition of Done: + +A single `Runner.run()` call should: + +* Take the file list +* Call tools in correct order +* Generate a final graph and return it + +### ⚡ Implementation Plan: + +* Define 3 `Tool` subclasses +* Define their input/output JSON schemas +* Register these tools inside a `Planner` agent + +### 🔧 Implementation Details: + +* Use [`openai_agents.Tool`](https://openai.github.io/openai-agents-python/agents/#tools) class +* Each tool’s `call()` should be fully stateless and log input/output +* Store intermediate results in a `Workspace` or memory dict +* Use the built-in `Planner` or implement a `ToolPicker` + +### 📌 Status: `Not Started` + +--- + +## ✅ Step 5: Refactor Agent-Calling Logic in `ai_analysis.py` + +### 🌟 Objective: + +Replace direct prompt + parsing logic with calls to the OpenAI Agents SDK planner + tools. 
+ +### 🔹 Definition of Done: + +`CodeAnalysisAgent.analyze_changes()` should: + +* Initialize planner +* Submit file info + diffs to planner +* Collect final graph (or summary) + +### ⚡ Implementation Plan: + +* Replace `_run_agent_analysis` with SDK planner call +* Move all hardcoded prompt strings into reusable tools +* Add detailed logging so developers can trace step-by-step agent execution + +### 🔧 Implementation Details: + +* Prepare `ToolContext` or `ToolMemory` between agent steps if needed +* Use JSON logs for prompt/output snapshots to aid debugging +* Ensure fallback mode (manual single-agent) works with `--legacy` flag + +### 📌 Status: `Not Started` + +--- + +## ✅ Step 6: Plan for Future Multi-LLM Abstraction + +### 🌟 Objective: + +Lay the foundation for future support of Anthropic, Perplexity, Google Gemini, and OSS models. + +### 🔹 Definition of Done: + +Abstract out all LLM calls behind an `LLMProvider` interface. + +### ⚡ Implementation Plan: + +* Create `providers/` module +* Start with `OpenAIProvider` (wraps OpenAI SDK + Agents SDK) +* Define common interface: `.chat()`, `.tool_call()`, `.get_capabilities()` + +### 🔧 Implementation Details: + +* Use Python `ABC` or `Protocol` +* Later: add `AnthropicProvider`, `OllamaProvider`, etc. 
+* Allow tool definitions to work with different backends based on config or CLI flag + +### πŸ“Œ Status: `Not Started` + +--- + +## 🏁 Final Deliverables Checklist + +* [ ] Component-level Mermaid output +* [ ] Modular tool + planner agent pipeline +* [ ] Chunked file handling with metadata +* [ ] JSON-based schemas and round-trippable outputs +* [ ] SDK-based execution with full logging +* [ ] Abstracted backend for future multi-LLM provider support diff --git a/Roadmap.md b/docs/Roadmap-v0-initialize.md similarity index 100% rename from Roadmap.md rename to docs/Roadmap-v0-initialize.md From b2450a4c65b9d57b43a3934ec7212cb52c1e6895 Mon Sep 17 00:00:00 2001 From: Avikalp Kumar Gupta Date: Sun, 8 Jun 2025 15:20:15 -0400 Subject: [PATCH 3/7] Refactor Mermaid diagram generation in GraphManager - Updated file and component node styling to use lighter and darker shades respectively for better visual distinction. - Removed redundant color mapping and replaced it with direct class definitions for file and component changes in the Mermaid diagram. - In HTML report, Mermaid diagram is shown before change summary now. --- diffgraph/graph_manager.py | 38 +++++++++++++++----------------------- diffgraph/html_report.py | 8 ++++---- 2 files changed, 19 insertions(+), 27 deletions(-) diff --git a/diffgraph/graph_manager.py b/diffgraph/graph_manager.py index f1628e7..bb9022f 100644 --- a/diffgraph/graph_manager.py +++ b/diffgraph/graph_manager.py @@ -144,46 +144,38 @@ def get_mermaid_diagram(self) -> str: """Generate a Mermaid diagram representation of the graph.""" mermaid = ["graph TD"] - # Add file nodes with their change type colors + # Add file nodes with their change type colors (lighter shades) for file_path, node in self.file_nodes.items(): - color = { - ChangeType.ADDED: "green", - ChangeType.DELETED: "red", - ChangeType.MODIFIED: "orange", - ChangeType.UNCHANGED: "gray" - }[node.change_type] - label = f"{file_path}" if node.summary: label += f"
{node.summary[:50]}..." if node.error: label += f"
(Error: {node.error})" - mermaid.append(f' {file_path.replace("/", "_")}["{label}"]:::change_{node.change_type.value}') + mermaid.append(f' {file_path.replace("/", "_")}["{label}"]:::file_{node.change_type.value}') - # Add component nodes + # Add component nodes (darker shades) for component_id, node in self.component_nodes.items(): - color = { - ChangeType.ADDED: "green", - ChangeType.DELETED: "red", - ChangeType.MODIFIED: "orange", - ChangeType.UNCHANGED: "gray" - }[node.change_type] - label = f"{node.name}" if node.summary: label += f"
{node.summary[:50]}..." - mermaid.append(f' {component_id.replace("/", "_").replace("::", "_")}["{label}"]:::change_{node.change_type.value}') + mermaid.append(f' {component_id.replace("/", "_").replace("::", "_")}["{label}"]:::component_{node.change_type.value}') # Add edges between components for source, target in self.component_graph.edges(): mermaid.append(f' {source.replace("/", "_").replace("::", "_")} --> {target.replace("/", "_").replace("::", "_")}') - # Add style definitions - mermaid.append(" classDef change_added fill:green,stroke:#333,stroke-width:2px") - mermaid.append(" classDef change_deleted fill:red,stroke:#333,stroke-width:2px") - mermaid.append(" classDef change_modified fill:orange,stroke:#333,stroke-width:2px") - mermaid.append(" classDef change_unchanged fill:gray,stroke:#333,stroke-width:2px") + # Add style definitions for files (lighter shades) + mermaid.append(" classDef file_added fill:#90EE90,stroke:#333,stroke-width:2px") # Light green + mermaid.append(" classDef file_deleted fill:#FFB6C1,stroke:#333,stroke-width:2px") # Light red + mermaid.append(" classDef file_modified fill:#FFD580,stroke:#333,stroke-width:2px") # Light orange + mermaid.append(" classDef file_unchanged fill:#D3D3D3,stroke:#333,stroke-width:2px") # Light gray + + # Add style definitions for components (darker shades) + mermaid.append(" classDef component_added fill:#32CD32,stroke:#333,stroke-width:2px") # Lime green + mermaid.append(" classDef component_deleted fill:#DC143C,stroke:#333,stroke-width:2px") # Crimson + mermaid.append(" classDef component_modified fill:#FF8C00,stroke:#333,stroke-width:2px") # Dark orange + mermaid.append(" classDef component_unchanged fill:#808080,stroke:#333,stroke-width:2px") # Gray return "\n".join(mermaid) \ No newline at end of file diff --git a/diffgraph/html_report.py b/diffgraph/html_report.py index 2565473..5765024 100644 --- a/diffgraph/html_report.py +++ b/diffgraph/html_report.py @@ -147,6 +147,10 @@ def 
generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph +
+ {mermaid_diagram} +
+

Analysis Summary

@@ -154,10 +158,6 @@ def generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph
-
- {mermaid_diagram} -
- From 47ae64d2dced0d8acb71ef1e26e87b298bb6ed87 Mon Sep 17 00:00:00 2001 From: Avikalp Kumar Gupta Date: Sun, 8 Jun 2025 19:17:22 -0400 Subject: [PATCH 5/7] refactor(diffgraph: graph_manager, html_report): Update Mermaid diagram generation and enhance tooltip functionality - Changed Mermaid diagram orientation from TD to LR for better layout. - Improved component ID handling using regex for cleaner formatting. - Enhanced tooltip display in HTML report with markdown parsing and improved styling. - Streamlined event handling for tooltip visibility and interaction with component nodes. --- diffgraph/graph_manager.py | 16 ++++++------ diffgraph/html_report.py | 53 ++++++++++++++++++++++---------------- docs/Roadmap-v0-demo.md | 6 ++--- 3 files changed, 42 insertions(+), 33 deletions(-) diff --git a/diffgraph/graph_manager.py b/diffgraph/graph_manager.py index cb72b02..5878427 100644 --- a/diffgraph/graph_manager.py +++ b/diffgraph/graph_manager.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from enum import Enum import networkx as nx +import re class ChangeType(Enum): """Type of change in the code.""" @@ -161,7 +162,7 @@ def get_connected_components(self, start_component: str, max_depth: int = 3) -> def get_mermaid_diagram(self) -> str: """Generate a Mermaid diagram representation of the graph.""" - mermaid = ["graph TD"] + mermaid = ["graph LR"] file_classes = [] component_classes = [] @@ -185,19 +186,18 @@ def get_mermaid_diagram(self) -> str: # Add components within this file if file_path in file_components: for component_id, comp_node in file_components[file_path]: - comp_id = component_id.replace("/", "_").replace("::", "_") - component_label = comp_node.name + comp_id = re.sub(r'[^a-zA-Z0-9_]', '_', component_id) + component_label = comp_node.name.replace('"', '\\"').replace('`', '\\`') if comp_node.summary: - mermaid.append(f' {comp_id}["{component_label}"]') - component_classes.append(f'class {comp_id} component_{comp_node.change_type.value}') + 
mermaid.append(f' {comp_id}["{component_label}"]:::component_{comp_node.change_type.value}') + mermaid.append(f' click {comp_id} call callback("{comp_node.summary.replace('"', '\\"')}") "{comp_node.summary.replace('"', '\\"')}"') else: - mermaid.append(f' {comp_id}["{component_label}"]') - component_classes.append(f'class {comp_id} component_{comp_node.change_type.value}') + mermaid.append(f' {comp_id}["{component_label}"]:::component_{comp_node.change_type.value}') mermaid.append(' end') # Add edges between components for source, target in self.component_graph.edges(): - mermaid.append(f' {source.replace("/", "_").replace("::", "_")} --> {target.replace("/", "_").replace("::", "_")}') + mermaid.append(f' {re.sub(r'[^a-zA-Z0-9_]', '_', source)} --> {re.sub(r'[^a-zA-Z0-9_]', '_', target)}') # Add style definitions for files (lighter shades) mermaid.append(" classDef file_added fill:#90EE90,stroke:#333,stroke-width:2px") # Light green diff --git a/diffgraph/html_report.py b/diffgraph/html_report.py index c68c20f..c0dc606 100644 --- a/diffgraph/html_report.py +++ b/diffgraph/html_report.py @@ -157,6 +157,21 @@ def generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph .tooltip.visible {{ display: block; }} + + .mermaidTooltip {{ + position: absolute; + text-align: center; + max-width: 200px; + padding: 2px; + font-family: 'trebuchet ms', verdana, arial; + font-size: 12px; + background: #ffffde; + border: 1px solid #aaaa33; + border-radius: 2px; + pointer-events: none; + z-index: 100; + }} + @@ -169,6 +184,8 @@ def generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph {mermaid_diagram} +
+

Analysis Summary

@@ -176,9 +193,6 @@ def generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph
- -
-