diff --git a/diffgraph/ai_analysis.py b/diffgraph/ai_analysis.py
index 44bbb48..c47ce16 100644
--- a/diffgraph/ai_analysis.py
+++ b/diffgraph/ai_analysis.py
@@ -3,6 +3,10 @@ import os
 from pydantic import BaseModel
 from .graph_manager import GraphManager, FileStatus, ChangeType, ComponentNode
+import time
+import random
+import openai
+import re
 
 class FileChange(BaseModel):
     """Model representing a file change."""
@@ -15,6 +19,39 @@ class DiffAnalysis(BaseModel):
     summary: str
     mermaid_diagram: str
 
+def exponential_backoff_retry(func):
+    """Decorator to implement exponential backoff retry logic using API rate limit information."""
+    def wrapper(*args, **kwargs):
+        max_retries = 5
+        base_delay = 1  # Start with 1 second
+        max_delay = 60  # Maximum delay of 60 seconds
+
+        for attempt in range(max_retries):
+            try:
+                return func(*args, **kwargs)
+            except openai.RateLimitError as e:
+                if attempt == max_retries - 1:  # Last attempt
+                    raise  # Re-raise the exception if all retries failed
+
+                # Try to get the retry delay from the error response
+                try:
+                    # The error response usually contains a 'retry_after' field
+                    retry_after = getattr(e, 'retry_after', None)
+                    if retry_after:
+                        delay = float(retry_after)
+                    else:
+                        # Fallback to exponential backoff if retry_after is not available
+                        delay = min(base_delay * (2 ** attempt) + random.uniform(0, 1), max_delay)
+                except (ValueError, TypeError):
+                    # If we can't parse the retry_after, fallback to exponential backoff
+                    delay = min(base_delay * (2 ** attempt) + random.uniform(0, 1), max_delay)
+
+                print(f"Rate limit hit. Retrying in {delay:.2f} seconds...")
+                time.sleep(delay)
+            except Exception as e:
+                raise  # Re-raise other exceptions immediately
+    return wrapper
+
 class CodeAnalysisAgent:
     """Agent for analyzing code changes using OpenAI's Agents SDK."""
 
@@ -66,6 +103,12 @@ def _determine_change_type(self, status: str) -> ChangeType:
         else:
             return ChangeType.MODIFIED
 
+    @exponential_backoff_retry
+    def _run_agent_analysis(self, prompt: str) -> str:
+        """Run the agent analysis with retry logic."""
+        result = Runner.run_sync(self.agent, prompt)
+        return result.final_output
+
     def analyze_changes(self, files_with_content: List[Dict[str, str]]) -> DiffAnalysis:
         """
         Analyze code changes using the OpenAI agent, processing files incrementally.
@@ -114,10 +157,12 @@ def analyze_changes(self, files_with_content: List[Dict[str, str]]) -> DiffAnalysis:
             for comp in processed_components:
                 prompt += f"- {comp.name}: {comp.summary}\n"
 
-            # Run the agent
-            result = Runner.run_sync(self.agent, prompt)
-            response_text = result.final_output
+            # Run the agent with retry logic
+            response_text = self._run_agent_analysis(prompt)
+            print("--------------------------------")
+            print(response_text)
+            print("--------------------------------")
             # Parse the response
             summary = ""
             components = []
@@ -128,49 +173,81 @@ def analyze_changes(self, files_with_content: List[Dict[str, str]]) -> DiffAnalysis:
             if "COMPONENTS:" in response_text:
                 components_section = response_text.split("COMPONENTS:")[1].split("IMPACT:")[0].strip()
                 current_component = {}
+                components = []  # Reset components list for each file
                 for line in components_section.split("\n"):
                     line = line.strip()
                     if not line:
-                        if current_component:
+                        if current_component and "name" in current_component:  # Only add if we have a name
                             components.append(current_component)
                             current_component = {}
                         continue
 
-                    if line.startswith("- name:"):
-                        if current_component:
-                            components.append(current_component)
-                        current_component = {"name": line[7:].strip()}
-                    elif line.startswith(" type:"):
-                        current_component["type"] = line[7:].strip()
-                    elif line.startswith(" summary:"):
-                        current_component["summary"] = line[10:].strip()
-                    elif line.startswith(" dependencies:"):
-                        current_component["dependencies"] = [d.strip() for d in line[15:].split(",")]
-                    elif line.startswith(" dependents:"):
-                        current_component["dependents"] = [d.strip() for d in line[12:].split(",")]
-
-                if current_component:
+                    parts = line.split(":")
+                    if len(parts) > 1:
+                        field_name = re.sub(r'[^a-zA-Z0-9_]', '', parts[0].strip()).lower()
+                        field_value = ":".join(parts[1:]).strip()
+                        if field_name == "name":
+                            if current_component and "name" in current_component:  # Only add if we have a name
+                                components.append(current_component)
+                            current_component = {"name": field_value}
+                        elif field_name == "type":
+                            current_component["type"] = re.sub(r'[^a-zA-Z0-9_]', '', field_value.strip()).lower()
+                        elif field_name == "summary":
+                            current_component["summary"] = field_value
+                        elif field_name == "dependencies":
+                            current_component["dependencies"] = [d.strip() for d in field_value.split(",") if d.strip()]
+                        elif field_name == "dependents":
+                            current_component["dependents"] = [d.strip() for d in field_value.split(",") if d.strip()]
+
+                if current_component and "name" in current_component:  # Only add if we have a name
                     components.append(current_component)
 
                 # Add components to the graph
                 for comp in components:
-                    change_type = ChangeType[comp["type"].upper()]
-                    self.graph_manager.add_component(
-                        comp["name"],
-                        current_file,
-                        change_type
-                    )
-
-                    # Add dependencies
-                    for dep in comp.get("dependencies", []):
-                        # Try to find the dependency in other components
-                        for other_comp in self.graph_manager.component_nodes.values():
-                            if other_comp.name == dep:
-                                self.graph_manager.add_component_dependency(
-                                    f"{current_file}::{comp['name']}",
-                                    f"{other_comp.file_path}::{other_comp.name}"
-                                )
+                    if "name" not in comp or "type" not in comp:
+                        print(f"Skipping invalid component: {comp}")
+                        continue
+
+                    try:
+                        change_type = ChangeType[comp["type"].upper()]
+                        self.graph_manager.add_component(
+                            comp["name"],
+                            current_file,
+                            change_type,
+                            summary=comp.get("summary"),
+                            dependencies=comp.get("dependencies", []),
+                            dependents=comp.get("dependents", [])
+                        )
+
+                        # Add dependencies
+                        for dep in comp.get("dependencies", []):
+                            if not dep:  # Skip empty dependencies
+                                continue
+                            # Try to find the dependency in other components
+                            for other_comp in self.graph_manager.component_nodes.values():
+                                if (dep.lower() in other_comp.name.lower() or
+                                    other_comp.name.lower() in dep.lower()):
+                                    self.graph_manager.add_component_dependency(
+                                        f"{current_file}::{comp['name']}",
+                                        f"{other_comp.file_path}::{other_comp.name}"
+                                    )
+
+                        # Add dependents
+                        for dep in comp.get("dependents", []):
+                            if not dep:  # Skip empty dependents
+                                continue
+                            # Try to find the dependent in other components
+                            for other_comp in self.graph_manager.component_nodes.values():
+                                if (dep.lower() in other_comp.name.lower() or
+                                    other_comp.name.lower() in dep.lower()):
+                                    self.graph_manager.add_component_dependency(
+                                        f"{other_comp.file_path}::{other_comp.name}",
+                                        f"{current_file}::{comp['name']}"
+                                    )
+                    except Exception as e:
+                        print(f"Error processing component {comp.get('name', 'unknown')}: {str(e)}")
+                        continue
 
             # Mark file as processed
             self.graph_manager.mark_processed(current_file, summary, components)
diff --git a/diffgraph/graph_manager.py b/diffgraph/graph_manager.py
index f1628e7..f51e799 100644
--- a/diffgraph/graph_manager.py
+++ b/diffgraph/graph_manager.py
@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from enum import Enum
 
 import networkx as nx
+import json, re
 
 class ChangeType(Enum):
     """Type of change in the code."""
@@ -70,23 +71,42 @@ def add_file(self, file_path: str, change_type: ChangeType) -> None:
         self.file_graph.add_node(file_path)
         self.processing_queue.append(file_path)
 
-    def add_component(self, name: str, file_path: str, change_type: ChangeType) -> None:
+    def add_component(self, name: str, file_path: str, change_type: ChangeType, summary: str = None, dependencies: list = None, dependents: list = None) -> None:
         """Add a new component to the graph."""
         component_id = f"{file_path}::{name}"
+
+        # Clean up dependencies and dependents lists
+        dependencies = [d for d in (dependencies or []) if d]
+        dependents = [d for d in (dependents or []) if d]
+
         if component_id not in self.component_nodes:
             self.component_nodes[component_id] = ComponentNode(
                 name=name,
                 file_path=file_path,
-                change_type=change_type
+                change_type=change_type,
+                summary=summary,
+                dependencies=dependencies,
+                dependents=dependents
             )
             self.component_graph.add_node(component_id)
+        else:
+            # Update existing component
+            existing = self.component_nodes[component_id]
+            existing.summary = summary or existing.summary
+            existing.dependencies = dependencies or existing.dependencies
+            existing.dependents = dependents or existing.dependents
 
     def add_component_dependency(self, source: str, target: str) -> None:
         """Add a dependency relationship between components."""
+        if not source or not target or source == target:
+            return
+
         if source in self.component_nodes and target in self.component_nodes:
-            self.component_graph.add_edge(source, target)
-            self.component_nodes[source].dependencies.append(target)
-            self.component_nodes[target].dependents.append(source)
+            if not self.component_graph.has_edge(source, target):
+                self.component_graph.add_edge(source, target)
+            if target not in self.component_nodes[source].dependencies:
+                self.component_nodes[source].dependencies.append(target)
+            if source not in self.component_nodes[target].dependents:
+                self.component_nodes[target].dependents.append(source)
 
     def get_next_file(self) -> Optional[str]:
         """Get the next file to process from the queue."""
@@ -142,48 +162,61 @@ def get_connected_components(self, start_component: str, max_depth: int = 3) ->
     def get_mermaid_diagram(self) -> str:
         """Generate a Mermaid diagram representation of the graph."""
-        mermaid = ["graph TD"]
-
-        # Add file nodes with their change type colors
-        for file_path, node in self.file_nodes.items():
-            color = {
-                ChangeType.ADDED: "green",
-                ChangeType.DELETED: "red",
-                ChangeType.MODIFIED: "orange",
-                ChangeType.UNCHANGED: "gray"
-            }[node.change_type]
-
-            label = f"{file_path}"
-            if node.summary:
-                label += f"<br/>{node.summary[:50]}..."
-            if node.error:
-                label += f"<br/>(Error: {node.error})"
+        mermaid = ["graph LR"]
 
-            mermaid.append(f'    {file_path.replace("/", "_")}["{label}"]:::change_{node.change_type.value}')
+        file_classes = []
+        component_classes = []
 
-        # Add component nodes
+        # Group components by their file paths
+        file_components = {}
         for component_id, node in self.component_nodes.items():
-            color = {
-                ChangeType.ADDED: "green",
-                ChangeType.DELETED: "red",
-                ChangeType.MODIFIED: "orange",
-                ChangeType.UNCHANGED: "gray"
-            }[node.change_type]
-
-            label = f"{node.name}"
-            if node.summary:
-                label += f"<br/>{node.summary[:50]}..."
+            if node.file_path not in file_components:
+                file_components[node.file_path] = []
+            file_components[node.file_path].append((component_id, node))
 
-            mermaid.append(f'    {component_id.replace("/", "_").replace("::", "_")}["{label}"]:::change_{node.change_type.value}')
+        # Add file nodes as subgraphs with their components inside
+        for file_path, node in self.file_nodes.items():
+            file_id = file_path.replace("/", "_")
+            file_label = file_path
+            if node.error:
+                file_label += f"<br/>(Error: {node.error})"
+            mermaid.append(f'    subgraph {file_id}["{file_label}"]')
+            mermaid.append(f'        direction TB')
+            file_classes.append(f'class {file_id} file_{node.change_type.value}')
+            # Add components within this file
+            if file_path in file_components:
+                for component_id, comp_node in file_components[file_path]:
+                    comp_id = re.sub(r'[^a-zA-Z0-9_]', '_', component_id)
+                    component_label = comp_node.name.replace('"', '\\"').replace('`', '\\`')
+                    if comp_node.summary:
+                        summary_txt = json.dumps(comp_node.summary)
+                        mermaid.append(f'        {comp_id}["{component_label}"]:::component_{comp_node.change_type.value}')
+                        mermaid.append(f'        click {comp_id} call callback("{summary_txt}") "{summary_txt}"')
+                    else:
+                        mermaid.append(f'        {comp_id}["{component_label}"]:::component_{comp_node.change_type.value}')
+            mermaid.append('    end')
 
         # Add edges between components
         for source, target in self.component_graph.edges():
-            mermaid.append(f'    {source.replace("/", "_").replace("::", "_")} --> {target.replace("/", "_").replace("::", "_")}')
-
-        # Add style definitions
-        mermaid.append("    classDef change_added fill:green,stroke:#333,stroke-width:2px")
-        mermaid.append("    classDef change_deleted fill:red,stroke:#333,stroke-width:2px")
-        mermaid.append("    classDef change_modified fill:orange,stroke:#333,stroke-width:2px")
-        mermaid.append("    classDef change_unchanged fill:gray,stroke:#333,stroke-width:2px")
+            src_id = re.sub(r'[^a-zA-Z0-9_]', '_', source)
+            tgt_id = re.sub(r'[^a-zA-Z0-9_]', '_', target)
+            mermaid.append(f'    {src_id} --> {tgt_id}')
+
+        # Add style definitions for files (lighter shades)
+        mermaid.append("    classDef file_added fill:#90EE90,stroke:#333,stroke-width:2px")  # Light green
+        mermaid.append("    classDef file_deleted fill:#FFB6C1,stroke:#333,stroke-width:2px")  # Light red
+        mermaid.append("    classDef file_modified fill:#FFD580,stroke:#333,stroke-width:2px")  # Light orange
+        mermaid.append("    classDef file_unchanged fill:#D3D3D3,stroke:#333,stroke-width:2px")  # Light gray
+
+        # Add style definitions for components (darker shades)
+        mermaid.append("    classDef component_added fill:#32CD32,stroke:#333,stroke-width:2px")  # Lime green
+        mermaid.append("    classDef component_deleted fill:#DC143C,stroke:#333,stroke-width:2px")  # Crimson
+        mermaid.append("    classDef component_modified fill:#FF8C00,stroke:#333,stroke-width:2px")  # Dark orange
+        mermaid.append("    classDef component_unchanged fill:#808080,stroke:#333,stroke-width:2px")  # Gray
+        mermaid.append("    classDef hidden fill:none,stroke:none")
+
+        # Add explicit class statements for files and components
+        mermaid.extend(file_classes)
+        mermaid.extend(component_classes)
 
         return "\n".join(mermaid)
\ No newline at end of file
diff --git a/diffgraph/html_report.py b/diffgraph/html_report.py
index 2565473..d619cd4 100644
--- a/diffgraph/html_report.py
+++ b/diffgraph/html_report.py
@@ -139,6 +139,39 @@ def generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph
             border-radius: 0.25rem;
             font-size: 0.875em;
         }}
+
+        /* Tooltip styles */
+        .tooltip {{
+            position: fixed;
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 0.5rem;
+            padding: 1rem;
+            max-width: 400px;
+            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+            z-index: 1000;
+            display: none;
+            color: var(--text-primary);
+        }}
+
+        .tooltip.visible {{
+            display: block;
+        }}
+
+        .mermaidTooltip {{
+            position: absolute;
+            text-align: center;
+            max-width: 200px;
+            padding: 2px;
+            font-family: 'trebuchet ms', verdana, arial;
+            font-size: 12px;
+            background: #ffffde;
+            border: 1px solid #aaaa33;
+            border-radius: 2px;
+            pointer-events: none;
+            z-index: 100;
+        }}
+
@@ -147,6 +180,12 @@ def generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph
+        <div class="mermaid">
+            {mermaid_diagram}
+        </div>
+
+        <div id="tooltip" class="tooltip"></div>
+
             Analysis Summary
 
@@ -154,10 +193,6 @@ def generate_html_report(analysis: AnalysisResult, output_path: str = "diffgraph
-        <div class="mermaid">
-            {mermaid_diagram}
-        </div>
-
diff --git a/docs/Roadmap-v0-demo.md b/docs/Roadmap-v0-demo.md
new file mode 100644
index 0000000..5e0ffd9
--- /dev/null
+++ b/docs/Roadmap-v0-demo.md
@@ -0,0 +1,202 @@
+# 📅 Roadmap: Modular Multi-Agent Architecture for diffgraph-ai
+
+This roadmap introduces a modular re-architecture of `diffgraph-ai`, aimed at improving the accuracy, extensibility, and quality of visual diff representations. The goal is to break up the existing monolithic agent prompt into smaller, specialized AI agents using the OpenAI Agents SDK.
+
+Each step is:
+
+* ✅ Independently testable
+* 🔁 A minimal, logical step in the pipeline
+* 📗 Documented clearly enough for junior developers or AI assistants to follow
+
+---
+
+## ✅ Step 1: Introduce Component-Level Visualization
+
+### 🌟 Objective:
+
+Update the `GraphManager` and Mermaid generation to render function- and class-level nodes instead of only file-level nodes.
+
+### 🔹 Definition of Done:
+
+The output HTML graph should show component nodes (e.g., `validateUser()`, `AuthService`) with their change type (added/modified/deleted).
+
+### ⚡ Implementation Plan:
+
+* Update `GraphManager.add_component` to always record components
+* Modify `get_mermaid_diagram()` to include and render component-level nodes with styles
+* Use stub data if needed (before real extraction logic works)
+
+### 🔧 Implementation Details:
+
+* Use unique node IDs: `file_path::component_name`
+* Use the `ChangeType` color scheme already defined
+* Show 1–2 lines of the component summary (if available)
+* Link dependencies using edges (even hardcoded for now)
+
+### 📌 Status: `Completed`
+
+---
+
+## ✅ Step 2: Split Agent Prompts into Two Specialized Roles
+
+### 🌟 Objective:
+
+Replace the monolithic analysis prompt with two focused prompts:
+
+1. **Component Extractor**
+2. **Dependency Mapper**
+
+### 🔹 Definition of Done:
+
+You should be able to call each of these prompts independently and get JSON output conforming to a shared schema.
+
+### ⚡ Implementation Plan:
+
+* Create prompt templates (can live as Python strings or YAML files)
+* Use `openai.ChatCompletion.create` to run each prompt manually first
+* Define a shared `Component` schema with:
+
+  * `name`, `type`, `summary`, `dependencies`, `dependents`, `file_path`, `change_type`
+
+### 🔧 Implementation Details:
+
+* The Component Extractor receives: file path + content + change type
+* The Dependency Mapper receives: file path + list of components + code snippets
+* Each tool returns JSON with a list of `Component` objects
+* Validate that outputs can round-trip through `pydantic.BaseModel`
+
+### 📌 Status: `Not Started`
+
+---
+
+## ✅ Step 3: Add Chunking System for Large Files
+
+### 🌟 Objective:
+
+Break large files into chunks so component extraction doesn't fail due to context limits.
+
+### 🔹 Definition of Done:
+
+The system should:
+
+* Automatically detect long files (>1000 lines or >10KB)
+* Split by top-level class or function boundaries
+* Pass each chunk individually to the Extractor agent
+
+### ⚡ Implementation Plan:
+
+* Use `tree-sitter` or regex to identify safe split points
+* Assign each chunk a metadata block:
+
+  * `chunk_id`, `start_line`, `end_line`, `file_path`, `text`
+* Feed each chunk separately into the agent and recombine component outputs
+
+### 🔧 Implementation Details:
+
+* Store `ChunkResult` structs with raw outputs + parsed `Component` list
+* Keep a map of chunk → original file for traceability
+* If any chunk fails, include the fallback: "Chunk X could not be parsed."
+
+### 📌 Status: `Not Started`
+
+---
+
+## ✅ Step 4: Set Up OpenAI Agents SDK Planner and Tools
+
+### 🌟 Objective:
+
+Create an orchestrator agent using the OpenAI Agents SDK that delegates to tools:
+
+* `extract_components`
+* `map_dependencies`
+* `visualize_graph`
+
+### 🔹 Definition of Done:
+
+A single `Runner.run()` call should:
+
+* Take the file list
+* Call the tools in the correct order
+* Generate a final graph and return it
+
+### ⚡ Implementation Plan:
+
+* Define 3 `Tool` subclasses
+* Define their input/output JSON schemas
+* Register these tools inside a `Planner` agent
+
+### 🔧 Implementation Details:
+
+* Use the [`openai_agents.Tool`](https://openai.github.io/openai-agents-python/agents/#tools) class
+* Each tool's `call()` should be fully stateless and log input/output
+* Store intermediate results in a `Workspace` or memory dict
+* Use the built-in `Planner` or implement a `ToolPicker`
+
+### 📌 Status: `Not Started`
+
+---
+
+## ✅ Step 5: Refactor Agent-Calling Logic in `ai_analysis.py`
+
+### 🌟 Objective:
+
+Replace the direct prompt + parsing logic with calls to the OpenAI Agents SDK planner + tools.
+
+### 🔹 Definition of Done:
+
+`CodeAnalysisAgent.analyze_changes()` should:
+
+* Initialize the planner
+* Submit file info + diffs to the planner
+* Collect the final graph (or summary)
+
+### ⚡ Implementation Plan:
+
+* Replace `_run_agent_analysis` with an SDK planner call
+* Move all hardcoded prompt strings into reusable tools
+* Add detailed logging so developers can trace step-by-step agent execution
+
+### 🔧 Implementation Details:
+
+* Prepare a `ToolContext` or `ToolMemory` between agent steps if needed
+* Use JSON logs for prompt/output snapshots to aid debugging
+* Ensure the fallback mode (manual single-agent) works with a `--legacy` flag
+
+### 📌 Status: `Not Started`
+
+---
+
+## ✅ Step 6: Plan for Future Multi-LLM Abstraction
+
+### 🌟 Objective:
+
+Lay the foundation for future support of Anthropic, Perplexity, Google Gemini, and OSS models.
+
+### 🔹 Definition of Done:
+
+All LLM calls are abstracted behind an `LLMProvider` interface.
+
+### ⚡ Implementation Plan:
+
+* Create a `providers/` module
+* Start with `OpenAIProvider` (wraps the OpenAI SDK + Agents SDK)
+* Define a common interface: `.chat()`, `.tool_call()`, `.get_capabilities()`
+
+### 🔧 Implementation Details:
+
+* Use a Python `ABC` or `Protocol`
+* Later: add `AnthropicProvider`, `OllamaProvider`, etc.
+* Allow tool definitions to work with different backends based on config or a CLI flag
+
+### 📌 Status: `Not Started`
+
+---
+
+## 🏁 Final Deliverables Checklist
+
+* [ ] Component-level Mermaid output
+* [ ] Modular tool + planner agent pipeline
+* [ ] Chunked file handling with metadata
+* [ ] JSON-based schemas and round-trippable outputs
+* [ ] SDK-based execution with full logging
+* [ ] Abstracted backend for future multi-LLM provider support
diff --git a/Roadmap.md b/docs/Roadmap-v0-initialize.md
similarity index 100%
rename from Roadmap.md
rename to docs/Roadmap-v0-initialize.md
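To make Step 2 of the roadmap concrete, here is a minimal sketch of the shared `Component` schema it describes. It assumes pydantic v2 (`model_validate_json` / `model_dump`) and mirrors the `ChangeType` values already used by `graph_manager.py`; the field types, defaults, and example values are illustrative, not the project's final schema.

```python
from enum import Enum
from typing import List

from pydantic import BaseModel


class ChangeType(str, Enum):
    # Mirrors the values used by diffgraph.graph_manager.ChangeType ("added", "deleted", ...)
    ADDED = "added"
    DELETED = "deleted"
    MODIFIED = "modified"
    UNCHANGED = "unchanged"


class Component(BaseModel):
    """Shared schema returned by both the Component Extractor and the Dependency Mapper."""
    name: str
    type: str  # e.g. "function", "class", "method"
    summary: str = ""
    dependencies: List[str] = []
    dependents: List[str] = []
    file_path: str
    change_type: ChangeType


# Round-trip check from Step 2: agent JSON -> model -> Python dict again.
raw = (
    '{"name": "validateUser", "type": "function", "summary": "Validates a user record", '
    '"file_path": "auth/service.py", "change_type": "added"}'
)
component = Component.model_validate_json(raw)
assert component.model_dump()["change_type"] == "added"
```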
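Step 3's chunking can be prototyped before tree-sitter is wired in. The sketch below is one possible regex-based fallback, assuming the thresholds named in the roadmap (more than 1000 lines or roughly 10KB) and the chunk metadata fields it lists; the `Chunk` dataclass and function names are hypothetical.

```python
import re
from dataclasses import dataclass
from typing import List


@dataclass
class Chunk:
    chunk_id: str
    start_line: int  # 1-based, inclusive
    end_line: int    # 1-based, inclusive
    file_path: str
    text: str


def needs_chunking(content: str) -> bool:
    """Thresholds from Step 3: more than 1000 lines or roughly 10KB of text."""
    return len(content.splitlines()) > 1000 or len(content.encode("utf-8")) > 10_000


def chunk_python_file(file_path: str, content: str) -> List[Chunk]:
    """Split a file at top-level `def`/`class` boundaries (regex fallback, no tree-sitter)."""
    lines = content.splitlines()
    if not needs_chunking(content):
        return [Chunk(f"{file_path}::0", 1, len(lines), file_path, content)]

    # Indices of lines that start a new top-level definition.
    boundaries = [i for i, line in enumerate(lines) if re.match(r"^(def |class )", line)]
    if not boundaries or boundaries[0] != 0:
        boundaries.insert(0, 0)  # keep the module header/imports as the first chunk
    boundaries.append(len(lines))

    chunks = []
    for idx, (start, end) in enumerate(zip(boundaries, boundaries[1:])):
        chunks.append(Chunk(
            chunk_id=f"{file_path}::{idx}",
            start_line=start + 1,
            end_line=end,
            file_path=file_path,
            text="\n".join(lines[start:end]),
        ))
    return chunks
```

Each `Chunk` would then be fed to the Extractor agent individually, and the resulting `Component` lists recombined per file, as the step describes.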
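For Step 6, the `LLMProvider` interface could start as a `typing.Protocol` exposing the three methods the roadmap names. The sketch below is an assumption-heavy outline: the method signatures, default model name, and capability keys are placeholders, and `OpenAIProvider` wraps only the plain OpenAI v1 client (the Agents SDK wiring from Steps 4–5 would layer on top).

```python
from typing import Any, Dict, List, Protocol

from openai import OpenAI


class LLMProvider(Protocol):
    """Common interface from Step 6; method shapes here are assumptions, not final."""

    def chat(self, messages: List[Dict[str, str]]) -> str: ...
    def tool_call(self, messages: List[Dict[str, str]], tools: List[Dict[str, Any]]) -> Dict[str, Any]: ...
    def get_capabilities(self) -> Dict[str, bool]: ...


class OpenAIProvider:
    """First concrete provider: wraps the OpenAI SDK behind the shared interface."""

    def __init__(self, model: str = "gpt-4o-mini") -> None:
        self.model = model
        self.client = OpenAI()  # reads OPENAI_API_KEY from the environment

    def chat(self, messages: List[Dict[str, str]]) -> str:
        response = self.client.chat.completions.create(model=self.model, messages=messages)
        return response.choices[0].message.content or ""

    def tool_call(self, messages: List[Dict[str, str]], tools: List[Dict[str, Any]]) -> Dict[str, Any]:
        response = self.client.chat.completions.create(model=self.model, messages=messages, tools=tools)
        return response.choices[0].message.model_dump()

    def get_capabilities(self) -> Dict[str, bool]:
        return {"tools": True, "json_mode": True}


# Call sites depend only on the Protocol, so AnthropicProvider, OllamaProvider, etc.
# can be swapped in later via config or a CLI flag.
provider: LLMProvider = OpenAIProvider()
```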