Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions codegen-examples/examples/deep_code_research/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Deep Code Research Example

This example demonstrates how to use Codegen to build a CLI tool for deep code research. The tool allows you to:

- Clone and analyze any GitHub repository
- Ask questions about the codebase
- Explore dependencies and relationships
- Search for patterns and implementations

## Setup

1. Install the requirements:

```bash
uv venv
source .venv/bin/activate
uv sync
```

2. Set your OpenAI API key in a `.env` file:

```bash
OPENAI_API_KEY=your-api-key
```

## Usage

Run the CLI tool by providing a GitHub repository:

```bash
python run.py research "owner/repo"
```

For example:

```bash
python run.py research "fastapi/fastapi"
```

You can also provide an initial query:

```bash
python run.py research "fastapi/fastapi" -q "Explain the main components"
```

## Example Queries

- "Explain the main components and their relationships"
- "Find all usages of the FastAPI class"
- "Show me the dependency graph for the routing module"
- "What design patterns are used in this codebase?"
- "How is dependency injection implemented?"

## Features

The research agent has access to several powerful tools:

- Semantic and text-based code search
- Symbol relationship analysis
- Directory structure exploration
- Code viewing and analysis

The agent maintains conversation history, so you can ask follow-up questions and build on previous findings.

## Exit

Type "exit" or "quit" to end the research session.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
click>=8.0.0
rich>=10.0.0
rich-click>=1.7.0
langchain-core>=0.1.0
langchain-openai>=0.0.5
langchain>=0.1.0
codegen-sdk>=0.1.0
152 changes: 152 additions & 0 deletions codegen-examples/examples/deep_code_research/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""CLI program for deep code research using Codegen."""

import sys
import warnings
from pathlib import Path
from typing import Optional

import rich_click as click
from codegen import Codebase
from codegen.extensions.langchain.agent import create_agent_with_tools
from codegen.extensions.langchain.tools import (
ListDirectoryTool,
RevealSymbolTool,
SearchTool,
SemanticSearchTool,
ViewFileTool,
)
from langchain_core.messages import SystemMessage
from rich.console import Console
from rich.markdown import Markdown
from rich.prompt import Prompt

# Suppress LangSmith warning: ignore any warning whose message starts with the
# text below so it does not clutter the CLI output.
warnings.filterwarnings("ignore", message="API key must be provided when using hosted LangSmith API")

# Add the project root (three directory levels above this file) to Python path.
# NOTE(review): this runs after the `codegen` imports at the top of the file, so
# it cannot influence how those imports were resolved — confirm it is still needed.
project_root = str(Path(__file__).parent.parent.parent)
sys.path.append(project_root)

# Configure rich-click: module-level settings that control how help and error
# output is rendered for every command defined in this file.
click.rich_click.USE_RICH_MARKUP = True
click.rich_click.USE_MARKDOWN = True
click.rich_click.SHOW_ARGUMENTS = True
click.rich_click.GROUP_ARGUMENTS_OPTIONS = True
click.rich_click.STYLE_ERRORS_SUGGESTION = "yellow italic"
click.rich_click.ERRORS_SUGGESTION = "Try running the command with --help for more information"

# Shared Rich console used for all status spinners and printed output below.
console = Console()

# System prompt for the research agent. The `research` command wraps it in a
# SystemMessage and passes it as the agent's initial chat history.
RESEARCH_AGENT_PROMPT = """You are a code research expert. Your goal is to help users understand codebases by:
1. Finding relevant code through semantic and text search
2. Analyzing symbol relationships and dependencies
3. Exploring directory structures
4. Reading and explaining code

Always explain your findings in detail and provide context about how different parts of the code relate to each other.
When analyzing code, consider:
- The purpose and functionality of each component
- How different parts interact
- Key patterns and design decisions
- Potential areas for improvement

Break down complex concepts into understandable pieces and use examples when helpful."""


def initialize_codebase(repo_name: str) -> Optional[Codebase]:
    """Build a Codebase for ``repo_name`` while a console spinner is shown.

    Args:
        repo_name: Repository identifier handed to ``Codebase.from_repo``.

    Returns:
        The Codebase on success. If anything raises, the error is printed to
        the console and ``None`` is returned instead.
    """
    with console.status("") as spinner:
        try:
            # The spinner starts blank and is relabelled for each step.
            spinner.update(f"[bold blue]Cloning {repo_name}...[/bold blue]")
            repo = Codebase.from_repo(repo_name)
            spinner.update("[bold green]✓ Repository cloned successfully![/bold green]")
        except Exception as err:
            console.print(f"[bold red]Error initializing codebase:[/bold red] {err}")
            return None
        return repo


@click.group()
def cli():
    """[bold blue]🔍 Codegen Code Research CLI[/bold blue]

    A powerful tool for deep code analysis and research.
    """
    # Intentionally empty: this is only the command group; subcommands are
    # attached to it with the @cli.command() decorator.


@cli.command()
@click.argument("repo_name", required=False)
@click.option("--query", "-q", default=None, help="Initial research query to start with.")
def research(repo_name: Optional[str] = None, query: Optional[str] = None):
    """[bold green]Start a code research session[/bold green]

    [blue]Arguments:[/blue]
    [yellow]REPO_NAME[/yellow]: GitHub repository in format 'owner/repo' (optional, will prompt if not provided)
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so implementation notes live in comments instead.
    #
    # Flow: resolve the repository -> build the codebase -> build the agent ->
    # loop reading queries until the user types "exit" or "quit".

    # If no repo name provided, prompt for it
    if not repo_name:
        console.print("\n[bold]Welcome to the Code Research CLI![/bold]")
        console.print("\nEnter a GitHub repository to analyze (format: owner/repo)\nExamples:\n • fastapi/fastapi\n • pytorch/pytorch\n • microsoft/TypeScript")
        repo_name = Prompt.ask("\n[bold cyan]Repository name[/bold cyan]")

    # Initialize codebase; initialize_codebase already reported any error.
    codebase = initialize_codebase(repo_name)
    if not codebase:
        return

    # Create research tools
    tools = [
        ViewFileTool(codebase),
        ListDirectoryTool(codebase),
        SearchTool(codebase),
        SemanticSearchTool(codebase),
        RevealSymbolTool(codebase),
    ]

    # Initialize agent with research tools; the system prompt seeds its history.
    with console.status("[bold blue]Initializing research agent...[/bold blue]") as status:
        agent = create_agent_with_tools(codebase=codebase, tools=tools, chat_history=[SystemMessage(content=RESEARCH_AGENT_PROMPT)], verbose=True)
        status.update("[bold green]✓ Research agent ready![/bold green]")

    # Show example queries when none was given on the command line. The actual
    # prompt happens at the top of the loop below, so it is not duplicated here.
    if not query:
        console.print(
            "\n[bold]What would you like to research?[/bold]"
            "\n[dim]Example queries:[/dim]"
            "\n• [italic]Explain the main components and their relationships[/italic]"
            "\n• [italic]Find all usages of X function/class[/italic]"
            "\n• [italic]Show me the dependency graph for Y module[/italic]"
            "\n• [italic]What design patterns are used in this codebase?[/italic]"
        )

    # Main research loop
    while True:
        if not query:
            query = Prompt.ask("\n[bold cyan]Research query[/bold cyan]")

        # Normalise once so that "-q ' exit '" or a whitespace-only query is
        # handled the same way as interactive input.
        query = query.strip()

        # Never send a blank query to the agent; just ask again.
        if not query:
            continue

        if query.lower() in ("exit", "quit"):
            console.print("\n[bold green]Thanks for using the Code Research CLI! Goodbye![/bold green]")
            break

        # Run the agent
        with console.status("[bold blue]Researching...[/bold blue]", spinner="dots"):
            try:
                result = agent.invoke(
                    {"input": query},
                    config={"configurable": {"session_id": "research"}},
                )
                # Display the result
                console.print("\n[bold blue]📊 Research Findings:[/bold blue]")
                console.print(Markdown(result["output"]))
            except Exception as e:
                # Keep the session alive: report the failure and prompt again.
                console.print(f"\n[bold red]Error during research:[/bold red] {e}")

        # Clear query for next iteration
        query = None


# Run the click command group only when this file is executed as a script.
if __name__ == "__main__":
    cli()
57 changes: 57 additions & 0 deletions src/codegen/extensions/langchain/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Optional

from langchain.agents import AgentExecutor
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.hub import pull
from langchain.tools import BaseTool
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import BaseMessage
from langchain_core.runnables.history import RunnableWithMessageHistory
Expand Down Expand Up @@ -96,7 +97,7 @@

# Wrap with message history
return RunnableWithMessageHistory(
agent_executor,

Check failure on line 100 in src/codegen/extensions/langchain/agent.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "RunnableWithMessageHistory" has incompatible type "AgentExecutor"; expected "Runnable[Sequence[BaseMessage] | dict[str, Any], str | BaseMessage | Sequence[BaseMessage] | dict[str, Any]] | Runnable[PromptValue | str | Sequence[BaseMessage | list[str] | tuple[str, str] | str | dict[str, Any]], BaseMessage | str]" [arg-type]
lambda session_id: message_history,
input_messages_key="input",
history_messages_key="chat_history",
Expand Down Expand Up @@ -158,8 +159,64 @@

# Wrap with message history
return RunnableWithMessageHistory(
agent_executor,

Check failure on line 162 in src/codegen/extensions/langchain/agent.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "RunnableWithMessageHistory" has incompatible type "AgentExecutor"; expected "Runnable[Sequence[BaseMessage] | dict[str, Any], str | BaseMessage | Sequence[BaseMessage] | dict[str, Any]] | Runnable[PromptValue | str | Sequence[BaseMessage | list[str] | tuple[str, str] | str | dict[str, Any]], BaseMessage | str]" [arg-type]
lambda session_id: message_history,
input_messages_key="input",
history_messages_key="chat_history",
)


def create_agent_with_tools(
    codebase: Codebase,
    tools: list[BaseTool],
    model_name: str = "gpt-4o",
    temperature: float = 0,
    verbose: bool = True,
    chat_history: Optional[list[BaseMessage]] = None,
) -> RunnableWithMessageHistory:
    """Create an agent with a specific set of tools.

    Args:
        codebase: The codebase to operate on. Not referenced inside this
            function; the supplied tools are presumably already bound to it
            (TODO confirm) and the parameter is kept for interface
            compatibility.
        tools: List of tools to provide to the agent
        model_name: Name of the model to use (default: gpt-4o)
        temperature: Model temperature (default: 0)
        verbose: Whether to print agent's thought process (default: True)
        chat_history: Optional list of messages to initialize chat history
            with. ``None`` (the default) starts with an empty history. The
            list is copied, so the caller's list is never shared with the
            agent's history.

    Returns:
        Initialized agent with message history
    """
    # Initialize language model
    llm = ChatOpenAI(
        model_name=model_name,
        temperature=temperature,
    )

    # Get the prompt to use
    prompt = pull("hwchase17/openai-functions-agent")

    # Create the agent
    agent = OpenAIFunctionsAgent(
        llm=llm,
        tools=tools,
        prompt=prompt,
    )

    # Create the agent executor
    agent_executor = AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=verbose,
    )

    # Create message history handler. A fresh list is built on every call so
    # that no list object (default or caller-owned) is shared between agents.
    initial_messages = list(chat_history) if chat_history is not None else []
    message_history = InMemoryChatMessageHistory(messages=initial_messages)

    # Wrap with message history. The same history object is returned for every
    # session_id, so all sessions of this agent share one conversation.
    return RunnableWithMessageHistory(
        agent_executor,
        lambda session_id: message_history,
        input_messages_key="input",
        history_messages_key="chat_history",
    )
Loading