From d4243e6e7bf2c88b04bba3697d3ce747d9d7fd58 Mon Sep 17 00:00:00 2001
From: jayhack
Date: Sat, 15 Feb 2025 12:42:14 -0800
Subject: [PATCH 1/2] .

---
 .../examples/deep_code_research/README.md     |  67 ++++++++
 .../deep_code_research/requirements.txt       |   7 +
 .../examples/deep_code_research/run.py        | 152 ++++++++++++++++++
 3 files changed, 226 insertions(+)
 create mode 100644 codegen-examples/examples/deep_code_research/README.md
 create mode 100644 codegen-examples/examples/deep_code_research/requirements.txt
 create mode 100644 codegen-examples/examples/deep_code_research/run.py

diff --git a/codegen-examples/examples/deep_code_research/README.md b/codegen-examples/examples/deep_code_research/README.md
new file mode 100644
index 000000000..140f37806
--- /dev/null
+++ b/codegen-examples/examples/deep_code_research/README.md
@@ -0,0 +1,67 @@
+# Deep Code Research Example
+
+This example demonstrates how to use Codegen to build a CLI tool for deep code research. The tool allows you to:
+
+- Clone and analyze any GitHub repository
+- Ask questions about the codebase
+- Explore dependencies and relationships
+- Search for patterns and implementations
+
+## Setup
+
+1. Install the requirements:
+
+```bash
+uv venv
+source .venv/bin/activate
+uv pip install -r requirements.txt
+```
+
+2. Set up your OpenAI API key in a `.env` file:
+
+```bash
+OPENAI_API_KEY=your-api-key
+```
+
+## Usage
+
+Run the CLI tool by providing a GitHub repository:
+
+```bash
+python run.py research "owner/repo"
+```
+
+For example:
+
+```bash
+python run.py research "fastapi/fastapi"
+```
+
+You can also provide an initial query:
+
+```bash
+python run.py research "fastapi/fastapi" -q "Explain the main components"
+```
+
+## Example Queries
+
+- "Explain the main components and their relationships"
+- "Find all usages of the FastAPI class"
+- "Show me the dependency graph for the routing module"
+- "What design patterns are used in this codebase?"
+- "How is dependency injection implemented?"
+
+## Features
+
+The research agent has access to several powerful tools:
+
+- Semantic code search
+- Symbol relationship analysis
+- Directory structure exploration
+- Code viewing and analysis
+
+The agent maintains conversation history, so you can ask follow-up questions and build on previous findings.
+
+## Exit
+
+Type "exit" or "quit" to end the research session.
diff --git a/codegen-examples/examples/deep_code_research/requirements.txt b/codegen-examples/examples/deep_code_research/requirements.txt
new file mode 100644
index 000000000..87a0b8786
--- /dev/null
+++ b/codegen-examples/examples/deep_code_research/requirements.txt
@@ -0,0 +1,7 @@
+click>=8.0.0
+rich>=10.0.0
+rich-click>=1.7.0
+langchain-core>=0.1.0
+langchain-openai>=0.0.5
+langchain>=0.1.0
+codegen-sdk>=0.1.0
diff --git a/codegen-examples/examples/deep_code_research/run.py b/codegen-examples/examples/deep_code_research/run.py
new file mode 100644
index 000000000..4c4c8e432
--- /dev/null
+++ b/codegen-examples/examples/deep_code_research/run.py
@@ -0,0 +1,152 @@
+"""CLI program for deep code research using Codegen."""
+
+import sys
+import warnings
+from pathlib import Path
+from typing import Optional
+
+import rich_click as click
+from codegen import Codebase
+from codegen.extensions.langchain.agent import create_agent_with_tools
+from codegen.extensions.langchain.tools import (
+    ListDirectoryTool,
+    RevealSymbolTool,
+    SearchTool,
+    SemanticSearchTool,
+    ViewFileTool,
+)
+from langchain_core.messages import SystemMessage
+from rich.console import Console
+from rich.markdown import Markdown
+from rich.prompt import Prompt
+
+# Suppress LangSmith warning
+warnings.filterwarnings("ignore", message="API key must be provided when using hosted LangSmith API")
+
+# Add the project root to Python path
+project_root = str(Path(__file__).parent.parent.parent)
+sys.path.append(project_root)
+
+# Configure rich-click
+click.rich_click.USE_RICH_MARKUP = True
+click.rich_click.USE_MARKDOWN = True
+click.rich_click.SHOW_ARGUMENTS = True
+click.rich_click.GROUP_ARGUMENTS_OPTIONS = True
+click.rich_click.STYLE_ERRORS_SUGGESTION = "yellow italic"
+click.rich_click.ERRORS_SUGGESTION = "Try running the command with --help for more information"
+
+console = Console()
+
+RESEARCH_AGENT_PROMPT = """You are a code research expert. Your goal is to help users understand codebases by:
+1. Finding relevant code through semantic and text search
+2. Analyzing symbol relationships and dependencies
+3. Exploring directory structures
+4. Reading and explaining code
+
+Always explain your findings in detail and provide context about how different parts of the code relate to each other.
+When analyzing code, consider:
+- The purpose and functionality of each component
+- How different parts interact
+- Key patterns and design decisions
+- Potential areas for improvement
+
+Break down complex concepts into understandable pieces and use examples when helpful."""
+
+
+def initialize_codebase(repo_name: str) -> Optional[Codebase]:
+    """Initialize a codebase with a spinner showing progress."""
+    with console.status("") as status:
+        try:
+            # Update status with specific steps
+            status.update(f"[bold blue]Cloning {repo_name}...[/bold blue]")
+            codebase = Codebase.from_repo(repo_name)
+            status.update("[bold green]āœ“ Repository cloned successfully![/bold green]")
+            return codebase
+        except Exception as e:
+            console.print(f"[bold red]Error initializing codebase:[/bold red] {e}")
+            return None
+
+
+@click.group()
+def cli():
+    """[bold blue]šŸ” Codegen Code Research CLI[/bold blue]
+
+    A powerful tool for deep code analysis and research.
+ """ + pass + + +@cli.command() +@click.argument("repo_name", required=False) +@click.option("--query", "-q", default=None, help="Initial research query to start with.") +def research(repo_name: Optional[str] = None, query: Optional[str] = None): + """[bold green]Start a code research session[/bold green] + + [blue]Arguments:[/blue] + [yellow]REPO_NAME[/yellow]: GitHub repository in format 'owner/repo' (optional, will prompt if not provided) + """ + # If no repo name provided, prompt for it + if not repo_name: + console.print("\n[bold]Welcome to the Code Research CLI![/bold]") + console.print("\nEnter a GitHub repository to analyze (format: owner/repo)\nExamples:\n • fastapi/fastapi\n • pytorch/pytorch\n • microsoft/TypeScript") + repo_name = Prompt.ask("\n[bold cyan]Repository name[/bold cyan]") + + # Initialize codebase + codebase = initialize_codebase(repo_name) + if not codebase: + return + + # Create research tools + tools = [ + ViewFileTool(codebase), + ListDirectoryTool(codebase), + SearchTool(codebase), + SemanticSearchTool(codebase), + RevealSymbolTool(codebase), + ] + + # Initialize agent with research tools + with console.status("[bold blue]Initializing research agent...[/bold blue]") as status: + agent = create_agent_with_tools(codebase=codebase, tools=tools, chat_history=[SystemMessage(content=RESEARCH_AGENT_PROMPT)], verbose=True) + status.update("[bold green]āœ“ Research agent ready![/bold green]") + + # Get initial query if not provided + if not query: + console.print( + "\n[bold]What would you like to research?[/bold]" + "\n[dim]Example queries:[/dim]" + "\n• [italic]Explain the main components and their relationships[/italic]" + "\n• [italic]Find all usages of X function/class[/italic]" + "\n• [italic]Show me the dependency graph for Y module[/italic]" + "\n• [italic]What design patterns are used in this codebase?[/italic]" + ) + query = Prompt.ask("\n[bold cyan]Research query[/bold cyan]") + + # Main research loop + while True: + if not query: + query = Prompt.ask("\n[bold cyan]Research query[/bold cyan]") + + if query.lower() in ["exit", "quit"]: + console.print("\n[bold green]Thanks for using the Code Research CLI! Goodbye![/bold green]") + break + + # Run the agent + with console.status("[bold blue]Researching...[/bold blue]", spinner="dots") as status: + try: + result = agent.invoke( + {"input": query}, + config={"configurable": {"session_id": "research"}}, + ) + # Display the result + console.print("\n[bold blue]šŸ“Š Research Findings:[/bold blue]") + console.print(Markdown(result["output"])) + except Exception as e: + console.print(f"\n[bold red]Error during research:[/bold red] {e}") + + # Clear query for next iteration + query = None + + +if __name__ == "__main__": + cli() From 662f90fdc80c058d9c64a02eb6b0ec9a7b34cf73 Mon Sep 17 00:00:00 2001 From: jayhack Date: Sat, 15 Feb 2025 12:42:25 -0800 Subject: [PATCH 2/2] . 
---
 src/codegen/extensions/langchain/agent.py | 57 +++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/src/codegen/extensions/langchain/agent.py b/src/codegen/extensions/langchain/agent.py
index 22964c5ac..986b90fc8 100644
--- a/src/codegen/extensions/langchain/agent.py
+++ b/src/codegen/extensions/langchain/agent.py
@@ -3,6 +3,7 @@
 from langchain.agents import AgentExecutor
 from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
 from langchain.hub import pull
+from langchain.tools import BaseTool
 from langchain_core.chat_history import InMemoryChatMessageHistory
 from langchain_core.messages import BaseMessage
 from langchain_core.runnables.history import RunnableWithMessageHistory
@@ -163,3 +164,59 @@ def create_codebase_inspector_agent(
         input_messages_key="input",
         history_messages_key="chat_history",
     )
+
+
+def create_agent_with_tools(
+    codebase: Codebase,
+    tools: list[BaseTool],
+    model_name: str = "gpt-4o",
+    temperature: float = 0,
+    verbose: bool = True,
+    chat_history: list[BaseMessage] | None = None,
+) -> RunnableWithMessageHistory:
+    """Create an agent with a specific set of tools.
+
+    Args:
+        codebase: The codebase to operate on
+        tools: List of tools to provide to the agent
+        model_name: Name of the model to use (default: gpt-4o)
+        temperature: Model temperature (default: 0)
+        verbose: Whether to print agent's thought process (default: True)
+        chat_history: Optional list of messages to initialize chat history with
+
+    Returns:
+        Initialized agent with message history
+    """
+    # Initialize language model
+    llm = ChatOpenAI(
+        model_name=model_name,
+        temperature=temperature,
+    )
+
+    # Get the prompt to use
+    prompt = pull("hwchase17/openai-functions-agent")
+
+    # Create the agent
+    agent = OpenAIFunctionsAgent(
+        llm=llm,
+        tools=tools,
+        prompt=prompt,
+    )
+
+    # Create the agent executor
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        verbose=verbose,
+    )
+
+    # Create message history handler
+    message_history = InMemoryChatMessageHistory(messages=chat_history or [])
+
+    # Wrap with message history
+    return RunnableWithMessageHistory(
+        agent_executor,
+        lambda session_id: message_history,
+        input_messages_key="input",
+        history_messages_key="chat_history",
+    )
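
For reviewers, a minimal sketch of how the new `create_agent_with_tools` helper is exercised, mirroring the `run.py` example added in the first patch. This is illustrative only and not part of the patch; it assumes the `codegen` package exposes `Codebase.from_repo` and the langchain extension tools exactly as imported in `run.py`, and that `OPENAI_API_KEY` is set in the environment (the helper also pulls the `hwchase17/openai-functions-agent` prompt from the LangChain hub, which needs network access).

```python
"""Hypothetical standalone sketch of driving create_agent_with_tools (not part of the patch)."""

from codegen import Codebase
from codegen.extensions.langchain.agent import create_agent_with_tools
from codegen.extensions.langchain.tools import SearchTool, ViewFileTool
from langchain_core.messages import SystemMessage

# Clone a repository and build a Codebase object, as run.py does.
codebase = Codebase.from_repo("fastapi/fastapi")

# Hand the agent only the tools this session needs; any subset of the
# run.py tool list works, since the helper just forwards them.
agent = create_agent_with_tools(
    codebase=codebase,
    tools=[ViewFileTool(codebase), SearchTool(codebase)],
    chat_history=[SystemMessage(content="You are a code research expert.")],
    verbose=True,
)

# The returned RunnableWithMessageHistory expects a session_id in the
# config, exactly as run.py passes it.
result = agent.invoke(
    {"input": "Where is the main application object defined?"},
    config={"configurable": {"session_id": "research"}},
)
print(result["output"])
```

One design note worth flagging in review: the helper wires a single `InMemoryChatMessageHistory` into the returned runnable (`lambda session_id: message_history`), so every `session_id` shares the same history. The example CLI only ever uses the `"research"` session, so this works there, but multi-session callers would need a per-session lookup.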