From 844809c69759bfef744e841fbaaab325633b1fc3 Mon Sep 17 00:00:00 2001 From: jemeza-codegen Date: Mon, 24 Feb 2025 11:42:02 -0800 Subject: [PATCH 1/2] feat: agent run snapshots --- .../swebench_agent_run/entry_point.py | 21 +++++++++++++++++++ .../examples/swebench_agent_run/run_eval.py | 8 +++---- .../examples/swebench_agent_run/test.py | 14 +++++++++++++ src/codegen/extensions/swebench/harness.py | 5 +++-- 4 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 codegen-examples/examples/swebench_agent_run/test.py diff --git a/codegen-examples/examples/swebench_agent_run/entry_point.py b/codegen-examples/examples/swebench_agent_run/entry_point.py index 0d5007419..6af3ed862 100644 --- a/codegen-examples/examples/swebench_agent_run/entry_point.py +++ b/codegen-examples/examples/swebench_agent_run/entry_point.py @@ -1,6 +1,8 @@ from codegen.extensions.swebench.utils import SweBenchExample from codegen.extensions.swebench.harness import run_agent_on_entry import modal +import sys +from codegen.sdk.core.codebase import Codebase image = ( modal.Image.debian_slim(python_version="3.13") @@ -17,3 +19,22 @@ async def run_agent_modal(entry: SweBenchExample): """Modal function to process a single example from the SWE-bench dataset.""" return run_agent_on_entry(entry) + + +@app.cls(image=image, secrets=[modal.Secret.from_dotenv()], enable_memory_snapshot=True) +class SwebenchAgentRun: + repo_full_name: str = modal.parameter() + commit: str = modal.parameter() + codebase: Codebase | None = None + + @modal.enter(snap=True) + def load(self): + self.codebase = Codebase.from_repo(repo_full_name=self.repo_full_name, commit=self.commit, language="python") + + @modal.exit() + def exit(self): + sys.exit(0) + + @modal.method() + async def run(self, entry: SweBenchExample): + return run_agent_on_entry(entry, codebase=self.codebase) diff --git a/codegen-examples/examples/swebench_agent_run/run_eval.py b/codegen-examples/examples/swebench_agent_run/run_eval.py index ac1308832..e2c844254 100644 --- a/codegen-examples/examples/swebench_agent_run/run_eval.py +++ b/codegen-examples/examples/swebench_agent_run/run_eval.py @@ -6,16 +6,16 @@ import modal import click from datetime import datetime -from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_example, get_swe_bench_examples +from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_example, get_swe_bench_examples from codegen.extensions.swebench.report import generate_report PREDS_DNAME = Path(__file__).parent / "predictions" LOG_DIR = Path(__file__).parent / "logs" -run_agent_modal = modal.Function.lookup("swebench-agent-run", "run_agent_modal") +SwebenchAgentRun = modal.Cls.from_name(app_name="swebench-agent-run", name="SwebenchAgentRun") -async def process_batch(examples, batch_size=10): +async def process_batch(examples: list[SweBenchExample], batch_size=10): """Process a batch of examples concurrently. Args: @@ -31,7 +31,7 @@ async def process_batch(examples, batch_size=10): batch = examples[i : i + batch_size] # Create tasks for this batch - batch_tasks = [run_agent_modal.remote.aio(example) for example in batch] + batch_tasks = [SwebenchAgentRun(repo_full_name=example.repo, commit=example.base_commit).run.remote.aio(example) for example in batch] # Wait for all tasks in this batch to complete print(f"Processing batch {i // batch_size + 1}/{len(examples) // batch_size + 1} (examples {i + 1}-{min(i + batch_size, len(examples))})") diff --git a/codegen-examples/examples/swebench_agent_run/test.py b/codegen-examples/examples/swebench_agent_run/test.py new file mode 100644 index 000000000..b119fe32d --- /dev/null +++ b/codegen-examples/examples/swebench_agent_run/test.py @@ -0,0 +1,14 @@ +from codegen import Codebase +import modal + +image = modal.Image.debian_slim(python_version="3.13").apt_install("git").pip_install("fastapi[standard]").run_commands("pip install codegen") + +app = modal.App(name="codegen-examples", image=image, secrets=[modal.Secret.from_dotenv()]) + + +@app.function() +def run_agent(AgentClass): + codebase = Codebase.from_repo(repo_full_name="pallets/flask") + agent = AgentClass(codebase) + agent.run(prompt="What is the main purpose of the LangChain library?") + return True diff --git a/src/codegen/extensions/swebench/harness.py b/src/codegen/extensions/swebench/harness.py index 13d5e5e62..70c9bbe01 100644 --- a/src/codegen/extensions/swebench/harness.py +++ b/src/codegen/extensions/swebench/harness.py @@ -48,7 +48,7 @@ def show_problems(dataset): print(f"{inst}: {problem}") -def run_agent_on_entry(entry: SweBenchExample): +def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None): """Process one `entry` from SWE Bench using the LLM `models` at the given `temperature`. Set `model_name_or_path` in the result json. """ @@ -63,7 +63,8 @@ def run_agent_on_entry(entry: SweBenchExample): gold_files = files_in_patch(entry.patch) - codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python") # check out the repo + if codebase is None: + codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python") # check out the repo agent = CodeAgent(codebase=codebase) From 5d0c33d9ba11f331013817392add7d90e16df3a3 Mon Sep 17 00:00:00 2001 From: jemeza-codegen Date: Mon, 24 Feb 2025 12:46:25 -0800 Subject: [PATCH 2/2] chore: updated prompt --- codegen-examples/examples/swebench_agent_run/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codegen-examples/examples/swebench_agent_run/test.py b/codegen-examples/examples/swebench_agent_run/test.py index b119fe32d..fb6e4eb5a 100644 --- a/codegen-examples/examples/swebench_agent_run/test.py +++ b/codegen-examples/examples/swebench_agent_run/test.py @@ -10,5 +10,5 @@ def run_agent(AgentClass): codebase = Codebase.from_repo(repo_full_name="pallets/flask") agent = AgentClass(codebase) - agent.run(prompt="What is the main purpose of the LangChain library?") + agent.run(prompt="Tell me about the codebase and the files in it.") return True