diff --git a/codegen-examples/examples/swebench_agent_run/entry_point.py b/codegen-examples/examples/swebench_agent_run/entry_point.py
index bc5a2f757..cde50bbba 100644
--- a/codegen-examples/examples/swebench_agent_run/entry_point.py
+++ b/codegen-examples/examples/swebench_agent_run/entry_point.py
@@ -1,6 +1,8 @@
 from codegen.extensions.swebench.utils import SweBenchExample
 from codegen.extensions.swebench.harness import run_agent_on_entry
 import modal
+import sys
+from codegen.sdk.core.codebase import Codebase
 
 image = (
     modal.Image.debian_slim(python_version="3.13")
@@ -17,3 +19,27 @@ async def run_agent_modal(entry: SweBenchExample):
     """Modal function to process a single example from the SWE-bench dataset."""
 
     return run_agent_on_entry(entry)
+
+
+@app.cls(image=image, secrets=[modal.Secret.from_dotenv()], enable_memory_snapshot=True)
+class SwebenchAgentRun:
+    """Modal class that loads the codebase for one repo/commit once and reuses it for agent runs."""
+
+    repo_full_name: str = modal.parameter()
+    commit: str = modal.parameter()
+    codebase: Codebase | None = None
+
+    @modal.enter(snap=True)
+    def load(self):
+        """Load the codebase for `repo_full_name` at `commit` so it is part of the memory snapshot."""
+        self.codebase = Codebase.from_repo(repo_full_name=self.repo_full_name, commit=self.commit, language="python")
+
+    @modal.exit()
+    def exit(self):
+        """Exit the process with status 0 when the container shuts down."""
+        sys.exit(0)
+
+    @modal.method()
+    async def run(self, entry: SweBenchExample):
+        """Run the agent on `entry`, passing along the codebase loaded by `load`."""
+        return run_agent_on_entry(entry, codebase=self.codebase)
diff --git a/codegen-examples/examples/swebench_agent_run/run_eval.py b/codegen-examples/examples/swebench_agent_run/run_eval.py
index ac1308832..e2c844254 100644
--- a/codegen-examples/examples/swebench_agent_run/run_eval.py
+++ b/codegen-examples/examples/swebench_agent_run/run_eval.py
@@ -6,16 +6,16 @@ import modal
 import click
 from datetime import datetime
 
-from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_example, get_swe_bench_examples
+from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_example, get_swe_bench_examples
 from codegen.extensions.swebench.report import generate_report
 
 PREDS_DNAME = Path(__file__).parent / "predictions"
 LOG_DIR = Path(__file__).parent / "logs"
 
-run_agent_modal = modal.Function.lookup("swebench-agent-run", "run_agent_modal")
+SwebenchAgentRun = modal.Cls.from_name(app_name="swebench-agent-run", name="SwebenchAgentRun")
 
 
-async def process_batch(examples, batch_size=10):
+async def process_batch(examples: list[SweBenchExample], batch_size=10):
     """Process a batch of examples concurrently.
 
     Args:
@@ -31,7 +31,7 @@ async def process_batch(examples, batch_size=10):
         batch = examples[i : i + batch_size]
 
         # Create tasks for this batch
-        batch_tasks = [run_agent_modal.remote.aio(example) for example in batch]
+        batch_tasks = [SwebenchAgentRun(repo_full_name=example.repo, commit=example.base_commit).run.remote.aio(example) for example in batch]
 
         # Wait for all tasks in this batch to complete
         print(f"Processing batch {i // batch_size + 1}/{len(examples) // batch_size + 1} (examples {i + 1}-{min(i + batch_size, len(examples))})")
diff --git a/codegen-examples/examples/swebench_agent_run/test.py b/codegen-examples/examples/swebench_agent_run/test.py
new file mode 100644
index 000000000..fb6e4eb5a
--- /dev/null
+++ b/codegen-examples/examples/swebench_agent_run/test.py
@@ -0,0 +1,15 @@
+from codegen import Codebase
+import modal
+
+image = modal.Image.debian_slim(python_version="3.13").apt_install("git").pip_install("fastapi[standard]").run_commands("pip install codegen")
+
+app = modal.App(name="codegen-examples", image=image, secrets=[modal.Secret.from_dotenv()])
+
+
+@app.function()
+def run_agent(AgentClass):
+    """Build a codebase for pallets/flask, run an `AgentClass` instance on it with a fixed prompt, and return True."""
+    codebase = Codebase.from_repo(repo_full_name="pallets/flask")
+    agent = AgentClass(codebase)
+    agent.run(prompt="Tell me about the codebase and the files in it.")
+    return True
diff --git a/src/codegen/extensions/swebench/harness.py b/src/codegen/extensions/swebench/harness.py
index 13d5e5e62..70c9bbe01 100644
--- a/src/codegen/extensions/swebench/harness.py
+++ b/src/codegen/extensions/swebench/harness.py
@@ -48,7 +48,8 @@ def show_problems(dataset):
         print(f"{inst}: {problem}")
 
 
-def run_agent_on_entry(entry: SweBenchExample):
+def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None):
     """Process one `entry` from SWE Bench using the LLM `models` at the
     given `temperature`. Set `model_name_or_path` in the result json.
+    If `codebase` is given it is reused; otherwise the repo is checked out at `base_commit`.
     """
@@ -63,7 +64,8 @@ def run_agent_on_entry(entry: SweBenchExample):
 
     gold_files = files_in_patch(entry.patch)
 
-    codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python")  # check out the repo
+    if codebase is None:
+        codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python")  # check out the repo
 
     agent = CodeAgent(codebase=codebase)
 