From 844809c69759bfef744e841fbaaab325633b1fc3 Mon Sep 17 00:00:00 2001
From: jemeza-codegen <jmeza@codegen.com>
Date: Mon, 24 Feb 2025 11:42:02 -0800
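Subject: [PATCH 1/2] feat: agent run snapshots

Add a `SwebenchAgentRun` Modal class, declared with
`enable_memory_snapshot=True`, that builds the `Codebase` in a
`@modal.enter(snap=True)` hook and passes it to `run_agent_on_entry`.
`run_agent_on_entry` now accepts an optional pre-built `codebase` and
only calls `Codebase.from_repo` itself when none is given.

`run_eval.py` looks the class up with `modal.Cls.from_name` and creates
one instance per example, parameterized by the example's repo and base
commit. Also add a small `test.py` Modal example.
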
---
 .../swebench_agent_run/entry_point.py         | 21 +++++++++++++++++++
 .../examples/swebench_agent_run/run_eval.py   |  8 +++----
 .../examples/swebench_agent_run/test.py       | 14 +++++++++++++
 src/codegen/extensions/swebench/harness.py    |  5 +++--
 4 files changed, 42 insertions(+), 6 deletions(-)
 create mode 100644 codegen-examples/examples/swebench_agent_run/test.py

diff --git a/codegen-examples/examples/swebench_agent_run/entry_point.py b/codegen-examples/examples/swebench_agent_run/entry_point.py
index 0d5007419..6af3ed862 100644
--- a/codegen-examples/examples/swebench_agent_run/entry_point.py
+++ b/codegen-examples/examples/swebench_agent_run/entry_point.py
@@ -1,6 +1,8 @@
 from codegen.extensions.swebench.utils import SweBenchExample
 from codegen.extensions.swebench.harness import run_agent_on_entry
 import modal
+import sys
+from codegen.sdk.core.codebase import Codebase
 
 image = (
     modal.Image.debian_slim(python_version="3.13")
@@ -17,3 +19,22 @@
 async def run_agent_modal(entry: SweBenchExample):
     """Modal function to process a single example from the SWE-bench dataset."""
     return run_agent_on_entry(entry)
+
+
+@app.cls(image=image, secrets=[modal.Secret.from_dotenv()], enable_memory_snapshot=True)
+class SwebenchAgentRun:  # Modal class parameterized by repo and commit; holds a pre-built Codebase for run()
+    repo_full_name: str = modal.parameter()  # forwarded to Codebase.from_repo() in load()
+    commit: str = modal.parameter()  # forwarded to Codebase.from_repo() in load()
+    codebase: Codebase | None = None  # stays None until load() assigns it
+
+    @modal.enter(snap=True)  # NOTE(review): presumably runs before the memory snapshot is taken - confirm against Modal docs
+    def load(self):
+        self.codebase = Codebase.from_repo(repo_full_name=self.repo_full_name, commit=self.commit, language="python")
+
+    @modal.exit()
+    def exit(self):
+        sys.exit(0)  # NOTE(review): raises SystemExit(0) from the exit hook; the reason is not recorded here - confirm it is needed
+
+    @modal.method()
+    async def run(self, entry: SweBenchExample):  # NOTE(review): declared async, but run_agent_on_entry is called without await
+        return run_agent_on_entry(entry, codebase=self.codebase)  # hands the Codebase built in load() to the harness
diff --git a/codegen-examples/examples/swebench_agent_run/run_eval.py b/codegen-examples/examples/swebench_agent_run/run_eval.py
index ac1308832..e2c844254 100644
--- a/codegen-examples/examples/swebench_agent_run/run_eval.py
+++ b/codegen-examples/examples/swebench_agent_run/run_eval.py
@@ -6,16 +6,16 @@
 import modal
 import click
 from datetime import datetime
-from codegen.extensions.swebench.utils import SWEBenchDataset, get_swe_bench_example, get_swe_bench_examples
+from codegen.extensions.swebench.utils import SWEBenchDataset, SweBenchExample, get_swe_bench_example, get_swe_bench_examples
 from codegen.extensions.swebench.report import generate_report
 
 PREDS_DNAME = Path(__file__).parent / "predictions"
 LOG_DIR = Path(__file__).parent / "logs"
 
-run_agent_modal = modal.Function.lookup("swebench-agent-run", "run_agent_modal")
+SwebenchAgentRun = modal.Cls.from_name(app_name="swebench-agent-run", name="SwebenchAgentRun")
 
 
-async def process_batch(examples, batch_size=10):
+async def process_batch(examples: list[SweBenchExample], batch_size=10):
     """Process a batch of examples concurrently.
 
     Args:
@@ -31,7 +31,7 @@ async def process_batch(examples, batch_size=10):
         batch = examples[i : i + batch_size]
 
         # Create tasks for this batch
-        batch_tasks = [run_agent_modal.remote.aio(example) for example in batch]
+        batch_tasks = [SwebenchAgentRun(repo_full_name=example.repo, commit=example.base_commit).run.remote.aio(example) for example in batch]
 
         # Wait for all tasks in this batch to complete
         print(f"Processing batch {i // batch_size + 1}/{len(examples) // batch_size + 1} (examples {i + 1}-{min(i + batch_size, len(examples))})")
diff --git a/codegen-examples/examples/swebench_agent_run/test.py b/codegen-examples/examples/swebench_agent_run/test.py
new file mode 100644
index 000000000..b119fe32d
--- /dev/null
+++ b/codegen-examples/examples/swebench_agent_run/test.py
@@ -0,0 +1,14 @@
+from codegen import Codebase
+import modal
+
+image = modal.Image.debian_slim(python_version="3.13").apt_install("git").pip_install("fastapi[standard]").run_commands("pip install codegen")
+
+app = modal.App(name="codegen-examples", image=image, secrets=[modal.Secret.from_dotenv()])
+
+
+@app.function()  # registers run_agent on the `codegen-examples` app defined above
+def run_agent(AgentClass):
+    codebase = Codebase.from_repo(repo_full_name="pallets/flask")
+    agent = AgentClass(codebase)
+    agent.run(prompt="What is the main purpose of the LangChain library?")
+    return True
diff --git a/src/codegen/extensions/swebench/harness.py b/src/codegen/extensions/swebench/harness.py
index 13d5e5e62..70c9bbe01 100644
--- a/src/codegen/extensions/swebench/harness.py
+++ b/src/codegen/extensions/swebench/harness.py
@@ -48,7 +48,7 @@ def show_problems(dataset):
         print(f"{inst}: {problem}")
 
 
-def run_agent_on_entry(entry: SweBenchExample):
+def run_agent_on_entry(entry: SweBenchExample, codebase: Codebase | None = None):
     """Process one `entry` from SWE Bench using the LLM `models` at the
     given `temperature`.  Set `model_name_or_path` in the result json.
     """
@@ -63,7 +63,8 @@ def run_agent_on_entry(entry: SweBenchExample):
 
     gold_files = files_in_patch(entry.patch)
 
-    codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python")  # check out the repo
+    if codebase is None:
+        codebase = Codebase.from_repo(repo_full_name=entry.repo, commit=base_commit, language="python")  # check out the repo
 
     agent = CodeAgent(codebase=codebase)
 

From 5d0c33d9ba11f331013817392add7d90e16df3a3 Mon Sep 17 00:00:00 2001
From: jemeza-codegen <jmeza@codegen.com>
Date: Mon, 24 Feb 2025 12:46:25 -0800
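Subject: [PATCH 2/2] chore: updated prompt

Replace the LangChain question in the `test.py` example with a prompt
about the loaded codebase (the example loads `pallets/flask`).
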
---
 codegen-examples/examples/swebench_agent_run/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codegen-examples/examples/swebench_agent_run/test.py b/codegen-examples/examples/swebench_agent_run/test.py
index b119fe32d..fb6e4eb5a 100644
--- a/codegen-examples/examples/swebench_agent_run/test.py
+++ b/codegen-examples/examples/swebench_agent_run/test.py
@@ -10,5 +10,5 @@
 def run_agent(AgentClass):
     codebase = Codebase.from_repo(repo_full_name="pallets/flask")
     agent = AgentClass(codebase)
-    agent.run(prompt="What is the main purpose of the LangChain library?")
+    agent.run(prompt="Tell me about the codebase and the files in it.")
     return True