From c8e47cb3ab2713a96e7fb6e00372c127824ebcef Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sat, 28 Sep 2024 21:59:50 -0700
Subject: [PATCH 1/8] Order target edit files by import dependencies; write JSON summary

---
 agent/agent_utils.py    |  70 +++++++++++-
 agent/display.py        |  28 +++++
 agent/run_agent.py      |  19 ++--
 agent/run_agent_test.py | 237 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 341 insertions(+), 13 deletions(-)
 create mode 100644 agent/run_agent_test.py

diff --git a/agent/agent_utils.py b/agent/agent_utils.py
index c6ec4d5..da792af 100644
--- a/agent/agent_utils.py
+++ b/agent/agent_utils.py
@@ -6,6 +6,8 @@
 from pathlib import Path
 from typing import List
 import fitz
+from import_deps import ModuleSet
+from graphlib import TopologicalSorter, CycleError
 import yaml
 
 from agent.class_types import AgentConfig
@@ -190,8 +192,46 @@ def _find_files_to_edit(base_dir: str, src_dir: str, test_dir: str) -> list[str]
     return files
 
 
-def get_target_edit_files(target_dir: str, src_dir: str, test_dir: str) -> list[str]:
+def ignore_cycles(graph: dict):
+    ts = TopologicalSorter(graph)
+    try:
+        return list(set(ts.static_order()))
+    except CycleError as e:
+        # print(f"Cycle detected: {e.args[1]}")
+        # You can either break the cycle by modifying the graph or handle it as needed.
+        # For now, let's just remove the first node in the cycle and try again.
+        cycle_nodes = e.args[1]
+        node_to_remove = cycle_nodes[0]
+        # print(f"Removing node {node_to_remove} to resolve cycle.")
+        graph.pop(node_to_remove, None)
+        return ignore_cycles(graph)
+
+
+def topological_sort_based_on_dependencies(pkg_paths: list[str]) -> list[str]:
+    """Topological sort based on dependencies."""
+    module_set = ModuleSet([str(p) for p in pkg_paths])
+
+    import_dependencies = {}
+    for path in sorted(module_set.by_path.keys()):
+        module_name = ".".join(module_set.by_path[path].fqn)
+        mod = module_set.by_name[module_name]
+        imports = module_set.get_imports(mod)
+        import_dependencies[path] = set([str(x) for x in imports])
+
+    import_dependencies_files = ignore_cycles(import_dependencies)
+
+    return import_dependencies_files
+
+
+def get_target_edit_files(
+    local_repo: git.Repo,
+    src_dir: str,
+    test_dir: str,
+    latest_commit: str,
+    reference_commit: str,
+) -> list[str]:
     """Find the files with functions with the pass statement."""
+    target_dir = local_repo.working_dir
     files = _find_files_to_edit(target_dir, src_dir, test_dir)
     filtered_files = []
     for file_path in files:
@@ -202,13 +242,33 @@ def get_target_edit_files(target_dir: str, src_dir: str, test_dir: str) -> list[
             if "    pass" in content:
                 filtered_files.append(file_path)
 
+    # Change to reference commit to get the correct dependencies
+    local_repo.git.checkout(reference_commit)
+
+    topological_sort_files = topological_sort_based_on_dependencies(filtered_files)
+    if len(topological_sort_files) != len(filtered_files):
+        if len(topological_sort_files) < len(filtered_files):
+            # Find the missing elements
+            missing_files = set(filtered_files) - set(topological_sort_files)
+            # Add the missing files to the end of the list
+            topological_sort_files = topological_sort_files + list(missing_files)
+        else:
+            raise ValueError(
+                "topological_sort_files should not be longer than filtered_files"
+            )
+    assert len(topological_sort_files) == len(
+        filtered_files
+    ), "all files should be included"
+
+    # change to latest commit
+    local_repo.git.checkout(latest_commit)
+
     # Remove the base_dir prefix
-    filtered_files = [
-        file.replace(target_dir, "").lstrip("/") for file in filtered_files
+    topological_sort_files = [
+        file.replace(target_dir, "").lstrip("/") for file in topological_sort_files
     ]
-    # Only keep python files
 
-    return filtered_files
+    return topological_sort_files
 
 
 def get_message(
diff --git a/agent/display.py b/agent/display.py
index a5f389c..53d01fe 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -17,6 +17,8 @@
 from rich.align import Align
 from collections import OrderedDict
 from types import TracebackType
+import json
+from datetime import datetime
 
 
 class RepoBox:
@@ -404,3 +406,29 @@ def __exit__(
             f"{'Total':<30} {self.total_time_spent:>13.2f}s {total_files:>18} {total_money:>13.2f}$"
         )
         print("-" * 80)
+
+        # Write summary to JSON file
+
+        summary_data = {
+            "timestamp": datetime.now().isoformat(),
+            "total_time_spent": self.total_time_spent,
+            "total_files_processed": total_files,
+            "total_money_spent": total_money,
+            "repositories": [
+                {
+                    "name": repo_name,
+                    "time_spent": self.end_time_per_repo[repo_name]
+                    - self.start_time_per_repo[repo_name],
+                    "files_processed": self.total_files_per_repo[repo_name],
+                    "money_spent": sum(
+                        self.repo_money_spent.get(repo_name, {}).values()
+                    ),
+                }
+                for repo_name in self.end_time_per_repo
+            ],
+        }
+
+        with open("processing_summary.json", "w") as json_file:
+            json.dump(summary_data, json_file, indent=4)
+
+        print("\nSummary has been written to processing_summary.json")
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 3ef2a08..8a3c4de 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -66,13 +66,6 @@ def run_agent_for_repo(
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
 
-    target_edit_files = get_target_edit_files(
-        repo_path, example["src_dir"], example["test"]["test_dir"]
-    )
-    # Call the commit0 get-tests command to retrieve test files
-    test_files_str = get_tests(repo_name, verbose=0)
-    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
-
     try:
         local_repo = Repo(repo_path)
     except Exception:
@@ -90,7 +83,6 @@ def run_agent_for_repo(
     # # if branch_name is not provided, create a new branch name based on agent_config
     # if branch is None:
     #     branch = args2string(agent_config)
-
     create_branch(local_repo, branch, example["base_commit"])
 
     # in cases where the latest commit of branch is not commit 0
@@ -99,6 +91,17 @@ def run_agent_for_repo(
     if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
 
+    target_edit_files = get_target_edit_files(
+        local_repo,
+        example["src_dir"],
+        example["test"]["test_dir"],
+        latest_commit,
+        example["reference_commit"],
+    )
+    # Call the commit0 get-tests command to retrieve test files
+    test_files_str = get_tests(repo_name, verbose=0)
+    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
+
     # prepare the log dir
     experiment_log_dir = (
         Path(log_dir)
diff --git a/agent/run_agent_test.py b/agent/run_agent_test.py
new file mode 100644
index 0000000..64f7ca2
--- /dev/null
+++ b/agent/run_agent_test.py
@@ -0,0 +1,237 @@
+import os
+import yaml
+import multiprocessing
+from tqdm import tqdm
+from datasets import load_dataset
+from git import Repo
+from agent.agent_utils import (
+    args2string,
+    create_branch,
+    get_message,
+    get_target_edit_files,
+    get_lint_cmd,
+    read_yaml_config,
+)
+from agent.agents import AiderAgents
+from typing import Optional, Type, cast
+from types import TracebackType
+from agent.class_types import AgentConfig
+from commit0.harness.constants import SPLIT
+from commit0.harness.get_pytest_ids import main as get_tests
+from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance
+from commit0.cli import read_commit0_dot_file
+from pathlib import Path
+from datetime import datetime
+
+
+class DirContext:
+    def __init__(self, d: str):
+        self.dir = d
+        self.cwd = os.getcwd()
+
+    def __enter__(self):
+        os.chdir(self.dir)
+
+    def __exit__(
+        self,
+        exctype: Optional[Type[BaseException]],
+        excinst: Optional[BaseException],
+        exctb: Optional[TracebackType],
+    ) -> None:
+        os.chdir(self.cwd)
+
+
+def run_agent_for_repo(
+    repo_base_dir: str,
+    agent_config: AgentConfig,
+    example: RepoInstance,
+    branch: Optional[str] = None,
+    override_previous_changes: bool = False,
+    backend: str = "modal",
+    log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()),
+) -> None:
+    """Run Aider for a given repository."""
+    # get repo info
+    _, repo_name = example["repo"].split("/")
+    print("Working on repo: ", repo_name)
+
+    # repo_name = repo_name.lower()
+    # repo_name = repo_name.replace(".", "-")
+
+    repo_path = os.path.join(repo_base_dir, repo_name)
+    repo_path = os.path.abspath(repo_path)
+
+    try:
+        local_repo = Repo(repo_path)
+    except Exception:
+        raise Exception(
+            f"{repo_path} is not a git repo. Check if base_dir is correctly specified."
+        )
+
+    if agent_config.agent_name == "aider":
+        agent = AiderAgents(agent_config.max_iteration, agent_config.model_name)
+    else:
+        raise NotImplementedError(
+            f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
+        )
+
+    # if branch_name is not provided, create a new branch name based on agent_config
+    if branch is None:
+        branch = args2string(agent_config)
+
+    create_branch(local_repo, branch, example["base_commit"])
+
+    # in cases where the latest commit of branch is not commit 0
+    # set it back to commit 0
+    latest_commit = local_repo.commit(branch)
+    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
+        local_repo.git.reset("--hard", example["base_commit"])
+
+    # get target files to edit and test files to run
+    target_edit_files = get_target_edit_files(
+        local_repo, example["src_dir"], example["test"]["test_dir"], latest_commit, example["reference_commit"]
+    )
+    print(target_edit_files)
+    return
+    # Call the commit0 get-tests command to retrieve test files
+    test_files_str = get_tests(repo_name, verbose=0)
+    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
+
+    # prepare the log dir
+    experiment_log_dir = (
+        Path(log_dir)
+        / repo_name
+        / branch
+        / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    )
+    experiment_log_dir.mkdir(parents=True, exist_ok=True)
+
+    # write agent_config to .agent.yaml in the log_dir for record
+    agent_config_log_file = experiment_log_dir / ".agent.yaml"
+    with open(agent_config_log_file, "w") as agent_config_file:
+        yaml.dump(agent_config, agent_config_file)
+
+    # TODO: make this path more general
+    commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml")
+
+    with DirContext(repo_path):
+        if agent_config is None:
+            raise ValueError("Invalid input")
+
+        if agent_config.run_tests:
+            # when unit test feedback is available, iterate over test files
+            for test_file in test_files:
+                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0-dot-file-path {commit0_dot_file_path}"
+                test_file_name = test_file.replace(".py", "").replace("/", "__")
+                test_log_dir = experiment_log_dir / test_file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                message = get_message(agent_config, repo_path, test_file=test_file)
+                _ = agent.run(
+                    message,
+                    test_cmd,
+                    lint_cmd,
+                    target_edit_files,
+                    test_log_dir,
+                    test_first=True,
+                )
+                # cost = agent_return.last_cost
+        else:
+            # when unit test feedback is not available, iterate over target files to edit
+            message = get_message(
+                agent_config, repo_path, test_dir=example["test"]["test_dir"]
+            )
+            for f in target_edit_files:
+                file_name = f.replace(".py", "").replace("/", "__")
+                file_log_dir = experiment_log_dir / file_name
+                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
+                _ = agent.run(message, "", lint_cmd, [f], file_log_dir)
+                # cost = agent_return.last_cost
+
+
+def run_agent(
+    branch: str,
+    override_previous_changes: bool,
+    backend: str,
+    agent_config_file: str,
+    log_dir: str,
+    max_parallel_repos: int,
+) -> None:
+    """Main function to run Aider for a given repository.
+
+    Will run in parallel for each repo.
+    """
+    config = read_yaml_config(agent_config_file)
+
+    agent_config = AgentConfig(**config)
+
+    commit0_config = read_commit0_dot_file(".commit0.yaml")
+
+    dataset = load_dataset(
+        commit0_config["dataset_name"], split=commit0_config["dataset_split"]
+    )
+    filtered_dataset = [
+        example
+        for example in dataset
+        if commit0_config["repo_split"] == "all"
+        or (
+            isinstance(example, dict)
+            and "repo" in example
+            and isinstance(example["repo"], str)
+            and example["repo"].split("/")[-1]
+            in SPLIT.get(commit0_config["repo_split"], [])
+        )
+    ]
+    assert len(filtered_dataset) > 0, "No examples available"
+
+    # if len(filtered_dataset) > 1:
+    #     sys.stdout = open(os.devnull, "w")
+    for i in range(len(filtered_dataset)):
+        if "python-rsa" not in filtered_dataset[i]["repo"]:
+            continue
+        run_agent_for_repo(
+            commit0_config["base_dir"],
+            agent_config,
+            filtered_dataset[i],
+            branch,
+            override_previous_changes,
+            backend,
+            log_dir,
+        )
+    # with tqdm(
+    #     total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
+    # ) as pbar:
+    #     with multiprocessing.Pool(processes=max_parallel_repos) as pool:
+    #         results = []
+
+    #         # Use apply_async to submit jobs and add progress bar updates
+    #         for example in filtered_dataset:
+    #             result = pool.apply_async(
+    #                 run_agent_for_repo,
+    #                 args=(
+    #                     commit0_config["base_dir"],
+    #                     agent_config,
+    #                     cast(RepoInstance, example),
+    #                     branch,
+    #                     override_previous_changes,
+    #                     backend,
+    #                     log_dir,
+    #                 ),
+    #                 callback=lambda _: pbar.update(
+    #                     1
+    #                 ),  # Update progress bar on task completion
+    #             )
+    #             results.append(result)
+
+    #         for result in results:
+    #             result.wait()
+
+
+if __name__ == "__main__":
+    run_agent(
+        "fillin",
+        False,
+        "modal",
+        ".agent_with_test.yaml",
+        "logs",
+        10,
+    )

From 99779c9eac207447fa5e9b1d7a2d5bfa54041477 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 29 Sep 2024 10:39:14 -0700
Subject: [PATCH 2/8] Keep topological order in ignore_cycles; tidy types and imports

---
 agent/agent_utils.py    |  9 ++++-----
 agent/run_agent.py      |  4 ++--
 agent/run_agent_test.py | 10 ++++++----
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/agent/agent_utils.py b/agent/agent_utils.py
index da792af..197f358 100644
--- a/agent/agent_utils.py
+++ b/agent/agent_utils.py
@@ -192,10 +192,11 @@ def _find_files_to_edit(base_dir: str, src_dir: str, test_dir: str) -> list[str]
     return files
 
 
-def ignore_cycles(graph: dict):
+def ignore_cycles(graph: dict) -> list[str]:
+    """Ignore the cycles in the graph."""
     ts = TopologicalSorter(graph)
     try:
-        return list(set(ts.static_order()))
+        return list(ts.static_order())
     except CycleError as e:
         # print(f"Cycle detected: {e.args[1]}")
         # You can either break the cycle by modifying the graph or handle it as needed.
@@ -231,7 +232,7 @@ def get_target_edit_files(
     reference_commit: str,
 ) -> list[str]:
     """Find the files with functions with the pass statement."""
-    target_dir = local_repo.working_dir
+    target_dir = str(local_repo.working_dir)
     files = _find_files_to_edit(target_dir, src_dir, test_dir)
     filtered_files = []
     for file_path in files:
@@ -241,10 +242,8 @@ def get_target_edit_files(
                 continue
             if "    pass" in content:
                 filtered_files.append(file_path)
-
     # Change to reference commit to get the correct dependencies
     local_repo.git.checkout(reference_commit)
-
     topological_sort_files = topological_sort_based_on_dependencies(filtered_files)
     if len(topological_sort_files) != len(filtered_files):
         if len(topological_sort_files) < len(filtered_files):
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 8a3c4de..3a6425a 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -87,8 +87,8 @@ def run_agent_for_repo(
 
     # in cases where the latest commit of branch is not commit 0
     # set it back to commit 0
-    latest_commit = local_repo.commit(branch)
-    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
+    latest_commit = str(local_repo.commit(branch))
+    if latest_commit != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
 
     target_edit_files = get_target_edit_files(
diff --git a/agent/run_agent_test.py b/agent/run_agent_test.py
index 64f7ca2..5e1b4a9 100644
--- a/agent/run_agent_test.py
+++ b/agent/run_agent_test.py
@@ -1,7 +1,5 @@
 import os
 import yaml
-import multiprocessing
-from tqdm import tqdm
 from datasets import load_dataset
 from git import Repo
 from agent.agent_utils import (
@@ -13,7 +11,7 @@
     read_yaml_config,
 )
 from agent.agents import AiderAgents
-from typing import Optional, Type, cast
+from typing import Optional, Type
 from types import TracebackType
 from agent.class_types import AgentConfig
 from commit0.harness.constants import SPLIT
@@ -89,7 +87,11 @@ def run_agent_for_repo(
 
     # get target files to edit and test files to run
     target_edit_files = get_target_edit_files(
-        local_repo, example["src_dir"], example["test"]["test_dir"], latest_commit, example["reference_commit"]
+        local_repo,
+        example["src_dir"],
+        example["test"]["test_dir"],
+        latest_commit,
+        example["reference_commit"],
     )
     print(target_edit_files)
     return

From 7f4929909cb09a510bb990f21371844c88be1709 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 29 Sep 2024 10:42:10 -0700
Subject: [PATCH 3/8] Remove run_agent_test.py; use dependency-ordered files in run_agent_no_rich

---
 agent/run_agent.py         |   6 +-
 agent/run_agent_no_rich.py |  21 ++--
 agent/run_agent_test.py    | 239 -------------------------------------
 3 files changed, 16 insertions(+), 250 deletions(-)
 delete mode 100644 agent/run_agent_test.py

diff --git a/agent/run_agent.py b/agent/run_agent.py
index 3a6425a..7eaa926 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -87,15 +87,15 @@ def run_agent_for_repo(
 
     # in cases where the latest commit of branch is not commit 0
     # set it back to commit 0
-    latest_commit = str(local_repo.commit(branch))
-    if latest_commit != example["base_commit"] and override_previous_changes:
+    latest_commit = local_repo.commit(branch)
+    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
 
     target_edit_files = get_target_edit_files(
         local_repo,
         example["src_dir"],
         example["test"]["test_dir"],
-        latest_commit,
+        str(latest_commit),
         example["reference_commit"],
     )
     # Call the commit0 get-tests command to retrieve test files
diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
index c46ae2f..754256d 100644
--- a/agent/run_agent_no_rich.py
+++ b/agent/run_agent_no_rich.py
@@ -61,14 +61,6 @@ def run_agent_for_repo(
     repo_path = os.path.join(repo_base_dir, repo_name)
     repo_path = os.path.abspath(repo_path)
 
-    # get target files to edit and test files to run
-    target_edit_files = get_target_edit_files(
-        repo_path, example["src_dir"], example["test"]["test_dir"]
-    )
-    # Call the commit0 get-tests command to retrieve test files
-    test_files_str = get_tests(repo_name, verbose=0)
-    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
-
     try:
         local_repo = Repo(repo_path)
     except Exception:
@@ -95,6 +87,19 @@ def run_agent_for_repo(
     if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
 
+    # get target files to edit and test files to run
+    target_edit_files = get_target_edit_files(
+        local_repo,
+        example["src_dir"],
+        example["test"]["test_dir"],
+        str(latest_commit),
+        str(example["reference_commit"]),
+    )
+
+    # Call the commit0 get-tests command to retrieve test files
+    test_files_str = get_tests(repo_name, verbose=0)
+    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
+
     # prepare the log dir
     experiment_log_dir = (
         Path(log_dir)
diff --git a/agent/run_agent_test.py b/agent/run_agent_test.py
deleted file mode 100644
index 5e1b4a9..0000000
--- a/agent/run_agent_test.py
+++ /dev/null
@@ -1,239 +0,0 @@
-import os
-import yaml
-from datasets import load_dataset
-from git import Repo
-from agent.agent_utils import (
-    args2string,
-    create_branch,
-    get_message,
-    get_target_edit_files,
-    get_lint_cmd,
-    read_yaml_config,
-)
-from agent.agents import AiderAgents
-from typing import Optional, Type
-from types import TracebackType
-from agent.class_types import AgentConfig
-from commit0.harness.constants import SPLIT
-from commit0.harness.get_pytest_ids import main as get_tests
-from commit0.harness.constants import RUN_AGENT_LOG_DIR, RepoInstance
-from commit0.cli import read_commit0_dot_file
-from pathlib import Path
-from datetime import datetime
-
-
-class DirContext:
-    def __init__(self, d: str):
-        self.dir = d
-        self.cwd = os.getcwd()
-
-    def __enter__(self):
-        os.chdir(self.dir)
-
-    def __exit__(
-        self,
-        exctype: Optional[Type[BaseException]],
-        excinst: Optional[BaseException],
-        exctb: Optional[TracebackType],
-    ) -> None:
-        os.chdir(self.cwd)
-
-
-def run_agent_for_repo(
-    repo_base_dir: str,
-    agent_config: AgentConfig,
-    example: RepoInstance,
-    branch: Optional[str] = None,
-    override_previous_changes: bool = False,
-    backend: str = "modal",
-    log_dir: str = str(RUN_AGENT_LOG_DIR.resolve()),
-) -> None:
-    """Run Aider for a given repository."""
-    # get repo info
-    _, repo_name = example["repo"].split("/")
-    print("Working on repo: ", repo_name)
-
-    # repo_name = repo_name.lower()
-    # repo_name = repo_name.replace(".", "-")
-
-    repo_path = os.path.join(repo_base_dir, repo_name)
-    repo_path = os.path.abspath(repo_path)
-
-    try:
-        local_repo = Repo(repo_path)
-    except Exception:
-        raise Exception(
-            f"{repo_path} is not a git repo. Check if base_dir is correctly specified."
-        )
-
-    if agent_config.agent_name == "aider":
-        agent = AiderAgents(agent_config.max_iteration, agent_config.model_name)
-    else:
-        raise NotImplementedError(
-            f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py."
-        )
-
-    # if branch_name is not provided, create a new branch name based on agent_config
-    if branch is None:
-        branch = args2string(agent_config)
-
-    create_branch(local_repo, branch, example["base_commit"])
-
-    # in cases where the latest commit of branch is not commit 0
-    # set it back to commit 0
-    latest_commit = local_repo.commit(branch)
-    if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
-        local_repo.git.reset("--hard", example["base_commit"])
-
-    # get target files to edit and test files to run
-    target_edit_files = get_target_edit_files(
-        local_repo,
-        example["src_dir"],
-        example["test"]["test_dir"],
-        latest_commit,
-        example["reference_commit"],
-    )
-    print(target_edit_files)
-    return
-    # Call the commit0 get-tests command to retrieve test files
-    test_files_str = get_tests(repo_name, verbose=0)
-    test_files = sorted(list(set([i.split(":")[0] for i in test_files_str])))
-
-    # prepare the log dir
-    experiment_log_dir = (
-        Path(log_dir)
-        / repo_name
-        / branch
-        / datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-    )
-    experiment_log_dir.mkdir(parents=True, exist_ok=True)
-
-    # write agent_config to .agent.yaml in the log_dir for record
-    agent_config_log_file = experiment_log_dir / ".agent.yaml"
-    with open(agent_config_log_file, "w") as agent_config_file:
-        yaml.dump(agent_config, agent_config_file)
-
-    # TODO: make this path more general
-    commit0_dot_file_path = str(Path(repo_path).parent.parent / ".commit0.yaml")
-
-    with DirContext(repo_path):
-        if agent_config is None:
-            raise ValueError("Invalid input")
-
-        if agent_config.run_tests:
-            # when unit test feedback is available, iterate over test files
-            for test_file in test_files:
-                test_cmd = f"python -m commit0 test {repo_path} {test_file} --branch {branch} --backend {backend} --commit0-dot-file-path {commit0_dot_file_path}"
-                test_file_name = test_file.replace(".py", "").replace("/", "__")
-                test_log_dir = experiment_log_dir / test_file_name
-                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                message = get_message(agent_config, repo_path, test_file=test_file)
-                _ = agent.run(
-                    message,
-                    test_cmd,
-                    lint_cmd,
-                    target_edit_files,
-                    test_log_dir,
-                    test_first=True,
-                )
-                # cost = agent_return.last_cost
-        else:
-            # when unit test feedback is not available, iterate over target files to edit
-            message = get_message(
-                agent_config, repo_path, test_dir=example["test"]["test_dir"]
-            )
-            for f in target_edit_files:
-                file_name = f.replace(".py", "").replace("/", "__")
-                file_log_dir = experiment_log_dir / file_name
-                lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
-                _ = agent.run(message, "", lint_cmd, [f], file_log_dir)
-                # cost = agent_return.last_cost
-
-
-def run_agent(
-    branch: str,
-    override_previous_changes: bool,
-    backend: str,
-    agent_config_file: str,
-    log_dir: str,
-    max_parallel_repos: int,
-) -> None:
-    """Main function to run Aider for a given repository.
-
-    Will run in parallel for each repo.
-    """
-    config = read_yaml_config(agent_config_file)
-
-    agent_config = AgentConfig(**config)
-
-    commit0_config = read_commit0_dot_file(".commit0.yaml")
-
-    dataset = load_dataset(
-        commit0_config["dataset_name"], split=commit0_config["dataset_split"]
-    )
-    filtered_dataset = [
-        example
-        for example in dataset
-        if commit0_config["repo_split"] == "all"
-        or (
-            isinstance(example, dict)
-            and "repo" in example
-            and isinstance(example["repo"], str)
-            and example["repo"].split("/")[-1]
-            in SPLIT.get(commit0_config["repo_split"], [])
-        )
-    ]
-    assert len(filtered_dataset) > 0, "No examples available"
-
-    # if len(filtered_dataset) > 1:
-    #     sys.stdout = open(os.devnull, "w")
-    for i in range(len(filtered_dataset)):
-        if "python-rsa" not in filtered_dataset[i]["repo"]:
-            continue
-        run_agent_for_repo(
-            commit0_config["base_dir"],
-            agent_config,
-            filtered_dataset[i],
-            branch,
-            override_previous_changes,
-            backend,
-            log_dir,
-        )
-    # with tqdm(
-    #     total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
-    # ) as pbar:
-    #     with multiprocessing.Pool(processes=max_parallel_repos) as pool:
-    #         results = []
-
-    #         # Use apply_async to submit jobs and add progress bar updates
-    #         for example in filtered_dataset:
-    #             result = pool.apply_async(
-    #                 run_agent_for_repo,
-    #                 args=(
-    #                     commit0_config["base_dir"],
-    #                     agent_config,
-    #                     cast(RepoInstance, example),
-    #                     branch,
-    #                     override_previous_changes,
-    #                     backend,
-    #                     log_dir,
-    #                 ),
-    #                 callback=lambda _: pbar.update(
-    #                     1
-    #                 ),  # Update progress bar on task completion
-    #             )
-    #             results.append(result)
-
-    #         for result in results:
-    #             result.wait()
-
-
-if __name__ == "__main__":
-    run_agent(
-        "fillin",
-        False,
-        "modal",
-        ".agent_with_test.yaml",
-        "logs",
-        10,
-    )

From e47004798c51d7b7f2433a24a8ec881f1d476dac Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 29 Sep 2024 10:57:22 -0700
Subject: [PATCH 4/8] Tolerate get_imports failures and UTF-8 BOM when finding files

---
 agent/agent_utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/agent/agent_utils.py b/agent/agent_utils.py
index 197f358..fcbb87e 100644
--- a/agent/agent_utils.py
+++ b/agent/agent_utils.py
@@ -216,8 +216,11 @@ def topological_sort_based_on_dependencies(pkg_paths: list[str]) -> list[str]:
     for path in sorted(module_set.by_path.keys()):
         module_name = ".".join(module_set.by_path[path].fqn)
         mod = module_set.by_name[module_name]
-        imports = module_set.get_imports(mod)
-        import_dependencies[path] = set([str(x) for x in imports])
+        try:
+            imports = module_set.get_imports(mod)
+            import_dependencies[path] = set([str(x) for x in imports])
+        except Exception:
+            import_dependencies[path] = set()
 
     import_dependencies_files = ignore_cycles(import_dependencies)
 
@@ -236,7 +239,7 @@ def get_target_edit_files(
     files = _find_files_to_edit(target_dir, src_dir, test_dir)
     filtered_files = []
     for file_path in files:
-        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
+        with open(file_path, "r", encoding="utf-8-sig", errors="ignore") as file:
             content = file.read()
             if len(content.splitlines()) > 1500:
                 continue
@@ -244,6 +247,7 @@ def get_target_edit_files(
                 filtered_files.append(file_path)
     # Change to reference commit to get the correct dependencies
     local_repo.git.checkout(reference_commit)
+
     topological_sort_files = topological_sort_based_on_dependencies(filtered_files)
     if len(topological_sort_files) != len(filtered_files):
         if len(topological_sort_files) < len(filtered_files):

From 0932f76cd793ed9acdd884a2d1064f4b8bced816 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 29 Sep 2024 15:32:02 -0700
Subject: [PATCH 5/8] add import dependencies to agent message; add commit0 config file option

---
 agent/agent_utils.py       | 36 +++++++++++++++++++++++++++++++-----
 agent/cli.py               |  6 ++++++
 agent/run_agent.py         |  8 ++++++--
 agent/run_agent_no_rich.py |  8 ++++++--
 4 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/agent/agent_utils.py b/agent/agent_utils.py
index fcbb87e..3c5cb60 100644
--- a/agent/agent_utils.py
+++ b/agent/agent_utils.py
@@ -18,6 +18,7 @@
 UNIT_TESTS_INFO_HEADER = "\n\n>>> Here are the Unit Tests Information:\n"
 LINT_INFO_HEADER = "\n\n>>> Here is the Lint Information:\n"
 SPEC_INFO_HEADER = "\n\n>>> Here is the Specification Information:\n"
+IMPORT_DEPENDENCIES_HEADER = "\n\n>>> Here are the Import Dependencies:\n"
 # prefix components:
 space = "    "
 branch = "│   "
@@ -208,7 +209,9 @@ def ignore_cycles(graph: dict) -> list[str]:
         return ignore_cycles(graph)
 
 
-def topological_sort_based_on_dependencies(pkg_paths: list[str]) -> list[str]:
+def topological_sort_based_on_dependencies(
+    pkg_paths: list[str],
+) -> tuple[list[str], dict]:
     """Topological sort based on dependencies."""
     module_set = ModuleSet([str(p) for p in pkg_paths])
 
@@ -224,7 +227,7 @@ def topological_sort_based_on_dependencies(pkg_paths: list[str]) -> list[str]:
 
     import_dependencies_files = ignore_cycles(import_dependencies)
 
-    return import_dependencies_files
+    return import_dependencies_files, import_dependencies
 
 
 def get_target_edit_files(
@@ -233,7 +236,7 @@ def get_target_edit_files(
     test_dir: str,
     latest_commit: str,
     reference_commit: str,
-) -> list[str]:
+) -> tuple[list[str], dict]:
     """Find the files with functions with the pass statement."""
     target_dir = str(local_repo.working_dir)
     files = _find_files_to_edit(target_dir, src_dir, test_dir)
@@ -248,7 +251,9 @@ def get_target_edit_files(
     # Change to reference commit to get the correct dependencies
     local_repo.git.checkout(reference_commit)
 
-    topological_sort_files = topological_sort_based_on_dependencies(filtered_files)
+    topological_sort_files, import_dependencies = (
+        topological_sort_based_on_dependencies(filtered_files)
+    )
     if len(topological_sort_files) != len(filtered_files):
         if len(topological_sort_files) < len(filtered_files):
             # Find the missing elements
@@ -271,7 +276,14 @@ def get_target_edit_files(
         file.replace(target_dir, "").lstrip("/") for file in topological_sort_files
     ]
 
-    return topological_sort_files
+    # Remove the base_dir prefix from import dependencies
+    import_dependencies_without_prefix = {}
+    for key, value in import_dependencies.items():
+        key_without_prefix = key.replace(target_dir, "").lstrip("/")
+        value_without_prefix = [v.replace(target_dir, "").lstrip("/") for v in value]
+        import_dependencies_without_prefix[key_without_prefix] = value_without_prefix
+
+    return topological_sort_files, import_dependencies_without_prefix
 
 
 def get_message(
@@ -331,6 +343,20 @@ def get_message(
     return message_to_agent
 
 
+def update_message_with_dependencies(message: str, dependencies: list[str]) -> str:
+    """Append the header and each dependency file's content to the message."""
+    if not dependencies:
+        return message
+    sections = [f"\n{IMPORT_DEPENDENCIES_HEADER}"]
+    for dependency in dependencies:
+        # Decode like get_target_edit_files does, so a BOM or stray bytes
+        # in a dependency cannot abort the run with a UnicodeDecodeError.
+        with open(dependency, "r", encoding="utf-8-sig", errors="ignore") as file:
+            content = file.read()
+        sections.append(f"\nHere is the content of the file {dependency}:\n{content}")
+    return message + "".join(sections)
+
+
 def get_specification(specification_pdf_path: Path) -> str:
     """Get the reference for a given specification PDF path."""
     # TODO: after pdf_to_text is available, use it to extract the text from the PDF
diff --git a/agent/cli.py b/agent/cli.py
index 905191b..8d06891 100644
--- a/agent/cli.py
+++ b/agent/cli.py
@@ -178,6 +178,10 @@ def run(
         ".agent.yaml",
         help="Path to the agent config file",
     ),
+    commit0_config_file: str = typer.Option(
+        ".commit0.yaml",
+        help="Path to the commit0 config file",
+    ),
     log_dir: str = typer.Option(
         str(RUN_AGENT_LOG_DIR.resolve()),
         help="Log directory to store the logs",
@@ -202,6 +206,7 @@ def run(
             override_previous_changes,
             backend,
             agent_config_file,
+            commit0_config_file,
             log_dir,
             max_parallel_repos,
             display_repo_progress_num,
@@ -212,6 +217,7 @@ def run(
             override_previous_changes,
             backend,
             agent_config_file,
+            commit0_config_file,
             log_dir,
             max_parallel_repos,
         )
diff --git a/agent/run_agent.py b/agent/run_agent.py
index 7eaa926..5315086 100644
--- a/agent/run_agent.py
+++ b/agent/run_agent.py
@@ -7,6 +7,7 @@
     create_branch,
     get_message,
     get_target_edit_files,
+    update_message_with_dependencies,
     get_lint_cmd,
     read_yaml_config,
 )
@@ -91,7 +92,7 @@ def run_agent_for_repo(
     if latest_commit.hexsha != example["base_commit"] and override_previous_changes:
         local_repo.git.reset("--hard", example["base_commit"])
 
-    target_edit_files = get_target_edit_files(
+    target_edit_files, import_dependencies = get_target_edit_files(
         local_repo,
         example["src_dir"],
         example["test"]["test_dir"],
@@ -161,6 +162,8 @@ def run_agent_for_repo(
             )
             for f in target_edit_files:
                 update_queue.put(("set_current_file", (repo_name, f)))
+                dependencies = import_dependencies[f]
+                message = update_message_with_dependencies(message, dependencies)
                 file_name = f.replace(".py", "").replace("/", "__")
                 file_log_dir = experiment_log_dir / file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
@@ -179,6 +182,7 @@ def run_agent(
     override_previous_changes: bool,
     backend: str,
     agent_config_file: str,
+    commit0_config_file: str,
     log_dir: str,
     max_parallel_repos: int,
     display_repo_progress_num: int,
@@ -188,7 +192,7 @@ def run_agent(
 
     agent_config = AgentConfig(**config)
 
-    commit0_config = read_commit0_dot_file(".commit0.yaml")
+    commit0_config = read_commit0_dot_file(commit0_config_file)
 
     dataset = load_dataset(
         commit0_config["dataset_name"], split=commit0_config["dataset_split"]
diff --git a/agent/run_agent_no_rich.py b/agent/run_agent_no_rich.py
index 754256d..ec1334a 100644
--- a/agent/run_agent_no_rich.py
+++ b/agent/run_agent_no_rich.py
@@ -9,6 +9,7 @@
     create_branch,
     get_message,
     get_target_edit_files,
+    update_message_with_dependencies,
     get_lint_cmd,
     read_yaml_config,
 )
@@ -88,7 +89,7 @@ def run_agent_for_repo(
         local_repo.git.reset("--hard", example["base_commit"])
 
     # get target files to edit and test files to run
-    target_edit_files = get_target_edit_files(
+    target_edit_files, import_dependencies = get_target_edit_files(
         local_repo,
         example["src_dir"],
         example["test"]["test_dir"],
@@ -144,6 +145,8 @@ def run_agent_for_repo(
                 agent_config, repo_path, test_dir=example["test"]["test_dir"]
             )
             for f in target_edit_files:
+                dependencies = import_dependencies[f]
+                message = update_message_with_dependencies(message, dependencies)
                 file_name = f.replace(".py", "").replace("/", "__")
                 file_log_dir = experiment_log_dir / file_name
                 lint_cmd = get_lint_cmd(repo_name, agent_config.use_lint_info)
@@ -156,6 +159,7 @@ def run_agent(
     override_previous_changes: bool,
     backend: str,
     agent_config_file: str,
+    commit0_config_file: str,
     log_dir: str,
     max_parallel_repos: int,
 ) -> None:
@@ -167,7 +171,7 @@ def run_agent(
 
     agent_config = AgentConfig(**config)
 
-    commit0_config = read_commit0_dot_file(".commit0.yaml")
+    commit0_config = read_commit0_dot_file(commit0_config_file)
 
     dataset = load_dataset(
         commit0_config["dataset_name"], split=commit0_config["dataset_split"]

From ebfb6566fb6126a6796ad9223b8cc66fc7050fac Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 29 Sep 2024 15:41:10 -0700
Subject: [PATCH 6/8] log the message sent to the agent; add import-deps dependency

---
 agent/agents.py | 3 +++
 pyproject.toml  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/agent/agents.py b/agent/agents.py
index 9255a9f..f99e360 100644
--- a/agent/agents.py
+++ b/agent/agents.py
@@ -86,6 +86,9 @@ def run(
             format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
         )
 
+        # Log the message
+        logging.info(f"Message Sent: {message} \n\n")
+
         # Redirect print statements to the log file
         sys.stdout = open(log_file, "a")
         sys.stderr = open(log_file, "a")
diff --git a/pyproject.toml b/pyproject.toml
index 8befc62..7666711 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,6 +11,7 @@ requires-python = ">=3.11"
 dependencies = [
     "ruff>=0.6.4",
     "pre-commit>=3.8.0",
+    "import-deps>=0.3.0",
     "PyMuPDF>=1.24.5",
     "modal==0.64.95",
     "typer>=0.12.0",

From d9039b06b09ccf6c041d35930f7fc80c6ce05002 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 29 Sep 2024 15:46:12 -0700
Subject: [PATCH 7/8] log the agent message after redirecting stdout/stderr

---
 agent/agents.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/agent/agents.py b/agent/agents.py
index f99e360..9d9c4e3 100644
--- a/agent/agents.py
+++ b/agent/agents.py
@@ -86,13 +86,13 @@ def run(
             format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
         )
 
-        # Log the message
-        logging.info(f"Message Sent: {message} \n\n")
-
         # Redirect print statements to the log file
         sys.stdout = open(log_file, "a")
         sys.stderr = open(log_file, "a")
 
+        # Log the message
+        logging.info(f"Message Sent: {message} \n\n")
+
         # Configure httpx and backoff logging
         handle_logging("httpx", log_file)
         handle_logging("backoff", log_file)

From c34bde7142bf3b493422fd3b4386a381885204f0 Mon Sep 17 00:00:00 2001
From: nanjiangwill <willjiang2018@gmail.com>
Date: Sun, 29 Sep 2024 15:50:22 -0700
Subject: [PATCH 8/8] write the agent message to agent_message.log

---
 agent/agents.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/agent/agents.py b/agent/agents.py
index 9d9c4e3..6e7d9d8 100644
--- a/agent/agents.py
+++ b/agent/agents.py
@@ -91,7 +91,9 @@ def run(
         sys.stderr = open(log_file, "a")
 
         # Log the message
-        logging.info(f"Message Sent: {message} \n\n")
+        agent_message_log_file = log_dir / "agent_message.log"
+        with open(agent_message_log_file, "a") as f:
+            f.write(f"Message Sent: {message}\n\n")
 
         # Configure httpx and backoff logging
         handle_logging("httpx", log_file)