pre-commit

wenting-zhao · wenting-zhao · commit ca81e37827b6 · 2024-09-21T04:17:25.000Z
diff --git a/commit0/cli.py b/commit0/cli.py
@@ -249,7 +249,9 @@ def test(
     if reference:
         branch = "reference"
     if branch is None and not reference:
-        git_path = os.path.join(commit0_config["base_dir"], repo_or_repo_path.split("/")[-1])
+        git_path = os.path.join(
+            commit0_config["base_dir"], repo_or_repo_path.split("/")[-1]
+        )
         branch = get_active_branch(git_path)
 
     if verbose == 2:
@@ -262,7 +264,7 @@ def test(
         commit0_config["dataset_split"],
         commit0_config["base_dir"],
         repo_or_repo_path,
-        branch,
+        branch,  # type: ignore
         test_ids,
         backend,
         timeout,
diff --git a/commit0/harness/evaluate.py b/commit0/harness/evaluate.py
@@ -5,7 +5,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datasets import load_dataset
 from tqdm import tqdm
-from typing import Iterator
+from typing import Iterator, Union
 
 from commit0.harness.run_pytest_ids import main as run_tests
 from commit0.harness.get_pytest_ids import main as get_tests
@@ -23,7 +23,7 @@ def main(
     dataset_split: str,
     repo_split: str,
     base_dir: str,
-    branch: str,
+    branch: Union[str, None],
     backend: str,
     timeout: int,
     num_cpus: int,
@@ -32,19 +32,19 @@ def main(
 ) -> None:
     dataset: Iterator[RepoInstance] = load_dataset(dataset_name, split=dataset_split)  # type: ignore
     repos = SPLIT[repo_split]
-    pairs = []
+    triples = []
     log_dirs = []
     for example in dataset:
         repo_name = example["repo"].split("/")[-1]
         if repo_split != "all" and repo_name not in SPLIT[repo_split]:
             continue
-        pairs.append((repo_name, example["test"]["test_dir"]))
         hashed_test_ids = get_hash_string(example["test"]["test_dir"])
         if branch is None:
             git_path = os.path.join(base_dir, repo_name)
             branch = get_active_branch(git_path)
         log_dir = RUN_PYTEST_LOG_DIR / repo_name / branch / hashed_test_ids
         log_dirs.append(str(log_dir))
+        triples.append((repo_name, example["test"]["test_dir"], branch))
 
     with tqdm(total=len(repos), smoothing=0, desc="Evaluating repos") as pbar:
         with ThreadPoolExecutor(max_workers=num_workers) as executor:
@@ -64,7 +64,7 @@ def main(
                     rebuild_image=rebuild_image,
                     verbose=0,
                 ): None
-                for repo, test_dir in pairs
+                for repo, test_dir, branch in triples
             }
             # Wait for each future to complete
             for future in as_completed(futures):
diff --git a/commit0/harness/utils.py b/commit0/harness/utils.py
@@ -190,25 +190,30 @@ def generate_patch_between_commits(
 
 
 def get_active_branch(repo_path: Union[str, Path]) -> str:
-    """
-    Retrieve the current active branch of a Git repository.
+    """Retrieve the current active branch of a Git repository.
 
     Args:
+    ----
         repo_path (Path): The path to git repo.
 
     Returns:
+    -------
         str: The name of the active branch.
 
     Raises:
+    ------
         Exception: If the repository is in a detached HEAD state.
+
     """
     repo = git.Repo(repo_path)
     try:
         # Get the current active branch
         branch = repo.active_branch.name
     except TypeError as e:
-        raise Exception(f"{e}\nThis means the repository is in a detached HEAD state. "
-                        "To proceed, please specify a valid branch.")
+        raise Exception(
+            f"{e}\nThis means the repository is in a detached HEAD state. "
+            "To proceed, please specify a valid branch by using --branch {branch}."
+        )
 
     return branch