astral-sh · zanieb · Jul 12, 2023 · Jul 12, 2023 · Jul 12, 2023 · Jul 12, 2023
diff --git a/scripts/check_ecosystem.py b/scripts/check_ecosystem.py
@@ -45,11 +45,11 @@ async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]:
         """Shallow clone this repository to a temporary directory."""
         if checkout_dir.exists():
             logger.debug(f"Reusing {self.org}:{self.repo}")
-            yield Path(checkout_dir)
+            yield await self._get_commit(checkout_dir)
             return
 
         logger.debug(f"Cloning {self.org}:{self.repo}")
-        git_command = [
+        git_clone_command = [
             "git",
             "clone",
             "--config",
@@ -60,39 +60,48 @@ async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]:
             "--no-tags",
         ]
         if self.ref:
-            git_command.extend(["--branch", self.ref])
+            git_clone_command.extend(["--branch", self.ref])
 
-        git_command.extend(
+        git_clone_command.extend(
             [
                 f"https://github.com/{self.org}/{self.repo}",
                 checkout_dir,
             ],
         )
 
-        process = await create_subprocess_exec(
-            *git_command,
+        git_clone_process = await create_subprocess_exec(
+            *git_clone_command,
             env={"GIT_TERMINAL_PROMPT": "0"},
         )
 
-        status_code = await process.wait()
+        status_code = await git_clone_process.wait()
 
         logger.debug(
             f"Finished cloning {self.org}/{self.repo} with status {status_code}",
         )
+        yield await self._get_commit(checkout_dir)
 
-        yield Path(checkout_dir)
-
-    def url_for(self: Self, path: str, lnum: int | None = None) -> str:
-        """Return the GitHub URL for the given path and line number, if given."""
+    def url_for(self: Self, commit_sha: str, path: str, lnum: int | None = None) -> str:
+        """Return the GitHub URL for the given commit, path, and line number, if given."""
         # Default to main branch
-        url = (
-            f"https://github.com/{self.org}/{self.repo}"
-            f"/blob/{self.ref or 'main'}/{path}"
-        )
+        url = f"https://github.com/{self.org}/{self.repo}" f"/blob/{commit_sha}/{path}"
         if lnum:
             url += f"#L{lnum}"
         return url
 
+    async def _get_commit(self: Self, checkout_dir: Path) -> str:
+        """Return the commit sha for the repository in the checkout directory."""
+        git_sha_process = await create_subprocess_exec(
+            *["git", "rev-parse", "HEAD"],
+            cwd=str(checkout_dir),
+            stdout=PIPE,
+        )
+        git_sha_stdout, _ = await git_sha_process.communicate()
+        assert (
+            await git_sha_process.wait() == 0
+        ), f"Failed to retrieve commit sha at {checkout_dir}"
+        return git_sha_stdout.decode().strip()
+
 
 REPOSITORIES: list[Repository] = [
     Repository("apache", "airflow", "main", select="ALL"),
@@ -169,6 +178,7 @@ class Diff(NamedTuple):
 
     removed: set[str]
     added: set[str]
+    source_sha: str
 
     def __bool__(self: Self) -> bool:
         """Return true if this diff is non-empty."""
@@ -203,13 +213,13 @@ async def compare(
         assert ":" not in repo.org
         assert ":" not in repo.repo
         checkout_dir = Path(checkout_parent).joinpath(f"{repo.org}:{repo.repo}")
-        async with repo.clone(checkout_dir) as path:
+        async with repo.clone(checkout_dir) as checkout_sha:
             try:
                 async with asyncio.TaskGroup() as tg:
                     check1 = tg.create_task(
                         check(
                             ruff=ruff1,
-                            path=path,
+                            path=checkout_dir,
                             name=f"{repo.org}/{repo.repo}",
                             select=repo.select,
                             ignore=repo.ignore,
@@ -220,7 +230,7 @@ async def compare(
                     check2 = tg.create_task(
                         check(
                             ruff=ruff2,
-                            path=path,
+                            path=checkout_dir,
                             name=f"{repo.org}/{repo.repo}",
                             select=repo.select,
                             ignore=repo.ignore,
@@ -237,7 +247,7 @@ async def compare(
                 elif line.startswith("+ "):
                     added.add(line[2:])
 
-    return Diff(removed, added)
+    return Diff(removed, added, checkout_sha)
 
 
 def read_projects_jsonl(projects_jsonl: Path) -> dict[tuple[str, str], Repository]:
@@ -379,7 +389,7 @@ async def limited_parallelism(coroutine: T) -> T:
                         continue
 
                     pre, inner, path, lnum, post = match.groups()
-                    url = repo.url_for(path, int(lnum))
+                    url = repo.url_for(diff.source_sha, path, int(lnum))
                     print(f"{pre} <a href='{url}'>{inner}</a> {post}")
                 print("</pre>")