In [None]:
import pandas as pd
from github import Github
import os
import tomllib
import contextlib
import rich
from collections.abc import Generator
import json

In [None]:
# Read "scikit-build.json" (a JSON list of row records) into a DataFrame.
df = pd.read_json(
    "scikit-build.json",
    orient="records",
)

In [None]:
# Keep only the rows whose "Repository" value does NOT begin with
# "scikit-build" (presumably the scikit-build organization's own
# repositories -- confirm against the input data).
df = df.loc[~df["Repository"].str.startswith("scikit-build")]

In [None]:
# Show the current frame as the cell's output for a quick visual check.
df

In [None]:
# GitHub API client shared by the cells below. The token is read from the
# GITHUB_API_TOKEN environment variable; a KeyError is raised if it is unset.
g = Github(
    os.environ["GITHUB_API_TOKEN"]
)

In [None]:
def to_dict(repo: str, setup_py: str) -> Generator[tuple[str, str], None, None]:
    """Yield ``(path, text)`` pairs for a repository's packaging files.

    The file at ``setup_py`` is always fetched and yielded first. The
    ``pyproject.toml`` and ``setup.cfg`` files located in the same directory
    as ``setup_py`` are yielded afterwards, each only if it exists in the
    repository.

    Despite its name this is a generator; wrap the call in ``dict(...)`` to
    obtain a ``{path: text}`` mapping.

    Args:
        repo: Repository identifier handed to ``g.get_repo``.
        setup_py: Path of the primary file inside the repository.

    Yields:
        ``(path, content)`` tuples, with the content decoded as UTF-8.

    Raises:
        Any error raised while resolving the repository or fetching
        ``setup_py`` propagates. For the two optional files only a "not
        found" response is ignored; every other failure (rate limiting,
        authentication, network, decoding) propagates instead of silently
        producing incomplete data.
    """
    # Imported locally so this cell does not depend on edits to the import cell.
    from github import UnknownObjectException

    # Resolve the repository once instead of once per fetched file.
    repository = g.get_repo(repo)

    def fetch(path: str) -> str:
        """Return the UTF-8 decoded content of ``path`` in the repository."""
        return repository.get_contents(path).decoded_content.decode("utf-8")

    yield setup_py, fetch(setup_py)

    # Directory containing setup_py; empty string when it is at the repo root.
    base = setup_py.rsplit("/", maxsplit=1)[0] if "/" in setup_py else ""
    for filename in ("pyproject.toml", "setup.cfg"):
        # lstrip removes the leading "/" produced when base is empty.
        path = f"{base}/{filename}".lstrip("/")
        try:
            text = fetch(path)
        except UnknownObjectException:
            # The optional file is simply absent from this repository.
            continue
        # Yield outside the try block so that exceptions thrown into the
        # generator by its consumer are never swallowed here.
        yield path, text

In [None]:
# Lazily pair every repository with the {path: text} mapping built from
# to_dict for that row's file path. This is a single-use generator: nothing
# is fetched until it is iterated, and it is exhausted after one pass.
contents_tuples = (
    (repo, dict(to_dict(repo, fn)))
    for repo, fn in zip(df["Repository"], df["File path"])
)

In [None]:
# Fold the per-row mappings into one {path: text} mapping per repository.
# When the same path appears more than once, the later entry wins.
contents = {}
for repo, content in contents_tuples:
    contents.setdefault(repo, {}).update(content)

In [None]:
# Persist the collected mapping as a single JSON document.
with open("scikit-build-contents.json", mode="w", encoding="utf-8") as f:
    f.write(json.dumps(contents))