In [None]:
import ast
import collections
import dataclasses
import functools
import json
import re
import tomllib

import packaging.specifiers
import pandas as pd
import rich
import rich.markup

In [None]:
# Load the scraped data; later cells index it as content[repo][path] to get
# the text of a file.
with open("scikit-build-contents.json", encoding="utf-8") as fh:
    content = json.load(fh)

In [None]:
# Histogram of how many entries each repository contributes to `content`.
collections.Counter(map(len, content.values()))

In [None]:
# (repo, path) pairs for every file whose final path component is "setup.py".
# Paths containing "{{" are skipped (presumably templated paths - confirm).
setup_pys = [
    (repo_name, file_path)
    for repo_name, files in content.items()
    for file_path in files
    if "{{" not in file_path and file_path.rpartition("/")[2] == "setup.py"
]

In [None]:
# Number of setup.py files selected above.
len(setup_pys)

In [None]:
@dataclasses.dataclass
class Vistor(ast.NodeVisitor):
    result: dict[str | None, str] = dataclasses.field(default_factory=dict)

    def visit_Call(self, node: ast.Call) -> None:
        match node.func:
            case ast.Name(id="setup") | ast.Attribute(attr="setup"):
                for k in node.keywords:
                    self.result[k.arg or "**"] = ast.unparse(k.value)

In [None]:
def get_info(repo: str, path: str) -> dict[str | None, str]:
    """Return the ``setup()`` keyword arguments found in one stored file.

    The source text is looked up as ``content[repo][path]``, parsed, and
    walked with ``Vistor``; the visitor's ``result`` mapping is returned.
    """
    collector = Vistor()
    collector.visit(ast.parse(content[repo][path], filename="setup.py"))
    return collector.result

In [None]:
# Count how many setup.py files use each setup() keyword, and pretty-print
# the first ten calls as a preview.
keyword_counter = collections.Counter()
for i, (repo, path) in enumerate(setup_pys):
    result = get_info(repo, path)
    if i < 10:
        print(f"{repo}: {path}")
        keywords = []
        for name, val in result.items():
            if name == "**":
                keywords.append(f"    {name}{val},")
            else:
                # Long values are elided to keep the preview readable.
                shown = val if len(val) < 80 else "..."
                keywords.append(f"    {name} = {shown},")
        print("  setup(", *keywords, ")", sep="\n")
    # Pass the keys explicitly: Counter.update on a mapping would treat the
    # values as counts.
    keyword_counter.update(result.keys())

In [None]:
# Usage counts for the cmake_* keywords, most common first.
for name, count in keyword_counter.most_common():
    if not name.startswith("cmake_"):
        continue
    print(f"{name}: {count}")

In [None]:
# Usage counts for every keyword that does not start with cmake_.
for name, count in keyword_counter.most_common():
    if name.startswith("cmake_"):
        continue
    print(f"{name}: {count}")

In [None]:
def print_kw(keyword: str) -> None:
    """Print a Markdown table of every setup.py that passes ``keyword``.

    Each row links to the file on GitHub and shows the unparsed value of the
    keyword. Paths of 20 characters or more are shown as ``...`` in the link
    text. The header is printed even when no file matches.
    """
    print("| Location | entry |")
    print("|----------|-------|")
    for repo, path in setup_pys:
        info = get_info(repo, path)
        if keyword not in info:
            continue
        location = f"{repo}:{path if len(path) < 20 else '...'}"
        url = f"https://github.com/{repo}/blob/HEAD/{path}"
        print(f"| [{location}]({url}) | `{info[keyword]}` |")

In [None]:
# Detailed tables for the cmake_* keywords used by fewer than 100 files.
for name, count in keyword_counter.most_common():
    if not (name.startswith("cmake_") and count < 100):
        continue
    print(f"{name}: {count}\n")
    print_kw(name)
    print()

In [None]:
# Table of every setup.py that passes cmake_install_dir to setup().
print_kw("cmake_install_dir")

In [None]:
# (repo, path) pairs for every file whose final path component is
# "pyproject.toml". Paths containing "{{" are skipped (presumably templated
# paths - confirm).
pyproject_tomls = [
    (repo_name, file_path)
    for repo_name, files in content.items()
    for file_path in files
    if "{{" not in file_path and file_path.rpartition("/")[2] == "pyproject.toml"
]

In [None]:
# Preview build-system.requires for the first five pyproject.toml files.
for repo, fn in pyproject_tomls[:5]:
    build_system = tomllib.loads(content[repo][fn]).get("build-system", {})
    print(f"{repo}:{fn}")
    rich.print(build_system.get("requires"))

In [None]:
# One row per (repo, pyproject.toml path). The single column is built from an
# empty Series, so every row starts out missing until a later cell fills it.
index = pd.MultiIndex.from_tuples(pyproject_tomls, names=["repo", "fn"])
pypro = pd.DataFrame({"requires_skbuild": pd.Series(dtype="str")}, index=index)

In [None]:
# Record, for each pyproject.toml, the first build requirement whose text
# contains "scikit" (spaces removed). This is a substring match, so any
# requirement mentioning "scikit" qualifies. Files with several such
# requirements are printed so they can be inspected by hand.
for repo, fn in pyproject_tomls:
    build_system = tomllib.loads(content[repo][fn]).get("build-system", {})
    requires = build_system.get("requires", [])
    items = [req.replace(" ", "") for req in requires if "scikit" in req]
    if not items:
        continue
    if len(items) > 1:
        print(items)
    pypro.loc[repo, fn] = items[0]

In [None]:
def contains(value: str, spec: str) -> bool:
    """Return whether version ``value`` satisfies the requirement ``spec``.

    ``spec`` is a requirement string such as ``"scikit-build-core>=0.5"``.
    The distribution name, any ``[extras]`` and any ``;marker`` suffix are
    removed, and what remains is evaluated as a version specifier set. A bare
    name with no version constraint is treated as ``">0"``. Direct references
    (anything containing ``"@"``) carry no version constraint to test and
    always yield ``False``.

    Raises whatever ``packaging.specifiers.SpecifierSet`` raises when the
    remaining text is not a valid specifier.
    """
    if "@" in spec:
        return False
    # An environment marker is not part of the version constraint.
    requirement = spec.partition(";")[0]
    # str.lstrip removes a *set of characters*, not a prefix, so it stops at
    # the "[" of an extras list. Strip the name and optional extras explicitly.
    constraint = re.sub(
        r"^\s*[A-Za-z0-9._-]+\s*(\[[^\]]*\])?", "", requirement
    ).strip()
    return packaging.specifiers.SpecifierSet(constraint or ">0").contains(value)

In [None]:
# Rows for which no "scikit" build requirement was recorded. Select with the
# column's boolean mask: indexing with the frame-shaped mask `pypro.isna()`
# keeps every row and merely blanks the values, which shows nothing useful.
pypro[pypro.requires_skbuild.isna()]

In [None]:
# Keep only the rows that have a recorded requirement. The name is rebound on
# purpose: the cells below read `pypro`. Re-running this cell is harmless.
pypro = pypro.dropna()

In [None]:
# Tally the distinct requirement strings and colour-code each one:
#   blue  - direct reference ("@"), no version constraint to evaluate
#   green - the constraint admits version 0.17.5
#   red   - the constraint excludes version 0.17.5
for k, v in collections.Counter(pypro.requires_skbuild).most_common():
    if "@" in k:
        c = "[blue]"
    elif contains("0.17.5", k):
        # Reuse the shared helper instead of repeating its stripping logic.
        c = "[green]"
    else:
        c = "[red]"
    # Escape the requirement so extras such as "[pyproject]" are printed
    # literally rather than being parsed as rich markup tags.
    rich.print(f"{c}{v:3} {rich.markup.escape(k)}")

In [None]:
# Rows whose recorded requirement does not admit version 0.17.5.
admits_target = pypro.requires_skbuild.map(functools.partial(contains, "0.17.5"))
pypro[~admits_target]