In [None]:
import json
import collections
import ast
import dataclasses

In [None]:
# Load the scraped file contents. Later cells treat `content` as a mapping of
# repository -> {file path -> file text}.
with open("scikit-build-contents.json", encoding="utf-8") as f:
    content = json.loads(f.read())

In [None]:
# Histogram: how many repositories have a given number of recorded files.
collections.Counter(map(len, content.values()))

In [None]:
# Every (repository, path) pair whose file name is exactly "setup.py".
# Paths containing "{{" are skipped.
setup_pys = [
    (repo_name, file_path)
    for repo_name, file_paths in content.items()
    for file_path in file_paths
    if "{{" not in file_path and file_path.rsplit("/", 1)[-1] == "setup.py"
]

In [None]:
# Number of (repository, path) pairs collected in `setup_pys`.
len(setup_pys)

In [None]:
@dataclasses.dataclass
class Vistor(ast.NodeVisitor):
    result: dict[str | None, str] = dataclasses.field(default_factory=dict)

    def visit_Call(self, node: ast.Call) -> None:
        match node.func:
            case ast.Name(id="setup") | ast.Attribute(attr="setup"):
                for k in node.keywords:
                    self.result[k.arg or "**"] = ast.unparse(k.value)

In [None]:
def get_info(repo: str, path: str) -> dict[str | None, str]:
    """Return the ``setup()`` keyword arguments found in one stored file.

    Looks up the file text in the notebook-level ``content`` mapping under
    ``content[repo][path]``, parses it as Python, and walks it with ``Vistor``.

    Returns:
        The visitor's ``result`` dict: keyword name -> unparsed value source.

    Raises:
        KeyError: if ``repo`` or ``path`` is not present in ``content``.
        SyntaxError: if the stored text cannot be parsed by ``ast.parse``.
    """
    source_text = content[repo][path]
    collector = Vistor()
    collector.visit(ast.parse(source_text, filename="setup.py"))
    return collector.result

In [None]:
# Tally of how often each setup() keyword appears in the inspected files.
keyword_counter = collections.Counter()

# Only the first ten setup.py files are printed (and counted) here.
for repo, key in setup_pys[:10]:
    result = get_info(repo, key)
    # One display line per keyword. Values of 80+ characters are elided, and
    # "**" entries are rendered as `**expr` rather than `name = expr`.
    keywords = [
        f"    {name}{value},"
        if name == "**"
        else f"    {name} = {'...' if len(value) >= 80 else value},"
        for name, value in result.items()
    ]
    print(f"{repo}: {key}")
    print("  setup(", *keywords, ")", sep="\n")
    keyword_counter.update(result.keys())

In [None]:
# Counts for keywords whose name starts with "cmake_".
for k, c in keyword_counter.items():
    if not k.startswith("cmake_"):
        continue
    print(f"{k}: {c}")

In [None]:
# Counts for all remaining keywords (those not starting with "cmake_").
for k, c in keyword_counter.items():
    if k.startswith("cmake_"):
        continue
    print(f"{k}: {c}")

In [None]:
# Scan every collected setup.py and show the ones that pass `cmake_languages`.
for repo, key in setup_pys:
    result = get_info(repo, key)
    if "cmake_languages" not in result:
        continue
    print(repo, key, "cmake_languages =", result["cmake_languages"])