In [None]:
import pandas as pd
from github import Github
import os

In [None]:
# GitHub client shared by every read_file() call in this notebook.
# The token is read from the GITHUB_API_TOKEN environment variable; the
# lookup raises KeyError when that variable is not set.
g = Github(os.environ["GITHUB_API_TOKEN"])

In [None]:
def read_file(filename: str, g: Github) -> pd.DataFrame:
    """
    Load a CSV export and attach a GitHub star count to every repository row.

    Only the CSV columns at positions 1 and 3 are read; one of them must be
    named ``Repository``. Rows whose repository does not start with
    ``github.com/`` are discarded, as are rows with an empty repository
    cell. The ``github.com/`` prefix is stripped from the remaining values
    before they are passed to ``g.get_repo``.

    Parameters
    ----------
    filename
        Path of the CSV file to read.
    g
        Client used to look up repositories; ``get_repo(name)`` is called
        once per distinct repository and its ``stargazers_count`` is read.

    Returns
    -------
    pd.DataFrame
        The GitHub rows with an added ``stars`` column, sorted by ``stars``
        in descending order. A repository that appears on several input
        rows appears on several output rows.

    Notes
    -----
    Any exception raised by ``g.get_repo`` propagates to the caller.
    """
    tbl = pd.read_csv(filename, usecols=[1, 3])

    # na=False: an empty Repository cell would otherwise yield NaN in the
    # mask, and boolean indexing with a NaN-containing mask raises ValueError.
    is_github = tbl["Repository"].str.startswith("github.com/", na=False)
    ghtbl = tbl[is_github].copy()
    ghtbl["Repository"] = ghtbl["Repository"].str.removeprefix("github.com/")

    # One lookup per distinct repository, no matter how many rows mention it.
    stars = {
        repo: g.get_repo(repo).stargazers_count
        for repo in ghtbl["Repository"].unique()
    }
    ghtbl["stars"] = ghtbl["Repository"].map(stars)

    return ghtbl.sort_values("stars", ascending=False)

In [None]:
def print_tbl(ghtbl: pd.DataFrame) -> None:
    """
    Write a two-column Markdown table (repository link, star count) to stdout.

    Each distinct value of the ``Repository`` column produces at most one
    row, taken from its first occurrence in ``ghtbl``. Repositories whose
    name starts with ``scikit-build`` are left out of the table.

    Parameters
    ----------
    ghtbl
        Frame with a ``Repository`` column and a ``stars`` column.
    """
    print("| Repo | ⭐️ |\n|---|---|")

    unique_repos = ghtbl.drop_duplicates("Repository")
    for repo, stars in zip(unique_repos["Repository"], unique_repos["stars"]):
        if not repo.startswith("scikit-build"):
            print(f"| [{repo}](https://github.com/{repo}) | {stars} |")

In [None]:
# Star-annotated rows for the first search export (judging by the file name,
# a "skbuild" search restricted to setup.py files -- confirm against the export).
scikit_build = read_file(
    filename="sourcegraph-search-export-context-global-skbuild-path-setup-py-count-2000.csv",
    g=g,
)

In [None]:
# Markdown table of the repositories found in the scikit_build frame.
print_tbl(scikit_build)

In [None]:
# Star-annotated rows for the second search export (judging by the file name,
# a "scikit-build-core" search restricted to pyproject.toml files -- confirm).
scikit_build_core = read_file(
    filename="sourcegraph-search-export-context-global-scikit-build-core-path-pyproject-toml.csv",
    g=g,
)

In [None]:
# Markdown table of the repositories found in the scikit_build_core frame.
print_tbl(scikit_build_core)

In [None]:
# Preview the scikit_build_core frame as JSON text, one record per distinct
# repository. The previous form of this cell had a stray quote before
# `orient=` and an unbalanced closing parenthesis, so it could not be parsed.
print(scikit_build_core.drop_duplicates("Repository").to_json(orient="records", indent=2))

In [None]:
# Write every row of scikit_build_core (no de-duplication is applied here)
# to scikit-build-core.json as an indented JSON array of records.
scikit_build_core.to_json("scikit-build-core.json", orient="records", indent=2)

In [None]:
# Write every row of scikit_build (no de-duplication is applied here)
# to scikit-build.json as an indented JSON array of records.
scikit_build.to_json("scikit-build.json", orient="records", indent=2)