In [1]:
import json
import tarfile
import tomllib as toml
import traceback
import re

from pip._internal.req.constructors import install_req_from_req_string
import requests
import pandas as pd

from pybuild_deps.parsers import parse_requirements
from pybuild_deps.utils import get_version
from pybuild_deps.source import get_package_source
from pybuild_deps.finder import find_build_dependencies
from pybuild_deps.exceptions import PyBuildDepsError

In [2]:
# Cargo's canonical file names are capitalized (`Cargo.lock`, `Cargo.toml`), but a
# GitHub search revealed that many repos ship them in lowercase. The constants are
# therefore kept in lowercase and compared against lowercased archive member
# names, so both spellings match.
LOCK_FILE = "cargo.lock"
PACKAGE_FILE = "cargo.toml"


def depends_on_rust(pkg_name, pkg_version) -> bool:
    """Tell whether a package lists a Rust build backend among its build dependencies.

    Args:
        pkg_name: name of the package to inspect.
        pkg_version: version of the package to inspect.

    Returns:
        True when at least one build dependency string starts with
        ``setuptools-rust``, ``maturin`` or ``setuptools_rust``; False otherwise.
    """
    rust_build_tools = ("setuptools-rust", "maturin", "setuptools_rust")
    return any(
        requirement.startswith(rust_build_tools)
        for requirement in find_build_dependencies(pkg_name, pkg_version)
    )


def get_cargo_locks(pkg_name, pkg_version):
    source_code = get_package_source(pkg_name, pkg_version)
    cargo_locks = []
    with tarfile.open(fileobj=source_code.open("rb")) as tarball:
        filenames = tarball.getnames()
        for full_file_name in filenames:
            if full_file_name.lower().endswith(LOCK_FILE):
                file_handle = tarball.extractfile(full_file_name)
                cargo_locks.append(toml.loads(file_handle.read().decode()))
    return cargo_locks

def get_cargo_files(pkg_name, pkg_version):
    """List the Cargo.toml and Cargo.lock paths inside a package's source tarball.

    Args:
        pkg_name: name of the package whose source is fetched.
        pkg_version: version of the package whose source is fetched.

    Returns:
        A ``(cargo_tomls, cargo_locks)`` tuple of lists holding the archive
        member names whose lowercased form ends with ``PACKAGE_FILE`` and
        ``LOCK_FILE`` respectively, in archive order.
    """
    source_code = get_package_source(pkg_name, pkg_version)
    cargo_locks = []
    cargo_tomls = []
    # Open the archive file explicitly: tarfile does not close a file object
    # that was handed to it, so the handle would otherwise stay open.
    with source_code.open("rb") as archive, tarfile.open(fileobj=archive) as tarball:
        for full_file_name in tarball.getnames():
            # NOTE: these are suffix matches, so a member such as
            # `empty_template_Cargo.toml` is counted as well.
            lowered_name = full_file_name.lower()
            if lowered_name.endswith(LOCK_FILE):
                cargo_locks.append(full_file_name)
            if lowered_name.endswith(PACKAGE_FILE):
                cargo_tomls.append(full_file_name)
    return cargo_tomls, cargo_locks

def get_latest_package_version(pkg_name):
    """Ask the PyPI JSON API for the version reported under ``info.version``.

    Args:
        pkg_name: project name to look up on pypi.org.

    Returns:
        The version string from the response, or None when the HTTP response
        is not OK (for instance when the project does not exist).
    """
    response = requests.get(f"https://pypi.org/pypi/{pkg_name}/json", timeout=10)
    return response.json()["info"]["version"] if response.ok else None


In [3]:
# Download the pythonwheels.com results. A timeout keeps the cell from hanging
# forever (same value as the PyPI lookups), and an HTTP error is raised as such
# instead of surfacing later as a JSON decoding failure.
popular_packages_response = requests.get("https://pythonwheels.com/results.json", timeout=10)
popular_packages_response.raise_for_status()
popular_packages = popular_packages_response.json()
pd.DataFrame(popular_packages["data"]).head()

Unnamed: 0,downloads,name,wheel,value,css_class,icon,title
0,750231089,boto3,True,1,success,✓,This package provides a wheel.
1,378371521,urllib3,True,1,success,✓,This package provides a wheel.
2,330557585,botocore,True,1,success,✓,This package provides a wheel.
3,323394974,requests,True,1,success,✓,This package provides a wheel.
4,297651249,setuptools,True,1,success,✓,This package provides a wheel.


In [4]:
def research_cargo_content(pkg_name, pkg_version):
    """Collect build dependencies and Cargo.toml/Cargo.lock data for a Python package.

    Any exception raised along the way is recorded in the result rather than
    propagated, so one failing package does not abort a whole survey; the
    keys filled in before the failure are kept.

    Args:
        pkg_name: name of the package to inspect.
        pkg_version: version of the package to inspect.

    Returns:
        A dict with ``pkg_name`` and ``pkg_version`` plus, on success,
        ``build_deps`` (set of requirement names), ``cargo_toml_qty``,
        ``cargo_lock_qty``, ``cargo_toml``, ``cargo_lock`` and
        ``cargo_lock_contents``. On failure it carries ``error`` (message)
        and ``exc_class`` (exception class name) instead of the missing keys.
    """
    results = {"pkg_name": pkg_name, "pkg_version": pkg_version}
    try:
        requirement_strings = find_build_dependencies(pkg_name, pkg_version)
        results["build_deps"] = {
            install_req_from_req_string(requirement).name
            for requirement in requirement_strings
        }
        toml_paths, lock_paths = get_cargo_files(pkg_name, pkg_version)
        results.update(
            cargo_toml_qty=len(toml_paths),
            cargo_lock_qty=len(lock_paths),
            cargo_toml=toml_paths,
            cargo_lock=lock_paths,
        )
        # Only parse lock files when at least one was found.
        if lock_paths:
            results["cargo_lock_contents"] = get_cargo_locks(pkg_name, pkg_version)
        else:
            results["cargo_lock_contents"] = []
    except Exception as err:
        results.update(error=str(err), exc_class=type(err).__name__)
    return results

# Run the Cargo lookup against the latest release of every listed package and
# keep the download count next to each result.
popular_packages_results = []
for entry in popular_packages["data"]:
    package_name = entry["name"]
    latest_version = get_latest_package_version(package_name)
    record = research_cargo_content(package_name, latest_version)
    popular_packages_results.append({**record, "downloads": entry["downloads"]})

df_wheels = pd.DataFrame(popular_packages_results)
# Show only the packages that ship at least one Cargo.toml or Cargo.lock.
ships_cargo_files = (df_wheels["cargo_toml_qty"] > 0) | (df_wheels["cargo_lock_qty"] > 0)
df_wheels[ships_cargo_files]



Unnamed: 0,pkg_name,pkg_version,build_deps,cargo_toml_qty,cargo_lock_qty,cargo_toml,cargo_lock,cargo_lock_contents,downloads,error,exc_class
14,cryptography,41.0.4,"{wheel, cffi, setuptools, setuptools-rust}",4.0,1.0,"[cryptography-41.0.4/src/rust/Cargo.toml, cryp...",[cryptography-41.0.4/src/rust/Cargo.lock],"[{'version': 3, 'package': [{'name': 'Inflecto...",186136558,,
98,rpds-py,0.10.4,{maturin},1.0,1.0,[rpds_py-0.10.4/Cargo.toml],[rpds_py-0.10.4/Cargo.lock],"[{'version': 3, 'package': [{'name': 'archery'...",44442080,,
113,bcrypt,4.0.1,"{wheel, setuptools, setuptools-rust}",1.0,1.0,[bcrypt-4.0.1/src/_bcrypt/Cargo.toml],[bcrypt-4.0.1/src/_bcrypt/Cargo.lock],"[{'version': 3, 'package': [{'name': 'autocfg'...",38478981,,
242,pydantic-core,2.10.1,"{maturin, typing-extensions}",1.0,1.0,[pydantic_core-2.10.1/Cargo.toml],[pydantic_core-2.10.1/Cargo.lock],"[{'version': 3, 'package': [{'name': 'ahash', ...",18787799,,
317,orjson,3.9.8,{maturin},37.0,7.0,"[orjson-3.9.8/Cargo.toml, orjson-3.9.8/include...","[orjson-3.9.8/Cargo.lock, orjson-3.9.8/include...","[{'version': 3, 'package': [{'name': 'ahash', ...",12653856,,
338,tokenizers,0.14.1,{maturin},2.0,1.0,"[tokenizers-0.14.1/tokenizers/Cargo.toml, toke...",[tokenizers-0.14.1/bindings/python/Cargo.lock],"[{'version': 3, 'package': [{'name': 'aho-cora...",11359598,,
355,pre-commit,3.4.0,{},1.0,0.0,[pre_commit-3.4.0/pre_commit/resources/empty_t...,[],[],10899315,,


In [17]:
# Link to a PyPI project page, capturing the project name. The name part accepts
# letters, digits, ".", "_" and "-" and must start and end with a letter or
# digit, so hyphenated names are captured whole and trailing punctuation is not.
_PYPI_PROJECT_LINK = re.compile(
    r"pypi\.org/project/([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)"
)


def search_pypi_link_on_github_readme(repo):
    """Look for a pypi.org project link in a GitHub repository's README.

    The README is searched for on the ``main`` branch first and then on
    ``master``, trying a handful of common README file names. The first
    README that can be downloaded is the one that gets inspected.

    Args:
        repo: repository in ``owner/name`` form.

    Returns:
        The project name taken from the first ``pypi.org/project/<name>`` link
        in that README, or None when no README could be downloaded or it
        contains no such link.
    """
    url = "https://raw.githubusercontent.com/{repo}/{branch}/{file}"
    readme_names = ("README.md", "readme.md", "README.txt", "README.rst", "README")
    for branch in ("main", "master"):
        for readme_name in readme_names:
            response = requests.get(
                url.format(repo=repo, branch=branch, file=readme_name),
                timeout=10,
            )
            if not response.ok:
                continue
            # Stop at the first README found. A plain `break` here would only
            # leave the inner loop and let later failed requests on the other
            # branch overwrite this successful response.
            match = _PYPI_PROJECT_LINK.search(response.text)
            return match.group(1) if match else None
    return None

def get_data_from_pypi(gh_repo):
    """Map a GitHub repository to a PyPI project and run the Cargo lookup on it.

    The lowercased repository name is tried as the PyPI project name first;
    when PyPI reports no version for it, the repository README is searched
    for a PyPI project link instead.

    Args:
        gh_repo: repository in ``owner/name`` form.

    Returns:
        A dict with ``repo`` and ``is_on_pypi``. When a project and version
        were found, ``is_on_pypi`` is True and the keys returned by
        ``research_cargo_content`` are merged in.
    """
    res = {"repo": gh_repo, "is_on_pypi": False}
    name = gh_repo.split("/")[1].lower()
    version = get_latest_package_version(name)
    if not version:
        # Fall back to whatever project the README links to, if any.
        name = search_pypi_link_on_github_readme(gh_repo)
        version = get_latest_package_version(name) if name else None
    if version:
        res.update(is_on_pypi=True, **research_cargo_content(name, version))
    return res


In [16]:
# Run the `github-dependents-info` CLI for PyO3/setuptools-rust with JSON output
# (stderr discarded). IPython captures stdout as a list of lines, which is
# joined back into one string and parsed as JSON.
dependants_setuptoolsrust = !github-dependents-info --repo PyO3/setuptools-rust --json 2> /dev/null
dependants_setuptoolsrust = json.loads("".join(dependants_setuptoolsrust))

In [18]:
# Run the `github-dependents-info` CLI for PyO3/maturin with JSON output
# (stderr discarded). IPython captures stdout as a list of lines, which is
# joined back into one string and parsed as JSON.
dependants_maturin = !github-dependents-info --repo PyO3/maturin --json 2> /dev/null
dependants_maturin = json.loads("".join(dependants_maturin))

In [19]:
# Put the dependents of both build backends in one table, most-starred first.
# A repo that depends on both tools appears twice here.
all_dependent_repos = [
    *dependants_setuptoolsrust["all_public_dependent_repos"],
    *dependants_maturin["all_public_dependent_repos"],
]
df_gh = pd.DataFrame(all_dependent_repos).sort_values("stars", ascending=False)
df_gh.head()

Unnamed: 0,name,stars
828,certbot/certbot,30267
398,OpenBB-finance/OpenBBTerminal,24516
3306,pola-rs/polars,20816
234,InstaPy/InstaPy,15775
721,ansible/awx,12688


In [20]:
# How many dependents have at least 10 stars?
df_gh.loc[df_gh["stars"] >= 10].shape

(370, 2)

In [21]:
# Analyse each distinct repo with at least 10 stars. The results are appended
# one by one, so whatever was collected survives an interrupted run.
dependendants_cargo_results = []

repos_with_enough_stars = df_gh.loc[df_gh["stars"] >= 10, "name"].unique()
for repo_full_name in repos_with_enough_stars:
    dependendants_cargo_results.append(get_data_from_pypi(repo_full_name))



In [43]:
# One row per analysed GitHub repo.
df_pypi_analysis = pd.DataFrame(data=dependendants_cargo_results)
df_pypi_analysis.head(n=5)

Unnamed: 0,repo,is_on_pypi,pkg_name,pkg_version,build_deps,cargo_toml_qty,cargo_lock_qty,cargo_toml,cargo_lock,cargo_lock_contents,error,exc_class
0,certbot/certbot,True,certbot,2.7.1,{},0.0,0.0,[],[],[],,
1,OpenBB-finance/OpenBBTerminal,False,,,,,,,,,,
2,pola-rs/polars,True,polars,0.19.8,{maturin},19.0,1.0,"[polars-0.19.8/crates/polars-io/Cargo.toml, po...",[polars-0.19.8/py-polars/Cargo.lock],"[{'version': 3, 'package': [{'name': 'addr2lin...",,
3,InstaPy/InstaPy,True,instapy,0.6.16,{},0.0,0.0,[],[],[],,
4,ansible/awx,True,awx,0.1.1,{},0.0,0.0,[],[],[],,


In [23]:
# Create the `results` output directory (`-p`: no error if it already exists).
!mkdir -p results

In [29]:
from pathlib import Path

def _set_encoder(val):
    if isinstance(val, set):
        return list(val)


# (payload, file name) pairs to persist as JSON under `results/`.
result_files = (
    (popular_packages, "pythonwheel-results.json"),
    (popular_packages_results, "cargo-research-pythonwheel.json"),
    (dependants_setuptoolsrust, "dependants-setuptoolsrust.json"),
    (dependants_maturin, "dependants-maturin.json"),
    (dependendants_cargo_results, "cargo-research-dependants-unique.json"),
)
results_dir = Path("results")
# Create the directory here too, so this cell works even when the shell
# `mkdir` cell was not run (or is unavailable on the platform).
results_dir.mkdir(parents=True, exist_ok=True)
for var, filename in result_files:
    json_contents = json.dumps(var, default=_set_encoder)
    (results_dir / filename).write_text(json_contents, encoding="utf-8")

In [46]:
def is_lock_version_3(lock_contents):
    """Tell whether every parsed Cargo.lock in a list declares ``version = 3``.

    Args:
        lock_contents: list of parsed lock files (dicts); any other value,
            such as a missing cell, is treated as "no data".

    Returns:
        None when ``lock_contents`` is not a list or is empty, False as soon
        as one lock has a ``version`` other than 3 (or none at all), and True
        when all of them are version 3.
    """
    if not isinstance(lock_contents, list) or not lock_contents:
        return None
    return all(lock.get("version") == 3 for lock in lock_contents)

In [38]:
# Check the lock format only for the popular packages that ship a Cargo.lock.
wheels_lock_contents = df_wheels.loc[df_wheels["cargo_lock_qty"] > 0, "cargo_lock_contents"]
wheels_lock_contents.apply(is_lock_version_3)

14     True
98     True
113    True
242    True
317    True
338    True
Name: cargo_lock_contents, dtype: bool

In [48]:
# Flag, per repo, whether all of its parsed Cargo.lock files are version 3
# (None where there is no lock data).
lock_contents_column = df_pypi_analysis["cargo_lock_contents"]
df_pypi_analysis["is_lock_version_3"] = lock_contents_column.apply(is_lock_version_3)


In [50]:
# Compare with False explicitly: rows without lock data hold None and must not
# be selected, so a plain negation of the column would be wrong here.
has_other_lock_version = df_pypi_analysis["is_lock_version_3"].eq(False)
df_pypi_analysis[has_other_lock_version]

Unnamed: 0,repo,is_on_pypi,pkg_name,pkg_version,build_deps,cargo_toml_qty,cargo_lock_qty,cargo_toml,cargo_lock,cargo_lock_contents,error,exc_class,is_lock_version_3
88,PyO3/tokio,True,tokio,0.2.0,{setuptools-rust},1.0,1.0,[tokio-0.2.0/Cargo.toml],[tokio-0.2.0/Cargo.lock],"[{'root': {'name': 'async-tokio', 'version': '...",,,False
118,pierogis/pierogis,True,pierogis,0.4.1,"{wheel, setuptools_scm, setuptools-scm, setupt...",1.0,1.0,[pierogis-0.4.1/Cargo.toml],[pierogis-0.4.1/Cargo.lock],"[{'package': [{'name': 'adler', 'version': '0....",,,False
150,danielgatis/darknetpy,True,darknetpy,4.2,"{wheel, setuptools, setuptools-rust}",1.0,1.0,[darknetpy-4.2/Cargo.toml],[darknetpy-4.2/Cargo.lock],"[{'package': [{'name': 'aho-corasick', 'versio...",,,False
251,fastobo/fastobo-py,True,fastobo,0.12.2,"{setuptools_rust, setuptools-rust, setuptools}",146.0,27.0,"[fastobo-0.12.2/Cargo.toml, fastobo-0.12.2/cra...","[fastobo-0.12.2/Cargo.lock, fastobo-0.12.2/cra...","[{'version': 3, 'package': [{'name': 'android_...",,,False


In [55]:
# Look at the single Cargo.lock shipped with tokio 0.2.0, one of the packages
# flagged as not being lock version 3.
tokio_locks = get_cargo_locks("tokio", "0.2.0")
tokyo_lock = tokio_locks[0]
tokyo_lock.keys()

dict_keys(['root', 'package', 'metadata'])

In [56]:
# This lock has no top-level `version` key (see its keys above); inspect the
# `root` table it has instead.
tokyo_lock["root"]

{'name': 'async-tokio',
 'version': '0.2.0',
 'dependencies': ['boxfnonce 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)',
  'bytes 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)',
  'chan 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)',
  'env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)',
  'futures 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)',
  'http-muncher 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)',
  'httparse 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)',
  'lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)',
  'libc 0.2.27 (registry+https://github.com/rust-lang/crates.io-index)',
  'log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)',
  'mio 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)',
  'net2 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)',
  'pyo3 0.1.0 (registry+https://github.

In [60]:
import tomli_w

In [62]:
# Serialize the parsed lock back to TOML text to read it in file form.
print(tomli_w.dumps(tokyo_lock))

[root]
name = "async-tokio"
version = "0.2.0"
dependencies = [
    "boxfnonce 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
    "bytes 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
    "chan 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)",
    "env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
    "futures 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)",
    "http-muncher 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
    "httparse 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
    "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
    "libc 0.2.27 (registry+https://github.com/rust-lang/crates.io-index)",
    "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
    "mio 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
    "net2 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)",
    "pyo3 0.1

I was unable to compile tokio 0.2.0 (build errors below). Maybe it is safe to ignore packages with older Cargo.lock formats.

```
error[E0557]: feature has been removed
 --> /home/bruno/.cargo/registry/src/github.com-1ecc6299db9ec823/spin-0.4.5/src/lib.rs:8:43
  |
8 | #![cfg_attr(feature = "const_fn", feature(const_fn))]
  |                                           ^^^^^^^^ feature has been removed
  |
  = note: split into finer-grained feature gates

   Compiling synom v0.11.3
   Compiling boxfnonce v0.0.3
error: cannot find macro `asm` in this scope
 --> /home/bruno/.cargo/registry/src/github.com-1ecc6299db9ec823/spin-0.4.5/src/util.rs:8:14
  |
8 |     unsafe { asm!("pause" :::: "volatile"); }
  |              ^^^
  |
  = note: consider importing this macro:
          core::arch::asm

error[E0554]: `#![feature]` may not be used on the stable release channel
 --> /home/bruno/.cargo/registry/src/github.com-1ecc6299db9ec823/spin-0.4.5/src/lib.rs:6:38
  |
6 | #![cfg_attr(feature = "asm", feature(asm))]
  |                                      ^^^

error[E0554]: `#![feature]` may not be used on the stable release channel
 --> /home/bruno/.cargo/registry/src/github.com-1ecc6299db9ec823/spin-0.4.5/src/lib.rs:7:50
  |
7 | #![cfg_attr(feature = "core_intrinsics", feature(core_intrinsics))]
  |                                                  ^^^^^^^^^^^^^^^

   Compiling thread_local v0.3.4
Some errors have detailed explanations: E0554, E0557.
For more information about an error, try `rustc --explain E0554`.
error: could not compile `spin` due to 4 previous errors
warning: build failed, waiting for other jobs to finish...

```

In [64]:
# Parse every Cargo.lock found in the fastobo 0.12.2 source (27 were counted
# in the table above).
fastobo_locks = get_cargo_locks("fastobo", "0.12.2")

In [67]:
# Uncomment to list the fastobo lock files that are not version 3 (long output):
# [l for l in fastobo_locks if l.get("version") != 3]

Long story short: fastobo vendors lots of dependencies, which is why it has so many Cargo files.

We might need to find only the main dependency in a repo (i.e. its top-level Cargo files rather than the vendored ones).
