diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index d41fbf2..a461f63 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -71,6 +71,9 @@ jobs: needs: - create-gh-release runs-on: ubuntu-24.04 + environment: pypi-publish + permissions: + id-token: write steps: - name: Download built archives @@ -82,5 +85,3 @@ jobs: - name: Publish to PyPI if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.readthedocs.yml b/.readthedocs.yml index 683f3a8..27c1595 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,7 +9,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.11" + python: "3.13" # Build PDF & ePub formats: diff --git a/AUTHORS.rst b/AUTHORS.rst index 16e2046..7f5774f 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -1,3 +1,5 @@ The following organizations or individuals have contributed to this repo: -- +- Michael Herzog +- Philippe Ombredanne +- Jono Yang diff --git a/README.rst b/README.rst index 210535e..2ac763f 100644 --- a/README.rst +++ b/README.rst @@ -1,15 +1,15 @@ purl-validator ================================ -PURLs are everywhere in SBOMs. But with adoption comes widespread errors. +PURLs are everywhere in SBOMs. But with adoption comes widespread errors. A recent study on the quality of SBOMs revealed that for many proprietary and -open source tools, PURLs in SBOMs are inconsistent, fake, incorrect, or -misleading. This is a serious problem to any application of SBOMs for -cybersecurity and application security, as well as related compliance -regulations. This project is to create a PURL validator that's decentralized +open source tools, PURLs in SBOMs are inconsistent, fake, incorrect, or +misleading. This is a serious problem to any application of SBOMs for +cybersecurity and application security, as well as related compliance +regulations. 
This project is to create a PURL validator that's decentralized such that libraries can use it offline and help them create better PURLs. -Building this compact dataset is new territory. There is research -and exploration necessary for creating a super compact data structure -that is also easy and fast to query across multiple languages. The data +Building this compact dataset is new territory. There is research +and exploration necessary for creating a super compact data structure +that is also easy and fast to query across multiple languages. The data structure will also need memory-mapping to avoid running out of memory. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4d347b7..7230c41 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -19,7 +19,7 @@ jobs: parameters: job_name: ubuntu22_cpython image_name: ubuntu-22.04 - python_versions: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python_versions: ['3.10', '3.11', '3.12', '3.13', '3.14'] test_suites: all: venv/bin/pytest -n 2 -vvs @@ -27,7 +27,7 @@ jobs: parameters: job_name: ubuntu24_cpython image_name: ubuntu-24.04 - python_versions: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python_versions: ['3.10', '3.11', '3.12', '3.13', '3.14'] test_suites: all: venv/bin/pytest -n 2 -vvs @@ -35,7 +35,7 @@ jobs: parameters: job_name: macos13_cpython image_name: macOS-13 - python_versions: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python_versions: ['3.10', '3.11', '3.12', '3.13', '3.14'] test_suites: all: venv/bin/pytest -n 2 -vvs @@ -43,7 +43,7 @@ jobs: parameters: job_name: macos14_cpython image_name: macOS-14 - python_versions: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python_versions: ['3.10', '3.11', '3.12', '3.13', '3.14'] test_suites: all: venv/bin/pytest -n 2 -vvs @@ -51,7 +51,7 @@ jobs: parameters: job_name: macos15_cpython image_name: macOS-15 - python_versions: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python_versions: ['3.10', '3.11', '3.12', '3.13', '3.14'] test_suites: all: venv/bin/pytest 
-n 2 -vvs @@ -59,7 +59,7 @@ jobs: parameters: job_name: win2022_cpython image_name: windows-2022 - python_versions: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python_versions: ['3.10', '3.11', '3.12', '3.13', '3.14'] test_suites: all: venv\Scripts\pytest -n 2 -vvs @@ -67,6 +67,6 @@ jobs: parameters: job_name: win2025_cpython image_name: windows-2025 - python_versions: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python_versions: ['3.10', '3.11', '3.12', '3.13', '3.14'] test_suites: all: venv\Scripts\pytest -n 2 -vvs diff --git a/etc/scripts/utils_requirements.py b/etc/scripts/utils_requirements.py index b9b2c0e..424bed2 100644 --- a/etc/scripts/utils_requirements.py +++ b/etc/scripts/utils_requirements.py @@ -153,7 +153,7 @@ def split_req(req): if not req: raise ValueError("req is required") # do not allow multiple constraints and tags - if not any(c in req for c in ",;"): + if any(c in req for c in ",;"): raise Exception(f"complex requirements with : or ; not supported: {req}") req = "".join(req.split()) if not any(c in req for c in comparators): diff --git a/etc/scripts/utils_thirdparty.py b/etc/scripts/utils_thirdparty.py index 6f812f0..bc68ac7 100644 --- a/etc/scripts/utils_thirdparty.py +++ b/etc/scripts/utils_thirdparty.py @@ -115,14 +115,14 @@ TRACE_ULTRA_DEEP = False # Supported environments -PYTHON_VERSIONS = "39", "310", "311", "312", "313" +PYTHON_VERSIONS = "310", "311", "312", "313", "314" PYTHON_DOT_VERSIONS_BY_VER = { - "39": "3.9", "310": "3.10", "311": "3.11", "312": "3.12", "313": "3.13", + "314": "3.14", } @@ -134,11 +134,11 @@ def get_python_dot_version(version): ABIS_BY_PYTHON_VERSION = { - "39": ["cp39", "cp39m", "abi3"], "310": ["cp310", "cp310m", "abi3"], "311": ["cp311", "cp311m", "abi3"], "312": ["cp312", "cp312m", "abi3"], "313": ["cp313", "cp313m", "abi3"], + "314": ["cp314", "cp314m", "abi3"], } PLATFORMS_BY_OS = { diff --git a/pyproject.toml b/pyproject.toml index d79574e..039e2ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 
+1,5 @@ [build-system] -requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] +requires = ["setuptools >= 50", "wheel"] build-backend = "setuptools.build_meta" [tool.setuptools_scm] @@ -68,7 +68,7 @@ include = [ "." ] -# ignore test data and testfiles: they should never be linted nor formatted +# ignore test data and testfiles: they should never be linted nor formatted exclude = [ # main style "**/tests/data/**/*", diff --git a/requirements-dev.txt b/requirements-dev.txt index e69de29..9092c81 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -0,0 +1,65 @@ +aboutcode-toolkit==11.1.1 +alabaster==1.0.0 +anyio==4.11.0 +babel==2.17.0 +boolean.py==5.0 +cffi==2.0.0 +colorama==0.4.6 +cryptography==46.0.3 +doc8==2.0.0 +docutils==0.21.2 +et_xmlfile==2.0.0 +execnet==2.1.1 +h11==0.16.0 +id==1.5.0 +imagesize==1.4.1 +iniconfig==2.3.0 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.3.0 +jeepney==0.9.0 +jinja2==3.1.6 +keyring==25.6.0 +license-expression==30.4.4 +markdown-it-py==4.0.0 +markupsafe==3.0.3 +mdurl==0.1.2 +more-itertools==10.8.0 +nh3==0.3.1 +openpyxl==3.1.5 +packageurl-python==0.17.5 +packaging==25.0 +pluggy==1.6.0 +pycparser==2.23 +pygments==2.19.2 +pytest==8.4.2 +pytest-xdist==3.8.0 +readme_renderer==44.0 +requests-toolbelt==1.0.0 +restructuredtext_lint==1.4.0 +rfc3986==2.0.0 +rich==14.2.0 +roman-numerals-py==3.1.0 +ruff==0.14.2 +secretstorage==3.4.0 +sniffio==1.3.1 +snowballstemmer==3.0.1 +sphinx==8.2.3 +sphinx-autobuild==2025.8.25 +sphinx-copybutton==0.5.2 +sphinx-reredirects==1.0.0 +sphinx-rtd-dark-mode==1.3.0 +sphinx-rtd-theme==3.0.2 +sphinxcontrib-applehelp==2.0.0 +sphinxcontrib-devhelp==2.0.0 +sphinxcontrib-htmlhelp==2.1.0 +sphinxcontrib-jquery==4.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==2.0.0 +sphinxcontrib-serializinghtml==2.0.0 +starlette==0.48.0 +stevedore==5.5.0 +twine==6.2.0 +uvicorn==0.38.0 +watchfiles==1.1.1 +websockets==15.0.1 \ No newline at end of file diff --git a/requirements.txt 
b/requirements.txt index e69de29..b36ac9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1,18 @@ +attrs==25.4.0 +beautifulsoup4==4.14.2 +certifi==2025.10.5 +chardet==5.2.0 +charset-normalizer==3.4.4 +click==8.3.0 +commoncode==32.4.0 +ducer==1.2.0 +idna==3.11 +pip==25.2 +PyYAML==6.0.3 +requests==2.32.5 +saneyaml==0.6.1 +setuptools==80.9.0 +soupsieve==2.8 +text-unidecode==1.3 +typing_extensions==4.15.0 +urllib3==2.5.0 diff --git a/setup.cfg b/setup.cfg index 69f850c..49f3d2f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,12 +1,12 @@ [metadata] -name = skeleton +name = purl-validator license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 -description = skeleton +description = purl-validator long_description = file:README.rst long_description_content_type = text/x-rst -url = https://github.com/aboutcode-org/skeleton +url = https://github.com/aboutcode-org/purl-validator author = nexB. Inc. and others author_email = info@aboutcode.org @@ -31,7 +31,7 @@ license_files = README.rst [options] -python_requires = >=3.9 +python_requires = >=3.10 package_dir = =src @@ -43,6 +43,8 @@ setup_requires = setuptools_scm[toml] >= 4 install_requires = + commoncode + ducer [options.packages.find] diff --git a/src/README.rst b/src/README.rst deleted file mode 100644 index ec651fc..0000000 --- a/src/README.rst +++ /dev/null @@ -1,2 +0,0 @@ -Put your Python source code (and installable data) in this directory. - diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/purl_validator/__init__.py b/src/purl_validator/__init__.py new file mode 100644 index 0000000..451c2a6 --- /dev/null +++ b/src/purl_validator/__init__.py @@ -0,0 +1,94 @@ +# +# Copyright (c) nexB Inc. and others. +# SPDX-License-Identifier: Apache-2.0 +# +# Visit https://aboutcode.org and https://github.com/aboutcode-org/ for support and download. +# ScanCode is a trademark of nexB Inc. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pathlib import Path
+import mmap
+
+from commoncode import fileutils
+from packageurl import PackageURL
+import ducer
+
+
+PURL_MAP_LOCATION = Path(__file__).parent / "purls.map"
+
+
+def create_purl_map_entry(purl):
+    """
+    Return the purl map key for `purl` as UTF-8 bytes: "type/namespace/name",
+    or "type/name" when the package has no namespace.
+
+    `purl` is a PackageURL or a purl string; the version, qualifiers and
+    subpath are not part of the key. Raise a ValueError for any other type.
+    """
+    if isinstance(purl, str):
+        # from_string() is relied on to reject strings that are not purls
+        purl = PackageURL.from_string(purl)
+    elif not isinstance(purl, PackageURL):
+        raise ValueError(f"invalid `purl`: {purl}")
+
+    if purl.namespace:
+        entry = f"{purl.type}/{purl.namespace}/{purl.name}"
+    else:
+        entry = f"{purl.type}/{purl.name}"
+    return entry.encode("utf-8")
+
+
+def create_purl_map(purls):
+    """
+    Build a Ducer map file from `purls`, an iterable of PackageURLs or purl
+    strings, and return the path to it. The map is written as "purls.map" in
+    a temporary directory from fileutils.get_temp_dir(), not removed here.
+
+    Each key is created with create_purl_map_entry() and stored with the
+    placeholder value 1.
+    """
+    # Map.build() is fed unique (key, value) pairs sorted by key
+    entries = {create_purl_map_entry(purl) for purl in purls}
+    map_loc = Path(fileutils.get_temp_dir()) / "purls.map"
+    ducer.Map.build(map_loc, sorted((entry, 1) for entry in entries))
+    return map_loc
+
+
+class PurlValidator:
+    """
+    Check purls for membership in a memory-mapped Ducer map of known packages.
+    """
+
+    def __init__(self, purl_map_loc=PURL_MAP_LOCATION):
+        self.purl_map = self.load_map(location=purl_map_loc)
+
+    @classmethod
+    def load_map(cls, location):
+        """
+        Return a Ducer map backed by a read-only memory map of the file at
+        `location`.
+        """
+        with open(location, "rb") as f:
+            # the mapping remains usable after `f` is closed on leaving the block
+            mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
+            return ducer.Map(mm)
+
+    def validate_purl(self, purl):
+        """
+        Return True if the type, namespace and name of `purl`, a PackageURL or
+        a purl string, are a key of the purl map, False otherwise.
+        """
+        # compare to None so that a key stored with the value 0 is still found
+        return self.purl_map.get(create_purl_map_entry(purl)) is not None
diff --git a/src/purl_validator/purls.map b/src/purl_validator/purls.map
new file mode 100644
index 0000000..bf2a09a
Binary files /dev/null and b/src/purl_validator/purls.map differ
diff --git a/tests/README.rst b/tests/README.rst
deleted file mode 100644
index d94783e..0000000
--- a/tests/README.rst
+++ /dev/null
@@ -1,2 +0,0 @@
-Put your Python test modules in this directory.
-
diff --git a/tests/test_purl_validator.py b/tests/test_purl_validator.py
new file mode 100644
index 0000000..844c26a
--- /dev/null
+++ b/tests/test_purl_validator.py
@@ -0,0 +1,71 @@
+#
+# Copyright (c) nexB Inc. and others.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Visit https://aboutcode.org and https://github.com/aboutcode-org/ for support and download.
+# ScanCode is a trademark of nexB Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import purl_validator
+from packageurl import PackageURL
+from commoncode import fileutils
+from commoncode.testcase import FileBasedTesting
+
+
+class TestPurlValidator(FileBasedTesting):
+    def setUp(self):
+        self.created_purl_maps = []
+        return super().setUp()
+
+    def tearDown(self):
+        # delete the directory holding each purl map a test registered in setUp's list
+        for purl_map in self.created_purl_maps:
+            fileutils.delete(purl_map.parent)
+        return super().tearDown()
+
+    def test_purl_validator_create_purl_map_entry(self):
+        test_purl1 = PackageURL(type="npm", namespace="@test", name="test", version="1.0")
+        test_purl2 = "pkg:npm/test2@2.0"
+        test_purl3 = "not-a-purl"
+        test_purl4 = []
+
+        self.assertEqual(b"npm/@test/test", purl_validator.create_purl_map_entry(test_purl1))
+        self.assertEqual(b"npm/test2", purl_validator.create_purl_map_entry(test_purl2))
+
+        with self.assertRaises(ValueError):
+            purl_validator.create_purl_map_entry(test_purl3)
+
+        with self.assertRaises(ValueError):
+            purl_validator.create_purl_map_entry(test_purl4)
+
+    def test_purl_validator_create_purl_map(self):
+        test_purl1 = PackageURL(type="npm", namespace="@test", name="test", version="1.0")
+        test_purl2 = "pkg:npm/test2@2.0"
+        test_purl3 = "not-a-purl"
+        test_purl4 = []
+
+        purl_map_loc = purl_validator.create_purl_map([test_purl1, test_purl2])
+        self.created_purl_maps.append(purl_map_loc)
+
+        purl_map = purl_validator.PurlValidator.load_map(purl_map_loc)
+        expected_results = [(b"npm/@test/test", 1), (b"npm/test2", 1)]
+        results = [(k, v) for k, v in purl_map.items()]
+        self.assertEqual(expected_results, results)
+
+        with self.assertRaises(ValueError):
+            purl_validator.create_purl_map([test_purl3])
+
+        with self.assertRaises(ValueError):
+            purl_validator.create_purl_map([test_purl4])