From 2c3bd7592010779cb4a5a25a1e322c5413afe13c Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 15:03:04 +0200 Subject: [PATCH 01/10] Create purl2vcs library #374 Reference: https://github.com/nexB/purldb/issues/374 Signed-off-by: Philippe Ombredanne --- Makefile | 1 + packagedb/from_purl.py | 4 +- .../commands/create_source_repo_packages.py | 4 +- .../commands/get_source_download_url.py | 2 +- purl2vcs/CHANGELOG.rst | 8 + purl2vcs/README.rst | 51 ++++ purl2vcs/__init__.py | 0 {packagedb => purl2vcs}/find_source_repo.py | 0 .../tests/test_find_source_repo.py | 36 +-- .../find_source_repo/tags_commits.txt | 0 .../find_source_repo/tags_commits_list.txt | 0 pyproject-purl2vcs.toml | 241 ++++++++++++++++++ setup.cfg | 1 + 13 files changed, 325 insertions(+), 23 deletions(-) create mode 100644 purl2vcs/CHANGELOG.rst create mode 100644 purl2vcs/README.rst create mode 100644 purl2vcs/__init__.py rename {packagedb => purl2vcs}/find_source_repo.py (100%) rename {packagedb => purl2vcs}/tests/test_find_source_repo.py (91%) rename {packagedb => purl2vcs}/tests/testfiles/find_source_repo/tags_commits.txt (100%) rename {packagedb => purl2vcs}/tests/testfiles/find_source_repo/tags_commits_list.txt (100%) create mode 100644 pyproject-purl2vcs.toml diff --git a/Makefile b/Makefile index afed4013..691ba6fc 100644 --- a/Makefile +++ b/Makefile @@ -130,6 +130,7 @@ test: ${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs packagedb/tests/test_throttling.py ${ACTIVATE} DJANGO_SETTINGS_MODULE=matchcode_project.settings ${PYTHON_EXE} -m pytest -vvs matchcode_pipeline ${ACTIVATE} ${PYTHON_EXE} -m pytest -vvs purldb-toolkit/ + ${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs purl2vcs shell: ${MANAGE} shell diff --git a/packagedb/from_purl.py b/packagedb/from_purl.py index 1e0c7b5b..0706ac8a 100644 --- a/packagedb/from_purl.py +++ b/packagedb/from_purl.py @@ -14,8 +14,8 @@ from rest_framework 
import viewsets from rest_framework.response import Response -from packagedb.find_source_repo import get_package_object_from_purl -from packagedb.find_source_repo import get_source_repo +from purl2vcs.find_source_repo import get_package_object_from_purl +from purl2vcs.find_source_repo import get_source_repo from packagedb.serializers import PurltoGitRepoResponseSerializer from packagedb.serializers import PurltoGitRepoSerializer diff --git a/packagedb/management/commands/create_source_repo_packages.py b/packagedb/management/commands/create_source_repo_packages.py index 70617278..ba15c4bd 100644 --- a/packagedb/management/commands/create_source_repo_packages.py +++ b/packagedb/management/commands/create_source_repo_packages.py @@ -15,8 +15,8 @@ from minecode.management.commands import VerboseCommand from minecode.model_utils import add_package_to_scan_queue -from packagedb.find_source_repo import add_source_package_to_package_set -from packagedb.find_source_repo import get_package_object_from_purl +from purl2vcs.find_source_repo import add_source_package_to_package_set +from purl2vcs.find_source_repo import get_package_object_from_purl from packagedb.models import Package from packagedb.models import PackageContentType diff --git a/packagedb/management/commands/get_source_download_url.py b/packagedb/management/commands/get_source_download_url.py index 17d7cc8a..8a0c1ba4 100644 --- a/packagedb/management/commands/get_source_download_url.py +++ b/packagedb/management/commands/get_source_download_url.py @@ -11,7 +11,7 @@ import sys from minecode.management.commands import VerboseCommand -from packagedb.find_source_repo import get_source_repo_and_add_to_package_set +from purl2vcs.find_source_repo import get_source_repo_and_add_to_package_set TRACE = False diff --git a/purl2vcs/CHANGELOG.rst b/purl2vcs/CHANGELOG.rst new file mode 100644 index 00000000..fd6fa1ea --- /dev/null +++ b/purl2vcs/CHANGELOG.rst @@ -0,0 +1,8 @@ +Changelog +========= + + +v2.0.0 +------ + +Initial 
release. diff --git a/purl2vcs/README.rst b/purl2vcs/README.rst new file mode 100644 index 00000000..60d7fc2f --- /dev/null +++ b/purl2vcs/README.rst @@ -0,0 +1,51 @@ +purl2vcs +========== + +purl2vcs is an add-on library working with the PurlDB to find the version control system (VCS) URL of a package and detect the commit, and tags for a given version. + +In the future, it will also find paths and branches, which is useful for monorepos. + +Usage +------- + +Installation +------------ + +Requirements +############ + +* install purldb dependencies +* `pip install purl2vcs` + + +Funding +------- + +This project was funded through the NGI Assure Fund https://nlnet.nl/assure, a +fund established by NLnet https://nlnet.nl/ with financial support from the +European Commission's Next Generation Internet programme, under the aegis of DG +Communications Networks, Content and Technology under grant agreement No 957073. + +This project is also funded through grants from the Google Summer of Code +program, continuing support and sponsoring from nexB Inc. and generous +donations from multiple sponsors. + + +License +------- + +Copyright (c) nexB Inc. and others. All rights reserved. + +purldb is a trademark of nexB Inc. + +SPDX-License-Identifier: Apache-2.0 + +purl2vcs is licensed under the Apache License version 2.0. + +See https://www.apache.org/licenses/LICENSE-2.0 for the license text. + +See https://creativecommons.org/licenses/by-sa/4.0/legalcode for the license text. + +See https://github.com/nexB/purldb for support or download. + +See https://aboutcode.org for more information about nexB OSS projects. 
diff --git a/purl2vcs/__init__.py b/purl2vcs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packagedb/find_source_repo.py b/purl2vcs/find_source_repo.py similarity index 100% rename from packagedb/find_source_repo.py rename to purl2vcs/find_source_repo.py diff --git a/packagedb/tests/test_find_source_repo.py b/purl2vcs/tests/test_find_source_repo.py similarity index 91% rename from packagedb/tests/test_find_source_repo.py rename to purl2vcs/tests/test_find_source_repo.py index 2268f4eb..dab6fc3e 100644 --- a/packagedb/tests/test_find_source_repo.py +++ b/purl2vcs/tests/test_find_source_repo.py @@ -15,15 +15,15 @@ from django.test import TestCase from packageurl import PackageURL -from packagedb.find_source_repo import convert_repo_urls_to_purls -from packagedb.find_source_repo import fetch_response -from packagedb.find_source_repo import get_repo_urls -from packagedb.find_source_repo import get_source_repo -from packagedb.find_source_repo import get_source_urls_from_package_data_and_resources -from packagedb.find_source_repo import get_tag_and_commit -from packagedb.find_source_repo import get_tags_and_commits -from packagedb.find_source_repo import get_urls_from_package_data -from packagedb.find_source_repo import get_urls_from_package_resources +from purl2vcs.find_source_repo import convert_repo_urls_to_purls +from purl2vcs.find_source_repo import fetch_response +from purl2vcs.find_source_repo import get_repo_urls +from purl2vcs.find_source_repo import get_source_repo +from purl2vcs.find_source_repo import get_source_urls_from_package_data_and_resources +from purl2vcs.find_source_repo import get_tag_and_commit +from purl2vcs.find_source_repo import get_tags_and_commits +from purl2vcs.find_source_repo import get_urls_from_package_data +from purl2vcs.find_source_repo import get_urls_from_package_resources from packagedb.models import Package from packagedb.models import PackageContentType from packagedb.models import Resource @@ -123,8 
+123,8 @@ def setUp(self): download_url="https://repo1.maven.org/maven2/com/foo/bar/11/bar.11.jar", ) - @mock.patch("packagedb.find_source_repo.fetch_response") - @mock.patch("packagedb.find_source_repo.get_urls_from_text") + @mock.patch("purl2vcs.find_source_repo.fetch_response") + @mock.patch("purl2vcs.find_source_repo.get_urls_from_text") def test_get_source_purl_from_package_data(self, mock_text, mock_response): mock_response.side_effect = [ None, @@ -154,9 +154,9 @@ def test_get_source_purl_from_package_resources(self): "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/tree/master/oauth-oidc-sdk/src/main/resources/META-INF/MANIFEST.MF", ] - @mock.patch("packagedb.find_source_repo.get_urls_from_package_data") + @mock.patch("purl2vcs.find_source_repo.get_urls_from_package_data") @mock.patch( - "packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package" + "purl2vcs.find_source_repo.get_merged_ancestor_package_from_maven_package" ) def test_get_source_purl_from_package_data_and_resources(self, mock1, mock2): mock1.return_value = None @@ -179,9 +179,9 @@ def test_get_source_purl_from_package_data_and_resources(self, mock1, mock2): "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", ] - @mock.patch("packagedb.find_source_repo.get_urls_from_package_data") + @mock.patch("purl2vcs.find_source_repo.get_urls_from_package_data") @mock.patch( - "packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package" + "purl2vcs.find_source_repo.get_merged_ancestor_package_from_maven_package" ) def test_get_repo_urls(self, mock1, mock2): mock1.return_value = None @@ -279,7 +279,7 @@ def test_convert_repo_urls_to_purls(self): ] def test_get_tags_commits(self): - with patch("packagedb.find_source_repo.fetch_response"): + with patch("purl2vcs.find_source_repo.fetch_response"): with patch("subprocess.getoutput") as mock_popen: mock_popen.return_value = open(TEST_DATA).read() with 
open(TAGS_COMMITS_FILE) as f: @@ -302,7 +302,7 @@ def test_get_tags_commits(self): ) == ("9.35", "fdc8117af75b192e3f8afcc0119c904b02686af8") def test_get_source_repo(self): - with patch("packagedb.find_source_repo.fetch_response"): + with patch("purl2vcs.find_source_repo.fetch_response"): with patch("subprocess.getoutput") as mock_popen: mock_popen.return_value = open(TEST_DATA).read() assert get_source_repo( @@ -316,7 +316,7 @@ def test_get_source_repo(self): subpath=None, ) - @mock.patch("packagedb.find_source_repo.requests.get") + @mock.patch("purl2vcs.find_source_repo.requests.get") def test_fetch_response(self, mock_get): mock_get.return_value.status_code = 200 mock_get.return_value.text = "abc" diff --git a/packagedb/tests/testfiles/find_source_repo/tags_commits.txt b/purl2vcs/tests/testfiles/find_source_repo/tags_commits.txt similarity index 100% rename from packagedb/tests/testfiles/find_source_repo/tags_commits.txt rename to purl2vcs/tests/testfiles/find_source_repo/tags_commits.txt diff --git a/packagedb/tests/testfiles/find_source_repo/tags_commits_list.txt b/purl2vcs/tests/testfiles/find_source_repo/tags_commits_list.txt similarity index 100% rename from packagedb/tests/testfiles/find_source_repo/tags_commits_list.txt rename to purl2vcs/tests/testfiles/find_source_repo/tags_commits_list.txt diff --git a/pyproject-purl2vcs.toml b/pyproject-purl2vcs.toml new file mode 100644 index 00000000..1edc4bfd --- /dev/null +++ b/pyproject-purl2vcs.toml @@ -0,0 +1,241 @@ +[build-system] +requires = [ "flot>=0.7.0" ] +build-backend = "flot.buildapi" + +[project] +name = "purl2vcs" +# keep version same as purldb for now +version = "2.0.0" +description = "purl2vcs is an add-on library working with the PurlDB to find the version control system (VCS) URL of a package and detect the commit, tags and path for a given version." +readme = "purl2vcs/README.rst" +license = { text = "Apache-2.0" } +requires-python = ">=3.8" +authors = [ + { name = "nexB. Inc. 
and others", email = "info@aboutcode.org" }, +] +keywords = [ + "package-url", + "purl", + "git", + "version", + "hg", + "svn", + "tag", + "commit", + "open source", + "scan", + "package", + "dependency", + "sca", +] + +classifiers = [ "SBOM", + "spdx", + "cyclonedx", + + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development", + "Topic :: Utilities", +] + + + +dependencies = [ + # get packagedb, purldb, minecode and related module for now from this single package + "purldb >= 2.0.0", + "requests >= 2.7.0", + "scancode-toolkit >= 32.0.0", + "packageurl_python >= 0.9.0", +] + +[project.urls] +Homepage = "https://github.com/nexB/purldb" + + + +[tool.bumpversion] +current_version = "2.0.0" +allow_dirty = true + +files = [ + { filename = "pyproject-purl2vcs.toml" }, + +] + + +[tool.flot] + +includes = [ + "purl2vcs/__init__.py", + "purl2vcs/find_source_repo.py", +] + +excludes = [ + # Python compiled files + "**/*.py[cod]", + "**/*.egg-info", + # Various junk and temp files + "**/.DS_Store", + "**/*~", + "**/.*.sw[po]", + "**/.ve", + "**/*.bak", + "**/.ipynb_checkpoints", +] + +metadata_files = [ + "apache-2.0.LICENSE", + "NOTICE", + "purl2vcs/CHANGELOG.rst", + "purl2vcs/README.rst", + "CODE_OF_CONDUCT.rst", +] + +editable_paths = [ "purl2vcs", "purl2vcs/tests" ] + +sdist_extra_includes = [ + "purl2vcs/tests", + ".gitignore", + "NOTICE", + "configure*", +] + + +[tool.isort] +force_single_line = "True" +line_length = 88 +sections = "FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER" +skip = "doc,venv,tmp,thirdparty,build,dist" + + +[project.scripts] +scancode = "scancode.cli:scancode" 
+scancode-reindex-licenses = "licensedcode.reindex:reindex_licenses" +scancode-license-data = "licensedcode.license_db:dump_scancode_license_data" +regen-package-docs = "packagedcode.regen_package_docs:regen_package_docs" + + +# These are configurations for ScanCode plugins as entry points. +# Each plugin entry hast this form: +# plugin-name = f"ully.qualified.module:PluginClass" +# where plugin-name must be a unique arbitrary name for this entrypoint. + +# scancode_pre_scan is the entry point for pre_scan plugins executed before the +# scans. See also plugincode.pre_scan module for details and doc. +[project.entry-points.scancode_pre_scan] +ignore = "scancode.plugin_ignore:ProcessIgnore" +facet = "summarycode.facet:AddFacet" + + +# scancode_scan is the entry point for scan plugins that run a scan after the +# pre_scan plugins and before the post_scan plugins. See also plugincode.scan +# module for details and doc. +[project.entry-points.scancode_scan] +info = "scancode.plugin_info:InfoScanner" +licenses = "licensedcode.plugin_license:LicenseScanner" +copyrights = "cluecode.plugin_copyright:CopyrightScanner" +packages = "packagedcode.plugin_package:PackageScanner" +emails = "cluecode.plugin_email:EmailScanner" +urls = "cluecode.plugin_url:UrlScanner" +generated = "summarycode.generated:GeneratedCodeDetector" + + +# scancode_post_scan is the entry point for post_scan plugins executed after the +# scan plugins and before the output plugins. See also plugincode.post_scan +# module for details and doc. 
+[project.entry-points.scancode_post_scan] +summary = "summarycode.summarizer:ScanSummary" +tallies = "summarycode.tallies:Tallies" +tallies-with-details = "summarycode.tallies:TalliesWithDetails" +tallies-key-files = "summarycode.tallies:KeyFilesTallies" +tallies-by-facet = "summarycode.tallies:FacetTallies" +license-clarity-score = "summarycode.score:LicenseClarityScore" +license-policy = "licensedcode.plugin_license_policy:LicensePolicy" +mark-source = "scancode.plugin_mark_source:MarkSource" +filter-clues = "cluecode.plugin_filter_clues:RedundantCluesFilter" +consolidate = "summarycode.plugin_consolidate:Consolidator" +license-references = "licensedcode.licenses_reference:LicenseReference" +todo = "summarycode.todo:AmbiguousDetectionsToDoPlugin" +classify = "summarycode.classify_plugin:FileClassifier" + + +# scancode_output_filter is the entry point for filter plugins executed after +# the post-scan plugins and used by the output plugins to exclude/filter certain +# files or directories from the codebase. See also plugincode.post_scan module +# for details and doc. +[project.entry-points.scancode_output_filter] +only-findings = "scancode.plugin_only_findings:OnlyFindings" +ignore-copyrights = "cluecode.plugin_ignore_copyrights:IgnoreCopyrights" + + +# scancode_output is the entry point for output plugins that write a scan output +# in a given format at the end of a scan. See also plugincode._output module for +# details and doc. 
+[project.entry-points.scancode_output] +html = "formattedcode.output_html:HtmlOutput" +html-app = "formattedcode.output_html:HtmlAppOutput" +json = "formattedcode.output_json:JsonCompactOutput" +json-pp = "formattedcode.output_json:JsonPrettyOutput" +spdx-tv = "formattedcode.output_spdx:SpdxTvOutput" +spdx-rdf = "formattedcode.output_spdx:SpdxRdfOutput" +csv = "formattedcode.output_csv:CsvOutput" +jsonlines = "formattedcode.output_jsonlines:JsonLinesOutput" +template = "formattedcode.output_html:CustomTemplateOutput" +debian = "formattedcode.output_debian:DebianCopyrightOutput" +yaml = "formattedcode.output_yaml:YamlOutput" +cyclonedx = "formattedcode.output_cyclonedx:CycloneDxJsonOutput" +cyclonedx-xml = "formattedcode.output_cyclonedx:CycloneDxXmlOutput" + + +[tool.pytest.ini_options] +norecursedirs = [ + ".git", + "bin", + "dist", + "build", + "_build", + "dist", + "etc", + "local", + "ci", + "docs", + "man", + "share", + "samples", + ".cache", + ".settings", + "Include", + "include", + "Lib", + "lib", + "lib64", + "Lib64", + "Scripts", + "thirdparty", + "tmp", + "venv", + "tests/data", + ".eggs", + "src/*/data", + "tests/*/data", + "src/licensedcode/_vendor" +] + +python_files = "*.py" + +python_classes = "Test" +python_functions = "test" + +addopts = [ + "-rfExXw", + "--strict-markers", + "--doctest-modules" +] diff --git a/setup.cfg b/setup.cfg index 399f4157..08f6e82a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -77,6 +77,7 @@ testing = aboutcode-toolkit >= 6.0.0 black mock + flot docs = Sphinx>=5.0.2 From 7e63d2b655769e319d83ede5b78607f106483796 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 15:31:42 +0200 Subject: [PATCH 02/10] Create purl2vcs library #374 Add documentation and cosmetic refactor Reference: https://github.com/nexB/purldb/issues/374 Signed-off-by: Philippe Ombredanne --- purl2vcs/README.rst | 18 ++++ purl2vcs/find_source_repo.py | 106 +++++++++++++----------- purl2vcs/tests/test_find_source_repo.py | 24 +++--- 3 files 
changed, 88 insertions(+), 60 deletions(-) diff --git a/purl2vcs/README.rst b/purl2vcs/README.rst index 60d7fc2f..7e723333 100644 --- a/purl2vcs/README.rst +++ b/purl2vcs/README.rst @@ -8,6 +8,24 @@ In the future, it will also find paths and branches, which is useful for monorep Usage ------- +- First, import the main module: ``from purl2vcs import find_source_repo`` + +- To use the functions you first need to acquire some Package objects: + Use the ``get_package_object_from_purl(package_url)`` passing a PURL string to get an object from the database + +- To find the source repository of a Package, call `get_source_repo(package)` + to get a PackageURL object back. + +- To generate all the source repository URLs of a Package, call `get_repo_urls(package)`. + +- To convert a single source repo URL to PURLs, call ``convert_repo_url_to_purls`` +- To convert a list of source repo URLs to PURLs, call ``convert_repo_urls_to_purls`` + +- To find the commit or tags from a source repo PURL use ``get_tags_and_commits`` + +- The low level ``get_tags_and_commits`` is used in ``find_package_version_tag_and_commit`` to find the tag and commit of a given package ``version`` in a source repo PURL. 
+ + Installation ------------ diff --git a/purl2vcs/find_source_repo.py b/purl2vcs/find_source_repo.py index 62ebabe5..f060685d 100644 --- a/purl2vcs/find_source_repo.py +++ b/purl2vcs/find_source_repo.py @@ -9,22 +9,18 @@ import logging import subprocess -from typing import Generator -from typing import List +from typing import Generator, List from urllib.parse import urlparse import requests from packageurl import PackageURL from packageurl.contrib.django.utils import purl_to_lookups -from packageurl.contrib.purl2url import get_download_url -from packageurl.contrib.purl2url import purl2url +from packageurl.contrib.purl2url import get_download_url, purl2url from scancode.api import get_urls as get_urls_from_location from minecode.model_utils import add_package_to_scan_queue from minecode.visitors.maven import get_merged_ancestor_package_from_maven_package -from packagedb.models import Package -from packagedb.models import PackageContentType -from packagedb.models import PackageSet +from packagedb.models import Package, PackageContentType, PackageSet logger = logging.getLogger(__name__) @@ -248,53 +244,67 @@ def get_source_urls_from_package_data_and_resources(package: Package) -> List[st def convert_repo_urls_to_purls(source_urls): """ - Convert a source URL to a purl + Yield PURLs from a list from a list of source repository URLs. + """ + for source_url in source_urls or []: + yield from convert_repo_url_to_purls(source_url) + + +def convert_repo_url_to_purls(source_url): + """ + Yield PURLs from a single source repository URL. 
""" url_hints = [ "github", "gitlab", "bitbucket", ] - if not source_urls: - return - for source_url in source_urls: - # git@github.com+https://github.com/graphql-java/java-dataloader.git - if source_url.startswith("git@github.com+"): - _, _, source_url = source_url.partition("+") - # https+//github.com/graphql-java-kickstart/graphql-java-servlet.git - if source_url.startswith("https+//"): - # convert https+// to https:// - source_url = source_url.replace("https+//", "https://") - if ( - source_url.startswith("git+https://") or source_url.startswith("git://") - ) and "@" in source_url: - # remove the commit from the end of the URL - source_url, _, _ = source_url.rpartition("@") - # remove .git from the end of the URL - if source_url.endswith(".git"): - source_url, _, _ = source_url.rpartition(".git") - if source_url.startswith("git://"): - # remove git:// from the beginning of the URL - _, _, source_url = source_url.partition("git://") - if ":" in source_url: - # convert : to / - source_url = source_url.replace(":", "/") - source_url = f"https://{source_url}" - urlparse_result = urlparse(source_url) - path_segments = urlparse_result.path.split("/") - if not len(path_segments) > 2: - continue - namespace = path_segments[1] - name = path_segments[2] - if not name: - continue - for url_hint in url_hints: - if url_hint in urlparse_result.netloc: - yield PackageURL( - type=url_hint, - namespace=namespace, - name=name, - ) + # URL like: git@github.com+https://github.com/graphql-java/java-dataloader.git + if source_url.startswith("git@github.com+"): + _, _, source_url = source_url.partition("+") + + # VCS URL like: https+//github.com/graphql-java-kickstart/graphql-java-servlet.git + if source_url.startswith("https+//"): + # convert https+// to https:// + source_url = source_url.replace("https+//", "https://") + + if ( + source_url.startswith("git+https://") or source_url.startswith("git://") + ) and "@" in source_url: + # remove the commit from the end of the URL + 
source_url, _, _ = source_url.rpartition("@") + + # remove .git from the end of the URL + if source_url.endswith(".git"): + source_url, _, _ = source_url.rpartition(".git") + + # git:: URLs + if source_url.startswith("git://"): + # remove git:// from the beginning of the URL + _, _, source_url = source_url.partition("git://") + if ":" in source_url: + # convert : to / + source_url = source_url.replace(":", "/") + source_url = f"https://{source_url}" + + urlparse_result = urlparse(source_url) + + path_segments = urlparse_result.path.split("/") + if not len(path_segments) > 2: + continue + + namespace = path_segments[1] + name = path_segments[2] + if not name: + continue + + for url_hint in url_hints: + if url_hint in urlparse_result.netloc: + yield PackageURL( + type=url_hint, + namespace=namespace, + name=name, + ) def get_urls_from_package_resources(package): diff --git a/purl2vcs/tests/test_find_source_repo.py b/purl2vcs/tests/test_find_source_repo.py index dab6fc3e..38bce2a2 100644 --- a/purl2vcs/tests/test_find_source_repo.py +++ b/purl2vcs/tests/test_find_source_repo.py @@ -15,18 +15,18 @@ from django.test import TestCase from packageurl import PackageURL -from purl2vcs.find_source_repo import convert_repo_urls_to_purls -from purl2vcs.find_source_repo import fetch_response -from purl2vcs.find_source_repo import get_repo_urls -from purl2vcs.find_source_repo import get_source_repo -from purl2vcs.find_source_repo import get_source_urls_from_package_data_and_resources -from purl2vcs.find_source_repo import get_tag_and_commit -from purl2vcs.find_source_repo import get_tags_and_commits -from purl2vcs.find_source_repo import get_urls_from_package_data -from purl2vcs.find_source_repo import get_urls_from_package_resources -from packagedb.models import Package -from packagedb.models import PackageContentType -from packagedb.models import Resource +from packagedb.models import Package, PackageContentType, Resource +from purl2vcs.find_source_repo import ( + 
convert_repo_urls_to_purls, + fetch_response, + get_repo_urls, + get_source_repo, + get_source_urls_from_package_data_and_resources, + get_tag_and_commit, + get_tags_and_commits, + get_urls_from_package_data, + get_urls_from_package_resources, +) BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "testfiles", "find_source_repo", "tags_commits.txt") From cf70e1bdae89977ea56a7472e239dad8df7657a6 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 15:41:12 +0200 Subject: [PATCH 03/10] Make purl2vcs tests pass #374 Reference: https://github.com/nexB/purldb/issues/374 Signed-off-by: Philippe Ombredanne --- purl2vcs/find_source_repo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/purl2vcs/find_source_repo.py b/purl2vcs/find_source_repo.py index f060685d..16730464 100644 --- a/purl2vcs/find_source_repo.py +++ b/purl2vcs/find_source_repo.py @@ -156,6 +156,7 @@ def get_source_repo_and_add_to_package_set(): source_package = Package.objects.for_package_url( purl_str=str(source_purl) ).get_or_none() + if not source_package: source_package, _created = Package.objects.get_or_create( type=source_purl.type, @@ -291,12 +292,12 @@ def convert_repo_url_to_purls(source_url): path_segments = urlparse_result.path.split("/") if not len(path_segments) > 2: - continue + return namespace = path_segments[1] name = path_segments[2] if not name: - continue + return for url_hint in url_hints: if url_hint in urlparse_result.netloc: From 9b1c7d930e4acbc2876e1264739ede55fcce344f Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 15:46:31 +0200 Subject: [PATCH 04/10] Streamline and update main purldb version Signed-off-by: Philippe Ombredanne --- CHANGELOG.rst | 4 ++-- setup.cfg | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2e1e556c..25e02745 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,10 +9,11 @@ Next Release v4.0.0 
------------- +--------- - Add `/api/docs` Swagger API documentation for API endpoints. + v3.0.0 ------- @@ -28,7 +29,6 @@ This is a major release with major API changes - This is now using the latest version of ScanCode toolkit - v2.0.0 ------ diff --git a/setup.cfg b/setup.cfg index 08f6e82a..7cf6020d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = purldb -version = 2.0.0 +version = 4.0.0 license_files = LICENSE AUTHORS.rst From dc5ae1648370b32f5a06f484fe4619ec5d0f81a8 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 15:55:11 +0200 Subject: [PATCH 05/10] Remove cruft from purl2vcs pyproject #374 Reference: https://github.com/nexB/purldb/issues/374 Signed-off-by: Philippe Ombredanne --- pyproject-purl2vcs.toml | 151 +++++----------------------------------- 1 file changed, 16 insertions(+), 135 deletions(-) diff --git a/pyproject-purl2vcs.toml b/pyproject-purl2vcs.toml index 1edc4bfd..e5ce6c94 100644 --- a/pyproject-purl2vcs.toml +++ b/pyproject-purl2vcs.toml @@ -5,7 +5,7 @@ build-backend = "flot.buildapi" [project] name = "purl2vcs" # keep version same as purldb for now -version = "2.0.0" +version = "1.0.0" description = "purl2vcs is an add-on library working with the PurlDB to find the version control system (VCS) URL of a package and detect the commit, tags and path for a given version." 
readme = "purl2vcs/README.rst" license = { text = "Apache-2.0" } @@ -47,7 +47,6 @@ classifiers = [ "SBOM", ] - dependencies = [ # get packagedb, purldb, minecode and related module for now from this single package "purldb >= 2.0.0", @@ -60,9 +59,23 @@ dependencies = [ Homepage = "https://github.com/nexB/purldb" +[project.optional-dependencies] + +testing = [ + "flot", + "pytest >= 6, != 7.0.0", + "pytest-xdist >= 2", + "aboutcode-toolkit >= 7.0.2", + "pycodestyle >= 2.8.0", + "twine", + "black", + "isort", + "pytest-rerunfailures", + "bump-my-version", +] [tool.bumpversion] -current_version = "2.0.0" +current_version = "1.0.0" allow_dirty = true files = [ @@ -107,135 +120,3 @@ sdist_extra_includes = [ "NOTICE", "configure*", ] - - -[tool.isort] -force_single_line = "True" -line_length = 88 -sections = "FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER" -skip = "doc,venv,tmp,thirdparty,build,dist" - - -[project.scripts] -scancode = "scancode.cli:scancode" -scancode-reindex-licenses = "licensedcode.reindex:reindex_licenses" -scancode-license-data = "licensedcode.license_db:dump_scancode_license_data" -regen-package-docs = "packagedcode.regen_package_docs:regen_package_docs" - - -# These are configurations for ScanCode plugins as entry points. -# Each plugin entry hast this form: -# plugin-name = f"ully.qualified.module:PluginClass" -# where plugin-name must be a unique arbitrary name for this entrypoint. - -# scancode_pre_scan is the entry point for pre_scan plugins executed before the -# scans. See also plugincode.pre_scan module for details and doc. -[project.entry-points.scancode_pre_scan] -ignore = "scancode.plugin_ignore:ProcessIgnore" -facet = "summarycode.facet:AddFacet" - - -# scancode_scan is the entry point for scan plugins that run a scan after the -# pre_scan plugins and before the post_scan plugins. See also plugincode.scan -# module for details and doc. 
-[project.entry-points.scancode_scan] -info = "scancode.plugin_info:InfoScanner" -licenses = "licensedcode.plugin_license:LicenseScanner" -copyrights = "cluecode.plugin_copyright:CopyrightScanner" -packages = "packagedcode.plugin_package:PackageScanner" -emails = "cluecode.plugin_email:EmailScanner" -urls = "cluecode.plugin_url:UrlScanner" -generated = "summarycode.generated:GeneratedCodeDetector" - - -# scancode_post_scan is the entry point for post_scan plugins executed after the -# scan plugins and before the output plugins. See also plugincode.post_scan -# module for details and doc. -[project.entry-points.scancode_post_scan] -summary = "summarycode.summarizer:ScanSummary" -tallies = "summarycode.tallies:Tallies" -tallies-with-details = "summarycode.tallies:TalliesWithDetails" -tallies-key-files = "summarycode.tallies:KeyFilesTallies" -tallies-by-facet = "summarycode.tallies:FacetTallies" -license-clarity-score = "summarycode.score:LicenseClarityScore" -license-policy = "licensedcode.plugin_license_policy:LicensePolicy" -mark-source = "scancode.plugin_mark_source:MarkSource" -filter-clues = "cluecode.plugin_filter_clues:RedundantCluesFilter" -consolidate = "summarycode.plugin_consolidate:Consolidator" -license-references = "licensedcode.licenses_reference:LicenseReference" -todo = "summarycode.todo:AmbiguousDetectionsToDoPlugin" -classify = "summarycode.classify_plugin:FileClassifier" - - -# scancode_output_filter is the entry point for filter plugins executed after -# the post-scan plugins and used by the output plugins to exclude/filter certain -# files or directories from the codebase. See also plugincode.post_scan module -# for details and doc. -[project.entry-points.scancode_output_filter] -only-findings = "scancode.plugin_only_findings:OnlyFindings" -ignore-copyrights = "cluecode.plugin_ignore_copyrights:IgnoreCopyrights" - - -# scancode_output is the entry point for output plugins that write a scan output -# in a given format at the end of a scan. 
See also plugincode._output module for -# details and doc. -[project.entry-points.scancode_output] -html = "formattedcode.output_html:HtmlOutput" -html-app = "formattedcode.output_html:HtmlAppOutput" -json = "formattedcode.output_json:JsonCompactOutput" -json-pp = "formattedcode.output_json:JsonPrettyOutput" -spdx-tv = "formattedcode.output_spdx:SpdxTvOutput" -spdx-rdf = "formattedcode.output_spdx:SpdxRdfOutput" -csv = "formattedcode.output_csv:CsvOutput" -jsonlines = "formattedcode.output_jsonlines:JsonLinesOutput" -template = "formattedcode.output_html:CustomTemplateOutput" -debian = "formattedcode.output_debian:DebianCopyrightOutput" -yaml = "formattedcode.output_yaml:YamlOutput" -cyclonedx = "formattedcode.output_cyclonedx:CycloneDxJsonOutput" -cyclonedx-xml = "formattedcode.output_cyclonedx:CycloneDxXmlOutput" - - -[tool.pytest.ini_options] -norecursedirs = [ - ".git", - "bin", - "dist", - "build", - "_build", - "dist", - "etc", - "local", - "ci", - "docs", - "man", - "share", - "samples", - ".cache", - ".settings", - "Include", - "include", - "Lib", - "lib", - "lib64", - "Lib64", - "Scripts", - "thirdparty", - "tmp", - "venv", - "tests/data", - ".eggs", - "src/*/data", - "tests/*/data", - "src/licensedcode/_vendor" -] - -python_files = "*.py" - -python_classes = "Test" -python_functions = "test" - -addopts = [ - "-rfExXw", - "--strict-markers", - "--doctest-modules" -] From fbe3e7dbc75866a3a240822eb46a79cdcb69e1b9 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 23:19:30 +0200 Subject: [PATCH 06/10] Prepare move to purl2vcs dir #374 Reference: https://github.com/nexB/purldb/issues/374 Signed-off-by: Philippe Ombredanne --- purl2vcs/{ => src/purl2vcs}/__init__.py | 0 .../{ => src/purl2vcs}/find_source_repo.py | 0 pyproject-purl2vcs.toml | 24 +++++++------------ 3 files changed, 9 insertions(+), 15 deletions(-) rename purl2vcs/{ => src/purl2vcs}/__init__.py (100%) rename purl2vcs/{ => src/purl2vcs}/find_source_repo.py (100%) diff --git 
a/purl2vcs/__init__.py b/purl2vcs/src/purl2vcs/__init__.py similarity index 100% rename from purl2vcs/__init__.py rename to purl2vcs/src/purl2vcs/__init__.py diff --git a/purl2vcs/find_source_repo.py b/purl2vcs/src/purl2vcs/find_source_repo.py similarity index 100% rename from purl2vcs/find_source_repo.py rename to purl2vcs/src/purl2vcs/find_source_repo.py diff --git a/pyproject-purl2vcs.toml b/pyproject-purl2vcs.toml index e5ce6c94..8268efac 100644 --- a/pyproject-purl2vcs.toml +++ b/pyproject-purl2vcs.toml @@ -7,7 +7,7 @@ name = "purl2vcs" # keep version same as purldb for now version = "1.0.0" description = "purl2vcs is an add-on library working with the PurlDB to find the version control system (VCS) URL of a package and detect the commit, tags and path for a given version." -readme = "purl2vcs/README.rst" +readme = "README.rst" license = { text = "Apache-2.0" } requires-python = ">=3.8" authors = [ @@ -29,10 +29,7 @@ keywords = [ "sca", ] -classifiers = [ "SBOM", - "spdx", - "cyclonedx", - +classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Programming Language :: Python :: 3", @@ -79,16 +76,16 @@ current_version = "1.0.0" allow_dirty = true files = [ - { filename = "pyproject-purl2vcs.toml" }, + { filename = "pyproject.toml" }, ] [tool.flot] +wheel_path_prefixes_to_strip = [ "src" ] includes = [ - "purl2vcs/__init__.py", - "purl2vcs/find_source_repo.py", + "src/**/*", ] excludes = [ @@ -107,16 +104,13 @@ excludes = [ metadata_files = [ "apache-2.0.LICENSE", "NOTICE", - "purl2vcs/CHANGELOG.rst", - "purl2vcs/README.rst", + "CHANGELOG.rst", + "README.rst", "CODE_OF_CONDUCT.rst", ] -editable_paths = [ "purl2vcs", "purl2vcs/tests" ] +editable_paths = [ "src", "tests" ] sdist_extra_includes = [ - "purl2vcs/tests", - ".gitignore", - "NOTICE", - "configure*", + "tests/**/*", ] From 5ff8f4e13362a5835d314657c4d212790cf337e9 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 23:20:59 +0200 Subject: 
[PATCH 07/10] Move all to purl2vcs dir #374 Reference: https://github.com/nexB/purldb/issues/374 Signed-off-by: Philippe Ombredanne --- purl2vcs/CODE_OF_CONDUCT.rst | 1 + purl2vcs/NOTICE | 1 + purl2vcs/apache-2.0.LICENSE | 1 + pyproject-purl2vcs.toml => purl2vcs/pyproject.toml | 0 4 files changed, 3 insertions(+) create mode 120000 purl2vcs/CODE_OF_CONDUCT.rst create mode 120000 purl2vcs/NOTICE create mode 120000 purl2vcs/apache-2.0.LICENSE rename pyproject-purl2vcs.toml => purl2vcs/pyproject.toml (100%) diff --git a/purl2vcs/CODE_OF_CONDUCT.rst b/purl2vcs/CODE_OF_CONDUCT.rst new file mode 120000 index 00000000..74bfe5d1 --- /dev/null +++ b/purl2vcs/CODE_OF_CONDUCT.rst @@ -0,0 +1 @@ +../CODE_OF_CONDUCT.rst \ No newline at end of file diff --git a/purl2vcs/NOTICE b/purl2vcs/NOTICE new file mode 120000 index 00000000..7e1b82f6 --- /dev/null +++ b/purl2vcs/NOTICE @@ -0,0 +1 @@ +../NOTICE \ No newline at end of file diff --git a/purl2vcs/apache-2.0.LICENSE b/purl2vcs/apache-2.0.LICENSE new file mode 120000 index 00000000..cb11df30 --- /dev/null +++ b/purl2vcs/apache-2.0.LICENSE @@ -0,0 +1 @@ +../apache-2.0.LICENSE \ No newline at end of file diff --git a/pyproject-purl2vcs.toml b/purl2vcs/pyproject.toml similarity index 100% rename from pyproject-purl2vcs.toml rename to purl2vcs/pyproject.toml From cdcdac43fcbe9b513ca6bc26bd2cf42e54433b8e Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Mon, 6 May 2024 23:42:36 +0200 Subject: [PATCH 08/10] Add purl2vcs to configure #374 Reference: https://github.com/nexB/purldb/issues/374 Signed-off-by: Philippe Ombredanne --- configure | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure b/configure index ea0619c0..54feadc3 100755 --- a/configure +++ b/configure @@ -30,9 +30,9 @@ CLI_ARGS=$1 CUSTOM_PACKAGES="" # Requirement arguments passed to pip and used by default or with --dev. -REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable . 
--constraint requirements.txt" -DEV_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/[testing] --editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt" -DOCS_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable .[docs] --constraint requirements.txt" +REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable purl2vcs/ --editable . --constraint requirements.txt" +DEV_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/[testing] --editable .[testing] --editable purl2vcs/[testing] --constraint requirements.txt --constraint requirements-dev.txt" +DOCS_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable purl2vcs/ --editable .[docs] --constraint requirements.txt" # where we create a virtualenv VIRTUALENV_DIR=venv From b487844043c459fab75426b7c8eff3e9ada818a5 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 9 May 2024 14:22:32 +0530 Subject: [PATCH 09/10] Fix dependencies #374 Signed-off-by: Tushar Goel --- purl2vcs/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/purl2vcs/pyproject.toml b/purl2vcs/pyproject.toml index 8268efac..58b0e493 100644 --- a/purl2vcs/pyproject.toml +++ b/purl2vcs/pyproject.toml @@ -46,7 +46,7 @@ classifiers = [ dependencies = [ # get packagedb, purldb, minecode and related module for now from this single package - "purldb >= 2.0.0", + "purldb", "requests >= 2.7.0", "scancode-toolkit >= 32.0.0", "packageurl_python >= 0.9.0", From a310e9ff6dd955e31b7b87fef82fb6de920d8e14 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 9 May 2024 14:27:20 +0530 Subject: [PATCH 10/10] Remove docs from configure #374 Signed-off-by: Tushar Goel --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 54feadc3..906137be 100755 --- a/configure +++ b/configure @@ -32,7 +32,7 @@ CUSTOM_PACKAGES="" # Requirement arguments passed to pip and used by default or with --dev. 
REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable purl2vcs/ --editable . --constraint requirements.txt" DEV_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/[testing] --editable .[testing] --editable purl2vcs/[testing] --constraint requirements.txt --constraint requirements-dev.txt" -DOCS_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable purl2vcs/ --editable .[docs] --constraint requirements.txt" +DOCS_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable .[docs] --constraint requirements.txt" # where we create a virtualenv VIRTUALENV_DIR=venv