diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2e1e556c..25e02745 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,10 +9,11 @@ Next Release v4.0.0 ------------- +--------- - Add `/api/docs` Swagger API documentation for API endpoints. + v3.0.0 ------- @@ -28,7 +29,6 @@ This is a major release with major API changes - This is now using the latest version of ScanCode toolkit - v2.0.0 ------ diff --git a/Makefile b/Makefile index afed4013..691ba6fc 100644 --- a/Makefile +++ b/Makefile @@ -130,6 +130,7 @@ test: ${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs packagedb/tests/test_throttling.py ${ACTIVATE} DJANGO_SETTINGS_MODULE=matchcode_project.settings ${PYTHON_EXE} -m pytest -vvs matchcode_pipeline ${ACTIVATE} ${PYTHON_EXE} -m pytest -vvs purldb-toolkit/ + ${ACTIVATE} DJANGO_SETTINGS_MODULE=purldb_project.settings ${PYTHON_EXE} -m pytest -vvs purl2vcs shell: ${MANAGE} shell diff --git a/configure b/configure index ea0619c0..906137be 100755 --- a/configure +++ b/configure @@ -30,8 +30,8 @@ CLI_ARGS=$1 CUSTOM_PACKAGES="" # Requirement arguments passed to pip and used by default or with --dev. -REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable . --constraint requirements.txt" -DEV_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/[testing] --editable .[testing] --constraint requirements.txt --constraint requirements-dev.txt" +REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable purl2vcs/ --editable . 
--constraint requirements.txt" +DEV_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/[testing] --editable .[testing] --editable purl2vcs/[testing] --constraint requirements.txt --constraint requirements-dev.txt" DOCS_REQUIREMENTS="$CUSTOM_PACKAGES --editable purldb-toolkit/ --editable .[docs] --constraint requirements.txt" # where we create a virtualenv diff --git a/packagedb/from_purl.py b/packagedb/from_purl.py index 1e0c7b5b..0706ac8a 100644 --- a/packagedb/from_purl.py +++ b/packagedb/from_purl.py @@ -14,8 +14,8 @@ from rest_framework import viewsets from rest_framework.response import Response -from packagedb.find_source_repo import get_package_object_from_purl -from packagedb.find_source_repo import get_source_repo +from purl2vcs.find_source_repo import get_package_object_from_purl +from purl2vcs.find_source_repo import get_source_repo from packagedb.serializers import PurltoGitRepoResponseSerializer from packagedb.serializers import PurltoGitRepoSerializer diff --git a/packagedb/management/commands/create_source_repo_packages.py b/packagedb/management/commands/create_source_repo_packages.py index 70617278..ba15c4bd 100644 --- a/packagedb/management/commands/create_source_repo_packages.py +++ b/packagedb/management/commands/create_source_repo_packages.py @@ -15,8 +15,8 @@ from minecode.management.commands import VerboseCommand from minecode.model_utils import add_package_to_scan_queue -from packagedb.find_source_repo import add_source_package_to_package_set -from packagedb.find_source_repo import get_package_object_from_purl +from purl2vcs.find_source_repo import add_source_package_to_package_set +from purl2vcs.find_source_repo import get_package_object_from_purl from packagedb.models import Package from packagedb.models import PackageContentType diff --git a/packagedb/management/commands/get_source_download_url.py b/packagedb/management/commands/get_source_download_url.py index 17d7cc8a..8a0c1ba4 100644 --- 
a/packagedb/management/commands/get_source_download_url.py +++ b/packagedb/management/commands/get_source_download_url.py @@ -11,7 +11,7 @@ import sys from minecode.management.commands import VerboseCommand -from packagedb.find_source_repo import get_source_repo_and_add_to_package_set +from purl2vcs.find_source_repo import get_source_repo_and_add_to_package_set TRACE = False diff --git a/purl2vcs/CHANGELOG.rst b/purl2vcs/CHANGELOG.rst new file mode 100644 index 00000000..fd6fa1ea --- /dev/null +++ b/purl2vcs/CHANGELOG.rst @@ -0,0 +1,8 @@ +Changelog +========= + + +v2.0.0 +------ + +Initial release. diff --git a/purl2vcs/CODE_OF_CONDUCT.rst b/purl2vcs/CODE_OF_CONDUCT.rst new file mode 120000 index 00000000..74bfe5d1 --- /dev/null +++ b/purl2vcs/CODE_OF_CONDUCT.rst @@ -0,0 +1 @@ +../CODE_OF_CONDUCT.rst \ No newline at end of file diff --git a/purl2vcs/NOTICE b/purl2vcs/NOTICE new file mode 120000 index 00000000..7e1b82f6 --- /dev/null +++ b/purl2vcs/NOTICE @@ -0,0 +1 @@ +../NOTICE \ No newline at end of file diff --git a/purl2vcs/README.rst b/purl2vcs/README.rst new file mode 100644 index 00000000..7e723333 --- /dev/null +++ b/purl2vcs/README.rst @@ -0,0 +1,69 @@ +purl2vcs +========== + +purl2vcs is an add-on library working with the PurlDB to find the version control system (VCS) URL of a package and detect the commit and tags for a given version. + +In the future, it will also find paths and branches, which is useful for monorepos. + +Usage +------- + +- First, import the main module: ``from purl2vcs import find_source_repo`` + +- To use the functions you first need to acquire some Package objects: + Use the ``get_package_object_from_purl(package_url)`` passing a PURL string to get an object from the database + +- To find the source repository of a Package, call `get_source_repo(package)` + to get a PackageURL object back. + +- To generate all the source repository URLs of a Package, call `get_repo_urls(package)`.
+ +- To convert a single source repo URL to PURLs, call ``convert_repo_url_to_purls`` +- To convert a list of source repo URLs to PURLs, call ``convert_repo_urls_to_purls`` + +- To find the commit or tags from a source repo PURL use ``get_tags_and_commits`` + +- The low level ``get_tags_and_commits`` is used in ``find_package_version_tag_and_commit`` to find the tag and commit of a given package ``version`` in a source repo PURL. + + +Installation +------------ + +Requirements +############ + +* install purldb dependencies +* `pip install purl2vcs` + + +Funding +------- + +This project was funded through the NGI Assure Fund https://nlnet.nl/assure, a +fund established by NLnet https://nlnet.nl/ with financial support from the +European Commission's Next Generation Internet programme, under the aegis of DG +Communications Networks, Content and Technology under grant agreement No 957073. + +This project is also funded through grants from the Google Summer of Code +program, continuing support and sponsoring from nexB Inc. and generous +donations from multiple sponsors. + + +License +------- + +Copyright (c) nexB Inc. and others. All rights reserved. + +purldb is a trademark of nexB Inc. + +SPDX-License-Identifier: Apache-2.0 + +purl2vcs is licensed under the Apache License version 2.0. + +See https://www.apache.org/licenses/LICENSE-2.0 for the license text. + +See https://creativecommons.org/licenses/by-sa/4.0/legalcode for the license text. + +See https://github.com/nexB/purldb for support or download. + +See https://aboutcode.org for more information about nexB OSS projects.
diff --git a/purl2vcs/apache-2.0.LICENSE b/purl2vcs/apache-2.0.LICENSE new file mode 120000 index 00000000..cb11df30 --- /dev/null +++ b/purl2vcs/apache-2.0.LICENSE @@ -0,0 +1 @@ +../apache-2.0.LICENSE \ No newline at end of file diff --git a/purl2vcs/pyproject.toml b/purl2vcs/pyproject.toml new file mode 100644 index 00000000..58b0e493 --- /dev/null +++ b/purl2vcs/pyproject.toml @@ -0,0 +1,116 @@ +[build-system] +requires = [ "flot>=0.7.0" ] +build-backend = "flot.buildapi" + +[project] +name = "purl2vcs" +# keep version same as purldb for now +version = "1.0.0" +description = "purl2vcs is an add-on library working with the PurlDB to find the version control system (VCS) URL of a package and detect the commit, tags and path for a given version." +readme = "README.rst" +license = { text = "Apache-2.0" } +requires-python = ">=3.8" +authors = [ + { name = "nexB. Inc. and others", email = "info@aboutcode.org" }, +] +keywords = [ + "package-url", + "purl", + "git", + "version", + "hg", + "svn", + "tag", + "commit", + "open source", + "scan", + "package", + "dependency", + "sca", +] + +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development", + "Topic :: Utilities", +] + + +dependencies = [ + # get packagedb, purldb, minecode and related module for now from this single package + "purldb", + "requests >= 2.7.0", + "scancode-toolkit >= 32.0.0", + "packageurl_python >= 0.9.0", +] + +[project.urls] +Homepage = "https://github.com/nexB/purldb" + + +[project.optional-dependencies] + +testing = [ + "flot", + "pytest >= 6, != 7.0.0", + "pytest-xdist >= 2", + "aboutcode-toolkit >= 7.0.2", + 
"pycodestyle >= 2.8.0", + "twine", + "black", + "isort", + "pytest-rerunfailures", + "bump-my-version", +] + +[tool.bumpversion] +current_version = "1.0.0" +allow_dirty = true + +files = [ + { filename = "pyproject.toml" }, + +] + + +[tool.flot] +wheel_path_prefixes_to_strip = [ "src" ] + +includes = [ + "src/**/*", +] + +excludes = [ + # Python compiled files + "**/*.py[cod]", + "**/*.egg-info", + # Various junk and temp files + "**/.DS_Store", + "**/*~", + "**/.*.sw[po]", + "**/.ve", + "**/*.bak", + "**/.ipynb_checkpoints", +] + +metadata_files = [ + "apache-2.0.LICENSE", + "NOTICE", + "CHANGELOG.rst", + "README.rst", + "CODE_OF_CONDUCT.rst", +] + +editable_paths = [ "src", "tests" ] + +sdist_extra_includes = [ + "tests/**/*", +] diff --git a/purl2vcs/src/purl2vcs/__init__.py b/purl2vcs/src/purl2vcs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packagedb/find_source_repo.py b/purl2vcs/src/purl2vcs/find_source_repo.py similarity index 87% rename from packagedb/find_source_repo.py rename to purl2vcs/src/purl2vcs/find_source_repo.py index 62ebabe5..16730464 100644 --- a/packagedb/find_source_repo.py +++ b/purl2vcs/src/purl2vcs/find_source_repo.py @@ -9,22 +9,18 @@ import logging import subprocess -from typing import Generator -from typing import List +from typing import Generator, List from urllib.parse import urlparse import requests from packageurl import PackageURL from packageurl.contrib.django.utils import purl_to_lookups -from packageurl.contrib.purl2url import get_download_url -from packageurl.contrib.purl2url import purl2url +from packageurl.contrib.purl2url import get_download_url, purl2url from scancode.api import get_urls as get_urls_from_location from minecode.model_utils import add_package_to_scan_queue from minecode.visitors.maven import get_merged_ancestor_package_from_maven_package -from packagedb.models import Package -from packagedb.models import PackageContentType -from packagedb.models import PackageSet +from 
packagedb.models import Package, PackageContentType, PackageSet logger = logging.getLogger(__name__) @@ -160,6 +156,7 @@ def get_source_repo_and_add_to_package_set(): source_package = Package.objects.for_package_url( purl_str=str(source_purl) ).get_or_none() + if not source_package: source_package, _created = Package.objects.get_or_create( type=source_purl.type, @@ -248,53 +245,67 @@ def get_source_urls_from_package_data_and_resources(package: Package) -> List[st def convert_repo_urls_to_purls(source_urls): """ - Convert a source URL to a purl + Yield PURLs from a list of source repository URLs. + """ + for source_url in source_urls or []: + yield from convert_repo_url_to_purls(source_url) + + +def convert_repo_url_to_purls(source_url): + """ + Yield PURLs from a single source repository URL. """ url_hints = [ "github", "gitlab", "bitbucket", ] - if not source_urls: + # URL like: git@github.com+https://github.com/graphql-java/java-dataloader.git + if source_url.startswith("git@github.com+"): + _, _, source_url = source_url.partition("+") + + # VCS URL like: https+//github.com/graphql-java-kickstart/graphql-java-servlet.git + if source_url.startswith("https+//"): + # convert https+// to https:// + source_url = source_url.replace("https+//", "https://") + + if ( + source_url.startswith("git+https://") or source_url.startswith("git://") + ) and "@" in source_url: + # remove the commit from the end of the URL + source_url, _, _ = source_url.rpartition("@") + + # remove .git from the end of the URL + if source_url.endswith(".git"): + source_url, _, _ = source_url.rpartition(".git") + + # git:// URLs + if source_url.startswith("git://"): + # remove git:// from the beginning of the URL + _, _, source_url = source_url.partition("git://") + if ":" in source_url: + # convert : to / + source_url = source_url.replace(":", "/") + source_url = f"https://{source_url}" + + urlparse_result = urlparse(source_url) + + path_segments = urlparse_result.path.split("/") + if
not len(path_segments) > 2: return - for source_url in source_urls: - # git@github.com+https://github.com/graphql-java/java-dataloader.git - if source_url.startswith("git@github.com+"): - _, _, source_url = source_url.partition("+") - # https+//github.com/graphql-java-kickstart/graphql-java-servlet.git - if source_url.startswith("https+//"): - # convert https+// to https:// - source_url = source_url.replace("https+//", "https://") - if ( - source_url.startswith("git+https://") or source_url.startswith("git://") - ) and "@" in source_url: - # remove the commit from the end of the URL - source_url, _, _ = source_url.rpartition("@") - # remove .git from the end of the URL - if source_url.endswith(".git"): - source_url, _, _ = source_url.rpartition(".git") - if source_url.startswith("git://"): - # remove git:// from the beginning of the URL - _, _, source_url = source_url.partition("git://") - if ":" in source_url: - # convert : to / - source_url = source_url.replace(":", "/") - source_url = f"https://{source_url}" - urlparse_result = urlparse(source_url) - path_segments = urlparse_result.path.split("/") - if not len(path_segments) > 2: - continue - namespace = path_segments[1] - name = path_segments[2] - if not name: - continue - for url_hint in url_hints: - if url_hint in urlparse_result.netloc: - yield PackageURL( - type=url_hint, - namespace=namespace, - name=name, - ) + + namespace = path_segments[1] + name = path_segments[2] + if not name: + return + + for url_hint in url_hints: + if url_hint in urlparse_result.netloc: + yield PackageURL( + type=url_hint, + namespace=namespace, + name=name, + ) def get_urls_from_package_resources(package): diff --git a/packagedb/tests/test_find_source_repo.py b/purl2vcs/tests/test_find_source_repo.py similarity index 90% rename from packagedb/tests/test_find_source_repo.py rename to purl2vcs/tests/test_find_source_repo.py index 2268f4eb..38bce2a2 100644 --- a/packagedb/tests/test_find_source_repo.py +++
b/purl2vcs/tests/test_find_source_repo.py @@ -15,18 +15,18 @@ from django.test import TestCase from packageurl import PackageURL -from packagedb.find_source_repo import convert_repo_urls_to_purls -from packagedb.find_source_repo import fetch_response -from packagedb.find_source_repo import get_repo_urls -from packagedb.find_source_repo import get_source_repo -from packagedb.find_source_repo import get_source_urls_from_package_data_and_resources -from packagedb.find_source_repo import get_tag_and_commit -from packagedb.find_source_repo import get_tags_and_commits -from packagedb.find_source_repo import get_urls_from_package_data -from packagedb.find_source_repo import get_urls_from_package_resources -from packagedb.models import Package -from packagedb.models import PackageContentType -from packagedb.models import Resource +from packagedb.models import Package, PackageContentType, Resource +from purl2vcs.find_source_repo import ( + convert_repo_urls_to_purls, + fetch_response, + get_repo_urls, + get_source_repo, + get_source_urls_from_package_data_and_resources, + get_tag_and_commit, + get_tags_and_commits, + get_urls_from_package_data, + get_urls_from_package_resources, +) BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "testfiles", "find_source_repo", "tags_commits.txt") @@ -123,8 +123,8 @@ def setUp(self): download_url="https://repo1.maven.org/maven2/com/foo/bar/11/bar.11.jar", ) - @mock.patch("packagedb.find_source_repo.fetch_response") - @mock.patch("packagedb.find_source_repo.get_urls_from_text") + @mock.patch("purl2vcs.find_source_repo.fetch_response") + @mock.patch("purl2vcs.find_source_repo.get_urls_from_text") def test_get_source_purl_from_package_data(self, mock_text, mock_response): mock_response.side_effect = [ None, @@ -154,9 +154,9 @@ def test_get_source_purl_from_package_resources(self): 
"https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions/tree/master/oauth-oidc-sdk/src/main/resources/META-INF/MANIFEST.MF", ] - @mock.patch("packagedb.find_source_repo.get_urls_from_package_data") + @mock.patch("purl2vcs.find_source_repo.get_urls_from_package_data") @mock.patch( - "packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package" + "purl2vcs.find_source_repo.get_merged_ancestor_package_from_maven_package" ) def test_get_source_purl_from_package_data_and_resources(self, mock1, mock2): mock1.return_value = None @@ -179,9 +179,9 @@ def test_get_source_purl_from_package_data_and_resources(self, mock1, mock2): "https://bitbucket.org/connect2id/oauth-2.0-sdk-with-openid-connect-extensions", ] - @mock.patch("packagedb.find_source_repo.get_urls_from_package_data") + @mock.patch("purl2vcs.find_source_repo.get_urls_from_package_data") @mock.patch( - "packagedb.find_source_repo.get_merged_ancestor_package_from_maven_package" + "purl2vcs.find_source_repo.get_merged_ancestor_package_from_maven_package" ) def test_get_repo_urls(self, mock1, mock2): mock1.return_value = None @@ -279,7 +279,7 @@ def test_convert_repo_urls_to_purls(self): ] def test_get_tags_commits(self): - with patch("packagedb.find_source_repo.fetch_response"): + with patch("purl2vcs.find_source_repo.fetch_response"): with patch("subprocess.getoutput") as mock_popen: mock_popen.return_value = open(TEST_DATA).read() with open(TAGS_COMMITS_FILE) as f: @@ -302,7 +302,7 @@ def test_get_tags_commits(self): ) == ("9.35", "fdc8117af75b192e3f8afcc0119c904b02686af8") def test_get_source_repo(self): - with patch("packagedb.find_source_repo.fetch_response"): + with patch("purl2vcs.find_source_repo.fetch_response"): with patch("subprocess.getoutput") as mock_popen: mock_popen.return_value = open(TEST_DATA).read() assert get_source_repo( @@ -316,7 +316,7 @@ def test_get_source_repo(self): subpath=None, ) - @mock.patch("packagedb.find_source_repo.requests.get") + 
@mock.patch("purl2vcs.find_source_repo.requests.get") def test_fetch_response(self, mock_get): mock_get.return_value.status_code = 200 mock_get.return_value.text = "abc" diff --git a/packagedb/tests/testfiles/find_source_repo/tags_commits.txt b/purl2vcs/tests/testfiles/find_source_repo/tags_commits.txt similarity index 100% rename from packagedb/tests/testfiles/find_source_repo/tags_commits.txt rename to purl2vcs/tests/testfiles/find_source_repo/tags_commits.txt diff --git a/packagedb/tests/testfiles/find_source_repo/tags_commits_list.txt b/purl2vcs/tests/testfiles/find_source_repo/tags_commits_list.txt similarity index 100% rename from packagedb/tests/testfiles/find_source_repo/tags_commits_list.txt rename to purl2vcs/tests/testfiles/find_source_repo/tags_commits_list.txt diff --git a/setup.cfg b/setup.cfg index 399f4157..7cf6020d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = purldb -version = 2.0.0 +version = 4.0.0 license_files = LICENSE AUTHORS.rst @@ -77,6 +77,7 @@ testing = aboutcode-toolkit >= 6.0.0 black mock + flot docs = Sphinx>=5.0.2