From 66f225417f9c3d1ac7948503bd024878a03d4474 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 18 Apr 2024 00:59:52 +0530 Subject: [PATCH 1/5] Add support for addon_pipeline in /api/collect endpoint Signed-off-by: Keshav Priyadarshi --- packagedb/api.py | 61 +++++++++++++++++++++++++--------------- packagedb/models.py | 8 ++++-- packagedb/serializers.py | 35 +++++++++++++++++++++++ 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/packagedb/api.py b/packagedb/api.py index 03611733..8450c50c 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -17,6 +17,9 @@ from django_filters.filters import Filter from django_filters.filters import OrderingFilter from django_filters.rest_framework import FilterSet +from drf_spectacular.plumbing import build_array_type +from drf_spectacular.plumbing import build_basic_type +from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter from drf_spectacular.utils import extend_schema from packageurl import PackageURL @@ -50,6 +53,7 @@ from packagedb.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE from packagedb.package_managers import get_api_package_name from packagedb.package_managers import get_version_fetcher +from packagedb.serializers import CollectPackageSerializer from packagedb.serializers import DependentPackageSerializer from packagedb.serializers import IndexPackagesResponseSerializer from packagedb.serializers import IndexPackagesSerializer @@ -679,36 +683,44 @@ class CollectViewSet(viewsets.ViewSet): **Note:** Use `Index packages` for bulk indexing/reindexing of packages. 
""" - serializer_class=None + serializer_class=CollectPackageSerializer @extend_schema( parameters=[ - OpenApiParameter('purl', str, 'query', description='PackageURL'), - OpenApiParameter('source_purl', str, 'query', description='Source PackageURL', default=False), + OpenApiParameter('purl', str, 'query', description='PackageURL', required=True), + OpenApiParameter('source_purl', str, 'query', description='Source PackageURL'), + + # There is no OpenApiTypes.LIST https://github.com/tfranzel/drf-spectacular/issues/341 + OpenApiParameter( + 'addon_pipelines', + build_array_type(build_basic_type(OpenApiTypes.STR)), + 'query', description='Addon pipelines', + ), ], responses={200:PackageAPISerializer()}, ) def list(self, request, format=None): - purl = request.query_params.get('purl') - source_purl = request.query_params.get('source_purl', None) + serializer = self.serializer_class(data=request.query_params) + if not serializer.is_valid(): + return Response( + {'errors': serializer.errors}, + status=status.HTTP_400_BAD_REQUEST, + ) + + validated_data = serializer.validated_data + purl = validated_data.get('purl') + + kwargs = dict() + if source_purl := validated_data.get('source_purl', None): + kwargs["source_purl"] = source_purl + + if addon_pipelines := validated_data.get('addon_pipelines', []): + kwargs["pipelines"] = addon_pipelines - # validate purl - try: - package_url = PackageURL.from_string(purl) - if source_purl: - source_package_url = PackageURL.from_string(source_purl) - except ValueError as e: - message = { - 'status': f'purl validation error: {e}' - } - return Response(message, status=status.HTTP_400_BAD_REQUEST) lookups = purl_to_lookups(purl) packages = Package.objects.filter(**lookups) if packages.count() == 0: try: - kwargs = dict() - if source_purl: - kwargs["source_purl"] = source_purl errors = priority_router.process(purl, **kwargs) except NoRouteAvailable: message = { @@ -799,10 +811,10 @@ def index_packages(self, request, *args, **kwargs): - 
unsupported_vers - A list of vers range that are not supported by the univers or package_manager. """ - def _reindex_package(package, reindexed_packages): + def _reindex_package(package, reindexed_packages, **kwargs): if package in reindexed_packages: return - package.reindex() + package.reindex(**kwargs) reindexed_packages.append(package) serializer = self.serializer_class(data=request.data) @@ -829,15 +841,18 @@ def _reindex_package(package, reindexed_packages): if reindex: for package in unique_packages: purl = package['purl'] + kwargs = dict() + if addon_pipelines := package.get('source_purl'): + kwargs["addon_pipelines"] = addon_pipelines lookups = purl_to_lookups(purl) packages = Package.objects.filter(**lookups) if packages.count() > 0: for package in packages: - _reindex_package(package, reindexed_packages) + _reindex_package(package, reindexed_packages, **kwargs) if reindex_set: for package_set in package.package_sets.all(): for p in package_set.packages.all(): - _reindex_package(p, reindexed_packages) + _reindex_package(p, reindexed_packages, **kwargs) else: nonexistent_packages.append(package) requeued_packages.extend([p.package_url for p in reindexed_packages]) @@ -854,6 +869,8 @@ def _reindex_package(package, reindexed_packages): extra_fields = dict() if source_purl := package.get('source_purl'): extra_fields["source_uri"] = source_purl + if addon_pipelines := package.get('addon_pipelines'): + extra_fields["addon_pipelines"] = addon_pipelines priority_resource_uri = PriorityResourceURI.objects.insert(purl, **extra_fields) if priority_resource_uri: queued_packages.append(purl) diff --git a/packagedb/models.py b/packagedb/models.py index ac097ed3..f1069e45 100644 --- a/packagedb/models.py +++ b/packagedb/models.py @@ -620,15 +620,19 @@ def get_latest_version(self): if sorted_versions: return sorted_versions[-1] - def reindex(self): + def reindex(self, **kwargs): """ Trigger another scan of this Package, where a new ScannableURI is created for this 
Package. The fingerprints and Resources associated with this Package are deleted and recreated from the updated scan data. """ from minecode.model_utils import add_package_to_scan_queue + from minecode.model_utils import DEFAULT_PIPELINES - add_package_to_scan_queue(self, reindex_uri=True, priority=100) + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + + add_package_to_scan_queue(self, pipelines=pipelines, reindex_uri=True, priority=100) def update_fields(self, save=False, **values_by_fields): """ diff --git a/packagedb/serializers.py b/packagedb/serializers.py index 2ed184d2..98efcb30 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -15,6 +15,8 @@ from packagedb.models import PackageWatch from packagedb.models import Party from packagedb.models import Resource +from packageurl import PackageURL +from rest_framework.exceptions import ValidationError from rest_framework.serializers import BooleanField from rest_framework.serializers import CharField from rest_framework.serializers import HyperlinkedIdentityField @@ -369,11 +371,44 @@ class Meta: fields = ['depth', 'watch_interval', 'is_active'] +class CollectPackageSerializer(Serializer): + purl = CharField(help_text="PackageURL strings in canonical form.") + source_purl = CharField( + required=False, + help_text="Source PackageURL.", + ) + + addon_pipelines = ListField( + required=False, + allow_empty=True, + help_text="Addon pipelines to run on the package.", + ) + + def validate_purl(self, value): + try: + PackageURL.from_string(value) + except ValueError as e: + raise ValidationError(f'purl validation error: {e}') + return value + + def validate_source_purl(self, value): + if value: + try: + PackageURL.from_string(value) + except ValueError as e: + raise ValidationError(f'purl validation error: {e}') + return value + class PackageVersSerializer(Serializer): purl = CharField() vers = CharField(required=False) source_purl = 
CharField(required=False) + addon_pipelines = ListField( + required=False, + allow_empty=True, + help_text="Addon pipelines to run on the package.", + ) class PackageUpdateSerializer(Serializer): From 7338d9992c461edb1819677b6100eebb9032212d Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 18 Apr 2024 01:07:57 +0530 Subject: [PATCH 2/5] Pass addon_pipelines in scan queue Signed-off-by: Keshav Priyadarshi --- minecode/model_utils.py | 3 --- minecode/visitors/conan.py | 13 +++++++++---- minecode/visitors/debian.py | 16 ++++++++++++---- minecode/visitors/generic.py | 26 ++++++++++++++++++-------- minecode/visitors/github.py | 8 ++++++-- minecode/visitors/gnu.py | 9 +++++++-- minecode/visitors/maven.py | 25 +++++++++++++++---------- minecode/visitors/npm.py | 13 +++++++++---- minecode/visitors/openssl.py | 9 +++++++-- 9 files changed, 83 insertions(+), 39 deletions(-) diff --git a/minecode/model_utils.py b/minecode/model_utils.py index cdcc7b29..e9f987b1 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -30,9 +30,6 @@ DEFAULT_PIPELINES = ( 'scan_single_package', 'fingerprint_codebase', - 'collect_symbols', - 'collect_source_strings', - 'inspect_elf_binaries', ) diff --git a/minecode/visitors/conan.py b/minecode/visitors/conan.py index 15ec2678..dacbe206 100644 --- a/minecode/visitors/conan.py +++ b/minecode/visitors/conan.py @@ -99,7 +99,7 @@ def get_download_info(conandata, version): return download_url, sha256 -def map_conan_package(package_url): +def map_conan_package(package_url, pipelines): """ Add a conan `package_url` to the PackageDB. 
@@ -134,13 +134,13 @@ def map_conan_package(package_url): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package) + add_package_to_scan_queue(db_package, pipelines) return error @priority_router.route("pkg:conan/.*") -def process_request(purl_str): +def process_request(purl_str, **kwargs): """ Process `priority_resource_uri` containing a conan Package URL (PURL) as a URI. @@ -149,11 +149,16 @@ def process_request(purl_str): https://github.com/conan-io/conan-center-index and using it to create a new PackageDB entry. The package is then added to the scan queue afterwards. """ + from minecode.model_utils import DEFAULT_PIPELINES + package_url = PackageURL.from_string(purl_str) + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + if not package_url.version: return - error_msg = map_conan_package(package_url) + error_msg = map_conan_package(package_url, pipelines) if error_msg: return error_msg diff --git a/minecode/visitors/debian.py b/minecode/visitors/debian.py index db05a79d..e0690cdd 100644 --- a/minecode/visitors/debian.py +++ b/minecode/visitors/debian.py @@ -332,7 +332,12 @@ def process_request(purl_str, **kwargs): Return an error string for errors that occur, or empty string if there is no error. """ + from minecode.model_utils import DEFAULT_PIPELINES + source_purl = kwargs.get("source_purl", None) + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + try: package_url = PackageURL.from_string(purl_str) source_package_url = None @@ -348,12 +353,13 @@ def process_request(purl_str, **kwargs): error = map_debian_metadata_binary_and_source( package_url=package_url, source_package_url=source_package_url, + pipelines=pipelines, ) return error -def map_debian_package(debian_package, package_content): +def map_debian_package(debian_package, package_content, pipelines): """ Add a debian `package_url` to the PackageDB. 
@@ -421,7 +427,7 @@ def map_debian_package(debian_package, package_content): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package) + add_package_to_scan_queue(db_package, pipelines) return db_package, error @@ -501,13 +507,13 @@ def update_license_copyright_fields(package_from, package_to, replace=True): setattr(package_to, field, value) -def map_debian_metadata_binary_and_source(package_url, source_package_url): +def map_debian_metadata_binary_and_source(package_url, source_package_url, pipelines): """ Get metadata for the binary and source release of the Debian package `package_url` and save it to the PackageDB. Return an error string for errors that occur, or empty string if there is no error. - """ + """ error = '' if "repository_url" in package_url.qualifiers: @@ -537,6 +543,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url): binary_package, emsg = map_debian_package( debian_package, PackageContentType.BINARY, + pipelines, ) if emsg: error += emsg @@ -545,6 +552,7 @@ def map_debian_metadata_binary_and_source(package_url, source_package_url): source_package, emsg = map_debian_package( debian_package, PackageContentType.SOURCE_ARCHIVE, + pipelines, ) if emsg: error += emsg diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py index 6011209f..67ba344a 100644 --- a/minecode/visitors/generic.py +++ b/minecode/visitors/generic.py @@ -26,7 +26,7 @@ logger.setLevel(logging.INFO) -def map_generic_package(package_url): +def map_generic_package(package_url, pipelines): """ Add a npm `package_url` to the PackageDB. 
@@ -51,17 +51,22 @@ def map_generic_package(package_url): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package) + add_package_to_scan_queue(db_package, pipelines) return error @priority_router.route("pkg:generic/.*?download_url=.*") -def process_request(purl_str): +def process_request(purl_str, **kwargs): """ Process `priority_resource_uri` containing a generic Package URL (PURL) with download_url as a qualifier """ + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + try: package_url = PackageURL.from_string(purl_str) except ValueError as e: @@ -73,7 +78,7 @@ def process_request(purl_str): error = f'package_url {purl_str} does not contain a download_url qualifier' return error - error_msg = map_generic_package(package_url) + error_msg = map_generic_package(package_url, pipelines) if error_msg: return error_msg @@ -91,7 +96,7 @@ def packagedata_from_dict(package_data): return PackageData.from_data(cleaned_package_data) -def map_fetchcode_supported_package(package_url): +def map_fetchcode_supported_package(package_url, pipelines): """ Add a `package_url` supported by fetchcode to the PackageDB. @@ -116,7 +121,7 @@ def map_fetchcode_supported_package(package_url): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package) + add_package_to_scan_queue(db_package, pipelines) return error @@ -156,7 +161,7 @@ def map_fetchcode_supported_package(package_url): # Indexing some generic PURLs requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route(*GENERIC_FETCHCODE_SUPPORTED_PURLS) -def process_request_fetchcode_generic(purl_str): +def process_request_fetchcode_generic(purl_str, **kwargs): """ Process `priority_resource_uri` containing a generic Package URL (PURL) supported by fetchcode. 
@@ -165,13 +170,18 @@ def process_request_fetchcode_generic(purl_str): https://github.com/nexB/fetchcode and using it to create a new PackageDB entry. The package is then added to the scan queue afterwards. """ + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + try: package_url = PackageURL.from_string(purl_str) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_fetchcode_supported_package(package_url) + error_msg = map_fetchcode_supported_package(package_url, pipelines) if error_msg: return error_msg diff --git a/minecode/visitors/github.py b/minecode/visitors/github.py index 522f8875..f439b041 100644 --- a/minecode/visitors/github.py +++ b/minecode/visitors/github.py @@ -186,7 +186,7 @@ def json_serial_date_obj(obj): # Indexing GitHub PURLs requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route('pkg:github/.*') -def process_request_dir_listed(purl_str): +def process_request_dir_listed(purl_str, **kwargs): """ Process `priority_resource_uri` containing a GitHub Package URL (PURL). @@ -194,13 +194,17 @@ def process_request_dir_listed(purl_str): https://github.com/nexB/fetchcode and using it to create a new PackageDB entry. The package is then added to the scan queue afterwards. 
""" + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) try: package_url = PackageURL.from_string(purl_str) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_fetchcode_supported_package(package_url) + error_msg = map_fetchcode_supported_package(package_url, pipelines) if error_msg: return error_msg diff --git a/minecode/visitors/gnu.py b/minecode/visitors/gnu.py index 8aec30cc..bf1f541a 100644 --- a/minecode/visitors/gnu.py +++ b/minecode/visitors/gnu.py @@ -22,7 +22,7 @@ @priority_router.route("pkg:gnu/.*") -def process_request(purl_str): +def process_request(purl_str, **kwargs): """ Process `priority_resource_uri` containing a GNU Package URL (PURL) as a URI. @@ -31,11 +31,16 @@ def process_request(purl_str): https://github.com/nexB/fetchcode and using it to create a new PackageDB entry. The package is then added to the scan queue afterwards. """ + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + package_url = PackageURL.from_string(purl_str) if not package_url.version: return - error_msg = map_fetchcode_supported_package(package_url) + error_msg = map_fetchcode_supported_package(package_url, pipelines) if error_msg: return error_msg diff --git a/minecode/visitors/maven.py b/minecode/visitors/maven.py index ba8a209b..98cc069f 100644 --- a/minecode/visitors/maven.py +++ b/minecode/visitors/maven.py @@ -243,7 +243,7 @@ def merge_ancestors(ancestor_pom_texts, package): return package -def map_maven_package(package_url, package_content): +def map_maven_package(package_url, package_content, pipelines): """ Add a maven `package_url` to the PackageDB. 
@@ -316,12 +316,12 @@ def map_maven_package(package_url, package_content): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package) + add_package_to_scan_queue(db_package, pipelines) return db_package, error -def map_maven_binary_and_source(package_url): +def map_maven_binary_and_source(package_url, pipelines): """ Get metadata for the binary and source release of the Maven package `package_url` and save it to the PackageDB. @@ -329,14 +329,14 @@ def map_maven_binary_and_source(package_url): Return an error string for errors that occur, or empty string if there is no error. """ error = '' - package, emsg = map_maven_package(package_url, PackageContentType.BINARY) + package, emsg = map_maven_package(package_url, PackageContentType.BINARY, pipelines) if emsg: error += emsg source_package_url = package_url source_package_url.qualifiers['classifier'] = 'sources' source_package, emsg = map_maven_package( - source_package_url, PackageContentType.SOURCE_ARCHIVE + source_package_url, PackageContentType.SOURCE_ARCHIVE, pipelines ) if emsg: error += emsg @@ -351,7 +351,7 @@ def map_maven_binary_and_source(package_url): return error -def map_maven_packages(package_url): +def map_maven_packages(package_url, pipelines): """ Given a valid `package_url` with no version, get metadata for the binary and source release for each version of the Maven package `package_url` and save @@ -375,7 +375,7 @@ def map_maven_packages(package_url): name=listing.get('a'), version=listing.get('v'), ) - emsg = map_maven_binary_and_source(purl) + emsg = map_maven_binary_and_source(purl, pipelines) if emsg: error += emsg return error @@ -405,7 +405,7 @@ def get_package_sha1(package): @priority_router.route('pkg:maven/.*') -def process_request(purl_str): +def process_request(purl_str, **kwargs): """ Process `priority_resource_uri` containing a maven Package URL (PURL) as a URI. 
@@ -418,6 +418,11 @@ def process_request(purl_str): Return an error string for errors that occur, or empty string if there is no error. """ + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + try: package_url = PackageURL.from_string(purl_str) except ValueError as e: @@ -426,9 +431,9 @@ def process_request(purl_str): has_version = bool(package_url.version) if has_version: - error = map_maven_binary_and_source(package_url) + error = map_maven_binary_and_source(package_url, pipelines) else: - error = map_maven_packages(package_url) + error = map_maven_packages(package_url, pipelines) return error diff --git a/minecode/visitors/npm.py b/minecode/visitors/npm.py index 3fdceddf..cdeee271 100644 --- a/minecode/visitors/npm.py +++ b/minecode/visitors/npm.py @@ -127,7 +127,7 @@ def get_package_json(namespace, name, version): logger.error(f"HTTP error occurred: {err}") -def map_npm_package(package_url): +def map_npm_package(package_url, pipelines): """ Add a npm `package_url` to the PackageDB. @@ -156,13 +156,13 @@ def map_npm_package(package_url): # Submit package for scanning if db_package: - add_package_to_scan_queue(db_package) + add_package_to_scan_queue(db_package, pipelines) return error @priority_router.route('pkg:npm/.*') -def process_request(purl_str): +def process_request(purl_str, *kwargs): """ Process `priority_resource_uri` containing a npm Package URL (PURL) as a URI. @@ -171,11 +171,16 @@ def process_request(purl_str): using it to create a new PackageDB entry. The package is then added to the scan queue afterwards. 
""" + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + package_url = PackageURL.from_string(purl_str) if not package_url.version: return - error_msg = map_npm_package(package_url) + error_msg = map_npm_package(package_url, pipelines) if error_msg: return error_msg diff --git a/minecode/visitors/openssl.py b/minecode/visitors/openssl.py index 390de77b..1993c1fa 100644 --- a/minecode/visitors/openssl.py +++ b/minecode/visitors/openssl.py @@ -94,7 +94,7 @@ def get_uris(self, content): # Indexing OpenSSL PURLs requires a GitHub API token. # Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route('pkg:openssl/openssl@.*') -def process_request_dir_listed(purl_str): +def process_request_dir_listed(purl_str, **kwargs): """ Process `priority_resource_uri` containing a OpenSSL Package URL (PURL) supported by fetchcode. @@ -103,13 +103,18 @@ def process_request_dir_listed(purl_str): https://github.com/nexB/fetchcode and using it to create a new PackageDB entry. The package is then added to the scan queue afterwards. 
""" + from minecode.model_utils import DEFAULT_PIPELINES + + addon_pipelines = kwargs.get('addon_pipelines', []) + pipelines = DEFAULT_PIPELINES + tuple(addon_pipelines) + try: package_url = PackageURL.from_string(purl_str) except ValueError as e: error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_fetchcode_supported_package(package_url) + error_msg = map_fetchcode_supported_package(package_url, pipelines) if error_msg: return error_msg \ No newline at end of file From 7d5e7e815f033365c4d59565051ac51c5d805d3a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 18 Apr 2024 01:10:02 +0530 Subject: [PATCH 3/5] Use dummy test_pipeline in tests Signed-off-by: Keshav Priyadarshi --- minecode/tests/test_conan.py | 2 +- minecode/tests/test_generic.py | 4 ++-- minecode/tests/test_maven.py | 4 ++-- minecode/tests/test_npm.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/minecode/tests/test_conan.py b/minecode/tests/test_conan.py index f15b9611..c4558707 100644 --- a/minecode/tests/test_conan.py +++ b/minecode/tests/test_conan.py @@ -98,7 +98,7 @@ def test_map_conan_package(self, mock_get_conan_recipe): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(package_count, 0) - conan.map_conan_package(self.package_url1) + conan.map_conan_package(self.package_url1, ('test_pipelines')) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(package_count, 1) package = packagedb.models.Package.objects.all().first() diff --git a/minecode/tests/test_generic.py b/minecode/tests/test_generic.py index 466fe436..fef28ece 100644 --- a/minecode/tests/test_generic.py +++ b/minecode/tests/test_generic.py @@ -49,7 +49,7 @@ def test_map_generic_package(self): purl = 'pkg:generic/test@1.0.0?download_url=http://example.com/test.tar.gz' package_url = PackageURL.from_string(purl) - error_msg = generic.map_generic_package(package_url) + error_msg = 
generic.map_generic_package(package_url, ('test_pipeline')) self.assertEqual('', error_msg) package_count = Package.objects.all().count() @@ -65,7 +65,7 @@ def test_map_fetchcode_supported_package(self): self.assertEqual(0, package_count) purl = PackageURL.from_string("pkg:generic/udhcp@0.9.1") - error_msg = generic.map_fetchcode_supported_package(purl) + error_msg = generic.map_fetchcode_supported_package(purl, ('test_pipeline')) self.assertEqual('', error_msg) package_count = Package.objects.all().count() diff --git a/minecode/tests/test_maven.py b/minecode/tests/test_maven.py index 29ca649a..db6e0fee 100644 --- a/minecode/tests/test_maven.py +++ b/minecode/tests/test_maven.py @@ -720,7 +720,7 @@ def test_map_maven_package(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) package_url = PackageURL.from_string(self.scan_package.purl) - maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY) + maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY, ('test_pipeline')) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() @@ -732,7 +732,7 @@ def test_map_maven_package_custom_repo_url(self): self.assertEqual(0, package_count) custom_repo_purl = "pkg:maven/org.eclipse.core/runtime@20070801?repository_url=https://packages.atlassian.com/mvn/maven-atlassian-external/" package_url = PackageURL.from_string(custom_repo_purl) - maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY) + maven_visitor.map_maven_package(package_url, packagedb.models.PackageContentType.BINARY, ('test_pipeline')) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() diff --git a/minecode/tests/test_npm.py b/minecode/tests/test_npm.py index 
8ad5bb18..1887480e 100644 --- a/minecode/tests/test_npm.py +++ b/minecode/tests/test_npm.py @@ -199,7 +199,7 @@ def test_map_npm_package(self): package_count = packagedb.models.Package.objects.all().count() self.assertEqual(0, package_count) package_url = PackageURL.from_string(self.scan_package.purl) - npm.map_npm_package(package_url) + npm.map_npm_package(package_url, ('test_pipeline')) package_count = packagedb.models.Package.objects.all().count() self.assertEqual(1, package_count) package = packagedb.models.Package.objects.all().first() From 32ea6e810c3c371e24318440f079a7453f1c5d4a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 18 Apr 2024 01:38:00 +0530 Subject: [PATCH 4/5] Update docs for addon_pipelines Signed-off-by: Keshav Priyadarshi --- packagedb/api.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/packagedb/api.py b/packagedb/api.py index 8450c50c..1c7e32de 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -679,7 +679,14 @@ class CollectViewSet(viewsets.ViewSet): Return Package data for the purl passed in the `purl` query parameter. If the package does not exist, we will fetch the Package data and return - it in the same request. + it in the same request. + Optionally, provide the list of addon_pipelines + to run on the package. Find all addon pipelines [here.](https://scancodeio.readthedocs.io/en/latest/built-in-pipelines.html) + + **Example:** + + /api/collect/?purl=pkg:npm/foo@1.2.3&addon_pipelines=collect_symbols&addon_pipelines=inspect_elf_binaries + **Note:** Use `Index packages` for bulk indexing/reindexing of packages. """ @@ -752,7 +759,10 @@ def index_packages(self, request, *args, **kwargs): """ Take a list of `packages` (where each item is a dictionary containing either PURL or versionless PURL along with vers range, optionally with source package PURL) - and index it. + and index it. + Each package can also have a list of `addon_pipelines` to run on the package. 
+ Find all addon pipelines [here.](https://scancodeio.readthedocs.io/en/latest/built-in-pipelines.html) + If `reindex` flag is True then existing package will be rescanned, if `reindex_set` is True then all the package in the same set will be rescanned. @@ -768,17 +778,20 @@ def index_packages(self, request, *args, **kwargs): { "purl": "pkg:npm/less@1.0.32", "vers": null, - "source_purl": None + "source_purl": None, + "addon_pipelines": ['collect_symbols'] }, { "purl": "pkg:npm/less", "vers": "vers:npm/>=1.1.0|<=1.1.4", - "source_purl": None + "source_purl": None, + "addon_pipelines": None }, { "purl": "pkg:npm/foobar", "vers": null, - "source_purl": None + "source_purl": None, + "addon_pipelines": ['inspect_elf_binaries', 'collect_symbols'] } ] "reindex": true, From 94647c8286c48cbb346351551f25de2c401be49f Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 18 Apr 2024 18:59:29 +0530 Subject: [PATCH 5/5] Validate the user input addon pipelines Signed-off-by: Keshav Priyadarshi --- minecode/model_utils.py | 8 ++++++++ minecode/visitors/npm.py | 2 +- packagedb/api.py | 6 +++--- packagedb/serializers.py | 14 ++++++++++++++ 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/minecode/model_utils.py b/minecode/model_utils.py index e9f987b1..ba305288 100644 --- a/minecode/model_utils.py +++ b/minecode/model_utils.py @@ -32,6 +32,14 @@ 'fingerprint_codebase', ) +# This is the list of supported addon pipelines to run when we scan a Package for +# indexing. 
+SUPPORTED_ADDON_PIPELINES = ( + 'collect_symbols', + 'collect_source_strings', + 'inspect_elf_binaries', +) + def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, reindex_uri=False, priority=0): """ diff --git a/minecode/visitors/npm.py b/minecode/visitors/npm.py index cdeee271..5906038a 100644 --- a/minecode/visitors/npm.py +++ b/minecode/visitors/npm.py @@ -162,7 +162,7 @@ def map_npm_package(package_url, pipelines): @priority_router.route('pkg:npm/.*') -def process_request(purl_str, *kwargs): +def process_request(purl_str, **kwargs): """ Process `priority_resource_uri` containing a npm Package URL (PURL) as a URI. diff --git a/packagedb/api.py b/packagedb/api.py index 1c7e32de..4f28e443 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -53,7 +53,7 @@ from packagedb.package_managers import VERSION_API_CLASSES_BY_PACKAGE_TYPE from packagedb.package_managers import get_api_package_name from packagedb.package_managers import get_version_fetcher -from packagedb.serializers import CollectPackageSerializer +from packagedb.serializers import CollectPackageSerializer, is_supported_addon_pipeline from packagedb.serializers import DependentPackageSerializer from packagedb.serializers import IndexPackagesResponseSerializer from packagedb.serializers import IndexPackagesSerializer @@ -856,7 +856,7 @@ def _reindex_package(package, reindexed_packages, **kwargs): purl = package['purl'] kwargs = dict() if addon_pipelines := package.get('source_purl'): - kwargs["addon_pipelines"] = addon_pipelines + kwargs["addon_pipelines"] = [pipe for pipe in addon_pipelines if is_supported_addon_pipeline(pipe)] lookups = purl_to_lookups(purl) packages = Package.objects.filter(**lookups) if packages.count() > 0: @@ -883,7 +883,7 @@ def _reindex_package(package, reindexed_packages, **kwargs): if source_purl := package.get('source_purl'): extra_fields["source_uri"] = source_purl if addon_pipelines := package.get('addon_pipelines'): - extra_fields["addon_pipelines"] = 
addon_pipelines + extra_fields["addon_pipelines"] = [pipe for pipe in addon_pipelines if is_supported_addon_pipeline(pipe)] priority_resource_uri = PriorityResourceURI.objects.insert(purl, **extra_fields) if priority_resource_uri: queued_packages.append(purl) diff --git a/packagedb/serializers.py b/packagedb/serializers.py index 98efcb30..bd229f70 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -379,6 +379,7 @@ class CollectPackageSerializer(Serializer): ) addon_pipelines = ListField( + child = CharField(), required=False, allow_empty=True, help_text="Addon pipelines to run on the package.", @@ -399,6 +400,14 @@ def validate_source_purl(self, value): raise ValidationError(f'purl validation error: {e}') return value + def validate_addon_pipelines(self, value): + invalid_pipelines = [pipe for pipe in value if not is_supported_addon_pipeline(pipe)] + if invalid_pipelines: + raise ValidationError(f'Error unsupported addon pipelines: {",".join(invalid_pipelines)}') + + return value + + class PackageVersSerializer(Serializer): purl = CharField() vers = CharField(required=False) @@ -486,3 +495,8 @@ class PurltoGitRepoSerializer(Serializer): class PurltoGitRepoResponseSerializer(Serializer): git_repo = CharField(required=True) + + +def is_supported_addon_pipeline(addon_pipeline): + from minecode.model_utils import SUPPORTED_ADDON_PIPELINES + return addon_pipeline in SUPPORTED_ADDON_PIPELINES