From 172af466e40e0a98fd60434608d6d8415dc3cfaf Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Wed, 31 May 2023 17:38:58 -0700 Subject: [PATCH 1/6] Add handler for generic purls Signed-off-by: Jono Yang --- minecode/visitors/generic.py | 80 ++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 minecode/visitors/generic.py diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py new file mode 100644 index 00000000..30fb4af7 --- /dev/null +++ b/minecode/visitors/generic.py @@ -0,0 +1,80 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging + +from packageurl import PackageURL + +from packagedcode.models import PackageData + +from minecode import priority_router +from packagedb.models import PackageContentType + + +""" +Collect generic packages from a download URL. +""" + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler() +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def map_generic_package(package_url): + """ + Add a generic `package_url` to the PackageDB. 
+ + Return an error string if any errors are encountered during the process + """ + from minecode.model_utils import add_package_to_scan_queue + from minecode.model_utils import merge_or_create_package + + download_url = package_url.qualifiers.get('download_url') + package = PackageData( + type=package_url.type, + namespace=package_url.namespace, + name=package_url.name, + version=package_url.version, + qualifiers=package_url.qualifiers, + subpath=package_url.subpath, + download_url=download_url, + ) + # TODO: set package_content type + + db_package, _, _, error = merge_or_create_package(package, visit_level=0) + + # Submit package for scanning + if db_package: + add_package_to_scan_queue(db_package) + + return error + + +@priority_router.route('pkg:generic/.*') +def process_request(purl_str): + """ + Process `priority_resource_uri` containing a generic Package URL (PURL) with + download_url as a qualifier + """ + try: + package_url = PackageURL.from_string(purl_str) + except ValueError as e: + error = f'error occured when parsing {purl_str}: {e}' + return error + + download_url = package_url.qualifiers.get('download_url') + if not download_url: + error = f'package_url {purl_str} does not contain a download_url qualifier' + return + + error_msg = map_generic_package(package_url) + + if error_msg: + return error_msg From d1f9fd210635460049e1f1f6a9c5100e4f18882a Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Wed, 31 May 2023 19:03:44 -0700 Subject: [PATCH 2/6] Get Package sha1 from scan info, if not present Signed-off-by: Jono Yang --- minecode/management/commands/process_scans.py | 12 ++++++++++ minecode/management/scanning.py | 23 +++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/minecode/management/commands/process_scans.py b/minecode/management/commands/process_scans.py index 4bc2af89..27b8cf94 100644 --- a/minecode/management/commands/process_scans.py +++ b/minecode/management/commands/process_scans.py @@ -73,6 +73,12 @@ def 
process_scan(cls, scannable_uri, get_scan_info_save_loc='', get_scan_data_sa ) scan_index_errors = index_package_files(package, scan_data) + scan_info = scanning._get_scan_info( + scannable_uri.scan_uuid, + api_url=cls.api_url, + api_auth_headers=cls.api_auth_headers, + get_scan_data_save_loc=get_scan_data_save_loc + ) summary = scanning.get_scan_summary( scannable_uri.scan_uuid, api_url=cls.api_url, @@ -81,6 +87,12 @@ def process_scan(cls, scannable_uri, get_scan_info_save_loc='', get_scan_data_sa ) package_updated = False + + sha1 = scan_info.get('extra_data', {}).get('sha1') + if not package.sha1 and sha1: + package.sha1 = sha1 + package_updated = True + if summary: package.summary = summary package_updated = True diff --git a/minecode/management/scanning.py b/minecode/management/scanning.py index 87555cab..eb57dc11 100644 --- a/minecode/management/scanning.py +++ b/minecode/management/scanning.py @@ -243,20 +243,39 @@ def _call_scan_get_api(scan_uuid, endpoint='', return response.json() -def get_scan_info( +def _get_scan_info( scan_uuid, api_url=SCANCODEIO_API_URL_PROJECTS, api_auth_headers=SCANCODEIO_AUTH_HEADERS, get_scan_info_save_loc='' ): """ - Return a Scan object for `scan_uuid` fetched from ScanCode.io or None. + Return a mapping of project info for `scan_uuid` fetched from ScanCode.io or None. Raise an exception on error. """ results = _call_scan_get_api(scan_uuid, endpoint='', api_url=api_url, api_auth_headers=api_auth_headers) if get_scan_info_save_loc: with open(get_scan_info_save_loc, 'w') as f: json.dump(results, f) + return results + + +def get_scan_info( + scan_uuid, + api_url=SCANCODEIO_API_URL_PROJECTS, + api_auth_headers=SCANCODEIO_AUTH_HEADERS, + get_scan_info_save_loc='' +): + """ + Return a Scan object for `scan_uuid` fetched from ScanCode.io or None. + Raise an exception on error. 
+ """ + results = _get_scan_info( + scan_uuid=scan_uuid, + api_url=api_url, + api_auth_headers=api_auth_headers, + get_scan_info_save_loc=get_scan_info_save_loc, + ) return Scan.from_response(**results) From e5c9999efcf6e670bbddcb6a7a8c83939149936e Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Thu, 1 Jun 2023 12:21:11 -0700 Subject: [PATCH 3/6] Clean up scan processing code * Get package hash info from scan info Signed-off-by: Jono Yang --- minecode/management/commands/process_scans.py | 42 ++++++++----------- minecode/management/scanning.py | 19 ++++++++- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/minecode/management/commands/process_scans.py b/minecode/management/commands/process_scans.py index 27b8cf94..aa09f617 100644 --- a/minecode/management/commands/process_scans.py +++ b/minecode/management/commands/process_scans.py @@ -73,12 +73,6 @@ def process_scan(cls, scannable_uri, get_scan_info_save_loc='', get_scan_data_sa ) scan_index_errors = index_package_files(package, scan_data) - scan_info = scanning._get_scan_info( - scannable_uri.scan_uuid, - api_url=cls.api_url, - api_auth_headers=cls.api_auth_headers, - get_scan_data_save_loc=get_scan_data_save_loc - ) summary = scanning.get_scan_summary( scannable_uri.scan_uuid, api_url=cls.api_url, @@ -86,26 +80,24 @@ def process_scan(cls, scannable_uri, get_scan_info_save_loc='', get_scan_data_sa get_scan_data_save_loc=get_scan_data_save_loc ) - package_updated = False - - sha1 = scan_info.get('extra_data', {}).get('sha1') - if not package.sha1 and sha1: - package.sha1 = sha1 - package_updated = True - - if summary: - package.summary = summary - package_updated = True - - license_expression = summary.get('declared_license_expression') - if not package.declared_license_expression and license_expression: - package.declared_license_expression = license_expression - package_updated = True - + copyright = '' declared_holder = summary.get('declared_holder') - if not package.copyright: - if 
declared_holder: - package.copyright = f'Copyright (c) {declared_holder}' + if declared_holder: + copyright = f'Copyright (c) {declared_holder}' + + values_by_updateable_fields = { + 'sha1': scan_info.sha1, + 'sha256': scan_info.sha256, + 'sha512': scan_info.sha512, + 'summary': summary, + 'declared_license_expression': summary.get('declared_license_expression'), + 'copyright': copyright, + } + + for field, value in values_by_updateable_fields.items(): + p_val = getattr(package, field) + if not p_val and value: + setattr(package, field, value) package_updated = True if package_updated: diff --git a/minecode/management/scanning.py b/minecode/management/scanning.py index eb57dc11..f75f7a63 100644 --- a/minecode/management/scanning.py +++ b/minecode/management/scanning.py @@ -74,9 +74,15 @@ class Scan(object): status = attr.ib(default=None) # as a time stamp execution_time = attr.ib(default=None) + md5 = attr.ib(default=None) + sha1 = attr.ib(default=None) + sha256 = attr.ib(default=None) + sha512 = attr.ib(default=None) + sha1_git = attr.ib(default=None) + filename = attr.ib(default=None) @classmethod - def from_response(cls, url, uuid, runs, input_sources, **kwargs): + def from_response(cls, url, uuid, runs, input_sources, extra_data={}, **kwargs): """ Return a Scan object built from an API response data arguments. 
""" @@ -95,11 +101,20 @@ def from_response(cls, url, uuid, runs, input_sources, **kwargs): if len(input_sources) > 0: uri = input_sources[0]["source"] + md5 = extra_data.get('md5') + sha1 = extra_data.get('sha1') + sha256 = extra_data.get('sha256') + sha512 = extra_data.get('sha512') + sha1_git = extra_data.get('sha1_git') + filename = extra_data.get('filename') + return Scan( url=url, uuid=uuid, run_uuid=run_uuid, uri=uri, created_date=created_date, task_start_date=task_start_date, task_end_date=task_end_date, task_exitcode=task_exitcode, - status=status, execution_time=execution_time + status=status, execution_time=execution_time, + md5=md5, sha1=sha1, sha256=sha256, sha512=sha512, + sha1_git=sha1_git, filename=filename ) @property From 0808d106aa44f833bf30b51a19dd2612875fdbdc Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Thu, 1 Jun 2023 17:27:03 -0700 Subject: [PATCH 4/6] Get other license expressions from summary * Update homepage_url and descriptions in get_enhanced_package * Display package_set and package_content field in Package metadata serializer Signed-off-by: Jono Yang --- minecode/management/commands/process_scans.py | 48 +++---------------- packagedb/api.py | 2 + packagedb/serializers.py | 6 +++ 3 files changed, 14 insertions(+), 42 deletions(-) diff --git a/minecode/management/commands/process_scans.py b/minecode/management/commands/process_scans.py index aa09f617..0764b63a 100644 --- a/minecode/management/commands/process_scans.py +++ b/minecode/management/commands/process_scans.py @@ -2,14 +2,13 @@ # Copyright (c) 2018 by nexB, Inc. http://www.nexb.com/ - All rights reserved. 
# -from collections import OrderedDict import logging import signal import sys from django.db import transaction -from license_expression import Licensing +from packagedcode.utils import combine_expressions from matchcode.models import ApproximateDirectoryContentIndex from matchcode.models import ApproximateDirectoryStructureIndex @@ -80,6 +79,10 @@ def process_scan(cls, scannable_uri, get_scan_info_save_loc='', get_scan_data_sa get_scan_data_save_loc=get_scan_data_save_loc ) + other_license_expressions = summary.get('other_license_expressions', []) + other_license_expressions = [l['value'] for l in other_license_expressions] + other_license_expression = combine_expressions(other_license_expressions) + copyright = '' declared_holder = summary.get('declared_holder') if declared_holder: @@ -91,6 +94,7 @@ def process_scan(cls, scannable_uri, get_scan_info_save_loc='', get_scan_data_sa 'sha512': scan_info.sha512, 'summary': summary, 'declared_license_expression': summary.get('declared_license_expression'), + 'other_license_expression': other_license_expression, 'copyright': copyright, } @@ -259,43 +263,3 @@ def index_package_files(package, scan_data): logger.error(msg) return scan_index_errors - - -# TODO: Remove this when scancode-toolkit is upgraded. The current version of -# scancode-toolkit in Minecode does not have this function -# TODO: from packagedcode.utils import combine_expressions -def combine_expressions(expressions, relation='AND', licensing=Licensing()): - """ - Return a combined license expression string with relation, given a list of - license expressions strings. 
- - For example: - >>> a = 'mit' - >>> b = 'gpl' - >>> combine_expressions([a, b]) - 'mit AND gpl' - >>> assert 'mit' == combine_expressions([a]) - >>> combine_expressions([]) - >>> combine_expressions(None) - >>> combine_expressions(('gpl', 'mit', 'apache',)) - 'gpl AND mit AND apache' - """ - if not expressions: - return - - if not isinstance(expressions, (list, tuple)): - raise TypeError( - 'expressions should be a list or tuple and not: {}'.format( - type(expressions))) - - # Remove duplicate element in the expressions list - expressions = list(OrderedDict((x, True) for x in expressions).keys()) - - if len(expressions) == 1: - return expressions[0] - - expressions = [licensing.parse(le, simple=True) for le in expressions] - if relation == 'OR': - return str(licensing.OR(*expressions)) - else: - return str(licensing.AND(*expressions)) diff --git a/packagedb/api.py b/packagedb/api.py index 8e3d9c8a..5dfabad7 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -316,6 +316,8 @@ def get_enhanced_package_data(self, request, *args, **kwargs): 'repository_homepage_url', 'dependencies', 'parties', + 'homepage_url', + 'description', ] diff --git a/packagedb/serializers.py b/packagedb/serializers.py index e6ba5b6f..e1eda130 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -201,6 +201,7 @@ class PackageMetadataSerializer(ModelSerializer): """ dependencies = DependentPackageSerializer(many=True) parties = PartySerializer(many=True) + package_content = SerializerMethodField() class Meta: model = Package @@ -211,6 +212,8 @@ class Meta: 'version', 'qualifiers', 'subpath', + 'package_set', + 'package_content', 'primary_language', 'description', 'release_date', @@ -247,3 +250,6 @@ class Meta: 'api_data_url', 'file_references', ) + + def get_package_content(self, obj): + return obj.get_package_content_display() From 3c9038f466ded7da790167b1cfb5d512fc80f697 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Mon, 5 Jun 2023 12:34:51 -0700 Subject: [PATCH 5/6] 
Create test for generic ondemand handler Signed-off-by: Jono Yang --- minecode/tests/test_generic.py | 68 ++++++++++++++++++++++++++++++++++ minecode/visitors/generic.py | 1 - 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 minecode/tests/test_generic.py diff --git a/minecode/tests/test_generic.py b/minecode/tests/test_generic.py new file mode 100644 index 00000000..bee29c1b --- /dev/null +++ b/minecode/tests/test_generic.py @@ -0,0 +1,68 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# purldb is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/purldb for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from django.test import TestCase as DjangoTestCase + +from minecode.utils_test import JsonBasedTesting +from minecode.visitors import generic +from packagedb.models import Package + +from packagedcode.maven import _parse +from packageurl import PackageURL + + +class GenericPriorityQueueTests(JsonBasedTesting, DjangoTestCase): + def test_process_request(self): + package_count = Package.objects.all().count() + self.assertEqual(0, package_count) + + purl = 'pkg:generic/test@1.0.0?download_url=http://example.com/test.tar.gz' + error_msg = generic.process_request(purl) + + self.assertEqual('', error_msg) + package_count = Package.objects.all().count() + self.assertEqual(1, package_count) + + package = Package.objects.first() + self.assertEqual('test', package.name) + self.assertEqual('1.0.0', package.version) + self.assertEqual('http://example.com/test.tar.gz', package.download_url) + + def test_process_request_no_download_url(self): + package_count = Package.objects.all().count() + self.assertEqual(0, package_count) + + purl = 'pkg:generic/test@1.0.0' + error_msg = generic.process_request(purl) + + self.assertEqual( + f'package_url {purl} does not 
contain a download_url qualifier', + error_msg + ) + package_count = Package.objects.all().count() + self.assertEqual(0, package_count) + + def test_map_generic_package(self): + package_count = Package.objects.all().count() + self.assertEqual(0, package_count) + + purl = 'pkg:generic/test@1.0.0?download_url=http://example.com/test.tar.gz' + package_url = PackageURL.from_string(purl) + error_msg = generic.map_generic_package(package_url) + + self.assertEqual('', error_msg) + package_count = Package.objects.all().count() + self.assertEqual(1, package_count) + + package = Package.objects.first() + self.assertEqual('test', package.name) + self.assertEqual('1.0.0', package.version) + self.assertEqual('http://example.com/test.tar.gz', package.download_url) diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py index 30fb4af7..7a64938f 100644 --- a/minecode/visitors/generic.py +++ b/minecode/visitors/generic.py @@ -14,7 +14,6 @@ from packagedcode.models import PackageData from minecode import priority_router -from packagedb.models import PackageContentType """ From 72254be3238e8710f787fd33ab6acf020c51deea Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Mon, 5 Jun 2023 13:44:48 -0700 Subject: [PATCH 6/6] Update JSON test data normalization * Add new argument such that we can remove fields at the same time we normalize Package UIDs * Update test expectations Signed-off-by: Jono Yang --- .../async-0.2.9-i-expected-content.json | 2 ++ .../async-0.2.9-i-expected-structure.json | 2 ++ minecode/tests/test_generic.py | 2 +- minecode/tests/test_maven.py | 6 ++--- minecode/tests/test_rubygems.py | 2 +- minecode/tests/test_scanning.py | 6 +++++ .../directories/find-ls-expected.json | 2 +- .../declared_license_search_expected.json | 2 ++ .../housekeeping/example_expected.json | 2 ++ .../ignore_upper_case_search_expected.json | 2 ++ .../license_expression_search_expected.json | 2 ++ .../end2end/expected_mapped_packages.json | 19 +++++++++++++ 
..._mapped_commons-jaxrs-1.21-from-index.json | 1 + ...ed_mapped_commons-jaxrs-1.21-from-pom.json | 1 + ...prockets-vendor_gems-0.1.3.gem.mapped.json | 1 + ...pdate_with_same_mining_level-expected.json | 2 ++ ...with_higher_new_mining_level-expected.json | 2 ++ ...with_lesser_new_mining_level-expected.json | 2 ++ ...st_merge_packages_no_replace-expected.json | 2 ++ ..._merge_packages_with_replace-expected.json | 2 ++ minecode/utils_test.py | 27 ++++++++++++++----- minecode/visitors/generic.py | 2 +- packagedb/tests/test_api.py | 15 ++++++----- .../tests/testfiles/api/enhanced_package.json | 1 + 24 files changed, 88 insertions(+), 19 deletions(-) diff --git a/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-content.json b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-content.json index d44a95bb..7ff06670 100644 --- a/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-content.json +++ b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-content.json @@ -28,6 +28,8 @@ "version": "0.2.9", "qualifiers": "", "subpath": "", + "package_set": null, + "package_content": null, "primary_language": null, "description": null, "release_date": null, diff --git a/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-structure.json b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-structure.json index ac5f8fc5..0f2b8692 100644 --- a/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-structure.json +++ b/matchcode/tests/testfiles/models/directory-matching/async-0.2.9-i-expected-structure.json @@ -28,6 +28,8 @@ "version": "0.2.9", "qualifiers": "", "subpath": "", + "package_set": null, + "package_content": null, "primary_language": null, "description": null, "release_date": null, diff --git a/minecode/tests/test_generic.py b/minecode/tests/test_generic.py index bee29c1b..e290d750 100644 --- 
a/minecode/tests/test_generic.py +++ b/minecode/tests/test_generic.py @@ -27,7 +27,7 @@ def test_process_request(self): purl = 'pkg:generic/test@1.0.0?download_url=http://example.com/test.tar.gz' error_msg = generic.process_request(purl) - self.assertEqual('', error_msg) + self.assertEqual(None, error_msg) package_count = Package.objects.all().count() self.assertEqual(1, package_count) diff --git a/minecode/tests/test_maven.py b/minecode/tests/test_maven.py index c14117ef..ef69fdaf 100644 --- a/minecode/tests/test_maven.py +++ b/minecode/tests/test_maven.py @@ -312,7 +312,7 @@ def test_MavenNexusIndexVisitor_with_run_visit_then_map_end2end(self): package_results = list(pac.to_dict() for pac in newly_mapped) expected_loc = self.get_test_loc('maven/end2end/expected_mapped_packages.json') - self.check_expected_results(package_results, expected_loc, regen=False) + self.check_expected_results(package_results, expected_loc, fields_to_remove=['package_set'], regen=False) # check that the map status has been updated correctly visited_then_mapped = ResourceURI.objects.filter(uri__contains='maven-index://') @@ -372,7 +372,7 @@ def test_visit_and_map_using_pom_with_unicode_multisteps(self): package_results = sorted((pac.to_dict() for pac in mapped), key=lambda d: list(d.keys())) expected_loc = self.get_test_loc('maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json') - self.check_expected_results(package_results, expected_loc, regen=False) + self.check_expected_results(package_results, expected_loc, fields_to_remove=['package_set'], regen=False) # Step 2: map a POM @@ -389,7 +389,7 @@ def test_visit_and_map_using_pom_with_unicode_multisteps(self): package_results = sorted((pac.to_dict() for pac in mapped), key=lambda d: list(d.keys())) expected_loc = self.get_test_loc('maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json') - self.check_expected_results(package_results, expected_loc, regen=False) + 
self.check_expected_results(package_results, expected_loc, fields_to_remove=['package_set'], regen=False) def test_visit_and_map_with_index(self): uri = 'https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.properties' diff --git a/minecode/tests/test_rubygems.py b/minecode/tests/test_rubygems.py index 3989a284..a56689b3 100644 --- a/minecode/tests/test_rubygems.py +++ b/minecode/tests/test_rubygems.py @@ -299,4 +299,4 @@ def test_visit_and_map_end2end(self): package_results = [pac.to_dict() for pac in mapped] expected_loc = self.get_test_loc('rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json') - self.check_expected_results(package_results, expected_loc, regen=False) + self.check_expected_results(package_results, expected_loc, fields_to_remove=['package_set'], regen=False) diff --git a/minecode/tests/test_scanning.py b/minecode/tests/test_scanning.py index 6d59f55a..1099f4d4 100644 --- a/minecode/tests/test_scanning.py +++ b/minecode/tests/test_scanning.py @@ -144,6 +144,12 @@ def testscanning_get_scan_info(self, mock_get): task_exitcode=0, status='success', execution_time=9, + md5='57431f2f6d5841eebdb964b04091b8ed', + sha1='feff0d7bacd11d37a9c96daed87dc1db163065b1', + sha256='05155c2c588ac5922d930eeb1e8a1da896956f4696ae758d110708e9f095baba', + sha512='4431f237bcdfee5d2b86b1b3f01c8abaa160d5b7007c63e6281845a3f920d89fdb2e4044f97694ddef91e174d9dd30e5016bbad46eec2d68af200a47e9cedd85', + sha1_git='ad18d88bdae8449e7c170f8e7db1bfe336dbb4e0', + filename='wagon-api-20040705.181715.jar', ) expected = attr.asdict(expected) result = attr.asdict(result) diff --git a/minecode/tests/testfiles/directories/find-ls-expected.json b/minecode/tests/testfiles/directories/find-ls-expected.json index 75eb05a8..70f9dc1e 100644 --- a/minecode/tests/testfiles/directories/find-ls-expected.json +++ b/minecode/tests/testfiles/directories/find-ls-expected.json @@ -10,7 +10,7 @@ "path":"groovy/.revision", "type":"f", "size":6, - "date":"2022-05", + "date":"2023-05", "target":null 
}, { diff --git a/minecode/tests/testfiles/housekeeping/declared_license_search_expected.json b/minecode/tests/testfiles/housekeeping/declared_license_search_expected.json index 543e4efb..1622b770 100644 --- a/minecode/tests/testfiles/housekeeping/declared_license_search_expected.json +++ b/minecode/tests/testfiles/housekeeping/declared_license_search_expected.json @@ -6,6 +6,8 @@ "version":"", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, "description":null, "release_date":null, diff --git a/minecode/tests/testfiles/housekeeping/example_expected.json b/minecode/tests/testfiles/housekeeping/example_expected.json index 543e4efb..1622b770 100644 --- a/minecode/tests/testfiles/housekeeping/example_expected.json +++ b/minecode/tests/testfiles/housekeeping/example_expected.json @@ -6,6 +6,8 @@ "version":"", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, "description":null, "release_date":null, diff --git a/minecode/tests/testfiles/housekeeping/ignore_upper_case_search_expected.json b/minecode/tests/testfiles/housekeeping/ignore_upper_case_search_expected.json index 36127036..148aaf20 100644 --- a/minecode/tests/testfiles/housekeeping/ignore_upper_case_search_expected.json +++ b/minecode/tests/testfiles/housekeeping/ignore_upper_case_search_expected.json @@ -6,6 +6,8 @@ "version":"", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, "description":null, "release_date":null, diff --git a/minecode/tests/testfiles/housekeeping/license_expression_search_expected.json b/minecode/tests/testfiles/housekeeping/license_expression_search_expected.json index 543e4efb..1622b770 100644 --- a/minecode/tests/testfiles/housekeeping/license_expression_search_expected.json +++ b/minecode/tests/testfiles/housekeeping/license_expression_search_expected.json @@ -6,6 +6,8 @@ "version":"", "qualifiers":"", "subpath":"", + 
"package_set":null, + "package_content":null, "primary_language":null, "description":null, "release_date":null, diff --git a/minecode/tests/testfiles/maven/end2end/expected_mapped_packages.json b/minecode/tests/testfiles/maven/end2end/expected_mapped_packages.json index 0a856e01..5dfd490a 100644 --- a/minecode/tests/testfiles/maven/end2end/expected_mapped_packages.json +++ b/minecode/tests/testfiles/maven/end2end/expected_mapped_packages.json @@ -6,6 +6,7 @@ "version":"1.2.0", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"APIs that App Engine provides to you to build your application.", "release_date":"2009-05-21", @@ -49,6 +50,7 @@ "version":"1.2.0", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":null, "release_date":"2009-05-21", @@ -92,6 +94,7 @@ "version":"1.0.0", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":null, "release_date":"2009-05-21", @@ -135,6 +138,7 @@ "version":"1.0", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"Library which allows discovering classes at runtime", "release_date":"2009-05-21", @@ -178,6 +182,7 @@ "version":"1.0", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Library which allows discovering classes at runtime", "release_date":"2009-05-21", @@ -221,6 +226,7 @@ "version":"0.8", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"Google Collections Library is a suite of new collections and collection-related goodness for Java 5.0", "release_date":"2009-05-21", @@ -264,6 +270,7 @@ "version":"0.8", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Google Collections Library is a suite of new collections and collection-related goodness for Java 5.0", 
"release_date":"2009-05-21", @@ -307,6 +314,7 @@ "version":"0.9", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"Google Collections Library is a suite of new collections and collection-related goodness for Java 5.0", "release_date":"2009-05-21", @@ -350,6 +358,7 @@ "version":"0.9", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Google Collections Library is a suite of new collections and collection-related goodness for Java 5.0", "release_date":"2009-05-21", @@ -393,6 +402,7 @@ "version":"1.0-rc1", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"Google Collections Library is a suite of new collections and collection-related goodness for Java 5.0", "release_date":"2009-05-21", @@ -436,6 +446,7 @@ "version":"1.0-rc1", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Google Collections Library is a suite of new collections and collection-related goodness for Java 5.0", "release_date":"2009-05-21", @@ -479,6 +490,7 @@ "version":"2.0.1", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"Protocol Buffers are a way of encoding structured data in an efficient yet\n extensible format.", "release_date":"2009-05-21", @@ -522,6 +534,7 @@ "version":"2.0.1", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Protocol Buffers are a way of encoding structured data in an efficient yet\n extensible format.", "release_date":"2009-05-21", @@ -565,6 +578,7 @@ "version":"2.0.3", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"Protocol Buffers are a way of encoding structured data in an efficient yet\n extensible format.", "release_date":"2009-05-21", @@ -608,6 +622,7 @@ 
"version":"2.0.3", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Protocol Buffers are a way of encoding structured data in an efficient yet\n extensible format.", "release_date":"2009-05-21", @@ -651,6 +666,7 @@ "version":"2.1.0", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"Protocol Buffers are a way of encoding structured data in an efficient yet\n extensible format.", "release_date":"2009-05-21", @@ -694,6 +710,7 @@ "version":"2.1.0", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Protocol Buffers are a way of encoding structured data in an efficient yet\n extensible format.", "release_date":"2009-05-21", @@ -737,6 +754,7 @@ "version":"1.0", "qualifiers":"classifier=sources", "subpath":"", + "package_content":null, "primary_language":null, "description":"The Social Graph Node Mapper is a community project to build a portable library to map social networking sites' URLs to and from a new canonical form (sgn:// URLs).", "release_date":"2009-05-21", @@ -780,6 +798,7 @@ "version":"1.0", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"The Social Graph Node Mapper is a community project to build a portable library to map social networking sites' URLs to and from a new canonical form (sgn:// URLs).", "release_date":"2009-05-21", diff --git a/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json b/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json index f0469bd5..bc9d0ae4 100644 --- a/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json +++ b/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-index.json @@ -6,6 +6,7 @@ "version":"1.21", "qualifiers":"", "subpath":"", + 
"package_content":null, "primary_language":null, "description":"Common classes to make creating REST services more consistent.", "release_date":"2009-05-21", diff --git a/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json b/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json index f0469bd5..bc9d0ae4 100644 --- a/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json +++ b/minecode/tests/testfiles/maven/end2end_multisteps/expected_mapped_commons-jaxrs-1.21-from-pom.json @@ -6,6 +6,7 @@ "version":"1.21", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Common classes to make creating REST services more consistent.", "release_date":"2009-05-21", diff --git a/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json b/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json index 117ffd8c..3513944b 100644 --- a/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json +++ b/minecode/tests/testfiles/rubygems/sprockets-vendor_gems-0.1.3.gem.mapped.json @@ -6,6 +6,7 @@ "version":"0.1.3", "qualifiers":"", "subpath":"", + "package_content":null, "primary_language":null, "description":"Get the vendored assets paths in gems.", "release_date":"2012-08-03", diff --git a/minecode/tests/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json b/minecode/tests/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json index 6f15eead..09b67467 100644 --- a/minecode/tests/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json +++ b/minecode/tests/testfiles/run_map/test_map_uri_does_update_with_same_mining_level-expected.json @@ -5,6 +5,8 @@ "version":"0.2", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, 
"description":"Description Updated", "release_date":null, diff --git a/minecode/tests/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json b/minecode/tests/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json index 12e61f2b..79491005 100644 --- a/minecode/tests/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json +++ b/minecode/tests/testfiles/run_map/test_map_uri_replace_with_new_with_higher_new_mining_level-expected.json @@ -5,6 +5,8 @@ "version":"0.2", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, "description":"Description Updated", "release_date":null, diff --git a/minecode/tests/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json b/minecode/tests/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json index 6de08a6a..08ac0005 100644 --- a/minecode/tests/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json +++ b/minecode/tests/testfiles/run_map/test_map_uri_update_only_empties_with_lesser_new_mining_level-expected.json @@ -5,6 +5,8 @@ "version":"0.1", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, "description":"Description Updated", "release_date":null, diff --git a/minecode/tests/testfiles/run_map/test_merge_packages_no_replace-expected.json b/minecode/tests/testfiles/run_map/test_merge_packages_no_replace-expected.json index 84e51a44..609f1604 100644 --- a/minecode/tests/testfiles/run_map/test_merge_packages_no_replace-expected.json +++ b/minecode/tests/testfiles/run_map/test_merge_packages_no_replace-expected.json @@ -5,6 +5,8 @@ "version":"0.1", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, "description":"Description Updated", "release_date":null, diff 
--git a/minecode/tests/testfiles/run_map/test_merge_packages_with_replace-expected.json b/minecode/tests/testfiles/run_map/test_merge_packages_with_replace-expected.json index 7c2e1a35..cccaadc0 100644 --- a/minecode/tests/testfiles/run_map/test_merge_packages_with_replace-expected.json +++ b/minecode/tests/testfiles/run_map/test_merge_packages_with_replace-expected.json @@ -5,6 +5,8 @@ "version":"0.2", "qualifiers":"", "subpath":"", + "package_set":null, + "package_content":null, "primary_language":null, "description":"Description Updated", "release_date":null, diff --git a/minecode/utils_test.py b/minecode/utils_test.py index ac20f831..b22ee5d6 100644 --- a/minecode/utils_test.py +++ b/minecode/utils_test.py @@ -196,19 +196,20 @@ def response_403(url, request): class JsonBasedTesting(FileBasedTesting): - def _normalize_package_uids(self, data): + def _normalize_results(self, data, fields_to_remove=[]): """ Returns the `data`, where any `package_uid` value has been normalized - with `purl_with_fake_uuid()` + with `purl_with_fake_uuid()` and fields from `fields_to_remove` have + been removed from `data`. 
""" if type(data) == list: - return [self._normalize_package_uids(entry) for entry in data] + return [self._normalize_results(entry, fields_to_remove) for entry in data] if type(data) in (dict, OrderedDict, ReturnDict): normalized_data = {} for key, value in data.items(): if type(value) in [list, dict, OrderedDict, ReturnDict]: - value = self._normalize_package_uids(value) + value = self._normalize_results(value, fields_to_remove) if ( key in ("package_uid", "dependency_uid", "for_package_uid") and value @@ -216,12 +217,26 @@ def _normalize_package_uids(self, data): value = purl_with_fake_uuid(value) if key == "for_packages": value = [purl_with_fake_uuid(package_uid) for package_uid in value] + if key in fields_to_remove: + continue normalized_data[key] = value return normalized_data return data - def check_expected_results(self, results, expected_loc, regen=False): + def _remove_fields_from_results(self, data, fields_to_remove): + if type(data) == list: + return [self._remove_fields_from_results(entry, fields_to_remove) for entry in data] + + if type(data) in (dict, OrderedDict, ReturnDict): + normalized_data = {} + # Remove fields from results and normalize Package UIDs + for field in fields_to_remove: + if not field in data: + continue + data.pop(field) + + def check_expected_results(self, results, expected_loc, fields_to_remove=[], regen=False): """ Check `results` are equal to expected data stored in a JSON file at `expected_loc`. 
@@ -232,7 +247,7 @@ def check_expected_results(self, results, expected_loc, regen=False): if isinstance(results, str): results = json.loads(results) - results = self._normalize_package_uids(results) + results = self._normalize_results(results, fields_to_remove) if regen: with codecs.open(expected_loc, mode='wb', encoding='utf-8') as expect: diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py index 7a64938f..628b3ba8 100644 --- a/minecode/visitors/generic.py +++ b/minecode/visitors/generic.py @@ -71,7 +71,7 @@ def process_request(purl_str): download_url = package_url.qualifiers.get('download_url') if not download_url: error = f'package_url {purl_str} does not contain a download_url qualifier' - return + return error error_msg = map_generic_package(package_url) diff --git a/packagedb/tests/test_api.py b/packagedb/tests/test_api.py index e22da63e..7fecd9ba 100644 --- a/packagedb/tests/test_api.py +++ b/packagedb/tests/test_api.py @@ -580,19 +580,22 @@ def test_package_api_get_or_fetch_package(self): self.assertEqual(2, len(response.data)) result = response.data[0] - # pop fields + + # remove fields result.pop('url') - result.pop('uuid') - result.pop('resources') - result.pop('package_set') + fields_to_remove = [ + 'uuid', + 'resources', + 'package_set', + ] - self.check_expected_results(result, expected, regen=False) + self.check_expected_results(result, expected, fields_to_remove=fields_to_remove, regen=False) def test_package_api_get_enhanced_package(self): response = self.client.get(reverse('api:package-get-enhanced-package-data', args=[self.package3.uuid])) result = response.data expected = self.get_test_loc('api/enhanced_package.json') - self.check_expected_results(result, expected, regen=False) + self.check_expected_results(result, expected, fields_to_remove=['package_set'], regen=False) class ResourceApiTestCase(TestCase): diff --git a/packagedb/tests/testfiles/api/enhanced_package.json b/packagedb/tests/testfiles/api/enhanced_package.json 
index e4bdfe6d..65c3f279 100644 --- a/packagedb/tests/testfiles/api/enhanced_package.json +++ b/packagedb/tests/testfiles/api/enhanced_package.json @@ -5,6 +5,7 @@ "version":"1.0.0", "qualifiers":"", "subpath":"", + "package_content":"binary", "primary_language":null, "description":null, "release_date":null,