diff --git a/matchcode-toolkit/CHANGELOG.rst b/matchcode-toolkit/CHANGELOG.rst index 41cf3e53..ee235eea 100644 --- a/matchcode-toolkit/CHANGELOG.rst +++ b/matchcode-toolkit/CHANGELOG.rst @@ -1,6 +1,16 @@ Changelog ========= +v1.1.1 +------ + +*2023-06-29* -- Do not include empty files when computing directory fingerprints. + +v1.1.0 +------ + +*2023-06-22* -- Rename ``compute_directory_fingerprints`` to ``compute_codebase_directory_fingerprints`` and create a new version of ``compute_directory_fingerprints`` that works on Resource objects instead of codebases. + v1.0.0 ------ diff --git a/matchcode-toolkit/pyproject.toml b/matchcode-toolkit/pyproject.toml index 8c075d6c..b64839f2 100644 --- a/matchcode-toolkit/pyproject.toml +++ b/matchcode-toolkit/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "matchcode-toolkit" -version = "1.0.0" +version = "1.1.1" [build-system] requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"] diff --git a/matchcode-toolkit/setup.cfg b/matchcode-toolkit/setup.cfg index eb9c6162..9d0f6c84 100644 --- a/matchcode-toolkit/setup.cfg +++ b/matchcode-toolkit/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = matchcode-toolkit -version = 1.0.0 +version = 1.1.1 license = Apache-2.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py b/matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py index 9c72ba5e..943e6f97 100644 --- a/matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py +++ b/matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py @@ -69,30 +69,50 @@ def create_structure_fingerprint(directory, children): return _create_directory_fingerprint(features) -def compute_directory_fingerprints(codebase): +def _compute_directory_fingerprints(directory, codebase): """ - Compute fingerprints for a directory from `codebase` + Compute fingerprints for `directory` from `codebase` """ - for resource in codebase.walk(topdown=False): - if 
resource.is_file or not resource.path: - continue - children = [r for r in resource.walk(codebase) if r.is_file] - if len(children) == 1: - continue + # We do not want to add empty files to our fingerprint + children = [r for r in directory.walk(codebase) if r.is_file and r.size] + if len(children) == 1: + return - directory_content_fingerprint = create_content_fingerprint(children) - if hasattr(resource, 'directory_content_fingerprint'): - resource.directory_content_fingerprint = directory_content_fingerprint - else: - resource.extra_data['directory_content'] = directory_content_fingerprint + directory_content_fingerprint = create_content_fingerprint(children) + if hasattr(directory, 'directory_content_fingerprint'): + directory.directory_content_fingerprint = directory_content_fingerprint + else: + directory.extra_data['directory_content'] = directory_content_fingerprint - directory_structure_fingerprint = create_structure_fingerprint(resource, children) - if hasattr(resource, 'directory_structure_fingerprint'): - resource.directory_structure_fingerprint = directory_structure_fingerprint - else: - resource.extra_data['directory_structure'] = create_structure_fingerprint(resource, children) + directory_structure_fingerprint = create_structure_fingerprint(directory, children) + if hasattr(directory, 'directory_structure_fingerprint'): + directory.directory_structure_fingerprint = directory_structure_fingerprint + else: + directory.extra_data['directory_structure'] = directory_structure_fingerprint + + directory.save(codebase) + return directory + + +def compute_directory_fingerprints(directory, codebase): + """ + Recursively compute fingerprints for `directory` from `codebase` + """ + for resource in directory.walk(codebase, topdown=False): + if resource.is_file: + continue + _ = _compute_directory_fingerprints(resource, codebase) + return directory - resource.save(codebase) + +def compute_codebase_directory_fingerprints(codebase): + """ + Compute fingerprints for 
directories from `codebase` + """ + for resource in codebase.walk(topdown=False): + if resource.is_file or not resource.path: + continue + _ = _compute_directory_fingerprints(resource, codebase) return codebase diff --git a/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py b/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py index 842e5a95..7bc5e13b 100644 --- a/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py +++ b/matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py @@ -20,7 +20,7 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints from scanpipe.pipelines.scan_package import ScanPackage from scanpipe.pipes.codebase import ProjectCodebase @@ -63,4 +63,4 @@ def fingerprint_codebase(self): Compute directory fingerprints for matching purposes """ project_codebase = ProjectCodebase(self.project) - compute_directory_fingerprints(project_codebase) + compute_codebase_directory_fingerprints(project_codebase) diff --git a/matchcode-toolkit/src/matchcode_toolkit/plugin_fingerprint.py b/matchcode-toolkit/src/matchcode_toolkit/plugin_fingerprint.py index fba3e7d1..d55e4bb7 100644 --- a/matchcode-toolkit/src/matchcode_toolkit/plugin_fingerprint.py +++ b/matchcode-toolkit/src/matchcode_toolkit/plugin_fingerprint.py @@ -11,7 +11,7 @@ from commoncode.cliutils import PluggableCommandLineOption from commoncode.cliutils import POST_SCAN_GROUP -from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints from plugincode.post_scan import post_scan_impl from plugincode.post_scan import PostScanPlugin @@ 
-41,4 +41,4 @@ def is_enabled(self, fingerprint, **kwargs): return fingerprint def process_codebase(self, codebase, **kwargs): - codebase = compute_directory_fingerprints(codebase) + codebase = compute_codebase_directory_fingerprints(codebase) diff --git a/matchcode-toolkit/tests/test_fingerprinting.py b/matchcode-toolkit/tests/test_fingerprinting.py index 761691f5..5b37de8f 100644 --- a/matchcode-toolkit/tests/test_fingerprinting.py +++ b/matchcode-toolkit/tests/test_fingerprinting.py @@ -14,7 +14,7 @@ from matchcode_toolkit.fingerprinting import _create_directory_fingerprint from matchcode_toolkit.fingerprinting import _get_resource_subpath -from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints from matchcode_toolkit.fingerprinting import create_content_fingerprint from matchcode_toolkit.fingerprinting import create_halohash_chunks from matchcode_toolkit.fingerprinting import create_structure_fingerprint @@ -95,10 +95,10 @@ def test_create_halohash_chunks(self): self.assertEqual(chunk3, expected_chunk3) self.assertEqual(chunk4, expected_chunk4) - def test_compute_directory_fingerprints(self): + def test_compute_codebase_directory_fingerprints(self): scan_loc = self.get_test_loc('abbrev-1.0.3-i.json') vc = VirtualCodebase(location=scan_loc) - vc = compute_directory_fingerprints(vc) + vc = compute_codebase_directory_fingerprints(vc) directory_content = vc.root.extra_data['directory_content'] directory_structure = vc.root.extra_data['directory_structure'] expected_directory_content = '0000000346ce04751a3c98f00086f16a91d9790b' diff --git a/matchcode/api.py b/matchcode/api.py index f20dd166..092933e3 100644 --- a/matchcode/api.py +++ b/matchcode/api.py @@ -129,6 +129,28 @@ class MultipleCharFilter(MultipleChoiceFilter): field_class = MultipleCharField +# TODO: Think of a better name for this filter +class MultipleCharInFilter(MultipleCharFilter): + def 
filter(self, qs, value): + if not value: + # Even though not a noop, no point filtering if empty. + return qs + + if self.is_noop(qs, value): + return qs + + predicate = self.get_filter_predicate(value) + old_field_name = next(iter(predicate)) + new_field_name = f'{old_field_name}__in' + predicate[new_field_name] = predicate[old_field_name] + predicate.pop(old_field_name) + + q = Q(**predicate) + qs = self.get_method(qs)(q) + + return qs.distinct() if self.distinct else qs + + class MultipleSHA1Filter(MultipleCharFilter): """ Overrides `MultipleCharFilter.filter()` to convert the SHA1 diff --git a/matchcode/indexing.py b/matchcode/indexing.py index e4289031..107dd698 100644 --- a/matchcode/indexing.py +++ b/matchcode/indexing.py @@ -13,7 +13,7 @@ from commoncode.resource import VirtualCodebase -from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints from matchcode.models import ApproximateDirectoryContentIndex from matchcode.models import ApproximateDirectoryStructureIndex from matchcode.models import ExactPackageArchiveIndex @@ -150,5 +150,5 @@ def index_package_directories(package): if not vc: return 0, 0 - vc = compute_directory_fingerprints(vc) + vc = compute_codebase_directory_fingerprints(vc) return index_directory_fingerprints(vc, package) diff --git a/matchcode/tests/test_index_packages.py b/matchcode/tests/test_index_packages.py index f6850d33..c1db948c 100644 --- a/matchcode/tests/test_index_packages.py +++ b/matchcode/tests/test_index_packages.py @@ -11,7 +11,7 @@ from commoncode.resource import VirtualCodebase -from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints from matchcode_toolkit.fingerprinting import hexstring_to_binarray from matchcode.indexing import _create_virtual_codebase_from_package_resources from matchcode.indexing import 
index_directory_fingerprints @@ -155,7 +155,7 @@ def test__create_virtual_codebase_from_package_resources(self): def test_index_directory_fingerprints(self): vc = _create_virtual_codebase_from_package_resources(self.test_package1) - vc = compute_directory_fingerprints(vc) + vc = compute_codebase_directory_fingerprints(vc) # Ensure tables are empty prior to indexing self.assertFalse(ApproximateDirectoryContentIndex.objects.all()) diff --git a/matchcode/tests/test_match.py b/matchcode/tests/test_match.py index 0488edcb..3b12e7d2 100644 --- a/matchcode/tests/test_match.py +++ b/matchcode/tests/test_match.py @@ -13,7 +13,7 @@ from commoncode.resource import VirtualCodebase from packagedb.models import Package -from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints from matchcode.management.commands.index_packages import index_package_directories from matchcode.match import EXACT_PACKAGE_ARCHIVE_MATCH from matchcode.match import APPROXIMATE_DIRECTORY_STRUCTURE_MATCH @@ -37,7 +37,7 @@ def run_do_match_from_scan(scan_file_location, match_type): matched_to=attr.ib(default=attr.Factory(list)) ) ) - vc = compute_directory_fingerprints(vc) + vc = compute_codebase_directory_fingerprints(vc) do_match(vc, match_type) return vc diff --git a/matchcode/tests/test_models.py b/matchcode/tests/test_models.py index ae6b9e06..5e5744f7 100644 --- a/matchcode/tests/test_models.py +++ b/matchcode/tests/test_models.py @@ -13,7 +13,7 @@ from packagedb.models import Package import attr -from matchcode_toolkit.fingerprinting import compute_directory_fingerprints +from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints from matchcode_toolkit.fingerprinting import hexstring_to_binarray from matchcode.management.commands.index_packages import index_package_directories from matchcode.models import ApproximateDirectoryContentIndex @@ -169,7 +169,7 @@ def 
test_ApproximateDirectoryStructureIndex_match_subdir(self): location=scan_location, resource_attributes=dict(packages=attr.ib(default=attr.Factory(list))) ) - codebase = compute_directory_fingerprints(vc) + codebase = compute_codebase_directory_fingerprints(vc) # populate codebase with match results for resource in codebase.walk(topdown=True): @@ -192,7 +192,7 @@ def test_ApproximateDirectoryContentIndex_match_subdir(self): location=scan_location, resource_attributes=dict(packages=attr.ib(default=attr.Factory(list))) ) - codebase = compute_directory_fingerprints(vc) + codebase = compute_codebase_directory_fingerprints(vc) # populate codebase with match results for resource in codebase.walk(topdown=True): diff --git a/minecode/management/commands/process_scans.py b/minecode/management/commands/process_scans.py index 0764b63a..87235bac 100644 --- a/minecode/management/commands/process_scans.py +++ b/minecode/management/commands/process_scans.py @@ -61,58 +61,69 @@ def process_scan(cls, scannable_uri, get_scan_info_save_loc='', get_scan_data_sa if scannable_uri.scan_status in (ScannableURI.SCAN_SUBMITTED, ScannableURI.SCAN_IN_PROGRESS): scannable_uri.scan_status = get_scan_status(scan_info) elif scannable_uri.scan_status in (ScannableURI.SCAN_COMPLETED,): - logger.info('Indexing scanned files for URI: {}'.format(scannable_uri)) - - package = scannable_uri.package - scan_data = scanning.get_scan_data( - scannable_uri.scan_uuid, - api_url=cls.api_url, - api_auth_headers=cls.api_auth_headers, - get_scan_data_save_loc=get_scan_data_save_loc - ) - scan_index_errors = index_package_files(package, scan_data) + scan_index_errors = [] + try: + logger.info('Indexing scanned files for URI: {}'.format(scannable_uri)) + + package = scannable_uri.package + input_size = scan_info.size + if input_size: + computed_timeout = ((input_size / 1000000) / 2) * 60 + timeout = max(computed_timeout, scanning.REQUEST_TIMEOUT) + else: + timeout = scanning.REQUEST_TIMEOUT + scan_data = 
scanning.get_scan_data( + scannable_uri.scan_uuid, + api_url=cls.api_url, + api_auth_headers=cls.api_auth_headers, + timeout=timeout, + get_scan_data_save_loc=get_scan_data_save_loc + ) + scan_index_errors.extend(index_package_files(package, scan_data)) - summary = scanning.get_scan_summary( - scannable_uri.scan_uuid, - api_url=cls.api_url, - api_auth_headers=cls.api_auth_headers, - get_scan_data_save_loc=get_scan_data_save_loc - ) + summary = scanning.get_scan_summary( + scannable_uri.scan_uuid, + api_url=cls.api_url, + api_auth_headers=cls.api_auth_headers, + get_scan_data_save_loc=get_scan_data_save_loc + ) + + other_license_expressions = summary.get('other_license_expressions', []) + other_license_expressions = [l['value'] for l in other_license_expressions if l['value']] + other_license_expression = combine_expressions(other_license_expressions) + + copyright = '' + declared_holder = summary.get('declared_holder') + if declared_holder: + copyright = f'Copyright (c) {declared_holder}' + + values_by_updateable_fields = { + 'sha1': scan_info.sha1, + 'sha256': scan_info.sha256, + 'sha512': scan_info.sha512, + 'summary': summary, + 'declared_license_expression': summary.get('declared_license_expression'), + 'other_license_expression': other_license_expression, + 'copyright': copyright, + } + + for field, value in values_by_updateable_fields.items(): + p_val = getattr(package, field) + if not p_val and value: + setattr(package, field, value) + package_updated = True + + if package_updated: + package.save() - other_license_expressions = summary.get('other_license_expressions', []) - other_license_expressions = [l['value'] for l in other_license_expressions] - other_license_expression = combine_expressions(other_license_expressions) - - copyright = '' - declared_holder = summary.get('declared_holder') - if declared_holder: - copyright = f'Copyright (c) {declared_holder}' - - values_by_updateable_fields = { - 'sha1': scan_info.sha1, - 'sha256': scan_info.sha256, - 
'sha512': scan_info.sha512, - 'summary': summary, - 'declared_license_expression': summary.get('declared_license_expression'), - 'other_license_expression': other_license_expression, - 'copyright': copyright, - } - - for field, value in values_by_updateable_fields.items(): - p_val = getattr(package, field) - if not p_val and value: - setattr(package, field, value) - package_updated = True - - if package_updated: - package.save() - - # TODO: We should rerun the specific indexers that have failed - if scan_index_errors: - scannable_uri.index_error = '\n'.join(scan_index_errors) - scannable_uri.scan_status = ScannableURI.SCAN_INDEX_FAILED - else: scannable_uri.scan_status = ScannableURI.SCAN_INDEXED + except Exception as e: + error_message = str(e) + '\n' + # TODO: We should rerun the specific indexers that have failed + if scan_index_errors: + error_message += '\n'.join(scan_index_errors) + scannable_uri.index_error = error_message + scannable_uri.scan_status = ScannableURI.SCAN_INDEX_FAILED scannable_uri.wip_date = None scannable_uri.save() diff --git a/minecode/management/scanning.py b/minecode/management/scanning.py index f75f7a63..e7a6e4a2 100644 --- a/minecode/management/scanning.py +++ b/minecode/management/scanning.py @@ -26,7 +26,7 @@ SLEEP_WHEN_EMPTY = 1 # in seconds -REQUEST_TIMEOUT = 3 +REQUEST_TIMEOUT = 120 # Only SCANCODEIO_URL can be provided through setting SCANCODEIO_URL = settings.SCANCODEIO_URL @@ -80,6 +80,7 @@ class Scan(object): sha512 = attr.ib(default=None) sha1_git = attr.ib(default=None) filename = attr.ib(default=None) + size = attr.ib(default=None) @classmethod def from_response(cls, url, uuid, runs, input_sources, extra_data={}, **kwargs): @@ -107,6 +108,7 @@ def from_response(cls, url, uuid, runs, input_sources, extra_data={}, **kwargs): sha512 = extra_data.get('sha512') sha1_git = extra_data.get('sha1_git') filename = extra_data.get('filename') + size = extra_data.get('size') return Scan( url=url, uuid=uuid, run_uuid=run_uuid, uri=uri, @@ -114,7 +116,7 
@@ def from_response(cls, url, uuid, runs, input_sources, extra_data={}, **kwargs): task_end_date=task_end_date, task_exitcode=task_exitcode, status=status, execution_time=execution_time, md5=md5, sha1=sha1, sha256=sha256, sha512=sha512, - sha1_git=sha1_git, filename=filename + sha1_git=sha1_git, filename=filename, size=size ) @property @@ -243,8 +245,13 @@ def get_scan_url(scan_uuid, api_url=SCANCODEIO_API_URL_PROJECTS, suffix=''): return url -def _call_scan_get_api(scan_uuid, endpoint='', - api_url=SCANCODEIO_API_URL_PROJECTS, api_auth_headers=SCANCODEIO_AUTH_HEADERS): +def _call_scan_get_api( + scan_uuid, + endpoint='', + api_url=SCANCODEIO_API_URL_PROJECTS, + api_auth_headers=SCANCODEIO_AUTH_HEADERS, + timeout=REQUEST_TIMEOUT, +): """ Send a get request to the scan API for `scan_uuid` and return response mapping from a JSON response. Call either the plain scan enpoint or the data @@ -252,7 +259,7 @@ def _call_scan_get_api(scan_uuid, endpoint='', exception on error. """ scan_url = get_scan_url(scan_uuid, api_url=api_url, suffix=endpoint) - response = requests.get(url=scan_url, timeout=REQUEST_TIMEOUT, headers=api_auth_headers) + response = requests.get(url=scan_url, timeout=timeout, headers=api_auth_headers) if not response.ok: response.raise_for_status() return response.json() @@ -262,13 +269,20 @@ def _get_scan_info( scan_uuid, api_url=SCANCODEIO_API_URL_PROJECTS, api_auth_headers=SCANCODEIO_AUTH_HEADERS, + timeout=REQUEST_TIMEOUT, get_scan_info_save_loc='' ): """ Return a mapping of project info for `scan_uuid` fetched from ScanCode.io or None. Raise an exception on error. 
""" - results = _call_scan_get_api(scan_uuid, endpoint='', api_url=api_url, api_auth_headers=api_auth_headers) + results = _call_scan_get_api( + scan_uuid, + endpoint='', + api_url=api_url, + api_auth_headers=api_auth_headers, + timeout=timeout + ) if get_scan_info_save_loc: with open(get_scan_info_save_loc, 'w') as f: json.dump(results, f) @@ -279,6 +293,7 @@ def get_scan_info( scan_uuid, api_url=SCANCODEIO_API_URL_PROJECTS, api_auth_headers=SCANCODEIO_AUTH_HEADERS, + timeout=REQUEST_TIMEOUT, get_scan_info_save_loc='' ): """ @@ -289,6 +304,7 @@ def get_scan_info( scan_uuid=scan_uuid, api_url=api_url, api_auth_headers=api_auth_headers, + timeout=timeout, get_scan_info_save_loc=get_scan_info_save_loc, ) return Scan.from_response(**results) @@ -298,6 +314,7 @@ def get_scan_data( scan_uuid, api_url=SCANCODEIO_API_URL_PROJECTS, api_auth_headers=SCANCODEIO_AUTH_HEADERS, + timeout=REQUEST_TIMEOUT, get_scan_data_save_loc='' ): """ @@ -305,7 +322,13 @@ def get_scan_data( ScanCode.io or None. Raise an exception on error. """ # FIXME: we should return a temp location instead - results = _call_scan_get_api(scan_uuid, endpoint='results', api_url=api_url, api_auth_headers=api_auth_headers) + results = _call_scan_get_api( + scan_uuid, + endpoint='results', + api_url=api_url, + api_auth_headers=api_auth_headers, + timeout=timeout + ) if get_scan_data_save_loc: with open(get_scan_data_save_loc, 'w') as f: json.dump(results, f) @@ -316,6 +339,7 @@ def get_scan_summary( scan_uuid, api_url=SCANCODEIO_API_URL_PROJECTS, api_auth_headers=SCANCODEIO_AUTH_HEADERS, + timeout=REQUEST_TIMEOUT, get_scan_data_save_loc='' ): """ @@ -323,7 +347,13 @@ def get_scan_summary( ScanCode.io or None. Raise an exception on error. 
""" # FIXME: we should return a temp location instead - results = _call_scan_get_api(scan_uuid, endpoint='summary', api_url=api_url, api_auth_headers=api_auth_headers) + results = _call_scan_get_api( + scan_uuid, + endpoint='summary', + api_url=api_url, + api_auth_headers=api_auth_headers, + timeout=timeout + ) if get_scan_data_save_loc: with open(get_scan_data_save_loc, 'w') as f: json.dump(results, f) @@ -383,7 +413,7 @@ def process_scans(cls, exit_on_empty=False, max_uris=0, **kwargs): while True: # Wait before processing anything - time.sleep(REQUEST_TIMEOUT) + time.sleep(3) if cls.MUST_STOP: cls.logger.info('Graceful exit of the scan processing loop.') diff --git a/minecode/tests/test_migrations.py b/minecode/tests/test_migrations.py index ac9961a3..2f930856 100644 --- a/minecode/tests/test_migrations.py +++ b/minecode/tests/test_migrations.py @@ -90,16 +90,9 @@ def test_populate_has_error_fields(self): "map_error", "has_visit_error", "visit_error", - ).all() + ).order_by('uri') ) expected = [ - { - 'has_map_error': False, - 'has_visit_error': False, - 'map_error': None, - 'uri': 'http://example.com/4', - 'visit_error': None - }, { 'has_map_error': True, 'has_visit_error': True, @@ -120,7 +113,14 @@ def test_populate_has_error_fields(self): 'map_error': 'error', 'uri': 'http://example.com/3', 'visit_error': None - } + }, + { + 'has_map_error': False, + 'has_visit_error': False, + 'map_error': None, + 'uri': 'http://example.com/4', + 'visit_error': None + }, ] self.assertEquals(results, expected) diff --git a/minecode/tests/test_scanning.py b/minecode/tests/test_scanning.py index 1099f4d4..a5203740 100644 --- a/minecode/tests/test_scanning.py +++ b/minecode/tests/test_scanning.py @@ -150,6 +150,7 @@ def testscanning_get_scan_info(self, mock_get): sha512='4431f237bcdfee5d2b86b1b3f01c8abaa160d5b7007c63e6281845a3f920d89fdb2e4044f97694ddef91e174d9dd30e5016bbad46eec2d68af200a47e9cedd85', sha1_git='ad18d88bdae8449e7c170f8e7db1bfe336dbb4e0', 
filename='wagon-api-20040705.181715.jar', + size=47069, ) expected = attr.asdict(expected) result = attr.asdict(result) diff --git a/packagedb/api.py b/packagedb/api.py index 626517c5..48a0c278 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -14,7 +14,6 @@ from django_filters.filters import OrderingFilter import django_filters -from packagedcode.models import PackageData from packageurl import PackageURL from packageurl.contrib.django.utils import purl_to_lookups from rest_framework import status @@ -23,6 +22,7 @@ from rest_framework.response import Response from matchcode.api import MultipleCharFilter +from matchcode.api import MultipleCharInFilter # UnusedImport here! # But importing the mappers and visitors module triggers routes registration from minecode import visitors # NOQA @@ -69,16 +69,59 @@ def filter(self, qs, value): class ResourceFilter(FilterSet): package = PackageResourceUUIDFilter(label='Package UUID') purl = PackageResourcePurlFilter(label='Package pURL') - md5 = MultipleCharFilter( + md5 = MultipleCharInFilter( help_text="Exact MD5. Multi-value supported.", ) - sha1 = MultipleCharFilter( + sha1 = MultipleCharInFilter( help_text="Exact SHA1. 
Multi-value supported.", ) class ResourceViewSet(viewsets.ReadOnlyModelViewSet): - queryset = Resource.objects.prefetch_related('package') + queryset = Resource.objects.select_related('package').defer( + 'package__history', + 'package__md5', + 'package__sha1', + 'package__sha256', + 'package__sha512', + 'package__extra_data', + 'package__filename', + 'package__primary_language', + 'package__description', + 'package__release_date', + 'package__homepage_url', + 'package__download_url', + 'package__size', + 'package__bug_tracking_url', + 'package__code_view_url', + 'package__vcs_url', + 'package__repository_homepage_url', + 'package__repository_download_url', + 'package__api_data_url', + 'package__copyright', + 'package__holder', + 'package__declared_license_expression', + 'package__declared_license_expression_spdx', + 'package__license_detections', + 'package__other_license_expression', + 'package__other_license_expression_spdx', + 'package__other_license_detections', + 'package__extracted_license_statement', + 'package__notice_text', + 'package__datasource_id', + 'package__file_references', + 'package__last_modified_date', + 'package__mining_level', + 'package__keywords', + 'package__root_path', + 'package__source_packages', + 'package__last_indexed_date', + 'package__index_error', + 'package__package_set', + 'package__package_content', + 'package__summary', + 'package__search_vector', + ) serializer_class = ResourceAPISerializer filterset_class = ResourceFilter lookup_field = 'sha1' @@ -140,10 +183,10 @@ class PackageFilter(FilterSet): version = MultipleCharFilter( help_text="Exact version. Multi-value supported.", ) - md5 = MultipleCharFilter( + md5 = MultipleCharInFilter( help_text="Exact MD5. Multi-value supported.", ) - sha1 = MultipleCharFilter( + sha1 = MultipleCharInFilter( help_text="Exact SHA1. 
Multi-value supported.", ) purl = MultiplePackageURLFilter(label='Package URL') @@ -182,7 +225,7 @@ class Meta: class PackageViewSet(viewsets.ReadOnlyModelViewSet): - queryset = Package.objects.all() + queryset = Package.objects.prefetch_related('dependencies', 'parties') serializer_class = PackageAPISerializer lookup_field = 'uuid' filterset_class = PackageFilter diff --git a/packagedb/models.py b/packagedb/models.py index 16ff482e..e940e543 100644 --- a/packagedb/models.py +++ b/packagedb/models.py @@ -909,7 +909,7 @@ class Meta: unique_together = ( ('package', 'path'), ) - ordering = ('package', 'path') + ordering = ('id',) indexes = [ models.Index(fields=['md5']), models.Index(fields=['sha1']), diff --git a/packagedb/serializers.py b/packagedb/serializers.py index e1eda130..374dd7eb 100644 --- a/packagedb/serializers.py +++ b/packagedb/serializers.py @@ -61,6 +61,7 @@ class Meta: 'urls', 'extra_data', ) + read_only_fields = fields class ResourceMetadataSerializer(HyperlinkedModelSerializer): @@ -186,6 +187,7 @@ class Meta: 'dependencies', 'resources', ) + read_only_fields = fields def get_package_content(self, obj): return obj.get_package_content_display() diff --git a/packagedb/tests/test_api.py b/packagedb/tests/test_api.py index 7fecd9ba..1bb36ee4 100644 --- a/packagedb/tests/test_api.py +++ b/packagedb/tests/test_api.py @@ -7,6 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +from unittest.case import expectedFailure from uuid import uuid4 import json import os diff --git a/purldb/urls.py b/purldb/urls.py index da126ed3..590be7f1 100644 --- a/purldb/urls.py +++ b/purldb/urls.py @@ -8,7 +8,7 @@ # from django.conf.urls import include -from django.urls import re_path +from django.urls import path from django.views.generic import RedirectView from rest_framework import routers @@ -23,16 +23,16 @@ api_router = routers.DefaultRouter() -api_router.register(r'packages', PackageViewSet) -api_router.register(r'resources', ResourceViewSet) -api_router.register(r'approximate_directory_content_index', ApproximateDirectoryContentIndexViewSet) -api_router.register(r'approximate_directory_structure_index', ApproximateDirectoryStructureIndexViewSet) -api_router.register(r'exact_file_index', ExactFileIndexViewSet) -api_router.register(r'exact_package_archive_index', ExactPackageArchiveIndexViewSet) -api_router.register(r'cditems', CDitemViewSet, 'cditems') -api_router.register(r'on_demand_queue', PriorityResourceURIViewSet) +api_router.register('packages', PackageViewSet) +api_router.register('resources', ResourceViewSet) +api_router.register('approximate_directory_content_index', ApproximateDirectoryContentIndexViewSet) +api_router.register('approximate_directory_structure_index', ApproximateDirectoryStructureIndexViewSet) +api_router.register('exact_file_index', ExactFileIndexViewSet) +api_router.register('exact_package_archive_index', ExactPackageArchiveIndexViewSet) +api_router.register('cditems', CDitemViewSet, 'cditems') +api_router.register('on_demand_queue', PriorityResourceURIViewSet) urlpatterns = [ - re_path(r'^api/', include((api_router.urls, 'api'))), - re_path("", RedirectView.as_view(url="api/")), + path('api/', include((api_router.urls, 'api'))), + path("", RedirectView.as_view(url="api/")), ] diff --git a/setup.cfg b/setup.cfg index 76e5bc30..681b6fe7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -55,7 +55,7 @@ install_requires = 
rubymarshal == 1.0.3 scancode-toolkit[full] == 32.0.1 urlpy == 0.5 - matchcode-toolkit == 1.0.0 + matchcode-toolkit == 1.1.1 setup_requires = setuptools_scm[toml] >= 4 python_requires = >=3.8