diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9dae02c..4b5298f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,11 @@ Changelog ========= +v5.1.0 +------ + +*2024-05-10* -- Update scancode-toolkit fingerprint plugin to get file fingerprints for text files. + v5.0.0 ----- diff --git a/setup.cfg b/setup.cfg index 05dcac8..6190978 100644 --- a/setup.cfg +++ b/setup.cfg @@ -75,8 +75,8 @@ docs = [options.entry_points] -scancode_post_scan = - fingerprint = matchcode_toolkit.plugin_fingerprint:Fingerprint +scancode_scan = + fingerprint = matchcode_toolkit.plugin_fingerprint:FingerprintScanner scancodeio_pipelines = fingerprint_codebase = matchcode_toolkit.pipelines.fingerprint_codebase:FingerprintCodebase diff --git a/src/matchcode_toolkit/fingerprinting.py b/src/matchcode_toolkit/fingerprinting.py index b357765..22c4a1e 100644 --- a/src/matchcode_toolkit/fingerprinting.py +++ b/src/matchcode_toolkit/fingerprinting.py @@ -215,7 +215,7 @@ def get_file_fingerprint_hashes(location, ngram_length=8, **kwargs): if not (filetype.is_file(location) and ft.is_text): return {} - with open(location, encoding='utf-8') as f: + with open(location) as f: content = f.read() file_fingerprint = create_file_fingerprint( diff --git a/src/matchcode_toolkit/plugin_fingerprint.py b/src/matchcode_toolkit/plugin_fingerprint.py index d55e4bb..28eb7a6 100644 --- a/src/matchcode_toolkit/plugin_fingerprint.py +++ b/src/matchcode_toolkit/plugin_fingerprint.py @@ -10,20 +10,21 @@ import attr from commoncode.cliutils import PluggableCommandLineOption -from commoncode.cliutils import POST_SCAN_GROUP +from commoncode.cliutils import SCAN_GROUP from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints -from plugincode.post_scan import post_scan_impl -from plugincode.post_scan import PostScanPlugin +from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes +from plugincode.scan import ScanPlugin +from plugincode.scan import scan_impl -@post_scan_impl
-class Fingerprint(PostScanPlugin): +@scan_impl +class FingerprintScanner(ScanPlugin): resource_attributes = dict( directory_content_fingerprint=attr.ib(default=None, repr=False), directory_structure_fingerprint=attr.ib(default=None, repr=False), + halo1=attr.ib(default=None, repr=False), ) sort_order = 6 - options = [ PluggableCommandLineOption( ( @@ -31,14 +32,17 @@ class Fingerprint(PostScanPlugin): ), is_flag=True, default=False, - help='Compute directory fingerprints that are used for matching', - help_group=POST_SCAN_GROUP, + help='Compute directory and resource fingerprints that are used for matching', + help_group=SCAN_GROUP, sort_order=20, ) ] def is_enabled(self, fingerprint, **kwargs): return fingerprint + + def get_scanner(self, **kwargs): + return get_file_fingerprint_hashes def process_codebase(self, codebase, **kwargs): codebase = compute_codebase_directory_fingerprints(codebase) diff --git a/tests/test_plugin_fingerprinting.py b/tests/test_plugin_fingerprinting.py new file mode 100644 index 0000000..1b9959b --- /dev/null +++ b/tests/test_plugin_fingerprinting.py @@ -0,0 +1,52 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os + +from commoncode.testcase import FileBasedTesting +from scancode.cli_test_utils import check_json_scan +from scancode.cli_test_utils import run_scan_click +from scancode_config import REGEN_TEST_FIXTURES + +from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes + + + +""" +These tests spawn a new process as if launched from the command line.
+""" + + +class TestPluginFingerprinting(FileBasedTesting): + test_data_dir = os.path.join(os.path.dirname(__file__), 'testfiles') + + def test_plugin_fingerprinting_api_works(self): + test_loc = self.get_test_loc('fingerprinting/inflate.c') + detections = list(get_file_fingerprint_hashes(location=test_loc)) + assert detections + + def test_fingerprinting_plugin_works(self): + test_dir = self.get_test_loc('fingerprinting', copy=True) + result_file = self.get_temp_file('json') + args = [ + '--info', + '--fingerprint', + '--verbose', + '--json', result_file, + test_dir, + ] + run_scan_click(args) + test_loc = self.get_test_loc('fingerprinting-expected.json') + check_json_scan( + test_loc, + result_file, + remove_file_date=True, + check_headers=False, + regen=REGEN_TEST_FIXTURES + ) diff --git a/tests/testfiles/fingerprinting-expected.json b/tests/testfiles/fingerprinting-expected.json new file mode 100644 index 0000000..ac02467 --- /dev/null +++ b/tests/testfiles/fingerprinting-expected.json @@ -0,0 +1,166 @@ +{ + "files": [ + { + "path": "fingerprinting", + "type": "directory", + "name": "fingerprinting", + "base_name": "fingerprinting", + "extension": "", + "size": 0, + "sha1": null, + "md5": null, + "sha256": null, + "mime_type": null, + "file_type": null, + "programming_language": null, + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "directory_content_fingerprint": "00000005dc2f55cc5357ffff9adbdf86110e2616", + "directory_structure_fingerprint": "0000000508fe892c79327359ae042804214319db", + "halo1": null, + "files_count": 5, + "dirs_count": 0, + "size_count": 177286, + "scan_errors": [] + }, + { + "path": "fingerprinting/abbrev-1.0.3-i.json", + "type": "file", + "name": "abbrev-1.0.3-i.json", + "base_name": "abbrev-1.0.3-i", + "extension": ".json", + "size": 4793, + "sha1": "a9f36dd42181f7c75f2faac7fb5cb245d6d76755", + "md5": "2b56d6fabb52b7d647c77d32d5938e21", + "sha256": 
"353c17fd0cb1f9382fd65884cf7c2cb6e2107985cdb492178bed40345ce28be4", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "directory_content_fingerprint": null, + "directory_structure_fingerprint": null, + "halo1": "000001e9b8a9774175bc45d1a65f69306d5c609d", + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "fingerprinting/inflate-mod.c", + "type": "file", + "name": "inflate-mod.c", + "base_name": "inflate-mod", + "extension": ".c", + "size": 55466, + "sha1": "12d8cd7cb0db81b8578f608ef3619304ac87f0e0", + "md5": "b86e60b3ad49c08b6cfa0ab3b989b8d5", + "sha256": "82f60fddd2fe80234b63f9c4757740fc17fc87ba0a2c9cb74799a02a82b43c7e", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "directory_content_fingerprint": null, + "directory_structure_fingerprint": null, + "halo1": "000018f4aa3a49e4cd40718d1297be519e6564a4", + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "fingerprinting/inflate-mod2.c", + "type": "file", + "name": "inflate-mod2.c", + "base_name": "inflate-mod2", + "extension": ".c", + "size": 55546, + "sha1": "5d6dbe37ac3859b0a7ae72203a07d2b124c479d4", + "md5": "80926a46eafbf956d9b323a8258ffcd1", + "sha256": "c0c9a0895fd4411289be3014b080b98a78f5ba2b219d1201ed1acf5fd6efed9a", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "directory_content_fingerprint": null, + "directory_structure_fingerprint": null, + "halo1": 
"000018fea23b49e4cd40708d1297be719c6564a4", + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "fingerprinting/inflate.c", + "type": "file", + "name": "inflate.c", + "base_name": "inflate", + "extension": ".c", + "size": 55519, + "sha1": "b65399b472e2806e4483ab937b13f74e176fb311", + "md5": "c300e231ec51960c347b95b07345f9d7", + "sha256": "34c998ce0037c0537c04b03b276f680b945f9b2c9d1e01b287605bd6879f7fd2", + "mime_type": "text/x-c", + "file_type": "C source, ASCII text", + "programming_language": "C", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": true, + "is_script": false, + "directory_content_fingerprint": null, + "directory_structure_fingerprint": null, + "halo1": "000018fba23a49e4cd40718d1297be719e6564a4", + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + }, + { + "path": "fingerprinting/test.json", + "type": "file", + "name": "test.json", + "base_name": "test", + "extension": ".json", + "size": 5962, + "sha1": "2d30df4b09c8c9a032a2fda289c9ed62d13c39de", + "md5": "7412ef9025c34a2b3de5bc44c1ae0da4", + "sha256": "e29c2d69963d73f9774705670edbebcbe8b06af4daf5606e4f4c7c84de7261c4", + "mime_type": "application/json", + "file_type": "JSON data", + "programming_language": null, + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_media": false, + "is_source": false, + "is_script": false, + "directory_content_fingerprint": null, + "directory_structure_fingerprint": null, + "halo1": "0000026832a97761e52e4793863f3d20cc70611d", + "files_count": 0, + "dirs_count": 0, + "size_count": 0, + "scan_errors": [] + } + ] +} \ No newline at end of file