From 2a1bf86bafd01df8fdb255fc65d973d2ca797c97 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 28 Oct 2025 15:18:25 +0800 Subject: [PATCH 1/3] Update "find_jvm_packages" to include all resources - Ensure extra_data is populated in "find_jvm_packages()" for all resources for mapping purposes - Adjust test logic to update extra_data after map_checksum, matching d2d pipeline sequence Signed-off-by: Chin Yeung Li --- scanpipe/pipes/d2d.py | 2 +- scanpipe/tests/pipes/test_d2d.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/scanpipe/pipes/d2d.py b/scanpipe/pipes/d2d.py index 6b4a9ca728..da0bb8462b 100644 --- a/scanpipe/pipes/d2d.py +++ b/scanpipe/pipes/d2d.py @@ -263,7 +263,7 @@ def find_jvm_packages(project, jvm_lang: jvm.JvmLanguage, logger=None): Note: we use the same API as the ScanCode scans by design """ resources = ( - project.codebaseresources.files().no_status().from_codebase().has_no_relation() + project.codebaseresources.files().no_status().from_codebase() ) from_jvm_resources = resources.filter(extension__in=jvm_lang.source_extensions) diff --git a/scanpipe/tests/pipes/test_d2d.py b/scanpipe/tests/pipes/test_d2d.py index 74903b2648..164ed9bb51 100644 --- a/scanpipe/tests/pipes/test_d2d.py +++ b/scanpipe/tests/pipes/test_d2d.py @@ -399,7 +399,6 @@ def test_scanpipe_pipes_d2d_map_java_to_class_with_java_in_deploy(self): self.project1, path="from/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume/node/" "AbstractConfigurationProvider.java", - extra_data={"java_package": "org.apache.flume.node"}, sha1=sha1, ) to1 = make_resource_file( @@ -425,6 +424,14 @@ def test_scanpipe_pipes_d2d_map_java_to_class_with_java_in_deploy(self): self.assertEqual("sha1", relation1.map_type) self.assertEqual(from1, relation1.from_resource) + # The "java_package" field in extra_data is required for mapping + # Java packages to classes. + # Simulate the "find_jvm_packages" call by updating extra_data in + # the from1 resource + # Note that "find_java_packages" is called before + # "map_java_to_class" + from1.update_extra_data({"java_package": "org.apache.flume.node"}) + # Now run map_java_to_class d2d.map_jvm_to_class( self.project1, logger=buffer.write, jvm_lang=jvm.JavaLanguage From e1c8b5c29bb53502b72d58d26ed598ccda2c6850 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Tue, 28 Oct 2025 16:47:29 +0800 Subject: [PATCH 2/3] Reformat code with `make valid` #1854 Signed-off-by: Chin Yeung Li --- scanpipe/pipes/d2d.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scanpipe/pipes/d2d.py b/scanpipe/pipes/d2d.py index da0bb8462b..126834fa12 100644 --- a/scanpipe/pipes/d2d.py +++ b/scanpipe/pipes/d2d.py @@ -262,9 +262,7 @@ def find_jvm_packages(project, jvm_lang: jvm.JvmLanguage, logger=None): Note: we use the same API as the ScanCode scans by design """ - resources = ( - project.codebaseresources.files().no_status().from_codebase() - ) + resources = project.codebaseresources.files().no_status().from_codebase() from_jvm_resources = resources.filter(extension__in=jvm_lang.source_extensions) From 4dc2b3484cf7519eaa72fe40eefcb73bb298e8f8 Mon Sep 17 00:00:00 2001 From: Chin Yeung Li Date: Thu, 30 Oct 2025 15:45:33 +0800 Subject: [PATCH 3/3] Rewrite the test to simulate the D2D steps. #1854 Signed-off-by: Chin Yeung Li --- .../data/d2d/find_java_packages/from-Baz.zip | Bin 0 -> 187 bytes .../data/d2d/find_java_packages/to-Baz.jar | Bin 0 -> 552 bytes scanpipe/tests/pipes/test_d2d.py | 71 ++++++++---------- 3 files changed, 30 insertions(+), 41 deletions(-) create mode 100644 scanpipe/tests/data/d2d/find_java_packages/from-Baz.zip create mode 100644 scanpipe/tests/data/d2d/find_java_packages/to-Baz.jar diff --git a/scanpipe/tests/data/d2d/find_java_packages/from-Baz.zip b/scanpipe/tests/data/d2d/find_java_packages/from-Baz.zip new file mode 100644 index 0000000000000000000000000000000000000000..5ea951a62aa02d6ea0bda03b8e16ee0bdfa39f79 GIT binary patch literal 187 zcmWIWW@h1H0DA}HYi literal 0 HcmV?d00001 diff --git a/scanpipe/tests/data/d2d/find_java_packages/to-Baz.jar b/scanpipe/tests/data/d2d/find_java_packages/to-Baz.jar new file mode 100644 index 0000000000000000000000000000000000000000..70c33f22d7c2fd4100123aa3ad9f3a07b7c98ad8 GIT binary patch literal 552 zcmWIWW@h1HVBlb22(65ZW# zYGkyiO?@_XO4g^Mk420?i`Y4;SWFiF!!eUU6Ao~XtBnWUC$OLHv1yF!DD;r3L1qfdOX$?jY4*-qtYmEQ^ literal 0 HcmV?d00001 diff --git a/scanpipe/tests/pipes/test_d2d.py b/scanpipe/tests/pipes/test_d2d.py index 164ed9bb51..30eb617293 100644 --- a/scanpipe/tests/pipes/test_d2d.py +++ b/scanpipe/tests/pipes/test_d2d.py @@ -394,57 +394,46 @@ def test_scanpipe_pipes_d2d_map_java_to_class(self): self.assertEqual("", to3.status) def test_scanpipe_pipes_d2d_map_java_to_class_with_java_in_deploy(self): - sha1 = "abcde" - from1 = make_resource_file( - self.project1, - path="from/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume/node/" - "AbstractConfigurationProvider.java", - sha1=sha1, - ) - to1 = make_resource_file( - self.project1, - path="to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/" - "AbstractConfigurationProvider.java", - sha1=sha1, - ) - to2 = make_resource_file( - self.project1, - path="to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/" - "AbstractConfigurationProvider.class", - ) + input_dir = self.project1.input_path + # "from-Baz.zip" contains Baz.java + # "to-Baz.jar" contains Baz.java and Baz.class + input_resources = [ + self.data / "d2d" / "find_java_packages" / "from-Baz.zip", + self.data / "d2d" / "find_java_packages" / "to-Baz.jar", + ] + copy_inputs(input_resources, input_dir) + self.from_files, self.to_files = d2d.get_inputs(self.project1) + inputs_with_codebase_path_destination = [ + (self.from_files, self.project1.codebase_path / d2d.FROM), + (self.to_files, self.project1.codebase_path / d2d.TO), + ] + for input_files, codebase_path in inputs_with_codebase_path_destination: + for input_file_path in input_files: + scancode.extract_archive(input_file_path, codebase_path) + + scancode.extract_archives( + self.project1.codebase_path, + recurse=True, + ) + pipes.collect_and_create_codebase_resources(self.project1) buffer = io.StringIO() - # The pipeline will run map_checksum first - d2d.map_checksum(self.project1, "sha1", logger=buffer.write) - expected = "Mapping 1 to/ resources using sha1 against from/ codebase" - self.assertEqual(expected, buffer.getvalue()) - self.assertEqual(1, to1.related_from.count()) - relation1 = to1.related_from.get() - self.assertEqual("sha1", relation1.map_type) - self.assertEqual(from1, relation1.from_resource) - - # The "java_package" field in extra_data is required for mapping - # Java packages to classes. - # Simulate the "find_jvm_packages" call by updating extra_data in - # the from1 resource - # Note that "find_java_packages" is called before - # "map_java_to_class" - from1.update_extra_data({"java_package": "org.apache.flume.node"}) + d2d.map_checksum( + project=self.project1, checksum_field="sha1", logger=buffer.write + ) + d2d.find_jvm_packages( + self.project1, jvm_lang=jvm.JavaLanguage, logger=buffer.write + ) + expected = "Finding java packages for 1 ('.java',) resources." + self.assertIn(expected, buffer.getvalue()) # Now run map_java_to_class d2d.map_jvm_to_class( self.project1, logger=buffer.write, jvm_lang=jvm.JavaLanguage ) expected = "Mapping 1 .class resources to 1 ('.java',)" self.assertIn(expected, buffer.getvalue()) - self.assertEqual(2, self.project1.codebaserelations.count()) - relation2 = self.project1.codebaserelations.get( - to_resource=to2, from_resource=from1 - ) - self.assertEqual("java_to_class", relation2.map_type) - expected = {"from_source_root": "from/flume-ng-node-1.9.0-sources.jar-extract/"} - self.assertEqual(expected, relation2.extra_data) def test_scanpipe_pipes_d2d_map_java_to_class_no_java(self): make_resource_file(self.project1, path="to/Abstract.class")