Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions scanpipe/pipes/d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,7 @@ def find_jvm_packages(project, jvm_lang: jvm.JvmLanguage, logger=None):

Note: we use the same API as the ScanCode scans by design
"""
resources = (
project.codebaseresources.files().no_status().from_codebase().has_no_relation()
)
resources = project.codebaseresources.files().no_status().from_codebase()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What issue or problem does this change solve?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for #1854
This update addresses a scenario where both the development and deployment codebases contain the same .java file, but the deployed version also includes the corresponding .class file.

I think the tool relies on the "extra_data" field, such as extra_data={"java_package": "org.apache.flume.node"} (https://github.com/aboutcode-org/scancode.io/blob/main/scanpipe/tests/pipes/test_d2d.py#L402), to perform the mapping from .class to .java. (I may be wrong, but it seems I need this field to trigger the "path" mapping in the test.)

However, if a .java file in the development codebase has already been checksum-matched to its counterpart in the deployed codebase, it won't be indexed again, meaning it won’t receive extra_data and won’t be available for mapping.

Since "extra_data" is generated in find_jvm_packages(), this change ensures that all source files in the from_codebase are indexed, even if they’ve already been matched by checksum.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to me, @tdruez please have a look as well. Thanks!


from_jvm_resources = resources.filter(extension__in=jvm_lang.source_extensions)

Expand Down
Binary file not shown.
Binary file not shown.
64 changes: 30 additions & 34 deletions scanpipe/tests/pipes/test_d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,50 +394,46 @@ def test_scanpipe_pipes_d2d_map_java_to_class(self):
self.assertEqual("", to3.status)

def test_scanpipe_pipes_d2d_map_java_to_class_with_java_in_deploy(self):
sha1 = "abcde"
from1 = make_resource_file(
self.project1,
path="from/flume-ng-node-1.9.0-sources.jar-extract/org/apache/flume/node/"
"AbstractConfigurationProvider.java",
extra_data={"java_package": "org.apache.flume.node"},
sha1=sha1,
)
to1 = make_resource_file(
self.project1,
path="to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/"
"AbstractConfigurationProvider.java",
sha1=sha1,
)
to2 = make_resource_file(
self.project1,
path="to/flume-ng-node-1.9.0.jar-extract/org/apache/flume/node/"
"AbstractConfigurationProvider.class",
)
input_dir = self.project1.input_path
# "from-Baz.zip" contains Baz.java
# "to-Baz.jar" contains Baz.java and Baz.class
input_resources = [
self.data / "d2d" / "find_java_packages" / "from-Baz.zip",
self.data / "d2d" / "find_java_packages" / "to-Baz.jar",
]

copy_inputs(input_resources, input_dir)
self.from_files, self.to_files = d2d.get_inputs(self.project1)
inputs_with_codebase_path_destination = [
(self.from_files, self.project1.codebase_path / d2d.FROM),
(self.to_files, self.project1.codebase_path / d2d.TO),
]
for input_files, codebase_path in inputs_with_codebase_path_destination:
for input_file_path in input_files:
scancode.extract_archive(input_file_path, codebase_path)

scancode.extract_archives(
self.project1.codebase_path,
recurse=True,
)
pipes.collect_and_create_codebase_resources(self.project1)
buffer = io.StringIO()

# The pipeline will run map_checksum first
d2d.map_checksum(self.project1, "sha1", logger=buffer.write)
expected = "Mapping 1 to/ resources using sha1 against from/ codebase"
self.assertEqual(expected, buffer.getvalue())
self.assertEqual(1, to1.related_from.count())
relation1 = to1.related_from.get()
self.assertEqual("sha1", relation1.map_type)
self.assertEqual(from1, relation1.from_resource)
d2d.map_checksum(
project=self.project1, checksum_field="sha1", logger=buffer.write
)

d2d.find_jvm_packages(
self.project1, jvm_lang=jvm.JavaLanguage, logger=buffer.write
)
expected = "Finding java packages for 1 ('.java',) resources."
self.assertIn(expected, buffer.getvalue())
# Now run map_java_to_class
d2d.map_jvm_to_class(
self.project1, logger=buffer.write, jvm_lang=jvm.JavaLanguage
)
expected = "Mapping 1 .class resources to 1 ('.java',)"
self.assertIn(expected, buffer.getvalue())
self.assertEqual(2, self.project1.codebaserelations.count())
relation2 = self.project1.codebaserelations.get(
to_resource=to2, from_resource=from1
)
self.assertEqual("java_to_class", relation2.map_type)
expected = {"from_source_root": "from/flume-ng-node-1.9.0-sources.jar-extract/"}
self.assertEqual(expected, relation2.extra_data)

def test_scanpipe_pipes_d2d_map_java_to_class_no_java(self):
make_resource_file(self.project1, path="to/Abstract.class")
Expand Down