From 67554fe1f53628be3d2eff280a1643d0d0fa6c7c Mon Sep 17 00:00:00 2001 From: Pratyksh Gupta Date: Sat, 22 Nov 2025 00:46:06 +0530 Subject: [PATCH] Fix: Replace deprecated documentDescribes with DESCRIBES relationships in SPDX 2.3+ Signed-off-by: Pratyksh Gupta --- scanpipe/pipes/output.py | 7 ++-- scanpipe/pipes/spdx.py | 61 +++++++++++++++++++++++++++---- scanpipe/tests/pipes/test_spdx.py | 34 ++++++++++++++++- 3 files changed, 90 insertions(+), 12 deletions(-) diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py index 03fc4ca51c..b34a9de7cf 100644 --- a/scanpipe/pipes/output.py +++ b/scanpipe/pipes/output.py @@ -741,15 +741,16 @@ def to_spdx(project, version=spdx.SPDX_SPEC_VERSION_2_3, include_files=False): packages_as_spdx.extend(project_inputs_as_spdx_packages) # Use the Project's input as the root element that the SPDX document describes. - # This ensures "documentDescribes" points only to the main subject of the SBOM, - # not to every dependency or file in the project. + # This ensures the "describes" field (which generates DESCRIBES relationships + # in SPDX 2.3+ or documentDescribes in SPDX 2.2) points only to the main subject + # of the SBOM, not to every dependency or file in the project. # See https://github.com/spdx/spdx-spec/issues/395 and # https://github.com/aboutcode-org/scancode.io/issues/564#issuecomment-3269296563 # for detailed context. if len(project_inputs_as_spdx_packages) == 1: describe_spdx_id = project_inputs_as_spdx_packages[0].spdx_id - # Fallback to the Project as the SPDX root element for the "documentDescribes", + # Fallback to the Project as the SPDX root element for the "describes" field, # if more than one input, or if no inputs, are available. 
else: project_as_root_package = spdx.Package( diff --git a/scanpipe/pipes/spdx.py b/scanpipe/pipes/spdx.py index 842541712b..7398650eda 100644 --- a/scanpipe/pipes/spdx.py +++ b/scanpipe/pipes/spdx.py @@ -576,14 +576,16 @@ class Document: name: str namespace: str - # "documentDescribes" identifies the root element(s) that this SPDX document - # describes. + # "describes" identifies the root element(s) that this SPDX document describes. # In most SBOM cases, this will be a single SPDX ID representing the top-level # package or project (e.g., the root manifest in a repository or the main # distribution artifact). # Although defined as an array, it should NOT list every package, file, or snippet. # Multiple entries are only expected in special, non-SBOM cases # (e.g., SPDX license lists). + # Note: In SPDX 2.3+, the "documentDescribes" field is deprecated. This field + # is used internally to generate DESCRIBES relationships from SPDXRef-DOCUMENT + # to the described elements. For SPDX 2.2, it is still output as documentDescribes. # See https://github.com/spdx/spdx-spec/issues/395 for discussion and clarification. 
describes: list creation_info: CreationInfo @@ -606,11 +608,15 @@ def as_dict(self): "SPDXID": self.spdx_id, "name": self.safe_document_name(self.name), "documentNamespace": self.namespace, - "documentDescribes": self.describes, "creationInfo": self.creation_info.as_dict(), "packages": [package.as_dict(self.version) for package in self.packages], } + # documentDescribes is deprecated in SPDX 2.3, use relationships instead + # For SPDX 2.2, keep documentDescribes for backward compatibility + if self.version == SPDX_SPEC_VERSION_2_2: + data["documentDescribes"] = self.describes + if self.files: data["files"] = [file.as_dict() for file in self.files] @@ -619,9 +625,23 @@ def as_dict(self): license_info.as_dict() for license_info in self.extracted_licenses ] - if self.relationships: + # Build relationships list, including DESCRIBES relationships for SPDX 2.3 + relationships = list(self.relationships) + + # For SPDX 2.3, add DESCRIBES relationships from document to described elements + # This replaces the deprecated documentDescribes field + if self.version == SPDX_SPEC_VERSION_2_3 and self.describes: + for described_id in self.describes: + describes_relationship = Relationship( + spdx_id=self.spdx_id, + related_spdx_id=described_id, + relationship="DESCRIBES", + ) + relationships.append(describes_relationship) + + if relationships: data["relationships"] = [ - relationship.as_dict() for relationship in self.relationships + relationship.as_dict() for relationship in relationships ] if self.comment: @@ -635,13 +655,38 @@ def as_json(self, indent=2): @classmethod def from_data(cls, data): + # Extract describes from documentDescribes field (SPDX 2.2) or from DESCRIBES relationships (SPDX 2.3+) + document_spdx_id = data.get("SPDXID", "SPDXRef-DOCUMENT") + relationships_data = data.get("relationships", []) + + describes = data.get("documentDescribes") + + # If documentDescribes is not present, try to extract from DESCRIBES relationships + if not describes: + describes = [ + 
rel.get("relatedSpdxElement") + for rel in relationships_data + if rel.get("spdxElementId") == document_spdx_id + and rel.get("relationshipType") == "DESCRIBES" + ] + + # Filter out DESCRIBES relationships from the relationships list to avoid duplication + # when converting back to dict (they will be regenerated for SPDX 2.3) + filtered_relationships = [ + rel for rel in relationships_data + if not ( + rel.get("spdxElementId") == document_spdx_id + and rel.get("relationshipType") == "DESCRIBES" + ) + ] + return cls( - spdx_id=data.get("SPDXID"), + spdx_id=document_spdx_id, version=data.get("spdxVersion", "").split("SPDX-")[-1], data_license=data.get("dataLicense"), name=data.get("name"), namespace=data.get("documentNamespace"), - describes=data.get("documentDescribes"), + describes=describes or [], creation_info=CreationInfo.from_data(data.get("creationInfo", {})), packages=[ Package.from_data(package_data) @@ -654,7 +699,7 @@ def from_data(cls, data): ], relationships=[ Relationship.from_data(relationship_data) - for relationship_data in data.get("relationships", []) + for relationship_data in filtered_relationships ], comment=data.get("comment"), ) diff --git a/scanpipe/tests/pipes/test_spdx.py b/scanpipe/tests/pipes/test_spdx.py index d9b4384bc1..55920b61d8 100644 --- a/scanpipe/tests/pipes/test_spdx.py +++ b/scanpipe/tests/pipes/test_spdx.py @@ -196,7 +196,6 @@ def setUp(self): "SPDXID": "SPDXRef-DOCUMENT", "name": "document_name", "documentNamespace": "https://[CreatorWebsite]/[DocumentName]-[UUID]", - "documentDescribes": ["SPDXRef-project"], "creationInfo": { "created": "2022-09-21T13:50:20Z", "creators": [ @@ -276,6 +275,11 @@ def setUp(self): "spdxElementId": "SPDXRef-package1", "relatedSpdxElement": "SPDXRef-file1", "relationshipType": "CONTAINS", + }, + { + "spdxElementId": "SPDXRef-DOCUMENT", + "relatedSpdxElement": "SPDXRef-project", + "relationshipType": "DESCRIBES", } ], "comment": "This document was created using SPDXCode-1.0", @@ -412,3 +416,31 @@ 
def test_spdx_validate_document(self): with self.assertRaises(Exception): spdx.validate_document({}, self.schema_2_3) + + def test_spdx_document_2_3_uses_relationships_not_documentdescribes(self): + """Test that SPDX 2.3 uses DESCRIBES relationships instead of documentDescribes.""" + document = spdx.Document(**self.document_data) + document_dict = document.as_dict() + + # SPDX 2.3 should NOT have documentDescribes + assert "documentDescribes" not in document_dict + + # SPDX 2.3 should have DESCRIBES relationships + relationships = document_dict.get("relationships", []) + describes_relationships = [ + rel for rel in relationships + if rel.get("relationshipType") == "DESCRIBES" + ] + assert len(describes_relationships) > 0 + assert describes_relationships[0]["spdxElementId"] == "SPDXRef-DOCUMENT" + assert describes_relationships[0]["relatedSpdxElement"] == "SPDXRef-project" + + def test_spdx_document_2_2_still_uses_documentdescribes(self): + """Test that SPDX 2.2 still includes documentDescribes field.""" + document_data_2_2 = self.document_data.copy() + document = spdx.Document(**document_data_2_2, version=spdx.SPDX_SPEC_VERSION_2_2) + document_dict = document.as_dict() + + # SPDX 2.2 should still have documentDescribes + assert "documentDescribes" in document_dict + assert document_dict["documentDescribes"] == ["SPDXRef-project"]