Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,15 +741,16 @@ def to_spdx(project, version=spdx.SPDX_SPEC_VERSION_2_3, include_files=False):
packages_as_spdx.extend(project_inputs_as_spdx_packages)

# Use the Project's input as the root element that the SPDX document describes.
# This ensures "documentDescribes" points only to the main subject of the SBOM,
# not to every dependency or file in the project.
# This ensures the "describes" field (which generates DESCRIBES relationships
# in SPDX 2.3+ or documentDescribes in SPDX 2.2) points only to the main subject
# of the SBOM, not to every dependency or file in the project.
# See https://github.com/spdx/spdx-spec/issues/395 and
# https://github.com/aboutcode-org/scancode.io/issues/564#issuecomment-3269296563
# for detailed context.
if len(project_inputs_as_spdx_packages) == 1:
describe_spdx_id = project_inputs_as_spdx_packages[0].spdx_id

# Fallback to the Project as the SPDX root element for the "documentDescribes",
# Fallback to the Project as the SPDX root element for the "describes" field,
# if more than one input, or if no inputs, are available.
else:
project_as_root_package = spdx.Package(
Expand Down
61 changes: 53 additions & 8 deletions scanpipe/pipes/spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,14 +576,16 @@ class Document:

name: str
namespace: str
# "documentDescribes" identifies the root element(s) that this SPDX document
# describes.
# "describes" identifies the root element(s) that this SPDX document describes.
# In most SBOM cases, this will be a single SPDX ID representing the top-level
# package or project (e.g., the root manifest in a repository or the main
# distribution artifact).
# Although defined as an array, it should NOT list every package, file, or snippet.
# Multiple entries are only expected in special, non-SBOM cases
# (e.g., SPDX license lists).
# Note: "describes" is an internal attribute name, not a serialized SPDX field.
# For SPDX 2.2, its value is output as the "documentDescribes" field.
# For SPDX 2.3, where "documentDescribes" is deprecated, its value is used to
# generate DESCRIBES relationships from the document's SPDXID to each described
# element instead.
# See https://github.com/spdx/spdx-spec/issues/395 for discussion and clarification.
describes: list
creation_info: CreationInfo
Expand All @@ -606,11 +608,15 @@ def as_dict(self):
"SPDXID": self.spdx_id,
"name": self.safe_document_name(self.name),
"documentNamespace": self.namespace,
"documentDescribes": self.describes,
"creationInfo": self.creation_info.as_dict(),
"packages": [package.as_dict(self.version) for package in self.packages],
}

# documentDescribes is deprecated in SPDX 2.3, use relationships instead
# For SPDX 2.2, keep documentDescribes for backward compatibility
if self.version == SPDX_SPEC_VERSION_2_2:
data["documentDescribes"] = self.describes

if self.files:
data["files"] = [file.as_dict() for file in self.files]

Expand All @@ -619,9 +625,23 @@ def as_dict(self):
license_info.as_dict() for license_info in self.extracted_licenses
]

if self.relationships:
# Build relationships list, including DESCRIBES relationships for SPDX 2.3
relationships = list(self.relationships)

# For SPDX 2.3, add DESCRIBES relationships from document to described elements
# This replaces the deprecated documentDescribes field
if self.version == SPDX_SPEC_VERSION_2_3 and self.describes:
for described_id in self.describes:
describes_relationship = Relationship(
spdx_id=self.spdx_id,
related_spdx_id=described_id,
relationship="DESCRIBES",
)
relationships.append(describes_relationship)

if relationships:
data["relationships"] = [
relationship.as_dict() for relationship in self.relationships
relationship.as_dict() for relationship in relationships
]

if self.comment:
Expand All @@ -635,13 +655,38 @@ def as_json(self, indent=2):

@classmethod
def from_data(cls, data):
# Extract "describes" from the "documentDescribes" field (SPDX 2.2) or,
# when that field is absent, from DESCRIBES relationships (SPDX 2.3+).
document_spdx_id = data.get("SPDXID", "SPDXRef-DOCUMENT")
relationships_data = data.get("relationships", [])

describes = data.get("documentDescribes")

# If documentDescribes is not present, try to extract from DESCRIBES relationships
if not describes:
describes = [
rel.get("relatedSpdxElement")
for rel in relationships_data
if rel.get("spdxElementId") == document_spdx_id
and rel.get("relationshipType") == "DESCRIBES"
]

# Filter out DESCRIBES relationships from the relationships list to avoid duplication
# when converting back to dict (they will be regenerated for SPDX 2.3)
filtered_relationships = [
rel for rel in relationships_data
if not (
rel.get("spdxElementId") == document_spdx_id
and rel.get("relationshipType") == "DESCRIBES"
)
]

return cls(
spdx_id=data.get("SPDXID"),
spdx_id=document_spdx_id,
version=data.get("spdxVersion", "").split("SPDX-")[-1],
data_license=data.get("dataLicense"),
name=data.get("name"),
namespace=data.get("documentNamespace"),
describes=data.get("documentDescribes"),
describes=describes or [],
creation_info=CreationInfo.from_data(data.get("creationInfo", {})),
packages=[
Package.from_data(package_data)
Expand All @@ -654,7 +699,7 @@ def from_data(cls, data):
],
relationships=[
Relationship.from_data(relationship_data)
for relationship_data in data.get("relationships", [])
for relationship_data in filtered_relationships
],
comment=data.get("comment"),
)
Expand Down
34 changes: 33 additions & 1 deletion scanpipe/tests/pipes/test_spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ def setUp(self):
"SPDXID": "SPDXRef-DOCUMENT",
"name": "document_name",
"documentNamespace": "https://[CreatorWebsite]/[DocumentName]-[UUID]",
"documentDescribes": ["SPDXRef-project"],
"creationInfo": {
"created": "2022-09-21T13:50:20Z",
"creators": [
Expand Down Expand Up @@ -276,6 +275,11 @@ def setUp(self):
"spdxElementId": "SPDXRef-package1",
"relatedSpdxElement": "SPDXRef-file1",
"relationshipType": "CONTAINS",
},
{
"spdxElementId": "SPDXRef-DOCUMENT",
"relatedSpdxElement": "SPDXRef-project",
"relationshipType": "DESCRIBES",
}
],
"comment": "This document was created using SPDXCode-1.0",
Expand Down Expand Up @@ -412,3 +416,31 @@ def test_spdx_validate_document(self):

with self.assertRaises(Exception):
spdx.validate_document({}, self.schema_2_3)

def test_spdx_document_2_3_uses_relationships_not_documentdescribes(self):
    """SPDX 2.3 output uses DESCRIBES relationships, not documentDescribes."""
    # Pin the version explicitly so the test does not silently depend on the
    # default value of ``Document.version``. Merging into a new dict also
    # avoids a duplicate-keyword error if the fixture ever sets "version".
    document_data_2_3 = {
        **self.document_data,
        "version": spdx.SPDX_SPEC_VERSION_2_3,
    }
    document_dict = spdx.Document(**document_data_2_3).as_dict()

    # The deprecated field must not be emitted for SPDX 2.3.
    self.assertNotIn("documentDescribes", document_dict)

    # The described element must be exposed through a DESCRIBES relationship.
    describes_relationships = [
        relationship
        for relationship in document_dict.get("relationships", [])
        if relationship.get("relationshipType") == "DESCRIBES"
    ]
    self.assertTrue(describes_relationships)
    first_relationship = describes_relationships[0]
    self.assertEqual("SPDXRef-DOCUMENT", first_relationship["spdxElementId"])
    self.assertEqual("SPDXRef-project", first_relationship["relatedSpdxElement"])

def test_spdx_document_2_2_still_uses_documentdescribes(self):
    """SPDX 2.2 output still includes the documentDescribes field."""
    # Build a new dict with the version overridden rather than passing
    # ``version=`` next to ``**self.document_data``: the latter raises a
    # TypeError (duplicate keyword) if the fixture ever contains "version".
    document_data_2_2 = {
        **self.document_data,
        "version": spdx.SPDX_SPEC_VERSION_2_2,
    }
    document_dict = spdx.Document(**document_data_2_2).as_dict()

    # SPDX 2.2 keeps the field for backward compatibility.
    self.assertIn("documentDescribes", document_dict)
    self.assertEqual(["SPDXRef-project"], document_dict["documentDescribes"])