Skip to content

Commit

Permalink
Attach hasSBOM nodes to artifacts instead of packages (#1883)
Browse files Browse the repository at this point in the history
* Attach hasSBOM nodes to artifacts instead of packages

- If possible (i.e. a digest is available for the subject of
  an SBOM), hasSBOM nodes will be attached to artifacts now,
  not packages.
- Also removed some unneeded parser map accessor funcs, and
  added a slice utility func used in this branch.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Fix top level artifacts not being added with the DOCUMENT key

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Update tests to cover new HasSBOM artifact behavior

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Fix SPDX file artifact parsing

- In this PR I introduced a bug where files in the SBOM were not
  promoted to top-level Document file artifacts and packages even
  if there was a relationship that indicated they were such. I
  fixed that here.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Call s.getTopLevelSPDXIDs() just once and store it

- We pass it into both callers instead of calling it in
  each one.
- Also rename the function since:
  1. It is not just getting package SPDX IDs anymore.
  2. We want to comply with https://go.dev/wiki/CodeReviewComments#initialisms

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Simplify collection of top-level components

- We don't actually need maps for this as we only ever
  access the key SPDXRef-DOCUMENT within those maps.
- So make them slices. And we need just one slice for
  top-level artifacts, be they from packages or files.
- This makes it possible to delete the slice concat
  utility func as well.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Make test a bit clearer

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Log if top-level artifact count differs from top-level package count

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

---------

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>
  • Loading branch information
nchelluri committed May 14, 2024
1 parent 4485169 commit efa328a
Show file tree
Hide file tree
Showing 8 changed files with 479 additions and 81 deletions.
5 changes: 4 additions & 1 deletion internal/testing/testdata/testdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,10 @@ var (

CdxQuarkusHasSBOM = []assembler.HasSBOMIngest{
{
Pkg: cdxTopQuarkusPack,
Artifact: &model.ArtifactInputSpec{
Algorithm: "sha3-512",
Digest: "85240ed8faa3cc4493db96d0223094842e7153890b091ff364040ad3ad89363157fc9d1bd852262124aec83134f0c19aa4fd0fa482031d38a76d74dfd36b7964",
},
HasSBOM: &model.HasSBOMInputSpec{
Uri: "urn:uuid:0697952e-9848-4785-95bf-f81ff9731682",
Algorithm: "sha256",
Expand Down
24 changes: 17 additions & 7 deletions pkg/ingestor/parser/common/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,17 +101,27 @@ func CreateTopLevelIsDeps(topLevel *model.PkgInputSpec, packages map[string][]*m
return isDeps
}

func CreateTopLevelHasSBOM(topLevel *model.PkgInputSpec, sbomDoc *processor.Document, uri string, timeStamp time.Time) assembler.HasSBOMIngest {
sha256sum := sha256.Sum256(sbomDoc.Blob)
hash := hex.EncodeToString(sha256sum[:])
// CreateTopLevelHasSBOMFromPkg returns a HasSBOMIngest for sbomDoc whose
// subject is the package topLevelPkg. The HasSBOM details are produced by
// createTopLevelHasSBOM from sbomDoc.Blob, uri,
// sbomDoc.SourceInformation.Source, and timestamp; this function only sets
// the Pkg field on the result.
func CreateTopLevelHasSBOMFromPkg(topLevelPkg *model.PkgInputSpec, sbomDoc *processor.Document, uri string, timestamp time.Time) assembler.HasSBOMIngest {
rv := createTopLevelHasSBOM(sbomDoc.Blob, uri, sbomDoc.SourceInformation.Source, timestamp)
rv.Pkg = topLevelPkg
return rv
}

// CreateTopLevelHasSBOMFromArtifact returns a HasSBOMIngest for sbomDoc whose
// subject is the artifact topLevelArt. The HasSBOM details are produced by
// createTopLevelHasSBOM from sbomDoc.Blob, uri,
// sbomDoc.SourceInformation.Source, and timestamp; this function only sets
// the Artifact field on the result.
func CreateTopLevelHasSBOMFromArtifact(topLevelArt *model.ArtifactInputSpec, sbomDoc *processor.Document, uri string, timestamp time.Time) assembler.HasSBOMIngest {
rv := createTopLevelHasSBOM(sbomDoc.Blob, uri, sbomDoc.SourceInformation.Source, timestamp)
rv.Artifact = topLevelArt
return rv
}

func createTopLevelHasSBOM(blob []byte, uri string, source string, timestamp time.Time) assembler.HasSBOMIngest {
sha256sum := sha256.Sum256(blob)
return assembler.HasSBOMIngest{
Pkg: topLevel,
HasSBOM: &model.HasSBOMInputSpec{
Uri: uri,
Algorithm: "sha256",
Digest: hash,
DownloadLocation: sbomDoc.SourceInformation.Source,
KnownSince: timeStamp,
Digest: hex.EncodeToString(sha256sum[:]),
DownloadLocation: source,
KnownSince: timestamp,
},
}
}
Expand Down
32 changes: 16 additions & 16 deletions pkg/ingestor/parser/cyclonedx/parser_cyclonedx.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,16 +234,18 @@ func (c *cyclonedxParser) GetIdentifiers(ctx context.Context) (*common.Identifie
func (c *cyclonedxParser) GetPredicates(ctx context.Context) *assembler.IngestPredicates {
logger := logging.FromContext(ctx)
preds := &assembler.IngestPredicates{}
var toplevel []*model.PkgInputSpec
var topLevelArts []*model.ArtifactInputSpec
var topLevelPkgs []*model.PkgInputSpec

if c.cdxBom.Metadata != nil && c.cdxBom.Metadata.Component != nil {
toplevel = c.getPackageElement(c.cdxBom.Metadata.Component.BOMRef)
topLevelArts = c.packageArtifacts[c.cdxBom.Metadata.Component.BOMRef]
topLevelPkgs = c.packagePackages[c.cdxBom.Metadata.Component.BOMRef]
}

// adding top level package edge manually for all depends on package
// TODO: This is not based on the relationship so that can be inaccurate (can capture both direct and in-direct)...Remove this and be done below by the *c.cdxBom.Dependencies?
// see https://github.com/CycloneDX/specification/issues/33
if toplevel != nil {
if len(topLevelArts) > 0 || len(topLevelPkgs) > 0 {
var timestamp time.Time
var err error
if c.cdxBom.Metadata.Timestamp == "" {
Expand All @@ -257,8 +259,15 @@ func (c *cyclonedxParser) GetPredicates(ctx context.Context) *assembler.IngestPr
}
}

preds.IsDependency = append(preds.IsDependency, common.CreateTopLevelIsDeps(toplevel[0], c.packagePackages, nil, "top-level package GUAC heuristic connecting to each file/package")...)
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOM(toplevel[0], c.doc, c.cdxBom.SerialNumber, timestamp))
preds.IsDependency = append(preds.IsDependency, common.CreateTopLevelIsDeps(topLevelPkgs[0], c.packagePackages, nil, "top-level package GUAC heuristic connecting to each file/package")...)

if len(topLevelArts) > 0 {
for _, topLevelArt := range topLevelArts {
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOMFromArtifact(topLevelArt, c.doc, c.cdxBom.SerialNumber, timestamp))
}
} else {
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOMFromPkg(topLevelPkgs[0], c.doc, c.cdxBom.SerialNumber, timestamp))
}
}

for id := range c.packagePackages {
Expand Down Expand Up @@ -287,7 +296,7 @@ func (c *cyclonedxParser) GetPredicates(ctx context.Context) *assembler.IngestPr
if !found {
continue
}
if reflect.DeepEqual(currPkg, toplevel) {
if reflect.DeepEqual(currPkg, topLevelPkgs) {
continue
}
if deps.Dependencies != nil {
Expand Down Expand Up @@ -415,9 +424,7 @@ func (c *cyclonedxParser) getAffectedPackages(ctx context.Context, vulnInput *mo

var foundVexIngest []assembler.VexIngest

foundPkgElements := c.getPackageElement(affectsObj.Ref)

for _, foundPkgElement := range foundPkgElements {
for _, foundPkgElement := range c.packagePackages[affectsObj.Ref] {
foundVexIngest = append(foundVexIngest, assembler.VexIngest{VexData: &vexData, Vulnerability: vulnInput, Pkg: foundPkgElement})
}

Expand Down Expand Up @@ -475,13 +482,6 @@ func (c *cyclonedxParser) getAffectedPackages(ctx context.Context, vulnInput *mo
return &viList, nil
}

func (c *cyclonedxParser) getPackageElement(elementID string) []*model.PkgInputSpec {
if packNode, ok := c.packagePackages[elementID]; ok {
return packNode
}
return nil
}

func guacCDXFilePurl(fileName string, version string, topLevel bool) string {
escapedName := asmhelpers.SanitizeString(fileName)
if topLevel {
Expand Down
2 changes: 1 addition & 1 deletion pkg/ingestor/parser/cyclonedx/parser_cyclonedx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func Test_cyclonedxParser(t *testing.T) {
wantPredicates: &testdata.CdxIngestionPredicates,
wantErr: false,
}, {
name: "valid small CycloneDX document with package dependencies",
name: "valid small CycloneDX document with package dependencies and a hash",
doc: &processor.Document{
Blob: testdata.CycloneDXExampleSmallDeps,
Format: processor.FormatJSON,
Expand Down
2 changes: 1 addition & 1 deletion pkg/ingestor/parser/deps_dev/deps_dev.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (d *depsDevParser) GetPredicates(ctx context.Context) *assembler.IngestPred
IsDependency: isDepComp.IsDependency,
})
}
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOM(d.packComponent.CurrentPackage, d.doc, helpers.PkgInputSpecToPurl(d.packComponent.CurrentPackage), d.packComponent.UpdateTime))
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOMFromPkg(d.packComponent.CurrentPackage, d.doc, helpers.PkgInputSpecToPurl(d.packComponent.CurrentPackage), d.packComponent.UpdateTime))

return preds
}
Expand Down
113 changes: 60 additions & 53 deletions pkg/ingestor/parser/spdx/parse_spdx.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ type spdxParser struct {
packageLegals map[string][]*model.CertifyLegalInputSpec
filePackages map[string][]*model.PkgInputSpec
fileArtifacts map[string][]*model.ArtifactInputSpec
topLevelPackages map[string][]*model.PkgInputSpec
topLevelPackages []*model.PkgInputSpec
topLevelArtifacts map[string][]*model.ArtifactInputSpec
identifierStrings *common.IdentifierStrings
spdxDoc *spdx.Document
topLevelIsHeuristic bool
Expand All @@ -56,7 +57,7 @@ func NewSpdxParser() common.DocumentParser {
packageLegals: map[string][]*model.CertifyLegalInputSpec{},
filePackages: map[string][]*model.PkgInputSpec{},
fileArtifacts: map[string][]*model.ArtifactInputSpec{},
topLevelPackages: map[string][]*model.PkgInputSpec{},
topLevelArtifacts: make(map[string][]*model.ArtifactInputSpec),
identifierStrings: &common.IdentifierStrings{},
topLevelIsHeuristic: false,
}
Expand All @@ -77,17 +78,25 @@ func (s *spdxParser) Parse(ctx context.Context, doc *processor.Document) error {
return fmt.Errorf("SPDX document had invalid created time %q : %w", spdxDoc.CreationInfo.Created, err)
}
s.timeScanned = time
if err := s.getPackages(); err != nil {

topLevelSPDXIDs, err := s.getTopLevelSPDXIDs()
if err != nil {
return err
}
if err := s.getFiles(); err != nil {

if err := s.getFiles(topLevelSPDXIDs); err != nil {
return err
}

if err := s.getPackages(topLevelSPDXIDs); err != nil {
return err
}

return nil
}

// creating top level package manually until https://github.com/anchore/syft/issues/1241 is resolved
func (s *spdxParser) getTopLevelPackageSpdxIds() ([]string, error) {
// creating top level IDs manually until https://github.com/anchore/syft/issues/1241 is resolved
func (s *spdxParser) getTopLevelSPDXIDs() ([]string, error) {
// TODO: Add CertifyPkg to make a connection from GUAC purl to OCI purl guessed
// oci purl: pkg:oci/debian@sha256%3A244fd47e07d10?repository_url=ghcr.io/debian&tag=bullseye
var spdxIds []string
Expand All @@ -114,12 +123,7 @@ func (s *spdxParser) getTopLevelPackageSpdxIds() ([]string, error) {
return spdxIds, nil
}

func (s *spdxParser) getPackages() error {
topLevelSpdxIds, err := s.getTopLevelPackageSpdxIds()
if err != nil {
return err
}

func (s *spdxParser) getPackages(topLevelSPDXIDs []string) error {
for _, pac := range s.spdxDoc.Packages {
// for each package create a package for each of them
purl := ""
Expand All @@ -139,18 +143,22 @@ func (s *spdxParser) getPackages() error {
return err
}

if slices.Contains(topLevelSpdxIds, string(pac.PackageSPDXIdentifier)) {
s.topLevelPackages[string(s.spdxDoc.SPDXIdentifier)] = append(s.topLevelPackages[string(s.spdxDoc.SPDXIdentifier)], pkg)
if slices.Contains(topLevelSPDXIDs, string(pac.PackageSPDXIdentifier)) {
s.topLevelPackages = append(s.topLevelPackages, pkg)
}
s.packagePackages[string(pac.PackageSPDXIdentifier)] = append(s.packagePackages[string(pac.PackageSPDXIdentifier)], pkg)

// if checksums exists create an artifact for each of them
for _, checksum := range pac.PackageChecksums {
artifact := &model.ArtifactInputSpec{
art := &model.ArtifactInputSpec{
Algorithm: strings.ToLower(string(checksum.Algorithm)),
Digest: checksum.Value,
}
s.packageArtifacts[string(pac.PackageSPDXIdentifier)] = append(s.packageArtifacts[string(pac.PackageSPDXIdentifier)], artifact)
id := string(pac.PackageSPDXIdentifier)
if slices.Contains(topLevelSPDXIDs, id) {
s.topLevelArtifacts[id] = append(s.topLevelArtifacts[id], art)
}
s.packageArtifacts[id] = append(s.packageArtifacts[id], art)
}

if pac.PackageLicenseDeclared != "" ||
Expand All @@ -173,21 +181,21 @@ func (s *spdxParser) getPackages() error {
}

// If there is no top level Spdx Id that can be derived from the relationships, we take a best guess for the SpdxId.
if _, ok := s.topLevelPackages[string(s.spdxDoc.SPDXIdentifier)]; !ok {
if len(s.topLevelPackages) == 0 {
purl := "pkg:guac/spdx/" + asmhelpers.SanitizeString(s.spdxDoc.DocumentName)
topPackage, err := asmhelpers.PurlToPkg(purl)
if err != nil {
return err
}
s.topLevelPackages[string(s.spdxDoc.SPDXIdentifier)] = append(s.topLevelPackages[string(s.spdxDoc.SPDXIdentifier)], topPackage)
s.topLevelPackages = append(s.topLevelPackages, topPackage)
s.identifierStrings.PurlStrings = append(s.identifierStrings.PurlStrings, purl)
s.topLevelIsHeuristic = true
}

return nil
}

func (s *spdxParser) getFiles() error {
func (s *spdxParser) getFiles(topLevelSPDXIDs []string) error {
for _, file := range s.spdxDoc.Files {
// if checksums exists create an artifact for each of them
for _, checksum := range file.Checksums {
Expand All @@ -200,14 +208,21 @@ func (s *spdxParser) getFiles() error {
if err != nil {
return err
}
if slices.Contains(topLevelSPDXIDs, string(file.FileSPDXIdentifier)) {
s.topLevelPackages = append(s.topLevelPackages, pkg)
}
s.filePackages[string(file.FileSPDXIdentifier)] = append(s.filePackages[string(file.FileSPDXIdentifier)], pkg)

artifact := &model.ArtifactInputSpec{
art := &model.ArtifactInputSpec{
Algorithm: strings.ToLower(string(checksum.Algorithm)),
Digest: checksum.Value,
}

s.fileArtifacts[string(file.FileSPDXIdentifier)] = append(s.fileArtifacts[string(file.FileSPDXIdentifier)], artifact)
id := string(file.FileSPDXIdentifier)
if slices.Contains(topLevelSPDXIDs, id) {
s.topLevelArtifacts[id] = append(s.topLevelArtifacts[id], art)
}
s.fileArtifacts[id] = append(s.fileArtifacts[id], art)
}
}
return nil
Expand All @@ -217,34 +232,12 @@ func parseSpdxBlob(p []byte) (*spdx.Document, error) {
return json.Read(bytes.NewReader(p))
}

func (s *spdxParser) getPackageElement(elementID string) []*model.PkgInputSpec {
if packNode, ok := s.packagePackages[string(elementID)]; ok {
return packNode
}
return nil
}

func (s *spdxParser) getTopLevelPackageElement(elementID string) []*model.PkgInputSpec {
if packNode, ok := s.topLevelPackages[string(elementID)]; ok {
return packNode
}
return nil
}

func (s *spdxParser) getFileElement(elementID string) []*model.PkgInputSpec {
if fileNode, ok := s.filePackages[string(elementID)]; ok {
return fileNode
}
return nil
}

func (s *spdxParser) GetPredicates(ctx context.Context) *assembler.IngestPredicates {
logger := logging.FromContext(ctx)
preds := &assembler.IngestPredicates{}

topLevel := s.getTopLevelPackageElement(string(s.spdxDoc.SPDXIdentifier))
if topLevel == nil {
logger.Errorf("error getting predicates: unable to find top level package element")
if len(s.topLevelArtifacts) == 0 && len(s.topLevelPackages) == 0 {
logger.Errorf("error getting predicates: unable to find top level artifact or package element")
return preds
} else {
// adding top level package edge manually for all depends on package
Expand All @@ -253,13 +246,27 @@ func (s *spdxParser) GetPredicates(ctx context.Context) *assembler.IngestPredica
logger.Errorf("SPDX document had invalid created time %q : %w", s.spdxDoc.CreationInfo.Created, err)
return nil
}
for _, topLevelPkg := range topLevel {
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOM(topLevelPkg, s.doc, s.spdxDoc.DocumentNamespace, timestamp))

if len(s.topLevelArtifacts) > 0 {
for _, arts := range s.topLevelArtifacts {
for _, art := range arts {
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOMFromArtifact(art, s.doc, s.spdxDoc.DocumentNamespace, timestamp))
}
}

if len(s.topLevelArtifacts) != len(s.topLevelPackages) {
logger.Warnf("Top-level unique artifact count (%d) and top-level package count (%d) are mismatched. SBOM ingestion may not be as expected.",
len(s.topLevelArtifacts), len(s.topLevelPackages))
}
} else {
for _, topLevelPkg := range s.topLevelPackages {
preds.HasSBOM = append(preds.HasSBOM, common.CreateTopLevelHasSBOMFromPkg(topLevelPkg, s.doc, s.spdxDoc.DocumentNamespace, timestamp))
}
}

if s.topLevelIsHeuristic {
preds.IsDependency = append(preds.IsDependency,
common.CreateTopLevelIsDeps(topLevel[0], s.packagePackages, s.filePackages,
common.CreateTopLevelIsDeps(s.topLevelPackages[0], s.packagePackages, s.filePackages,
"top-level package GUAC heuristic connecting to each file/package")...)
}
}
Expand All @@ -283,10 +290,10 @@ func (s *spdxParser) GetPredicates(ctx context.Context) *assembler.IngestPredica
continue
}

foundPackNodes := s.getPackageElement(foundId)
foundFileNodes := s.getFileElement(foundId)
relatedPackNodes := s.getPackageElement(relatedId)
relatedFileNodes := s.getFileElement(relatedId)
foundPackNodes := s.packagePackages[foundId]
foundFileNodes := s.filePackages[foundId]
relatedPackNodes := s.packagePackages[relatedId]
relatedFileNodes := s.filePackages[relatedId]

justification := getJustification(rel)

Expand Down Expand Up @@ -376,7 +383,7 @@ func (s *spdxParser) GetPredicates(ctx context.Context) *assembler.IngestPredica
}

for _, pkg := range s.spdxDoc.Packages {
pkgInputSpecs := s.getPackageElement(string(pkg.PackageSPDXIdentifier))
pkgInputSpecs := s.packagePackages[string(pkg.PackageSPDXIdentifier)]
for _, extRef := range pkg.PackageExternalReferences {
if extRef.Category == spdx_common.CategorySecurity {
locator := extRef.Locator
Expand Down
Loading

0 comments on commit efa328a

Please sign in to comment.