From 7d2fe9d95e05fa61837a18900dc4246318ec08e6 Mon Sep 17 00:00:00 2001
From: Christopher Angelo Phillips <32073428+spiffcs@users.noreply.github.com>
Date: Wed, 5 Oct 2022 14:01:40 -0400
Subject: [PATCH] feat: add identifiable field to source object (#1243)

---
 go.mod                     |  2 +-
 syft/source/source.go      | 91 ++++++++++++++++++++++++++++++++++++++
 syft/source/source_test.go | 68 ++++++++++++++++++++++++++++
 3 files changed, 160 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index 0b87b0ea819..f0774e29290 100644
--- a/go.mod
+++ b/go.mod
@@ -60,6 +60,7 @@ require (
 	github.com/google/go-containerregistry v0.11.0
 	github.com/in-toto/in-toto-golang v0.3.4-0.20220709202702-fa494aaa0add
 	github.com/knqyf263/go-rpmdb v0.0.0-20220629110411-9a3bd2ebb923
+	github.com/opencontainers/go-digest v1.0.0
 	github.com/sassoftware/go-rpmutils v0.2.0
 	github.com/sigstore/cosign v1.12.1
 	github.com/sigstore/rekor v0.12.1-0.20220915152154-4bb6f441c1b2
@@ -218,7 +219,6 @@ require (
 	github.com/mozillazg/docker-credential-acr-helper v0.3.0 // indirect
 	github.com/nwaples/rardecode v1.1.0 // indirect
 	github.com/oklog/ulid v1.3.1 // indirect
-	github.com/opencontainers/go-digest v1.0.0 // indirect
 	github.com/opencontainers/image-spec v1.0.3-0.20220114050600-8b9d41f48198 // indirect
 	github.com/opentracing/opentracing-go v1.2.0 // indirect
 	github.com/pelletier/go-toml/v2 v2.0.5 // indirect
diff --git a/syft/source/source.go b/syft/source/source.go
index c2ea7326f83..cab3e65405d 100644
--- a/syft/source/source.go
+++ b/syft/source/source.go
@@ -15,16 +15,19 @@ import (
 
 	"github.com/bmatcuk/doublestar/v4"
 	"github.com/mholt/archiver/v3"
+	digest "github.com/opencontainers/go-digest"
 	"github.com/spf13/afero"
 
 	"github.com/anchore/stereoscope"
 	"github.com/anchore/stereoscope/pkg/image"
 	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/artifact"
 )
 
 // Source is an object that captures the data source to be cataloged, configuration, and a specific resolver used
 // in cataloging (based on the data source and configuration)
 type Source struct {
+	id                artifact.ID
 	Image             *image.Image // the image object to be cataloged (image only)
 	Metadata          Metadata
 	directoryResolver *directoryResolver
@@ -304,6 +307,94 @@ func NewFromImage(img *image.Image, userImageStr string) (Source, error) {
 	}, nil
 }
 
+// ID returns the source ID, deriving it with SetID when none has been set yet.
+// The receiver is a value, so an ID derived here is computed on a copy and is
+// not stored on the caller's Source; call SetID on a pointer to persist it.
+func (s Source) ID() artifact.ID {
+	if s.id == "" {
+		s.SetID()
+	}
+	return s.id
+}
+
+// SetID derives the source ID from the scheme of the source and stores it with
+// any "sha256:" prefix removed:
+//   - DirectoryScheme: digest of the path string
+//   - FileScheme: digest of the file contents, or of the path string when the
+//     file cannot be opened or read
+//   - ImageScheme: digest of the raw manifest when present, otherwise the layer
+//     chain ID, otherwise the digest of the user input string
+//   - any other scheme: artifact.IDByHash of the source
+func (s *Source) SetID() {
+	var d string
+	switch s.Metadata.Scheme {
+	case DirectoryScheme:
+		d = digest.FromString(s.Metadata.Path).String()
+	case FileScheme:
+		// attempt to use the digest of the contents of the file as the ID
+		file, err := os.Open(s.Metadata.Path)
+		if err != nil {
+			d = digest.FromString(s.Metadata.Path).String()
+			break
+		}
+		// the handle is only needed to compute the digest; release it on return
+		defer file.Close()
+
+		di, err := digest.FromReader(file)
+		if err != nil {
+			d = digest.FromString(s.Metadata.Path).String()
+			break
+		}
+		d = di.String()
+	case ImageScheme:
+		// the digest of an empty manifest is still a non-empty string, so check
+		// for manifest bytes rather than for an empty digest
+		if len(s.Image.Metadata.RawManifest) > 0 {
+			d = digest.FromBytes(s.Image.Metadata.RawManifest).String()
+			break
+		}
+
+		// calculate chain ID for image sources where the manifest is not available
+		// https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid
+		d = calculateChainID(s.Image)
+		if d == "" {
+			// TODO what happens here if image has no layers?
+			// Is this case possible
+			d = digest.FromString(s.Metadata.ImageMetadata.UserInput).String()
+		}
+	default: // for UnknownScheme we hash the struct
+		// best effort: the hashing error is discarded and the returned ID is used as-is
+		id, _ := artifact.IDByHash(s)
+		d = string(id)
+	}
+
+	s.id = artifact.ID(strings.TrimPrefix(d, "sha256:"))
+}
+
+// calculateChainID returns the chain ID over the layers of img, or "" when img
+// has no layers.
+func calculateChainID(img *image.Image) string {
+	if len(img.Layers) < 1 {
+		return ""
+	}
+
+	// DiffID(L0) = digest of layer 0
+	// https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32
+	chainID := img.Layers[0].Metadata.Digest
+	id := chain(chainID, img.Layers[1:])
+
+	return id
+}
+
+// chain extends chainID with each remaining layer in order, following the OCI
+// definition ChainID(L0|...|Ln) = Digest(ChainID(L0|...|Ln-1) + " " + DiffID(Ln)).
+func chain(chainID string, layers []*image.Layer) string {
+	for _, layer := range layers {
+		chainID = digest.FromString(chainID + " " + layer.Metadata.Digest).String()
+	}
+	return chainID
+}
+
 func (s *Source) FileResolver(scope Scope) (FileResolver, error) {
 	switch s.Metadata.Scheme {
 	case DirectoryScheme, FileScheme:
diff --git a/syft/source/source_test.go b/syft/source/source_test.go
index e16e608da0c..48c68fb847d 100644
--- a/syft/source/source_test.go
+++ b/syft/source/source_test.go
@@ -18,6 +18,7 @@ import (
 
 	"github.com/anchore/stereoscope/pkg/image"
 	"github.com/anchore/stereoscope/pkg/imagetest"
+	"github.com/anchore/syft/syft/artifact"
 )
 
 func TestParseInput(t *testing.T) {
@@ -65,6 +66,73 @@ func TestNewFromImageFails(t *testing.T) {
 	})
 }
 
+func TestSetID(t *testing.T) {
+	layer := image.NewLayer(nil)
+	layer.Metadata = image.LayerMetadata{
+		Digest: "sha256:6f4fb385d4e698647bf2a450749dfbb7bc2831ec9a730ef4046c78c08d468e89",
+	}
+	img := image.Image{
+		Layers: []*image.Layer{layer},
+	}
+
+	tests := []struct {
+		name     string
+		input    *Source
+		expected artifact.ID
+	}{
+		{
+			name: "source.SetID sets the ID for FileScheme",
+			input: &Source{
+				Metadata: Metadata{
+					Scheme: FileScheme,
+					Path:   "test-fixtures/image-simple/file-1.txt",
+				},
+			},
+			expected: artifact.ID("55096713247489add592ce977637be868497132b36d1e294a3831925ec64319a"),
+		},
+		{
+			name: "source.SetID sets the ID for ImageScheme",
+			input: &Source{
+				Image: &img,
+				Metadata: Metadata{
+					Scheme: ImageScheme,
+				},
+			},
+			// no manifest and a single layer: the chain ID is the digest of that layer
+			expected: artifact.ID("6f4fb385d4e698647bf2a450749dfbb7bc2831ec9a730ef4046c78c08d468e89"),
+		},
+		{
+			name: "source.SetID sets the ID for DirectoryScheme",
+			input: &Source{
+				Image: &img,
+				Metadata: Metadata{
+					Scheme: DirectoryScheme,
+					Path:   "test-fixtures/image-simple",
+				},
+			},
+			expected: artifact.ID("91db61e5e0ae097ef764796ce85e442a93f2a03e5313d4c7307e9b413f62e8c4"),
+		},
+		{
+			name: "source.SetID sets the ID for UnknownScheme",
+			input: &Source{
+				Image: &img,
+				Metadata: Metadata{
+					Scheme: UnknownScheme,
+					Path:   "test-fixtures/image-simple",
+				},
+			},
+			expected: artifact.ID("febd2d6148dc327d"),
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			test.input.SetID()
+			assert.Equal(t, test.expected, test.input.ID())
+		})
+	}
+}
+
 func TestNewFromImage(t *testing.T) {
 	layer := image.NewLayer(nil)
 	img := image.Image{