diff --git a/.golangci.yaml b/.golangci.yaml index 927d1d47ab8..8e447bfc366 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -6,7 +6,6 @@ issues: # include: # - EXC0002 # disable excluding of issues about comments from golint - linters: # inverted configuration with `enable-all` and `disable` is not scalable during updates of golangci-lint disable-all: true @@ -14,25 +13,24 @@ linters: - asciicheck - bodyclose - depguard + - dogsled - dupl - errcheck - - errorlint - exportloopref - - forcetypeassert - funlen - gocognit - goconst - gocritic - gocyclo - gofmt - - tparallel - - importas + - goimports + - goprintffuncname - gosec - gosimple - govet - ineffassign - misspell - - nolintlint + - nakedret - revive - staticcheck - stylecheck @@ -41,6 +39,7 @@ linters: - unparam - unused - whitespace + linters-settings: funlen: # Checks the number of lines in a function. @@ -57,7 +56,7 @@ run: timeout: 10m # do not enable... -# - dogsled # found to be to niche and ineffective +# - deadcode # The owner seems to have abandoned the linter. Replaced by "unused". # - goprintffuncname # does not catch all cases and there are exceptions # - nakedret # does not catch all cases and should not fail a build # - gochecknoglobals @@ -73,7 +72,11 @@ run: # - lll # without a way to specify per-line exception cases, this is not usable # - maligned # this is an excellent linter, but tricky to optimize and we are not sensitive to memory layout optimizations # - nestif -# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code -# - scopelint # deprecated +# - nolintlint # as of go1.19 this conflicts with the behavior of gofmt, which is a deal-breaker (lint-fix will still fail when running lint) +# - prealloc # following this rule isn't consistently a good idea, as it sometimes forces unnecessary allocations that result in less idiomatic code +# - rowserrcheck # not in a repo with sql, so this is not useful +# - scopelint # deprecated +# - structcheck # The owner seems to have abandoned the linter. Replaced by "unused". # - testpackage -# - wsl # this doens't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90) +# - varcheck # The owner seems to have abandoned the linter. Replaced by "unused". +# - wsl # this doesn't have an auto-fixer yet and is pretty noisy (https://github.com/bombsimon/wsl/issues/90) diff --git a/DEVELOPING.md b/DEVELOPING.md index e5242647dde..cddb5070035 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -118,45 +118,55 @@ sequenceDiagram Catalogers are the way in which syft is able to identify and construct packages given some amount of source metadata. For example, Syft can locate and process `package-lock.json` files when performing filesystem scans. -See: [how to specify file globs](https://github.com/anchore/syft/blob/main/syft/pkg/cataloger/javascript/cataloger.go#L16-L21) -and an implementation of the [package-lock.json parser](https://github.com/anchore/syft/blob/main/syft/pkg/cataloger/javascript/cataloger.go#L16-L21) fora quick review. +See: [how to specify file globs](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/javascript/cataloger.go#L16-L21) +and an implementation of the [package-lock.json parser](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/javascript/cataloger.go#L16-L21) for a quick review.
#### Building a new Cataloger -Catalogers must fulfill the interface [found here](https://github.com/anchore/syft/blob/main/syft/pkg/cataloger.go). +Catalogers must fulfill the interface [found here](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger.go). This means that when building a new cataloger, the new struct must implement both method signatures of `Catalog` and `Name`. -A top level view of the functions that construct all the catalogers can be found [here](https://github.com/anchore/syft/blob/main/syft/pkg/cataloger/cataloger.go). +A top-level view of the functions that construct all the catalogers can be found [here](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/cataloger.go). When an author has finished writing a new cataloger, this is the spot to plug in the new cataloger constructor. -For a top level view of how the catalogers are used see [this function](https://github.com/anchore/syft/blob/main/syft/pkg/cataloger/catalog.go#L41-L100) as a reference. It ranges over all catalogers passed as an argument and invokes the `Catalog` method: +For a top-level view of how the catalogers are used, see [this function](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/catalog.go#L41-L100) as a reference. It ranges over all catalogers passed as an argument and invokes the `Catalog` method: Each cataloger has its own `Catalog` method, but this does not mean that they are all vastly different. -Take a look at the `apkdb` cataloger for alpine to see how it [constructs a generic.NewCataloger](https://github.com/anchore/syft/blob/main/syft/pkg/cataloger/apkdb/cataloger.go). +Take a look at the `apkdb` cataloger for alpine to see how it [constructs a generic.NewCataloger](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/cataloger.go). `generic.NewCataloger` is an abstraction syft uses to make writing common components easier. First, it takes the `catalogerName` to identify the cataloger. On the other side of the call it uses two key pieces which inform the cataloger how to identify and return packages, the `globPatterns` and the `parseFunction`: - The first piece is a `parseByGlob` matching pattern used to identify the files that contain the package metadata. -See [here for the APK example](https://github.com/anchore/syft/blob/main/syft/pkg/apk_metadata.go#L16-L41). +See [here for the APK example](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/apk_metadata.go#L16-L41). - The other is a `parseFunction` which informs the cataloger what to do when it has found one of the above matched files. -See this [link for an example](https://github.com/anchore/syft/blob/main/syft/pkg/cataloger/apkdb/parse_apk_db.go#L22-L102). +See this [link for an example](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L22-L102). If you're unsure about using the `Generic Cataloger` and think the use case being filled requires something more custom, just file an issue or ask in our slack, and we'd be more than happy to help on the design. -Identified packages share a common struct so be sure that when the new cataloger is constructing a new package it is using the [`Package` struct](https://github.com/anchore/syft/blob/main/syft/pkg/package.go#L16-L31). +Identified packages share a common struct, so be sure that when the new cataloger is constructing a new package it is using the [`Package` struct](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/package.go#L16-L31).
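+
+To make the pieces above concrete, here is a minimal sketch of a cataloger built on `generic.NewCataloger`. The cataloger name, glob, and package fields are invented for illustration; the parse function signature mirrors the `parseAlpmDB` function elsewhere in this changeset:
+
+```go
+package example
+
+import (
+	"github.com/anchore/syft/syft/artifact"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/generic"
+	"github.com/anchore/syft/syft/source"
+)
+
+// NewExampleCataloger wires glob patterns to a parse function under a
+// cataloger name (the two key pieces described above).
+func NewExampleCataloger() *generic.Cataloger {
+	return generic.NewCataloger("example-cataloger").
+		WithParserByGlobs(parseExampleLock, "**/example-lock.json")
+}
+
+// parseExampleLock is invoked once for each file matched by the glob above
+// and returns the packages discovered in that file.
+func parseExampleLock(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
+	// decode reader.ReadCloser and construct one package per entry here...
+	p := pkg.Package{
+		Name:      "example",
+		Version:   "1.0.0",
+		Locations: source.NewLocationSet(reader.Location),
+	}
+	return []pkg.Package{p}, nil, nil
+}
+```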
Metadata Note: Identified packages are also assigned specific metadata that can be unique to their environment. -See [this folder](https://github.com/anchore/syft/tree/main/syft/pkg) for examples of the different metadata types. +See [this folder](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg) for examples of the different metadata types. These are plugged into the `MetadataType` and `Metadata` fields in the above struct. `MetadataType` informs which type is being used. `Metadata` is an interface converted to that type. Finally, here is an example of where the package construction is done in the apk cataloger. The first link is where `newPackage` is called in the `parseFunction`. The second link shows the package construction: -- [Call for new package](https://github.com/anchore/syft/blob/6a7d6e6071829c7ce2943266c0e187b27c0b325c/syft/pkg/cataloger/apkdb/parse_apk_db.go#L96-L99) -- [APK Package Constructor](https://github.com/anchore/syft/blob/6a7d6e6071829c7ce2943266c0e187b27c0b325c/syft/pkg/cataloger/apkdb/package.go#L12-L27) +- [Call for new package](https://github.com/anchore/syft/blob/v0.70.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L106) +- [APK Package Constructor](https://github.com/anchore/syft/tree/v0.70.0/syft/pkg/cataloger/apkdb/package.go#L12-L27) If you have more questions about implementing a cataloger, or questions about one you might currently be working on, always feel free to file an issue or reach out to us [on slack](https://anchore.com/slack). +#### Searching for files + +All catalogers are provided an instance of the [`source.FileResolver`](https://github.com/anchore/syft/blob/v0.70.0/syft/source/file_resolver.go#L8) to interface with the image and search for files. The implementations for these +abstractions leverage [`stereoscope`](https://github.com/anchore/stereoscope) in order to perform searching. Here is a +rough outline of how that works (see the sketch after this list): + +1. A stereoscope `file.Index` is searched based on the input given (a path, glob, or MIME type). The index is relatively fast to search, but requires results to be filtered down to the files that exist in the specific layer(s) of interest. This is done automatically by the `filetree.Searcher` abstraction. This abstraction will fall back to searching directly against the raw `filetree.FileTree` if the index does not contain the file(s) of interest. Note: the `filetree.Searcher` is used by the `source.FileResolver` abstraction. +2. Once the set of files is returned from the `filetree.Searcher`, the results are filtered down further to return the most unique file results. For example, you may have requested files by a glob that returns multiple results. These results are deduplicated by real file, so if a result contains two references to the same file, say one accessed via symlink and one accessed via the real path, then the real path reference is returned and the symlink reference is filtered out. If both were accessed by symlink, then the first (by lexical order) is returned. This is done automatically by the `source.FileResolver` abstraction. +3. By the time results reach the `pkg.Cataloger` you are guaranteed to have a set of unique files that exist in the layer(s) of interest (relative to what the resolver supports).
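+
+As a hypothetical sketch of what this flow looks like from the cataloger side (the helper name and glob are made up), the resolver calls below, `FilesByGlob` and `FileContentsByLocation`, are the same ones exercised by the resolver implementations in this changeset:
+
+```go
+package example
+
+import (
+	"io"
+
+	"github.com/anchore/syft/syft/source"
+)
+
+// readMatchingFiles globs for candidate files (step 1 above), receives the
+// deduplicated locations (step 2), and reads each unique file (step 3).
+func readMatchingFiles(resolver source.FileResolver) error {
+	locations, err := resolver.FilesByGlob("**/example.conf")
+	if err != nil {
+		return err
+	}
+	for _, location := range locations {
+		contents, err := resolver.FileContentsByLocation(location)
+		if err != nil {
+			return err
+		}
+		data, err := io.ReadAll(contents)
+		contents.Close()
+		if err != nil {
+			return err
+		}
+		_ = data // parse the file contents into packages here
+	}
+	return nil
+}
+```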
+ ## Testing ### Levels of testing diff --git a/go.mod b/go.mod index 495cdcd0351..c17db6cc057 100644 --- a/go.mod +++ b/go.mod @@ -52,7 +52,7 @@ require ( github.com/CycloneDX/cyclonedx-go v0.7.1-0.20221222100750-41a1ac565cce github.com/Masterminds/sprig/v3 v3.2.3 github.com/anchore/go-logger v0.0.0-20220728155337-03b66a5207d8 - github.com/anchore/stereoscope v0.0.0-20230203152723-c49244e4d66f + github.com/anchore/stereoscope v0.0.0-20230208154630-5a306f07f2e7 github.com/docker/docker v23.0.0+incompatible github.com/google/go-containerregistry v0.13.0 github.com/invopop/jsonschema v0.7.0 @@ -60,7 +60,7 @@ require ( github.com/opencontainers/go-digest v1.0.0 github.com/sassoftware/go-rpmutils v0.2.0 github.com/vbatts/go-mtree v0.5.2 - golang.org/x/exp v0.0.0-20220823124025-807a23277127 + golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b gopkg.in/yaml.v3 v3.0.1 ) @@ -70,6 +70,7 @@ require ( github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/Microsoft/go-winio v0.6.0 // indirect github.com/anchore/go-struct-converter v0.0.0-20221118182256-c68fdcfa2092 // indirect + github.com/becheran/wildmatch-go v1.0.0 // indirect github.com/containerd/containerd v1.6.12 // indirect github.com/containerd/stargz-snapshotter/estargz v0.12.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -127,7 +128,7 @@ require ( golang.org/x/sync v0.1.0 // indirect golang.org/x/sys v0.5.0 // indirect golang.org/x/text v0.7.0 // indirect - golang.org/x/tools v0.1.12 // indirect + golang.org/x/tools v0.2.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto v0.0.0-20221227171554-f9683d7f8bef // indirect google.golang.org/grpc v1.52.0 // indirect diff --git a/go.sum b/go.sum index 503c5b23399..b5200470432 100644 --- a/go.sum +++ b/go.sum @@ -90,8 +90,8 @@ github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b h1:e1bmaoJfZV github.com/anchore/go-version v1.2.2-0.20200701162849-18adb9c92b9b/go.mod h1:Bkc+JYWjMCF8OyZ340IMSIi2Ebf3uwByOk6ho4wne1E= github.com/anchore/packageurl-go v0.1.1-0.20230104203445-02e0a6721501 h1:AV7qjwMcM4r8wFhJq3jLRztew3ywIyPTRapl2T1s9o8= github.com/anchore/packageurl-go v0.1.1-0.20230104203445-02e0a6721501/go.mod h1:Blo6OgJNiYF41ufcgHKkbCKF2MDOMlrqhXv/ij6ocR4= -github.com/anchore/stereoscope v0.0.0-20230203152723-c49244e4d66f h1:hyEFgDzqZRr/+q1cPfjgIKXWJ7lMHDHmDXAOrhKMhRA= -github.com/anchore/stereoscope v0.0.0-20230203152723-c49244e4d66f/go.mod h1:YerDPu5voTWZUmjrAHhak7gGGdGLilqroEEFLA/PUHo= +github.com/anchore/stereoscope v0.0.0-20230208154630-5a306f07f2e7 h1:PrdFBPMyika+AM1/AwDmYqrVeUATDU90wbrd81ugicU= +github.com/anchore/stereoscope v0.0.0-20230208154630-5a306f07f2e7/go.mod h1:TUCfo52tEz7ahTUFtKN//wcB7kJzQs0Oifmnd4NkIXw= github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= @@ -102,6 +102,8 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV github.com/armon/go-metrics v0.3.10/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/becheran/wildmatch-go v1.0.0 h1:mE3dGGkTmpKtT4Z+88t8RStG40yN9T+kFEGj2PZFSzA= 
+github.com/becheran/wildmatch-go v1.0.0/go.mod h1:gbMvj0NtVdJ15Mg/mH9uxk2R1QCistMyU7d9KFzroX4= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= @@ -637,8 +639,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20220823124025-807a23277127 h1:S4NrSKDfihhl3+4jSTgwoIevKxX9p7Iv9x++OEIptDo= -golang.org/x/exp v0.0.0-20220823124025-807a23277127/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= +golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b h1:EqBVA+nNsObCwQoBEHy4wLU0pi7i8a4AL3pbItPdPkE= +golang.org/x/exp v0.0.0-20230202163644-54bba9f4231b/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -904,8 +906,9 @@ golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE= +golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/string_helpers.go b/internal/string_helpers.go index 09a8fda2d5a..4f5c65a4ad5 100644 --- a/internal/string_helpers.go +++ b/internal/string_helpers.go @@ -28,3 +28,10 @@ func StringInSlice(a string, list []string) bool { } return false } + +func SplitAny(s string, seps string) []string { + splitter := func(r rune) bool { + return strings.ContainsRune(seps, r) + } + return strings.FieldsFunc(s, splitter) +} diff --git a/internal/string_helpers_test.go b/internal/string_helpers_test.go index f89a0820fca..45b90195aaa 100644 --- a/internal/string_helpers_test.go +++ b/internal/string_helpers_test.go @@ -104,3 +104,37 @@ func TestTruncateMiddleEllipsis(t *testing.T) { }) } } + +func TestSplitAny(t *testing.T) { + + tests := []struct { + name string + input string + fields string + want []string + }{ + { + name: "simple", + input: "a,b,c", + fields: ",", + want: []string{"a", "b", "c"}, + }, + { + name: "empty", + input: "", + fields: ",", + want: []string{}, + }, + { + name: 
"multiple separators", + input: "a,b\nc:d", + fields: ",:\n", + want: []string{"a", "b", "c", "d"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, SplitAny(tt.input, tt.fields)) + }) + } +} diff --git a/syft/file/all_regular_files.go b/syft/file/all_regular_files.go index e7612c8b242..5dcf8974430 100644 --- a/syft/file/all_regular_files.go +++ b/syft/file/all_regular_files.go @@ -1,6 +1,7 @@ package file import ( + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/source" ) @@ -20,7 +21,7 @@ func allRegularFiles(resolver source.FileResolver) (locations []source.Location) continue } - if metadata.Type != source.RegularFile { + if metadata.Type != file.TypeRegular { continue } locations = append(locations, resolvedLocation) diff --git a/syft/file/all_regular_files_test.go b/syft/file/all_regular_files_test.go index d1e261652fc..096480721a5 100644 --- a/syft/file/all_regular_files_test.go +++ b/syft/file/all_regular_files_test.go @@ -3,6 +3,7 @@ package file import ( "testing" + "github.com/google/go-cmp/cmp" "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -69,8 +70,8 @@ func Test_allRegularFiles(t *testing.T) { virtualLocations.Add(l.VirtualPath) } } - assert.ElementsMatch(t, tt.wantRealPaths.List(), realLocations.List(), "mismatched real paths") - assert.ElementsMatch(t, tt.wantVirtualPaths.List(), virtualLocations.List(), "mismatched virtual paths") + assert.ElementsMatch(t, tt.wantRealPaths.List(), realLocations.List(), "real paths differ: "+cmp.Diff(tt.wantRealPaths.List(), realLocations.List())) + assert.ElementsMatch(t, tt.wantVirtualPaths.List(), virtualLocations.List(), "virtual paths differ: "+cmp.Diff(tt.wantVirtualPaths.List(), virtualLocations.List())) }) } } diff --git a/syft/file/digest_cataloger.go b/syft/file/digest_cataloger.go index c435dd107c9..1ce45b1d722 100644 --- a/syft/file/digest_cataloger.go +++ b/syft/file/digest_cataloger.go @@ -11,6 +11,7 @@ import ( "github.com/wagoodman/go-partybus" "github.com/wagoodman/go-progress" + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/log" @@ -65,7 +66,7 @@ func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, locatio } // we should only attempt to report digests for files that are regular files (don't attempt to resolve links) - if meta.Type != source.RegularFile { + if meta.Type != file.TypeRegular { return nil, errUndigestableFile } diff --git a/syft/file/digest_cataloger_test.go b/syft/file/digest_cataloger_test.go index dc017fbf957..57a1a8071ee 100644 --- a/syft/file/digest_cataloger_test.go +++ b/syft/file/digest_cataloger_test.go @@ -146,7 +146,7 @@ func TestDigestsCataloger_MixFileTypes(t *testing.T) { if err != nil { t.Fatalf("unable to get file=%q : %+v", test.path, err) } - l := source.NewLocationFromImage(test.path, *ref, img) + l := source.NewLocationFromImage(test.path, *ref.Reference, img) if len(actual[l.Coordinates]) == 0 { if test.expected != "" { diff --git a/syft/file/metadata_cataloger_test.go b/syft/file/metadata_cataloger_test.go index c7633bb7c8c..5166857ba42 100644 --- a/syft/file/metadata_cataloger_test.go +++ b/syft/file/metadata_cataloger_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/anchore/stereoscope/pkg/file" 
"github.com/anchore/stereoscope/pkg/imagetest" @@ -50,8 +51,9 @@ func TestFileMetadataCataloger(t *testing.T) { path: "/file-1.txt", exists: true, expected: source.FileMetadata{ + Path: "/file-1.txt", Mode: 0644, - Type: "RegularFile", + Type: file.TypeRegular, UserID: 1, GroupID: 2, Size: 7, @@ -62,8 +64,9 @@ func TestFileMetadataCataloger(t *testing.T) { path: "/hardlink-1", exists: true, expected: source.FileMetadata{ + Path: "/hardlink-1", Mode: 0644, - Type: "HardLink", + Type: file.TypeHardLink, LinkDestination: "file-1.txt", UserID: 1, GroupID: 2, @@ -74,8 +77,9 @@ func TestFileMetadataCataloger(t *testing.T) { path: "/symlink-1", exists: true, expected: source.FileMetadata{ + Path: "/symlink-1", Mode: 0777 | os.ModeSymlink, - Type: "SymbolicLink", + Type: file.TypeSymLink, LinkDestination: "file-1.txt", UserID: 0, GroupID: 0, @@ -86,8 +90,9 @@ func TestFileMetadataCataloger(t *testing.T) { path: "/char-device-1", exists: true, expected: source.FileMetadata{ + Path: "/char-device-1", Mode: 0644 | os.ModeDevice | os.ModeCharDevice, - Type: "CharacterDevice", + Type: file.TypeCharacterDevice, UserID: 0, GroupID: 0, MIMEType: "", @@ -97,8 +102,9 @@ func TestFileMetadataCataloger(t *testing.T) { path: "/block-device-1", exists: true, expected: source.FileMetadata{ + Path: "/block-device-1", Mode: 0644 | os.ModeDevice, - Type: "BlockDevice", + Type: file.TypeBlockDevice, UserID: 0, GroupID: 0, MIMEType: "", @@ -108,8 +114,9 @@ func TestFileMetadataCataloger(t *testing.T) { path: "/fifo-1", exists: true, expected: source.FileMetadata{ + Path: "/fifo-1", Mode: 0644 | os.ModeNamedPipe, - Type: "FIFONode", + Type: file.TypeFIFO, UserID: 0, GroupID: 0, MIMEType: "", @@ -119,11 +126,13 @@ func TestFileMetadataCataloger(t *testing.T) { path: "/bin", exists: true, expected: source.FileMetadata{ + Path: "/bin", Mode: 0755 | os.ModeDir, - Type: "Directory", + Type: file.TypeDirectory, UserID: 0, GroupID: 0, MIMEType: "", + IsDir: true, }, }, } @@ -131,11 +140,9 @@ func TestFileMetadataCataloger(t *testing.T) { for _, test := range tests { t.Run(test.path, func(t *testing.T) { _, ref, err := img.SquashedTree().File(file.Path(test.path)) - if err != nil { - t.Fatalf("unable to get file: %+v", err) - } + require.NoError(t, err) - l := source.NewLocationFromImage(test.path, *ref, img) + l := source.NewLocationFromImage(test.path, *ref.Reference, img) assert.Equal(t, test.expected, actual[l.Coordinates], "mismatched metadata") diff --git a/syft/formats/cyclonedxjson/encoder_test.go b/syft/formats/cyclonedxjson/encoder_test.go index 005d44b9591..4649ad31701 100644 --- a/syft/formats/cyclonedxjson/encoder_test.go +++ b/syft/formats/cyclonedxjson/encoder_test.go @@ -5,7 +5,7 @@ import ( "regexp" "testing" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" ) var updateCycloneDx = flag.Bool("update-cyclonedx", false, "update the *.golden files for cyclone-dx encoders") diff --git a/syft/formats/cyclonedxxml/encoder_test.go b/syft/formats/cyclonedxxml/encoder_test.go index c7ecd80650d..1070f44cad5 100644 --- a/syft/formats/cyclonedxxml/encoder_test.go +++ b/syft/formats/cyclonedxxml/encoder_test.go @@ -5,7 +5,7 @@ import ( "regexp" "testing" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" ) var updateCycloneDx = flag.Bool("update-cyclonedx", false, "update the *.golden files for cyclone-dx encoders") diff --git a/syft/formats/formats.go b/syft/formats/formats.go index 
900db713cbd..f3e4721e87f 100644 --- a/syft/formats/formats.go +++ b/syft/formats/formats.go @@ -44,7 +44,7 @@ func Identify(by []byte) sbom.Format { for _, f := range Formats() { if err := f.Validate(bytes.NewReader(by)); err != nil { if !errors.Is(err, sbom.ErrValidationNotSupported) { - log.Debugf("format %s returned err: %+v", f.ID(), err) + log.WithFields("error", err).Tracef("format validation for %s failed", f.ID()) } continue } diff --git a/syft/formats/common/testutils/utils.go b/syft/formats/internal/testutils/utils.go similarity index 98% rename from syft/formats/common/testutils/utils.go rename to syft/formats/internal/testutils/utils.go index 39090d29192..f214c4f07f3 100644 --- a/syft/formats/common/testutils/utils.go +++ b/syft/formats/internal/testutils/utils.go @@ -168,7 +168,7 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) { Name: "package-1", Version: "1.0.1", Locations: source.NewLocationSet( - source.NewLocationFromImage(string(ref1.RealPath), *ref1, img), + source.NewLocationFromImage(string(ref1.RealPath), *ref1.Reference, img), ), Type: pkg.PythonPkg, FoundBy: "the-cataloger-1", @@ -188,7 +188,7 @@ func populateImageCatalog(catalog *pkg.Catalog, img *image.Image) { Name: "package-2", Version: "2.0.1", Locations: source.NewLocationSet( - source.NewLocationFromImage(string(ref2.RealPath), *ref2, img), + source.NewLocationFromImage(string(ref2.RealPath), *ref2.Reference, img), ), Type: pkg.DebPkg, FoundBy: "the-cataloger-2", diff --git a/syft/formats/spdxjson/encoder_test.go b/syft/formats/spdxjson/encoder_test.go index 90f1342841b..f33a87708b3 100644 --- a/syft/formats/spdxjson/encoder_test.go +++ b/syft/formats/spdxjson/encoder_test.go @@ -5,7 +5,7 @@ import ( "regexp" "testing" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" ) var updateSpdxJson = flag.Bool("update-spdx-json", false, "update the *.golden files for spdx-json encoders") diff --git a/syft/formats/spdxtagvalue/encoder_test.go b/syft/formats/spdxtagvalue/encoder_test.go index c2b8e4e8563..116f9ae8fbe 100644 --- a/syft/formats/spdxtagvalue/encoder_test.go +++ b/syft/formats/spdxtagvalue/encoder_test.go @@ -5,7 +5,7 @@ import ( "regexp" "testing" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" diff --git a/syft/formats/syftjson/decoder_test.go b/syft/formats/syftjson/decoder_test.go index ed9043b8b75..06d41711dad 100644 --- a/syft/formats/syftjson/decoder_test.go +++ b/syft/formats/syftjson/decoder_test.go @@ -8,7 +8,7 @@ import ( "github.com/go-test/deep" "github.com/stretchr/testify/assert" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" ) func TestEncodeDecodeCycle(t *testing.T) { diff --git a/syft/formats/syftjson/encoder_test.go b/syft/formats/syftjson/encoder_test.go index 216e60fc162..3c0ee8c54f4 100644 --- a/syft/formats/syftjson/encoder_test.go +++ b/syft/formats/syftjson/encoder_test.go @@ -5,10 +5,11 @@ import ( "regexp" "testing" + stereoFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" "github.com/anchore/syft/syft/linux" 
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" @@ -107,26 +108,26 @@ func TestEncodeFullJSONDocument(t *testing.T) { FileMetadata: map[source.Coordinates]source.FileMetadata{ source.NewLocation("/a/place").Coordinates: { Mode: 0775, - Type: "directory", + Type: stereoFile.TypeDirectory, UserID: 0, GroupID: 0, }, source.NewLocation("/a/place/a").Coordinates: { Mode: 0775, - Type: "regularFile", + Type: stereoFile.TypeRegular, UserID: 0, GroupID: 0, }, source.NewLocation("/b").Coordinates: { Mode: 0775, - Type: "symbolicLink", + Type: stereoFile.TypeSymLink, LinkDestination: "/c", UserID: 0, GroupID: 0, }, source.NewLocation("/b/place/b").Coordinates: { Mode: 0644, - Type: "regularFile", + Type: stereoFile.TypeRegular, UserID: 1, GroupID: 2, }, diff --git a/syft/formats/syftjson/model/file.go b/syft/formats/syftjson/model/file.go index e230ef127b6..1bed51e582e 100644 --- a/syft/formats/syftjson/model/file.go +++ b/syft/formats/syftjson/model/file.go @@ -14,10 +14,10 @@ type File struct { } type FileMetadataEntry struct { - Mode int `json:"mode"` - Type source.FileType `json:"type"` - LinkDestination string `json:"linkDestination,omitempty"` - UserID int `json:"userID"` - GroupID int `json:"groupID"` - MIMEType string `json:"mimeType"` + Mode int `json:"mode"` + Type string `json:"type"` + LinkDestination string `json:"linkDestination,omitempty"` + UserID int `json:"userID"` + GroupID int `json:"groupID"` + MIMEType string `json:"mimeType"` } diff --git a/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden b/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden index e8526917755..6a051331a0e 100644 --- a/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden +++ b/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden @@ -89,7 +89,7 @@ } }, "schema": { - "version": "6.1.0", - "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-6.1.0.json" + "version": "6.2.0", + "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-6.2.0.json" } } diff --git a/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden b/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden index 0b9a4036ac0..3b638f94106 100644 --- a/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden +++ b/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden @@ -78,7 +78,7 @@ }, "metadata": { "mode": 775, - "type": "directory", + "type": "Directory", "userID": 0, "groupID": 0, "mimeType": "" @@ -91,7 +91,7 @@ }, "metadata": { "mode": 775, - "type": "regularFile", + "type": "RegularFile", "userID": 0, "groupID": 0, "mimeType": "" @@ -111,7 +111,7 @@ }, "metadata": { "mode": 775, - "type": "symbolicLink", + "type": "SymbolicLink", "linkDestination": "/c", "userID": 0, "groupID": 0, @@ -125,7 +125,7 @@ }, "metadata": { "mode": 644, - "type": "regularFile", + "type": "RegularFile", "userID": 1, "groupID": 2, "mimeType": "" diff --git a/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden b/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden index 362f9886fad..a72adb10c60 100644 --- a/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden +++ b/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden @@ -9,7 +9,7 @@ "locations": [ { "path": "/somefile-1.txt", - "layerID": "sha256:6afd1cb55939d87ba4c298907d0a53059bb3742c2d65314643e2464071cf0a2d" 
+ "layerID": "sha256:fb6beecb75b39f4bb813dbf177e501edd5ddb3e69bb45cedeb78c676ee1b7a59" } ], "licenses": [ @@ -40,7 +40,7 @@ "locations": [ { "path": "/somefile-2.txt", - "layerID": "sha256:657997cff9a836139186239bdfe77250239a700d0ed97d57e101c295e8244319" + "layerID": "sha256:319b588ce64253a87b533c8ed01cf0025e0eac98e7b516e12532957e1244fdec" } ], "licenses": [], @@ -64,11 +64,11 @@ ], "artifactRelationships": [], "source": { - "id": "c85f7ae1b0ac38342c1cf1a6f7ea2b4b1ddc49cd1b24219ebd05dc10b3303491", + "id": "1a678f111c8ddc66fd82687bb024e0dd6af61314404937a80e810c0cf317b796", "type": "image", "target": { "userInput": "user-image-input", - "imageID": "sha256:b5c0bfa8bcf70c75d92ebebbf76af667906d56e6fad50c37e7f93df824a64b79", + "imageID": "sha256:3c51b06feb0cda8ee62d0e3755ef2a8496a6b71f8a55b245f07f31c4bb813d31", "manifestDigest": "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368", "mediaType": "application/vnd.docker.distribution.manifest.v2+json", "tags": [ @@ -78,17 +78,17 @@ "layers": [ { "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", - "digest": "sha256:6afd1cb55939d87ba4c298907d0a53059bb3742c2d65314643e2464071cf0a2d", + "digest": "sha256:fb6beecb75b39f4bb813dbf177e501edd5ddb3e69bb45cedeb78c676ee1b7a59", "size": 22 }, { "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", - "digest": "sha256:657997cff9a836139186239bdfe77250239a700d0ed97d57e101c295e8244319", + "digest": "sha256:319b588ce64253a87b533c8ed01cf0025e0eac98e7b516e12532957e1244fdec", "size": 16 } ], - "manifest": "eyJzY2hlbWFWZXJzaW9uIjoyLCJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmRpc3RyaWJ1dGlvbi5tYW5pZmVzdC52Mitqc29uIiwiY29uZmlnIjp7Im1lZGlhVHlwZSI6ImFwcGxpY2F0aW9uL3ZuZC5kb2NrZXIuY29udGFpbmVyLmltYWdlLnYxK2pzb24iLCJzaXplIjo2NzMsImRpZ2VzdCI6InNoYTI1NjpiNWMwYmZhOGJjZjcwYzc1ZDkyZWJlYmJmNzZhZjY2NzkwNmQ1NmU2ZmFkNTBjMzdlN2Y5M2RmODI0YTY0Yjc5In0sImxheWVycyI6W3sibWVkaWFUeXBlIjoiYXBwbGljYXRpb24vdm5kLmRvY2tlci5pbWFnZS5yb290ZnMuZGlmZi50YXIuZ3ppcCIsInNpemUiOjIwNDgsImRpZ2VzdCI6InNoYTI1Njo2YWZkMWNiNTU5MzlkODdiYTRjMjk4OTA3ZDBhNTMwNTliYjM3NDJjMmQ2NTMxNDY0M2UyNDY0MDcxY2YwYTJkIn0seyJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmltYWdlLnJvb3Rmcy5kaWZmLnRhci5nemlwIiwic2l6ZSI6MjA0OCwiZGlnZXN0Ijoic2hhMjU2OjY1Nzk5N2NmZjlhODM2MTM5MTg2MjM5YmRmZTc3MjUwMjM5YTcwMGQwZWQ5N2Q1N2UxMDFjMjk1ZTgyNDQzMTkifV19", - "config": "eyJhcmNoaXRlY3R1cmUiOiJhbWQ2NCIsImNvbmZpZyI6eyJFbnYiOlsiUEFUSD0vdXNyL2xvY2FsL3NiaW46L3Vzci9sb2NhbC9iaW46L3Vzci9zYmluOi91c3IvYmluOi9zYmluOi9iaW4iXSwiV29ya2luZ0RpciI6Ii8iLCJPbkJ1aWxkIjpudWxsfSwiY3JlYXRlZCI6IjIwMjItMDgtMjVUMTY6MjI6MDguODkxMzY0Mjc4WiIsImhpc3RvcnkiOlt7ImNyZWF0ZWQiOiIyMDIyLTA4LTI1VDE2OjIyOjA4Ljc2MzMzMDMyM1oiLCJjcmVhdGVkX2J5IjoiQUREIGZpbGUtMS50eHQgL3NvbWVmaWxlLTEudHh0ICMgYnVpbGRraXQiLCJjb21tZW50IjoiYnVpbGRraXQuZG9ja2VyZmlsZS52MCJ9LHsiY3JlYXRlZCI6IjIwMjItMDgtMjVUMTY6MjI6MDguODkxMzY0Mjc4WiIsImNyZWF0ZWRfYnkiOiJBREQgZmlsZS0yLnR4dCAvc29tZWZpbGUtMi50eHQgIyBidWlsZGtpdCIsImNvbW1lbnQiOiJidWlsZGtpdC5kb2NrZXJmaWxlLnYwIn1dLCJvcyI6ImxpbnV4Iiwicm9vdGZzIjp7InR5cGUiOiJsYXllcnMiLCJkaWZmX2lkcyI6WyJzaGEyNTY6NmFmZDFjYjU1OTM5ZDg3YmE0YzI5ODkwN2QwYTUzMDU5YmIzNzQyYzJkNjUzMTQ2NDNlMjQ2NDA3MWNmMGEyZCIsInNoYTI1Njo2NTc5OTdjZmY5YTgzNjEzOTE4NjIzOWJkZmU3NzI1MDIzOWE3MDBkMGVkOTdkNTdlMTAxYzI5NWU4MjQ0MzE5Il19fQ==", + "manifest": 
"eyJzY2hlbWFWZXJzaW9uIjoyLCJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmRpc3RyaWJ1dGlvbi5tYW5pZmVzdC52Mitqc29uIiwiY29uZmlnIjp7Im1lZGlhVHlwZSI6ImFwcGxpY2F0aW9uL3ZuZC5kb2NrZXIuY29udGFpbmVyLmltYWdlLnYxK2pzb24iLCJzaXplIjo2NzMsImRpZ2VzdCI6InNoYTI1NjozYzUxYjA2ZmViMGNkYThlZTYyZDBlMzc1NWVmMmE4NDk2YTZiNzFmOGE1NWIyNDVmMDdmMzFjNGJiODEzZDMxIn0sImxheWVycyI6W3sibWVkaWFUeXBlIjoiYXBwbGljYXRpb24vdm5kLmRvY2tlci5pbWFnZS5yb290ZnMuZGlmZi50YXIuZ3ppcCIsInNpemUiOjIwNDgsImRpZ2VzdCI6InNoYTI1NjpmYjZiZWVjYjc1YjM5ZjRiYjgxM2RiZjE3N2U1MDFlZGQ1ZGRiM2U2OWJiNDVjZWRlYjc4YzY3NmVlMWI3YTU5In0seyJtZWRpYVR5cGUiOiJhcHBsaWNhdGlvbi92bmQuZG9ja2VyLmltYWdlLnJvb3Rmcy5kaWZmLnRhci5nemlwIiwic2l6ZSI6MjA0OCwiZGlnZXN0Ijoic2hhMjU2OjMxOWI1ODhjZTY0MjUzYTg3YjUzM2M4ZWQwMWNmMDAyNWUwZWFjOThlN2I1MTZlMTI1MzI5NTdlMTI0NGZkZWMifV19", + "config": "eyJhcmNoaXRlY3R1cmUiOiJhbWQ2NCIsImNvbmZpZyI6eyJFbnYiOlsiUEFUSD0vdXNyL2xvY2FsL3NiaW46L3Vzci9sb2NhbC9iaW46L3Vzci9zYmluOi91c3IvYmluOi9zYmluOi9iaW4iXSwiV29ya2luZ0RpciI6Ii8iLCJPbkJ1aWxkIjpudWxsfSwiY3JlYXRlZCI6IjIwMjItMDgtMDFUMjA6MDk6MjIuNTA5NDIxNzEyWiIsImhpc3RvcnkiOlt7ImNyZWF0ZWQiOiIyMDIyLTA4LTAxVDIwOjA5OjIyLjQ4Nzg5NTUxOVoiLCJjcmVhdGVkX2J5IjoiQUREIGZpbGUtMS50eHQgL3NvbWVmaWxlLTEudHh0ICMgYnVpbGRraXQiLCJjb21tZW50IjoiYnVpbGRraXQuZG9ja2VyZmlsZS52MCJ9LHsiY3JlYXRlZCI6IjIwMjItMDgtMDFUMjA6MDk6MjIuNTA5NDIxNzEyWiIsImNyZWF0ZWRfYnkiOiJBREQgZmlsZS0yLnR4dCAvc29tZWZpbGUtMi50eHQgIyBidWlsZGtpdCIsImNvbW1lbnQiOiJidWlsZGtpdC5kb2NrZXJmaWxlLnYwIn1dLCJvcyI6ImxpbnV4Iiwicm9vdGZzIjp7InR5cGUiOiJsYXllcnMiLCJkaWZmX2lkcyI6WyJzaGEyNTY6ZmI2YmVlY2I3NWIzOWY0YmI4MTNkYmYxNzdlNTAxZWRkNWRkYjNlNjliYjQ1Y2VkZWI3OGM2NzZlZTFiN2E1OSIsInNoYTI1NjozMTliNTg4Y2U2NDI1M2E4N2I1MzNjOGVkMDFjZjAwMjVlMGVhYzk4ZTdiNTE2ZTEyNTMyOTU3ZTEyNDRmZGVjIl19fQ==", "repoDigests": [], "architecture": "", "os": "" @@ -112,7 +112,7 @@ } }, "schema": { - "version": "6.1.0", - "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-6.1.0.json" + "version": "6.2.0", + "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-6.2.0.json" } } diff --git a/syft/formats/syftjson/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden b/syft/formats/syftjson/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden index 0a4b4d25667..11a1958c893 100644 Binary files a/syft/formats/syftjson/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden and b/syft/formats/syftjson/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden differ diff --git a/syft/formats/syftjson/to_format_model.go b/syft/formats/syftjson/to_format_model.go index 00c62fc0db5..e81a1a197d2 100644 --- a/syft/formats/syftjson/to_format_model.go +++ b/syft/formats/syftjson/to_format_model.go @@ -5,6 +5,7 @@ import ( "sort" "strconv" + stereoscopeFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" @@ -137,7 +138,7 @@ func toFileMetadataEntry(coordinates source.Coordinates, metadata *source.FileMe return &model.FileMetadataEntry{ Mode: mode, - Type: metadata.Type, + Type: toFileType(metadata.Type), LinkDestination: metadata.LinkDestination, UserID: metadata.UserID, GroupID: metadata.GroupID, @@ -145,6 +146,31 @@ func toFileMetadataEntry(coordinates source.Coordinates, metadata *source.FileMe } } +func toFileType(ty stereoscopeFile.Type) string { + switch ty { + case stereoscopeFile.TypeSymLink: + return "SymbolicLink" + case stereoscopeFile.TypeHardLink: + return "HardLink" + case stereoscopeFile.TypeDirectory: + return "Directory" + case 
stereoscopeFile.TypeSocket: + return "Socket" + case stereoscopeFile.TypeBlockDevice: + return "BlockDevice" + case stereoscopeFile.TypeCharacterDevice: + return "CharacterDevice" + case stereoscopeFile.TypeFIFO: + return "FIFONode" + case stereoscopeFile.TypeRegular: + return "RegularFile" + case stereoscopeFile.TypeIrregular: + return "IrregularFile" + default: + return "Unknown" + } +} + func toPackageModels(catalog *pkg.Catalog) []model.Package { artifacts := make([]model.Package, 0) if catalog == nil { diff --git a/syft/formats/syftjson/to_format_model_test.go b/syft/formats/syftjson/to_format_model_test.go index f28dc6404a9..2dcfae67c52 100644 --- a/syft/formats/syftjson/to_format_model_test.go +++ b/syft/formats/syftjson/to_format_model_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/internal" "github.com/anchore/syft/syft/formats/syftjson/model" "github.com/anchore/syft/syft/source" @@ -97,3 +98,64 @@ func Test_toSourceModel(t *testing.T) { // assert all possible schemes were under test assert.ElementsMatch(t, allSchemes.List(), testedSchemes.List(), "not all source.Schemes are under test") } + +func Test_toFileType(t *testing.T) { + + badType := file.Type(0x1337) + var allTypesTested []file.Type + tests := []struct { + ty file.Type + name string + }{ + { + ty: file.TypeRegular, + name: "RegularFile", + }, + { + ty: file.TypeDirectory, + name: "Directory", + }, + { + ty: file.TypeSymLink, + name: "SymbolicLink", + }, + { + ty: file.TypeHardLink, + name: "HardLink", + }, + { + ty: file.TypeSocket, + name: "Socket", + }, + { + ty: file.TypeCharacterDevice, + name: "CharacterDevice", + }, + { + ty: file.TypeBlockDevice, + name: "BlockDevice", + }, + { + ty: file.TypeFIFO, + name: "FIFONode", + }, + { + ty: file.TypeIrregular, + name: "IrregularFile", + }, + { + ty: badType, + name: "Unknown", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.name, toFileType(tt.ty), "toFileType(%v)", tt.ty) + if tt.ty != badType { + allTypesTested = append(allTypesTested, tt.ty) + } + }) + } + + assert.ElementsMatch(t, allTypesTested, file.AllTypes(), "not all file.Types are under test") +} diff --git a/syft/formats/table/encoder_test.go b/syft/formats/table/encoder_test.go index 830f28bb3b5..44e9f473069 100644 --- a/syft/formats/table/encoder_test.go +++ b/syft/formats/table/encoder_test.go @@ -6,7 +6,7 @@ import ( "github.com/go-test/deep" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" ) var updateTableGoldenFiles = flag.Bool("update-table", false, "update the *.golden files for table format") diff --git a/syft/formats/template/encoder_test.go b/syft/formats/template/encoder_test.go index e41d3109c92..34eb6c79a5a 100644 --- a/syft/formats/template/encoder_test.go +++ b/syft/formats/template/encoder_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/assert" - "github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" ) var updateTmpl = flag.Bool("update-tmpl", false, "update the *.golden files for json encoders") diff --git a/syft/formats/text/encoder_test.go b/syft/formats/text/encoder_test.go index a50d90a1e85..7b5e9f4727f 100644 --- a/syft/formats/text/encoder_test.go +++ b/syft/formats/text/encoder_test.go @@ -4,7 +4,7 @@ import ( "flag" "testing" - 
"github.com/anchore/syft/syft/formats/common/testutils" + "github.com/anchore/syft/syft/formats/internal/testutils" ) var updateTextEncoderGoldenFiles = flag.Bool("update-text", false, "update the *.golden files for text encoder") diff --git a/syft/pkg/cataloger/alpm/cataloger_test.go b/syft/pkg/cataloger/alpm/cataloger_test.go new file mode 100644 index 00000000000..65447d66d8b --- /dev/null +++ b/syft/pkg/cataloger/alpm/cataloger_test.go @@ -0,0 +1,209 @@ +package alpm + +import ( + "testing" + + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" + "github.com/anchore/syft/syft/source" +) + +func TestAlpmCataloger(t *testing.T) { + + expectedPkgs := []pkg.Package{ + { + Name: "gmp", + Version: "6.2.1-2", + Type: pkg.AlpmPkg, + FoundBy: "alpmdb-cataloger", + Licenses: []string{"LGPL3", "GPL"}, + Locations: source.NewLocationSet(source.NewLocation("var/lib/pacman/local/gmp-6.2.1-2/desc")), + CPEs: nil, + PURL: "", + MetadataType: "AlpmMetadata", + Metadata: pkg.AlpmMetadata{ + BasePackage: "gmp", + Package: "gmp", + Version: "6.2.1-2", + Description: "A free library for arbitrary precision arithmetic", + Architecture: "x86_64", + Size: 1044438, + Packager: "Antonio Rojas ", + License: "LGPL3\nGPL", + URL: "https://gmplib.org/", + Validation: "pgp", + Reason: 1, + Files: []pkg.AlpmFileRecord{ + { + Path: "/usr", + Type: "dir", + Digests: []file.Digest{}, + }, + { + Path: "/usr/include", + Type: "dir", + Digests: []file.Digest{}, + }, + { + Path: "/usr/include/gmp.h", + Size: "84140", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "76595f70565c72550eb520809bf86856"}, + {Algorithm: "sha256", Value: "91a614b9202453153fe3b7512d15e89659108b93ce8841c8e13789eb85da9e3a"}, + }, + }, + { + Path: "/usr/include/gmpxx.h", + Size: "129113", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "ea3d21de4bcf7c696799c5c55dd3655b"}, + {Algorithm: "sha256", Value: "0011ae411a0bc1030e07d968b32fdc1343f5ac2a17b7d28f493e7976dde2ac82"}, + }, + }, + { + Path: "/usr/lib", + Type: "dir", + Digests: []file.Digest{}, + }, + { + Path: "/usr/lib/libgmp.so", + Type: "link", + Link: "libgmp.so.10.4.1", + Digests: []file.Digest{}, + }, + { + Path: "/usr/lib/libgmp.so.10", + Type: "link", + Link: "libgmp.so.10.4.1", + Digests: []file.Digest{}, + }, + { + Path: "/usr/lib/libgmp.so.10.4.1", + Size: "663224", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "d6d03eadacdd9048d5b2adf577e9d722"}, + {Algorithm: "sha256", Value: "39898bd3d8d6785222432fa8b8aef7ce3b7e5bbfc66a52b7c0da09bed4adbe6a"}, + }, + }, + { + Path: "/usr/lib/libgmpxx.so", + Type: "link", + Link: "libgmpxx.so.4.6.1", + Digests: []file.Digest{}, + }, + { + Path: "/usr/lib/libgmpxx.so.4", + Type: "link", + Link: "libgmpxx.so.4.6.1", + Digests: []file.Digest{}, + }, + { + Path: "/usr/lib/libgmpxx.so.4.6.1", + Size: "30680", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "dd5f0c4d635fa599fa7f4339c0e8814d"}, + {Algorithm: "sha256", Value: "0ef67cbde4841f58d2e4b41f59425eb87c9eeaf4e649c060b326342c53bedbec"}, + }, + }, + { + Path: "/usr/lib/pkgconfig", + Type: "dir", + Digests: []file.Digest{}, + }, + { + Path: "/usr/lib/pkgconfig/gmp.pc", + Size: "245", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "a91a9f1b66218cb77b9cd2cdf341756d"}, + {Algorithm: "sha256", Value: "4e9de547a48c4e443781e9fa702a1ec5a23ee28b4bc520306cff2541a855be37"}, + }, + }, + { + Path: 
"/usr/lib/pkgconfig/gmpxx.pc", + Size: "280", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "8c0f54e987934352177a6a30a811b001"}, + {Algorithm: "sha256", Value: "fc5dbfbe75977057ba50953d94b9daecf696c9fdfe5b94692b832b44ecca871b"}, + }, + }, + { + Path: "/usr/share", + Type: "dir", + Digests: []file.Digest{}, + }, + { + Path: "/usr/share/info", + Type: "dir", + Digests: []file.Digest{}, + }, + { + Path: "/usr/share/info/gmp.info-1.gz", + Size: "85892", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "63304d4d2f0247fb8a999fae66a81c19"}, + {Algorithm: "sha256", Value: "86288c1531a2789db5da8b9838b5cde4db07bda230ae11eba23a1f33698bd14e"}, + }, + }, + { + Path: "/usr/share/info/gmp.info-2.gz", + Size: "48484", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "4bb0dadec416d305232cac6eae712ff7"}, + {Algorithm: "sha256", Value: "b7443c1b529588d98a074266087f79b595657ac7274191c34b10a9ceedfa950e"}, + }, + }, + { + Path: "/usr/share/info/gmp.info.gz", + Size: "2380", + Digests: []file.Digest{ + {Algorithm: "md5", Value: "cf6880fb0d862ee1da0d13c3831b5720"}, + {Algorithm: "sha256", Value: "a13c8eecda3f3e5ad1e09773e47a9686f07d9d494eaddf326f3696bbef1548fd"}, + }, + }, + }, + Backup: []pkg.AlpmFileRecord{}, + }, + }, + } + + // TODO: relationships are not under test yet + var expectedRelationships []artifact.Relationship + + pkgtest.NewCatalogTester(). + FromDirectory(t, "test-fixtures/gmp-fixture"). + WithCompareOptions(cmpopts.IgnoreFields(pkg.AlpmFileRecord{}, "Time")). + Expects(expectedPkgs, expectedRelationships). + TestCataloger(t, NewAlpmdbCataloger()) + +} + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain description files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "var/lib/pacman/local/base-1.0/desc", + "var/lib/pacman/local/dive-0.10.0/desc", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + IgnoreUnfulfilledPathResponses("var/lib/pacman/local/base-1.0/mtree", "var/lib/pacman/local/dive-0.10.0/mtree"). + TestCataloger(t, NewAlpmdbCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/alpm/package.go b/syft/pkg/cataloger/alpm/package.go index ac9ac1b6592..1a5bdf13135 100644 --- a/syft/pkg/cataloger/alpm/package.go +++ b/syft/pkg/cataloger/alpm/package.go @@ -1,9 +1,8 @@ package alpm import ( - "strings" - "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal" "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/source" @@ -15,7 +14,7 @@ func newPackage(m pkg.AlpmMetadata, release *linux.Release, locations ...source. 
Version: m.Version, Locations: source.NewLocationSet(locations...), Type: pkg.AlpmPkg, - Licenses: strings.Split(m.License, " "), + Licenses: internal.SplitAny(m.License, " \n"), PURL: packageURL(m, release), MetadataType: pkg.AlpmMetadataType, Metadata: m, diff --git a/syft/pkg/cataloger/alpm/parse_alpm_db.go b/syft/pkg/cataloger/alpm/parse_alpm_db.go index 5bc776a4443..9de9620b429 100644 --- a/syft/pkg/cataloger/alpm/parse_alpm_db.go +++ b/syft/pkg/cataloger/alpm/parse_alpm_db.go @@ -65,6 +65,10 @@ func parseAlpmDB(resolver source.FileResolver, env *generic.Environment, reader metadata.Backup = filesMetadata.Backup } + if metadata.Package == "" { + return nil, nil, nil + } + return []pkg.Package{ newPackage(*metadata, env.LinuxRelease, reader.Location), }, nil, nil diff --git a/syft/pkg/cataloger/alpm/parse_alpm_db_test.go b/syft/pkg/cataloger/alpm/parse_alpm_db_test.go index 9debc22a6d2..907b2aa36e2 100644 --- a/syft/pkg/cataloger/alpm/parse_alpm_db_test.go +++ b/syft/pkg/cataloger/alpm/parse_alpm_db_test.go @@ -16,10 +16,12 @@ import ( func TestDatabaseParser(t *testing.T) { tests := []struct { name string + fixture string expected pkg.AlpmMetadata }{ { - name: "test alpm database parsing", + name: "test alpm database parsing", + fixture: "test-fixtures/files", expected: pkg.AlpmMetadata{ Backup: []pkg.AlpmFileRecord{ { @@ -90,7 +92,7 @@ func TestDatabaseParser(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - f, err := os.Open("test-fixtures/files") + f, err := os.Open(test.fixture) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, f.Close()) }) diff --git a/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/base-1.0/desc b/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/base-1.0/desc new file mode 100644 index 00000000000..34ca538d2b8 --- /dev/null +++ b/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/base-1.0/desc @@ -0,0 +1 @@ +bogus desc file \ No newline at end of file diff --git a/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/base-1.0/files b/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/base-1.0/files new file mode 100644 index 00000000000..eb016bb551f --- /dev/null +++ b/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/base-1.0/files @@ -0,0 +1 @@ +bogus files \ No newline at end of file diff --git a/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/dive-0.10.0/desc b/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/dive-0.10.0/desc new file mode 100644 index 00000000000..34ca538d2b8 --- /dev/null +++ b/syft/pkg/cataloger/alpm/test-fixtures/glob-paths/var/lib/pacman/local/dive-0.10.0/desc @@ -0,0 +1 @@ +bogus desc file \ No newline at end of file diff --git a/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/desc b/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/desc new file mode 100644 index 00000000000..39ff43a9e50 --- /dev/null +++ b/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/desc @@ -0,0 +1,44 @@ +%NAME% +gmp + +%VERSION% +6.2.1-2 + +%BASE% +gmp + +%DESC% +A free library for arbitrary precision arithmetic + +%URL% +https://gmplib.org/ + +%ARCH% +x86_64 + +%BUILDDATE% +1653121258 + +%INSTALLDATE% +1665878640 + +%PACKAGER% +Antonio Rojas + +%SIZE% +1044438 + +%REASON% +1 + +%LICENSE% +LGPL3 +GPL + +%VALIDATION% +pgp + +%DEPENDS% +gcc-libs +sh + diff 
--git a/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/files b/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/files new file mode 100644 index 00000000000..737b8dae42e --- /dev/null +++ b/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/files @@ -0,0 +1,21 @@ +%FILES% +usr/ +usr/include/ +usr/include/gmp.h +usr/include/gmpxx.h +usr/lib/ +usr/lib/libgmp.so +usr/lib/libgmp.so.10 +usr/lib/libgmp.so.10.4.1 +usr/lib/libgmpxx.so +usr/lib/libgmpxx.so.4 +usr/lib/libgmpxx.so.4.6.1 +usr/lib/pkgconfig/ +usr/lib/pkgconfig/gmp.pc +usr/lib/pkgconfig/gmpxx.pc +usr/share/ +usr/share/info/ +usr/share/info/gmp.info-1.gz +usr/share/info/gmp.info-2.gz +usr/share/info/gmp.info.gz + diff --git a/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/mtree b/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/mtree new file mode 100644 index 00000000000..fe9d46ef34e Binary files /dev/null and b/syft/pkg/cataloger/alpm/test-fixtures/gmp-fixture/var/lib/pacman/local/gmp-6.2.1-2/mtree differ diff --git a/syft/pkg/cataloger/apkdb/cataloger_test.go b/syft/pkg/cataloger/apkdb/cataloger_test.go new file mode 100644 index 00000000000..5a29607913a --- /dev/null +++ b/syft/pkg/cataloger/apkdb/cataloger_test.go @@ -0,0 +1,30 @@ +package apkdb + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain DB files", + fixture: "test-fixtures/glob-paths", + expected: []string{"lib/apk/db/installed"}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
+ TestCataloger(t, NewApkdbCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/apkdb/parse_apk_db.go b/syft/pkg/cataloger/apkdb/parse_apk_db.go index b5a5be32128..f83b7bb521f 100644 --- a/syft/pkg/cataloger/apkdb/parse_apk_db.go +++ b/syft/pkg/cataloger/apkdb/parse_apk_db.go @@ -7,6 +7,7 @@ import ( "strconv" "strings" + "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" @@ -350,19 +351,12 @@ func discoverPackageDependencies(pkgs []pkg.Package) (relationships []artifact.R return relationships } -func splitAny(s string, seps string) []string { - splitter := func(r rune) bool { - return strings.ContainsRune(seps, r) - } - return strings.FieldsFunc(s, splitter) -} - func stripVersionSpecifier(s string) string { // examples: // musl>=1 --> musl // cmd:scanelf=1.3.4-r0 --> cmd:scanelf - items := splitAny(s, "<>=") + items := internal.SplitAny(s, "<>=") if len(items) == 0 { return s } diff --git a/syft/pkg/cataloger/apkdb/test-fixtures/glob-paths/lib/apk/db/installed b/syft/pkg/cataloger/apkdb/test-fixtures/glob-paths/lib/apk/db/installed new file mode 100644 index 00000000000..616203752e6 --- /dev/null +++ b/syft/pkg/cataloger/apkdb/test-fixtures/glob-paths/lib/apk/db/installed @@ -0,0 +1 @@ +bogus db contents \ No newline at end of file diff --git a/syft/pkg/cataloger/binary/cataloger.go b/syft/pkg/cataloger/binary/cataloger.go index a3f8b19e696..a610de19d18 100644 --- a/syft/pkg/cataloger/binary/cataloger.go +++ b/syft/pkg/cataloger/binary/cataloger.go @@ -32,6 +32,7 @@ func (c Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []artif var relationships []artifact.Relationship for _, cls := range defaultClassifiers { + log.WithFields("classifier", cls.Class).Trace("cataloging binaries") pkgs, err := catalog(resolver, cls) if err != nil { log.WithFields("error", err, "classifier", cls.Class).Warn("unable to catalog binary package: %w", err) diff --git a/syft/pkg/cataloger/binary/cataloger_test.go b/syft/pkg/cataloger/binary/cataloger_test.go index ab70b4c26fb..6e916271de7 100644 --- a/syft/pkg/cataloger/binary/cataloger_test.go +++ b/syft/pkg/cataloger/binary/cataloger_test.go @@ -13,7 +13,7 @@ import ( "github.com/anchore/syft/syft/source" ) -func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { +func Test_Cataloger_DefaultClassifiers_PositiveCases(t *testing.T) { tests := []struct { name string fixtureDir string @@ -440,7 +440,7 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) { } } -func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { +func Test_Cataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { tests := []struct { name string fixtureImage string @@ -521,39 +521,57 @@ func assertPackagesAreEqual(t *testing.T, expected pkg.Package, p pkg.Package) { } type panicyResolver struct { - globCalled bool + searchCalled bool } -func (p panicyResolver) FileContentsByLocation(location source.Location) (io.ReadCloser, error) { +func (p *panicyResolver) FilesByExtension(_ ...string) ([]source.Location, error) { + p.searchCalled = true return nil, errors.New("not implemented") } -func (p panicyResolver) HasPath(s string) bool { +func (p *panicyResolver) FilesByBasename(_ ...string) ([]source.Location, error) { + p.searchCalled = true + return nil, errors.New("not implemented") +} + +func (p *panicyResolver) FilesByBasenameGlob(_ ...string) ([]source.Location, error) { + p.searchCalled = true 
+ return nil, errors.New("not implemented")
+}
+
+func (p *panicyResolver) FileContentsByLocation(_ source.Location) (io.ReadCloser, error) {
+ p.searchCalled = true
+ return nil, errors.New("not implemented")
+}
+
+func (p *panicyResolver) HasPath(s string) bool {
 return true
}

-func (p panicyResolver) FilesByPath(paths ...string) ([]source.Location, error) {
+func (p *panicyResolver) FilesByPath(_ ...string) ([]source.Location, error) {
+ p.searchCalled = true
 return nil, errors.New("not implemented")
}

-func (p *panicyResolver) FilesByGlob(patterns ...string) ([]source.Location, error) {
- p.globCalled = true
+func (p *panicyResolver) FilesByGlob(_ ...string) ([]source.Location, error) {
+ p.searchCalled = true
 return nil, errors.New("not implemented")
}

-func (p panicyResolver) FilesByMIMEType(types ...string) ([]source.Location, error) {
+func (p *panicyResolver) FilesByMIMEType(_ ...string) ([]source.Location, error) {
+ p.searchCalled = true
 return nil, errors.New("not implemented")
}

-func (p panicyResolver) RelativeFileByPath(_ source.Location, path string) *source.Location {
+func (p *panicyResolver) RelativeFileByPath(_ source.Location, _ string) *source.Location {
 return nil
}

-func (p panicyResolver) AllLocations() <-chan source.Location {
+func (p *panicyResolver) AllLocations() <-chan source.Location {
 return nil
}

-func (p panicyResolver) FileMetadataByLocation(location source.Location) (source.FileMetadata, error) {
+func (p *panicyResolver) FileMetadataByLocation(_ source.Location) (source.FileMetadata, error) {
 return source.FileMetadata{}, errors.New("not implemented")
}

@@ -563,5 +581,5 @@ func Test_Cataloger_ResilientToErrors(t *testing.T) {
 resolver := &panicyResolver{}
 _, _, err := c.Catalog(resolver)
 assert.NoError(t, err)
- assert.True(t, resolver.globCalled)
+ assert.True(t, resolver.searchCalled)
}
diff --git a/syft/pkg/cataloger/binary/classifier_test.go b/syft/pkg/cataloger/binary/classifier_test.go
index c0c9520af52..cc7d2b0d6aa 100644
--- a/syft/pkg/cataloger/binary/classifier_test.go
+++ b/syft/pkg/cataloger/binary/classifier_test.go
@@ -21,7 +21,7 @@ func Test_ClassifierCPEs(t *testing.T) {
 fixture: "test-fixtures/version.txt",
 classifier: classifier{
 Package: "some-app",
- FileGlob: ".*/version.txt",
+ FileGlob: "**/version.txt",
 EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
 CPEs: []cpe.CPE{},
 },
@@ -32,7 +32,7 @@ func Test_ClassifierCPEs(t *testing.T) {
 fixture: "test-fixtures/version.txt",
 classifier: classifier{
 Package: "some-app",
- FileGlob: ".*/version.txt",
+ FileGlob: "**/version.txt",
 EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
 CPEs: []cpe.CPE{
 cpe.Must("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"),
 },
@@ -47,7 +47,7 @@ func Test_ClassifierCPEs(t *testing.T) {
 fixture: "test-fixtures/version.txt",
 classifier: classifier{
 Package: "some-app",
- FileGlob: ".*/version.txt",
+ FileGlob: "**/version.txt",
 EvidenceMatcher: fileContentsVersionMatcher(`(?m)my-verison:(?P<version>[0-9.]+)`),
 CPEs: []cpe.CPE{
 cpe.Must("cpe:2.3:a:some:app:*:*:*:*:*:*:*:*"),
diff --git a/syft/pkg/cataloger/cataloger.go b/syft/pkg/cataloger/cataloger.go
index bfc2e1e9cd6..e59388ff07f 100644
--- a/syft/pkg/cataloger/cataloger.go
+++ b/syft/pkg/cataloger/cataloger.go
@@ -41,8 +41,8 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
 alpm.NewAlpmdbCataloger(),
 ruby.NewGemSpecCataloger(),
 python.NewPythonPackageCataloger(),
- php.NewPHPComposerInstalledCataloger(),
- javascript.NewJavascriptPackageCataloger(),
+
php.NewComposerInstalledCataloger(), + javascript.NewPackageCataloger(), deb.NewDpkgdbCataloger(), rpm.NewRpmDBCataloger(), java.NewJavaCataloger(cfg.Java()), @@ -63,8 +63,8 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger { ruby.NewGemFileLockCataloger(), python.NewPythonIndexCataloger(), python.NewPythonPackageCataloger(), - php.NewPHPComposerLockCataloger(), - javascript.NewJavascriptLockCataloger(), + php.NewComposerLockCataloger(), + javascript.NewLockCataloger(), deb.NewDpkgdbCataloger(), rpm.NewRpmDBCataloger(), rpm.NewFileCataloger(), @@ -96,8 +96,8 @@ func AllCatalogers(cfg Config) []pkg.Cataloger { ruby.NewGemSpecCataloger(), python.NewPythonIndexCataloger(), python.NewPythonPackageCataloger(), - javascript.NewJavascriptLockCataloger(), - javascript.NewJavascriptPackageCataloger(), + javascript.NewLockCataloger(), + javascript.NewPackageCataloger(), deb.NewDpkgdbCataloger(), rpm.NewRpmDBCataloger(), rpm.NewFileCataloger(), @@ -111,8 +111,8 @@ func AllCatalogers(cfg Config) []pkg.Cataloger { rust.NewAuditBinaryCataloger(), dart.NewPubspecLockCataloger(), dotnet.NewDotnetDepsCataloger(), - php.NewPHPComposerInstalledCataloger(), - php.NewPHPComposerLockCataloger(), + php.NewComposerInstalledCataloger(), + php.NewComposerLockCataloger(), swift.NewCocoapodsCataloger(), cpp.NewConanCataloger(), portage.NewPortageCataloger(), diff --git a/syft/pkg/cataloger/cpp/cataloger_test.go b/syft/pkg/cataloger/cpp/cataloger_test.go new file mode 100644 index 00000000000..144d4cc915c --- /dev/null +++ b/syft/pkg/cataloger/cpp/cataloger_test.go @@ -0,0 +1,33 @@ +package cpp + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain conan files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "somewhere/src/conanfile.txt", + "somewhere/src/conan.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
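The FileGlob fix in the classifier tests above swaps a regex-flavored pattern for real glob syntax: in a glob, "." is a literal dot and "*" stops at path separators, while "**" spans directories. A minimal illustration of the difference, assuming doublestar-style glob matching (github.com/bmatcuk/doublestar; whether syft's resolvers use exactly this library is an assumption here):

package main

import (
	"fmt"

	"github.com/bmatcuk/doublestar/v4"
)

func main() {
	// "**" crosses directory boundaries, so the classifier matches at any depth.
	ok, _ := doublestar.Match("**/version.txt", "opt/some-app/version.txt")
	fmt.Println(ok) // true

	// The old pattern only matches a single leading path segment that starts
	// with a literal ".", so nested paths never match.
	ok, _ = doublestar.Match(".*/version.txt", "opt/some-app/version.txt")
	fmt.Println(ok) // false
}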
+ TestCataloger(t, NewConanCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/cpp/test-fixtures/glob-paths/somewhere/src/conan.lock b/syft/pkg/cataloger/cpp/test-fixtures/glob-paths/somewhere/src/conan.lock new file mode 100644 index 00000000000..1068548a948 --- /dev/null +++ b/syft/pkg/cataloger/cpp/test-fixtures/glob-paths/somewhere/src/conan.lock @@ -0,0 +1 @@ +bogus conan.lock \ No newline at end of file diff --git a/syft/pkg/cataloger/cpp/test-fixtures/glob-paths/somewhere/src/conanfile.txt b/syft/pkg/cataloger/cpp/test-fixtures/glob-paths/somewhere/src/conanfile.txt new file mode 100644 index 00000000000..7159f327782 --- /dev/null +++ b/syft/pkg/cataloger/cpp/test-fixtures/glob-paths/somewhere/src/conanfile.txt @@ -0,0 +1 @@ +bogus conan file \ No newline at end of file diff --git a/syft/pkg/cataloger/dart/cataloger_test.go b/syft/pkg/cataloger/dart/cataloger_test.go new file mode 100644 index 00000000000..94aa8cfa0f7 --- /dev/null +++ b/syft/pkg/cataloger/dart/cataloger_test.go @@ -0,0 +1,32 @@ +package dart + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain pubspec files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/pubspec.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewPubspecLockCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/dart/test-fixtures/glob-paths/src/pubspec.lock b/syft/pkg/cataloger/dart/test-fixtures/glob-paths/src/pubspec.lock new file mode 100644 index 00000000000..bf061c80b28 --- /dev/null +++ b/syft/pkg/cataloger/dart/test-fixtures/glob-paths/src/pubspec.lock @@ -0,0 +1 @@ +bogus pubspec.lock \ No newline at end of file diff --git a/syft/pkg/cataloger/deb/cataloger.go b/syft/pkg/cataloger/deb/cataloger.go index 4484757105d..fb25550c404 100644 --- a/syft/pkg/cataloger/deb/cataloger.go +++ b/syft/pkg/cataloger/deb/cataloger.go @@ -4,7 +4,6 @@ Package dpkg provides a concrete Cataloger implementation for Debian package DB package deb import ( - "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/generic" ) @@ -13,5 +12,7 @@ const catalogerName = "dpkgdb-cataloger" // NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files. func NewDpkgdbCataloger() *generic.Cataloger { return generic.NewCataloger(catalogerName). - WithParserByGlobs(parseDpkgDB, pkg.DpkgDBGlob) + // note: these globs have been intentionally split up in order to improve search performance, + // please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}" + WithParserByGlobs(parseDpkgDB, "**/var/lib/dpkg/status", "**/var/lib/dpkg/status.d/*") } diff --git a/syft/pkg/cataloger/deb/cataloger_test.go b/syft/pkg/cataloger/deb/cataloger_test.go index 7e627b1c9a7..8fac459c613 100644 --- a/syft/pkg/cataloger/deb/cataloger_test.go +++ b/syft/pkg/cataloger/deb/cataloger_test.go @@ -81,3 +81,29 @@ func TestDpkgCataloger(t *testing.T) { Expects(expected, nil). 
TestCataloger(t, c) } + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain db status files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "var/lib/dpkg/status", + "var/lib/dpkg/status.d/pkg-1.0", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewDpkgdbCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/deb/package.go b/syft/pkg/cataloger/deb/package.go index 8ef890b1719..3f10dbe3fe1 100644 --- a/syft/pkg/cataloger/deb/package.go +++ b/syft/pkg/cataloger/deb/package.go @@ -83,7 +83,7 @@ func packageURL(m pkg.DpkgMetadata, distro *linux.Release) string { func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { metadata, ok := p.Metadata.(pkg.DpkgMetadata) if !ok { - log.WithFields("package", p.String()).Warn("unable to extract DPKG metadata to add licenses") + log.WithFields("package", p).Warn("unable to extract DPKG metadata to add licenses") return } @@ -103,7 +103,7 @@ func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pk func mergeFileListing(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { metadata, ok := p.Metadata.(pkg.DpkgMetadata) if !ok { - log.WithFields("package", p.String()).Warn("unable to extract DPKG metadata to file listing") + log.WithFields("package", p).Warn("unable to extract DPKG metadata to file listing") return } diff --git a/syft/pkg/cataloger/deb/test-fixtures/glob-paths/var/lib/dpkg/status b/syft/pkg/cataloger/deb/test-fixtures/glob-paths/var/lib/dpkg/status new file mode 100644 index 00000000000..90786794c1e --- /dev/null +++ b/syft/pkg/cataloger/deb/test-fixtures/glob-paths/var/lib/dpkg/status @@ -0,0 +1 @@ +bogus status \ No newline at end of file diff --git a/syft/pkg/cataloger/deb/test-fixtures/glob-paths/var/lib/dpkg/status.d/pkg-1.0 b/syft/pkg/cataloger/deb/test-fixtures/glob-paths/var/lib/dpkg/status.d/pkg-1.0 new file mode 100644 index 00000000000..b09abcf2897 --- /dev/null +++ b/syft/pkg/cataloger/deb/test-fixtures/glob-paths/var/lib/dpkg/status.d/pkg-1.0 @@ -0,0 +1 @@ +bogus package \ No newline at end of file diff --git a/syft/pkg/cataloger/dotnet/cataloger_test.go b/syft/pkg/cataloger/dotnet/cataloger_test.go new file mode 100644 index 00000000000..8b131449148 --- /dev/null +++ b/syft/pkg/cataloger/dotnet/cataloger_test.go @@ -0,0 +1,32 @@ +package dotnet + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain deps.json files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/something.deps.json", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
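The logging tweak above hands the package value itself to WithFields instead of a pre-rendered p.String(); with fmt-based loggers a fmt.Stringer is only rendered when the field is actually formatted. A standalone illustration (pkgStub is an invented stand-in, not a syft type):

package main

import "fmt"

type pkgStub struct{ name, version string }

func (p pkgStub) String() string { return p.name + "@" + p.version }

func main() {
	p := pkgStub{name: "tzdata", version: "2022f"}
	// %v invokes String() at format time, so callers can pass the value and
	// let the logger decide when (or whether) to render it.
	fmt.Printf("package=%v\n", p) // package=tzdata@2022f
}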
+ TestCataloger(t, NewDotnetDepsCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/dotnet/test-fixtures/glob-paths/src/something.deps.json b/syft/pkg/cataloger/dotnet/test-fixtures/glob-paths/src/something.deps.json new file mode 100644 index 00000000000..8bdc51dca4f --- /dev/null +++ b/syft/pkg/cataloger/dotnet/test-fixtures/glob-paths/src/something.deps.json @@ -0,0 +1 @@ +bogus deps.json \ No newline at end of file diff --git a/syft/pkg/cataloger/elixir/cataloger_test.go b/syft/pkg/cataloger/elixir/cataloger_test.go new file mode 100644 index 00000000000..538da15a5f2 --- /dev/null +++ b/syft/pkg/cataloger/elixir/cataloger_test.go @@ -0,0 +1,32 @@ +package elixir + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain mix.lock files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/mix.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewMixLockCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/elixir/test-fixtures/glob-paths/src/mix.lock b/syft/pkg/cataloger/elixir/test-fixtures/glob-paths/src/mix.lock new file mode 100644 index 00000000000..52d9b1e433e --- /dev/null +++ b/syft/pkg/cataloger/elixir/test-fixtures/glob-paths/src/mix.lock @@ -0,0 +1 @@ +bogus mix.lock \ No newline at end of file diff --git a/syft/pkg/cataloger/erlang/cataloger_test.go b/syft/pkg/cataloger/erlang/cataloger_test.go new file mode 100644 index 00000000000..1d959b10389 --- /dev/null +++ b/syft/pkg/cataloger/erlang/cataloger_test.go @@ -0,0 +1,32 @@ +package erlang + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain rebar.lock files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/rebar.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewRebarLockCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/erlang/test-fixtures/glob-paths/src/rebar.lock b/syft/pkg/cataloger/erlang/test-fixtures/glob-paths/src/rebar.lock new file mode 100644 index 00000000000..68baa46d48c --- /dev/null +++ b/syft/pkg/cataloger/erlang/test-fixtures/glob-paths/src/rebar.lock @@ -0,0 +1 @@ +bogus rebar.lock \ No newline at end of file diff --git a/syft/pkg/cataloger/generic/cataloger.go b/syft/pkg/cataloger/generic/cataloger.go index 73533288d09..d2069ffff52 100644 --- a/syft/pkg/cataloger/generic/cataloger.go +++ b/syft/pkg/cataloger/generic/cataloger.go @@ -47,16 +47,13 @@ func (c *Cataloger) WithParserByMimeTypes(parser Parser, types ...string) *Catal c.processor = append(c.processor, func(resolver source.FileResolver, env Environment) []request { var requests []request - for _, t := range types { - log.WithFields("mimetype", t).Trace("searching for paths matching mimetype") - - matches, err := resolver.FilesByMIMEType(t) - if err != nil { - log.Warnf("unable to process mimetype=%q: %+v", t, err) - continue - } - requests = append(requests, makeRequests(parser, matches)...) 
+ log.WithFields("mimetypes", types).Trace("searching for paths matching mimetype") + matches, err := resolver.FilesByMIMEType(types...) + if err != nil { + log.Warnf("unable to process mimetypes=%+v: %+v", types, err) + return nil } + requests = append(requests, makeRequests(parser, matches)...) return requests }, ) diff --git a/syft/pkg/cataloger/golang/cataloger_test.go b/syft/pkg/cataloger/golang/cataloger_test.go new file mode 100644 index 00000000000..55a18e6af71 --- /dev/null +++ b/syft/pkg/cataloger/golang/cataloger_test.go @@ -0,0 +1,58 @@ +package golang + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func Test_Mod_Cataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain go.mod files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/go.mod", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + IgnoreUnfulfilledPathResponses("src/go.sum"). + TestCataloger(t, NewGoModFileCataloger()) + }) + } +} + +func Test_Binary_Cataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain binary files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "partial-binary", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewGoModuleBinaryCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/golang/test-fixtures/glob-paths/partial-binary b/syft/pkg/cataloger/golang/test-fixtures/glob-paths/partial-binary new file mode 100644 index 00000000000..125d737c55f --- /dev/null +++ b/syft/pkg/cataloger/golang/test-fixtures/glob-paths/partial-binary @@ -0,0 +1 @@ +Ïúíþ \ No newline at end of file diff --git a/syft/pkg/cataloger/golang/test-fixtures/glob-paths/src/go.mod b/syft/pkg/cataloger/golang/test-fixtures/glob-paths/src/go.mod new file mode 100644 index 00000000000..2a49df8b21d --- /dev/null +++ b/syft/pkg/cataloger/golang/test-fixtures/glob-paths/src/go.mod @@ -0,0 +1 @@ +// bogus go.mod \ No newline at end of file diff --git a/syft/pkg/cataloger/haskell/cataloger_test.go b/syft/pkg/cataloger/haskell/cataloger_test.go new file mode 100644 index 00000000000..86a4b3670b3 --- /dev/null +++ b/syft/pkg/cataloger/haskell/cataloger_test.go @@ -0,0 +1,34 @@ +package haskell + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain stack and cabal files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/stack.yaml", + "src/stack.yaml.lock", + "src/cabal.project.freeze", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
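The generic-cataloger refactor above batches every registered MIME type into a single resolver.FilesByMIMEType call instead of looping one type at a time. A hypothetical registration that would go through that path (the cataloger name and parseExampleBinary are invented; parseExampleBinary would need to satisfy generic.Parser, and the MIME types shown are common ELF types):

func NewExampleBinaryCataloger() *generic.Cataloger {
	return generic.NewCataloger("example-binary-cataloger").
		WithParserByMimeTypes(parseExampleBinary, "application/x-executable", "application/x-sharedlib")
}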
+ TestCataloger(t, NewHackageCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/cabal.project.freeze b/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/cabal.project.freeze new file mode 100644 index 00000000000..ba0e55e316c --- /dev/null +++ b/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/cabal.project.freeze @@ -0,0 +1 @@ +cabal.project.freeze \ No newline at end of file diff --git a/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/stack.yaml b/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/stack.yaml new file mode 100644 index 00000000000..f48f35b7351 --- /dev/null +++ b/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/stack.yaml @@ -0,0 +1 @@ +bogus stack.yaml \ No newline at end of file diff --git a/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/stack.yaml.lock b/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/stack.yaml.lock new file mode 100644 index 00000000000..447e1a04697 --- /dev/null +++ b/syft/pkg/cataloger/haskell/test-fixtures/glob-paths/src/stack.yaml.lock @@ -0,0 +1 @@ +bogus stack.yaml.lock \ No newline at end of file diff --git a/syft/pkg/cataloger/internal/pkgtest/observing_resolver.go b/syft/pkg/cataloger/internal/pkgtest/observing_resolver.go new file mode 100644 index 00000000000..fd0a5428a08 --- /dev/null +++ b/syft/pkg/cataloger/internal/pkgtest/observing_resolver.go @@ -0,0 +1,222 @@ +package pkgtest + +import ( + "fmt" + "io" + "sort" + + "github.com/scylladb/go-set/strset" + + "github.com/anchore/syft/syft/source" +) + +var _ source.FileResolver = (*ObservingResolver)(nil) + +type ObservingResolver struct { + decorated source.FileResolver + pathQueries map[string][]string + pathResponses []source.Location + contentQueries []source.Location + emptyPathResponses map[string][]string +} + +func NewObservingResolver(resolver source.FileResolver) *ObservingResolver { + return &ObservingResolver{ + decorated: resolver, + pathResponses: make([]source.Location, 0), + emptyPathResponses: make(map[string][]string), + pathQueries: make(map[string][]string), + } +} + +// testing helpers... + +func (r *ObservingResolver) ObservedPathQuery(input string) bool { + for _, vs := range r.pathQueries { + for _, v := range vs { + if v == input { + return true + } + } + } + return false +} + +func (r *ObservingResolver) ObservedPathResponses(path string) bool { + for _, loc := range r.pathResponses { + if loc.RealPath == path { + return true + } + } + return false +} + +func (r *ObservingResolver) ObservedContentQueries(path string) bool { + for _, loc := range r.contentQueries { + if loc.RealPath == path { + return true + } + } + return false +} + +func (r *ObservingResolver) AllContentQueries() []string { + observed := strset.New() + for _, loc := range r.contentQueries { + observed.Add(loc.RealPath) + } + return observed.List() +} + +func (r *ObservingResolver) AllPathQueries() map[string][]string { + return r.pathQueries +} + +func (r *ObservingResolver) PruneUnfulfilledPathResponses(ignore map[string][]string, ignorePaths ...string) { + if ignore == nil { + return + } + // remove any paths that were ignored for specific calls + for k, v := range ignore { + results := r.emptyPathResponses[k] + for _, ig := range v { + for i, result := range results { + if result == ig { + results = append(results[:i], results[i+1:]...) 
+ break + } + } + } + if len(results) > 0 { + r.emptyPathResponses[k] = results + } else { + delete(r.emptyPathResponses, k) + } + } + + // remove any paths that were ignored for all calls + for _, ig := range ignorePaths { + for k, v := range r.emptyPathResponses { + for i, result := range v { + if result == ig { + v = append(v[:i], v[i+1:]...) + break + } + } + if len(v) > 0 { + r.emptyPathResponses[k] = v + } else { + delete(r.emptyPathResponses, k) + } + } + } +} + +func (r *ObservingResolver) HasUnfulfilledPathRequests() bool { + return len(r.emptyPathResponses) > 0 +} + +func (r *ObservingResolver) PrettyUnfulfilledPathRequests() string { + var res string + var keys []string + + for k := range r.emptyPathResponses { + keys = append(keys, k) + } + + sort.Strings(keys) + + for _, k := range keys { + res += fmt.Sprintf(" %s: %+v\n", k, r.emptyPathResponses[k]) + } + return res +} + +// For the file path resolver... + +func (r *ObservingResolver) addPathQuery(name string, input ...string) { + r.pathQueries[name] = append(r.pathQueries[name], input...) +} + +func (r *ObservingResolver) addPathResponse(locs ...source.Location) { + r.pathResponses = append(r.pathResponses, locs...) +} + +func (r *ObservingResolver) addEmptyPathResponse(name string, locs []source.Location, paths ...string) { + if len(locs) == 0 { + results := r.emptyPathResponses[name] + results = append(results, paths...) + r.emptyPathResponses[name] = results + } +} + +func (r *ObservingResolver) FilesByPath(paths ...string) ([]source.Location, error) { + name := "FilesByPath" + r.addPathQuery(name, paths...) + + locs, err := r.decorated.FilesByPath(paths...) + + r.addPathResponse(locs...) + r.addEmptyPathResponse(name, locs, paths...) + return locs, err +} + +func (r *ObservingResolver) FilesByGlob(patterns ...string) ([]source.Location, error) { + name := "FilesByGlob" + r.addPathQuery(name, patterns...) + + locs, err := r.decorated.FilesByGlob(patterns...) + + r.addPathResponse(locs...) + r.addEmptyPathResponse(name, locs, patterns...) + return locs, err +} + +func (r *ObservingResolver) FilesByMIMEType(types ...string) ([]source.Location, error) { + name := "FilesByMIMEType" + r.addPathQuery(name, types...) + + locs, err := r.decorated.FilesByMIMEType(types...) + + r.addPathResponse(locs...) + r.addEmptyPathResponse(name, locs, types...) + return locs, err +} + +func (r *ObservingResolver) RelativeFileByPath(l source.Location, path string) *source.Location { + name := "RelativeFileByPath" + r.addPathQuery(name, path) + + loc := r.decorated.RelativeFileByPath(l, path) + + if loc != nil { + r.addPathResponse(*loc) + } else { + results := r.emptyPathResponses[name] + results = append(results, path) + r.emptyPathResponses[name] = results + } + return loc +} + +// For the content resolver methods... + +func (r *ObservingResolver) FileContentsByLocation(location source.Location) (io.ReadCloser, error) { + r.contentQueries = append(r.contentQueries, location) + reader, err := r.decorated.FileContentsByLocation(location) + return reader, err +} + +// For the remaining resolver methods... 
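Before the pass-through methods below, a sketch of how a test could drive the recorder directly (underlying stands for any concrete source.FileResolver; the CatalogTester normally does this wiring itself):

import (
	"fmt"

	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
	"github.com/anchore/syft/syft/source"
)

func exampleUsage(underlying source.FileResolver) {
	obs := pkgtest.NewObservingResolver(underlying)
	_, _ = obs.FilesByGlob("**/package.json")

	fmt.Println(obs.ObservedPathQuery("**/package.json")) // true: the glob was asked for
	fmt.Println(obs.AllPathQueries())                     // map[FilesByGlob:[**/package.json]]
	fmt.Println(obs.HasUnfulfilledPathRequests())         // true if the glob matched nothing
}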
+
+func (r *ObservingResolver) AllLocations() <-chan source.Location {
+ return r.decorated.AllLocations()
+}
+
+func (r *ObservingResolver) HasPath(s string) bool {
+ return r.decorated.HasPath(s)
+}
+
+func (r *ObservingResolver) FileMetadataByLocation(location source.Location) (source.FileMetadata, error) {
+ return r.decorated.FileMetadataByLocation(location)
+}
diff --git a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go
index 59a5d2d52bd..99df368f45e 100644
--- a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go
+++ b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go
@@ -1,6 +1,7 @@
 package pkgtest

 import (
+ "fmt"
 "io"
 "os"
 "strings"
@@ -8,6 +9,7 @@ import (
 "github.com/google/go-cmp/cmp"
 "github.com/google/go-cmp/cmp/cmpopts"
+ "github.com/stretchr/testify/assert"
 "github.com/stretchr/testify/require"

 "github.com/anchore/stereoscope/pkg/imagetest"
@@ -21,20 +23,35 @@ import (
 type locationComparer func(x, y source.Location) bool

 type CatalogTester struct {
- expectedPkgs []pkg.Package
- expectedRelationships []artifact.Relationship
- env *generic.Environment
- reader source.LocationReadCloser
- resolver source.FileResolver
- wantErr require.ErrorAssertionFunc
- compareOptions []cmp.Option
- locationComparer locationComparer
+ expectedPkgs []pkg.Package
+ expectedRelationships []artifact.Relationship
+ assertResultExpectations bool
+ expectedPathResponses []string // this is a minimum set, the resolver may return more than just this list
+ expectedContentQueries []string // this is a full set, any other queries are unexpected (and will fail the test)
+ ignoreUnfulfilledPathResponses map[string][]string
+ ignoreAnyUnfulfilledPaths []string
+ env *generic.Environment
+ reader source.LocationReadCloser
+ resolver source.FileResolver
+ wantErr require.ErrorAssertionFunc
+ compareOptions []cmp.Option
+ locationComparer locationComparer
 }

 func NewCatalogTester() *CatalogTester {
 return &CatalogTester{
 wantErr: require.NoError,
 locationComparer: DefaultLocationComparer,
+ ignoreUnfulfilledPathResponses: map[string][]string{
+ "FilesByPath": {
+ // most catalogers search for a linux release, which will not be fulfilled in testing
+ "/etc/os-release",
+ "/usr/lib/os-release",
+ "/etc/system-release-cpe",
+ "/etc/redhat-release",
+ "/bin/busybox",
+ },
+ },
 }
 }

@@ -90,6 +107,7 @@ func (p *CatalogTester) WithEnv(env *generic.Environment) *CatalogTester {
 }

 func (p *CatalogTester) WithError() *CatalogTester {
+ p.assertResultExpectations = true
 p.wantErr = require.Error
 return p
 }

@@ -129,12 +147,33 @@ func (p *CatalogTester) IgnorePackageFields(fields ...string) *CatalogTester {
 return p
 }

+func (p *CatalogTester) WithCompareOptions(opts ...cmp.Option) *CatalogTester {
+ p.compareOptions = append(p.compareOptions, opts...)
+ return p
+}
+
 func (p *CatalogTester) Expects(pkgs []pkg.Package, relationships []artifact.Relationship) *CatalogTester {
+ p.assertResultExpectations = true
 p.expectedPkgs = pkgs
 p.expectedRelationships = relationships
 return p
 }

+func (p *CatalogTester) ExpectsResolverPathResponses(locations []string) *CatalogTester {
+ p.expectedPathResponses = locations
+ return p
+}
+
+func (p *CatalogTester) ExpectsResolverContentQueries(locations []string) *CatalogTester {
+ p.expectedContentQueries = locations
+ return p
+}
+
+func (p *CatalogTester) IgnoreUnfulfilledPathResponses(paths ...string) *CatalogTester {
+ p.ignoreAnyUnfulfilledPaths = append(p.ignoreAnyUnfulfilledPaths, paths...)
+ return p
+}
+
 func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) {
 t.Helper()
 pkgs, relationships, err := parser(p.resolver, p.env, p.reader)
@@ -144,9 +183,30 @@ func (p *CatalogTester) TestParser(t *testing.T, parser generic.Parser) {

 func (p *CatalogTester) TestCataloger(t *testing.T, cataloger pkg.Cataloger) {
 t.Helper()
- pkgs, relationships, err := cataloger.Catalog(p.resolver)
- p.wantErr(t, err)
- p.assertPkgs(t, pkgs, relationships)
+
+ resolver := NewObservingResolver(p.resolver)
+
+ pkgs, relationships, err := cataloger.Catalog(resolver)
+
+ // this is a minimum set, the resolver may return more than just this list
+ for _, path := range p.expectedPathResponses {
+ assert.Truef(t, resolver.ObservedPathResponses(path), "expected path query for %q was not observed", path)
+ }
+
+ // this is a full set, any other queries are unexpected (and will fail the test)
+ if len(p.expectedContentQueries) > 0 {
+ assert.ElementsMatchf(t, p.expectedContentQueries, resolver.AllContentQueries(), "unexpected content queries observed: diff %s", cmp.Diff(p.expectedContentQueries, resolver.AllContentQueries()))
+ }
+
+ if p.assertResultExpectations {
+ p.wantErr(t, err)
+ p.assertPkgs(t, pkgs, relationships)
+ } else {
+ resolver.PruneUnfulfilledPathResponses(p.ignoreUnfulfilledPathResponses, p.ignoreAnyUnfulfilledPaths...)
+
+ // if we aren't testing the results, we should focus on what was searched for (for glob-centric tests)
+ assert.Falsef(t, resolver.HasUnfulfilledPathRequests(), "unfulfilled path requests: \n%v", resolver.PrettyUnfulfilledPathRequests())
+ }
 }

 func (p *CatalogTester) assertPkgs(t *testing.T, pkgs []pkg.Package, relationships []artifact.Relationship) {
@@ -175,12 +235,31 @@ func (p *CatalogTester) assertPkgs(t *testing.T, pkgs []pkg.Package, relationshi
 ),
 )

- if diff := cmp.Diff(p.expectedPkgs, pkgs, p.compareOptions...); diff != "" {
- t.Errorf("unexpected packages from parsing (-expected +actual)\n%s", diff)
+ {
+ var r diffReporter
+ var opts []cmp.Option
+
+ opts = append(opts, p.compareOptions...)
+ opts = append(opts, cmp.Reporter(&r))
+
+ if diff := cmp.Diff(p.expectedPkgs, pkgs, opts...); diff != "" {
+ t.Log("Specific Differences:\n" + r.String())
+ t.Errorf("unexpected packages from parsing (-expected +actual)\n%s", diff)
+ }
 }

- if diff := cmp.Diff(p.expectedRelationships, relationships, p.compareOptions...); diff != "" {
- t.Errorf("unexpected relationships from parsing (-expected +actual)\n%s", diff)
+ {
+ var r diffReporter
+ var opts []cmp.Option
+
+ opts = append(opts, p.compareOptions...)
+ opts = append(opts, cmp.Reporter(&r))
+
+ if diff := cmp.Diff(p.expectedRelationships, relationships, opts...); diff != "" {
+ t.Log("Specific Differences:\n" + r.String())
+
+ t.Errorf("unexpected relationships from parsing (-expected +actual)\n%s", diff)
+ }
 }
 }

@@ -223,3 +302,28 @@ func AssertPackagesEqual(t *testing.T, a, b pkg.Package) {
 t.Errorf("unexpected packages from parsing (-expected +actual)\n%s", diff)
 }
 }
+
+// diffReporter is a simple custom reporter that only records differences detected during comparison.
+type diffReporter struct {
+ path cmp.Path
+ diffs []string
+}
+
+func (r *diffReporter) PushStep(ps cmp.PathStep) {
+ r.path = append(r.path, ps)
+}
+
+func (r *diffReporter) Report(rs cmp.Result) {
+ if !rs.Equal() {
+ vx, vy := r.path.Last().Values()
+ r.diffs = append(r.diffs, fmt.Sprintf("%#v:\n\t-: %+v\n\t+: %+v\n", r.path, vx, vy))
+ }
+}
+
+func (r *diffReporter) PopStep() {
+ r.path = r.path[:len(r.path)-1]
+}
+
+func (r *diffReporter) String() string {
+ return strings.Join(r.diffs, "\n")
+}
diff --git a/syft/pkg/cataloger/java/cataloger.go b/syft/pkg/cataloger/java/cataloger.go
index 73bdf71541f..09ed0d1ab01 100644
--- a/syft/pkg/cataloger/java/cataloger.go
+++ b/syft/pkg/cataloger/java/cataloger.go
@@ -29,5 +29,5 @@ func NewJavaCataloger(cfg Config) *generic.Cataloger {
 // Pom files list dependencies that may not be locally installed yet.
 func NewJavaPomCataloger() *generic.Cataloger {
 return generic.NewCataloger("java-pom-cataloger").
- WithParserByGlobs(parserPomXML, pomXMLDirGlob)
+ WithParserByGlobs(parserPomXML, "**/pom.xml")
 }
diff --git a/syft/pkg/cataloger/java/cataloger_test.go b/syft/pkg/cataloger/java/cataloger_test.go
new file mode 100644
index 00000000000..6ec834aff43
--- /dev/null
+++ b/syft/pkg/cataloger/java/cataloger_test.go
@@ -0,0 +1,87 @@
+package java
+
+import (
+ "testing"
+
+ "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
+)
+
+func Test_ArchiveCataloger_Globs(t *testing.T) {
+ tests := []struct {
+ name string
+ fixture string
+ expected []string
+ }{
+ {
+ name: "obtain java archive files",
+ fixture: "test-fixtures/glob-paths",
+ expected: []string{
+ "java-archives/example.jar",
+ "java-archives/example.war",
+ "java-archives/example.ear",
+ "java-archives/example.par",
+ "java-archives/example.sar",
+ "java-archives/example.jpi",
+ "java-archives/example.hpi",
+ "java-archives/example.lpkg",
+ "archives/example.zip",
+ "archives/example.tar",
+ "archives/example.tar.gz",
+ "archives/example.tgz",
+ "archives/example.tar.bz",
+ "archives/example.tar.bz2",
+ "archives/example.tbz",
+ "archives/example.tbz2",
+ "archives/example.tar.br",
+ "archives/example.tbr",
+ "archives/example.tar.lz4",
+ "archives/example.tlz4",
+ "archives/example.tar.sz",
+ "archives/example.tsz",
+ "archives/example.tar.xz",
+ "archives/example.txz",
+ "archives/example.tar.zst",
+ "archives/example.tzst",
+ "archives/example.tar.zstd",
+ "archives/example.tzstd",
+ },
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ pkgtest.NewCatalogTester().
+ FromDirectory(t, test.fixture).
+ ExpectsResolverContentQueries(test.expected).
+ TestCataloger(t, NewJavaCataloger(Config{
+ SearchUnindexedArchives: true,
+ SearchIndexedArchives: true,
+ }))
+ })
+ }
+}
+
+func Test_POMCataloger_Globs(t *testing.T) {
+ tests := []struct {
+ name string
+ fixture string
+ expected []string
+ }{
+ {
+ name: "obtain java pom files",
+ fixture: "test-fixtures/glob-paths",
+ expected: []string{
+ "src/pom.xml",
+ },
+ },
+ }
+
+ for _, test := range tests {
+ t.Run(test.name, func(t *testing.T) {
+ pkgtest.NewCatalogTester().
+ FromDirectory(t, test.fixture).
+ ExpectsResolverContentQueries(test.expected).
+ TestCataloger(t, NewJavaPomCataloger())
+ })
+ }
+}
diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go
index eb37dd7209e..0f30e092999 100644
--- a/syft/pkg/cataloger/java/parse_pom_xml.go
+++ b/syft/pkg/cataloger/java/parse_pom_xml.go
@@ -18,7 +18,6 @@ import (
 )

 const pomXMLGlob = "*pom.xml"
-const pomXMLDirGlob = "**/pom.xml"

 var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")

diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go
index 882493b01a5..99c723f44b5 100644
--- a/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go
+++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go
@@ -34,6 +34,9 @@ var genericTarGlobs = []string{
 "**/*.txz",
 // zst
 "**/*.tar.zst",
+ "**/*.tzst",
+ "**/*.tar.zstd",
+ "**/*.tzstd",
 }

 // TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246)
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/.gitignore b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/.gitignore
new file mode 100644
index 00000000000..4ec97210de8
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/.gitignore
@@ -0,0 +1,2 @@
+# we want to override some of the root level ignores just for the fixtures that we know are safe
+!*
\ No newline at end of file
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.br b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.br
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.br
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.bz b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.bz
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.bz
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.bz2 b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.bz2
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.bz2
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.gz b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.gz
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.gz
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.lz4 b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.lz4
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.lz4
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.sz
b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.sz new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.sz @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.xz b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.xz new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.xz @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.zst b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.zst new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.zst @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.zstd b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.zstd new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tar.zstd @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbr b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbr new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbr @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbz b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbz new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbz @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbz2 b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbz2 new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tbz2 @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tgz b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tgz new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tgz @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tlz4 b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tlz4 new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tlz4 @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tsz b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tsz new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tsz @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.txz b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.txz new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.txz @@ -0,0 +1 
@@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tzst b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tzst
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tzst
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tzstd b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tzstd
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.tzstd
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.zip b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.zip
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/archives/example.zip
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/.gitignore b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/.gitignore
new file mode 100644
index 00000000000..4ec97210de8
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/.gitignore
@@ -0,0 +1,2 @@
+# we want to override some of the root level ignores just for the fixtures that we know are safe
+!*
\ No newline at end of file
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.ear b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.ear
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.ear
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.hpi b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.hpi
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.hpi
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.jar b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.jar
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.jar
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.jpi b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.jpi
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.jpi
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.lpkg b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.lpkg
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.lpkg
@@ -0,0 +1 @@
+example archive
diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.par b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.par
new file mode 100644
index 00000000000..8944cbcc070
--- /dev/null
+++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.par
@@ -0,0 +1 @@
+example archive
diff --git
a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.sar b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.sar new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.sar @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.war b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.war new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/java-archives/example.war @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/java/test-fixtures/glob-paths/src/pom.xml b/syft/pkg/cataloger/java/test-fixtures/glob-paths/src/pom.xml new file mode 100644 index 00000000000..0fbd1e3b4d5 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/glob-paths/src/pom.xml @@ -0,0 +1 @@ +bogus pom.xml \ No newline at end of file diff --git a/syft/pkg/cataloger/javascript/cataloger.go b/syft/pkg/cataloger/javascript/cataloger.go index 6688abcdc8b..2109eb198b9 100644 --- a/syft/pkg/cataloger/javascript/cataloger.go +++ b/syft/pkg/cataloger/javascript/cataloger.go @@ -7,13 +7,14 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -// NewJavascriptPackageCataloger returns a new JavaScript cataloger object based on detection of npm based packages. -func NewJavascriptPackageCataloger() *generic.Cataloger { +// NewPackageCataloger returns a new JavaScript cataloger object based on detection of npm based packages. +func NewPackageCataloger() *generic.Cataloger { return generic.NewCataloger("javascript-package-cataloger"). WithParserByGlobs(parsePackageJSON, "**/package.json") } -func NewJavascriptLockCataloger() *generic.Cataloger { +// NewLockCataloger returns a new JavaScript cataloger object based on detection of lock files. +func NewLockCataloger() *generic.Cataloger { return generic.NewCataloger("javascript-lock-cataloger"). WithParserByGlobs(parsePackageLock, "**/package-lock.json"). WithParserByGlobs(parseYarnLock, "**/yarn.lock"). diff --git a/syft/pkg/cataloger/javascript/cataloger_test.go b/syft/pkg/cataloger/javascript/cataloger_test.go index 2d310932baa..6e0ba16b613 100644 --- a/syft/pkg/cataloger/javascript/cataloger_test.go +++ b/syft/pkg/cataloger/javascript/cataloger_test.go @@ -139,6 +139,58 @@ func Test_JavascriptCataloger(t *testing.T) { pkgtest.NewCatalogTester(). FromDirectory(t, "test-fixtures/pkg-lock"). Expects(expectedPkgs, nil). - TestCataloger(t, NewJavascriptLockCataloger()) + TestCataloger(t, NewLockCataloger()) } + +func Test_PackageCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain package files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/package.json", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
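The renamed PHP and JavaScript constructors above all follow the same shape: generic.NewCataloger(name) plus one WithParserByGlobs call per file type. A hypothetical end-to-end example for an invented ecosystem (foo, parseFooLock, and the glob are illustrative only; the parser signature matches generic.Parser as used by the portage parser below):

package foo

import (
	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
	"github.com/anchore/syft/syft/source"
)

// NewFooLockCataloger wires one parser to one lock-file glob (hypothetical).
func NewFooLockCataloger() *generic.Cataloger {
	return generic.NewCataloger("foo-lock-cataloger").
		WithParserByGlobs(parseFooLock, "**/foo.lock")
}

func parseFooLock(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	// a real parser would read the lock file via reader and emit one package per entry
	p := pkg.Package{
		Name:      "example",
		Version:   "0.0.1",
		Locations: source.NewLocationSet(reader.Location),
	}
	p.SetID()
	return []pkg.Package{p}, nil, nil
}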
+ TestCataloger(t, NewPackageCataloger()) + }) + } +} + +func Test_LockCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain package files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/package-lock.json", + "src/pnpm-lock.yaml", + "src/yarn.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewLockCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/package-lock.json b/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/package-lock.json new file mode 100644 index 00000000000..9f8049938dc --- /dev/null +++ b/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/package-lock.json @@ -0,0 +1 @@ +bogus package-lock.json \ No newline at end of file diff --git a/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/package.json b/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/package.json new file mode 100644 index 00000000000..e69de29bb2d diff --git a/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/pnpm-lock.yaml b/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/pnpm-lock.yaml new file mode 100644 index 00000000000..b3979e3ba3a --- /dev/null +++ b/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/pnpm-lock.yaml @@ -0,0 +1 @@ +bogus pnpm-lock.yaml \ No newline at end of file diff --git a/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/yarn.lock b/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/yarn.lock new file mode 100644 index 00000000000..f3260a1809d --- /dev/null +++ b/syft/pkg/cataloger/javascript/test-fixtures/glob-paths/src/yarn.lock @@ -0,0 +1 @@ +bogus yarn.lock \ No newline at end of file diff --git a/syft/pkg/cataloger/php/cataloger.go b/syft/pkg/cataloger/php/cataloger.go index 35ac46bb354..5beba45acaf 100644 --- a/syft/pkg/cataloger/php/cataloger.go +++ b/syft/pkg/cataloger/php/cataloger.go @@ -7,14 +7,14 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -// NewPHPComposerInstalledCataloger returns a new cataloger for PHP installed.json files. -func NewPHPComposerInstalledCataloger() *generic.Cataloger { +// NewComposerInstalledCataloger returns a new cataloger for PHP installed.json files. +func NewComposerInstalledCataloger() *generic.Cataloger { return generic.NewCataloger("php-composer-installed-cataloger"). WithParserByGlobs(parseInstalledJSON, "**/installed.json") } -// NewPHPComposerLockCataloger returns a new cataloger for PHP composer.lock files. -func NewPHPComposerLockCataloger() *generic.Cataloger { +// NewComposerLockCataloger returns a new cataloger for PHP composer.lock files. +func NewComposerLockCataloger() *generic.Cataloger { return generic.NewCataloger("php-composer-lock-cataloger"). 
WithParserByGlobs(parseComposerLock, "**/composer.lock") } diff --git a/syft/pkg/cataloger/php/cataloger_test.go b/syft/pkg/cataloger/php/cataloger_test.go new file mode 100644 index 00000000000..8d5f49093eb --- /dev/null +++ b/syft/pkg/cataloger/php/cataloger_test.go @@ -0,0 +1,57 @@ +package php + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func Test_ComposerInstalledCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain composer files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/installed.json", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewComposerInstalledCataloger()) + }) + } +} + +func Test_ComposerLockCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain composer lock files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/composer.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewComposerLockCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/php/test-fixtures/glob-paths/src/composer.lock b/syft/pkg/cataloger/php/test-fixtures/glob-paths/src/composer.lock new file mode 100644 index 00000000000..bad0c04cf42 --- /dev/null +++ b/syft/pkg/cataloger/php/test-fixtures/glob-paths/src/composer.lock @@ -0,0 +1 @@ +bogus composer.lock \ No newline at end of file diff --git a/syft/pkg/cataloger/php/test-fixtures/glob-paths/src/installed.json b/syft/pkg/cataloger/php/test-fixtures/glob-paths/src/installed.json new file mode 100644 index 00000000000..49fd591e4c5 --- /dev/null +++ b/syft/pkg/cataloger/php/test-fixtures/glob-paths/src/installed.json @@ -0,0 +1 @@ +bogus installed.json \ No newline at end of file diff --git a/syft/pkg/cataloger/portage/cataloger.go b/syft/pkg/cataloger/portage/cataloger.go index fb29c024ede..b6be1a3ca23 100644 --- a/syft/pkg/cataloger/portage/cataloger.go +++ b/syft/pkg/cataloger/portage/cataloger.go @@ -4,159 +4,10 @@ Package portage provides a concrete Cataloger implementation for Gentoo Portage. package portage import ( - "bufio" - "fmt" - "path" - "path/filepath" - "regexp" - "sort" - "strconv" - "strings" - - "github.com/anchore/syft/internal" - "github.com/anchore/syft/internal/log" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/generic" - "github.com/anchore/syft/syft/source" -) - -var ( - cpvRe = regexp.MustCompile(`/([^/]*/[\w+][\w+-]*)-((\d+)((\.\d+)*)([a-z]?)((_(pre|p|beta|alpha|rc)\d*)*)(-r\d+)?)/CONTENTS$`) - _ generic.Parser = parsePortageContents ) func NewPortageCataloger() *generic.Cataloger { return generic.NewCataloger("portage-cataloger"). 
WithParserByGlobs(parsePortageContents, "**/var/db/pkg/*/*/CONTENTS") } - -func parsePortageContents(resolver source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - cpvMatch := cpvRe.FindStringSubmatch(reader.Location.RealPath) - if cpvMatch == nil { - return nil, nil, fmt.Errorf("failed to match package and version in %s", reader.Location.RealPath) - } - - name, version := cpvMatch[1], cpvMatch[2] - if name == "" || version == "" { - log.WithFields("path", reader.Location.RealPath).Warnf("failed to parse portage name and version") - return nil, nil, nil - } - - p := pkg.Package{ - Name: name, - Version: version, - PURL: packageURL(name, version), - Locations: source.NewLocationSet(), - Type: pkg.PortagePkg, - MetadataType: pkg.PortageMetadataType, - Metadata: pkg.PortageMetadata{ - // ensure the default value for a collection is never nil since this may be shown as JSON - Files: make([]pkg.PortageFileRecord, 0), - }, - } - addLicenses(resolver, reader.Location, &p) - addSize(resolver, reader.Location, &p) - addFiles(resolver, reader.Location, &p) - - p.SetID() - - return []pkg.Package{p}, nil, nil -} - -func addFiles(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { - contentsReader, err := resolver.FileContentsByLocation(dbLocation) - if err != nil { - log.WithFields("path", dbLocation.RealPath).Warnf("failed to fetch portage contents (package=%s): %+v", p.Name, err) - return - } - - entry, ok := p.Metadata.(pkg.PortageMetadata) - if !ok { - return - } - - scanner := bufio.NewScanner(contentsReader) - for scanner.Scan() { - line := strings.Trim(scanner.Text(), "\n") - fields := strings.Split(line, " ") - - if fields[0] == "obj" { - record := pkg.PortageFileRecord{ - Path: fields[1], - } - record.Digest = &file.Digest{ - Algorithm: "md5", - Value: fields[2], - } - entry.Files = append(entry.Files, record) - } - } - - p.Metadata = entry - p.Locations.Add(dbLocation) -} - -func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { - parentPath := filepath.Dir(dbLocation.RealPath) - - location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "LICENSE")) - - if location == nil { - return - } - - licenseReader, err := resolver.FileContentsByLocation(*location) - if err != nil { - log.WithFields("path", dbLocation.RealPath).Warnf("failed to fetch portage LICENSE: %+v", err) - return - } - - findings := internal.NewStringSet() - scanner := bufio.NewScanner(licenseReader) - scanner.Split(bufio.ScanWords) - for scanner.Scan() { - token := scanner.Text() - if token != "||" && token != "(" && token != ")" { - findings.Add(token) - } - } - licenses := findings.ToSlice() - sort.Strings(licenses) - p.Licenses = licenses - p.Locations.Add(*location) -} - -func addSize(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { - parentPath := filepath.Dir(dbLocation.RealPath) - - location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "SIZE")) - - if location == nil { - return - } - - entry, ok := p.Metadata.(pkg.PortageMetadata) - if !ok { - return - } - - sizeReader, err := resolver.FileContentsByLocation(*location) - if err != nil { - log.WithFields("name", p.Name).Warnf("failed to fetch portage SIZE: %+v", err) - return - } - - scanner := bufio.NewScanner(sizeReader) - for scanner.Scan() { - line := strings.Trim(scanner.Text(), "\n") - size, err := strconv.Atoi(line) - if err == nil { - entry.InstalledSize = size - } 
- } - - p.Metadata = entry - p.Locations.Add(*location) -} diff --git a/syft/pkg/cataloger/portage/cataloger_test.go b/syft/pkg/cataloger/portage/cataloger_test.go index 6623f27e713..3f4494b29c1 100644 --- a/syft/pkg/cataloger/portage/cataloger_test.go +++ b/syft/pkg/cataloger/portage/cataloger_test.go @@ -71,3 +71,28 @@ func TestPortageCataloger(t *testing.T) { TestCataloger(t, NewPortageCataloger()) } + +func TestCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain portage contents file", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "var/db/pkg/x/y/CONTENTS", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewPortageCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/portage/parse_portage_contents.go b/syft/pkg/cataloger/portage/parse_portage_contents.go new file mode 100644 index 00000000000..dba65ace0dd --- /dev/null +++ b/syft/pkg/cataloger/portage/parse_portage_contents.go @@ -0,0 +1,154 @@ +package portage + +import ( + "bufio" + "fmt" + "path" + "path/filepath" + "regexp" + "sort" + "strconv" + "strings" + + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/source" +) + +var ( + cpvRe = regexp.MustCompile(`/([^/]*/[\w+][\w+-]*)-((\d+)((\.\d+)*)([a-z]?)((_(pre|p|beta|alpha|rc)\d*)*)(-r\d+)?)/CONTENTS$`) + _ generic.Parser = parsePortageContents +) + +func parsePortageContents(resolver source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + cpvMatch := cpvRe.FindStringSubmatch(reader.Location.RealPath) + if cpvMatch == nil { + return nil, nil, fmt.Errorf("failed to match package and version in %s", reader.Location.RealPath) + } + + name, version := cpvMatch[1], cpvMatch[2] + if name == "" || version == "" { + log.WithFields("path", reader.Location.RealPath).Warnf("failed to parse portage name and version") + return nil, nil, nil + } + + p := pkg.Package{ + Name: name, + Version: version, + PURL: packageURL(name, version), + Locations: source.NewLocationSet(), + Type: pkg.PortagePkg, + MetadataType: pkg.PortageMetadataType, + Metadata: pkg.PortageMetadata{ + // ensure the default value for a collection is never nil since this may be shown as JSON + Files: make([]pkg.PortageFileRecord, 0), + }, + } + addLicenses(resolver, reader.Location, &p) + addSize(resolver, reader.Location, &p) + addFiles(resolver, reader.Location, &p) + + p.SetID() + + return []pkg.Package{p}, nil, nil +} + +func addFiles(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { + contentsReader, err := resolver.FileContentsByLocation(dbLocation) + if err != nil { + log.WithFields("path", dbLocation.RealPath).Warnf("failed to fetch portage contents (package=%s): %+v", p.Name, err) + return + } + + entry, ok := p.Metadata.(pkg.PortageMetadata) + if !ok { + return + } + + scanner := bufio.NewScanner(contentsReader) + for scanner.Scan() { + line := strings.Trim(scanner.Text(), "\n") + fields := strings.Split(line, " ") + + if fields[0] == "obj" { + record := pkg.PortageFileRecord{ + Path: fields[1], + } + record.Digest = &file.Digest{ 
+ Algorithm: "md5", + Value: fields[2], + } + entry.Files = append(entry.Files, record) + } + } + + p.Metadata = entry + p.Locations.Add(dbLocation) +} + +func addLicenses(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { + parentPath := filepath.Dir(dbLocation.RealPath) + + location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "LICENSE")) + + if location == nil { + return + } + + licenseReader, err := resolver.FileContentsByLocation(*location) + if err != nil { + log.WithFields("path", dbLocation.RealPath).Warnf("failed to fetch portage LICENSE: %+v", err) + return + } + + findings := internal.NewStringSet() + scanner := bufio.NewScanner(licenseReader) + scanner.Split(bufio.ScanWords) + for scanner.Scan() { + token := scanner.Text() + if token != "||" && token != "(" && token != ")" { + findings.Add(token) + } + } + licenses := findings.ToSlice() + sort.Strings(licenses) + p.Licenses = licenses + p.Locations.Add(*location) +} + +func addSize(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) { + parentPath := filepath.Dir(dbLocation.RealPath) + + location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "SIZE")) + + if location == nil { + return + } + + entry, ok := p.Metadata.(pkg.PortageMetadata) + if !ok { + return + } + + sizeReader, err := resolver.FileContentsByLocation(*location) + if err != nil { + log.WithFields("name", p.Name).Warnf("failed to fetch portage SIZE: %+v", err) + return + } + + scanner := bufio.NewScanner(sizeReader) + for scanner.Scan() { + line := strings.Trim(scanner.Text(), "\n") + size, err := strconv.Atoi(line) + if err == nil { + entry.InstalledSize = size + } + } + + p.Metadata = entry + p.Locations.Add(*location) +} diff --git a/syft/pkg/cataloger/portage/test-fixtures/glob-paths/var/db/pkg/x/y/CONTENTS b/syft/pkg/cataloger/portage/test-fixtures/glob-paths/var/db/pkg/x/y/CONTENTS new file mode 100644 index 00000000000..f8d606cae96 --- /dev/null +++ b/syft/pkg/cataloger/portage/test-fixtures/glob-paths/var/db/pkg/x/y/CONTENTS @@ -0,0 +1 @@ +bogus contents \ No newline at end of file diff --git a/syft/pkg/cataloger/python/cataloger.go b/syft/pkg/cataloger/python/cataloger.go index cd2ba358b34..1401c3a2f79 100644 --- a/syft/pkg/cataloger/python/cataloger.go +++ b/syft/pkg/cataloger/python/cataloger.go @@ -4,11 +4,7 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -const ( - eggMetadataGlob = "**/*egg-info/PKG-INFO" - eggFileMetadataGlob = "**/*.egg-info" - wheelMetadataGlob = "**/*dist-info/METADATA" -) +const eggInfoGlob = "**/*.egg-info" // NewPythonIndexCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. func NewPythonIndexCataloger() *generic.Cataloger { @@ -22,5 +18,5 @@ func NewPythonIndexCataloger() *generic.Cataloger { // NewPythonPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. func NewPythonPackageCataloger() *generic.Cataloger { return generic.NewCataloger("python-package-cataloger"). 
- WithParserByGlobs(parseWheelOrEgg, eggMetadataGlob, eggFileMetadataGlob, wheelMetadataGlob) + WithParserByGlobs(parseWheelOrEgg, eggInfoGlob, "**/*dist-info/METADATA", "**/*egg-info/PKG-INFO") } diff --git a/syft/pkg/cataloger/python/cataloger_test.go b/syft/pkg/cataloger/python/cataloger_test.go index 0c02ac36fed..8515daf4b41 100644 --- a/syft/pkg/cataloger/python/cataloger_test.go +++ b/syft/pkg/cataloger/python/cataloger_test.go @@ -3,6 +3,8 @@ package python import ( "testing" + "github.com/stretchr/testify/require" + "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/source" @@ -192,9 +194,7 @@ func Test_PackageCataloger(t *testing.T) { resolver := source.NewMockResolverForPaths(test.fixtures...) locations, err := resolver.FilesByPath(test.fixtures...) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) test.expectedPackage.Locations = source.NewLocationSet(locations...) @@ -223,9 +223,7 @@ func Test_PackageCataloger_IgnorePackage(t *testing.T) { resolver := source.NewMockResolverForPaths(test.MetadataFixture) actual, _, err := NewPythonPackageCataloger().Catalog(resolver) - if err != nil { - t.Fatalf("failed to catalog python package: %+v", err) - } + require.NoError(t, err) if len(actual) != 0 { t.Fatalf("Expected 0 packages but found: %d", len(actual)) @@ -233,3 +231,61 @@ func Test_PackageCataloger_IgnorePackage(t *testing.T) { }) } } + +func Test_IndexCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain index files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/requirements.txt", + "src/extra-requirements.txt", + "src/requirements-dev.txt", + "src/1-requirements-dev.txt", + "src/setup.py", + "src/poetry.lock", + "src/Pipfile.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewPythonIndexCataloger()) + }) + } +} + +func Test_PackageCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain index files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "site-packages/x.dist-info/METADATA", + "site-packages/y.egg-info/PKG-INFO", + "site-packages/z.egg-info", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewPythonPackageCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go b/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go index f21c31bfa64..ab97a06a94f 100644 --- a/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go +++ b/syft/pkg/cataloger/python/parse_wheel_egg_metadata.go @@ -81,7 +81,7 @@ func parseWheelOrEggMetadata(path string, reader io.Reader) (pkg.PythonPackageMe // of egg metadata (as opposed to a directory that contains more metadata // files). 
func isEggRegularFile(path string) bool { - return file.GlobMatch(eggFileMetadataGlob, path) + return file.GlobMatch(eggInfoGlob, path) } // determineSitePackagesRootPath returns the path of the site packages root, diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/x.dist-info/METADATA b/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/x.dist-info/METADATA new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/x.dist-info/METADATA @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/y.egg-info/PKG-INFO b/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/y.egg-info/PKG-INFO new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/y.egg-info/PKG-INFO @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/z.egg-info b/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/z.egg-info new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/site-packages/z.egg-info @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/1-requirements-dev.txt b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/1-requirements-dev.txt new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/1-requirements-dev.txt @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/Pipfile.lock b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/Pipfile.lock new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/Pipfile.lock @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/extra-requirements.txt b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/extra-requirements.txt new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/extra-requirements.txt @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/poetry.lock b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/poetry.lock new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/poetry.lock @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/requirements-dev.txt b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/requirements-dev.txt new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/requirements-dev.txt @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/requirements.txt b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/requirements.txt new file mode 100644 index 00000000000..8944cbcc070 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/requirements.txt @@ -0,0 +1 @@ +example archive diff --git a/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/setup.py b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/setup.py new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ 
b/syft/pkg/cataloger/python/test-fixtures/glob-paths/src/setup.py @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/rpm/cataloger_test.go b/syft/pkg/cataloger/rpm/cataloger_test.go new file mode 100644 index 00000000000..ca8907e2101 --- /dev/null +++ b/syft/pkg/cataloger/rpm/cataloger_test.go @@ -0,0 +1,60 @@ +package rpm + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func Test_DBCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain DB files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "var/lib/rpm/Packages", + "var/lib/rpm/Packages.db", + "var/lib/rpm/rpmdb.sqlite", + "var/lib/rpmmanifest/container-manifest-2", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewRpmDBCataloger()) + }) + } +} + +func Test_RPMFileCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain rpm files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "dive-0.10.0.rpm", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewFileCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/rpm/parse_rpm_db_test.go b/syft/pkg/cataloger/rpm/parse_rpm_db_test.go index 8ca68434a04..111195fd34b 100644 --- a/syft/pkg/cataloger/rpm/parse_rpm_db_test.go +++ b/syft/pkg/cataloger/rpm/parse_rpm_db_test.go @@ -19,19 +19,28 @@ type rpmdbTestFileResolverMock struct { ignorePaths bool } +func (r rpmdbTestFileResolverMock) FilesByExtension(extensions ...string) ([]source.Location, error) { + panic("not implemented") +} + +func (r rpmdbTestFileResolverMock) FilesByBasename(filenames ...string) ([]source.Location, error) { + panic("not implemented") +} + +func (r rpmdbTestFileResolverMock) FilesByBasenameGlob(globs ...string) ([]source.Location, error) { + panic("not implemented") +} + func (r rpmdbTestFileResolverMock) FileContentsByLocation(location source.Location) (io.ReadCloser, error) { - //TODO implement me - panic("implement me") + panic("not implemented") } func (r rpmdbTestFileResolverMock) AllLocations() <-chan source.Location { - //TODO implement me - panic("implement me") + panic("not implemented") } func (r rpmdbTestFileResolverMock) FileMetadataByLocation(location source.Location) (source.FileMetadata, error) { - //TODO implement me - panic("implement me") + panic("not implemented") } func newTestFileResolver(ignorePaths bool) *rpmdbTestFileResolverMock { diff --git a/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/dive-0.10.0.rpm b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/dive-0.10.0.rpm new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/dive-0.10.0.rpm @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/Packages b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/Packages new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/Packages @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git 
a/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/Packages.db b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/Packages.db new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/Packages.db @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/rpmdb.sqlite b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/rpmdb.sqlite new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpm/rpmdb.sqlite @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpmmanifest/container-manifest-2 b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpmmanifest/container-manifest-2 new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/rpm/test-fixtures/glob-paths/var/lib/rpmmanifest/container-manifest-2 @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/ruby/catalogers_test.go b/syft/pkg/cataloger/ruby/catalogers_test.go new file mode 100644 index 00000000000..3d79c7a9375 --- /dev/null +++ b/syft/pkg/cataloger/ruby/catalogers_test.go @@ -0,0 +1,58 @@ +package ruby + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func Test_GemFileLock_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain gemfile lock files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/Gemfile.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewGemFileLockCataloger()) + }) + } +} + +func Test_GemSpec_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain gemspec files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "specifications/root.gemspec", + "specifications/pkg/nested.gemspec", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
+ TestCataloger(t, NewGemSpecCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/specifications/pkg/nested.gemspec b/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/specifications/pkg/nested.gemspec new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/specifications/pkg/nested.gemspec @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/specifications/root.gemspec b/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/specifications/root.gemspec new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/specifications/root.gemspec @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/src/Gemfile.lock b/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/src/Gemfile.lock new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/ruby/test-fixtures/glob-paths/src/Gemfile.lock @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/rust/cataloger_test.go b/syft/pkg/cataloger/rust/cataloger_test.go index c4c152854e3..73b442c817b 100644 --- a/syft/pkg/cataloger/rust/cataloger_test.go +++ b/syft/pkg/cataloger/rust/cataloger_test.go @@ -49,3 +49,53 @@ func TestNewAuditBinaryCataloger(t *testing.T) { Expects(expectedPkgs, nil). TestCataloger(t, NewAuditBinaryCataloger()) } + +func Test_CargoLockCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain Cargo.lock files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/Cargo.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewCargoLockCataloger()) + }) + } +} + +func Test_AuditBinaryCataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain audit binary files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "partial-binary", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
+ TestCataloger(t, NewAuditBinaryCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/rust/test-fixtures/glob-paths/partial-binary b/syft/pkg/cataloger/rust/test-fixtures/glob-paths/partial-binary new file mode 100644 index 00000000000..125d737c55f --- /dev/null +++ b/syft/pkg/cataloger/rust/test-fixtures/glob-paths/partial-binary @@ -0,0 +1 @@ +Ïúíþ \ No newline at end of file diff --git a/syft/pkg/cataloger/rust/test-fixtures/glob-paths/src/Cargo.lock b/syft/pkg/cataloger/rust/test-fixtures/glob-paths/src/Cargo.lock new file mode 100644 index 00000000000..882b6040c5d --- /dev/null +++ b/syft/pkg/cataloger/rust/test-fixtures/glob-paths/src/Cargo.lock @@ -0,0 +1 @@ +bogus \ No newline at end of file diff --git a/syft/pkg/cataloger/sbom/cataloger_test.go b/syft/pkg/cataloger/sbom/cataloger_test.go index b7489712de4..ea372d50ccc 100644 --- a/syft/pkg/cataloger/sbom/cataloger_test.go +++ b/syft/pkg/cataloger/sbom/cataloger_test.go @@ -290,3 +290,38 @@ func Test_parseSBOM(t *testing.T) { }) } } + +func Test_Cataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain sbom files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "bom", + "sbom", + "app.syft.json", + "app.bom", + "app.sbom", + "app.cdx", + "app.spdx", + "app.bom.json", + "app.sbom.json", + "app.cdx.json", + "app.spdx.json", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). + TestCataloger(t, NewSBOMCataloger()) + }) + } +} diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.bom b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.bom new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.bom @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.bom.json b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.bom.json new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.bom.json @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.cdx b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.cdx new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.cdx @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.cdx.json b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.cdx.json new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.cdx.json @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.sbom b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.sbom new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.sbom @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.sbom.json b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.sbom.json new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.sbom.json @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.spdx b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.spdx new file mode 100644 index 00000000000..5ffba7b57dc --- 
/dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.spdx @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.spdx.json b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.spdx.json new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.spdx.json @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.syft.json b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.syft.json new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/app.syft.json @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/bom b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/bom new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/bom @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/sbom b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/sbom new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/pkg/cataloger/sbom/test-fixtures/glob-paths/sbom @@ -0,0 +1 @@ +bogus diff --git a/syft/pkg/cataloger/swift/cataloger_test.go b/syft/pkg/cataloger/swift/cataloger_test.go new file mode 100644 index 00000000000..8100ce8b15c --- /dev/null +++ b/syft/pkg/cataloger/swift/cataloger_test.go @@ -0,0 +1,32 @@ +package swift + +import ( + "testing" + + "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" +) + +func Test_Cataloger_Globs(t *testing.T) { + tests := []struct { + name string + fixture string + expected []string + }{ + { + name: "obtain swift files", + fixture: "test-fixtures/glob-paths", + expected: []string{ + "src/Podfile.lock", + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + pkgtest.NewCatalogTester(). + FromDirectory(t, test.fixture). + ExpectsResolverContentQueries(test.expected). 
+			TestCataloger(t, NewCocoapodsCataloger())
+		})
+	}
+}
diff --git a/syft/pkg/cataloger/swift/test-fixtures/glob-paths/src/Podfile.lock b/syft/pkg/cataloger/swift/test-fixtures/glob-paths/src/Podfile.lock
new file mode 100644
index 00000000000..882b6040c5d
--- /dev/null
+++ b/syft/pkg/cataloger/swift/test-fixtures/glob-paths/src/Podfile.lock
@@ -0,0 +1 @@
+bogus
\ No newline at end of file
diff --git a/syft/source/directory_indexer.go b/syft/source/directory_indexer.go
new file mode 100644
index 00000000000..4f414a670ae
--- /dev/null
+++ b/syft/source/directory_indexer.go
@@ -0,0 +1,367 @@
+package source
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path"
+	"path/filepath"
+	"runtime"
+
+	"github.com/wagoodman/go-partybus"
+	"github.com/wagoodman/go-progress"
+
+	"github.com/anchore/stereoscope/pkg/file"
+	"github.com/anchore/stereoscope/pkg/filetree"
+	"github.com/anchore/syft/internal"
+	"github.com/anchore/syft/internal/bus"
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/syft/event"
+)
+
+type pathIndexVisitor func(string, os.FileInfo, error) error
+
+type directoryIndexer struct {
+	path              string
+	base              string
+	pathIndexVisitors []pathIndexVisitor
+	errPaths          map[string]error
+	tree              filetree.ReadWriter
+	index             filetree.Index
+}
+
+func newDirectoryIndexer(path, base string, visitors ...pathIndexVisitor) *directoryIndexer {
+	i := &directoryIndexer{
+		path:              path,
+		base:              base,
+		tree:              filetree.New(),
+		index:             filetree.NewIndex(),
+		pathIndexVisitors: append([]pathIndexVisitor{requireFileInfo, disallowByFileType, disallowUnixSystemRuntimePath}, visitors...),
+		errPaths:          make(map[string]error),
+	}
+
+	// these additional stateful visitors should be the first thing considered when walking / indexing
+	i.pathIndexVisitors = append(
+		[]pathIndexVisitor{
+			i.disallowRevisitingVisitor,
+			i.disallowFileAccessErr,
+		},
+		i.pathIndexVisitors...,
+	)
+
+	return i
+}
+
+func (r *directoryIndexer) build() (filetree.Reader, filetree.IndexReader, error) {
+	return r.tree, r.index, indexAllRoots(r.path, r.indexTree)
+}
+
+func indexAllRoots(root string, indexer func(string, *progress.Stage) ([]string, error)) error {
+	// why account for multiple roots? To cover cases when there is a symlink that references above the root path,
+	// in which case we need to additionally index where the link resolves to. It's for this reason that the filetree
+	// must be relative to the root of the filesystem (and not just relative to the given path).
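+	//
+	// a hypothetical illustration (paths assumed, not taken from this change): given a scan
+	// root /scan that contains a symlink /scan/lib -> /usr/lib, walking /scan alone would
+	// miss the link target, so the indexer reports /usr/lib as an additional root:
+	//
+	//	_ = indexAllRoots("/scan", indexer) // pass 1: indexer("/scan") returns ["/usr/lib"]
+	//	                                    // pass 2: indexer("/usr/lib") returns nil, loop ends
+	//
+	// fullPathsMap below is what keeps any such root from being queued (and walked) twice.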
+	pathsToIndex := []string{root}
+	fullPathsMap := map[string]struct{}{}
+
+	stager, prog := indexingProgress(root)
+	defer prog.SetCompleted()
+loop:
+	for {
+		var currentPath string
+		switch len(pathsToIndex) {
+		case 0:
+			break loop
+		case 1:
+			currentPath, pathsToIndex = pathsToIndex[0], nil
+		default:
+			currentPath, pathsToIndex = pathsToIndex[0], pathsToIndex[1:]
+		}
+
+		additionalRoots, err := indexer(currentPath, stager)
+		if err != nil {
+			return fmt.Errorf("unable to index filesystem path=%q: %w", currentPath, err)
+		}
+
+		for _, newRoot := range additionalRoots {
+			if _, ok := fullPathsMap[newRoot]; !ok {
+				fullPathsMap[newRoot] = struct{}{}
+				pathsToIndex = append(pathsToIndex, newRoot)
+			}
+		}
+	}
+
+	return nil
+}
+
+func (r *directoryIndexer) indexTree(root string, stager *progress.Stage) ([]string, error) {
+	log.WithFields("path", root).Trace("indexing filetree")
+
+	var roots []string
+	var err error
+
+	root, err = filepath.Abs(root)
+	if err != nil {
+		return nil, err
+	}
+
+	// we want to be able to index single files with the directory resolver. However, we should also allow for attempting
+	// to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing).
+	// For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root,
+	// but continue with indexing regardless of whether the given root path exists.
+	fi, err := os.Stat(root)
+	if err != nil && fi != nil && !fi.IsDir() {
+		// note: we want to index the path regardless of an error stat-ing the path
+		newRoot, _ := r.indexPath(root, fi, nil)
+		if newRoot != "" {
+			roots = append(roots, newRoot)
+		}
+		return roots, nil
+	}
+
+	err = filepath.Walk(root,
+		func(path string, info os.FileInfo, err error) error {
+			stager.Current = path
+
+			newRoot, err := r.indexPath(path, info, err)
+
+			if err != nil {
+				return err
+			}
+
+			if newRoot != "" {
+				roots = append(roots, newRoot)
+			}
+
+			return nil
+		})
+
+	if err != nil {
+		return nil, fmt.Errorf("unable to index root=%q: %w", root, err)
+	}
+
+	return roots, nil
+}
+
+func (r *directoryIndexer) indexPath(path string, info os.FileInfo, err error) (string, error) {
+	// ignore any path for which a filter function returns an error
+	for _, filterFn := range r.pathIndexVisitors {
+		if filterFn == nil {
+			continue
+		}
+
+		if filterErr := filterFn(path, info, err); filterErr != nil {
+			if errors.Is(filterErr, fs.SkipDir) {
+				// signal to walk() to skip this directory entirely (even if we're processing a file)
+				return "", filterErr
+			}
+			// skip this path but don't affect walk() trajectory
+			return "", nil
+		}
+	}
+
+	if info == nil {
+		// walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue.
+		r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path)
+		return "", nil
+	}
+
+	// here we check to see if we need to normalize paths to posix on the way in coming from windows
+	if runtime.GOOS == WindowsOS {
+		path = windowsToPosix(path)
+	}
+
+	newRoot, err := r.addPathToIndex(path, info)
+	if r.isFileAccessErr(path, err) {
+		return "", nil
+	}
+
+	return newRoot, nil
+}
+
+func (r *directoryIndexer) disallowFileAccessErr(path string, _ os.FileInfo, err error) error {
+	if r.isFileAccessErr(path, err) {
+		return errSkipPath
+	}
+	return nil
+}
+
+func (r *directoryIndexer) isFileAccessErr(path string, err error) bool {
+	// don't allow for errors to stop indexing, keep track of the paths and continue.
+	if err != nil {
+		log.Warnf("unable to access path=%q: %+v", path, err)
+		r.errPaths[path] = err
+		return true
+	}
+	return false
+}
+
+func (r directoryIndexer) addPathToIndex(p string, info os.FileInfo) (string, error) {
+	switch t := file.TypeFromMode(info.Mode()); t {
+	case file.TypeSymLink:
+		return r.addSymlinkToIndex(p, info)
+	case file.TypeDirectory:
+		return "", r.addDirectoryToIndex(p, info)
+	case file.TypeRegular:
+		return "", r.addFileToIndex(p, info)
+	default:
+		return "", fmt.Errorf("unsupported file type: %s", t)
+	}
+}
+
+func (r directoryIndexer) addDirectoryToIndex(p string, info os.FileInfo) error {
+	ref, err := r.tree.AddDir(file.Path(p))
+	if err != nil {
+		return err
+	}
+
+	metadata := file.NewMetadataFromPath(p, info)
+	r.index.Add(*ref, metadata)
+
+	return nil
+}
+
+func (r directoryIndexer) addFileToIndex(p string, info os.FileInfo) error {
+	ref, err := r.tree.AddFile(file.Path(p))
+	if err != nil {
+		return err
+	}
+
+	metadata := file.NewMetadataFromPath(p, info)
+	r.index.Add(*ref, metadata)
+
+	return nil
+}
+
+func (r directoryIndexer) addSymlinkToIndex(p string, info os.FileInfo) (string, error) {
+	linkTarget, err := os.Readlink(p)
+	if err != nil {
+		return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err)
+	}
+
+	if filepath.IsAbs(linkTarget) {
+		// if the link is absolute (e.g., /bin/ls -> /bin/busybox) we need to
+		// resolve relative to the root of the base directory
+		linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget))
+	} else {
+		// if the link is not absolute (e.g., /dev/stderr -> fd/2) we need to
+		// resolve it relative to the directory in question (e.g. resolve to
+		// /dev/fd/2)
+		if r.base == "" {
+			linkTarget = filepath.Join(filepath.Dir(p), linkTarget)
+		} else {
+			// if the base is set, then we first need to resolve the link,
+			// before finding its location in the base
+			dir, err := filepath.Rel(r.base, filepath.Dir(p))
+			if err != nil {
+				return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err)
+			}
+			linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget)))
+		}
+	}
+
+	ref, err := r.tree.AddSymLink(file.Path(p), file.Path(linkTarget))
+	if err != nil {
+		return "", err
+	}
+
+	targetAbsPath := linkTarget
+	if !filepath.IsAbs(targetAbsPath) {
+		targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget))
+	}
+
+	metadata := file.NewMetadataFromPath(p, info)
+	metadata.LinkDestination = linkTarget
+	r.index.Add(*ref, metadata)
+
+	return targetAbsPath, nil
+}
+
+func (r directoryIndexer) hasBeenIndexed(p string) (bool, *file.Metadata) {
+	filePath := file.Path(p)
+	if !r.tree.HasPath(filePath) {
+		return false, nil
+	}
+
+	exists, ref, err := r.tree.File(filePath)
+	if err != nil || !exists || !ref.HasReference() {
+		return false, nil
+	}
+
+	// cases like "/" will be in the tree, but may not have been indexed yet (a special case). We want to capture
+	// these cases as new paths to index.
+	if !ref.HasReference() {
+		return false, nil
+	}
+
+	entry, err := r.index.Get(*ref.Reference)
+	if err != nil {
+		return false, nil
+	}
+
+	return true, &entry.Metadata
+}
+
+func (r *directoryIndexer) disallowRevisitingVisitor(path string, _ os.FileInfo, _ error) error {
+	// this prevents visiting:
+	// - link destinations twice, once for the real file and another through the virtual path
+	// - infinite link cycles
+	if indexed, metadata := r.hasBeenIndexed(path); indexed {
+		if metadata.IsDir {
+			// signal to walk() that we should skip this directory entirely
+			return fs.SkipDir
+		}
+		return errSkipPath
+	}
+	return nil
+}
+
+func disallowUnixSystemRuntimePath(path string, _ os.FileInfo, _ error) error {
+	if internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...) {
+		return fs.SkipDir
+	}
+	return nil
+}
+
+func disallowByFileType(_ string, info os.FileInfo, _ error) error {
+	if info == nil {
+		// we can't filter out by filetype for non-existent files
+		return nil
+	}
+	switch file.TypeFromMode(info.Mode()) {
+	case file.TypeCharacterDevice, file.TypeSocket, file.TypeBlockDevice, file.TypeFIFO, file.TypeIrregular:
+		return errSkipPath
+		// note: symlinks that point to these files may still get by.
+		// We handle this later in processing to help prevent infinite link traversal.
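+		//
+		// hypothetical example (assumed paths): a link such as /dev/log -> /run/systemd/journal/dev-log
+		// passes this filter because the link's own mode is TypeSymLink rather than TypeSocket;
+		// it is disallowRevisitingVisitor (prepended in newDirectoryIndexer) that stops endless
+		// traversal through such links.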
+	}
+
+	return nil
+}
+
+func requireFileInfo(_ string, info os.FileInfo, _ error) error {
+	if info == nil {
+		return errSkipPath
+	}
+	return nil
+}
+
+func indexingProgress(path string) (*progress.Stage, *progress.Manual) {
+	stage := &progress.Stage{}
+	prog := &progress.Manual{
+		Total: -1,
+	}
+
+	bus.Publish(partybus.Event{
+		Type:   event.FileIndexingStarted,
+		Source: path,
+		Value: struct {
+			progress.Stager
+			progress.Progressable
+		}{
+			Stager:       progress.Stager(stage),
+			Progressable: prog,
+		},
+	})
+
+	return stage, prog
+}
diff --git a/syft/source/directory_indexer_test.go b/syft/source/directory_indexer_test.go
new file mode 100644
index 00000000000..b6403559d16
--- /dev/null
+++ b/syft/source/directory_indexer_test.go
@@ -0,0 +1,328 @@
+package source
+
+import (
+	"io/fs"
+	"os"
+	"path"
+	"sort"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/scylladb/go-set/strset"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/wagoodman/go-progress"
+
+	"github.com/anchore/stereoscope/pkg/file"
+)
+
+type indexerMock struct {
+	observedRoots   []string
+	additionalRoots map[string][]string
+}
+
+func (m *indexerMock) indexer(s string, _ *progress.Stage) ([]string, error) {
+	m.observedRoots = append(m.observedRoots, s)
+	return m.additionalRoots[s], nil
+}
+
+func Test_indexAllRoots(t *testing.T) {
+	tests := []struct {
+		name          string
+		root          string
+		mock          indexerMock
+		expectedRoots []string
+	}{
+		{
+			name: "no additional roots",
+			root: "a/place",
+			mock: indexerMock{
+				additionalRoots: make(map[string][]string),
+			},
+			expectedRoots: []string{
+				"a/place",
+			},
+		},
+		{
+			name: "additional roots from a single call",
+			root: "a/place",
+			mock: indexerMock{
+				additionalRoots: map[string][]string{
+					"a/place": {
+						"another/place",
+						"yet-another/place",
+					},
+				},
+			},
+			expectedRoots: []string{
+				"a/place",
+				"another/place",
+				"yet-another/place",
+			},
+		},
+		{
+			name: "additional roots from multiple calls",
+			root: "a/place",
+			mock: indexerMock{
+				additionalRoots: map[string][]string{
+					"a/place": {
+						"another/place",
+						"yet-another/place",
+					},
+					"yet-another/place": {
+						"a-quiet-place-2",
+						"a-final/place",
+					},
+				},
+			},
+			expectedRoots: []string{
+				"a/place",
+				"another/place",
+				"yet-another/place",
+				"a-quiet-place-2",
+				"a-final/place",
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			assert.NoError(t, indexAllRoots(test.root, test.mock.indexer))
+		})
+	}
+}
+
+func TestDirectoryIndexer_handleFileAccessErr(t *testing.T) {
+	tests := []struct {
+		name                string
+		input               error
+		expectedPathTracked bool
+	}{
+		{
+			name:                "permission error does not propagate",
+			input:               os.ErrPermission,
+			expectedPathTracked: true,
+		},
+		{
+			name:                "file does not exist error does not propagate",
+			input:               os.ErrNotExist,
+			expectedPathTracked: true,
+		},
+		{
+			name:                "non-permission errors are tracked",
+			input:               os.ErrInvalid,
+			expectedPathTracked: true,
+		},
+		{
+			name:                "non-errors ignored",
+			input:               nil,
+			expectedPathTracked: false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			r := directoryIndexer{
+				errPaths: make(map[string]error),
+			}
+			p := "a/place"
+			assert.Equal(t, r.isFileAccessErr(p, test.input), test.expectedPathTracked)
+			_, exists := r.errPaths[p]
+			assert.Equal(t, test.expectedPathTracked, exists)
+		})
+	}
+}
+
+func TestDirectoryIndexer_IncludeRootPathInIndex(t *testing.T) {
+	filterFn := func(path string, _ os.FileInfo, _ error) error {
if path != "/" { + return fs.SkipDir + } + return nil + } + + indexer := newDirectoryIndexer("/", "", filterFn) + tree, index, err := indexer.build() + require.NoError(t, err) + + exists, ref, err := tree.File(file.Path("/")) + require.NoError(t, err) + require.NotNil(t, ref) + assert.True(t, exists) + + _, err = index.Get(*ref.Reference) + require.NoError(t, err) +} + +func TestDirectoryIndexer_indexPath_skipsNilFileInfo(t *testing.T) { + // TODO: Ideally we can use an OS abstraction, which would obviate the need for real FS setup. + tempFile, err := os.CreateTemp("", "") + require.NoError(t, err) + + indexer := newDirectoryIndexer(tempFile.Name(), "") + + t.Run("filtering path with nil os.FileInfo", func(t *testing.T) { + assert.NotPanics(t, func() { + _, err := indexer.indexPath("/dont-care", nil, nil) + assert.NoError(t, err) + assert.False(t, indexer.tree.HasPath("/dont-care")) + }) + }) +} + +func TestDirectoryIndexer_index(t *testing.T) { + // note: this test is testing the effects from newDirectoryResolver, indexTree, and addPathToIndex + indexer := newDirectoryIndexer("test-fixtures/system_paths/target", "") + tree, index, err := indexer.build() + require.NoError(t, err) + + tests := []struct { + name string + path string + }{ + { + name: "has dir", + path: "test-fixtures/system_paths/target/home", + }, + { + name: "has path", + path: "test-fixtures/system_paths/target/home/place", + }, + { + name: "has symlink", + path: "test-fixtures/system_paths/target/link/a-symlink", + }, + { + name: "has symlink target", + path: "test-fixtures/system_paths/outside_root/link_target/place", + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + info, err := os.Stat(test.path) + assert.NoError(t, err) + + // note: the index uses absolute paths, so assertions MUST keep this in mind + cwd, err := os.Getwd() + require.NoError(t, err) + + p := file.Path(path.Join(cwd, test.path)) + assert.Equal(t, true, tree.HasPath(p)) + exists, ref, err := tree.File(p) + assert.Equal(t, true, exists) + if assert.NoError(t, err) { + return + } + + entry, err := index.Get(*ref.Reference) + require.NoError(t, err) + assert.Equal(t, info.Mode(), entry.Mode) + }) + } +} + +func TestDirectoryIndexer_SkipsAlreadyVisitedLinkDestinations(t *testing.T) { + var observedPaths []string + pathObserver := func(p string, _ os.FileInfo, _ error) error { + fields := strings.Split(p, "test-fixtures/symlinks-prune-indexing") + if len(fields) != 2 { + t.Fatalf("unable to parse path: %s", p) + } + clean := strings.TrimLeft(fields[1], "/") + if clean != "" { + observedPaths = append(observedPaths, clean) + } + return nil + } + resolver := newDirectoryIndexer("./test-fixtures/symlinks-prune-indexing", "") + // we want to cut ahead of any possible filters to see what paths are considered for indexing (closest to walking) + resolver.pathIndexVisitors = append([]pathIndexVisitor{pathObserver}, resolver.pathIndexVisitors...) 
+
+	// note: this test is NOT about the effects left on the tree or the index, but rather about WHICH paths are
+	// considered for indexing and HOW traversal prunes paths that have already been visited
+	_, _, err := resolver.build()
+	require.NoError(t, err)
+
+	expected := []string{
+		"before-path",
+		"c-file.txt",
+		"c-path",
+		"path",
+		"path/1",
+		"path/1/2",
+		"path/1/2/3",
+		"path/1/2/3/4",
+		"path/1/2/3/4/dont-index-me-twice.txt",
+		"path/5",
+		"path/5/6",
+		"path/5/6/7",
+		"path/5/6/7/8",
+		"path/5/6/7/8/dont-index-me-twice-either.txt",
+		"path/file.txt",
+		// everything below is after the original tree is indexed, and we are now indexing additional roots from symlinks
+		"path",                 // considered from symlink before-path, but pruned
+		"before-path/file.txt", // considered from symlink c-file.txt, but pruned
+		"before-path",          // considered from symlink c-path, but pruned
+	}
+
+	assert.Equal(t, expected, observedPaths, "visited paths differ \n %s", cmp.Diff(expected, observedPaths))
+}
+
+func TestDirectoryIndexer_IndexesAllTypes(t *testing.T) {
+	indexer := newDirectoryIndexer("./test-fixtures/symlinks-prune-indexing", "")
+
+	tree, index, err := indexer.build()
+	require.NoError(t, err)
+
+	allRefs := tree.AllFiles(file.AllTypes()...)
+	var pathRefs []file.Reference
+	paths := strset.New()
+	for _, ref := range allRefs {
+		fields := strings.Split(string(ref.RealPath), "test-fixtures/symlinks-prune-indexing")
+		if len(fields) != 2 {
+			t.Fatalf("unable to parse path: %s", ref.RealPath)
+		}
+		clean := strings.TrimLeft(fields[1], "/")
+		if clean == "" {
+			continue
+		}
+		paths.Add(clean)
+		pathRefs = append(pathRefs, ref)
+	}
+
+	pathsList := paths.List()
+	sort.Strings(pathsList)
+
+	expected := []string{
+		"before-path", // link
+		"c-file.txt",  // link
+		"c-path",      // link
+		"path",        // dir
+		"path/1",      // dir
+		"path/1/2",    // dir
+		"path/1/2/3",  // dir
+		"path/1/2/3/4",                                // dir
+		"path/1/2/3/4/dont-index-me-twice.txt",        // file
+		"path/5",                                      // dir
+		"path/5/6",                                    // dir
+		"path/5/6/7",                                  // dir
+		"path/5/6/7/8",                                // dir
+		"path/5/6/7/8/dont-index-me-twice-either.txt", // file
+		"path/file.txt",                               // file
+	}
+	expectedSet := strset.New(expected...)
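+
+	// note (an assumption about ordering): AllFiles makes no ordering guarantee, so the assertions
+	// below compare sets; pathsList is sorted only to keep the cmp.Diff output deterministic.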
+
+	// make certain all expected paths are in the tree (and no extra ones are there either)
+	assert.True(t, paths.IsEqual(expectedSet), "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, pathsList))
+
+	// make certain that the paths are also in the file index
+	for _, ref := range pathRefs {
+		_, err := index.Get(ref)
+		require.NoError(t, err)
+	}
+}
diff --git a/syft/source/directory_resolver.go b/syft/source/directory_resolver.go
index d31994d70ff..b68ce890344 100644
--- a/syft/source/directory_resolver.go
+++ b/syft/source/directory_resolver.go
@@ -4,22 +4,15 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"io/fs"
 	"os"
 	"path"
 	"path/filepath"
 	"runtime"
 	"strings"
 
-	"github.com/wagoodman/go-partybus"
-	"github.com/wagoodman/go-progress"
-
 	"github.com/anchore/stereoscope/pkg/file"
 	"github.com/anchore/stereoscope/pkg/filetree"
-	"github.com/anchore/syft/internal"
-	"github.com/anchore/syft/internal/bus"
 	"github.com/anchore/syft/internal/log"
-	"github.com/anchore/syft/syft/event"
 )
 
 const WindowsOS = "windows"
@@ -30,9 +23,9 @@ var unixSystemRuntimePrefixes = []string{
 	"/sys",
 }
 
-var _ FileResolver = (*directoryResolver)(nil)
+var errSkipPath = errors.New("skip path")
 
-type pathFilterFn func(string, os.FileInfo) bool
+var _ FileResolver = (*directoryResolver)(nil)
 
 // directoryResolver implements path and content access for the directory data source.
 type directoryResolver struct {
@@ -40,15 +33,22 @@ type directoryResolver struct {
 	base                    string
 	currentWdRelativeToRoot string
 	currentWd               string
-	fileTree *filetree.FileTree
-	metadata map[file.ID]FileMetadata
-	// TODO: wire up to report these paths in the json report
-	pathFilterFns  []pathFilterFn
-	refsByMIMEType map[string][]file.Reference
-	errPaths       map[string]error
+	tree          filetree.Reader
+	index         filetree.IndexReader
+	searchContext filetree.Searcher
+	indexer       *directoryIndexer
+}
+
+func newDirectoryResolver(root string, base string, pathFilters ...pathIndexVisitor) (*directoryResolver, error) {
+	r, err := newDirectoryResolverWithoutIndex(root, base, pathFilters...)
+ if err != nil { + return nil, err + } + + return r, r.buildIndex() } -func newDirectoryResolver(root string, base string, pathFilters ...pathFilterFn) (*directoryResolver, error) { +func newDirectoryResolverWithoutIndex(root string, base string, pathFilters ...pathIndexVisitor) (*directoryResolver, error) { currentWD, err := os.Getwd() if err != nil { return nil, fmt.Errorf("could not get CWD: %w", err) @@ -87,226 +87,33 @@ func newDirectoryResolver(root string, base string, pathFilters ...pathFilterFn) currentWdRelRoot = filepath.Clean(cleanRoot) } - resolver := directoryResolver{ + return &directoryResolver{ path: cleanRoot, base: cleanBase, currentWd: cleanCWD, currentWdRelativeToRoot: currentWdRelRoot, - fileTree: filetree.NewFileTree(), - metadata: make(map[file.ID]FileMetadata), - pathFilterFns: append([]pathFilterFn{isUnallowableFileType, isUnixSystemRuntimePath}, pathFilters...), - refsByMIMEType: make(map[string][]file.Reference), - errPaths: make(map[string]error), - } - - return &resolver, indexAllRoots(cleanRoot, resolver.indexTree) + tree: filetree.New(), + index: filetree.NewIndex(), + indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...), + }, nil } -func (r *directoryResolver) indexTree(root string, stager *progress.Stage) ([]string, error) { - log.Debugf("indexing filesystem path=%q", root) - - var roots []string - var err error - - root, err = filepath.Abs(root) - if err != nil { - return nil, err - } - - // we want to be able to index single files with the directory resolver. However, we should also allow for attempting - // to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing). - // For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root, - // but continue forth with index regardless if the given root path exists or not. - fi, err := os.Stat(root) - if err != nil && fi != nil && !fi.IsDir() { - // note: we want to index the path regardless of an error stat-ing the path - newRoot, _ := r.indexPath(root, fi, nil) - if newRoot != "" { - roots = append(roots, newRoot) - } - return roots, nil +func (r *directoryResolver) buildIndex() error { + if r.indexer == nil { + return fmt.Errorf("no directory indexer configured") } - - return roots, filepath.Walk(root, - func(path string, info os.FileInfo, err error) error { - stager.Current = path - - newRoot, err := r.indexPath(path, info, err) - - if err != nil { - return err - } - - if newRoot != "" { - roots = append(roots, newRoot) - } - - return nil - }) -} - -func (r *directoryResolver) indexPath(path string, info os.FileInfo, err error) (string, error) { - // link cycles could cause a revisit --we should not allow this - if r.hasBeenIndexed(path) { - return "", nil - } - - // ignore any path which a filter function returns true - for _, filterFn := range r.pathFilterFns { - if filterFn != nil && filterFn(path, info) { - if info != nil && info.IsDir() { - return "", fs.SkipDir - } - return "", nil - } - } - - if r.isFileAccessErr(path, err) { - return "", nil - } - - if info == nil { - // walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue. 
- r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path) - return "", nil - } - - // here we check to see if we need to normalize paths to posix on the way in coming from windows - if runtime.GOOS == WindowsOS { - path = windowsToPosix(path) - } - - newRoot, err := r.addPathToIndex(path, info) - if r.isFileAccessErr(path, err) { - return "", nil - } - - return newRoot, nil -} - -func (r *directoryResolver) isFileAccessErr(path string, err error) bool { - // don't allow for errors to stop indexing, keep track of the paths and continue. - if err != nil { - log.Warnf("unable to access path=%q: %+v", path, err) - r.errPaths[path] = err - return true - } - return false -} - -func (r directoryResolver) addPathToIndex(p string, info os.FileInfo) (string, error) { - switch t := newFileTypeFromMode(info.Mode()); t { - case SymbolicLink: - return r.addSymlinkToIndex(p, info) - case Directory: - return "", r.addDirectoryToIndex(p, info) - case RegularFile: - return "", r.addFileToIndex(p, info) - default: - return "", fmt.Errorf("unsupported file type: %s", t) - } -} - -func (r directoryResolver) hasBeenIndexed(p string) bool { - filePath := file.Path(p) - if !r.fileTree.HasPath(filePath) { - return false - } - - exists, ref, err := r.fileTree.File(filePath) - if err != nil || !exists || ref == nil { - return false - } - - // cases like "/" will be in the tree, but not been indexed yet (a special case). We want to capture - // these cases as new paths to index. - _, exists = r.metadata[ref.ID()] - return exists -} - -func (r directoryResolver) addDirectoryToIndex(p string, info os.FileInfo) error { - ref, err := r.fileTree.AddDir(file.Path(p)) + tree, index, err := r.indexer.build() if err != nil { return err } - location := NewLocationFromDirectory(p, *ref) - metadata := fileMetadataFromPath(p, info, r.isInIndex(location)) - r.addFileMetadataToIndex(ref, metadata) + r.tree = tree + r.index = index + r.searchContext = filetree.NewSearchContext(tree, index) return nil } -func (r directoryResolver) addFileToIndex(p string, info os.FileInfo) error { - ref, err := r.fileTree.AddFile(file.Path(p)) - if err != nil { - return err - } - - location := NewLocationFromDirectory(p, *ref) - metadata := fileMetadataFromPath(p, info, r.isInIndex(location)) - r.addFileMetadataToIndex(ref, metadata) - - return nil -} - -func (r directoryResolver) addSymlinkToIndex(p string, info os.FileInfo) (string, error) { - var usedInfo = info - - linkTarget, err := os.Readlink(p) - if err != nil { - return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) - } - - if filepath.IsAbs(linkTarget) { - // if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to - // resolve relative to the root of the base directory - linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget)) - } else { - // if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to - // resolve it relative to the directory in question (e.g. 
resolve to - // /dev/fd/2) - if r.base == "" { - linkTarget = filepath.Join(filepath.Dir(p), linkTarget) - } else { - // if the base is set, then we first need to resolve the link, - // before finding it's location in the base - dir, err := filepath.Rel(r.base, filepath.Dir(p)) - if err != nil { - return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err) - } - linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget))) - } - } - - ref, err := r.fileTree.AddSymLink(file.Path(p), file.Path(linkTarget)) - if err != nil { - return "", err - } - - targetAbsPath := linkTarget - if !filepath.IsAbs(targetAbsPath) { - targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget)) - } - - location := NewLocationFromDirectory(p, *ref) - location.VirtualPath = p - metadata := fileMetadataFromPath(p, usedInfo, r.isInIndex(location)) - metadata.LinkDestination = linkTarget - r.addFileMetadataToIndex(ref, metadata) - - return targetAbsPath, nil -} - -func (r directoryResolver) addFileMetadataToIndex(ref *file.Reference, metadata FileMetadata) { - if ref != nil { - if metadata.MIMEType != "" { - r.refsByMIMEType[metadata.MIMEType] = append(r.refsByMIMEType[metadata.MIMEType], *ref) - } - r.metadata[ref.ID()] = metadata - } -} - func (r directoryResolver) requestPath(userPath string) (string, error) { if filepath.IsAbs(userPath) { // don't allow input to potentially hop above root path @@ -352,7 +159,7 @@ func (r *directoryResolver) HasPath(userPath string) bool { if err != nil { return false } - return r.fileTree.HasPath(file.Path(requestPath)) + return r.tree.HasPath(file.Path(requestPath)) } // Stringer to represent a directory path data source @@ -372,34 +179,24 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) } // we should be resolving symlinks and preserving this information as a VirtualPath to the real file - exists, ref, err := r.fileTree.File(file.Path(userStrPath), filetree.FollowBasenameLinks) + ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks) if err != nil { log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err) continue } - if !exists { + + if !ref.HasReference() { continue } - // TODO: why not use stored metadata? - fileMeta, err := os.Stat(string(ref.RealPath)) - if errors.Is(err, os.ErrNotExist) { - // note: there are other kinds of errors other than os.ErrNotExist that may be given that is platform - // specific, but essentially hints at the same overall problem (that the path does not exist). Such an - // error could be syscall.ENOTDIR (see https://github.com/golang/go/issues/18974). - continue - } else if err != nil { - // we don't want to consider any other syscalls that may hint at non-existence of the file/dir as - // invalid paths. This logging statement is meant to raise IO or permissions related problems. 
- var pathErr *os.PathError - if !errors.As(err, &pathErr) { - log.Warnf("path is not valid (%s): %+v", ref.RealPath, err) - } + entry, err := r.index.Get(*ref.Reference) + if err != nil { + log.Warnf("unable to get file by path=%q : %+v", userPath, err) continue } // don't consider directories - if fileMeta.IsDir() { + if entry.Metadata.IsDir { continue } @@ -407,12 +204,15 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) userStrPath = windowsToPosix(userStrPath) } - loc := NewVirtualLocationFromDirectory( - r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root - r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root - *ref, - ) - references = append(references, loc) + if ref.HasReference() { + references = append(references, + NewVirtualLocationFromDirectory( + r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root + r.responsePath(userStrPath), // the path used to access this file, relative to the resolver root + *ref.Reference, + ), + ) + } } return references, nil @@ -420,24 +220,39 @@ func (r directoryResolver) FilesByPath(userPaths ...string) ([]Location, error) // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. func (r directoryResolver) FilesByGlob(patterns ...string) ([]Location, error) { - result := make([]Location, 0) + uniqueFileIDs := file.NewFileReferenceSet() + uniqueLocations := make([]Location, 0) for _, pattern := range patterns { - globResults, err := r.fileTree.FilesByGlob(pattern, filetree.FollowBasenameLinks) + refVias, err := r.searchContext.SearchByGlob(pattern, filetree.FollowBasenameLinks) if err != nil { return nil, err } - for _, globResult := range globResults { + for _, refVia := range refVias { + if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) { + continue + } + entry, err := r.index.Get(*refVia.Reference) + if err != nil { + return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err) + } + + // don't consider directories + if entry.Metadata.IsDir { + continue + } + loc := NewVirtualLocationFromDirectory( - r.responsePath(string(globResult.Reference.RealPath)), // the actual path relative to the resolver root - r.responsePath(string(globResult.MatchPath)), // the path used to access this file, relative to the resolver root - globResult.Reference, + r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root + r.responsePath(string(refVia.RequestPath)), // the path used to access this file, relative to the resolver root + *refVia.Reference, ) - result = append(result, loc) + uniqueFileIDs.Add(*refVia.Reference) + uniqueLocations = append(uniqueLocations, loc) } } - return result, nil + return uniqueLocations, nil } // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference. @@ -461,34 +276,32 @@ func (r directoryResolver) FileContentsByLocation(location Location) (io.ReadClo if location.ref.RealPath == "" { return nil, errors.New("empty path given") } - if !r.isInIndex(location) { - // this is in cases where paths have been explicitly excluded from the tree index. In which case - // we should DENY all content requests. Why? These paths have been indicated to be inaccessible (either - // by preference or these files are not readable by the current user). 
- return nil, fmt.Errorf("file content is inaccessible path=%q", location.ref.RealPath) + + entry, err := r.index.Get(location.ref) + if err != nil { + return nil, err + } + + // don't consider directories + if entry.Type == file.TypeDirectory { + return nil, fmt.Errorf("cannot read contents of non-file %q", location.ref.RealPath) } + // RealPath is posix so for windows directory resolver we need to translate // to its true on disk path. filePath := string(location.ref.RealPath) if runtime.GOOS == WindowsOS { filePath = posixToWindows(filePath) } - return file.NewLazyReadCloser(filePath), nil -} -func (r directoryResolver) isInIndex(location Location) bool { - if location.ref.RealPath == "" { - return false - } - return r.fileTree.HasPath(location.ref.RealPath, filetree.FollowBasenameLinks) + return file.NewLazyReadCloser(filePath), nil } func (r *directoryResolver) AllLocations() <-chan Location { results := make(chan Location) go func() { defer close(results) - // this should be all non-directory types - for _, ref := range r.fileTree.AllFiles(file.TypeReg, file.TypeSymlink, file.TypeHardLink, file.TypeBlockDevice, file.TypeCharacterDevice, file.TypeFifo) { + for _, ref := range r.tree.AllFiles(file.AllTypes()...) { results <- NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref) } }() @@ -496,24 +309,38 @@ func (r *directoryResolver) AllLocations() <-chan Location { } func (r *directoryResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { - metadata, exists := r.metadata[location.ref.ID()] - if !exists { + entry, err := r.index.Get(location.ref) + if err != nil { return FileMetadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist) } - return metadata, nil + return entry.Metadata, nil } func (r *directoryResolver) FilesByMIMEType(types ...string) ([]Location, error) { - var locations []Location - for _, ty := range types { - if refs, ok := r.refsByMIMEType[ty]; ok { - for _, ref := range refs { - locations = append(locations, NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref)) - } + uniqueFileIDs := file.NewFileReferenceSet() + uniqueLocations := make([]Location, 0) + + refVias, err := r.searchContext.SearchByMIMEType(types...) + if err != nil { + return nil, err + } + for _, refVia := range refVias { + if !refVia.HasReference() { + continue } + if uniqueFileIDs.Contains(*refVia.Reference) { + continue + } + location := NewLocationFromDirectory( + r.responsePath(string(refVia.Reference.RealPath)), + *refVia.Reference, + ) + uniqueFileIDs.Add(*refVia.Reference) + uniqueLocations = append(uniqueLocations, location) } - return locations, nil + + return uniqueLocations, nil } func windowsToPosix(windowsPath string) (posixPath string) { @@ -542,80 +369,3 @@ func posixToWindows(posixPath string) (windowsPath string) { // combine volume name and backslash components return filepath.Clean(volumeName + remainingTranslatedPath) } - -func isUnixSystemRuntimePath(path string, _ os.FileInfo) bool { - return internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...) -} - -func isUnallowableFileType(_ string, info os.FileInfo) bool { - if info == nil { - // we can't filter out by filetype for non-existent files - return false - } - switch newFileTypeFromMode(info.Mode()) { - case CharacterDevice, Socket, BlockDevice, FIFONode, IrregularFile: - return true - // note: symlinks that point to these files may still get by. - // We handle this later in processing to help prevent against infinite links traversal. 
- } - - return false -} - -func indexAllRoots(root string, indexer func(string, *progress.Stage) ([]string, error)) error { - // why account for multiple roots? To cover cases when there is a symlink that references above the root path, - // in which case we need to additionally index where the link resolves to. it's for this reason why the filetree - // must be relative to the root of the filesystem (and not just relative to the given path). - pathsToIndex := []string{root} - fullPathsMap := map[string]struct{}{} - - stager, prog := indexingProgress(root) - defer prog.SetCompleted() -loop: - for { - var currentPath string - switch len(pathsToIndex) { - case 0: - break loop - case 1: - currentPath, pathsToIndex = pathsToIndex[0], nil - default: - currentPath, pathsToIndex = pathsToIndex[0], pathsToIndex[1:] - } - - additionalRoots, err := indexer(currentPath, stager) - if err != nil { - return fmt.Errorf("unable to index filesystem path=%q: %w", currentPath, err) - } - - for _, newRoot := range additionalRoots { - if _, ok := fullPathsMap[newRoot]; !ok { - fullPathsMap[newRoot] = struct{}{} - pathsToIndex = append(pathsToIndex, newRoot) - } - } - } - - return nil -} - -func indexingProgress(path string) (*progress.Stage, *progress.Manual) { - stage := &progress.Stage{} - prog := &progress.Manual{ - Total: -1, - } - - bus.Publish(partybus.Event{ - Type: event.FileIndexingStarted, - Source: path, - Value: struct { - progress.Stager - progress.Progressable - }{ - Stager: progress.Stager(stage), - Progressable: prog, - }, - }) - - return stage, prog -} diff --git a/syft/source/directory_resolver_test.go b/syft/source/directory_resolver_test.go index 20ed1e373e5..550488aaab9 100644 --- a/syft/source/directory_resolver_test.go +++ b/syft/source/directory_resolver_test.go @@ -8,17 +8,16 @@ import ( "io/fs" "io/ioutil" "os" - "path" "path/filepath" - "reflect" + "sort" "strings" "testing" "time" + "github.com/google/go-cmp/cmp" "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/wagoodman/go-progress" "github.com/anchore/stereoscope/pkg/file" ) @@ -302,18 +301,14 @@ func TestDirectoryResolverDoesNotIgnoreRelativeSystemPaths(t *testing.T) { // let's make certain that "dev/place" is not ignored, since it is not "/dev/place" resolver, err := newDirectoryResolver("test-fixtures/system_paths/target", "") assert.NoError(t, err) - // ensure the correct filter function is wired up by default - expectedFn := reflect.ValueOf(isUnallowableFileType) - actualFn := reflect.ValueOf(resolver.pathFilterFns[0]) - assert.Equal(t, expectedFn.Pointer(), actualFn.Pointer()) // all paths should be found (non filtering matches a path) locations, err := resolver.FilesByGlob("**/place") assert.NoError(t, err) // 4: within target/ - // 1: target/link --> relative path to "place" + // 1: target/link --> relative path to "place" // NOTE: this is filtered out since it not unique relative to outside_root/link_target/place // 1: outside_root/link_target/place - assert.Len(t, locations, 6) + assert.Len(t, locations, 5) // ensure that symlink indexing outside of root worked testLocation := "test-fixtures/system_paths/outside_root/link_target/place" @@ -363,233 +358,65 @@ func Test_isUnallowableFileType(t *testing.T) { tests := []struct { name string info os.FileInfo - expected bool + expected error }{ { name: "regular file", info: testFileInfo{ mode: 0, }, - expected: false, }, { name: "dir", info: testFileInfo{ mode: os.ModeDir, }, - expected: false, }, { 
name: "symlink", info: testFileInfo{ mode: os.ModeSymlink, }, - expected: false, }, { name: "socket", info: testFileInfo{ mode: os.ModeSocket, }, - expected: true, + expected: errSkipPath, }, { name: "named pipe", info: testFileInfo{ mode: os.ModeNamedPipe, }, - expected: true, + expected: errSkipPath, }, { name: "char device", info: testFileInfo{ mode: os.ModeCharDevice, }, - expected: true, + expected: errSkipPath, }, { name: "block device", info: testFileInfo{ mode: os.ModeDevice, }, - expected: true, + expected: errSkipPath, }, { name: "irregular", info: testFileInfo{ mode: os.ModeIrregular, }, - expected: true, + expected: errSkipPath, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - assert.Equal(t, test.expected, isUnallowableFileType("dont/care", test.info)) - }) - } -} - -func Test_directoryResolver_index(t *testing.T) { - // note: this test is testing the effects from newDirectoryResolver, indexTree, and addPathToIndex - r, err := newDirectoryResolver("test-fixtures/system_paths/target", "") - if err != nil { - t.Fatalf("unable to get indexed dir resolver: %+v", err) - } - tests := []struct { - name string - path string - }{ - { - name: "has dir", - path: "test-fixtures/system_paths/target/home", - }, - { - name: "has path", - path: "test-fixtures/system_paths/target/home/place", - }, - { - name: "has symlink", - path: "test-fixtures/system_paths/target/link/a-symlink", - }, - { - name: "has symlink target", - path: "test-fixtures/system_paths/outside_root/link_target/place", - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - info, err := os.Stat(test.path) - assert.NoError(t, err) - - // note: the index uses absolute paths, so assertions MUST keep this in mind - cwd, err := os.Getwd() - require.NoError(t, err) - - p := file.Path(path.Join(cwd, test.path)) - assert.Equal(t, true, r.fileTree.HasPath(p)) - exists, ref, err := r.fileTree.File(p) - assert.Equal(t, true, exists) - if assert.NoError(t, err) { - return - } - assert.Equal(t, info, r.metadata[ref.ID()]) - }) - } -} - -func Test_handleFileAccessErr(t *testing.T) { - tests := []struct { - name string - input error - expectedPathTracked bool - }{ - { - name: "permission error does not propagate", - input: os.ErrPermission, - expectedPathTracked: true, - }, - { - name: "file does not exist error does not propagate", - input: os.ErrNotExist, - expectedPathTracked: true, - }, - { - name: "non-permission errors are tracked", - input: os.ErrInvalid, - expectedPathTracked: true, - }, - { - name: "non-errors ignored", - input: nil, - expectedPathTracked: false, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - r := directoryResolver{ - errPaths: make(map[string]error), - } - p := "a/place" - assert.Equal(t, r.isFileAccessErr(p, test.input), test.expectedPathTracked) - _, exists := r.errPaths[p] - assert.Equal(t, test.expectedPathTracked, exists) - }) - } -} - -type indexerMock struct { - observedRoots []string - additionalRoots map[string][]string -} - -func (m *indexerMock) indexer(s string, _ *progress.Stage) ([]string, error) { - m.observedRoots = append(m.observedRoots, s) - return m.additionalRoots[s], nil -} - -func Test_indexAllRoots(t *testing.T) { - tests := []struct { - name string - root string - mock indexerMock - expectedRoots []string - }{ - { - name: "no additional roots", - root: "a/place", - mock: indexerMock{ - additionalRoots: make(map[string][]string), - }, - expectedRoots: []string{ - "a/place", - }, - }, - { - name: 
"additional roots from a single call", - root: "a/place", - mock: indexerMock{ - additionalRoots: map[string][]string{ - "a/place": { - "another/place", - "yet-another/place", - }, - }, - }, - expectedRoots: []string{ - "a/place", - "another/place", - "yet-another/place", - }, - }, - { - name: "additional roots from a multiple calls", - root: "a/place", - mock: indexerMock{ - additionalRoots: map[string][]string{ - "a/place": { - "another/place", - "yet-another/place", - }, - "yet-another/place": { - "a-quiet-place-2", - "a-final/place", - }, - }, - }, - expectedRoots: []string{ - "a/place", - "another/place", - "yet-another/place", - "a-quiet-place-2", - "a-final/place", - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - assert.NoError(t, indexAllRoots(test.root, test.mock.indexer)) + assert.Equal(t, test.expected, disallowByFileType("dont/care", test.info, nil)) }) } } @@ -646,12 +473,12 @@ func Test_IndexingNestedSymLinks(t *testing.T) { // check that we can access the same file via 2 symlinks locations, err = resolver.FilesByGlob("**/link_*") require.NoError(t, err) - require.Len(t, locations, 2) + require.Len(t, locations, 1) // you would think this is 2, however, they point to the same file, and glob only returns unique files // returned locations can be in any order expectedVirtualPaths := []string{ "link_to_link_to_new_readme", - "link_to_new_readme", + //"link_to_new_readme", // we filter out this one because the first symlink resolves to the same file } expectedRealPaths := []string{ @@ -670,8 +497,11 @@ func Test_IndexingNestedSymLinks(t *testing.T) { } func Test_IndexingNestedSymLinks_ignoredIndexes(t *testing.T) { - filterFn := func(path string, _ os.FileInfo) bool { - return strings.HasSuffix(path, string(filepath.Separator)+"readme") + filterFn := func(path string, _ os.FileInfo, _ error) error { + if strings.HasSuffix(path, string(filepath.Separator)+"readme") { + return errSkipPath + } + return nil } resolver, err := newDirectoryResolver("./test-fixtures/symlinks-simple", "", filterFn) @@ -732,6 +562,14 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) { cwd, err := os.Getwd() require.NoError(t, err) + r, err := newDirectoryResolver(".", "") + require.NoError(t, err) + + exists, existingPath, err := r.tree.File(file.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt"))) + require.True(t, exists) + require.NoError(t, err) + require.True(t, existingPath.HasReference()) + tests := []struct { name string location Location @@ -739,11 +577,9 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) { err bool }{ { - name: "use file reference for content requests", - location: NewLocationFromDirectory("some/place", file.Reference{ - RealPath: file.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt")), - }), - expects: "this file has contents", + name: "use file reference for content requests", + location: NewLocationFromDirectory("some/place", *existingPath.Reference), + expects: "this file has contents", }, { name: "error on empty file reference", @@ -753,8 +589,6 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r, err := newDirectoryResolver(".", "") - require.NoError(t, err) actual, err := r.FileContentsByLocation(test.location) if test.err { @@ -775,44 +609,40 @@ func Test_directoryResolver_FileContentsByLocation(t *testing.T) { func Test_isUnixSystemRuntimePath(t *testing.T) { tests := []struct { 
path string - expected bool + expected error }{ { - path: "proc/place", - expected: false, + path: "proc/place", }, { path: "/proc/place", - expected: true, + expected: fs.SkipDir, }, { path: "/proc", - expected: true, + expected: fs.SkipDir, }, { - path: "/pro/c", - expected: false, + path: "/pro/c", }, { - path: "/pro", - expected: false, + path: "/pro", }, { path: "/dev", - expected: true, + expected: fs.SkipDir, }, { path: "/sys", - expected: true, + expected: fs.SkipDir, }, { - path: "/something/sys", - expected: false, + path: "/something/sys", }, } for _, test := range tests { t.Run(test.path, func(t *testing.T) { - assert.Equal(t, test.expected, isUnixSystemRuntimePath(test.path, nil)) + assert.Equal(t, test.expected, disallowUnixSystemRuntimePath(test.path, nil, nil)) }) } } @@ -824,11 +654,9 @@ func Test_SymlinkLoopWithGlobsShouldResolve(t *testing.T) { locations, err := resolver.FilesByGlob("**/file.target") require.NoError(t, err) - // Note: I'm not certain that this behavior is correct, but it is not an infinite loop (which is the point of the test) - // - block/loop0/file.target - // - devices/loop0/file.target - // - devices/loop0/subsystem/loop0/file.target - assert.Len(t, locations, 3) + + require.Len(t, locations, 1) + assert.Equal(t, "devices/loop0/file.target", locations[0].RealPath) } testWithTimeout(t, 5*time.Second, test) @@ -848,44 +676,6 @@ func testWithTimeout(t *testing.T, timeout time.Duration, test func(*testing.T)) } } -func Test_IncludeRootPathInIndex(t *testing.T) { - filterFn := func(path string, _ os.FileInfo) bool { - return path != "/" - } - - resolver, err := newDirectoryResolver("/", "", filterFn) - require.NoError(t, err) - - exists, ref, err := resolver.fileTree.File(file.Path("/")) - require.NoError(t, err) - require.NotNil(t, ref) - assert.True(t, exists) - - _, exists = resolver.metadata[ref.ID()] - require.True(t, exists) -} - -func TestDirectoryResolver_indexPath(t *testing.T) { - // TODO: Ideally we can use an OS abstraction, which would obviate the need for real FS setup. 
- tempFile, err := os.CreateTemp("", "") - require.NoError(t, err) - - resolver, err := newDirectoryResolver(tempFile.Name(), "") - require.NoError(t, err) - - t.Run("filtering path with nil os.FileInfo", func(t *testing.T) { - // We use one of these prefixes in order to trigger a pathFilterFn - filteredPath := unixSystemRuntimePrefixes[0] - - var fileInfo os.FileInfo = nil - - assert.NotPanics(t, func() { - _, err := resolver.indexPath(filteredPath, fileInfo, nil) - assert.NoError(t, err) - }) - }) -} - func TestDirectoryResolver_FilesByPath_baseRoot(t *testing.T) { cases := []struct { name string @@ -957,4 +747,333 @@ func TestDirectoryResolver_FilesByPath_baseRoot(t *testing.T) { assert.ElementsMatch(t, c.expected, s.List()) }) } + +} + +func Test_directoryResolver_resolvesLinks(t *testing.T) { + tests := []struct { + name string + runner func(FileResolver) []Location + expected []Location + }{ + { + name: "by mimetype", + runner: func(resolver FileResolver) []Location { + // links should not show up when searching mimetype + actualLocations, err := resolver.FilesByMIMEType("text/plain") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "file-1.txt", + }, + //VirtualPath: "file-1.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "file-3.txt", + }, + //VirtualPath: "file-3.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + //VirtualPath: "file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "parent/file-4.txt", + }, + //VirtualPath: "parent/file-4.txt", + }, + }, + }, + { + name: "by glob to links", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + // for that reason we need to place **/ in front (which is not the same for other resolvers) + actualLocations, err := resolver.FilesByGlob("**/*ink-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "file-1.txt", + }, + VirtualPath: "link-1", + }, + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + VirtualPath: "link-2", + }, + // we already have this real file path via another link, so only one is returned + //{ + // Coordinates: Coordinates{ + // RealPath: "file-2.txt", + // }, + // VirtualPath: "link-indirect", + //}, + { + Coordinates: Coordinates{ + RealPath: "file-3.txt", + }, + VirtualPath: "link-within", + }, + }, + }, + { + name: "by basename", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-2.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // this has two copies in the base image, which overwrites the same location + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + //VirtualPath: "file-2.txt", + }, + }, + }, + { + name: "by basename glob", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-?.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "file-1.txt", + }, + //VirtualPath: "file-1.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + //VirtualPath: "file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "file-3.txt", + }, + //VirtualPath: "file-3.txt", + }, + { + Coordinates: Coordinates{ + 
RealPath: "parent/file-4.txt", + }, + //VirtualPath: "parent/file-4.txt", + }, + }, + }, + { + name: "by basename glob to links", + runner: func(resolver FileResolver) []Location { + actualLocations, err := resolver.FilesByGlob("**/link-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "file-1.txt", + }, + VirtualPath: "link-1", + ref: file.Reference{RealPath: "file-1.txt"}, + }, + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + VirtualPath: "link-2", + ref: file.Reference{RealPath: "file-2.txt"}, + }, + // we already have this real file path via another link, so only one is returned + //{ + // Coordinates: Coordinates{ + // RealPath: "file-2.txt", + // }, + // VirtualPath: "link-indirect", + // ref: file.Reference{RealPath: "file-2.txt"}, + //}, + { + Coordinates: Coordinates{ + RealPath: "file-3.txt", + }, + VirtualPath: "link-within", + ref: file.Reference{RealPath: "file-3.txt"}, + }, + }, + }, + { + name: "by extension", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/*.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "file-1.txt", + }, + //VirtualPath: "file-1.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + //VirtualPath: "file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "file-3.txt", + }, + //VirtualPath: "file-3.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "parent/file-4.txt", + }, + //VirtualPath: "parent/file-4.txt", + }, + }, + }, + { + name: "by path to degree 1 link", + runner: func(resolver FileResolver) []Location { + // links resolve to the final file + actualLocations, err := resolver.FilesByPath("/link-2") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // we have multiple copies across layers + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + VirtualPath: "link-2", + }, + }, + }, + { + name: "by path to degree 2 link", + runner: func(resolver FileResolver) []Location { + // multiple links resolves to the final file + actualLocations, err := resolver.FilesByPath("/link-indirect") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // we have multiple copies across layers + { + Coordinates: Coordinates{ + RealPath: "file-2.txt", + }, + VirtualPath: "link-indirect", + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-from-image-symlinks-fixture", "") + require.NoError(t, err) + assert.NoError(t, err) + + actual := test.runner(resolver) + + compareLocations(t, test.expected, actual) + }) + } +} + +func TestDirectoryResolver_DoNotAddVirtualPathsToTree(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-prune-indexing", "") + require.NoError(t, err) + + allRealPaths := resolver.tree.AllRealPaths() + pathSet := file.NewPathSet(allRealPaths...) 
+ + assert.False(t, + pathSet.Contains("/before-path/file.txt"), + "symlink destinations should only be indexed at their real path, not through their virtual (symlinked) path", + ) + + assert.False(t, + pathSet.Contains("/a-path/file.txt"), + "symlink destinations should only be indexed at their real path, not through their virtual (symlinked) path", + ) + +} + +func TestDirectoryResolver_FilesContents_errorOnDirRequest(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/system_paths", "") + assert.NoError(t, err) + + var dirLoc *Location + for loc := range resolver.AllLocations() { + entry, err := resolver.index.Get(loc.ref) + require.NoError(t, err) + if entry.Metadata.IsDir { + dirLoc = &loc + break + } + } + + require.NotNil(t, dirLoc) + + reader, err := resolver.FileContentsByLocation(*dirLoc) + require.Error(t, err) + require.Nil(t, reader) +} + +func TestDirectoryResolver_AllLocations(t *testing.T) { + resolver, err := newDirectoryResolver("./test-fixtures/symlinks-from-image-symlinks-fixture", "") + assert.NoError(t, err) + + paths := strset.New() + for loc := range resolver.AllLocations() { + if strings.HasPrefix(loc.RealPath, "/") { + // ignore outside of the fixture root for now + continue + } + paths.Add(loc.RealPath) + } + expected := []string{ + "file-1.txt", + "file-2.txt", + "file-3.txt", + "link-1", + "link-2", + "link-dead", + "link-indirect", + "link-within", + "parent", + "parent-link", + "parent/file-4.txt", + } + + pathsList := paths.List() + sort.Strings(pathsList) + + assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) } diff --git a/syft/source/excluding_file_resolver_test.go b/syft/source/excluding_file_resolver_test.go index 958864e2920..968d2b6a818 100644 --- a/syft/source/excluding_file_resolver_test.go +++ b/syft/source/excluding_file_resolver_test.go @@ -56,20 +56,20 @@ func TestExcludingResolver(t *testing.T) { resolver := &mockResolver{ locations: test.locations, } - excludingResolver := NewExcludingResolver(resolver, test.excludeFn) + er := NewExcludingResolver(resolver, test.excludeFn) - locations, _ := excludingResolver.FilesByPath() + locations, _ := er.FilesByPath() assert.ElementsMatch(t, locationPaths(locations), test.expected) - locations, _ = excludingResolver.FilesByGlob() + locations, _ = er.FilesByGlob() assert.ElementsMatch(t, locationPaths(locations), test.expected) - locations, _ = excludingResolver.FilesByMIMEType() + locations, _ = er.FilesByMIMEType() assert.ElementsMatch(t, locationPaths(locations), test.expected) locations = []Location{} - channel := excludingResolver.AllLocations() + channel := er.AllLocations() for location := range channel { locations = append(locations, location) } @@ -78,26 +78,26 @@ func TestExcludingResolver(t *testing.T) { diff := difference(test.locations, test.expected) for _, path := range diff { - assert.False(t, excludingResolver.HasPath(path)) - c, err := excludingResolver.FileContentsByLocation(makeLocation(path)) + assert.False(t, er.HasPath(path)) + c, err := er.FileContentsByLocation(makeLocation(path)) assert.Nil(t, c) assert.Error(t, err) - m, err := excludingResolver.FileMetadataByLocation(makeLocation(path)) + m, err := er.FileMetadataByLocation(makeLocation(path)) assert.Empty(t, m.LinkDestination) assert.Error(t, err) - l := excludingResolver.RelativeFileByPath(makeLocation(""), path) + l := er.RelativeFileByPath(makeLocation(""), path) assert.Nil(t, l) } for _, path := range 
test.expected { - assert.True(t, excludingResolver.HasPath(path)) - c, err := excludingResolver.FileContentsByLocation(makeLocation(path)) + assert.True(t, er.HasPath(path)) + c, err := er.FileContentsByLocation(makeLocation(path)) assert.NotNil(t, c) assert.Nil(t, err) - m, err := excludingResolver.FileMetadataByLocation(makeLocation(path)) + m, err := er.FileMetadataByLocation(makeLocation(path)) assert.NotEmpty(t, m.LinkDestination) assert.Nil(t, err) - l := excludingResolver.RelativeFileByPath(makeLocation(""), path) + l := er.RelativeFileByPath(makeLocation(""), path) assert.NotNil(t, l) } }) @@ -176,6 +176,18 @@ func (r *mockResolver) FilesByMIMEType(_ ...string) ([]Location, error) { return r.getLocations() } +func (r *mockResolver) FilesByExtension(_ ...string) ([]Location, error) { + return r.getLocations() +} + +func (r *mockResolver) FilesByBasename(_ ...string) ([]Location, error) { + return r.getLocations() +} + +func (r *mockResolver) FilesByBasenameGlob(_ ...string) ([]Location, error) { + return r.getLocations() +} + func (r *mockResolver) RelativeFileByPath(_ Location, path string) *Location { return &Location{ Coordinates: Coordinates{ diff --git a/syft/source/file_metadata.go b/syft/source/file_metadata.go index 8082c751269..0763564d0fb 100644 --- a/syft/source/file_metadata.go +++ b/syft/source/file_metadata.go @@ -1,67 +1,17 @@ package source import ( - "os" - "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/image" - "github.com/anchore/syft/internal/log" ) -type FileMetadata struct { - Mode os.FileMode - Type FileType - UserID int - GroupID int - LinkDestination string - Size int64 - MIMEType string -} +type FileMetadata = file.Metadata -func fileMetadataByLocation(img *image.Image, location Location) (FileMetadata, error) { +func fileMetadataByLocation(img *image.Image, location Location) (file.Metadata, error) { entry, err := img.FileCatalog.Get(location.ref) if err != nil { return FileMetadata{}, err } - return FileMetadata{ - Mode: entry.Metadata.Mode, - Type: newFileTypeFromTarHeaderTypeFlag(entry.Metadata.TypeFlag), - UserID: entry.Metadata.UserID, - GroupID: entry.Metadata.GroupID, - LinkDestination: entry.Metadata.Linkname, - Size: entry.Metadata.Size, - MIMEType: entry.Metadata.MIMEType, - }, nil -} - -func fileMetadataFromPath(path string, info os.FileInfo, withMIMEType bool) FileMetadata { - var mimeType string - uid, gid := GetXid(info) - - if withMIMEType { - f, err := os.Open(path) - if err != nil { - // TODO: it may be that the file is inaccessible, however, this is not an error or a warning. 
In the future we need to track these as known-unknowns - f = nil - } else { - defer func() { - if err := f.Close(); err != nil { - log.Warnf("unable to close file while obtaining metadata: %s", path) - } - }() - } - - mimeType = file.MIMEType(f) - } - - return FileMetadata{ - Mode: info.Mode(), - Type: newFileTypeFromMode(info.Mode()), - // unsupported across platforms - UserID: uid, - GroupID: gid, - Size: info.Size(), - MIMEType: mimeType, - } + return entry.Metadata, nil } diff --git a/syft/source/file_metadata_test.go b/syft/source/file_metadata_test.go deleted file mode 100644 index 3bdedb42ff1..00000000000 --- a/syft/source/file_metadata_test.go +++ /dev/null @@ -1,57 +0,0 @@ -//go:build !windows -// +build !windows - -package source - -import ( - "os" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func Test_fileMetadataFromPath(t *testing.T) { - - tests := []struct { - path string - withMIMEType bool - expectedType string - expectedMIMEType string - }{ - { - path: "test-fixtures/symlinks-simple/readme", - withMIMEType: true, - expectedType: "RegularFile", - expectedMIMEType: "text/plain", - }, - { - path: "test-fixtures/symlinks-simple/link_to_new_readme", - withMIMEType: true, - expectedType: "SymbolicLink", - expectedMIMEType: "text/plain", - }, - { - path: "test-fixtures/symlinks-simple/readme", - withMIMEType: false, - expectedType: "RegularFile", - expectedMIMEType: "", - }, - { - path: "test-fixtures/symlinks-simple/link_to_new_readme", - withMIMEType: false, - expectedType: "SymbolicLink", - expectedMIMEType: "", - }, - } - for _, test := range tests { - t.Run(test.path, func(t *testing.T) { - info, err := os.Lstat(test.path) - require.NoError(t, err) - - actual := fileMetadataFromPath(test.path, info, test.withMIMEType) - assert.Equal(t, test.expectedMIMEType, actual.MIMEType) - assert.Equal(t, test.expectedType, string(actual.Type)) - }) - } -} diff --git a/syft/source/file_resolver.go b/syft/source/file_resolver.go index b6ccb481595..63b5dc90b04 100644 --- a/syft/source/file_resolver.go +++ b/syft/source/file_resolver.go @@ -24,18 +24,36 @@ type FileMetadataResolver interface { // FilePathResolver knows how to get a Location for given string paths and globs type FilePathResolver interface { // HasPath indicates if the given path exists in the underlying source. + // The implementation for this may vary, however, generally the following considerations should be made: + // - full symlink resolution should be performed on all requests + // - returns locations for any file or directory HasPath(string) bool - // FilesByPath fetches a set of file references which have the given path (for an image, there may be multiple matches) + + // FilesByPath fetches a set of file references which have the given path (for an image, there may be multiple matches). 
+	// The implementation for this may vary, however, generally the following considerations should be made:
+	// - full symlink resolution should be performed on all requests
+	// - only returns locations to files (NOT directories)
	FilesByPath(paths ...string) ([]Location, error)
-	// FilesByGlob fetches a set of file references which the given glob matches
+
+	// FilesByGlob fetches the set of file references that match the given glob patterns.
+	// The implementation for this may vary, however, generally the following considerations should be made:
+	// - full symlink resolution should be performed on all requests
+	// - if multiple paths to the same file are found, the best single match should be returned
+	// - only returns locations to files (NOT directories)
	FilesByGlob(patterns ...string) ([]Location, error)
-	// FilesByMIMEType fetches a set of file references which the contents have been classified as one of the given MIME Types
+
+	// FilesByMIMEType fetches a set of file references whose contents have been classified as one of the given MIME types.
	FilesByMIMEType(types ...string) ([]Location, error)
+
	// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
	// This is helpful when attempting to find a file that is in the same layer or lower as another file.
	RelativeFileByPath(_ Location, path string) *Location
}

type FileLocationResolver interface {
+	// AllLocations returns a channel of all file references from the underlying source.
+	// The implementation for this may vary, however, generally the following considerations should be made:
+	// - NO symlink resolution should be performed on results
+	// - returns locations for any file or directory
	AllLocations() <-chan Location
}
diff --git a/syft/source/file_type.go b/syft/source/file_type.go
deleted file mode 100644
index 370ea0f8df0..00000000000
--- a/syft/source/file_type.go
+++ /dev/null
@@ -1,70 +0,0 @@
-package source
-
-import (
-	"archive/tar"
-	"os"
-)
-
-const (
-	RegularFile FileType = "RegularFile"
-	// IrregularFile is how syft defines files that are neither regular, symbolic or directory.
-	// For ref: the seven standard Unix file types are regular, directory, symbolic link,
-	// FIFO special, block special, character special, and socket as defined by POSIX.
- IrregularFile FileType = "IrregularFile" - HardLink FileType = "HardLink" - SymbolicLink FileType = "SymbolicLink" - CharacterDevice FileType = "CharacterDevice" - BlockDevice FileType = "BlockDevice" - Directory FileType = "Directory" - FIFONode FileType = "FIFONode" - Socket FileType = "Socket" -) - -type FileType string - -func newFileTypeFromTarHeaderTypeFlag(flag byte) FileType { - switch flag { - case tar.TypeReg, tar.TypeRegA: - return RegularFile - case tar.TypeLink: - return HardLink - case tar.TypeSymlink: - return SymbolicLink - case tar.TypeChar: - return CharacterDevice - case tar.TypeBlock: - return BlockDevice - case tar.TypeDir: - return Directory - case tar.TypeFifo: - return FIFONode - } - return IrregularFile -} - -func newFileTypeFromMode(mode os.FileMode) FileType { - switch { - case isSet(mode, os.ModeSymlink): - return SymbolicLink - case isSet(mode, os.ModeIrregular): - return IrregularFile - case isSet(mode, os.ModeCharDevice): - return CharacterDevice - case isSet(mode, os.ModeDevice): - return BlockDevice - case isSet(mode, os.ModeNamedPipe): - return FIFONode - case isSet(mode, os.ModeSocket): - return Socket - case mode.IsDir(): - return Directory - case mode.IsRegular(): - return RegularFile - default: - return IrregularFile - } -} - -func isSet(mode, field os.FileMode) bool { - return mode&field != 0 -} diff --git a/syft/source/all_layers_resolver.go b/syft/source/image_all_layers_resolver.go similarity index 59% rename from syft/source/all_layers_resolver.go rename to syft/source/image_all_layers_resolver.go index b01d77fab9b..ca40b12718c 100644 --- a/syft/source/all_layers_resolver.go +++ b/syft/source/image_all_layers_resolver.go @@ -1,7 +1,6 @@ package source import ( - "archive/tar" "fmt" "io" @@ -11,16 +10,16 @@ import ( "github.com/anchore/syft/internal/log" ) -var _ FileResolver = (*allLayersResolver)(nil) +var _ FileResolver = (*imageAllLayersResolver)(nil) -// allLayersResolver implements path and content access for the AllLayers source option for container image data sources. -type allLayersResolver struct { +// imageAllLayersResolver implements path and content access for the AllLayers source option for container image data sources. +type imageAllLayersResolver struct { img *image.Image layers []int } // newAllLayersResolver returns a new resolver from the perspective of all image layers for the given image. -func newAllLayersResolver(img *image.Image) (*allLayersResolver, error) { +func newAllLayersResolver(img *image.Image) (*imageAllLayersResolver, error) { if len(img.Layers) == 0 { return nil, fmt.Errorf("the image does not contain any layers") } @@ -29,14 +28,14 @@ func newAllLayersResolver(img *image.Image) (*allLayersResolver, error) { for idx := range img.Layers { layers = append(layers, idx) } - return &allLayersResolver{ + return &imageAllLayersResolver{ img: img, layers: layers, }, nil } // HasPath indicates if the given path exists in the underlying source. 
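// It consults each layer's own file tree in turn (rather than the squashed view) and
// reports true for the first layer that contains the path.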
-func (r *allLayersResolver) HasPath(path string) bool { +func (r *imageAllLayersResolver) HasPath(path string) bool { p := file.Path(path) for _, layerIdx := range r.layers { tree := r.img.Layers[layerIdx].Tree @@ -47,7 +46,7 @@ func (r *allLayersResolver) HasPath(path string) bool { return false } -func (r *allLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.ReferenceSet, layerIdx int) ([]file.Reference, error) { +func (r *imageAllLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.ReferenceSet, layerIdx int) ([]file.Reference, error) { uniqueFiles := make([]file.Reference, 0) // since there is potentially considerable work for each symlink/hardlink that needs to be resolved, let's check to see if this is a symlink/hardlink first @@ -56,7 +55,7 @@ func (r *allLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.Ref return nil, fmt.Errorf("unable to fetch metadata (ref=%+v): %w", ref, err) } - if entry.Metadata.TypeFlag == tar.TypeLink || entry.Metadata.TypeFlag == tar.TypeSymlink { + if entry.Metadata.Type == file.TypeHardLink || entry.Metadata.Type == file.TypeSymLink { // a link may resolve in this layer or higher, assuming a squashed tree is used to search // we should search all possible resolutions within the valid source for _, subLayerIdx := range r.layers[layerIdx:] { @@ -64,9 +63,9 @@ func (r *allLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.Ref if err != nil { return nil, fmt.Errorf("failed to resolve link from layer (layer=%d ref=%+v): %w", subLayerIdx, ref, err) } - if resolvedRef != nil && !uniqueFileIDs.Contains(*resolvedRef) { - uniqueFileIDs.Add(*resolvedRef) - uniqueFiles = append(uniqueFiles, *resolvedRef) + if resolvedRef.HasReference() && !uniqueFileIDs.Contains(*resolvedRef.Reference) { + uniqueFileIDs.Add(*resolvedRef.Reference) + uniqueFiles = append(uniqueFiles, *resolvedRef.Reference) } } } else if !uniqueFileIDs.Contains(ref) { @@ -78,18 +77,17 @@ func (r *allLayersResolver) fileByRef(ref file.Reference, uniqueFileIDs file.Ref } // FilesByPath returns all file.References that match the given paths from any layer in the image. 
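// Results are de-duplicated across layers: symlinks and hardlinks are resolved against
// the matching layer and all higher layers via fileByRef, and directories (including
// the special case "/") are never returned.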
-func (r *allLayersResolver) FilesByPath(paths ...string) ([]Location, error) { +func (r *imageAllLayersResolver) FilesByPath(paths ...string) ([]Location, error) { uniqueFileIDs := file.NewFileReferenceSet() uniqueLocations := make([]Location, 0) for _, path := range paths { for idx, layerIdx := range r.layers { - tree := r.img.Layers[layerIdx].Tree - _, ref, err := tree.File(file.Path(path), filetree.FollowBasenameLinks, filetree.DoNotFollowDeadBasenameLinks) + ref, err := r.img.Layers[layerIdx].SearchContext.SearchByPath(path, filetree.FollowBasenameLinks, filetree.DoNotFollowDeadBasenameLinks) if err != nil { return nil, err } - if ref == nil { + if !ref.HasReference() { // no file found, keep looking through layers continue } @@ -97,8 +95,8 @@ func (r *allLayersResolver) FilesByPath(paths ...string) ([]Location, error) { // don't consider directories (special case: there is no path information for /) if ref.RealPath == "/" { continue - } else if r.img.FileCatalog.Exists(*ref) { - metadata, err := r.img.FileCatalog.Get(*ref) + } else if r.img.FileCatalog.Exists(*ref.Reference) { + metadata, err := r.img.FileCatalog.Get(*ref.Reference) if err != nil { return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", ref.RealPath, err) } @@ -107,7 +105,7 @@ func (r *allLayersResolver) FilesByPath(paths ...string) ([]Location, error) { } } - results, err := r.fileByRef(*ref, uniqueFileIDs, idx) + results, err := r.fileByRef(*ref.Reference, uniqueFileIDs, idx) if err != nil { return nil, err } @@ -120,37 +118,42 @@ func (r *allLayersResolver) FilesByPath(paths ...string) ([]Location, error) { } // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image. -func (r *allLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) { +// nolint:gocognit +func (r *imageAllLayersResolver) FilesByGlob(patterns ...string) ([]Location, error) { uniqueFileIDs := file.NewFileReferenceSet() uniqueLocations := make([]Location, 0) for _, pattern := range patterns { for idx, layerIdx := range r.layers { - results, err := r.img.Layers[layerIdx].SquashedTree.FilesByGlob(pattern, filetree.FollowBasenameLinks, filetree.DoNotFollowDeadBasenameLinks) + results, err := r.img.Layers[layerIdx].SquashedSearchContext.SearchByGlob(pattern, filetree.FollowBasenameLinks, filetree.DoNotFollowDeadBasenameLinks) if err != nil { return nil, fmt.Errorf("failed to resolve files by glob (%s): %w", pattern, err) } for _, result := range results { + if !result.HasReference() { + continue + } // don't consider directories (special case: there is no path information for /) if result.RealPath == "/" { continue - } else if r.img.FileCatalog.Exists(result.Reference) { - metadata, err := r.img.FileCatalog.Get(result.Reference) + } else if r.img.FileCatalog.Exists(*result.Reference) { + metadata, err := r.img.FileCatalog.Get(*result.Reference) if err != nil { - return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", result.MatchPath, err) + return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", result.RequestPath, err) } + // don't consider directories if metadata.Metadata.IsDir { continue } } - refResults, err := r.fileByRef(result.Reference, uniqueFileIDs, idx) + refResults, err := r.fileByRef(*result.Reference, uniqueFileIDs, idx) if err != nil { return nil, err } for _, refResult := range refResults { - uniqueLocations = append(uniqueLocations, NewLocationFromImage(string(result.MatchPath), refResult, r.img)) + uniqueLocations = 
				append(uniqueLocations, NewLocationFromImage(string(result.RequestPath), refResult, r.img))
			}
		}
	}
@@ -161,36 +164,33 @@

// RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
// This is helpful when attempting to find a file that is in the same layer or lower as another file.
-func (r *allLayersResolver) RelativeFileByPath(location Location, path string) *Location {
-	entry, err := r.img.FileCatalog.Get(location.ref)
-	if err != nil {
-		return nil
-	}
+func (r *imageAllLayersResolver) RelativeFileByPath(location Location, path string) *Location {
+	layer := r.img.FileCatalog.Layer(location.ref)
-	exists, relativeRef, err := entry.Layer.SquashedTree.File(file.Path(path), filetree.FollowBasenameLinks)
+	exists, relativeRef, err := layer.SquashedTree.File(file.Path(path), filetree.FollowBasenameLinks)
	if err != nil {
		log.Errorf("failed to find path=%q in squash: %+w", path, err)
		return nil
	}
-	if !exists && relativeRef == nil {
+	if !exists && !relativeRef.HasReference() {
		return nil
	}
-	relativeLocation := NewLocationFromImage(path, *relativeRef, r.img)
+	relativeLocation := NewLocationFromImage(path, *relativeRef.Reference, r.img)
	return &relativeLocation
}

// FileContentsByLocation fetches file contents for a single file reference, regardless of the source layer.
// If the path does not exist an error is returned.
-func (r *allLayersResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) {
+func (r *imageAllLayersResolver) FileContentsByLocation(location Location) (io.ReadCloser, error) {
	entry, err := r.img.FileCatalog.Get(location.ref)
	if err != nil {
		return nil, fmt.Errorf("unable to get metadata for path=%q from file catalog: %w", location.RealPath, err)
	}
-	switch entry.Metadata.TypeFlag {
-	case tar.TypeSymlink, tar.TypeLink:
+	switch entry.Metadata.Type {
+	case file.TypeSymLink, file.TypeHardLink:
		// the location we are searching may be a symlink; we should always work with the resolved file
		newLocation := r.RelativeFileByPath(location, location.VirtualPath)
		if newLocation == nil {
@@ -198,36 +198,48 @@
			return nil, fmt.Errorf("no contents for location=%q", location.VirtualPath)
		}
		location = *newLocation
+	case file.TypeDirectory:
+		return nil, fmt.Errorf("cannot read contents of non-file %q", location.ref.RealPath)
	}

	return r.img.FileContentsByRef(location.ref)
}

-func (r *allLayersResolver) FilesByMIMEType(types ...string) ([]Location, error) {
-	var locations []Location
-	for _, layerIdx := range r.layers {
-		layer := r.img.Layers[layerIdx]
+func (r *imageAllLayersResolver) FilesByMIMEType(types ...string) ([]Location, error) {
+	uniqueFileIDs := file.NewFileReferenceSet()
+	uniqueLocations := make([]Location, 0)
-		refs, err := layer.FilesByMIMEType(types...)
+	for idx, layerIdx := range r.layers {
+		refs, err := r.img.Layers[layerIdx].SearchContext.SearchByMIMEType(types...)
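+		// this consults the layer's own search context rather than the squashed view, so
+		// the same real file may match in several layers; fileByRef and the shared
+		// uniqueFileIDs set below collapse those duplicates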
if err != nil { return nil, err } for _, ref := range refs { - locations = append(locations, NewLocationFromImage(string(ref.RealPath), ref, r.img)) + if !ref.HasReference() { + continue + } + + refResults, err := r.fileByRef(*ref.Reference, uniqueFileIDs, idx) + if err != nil { + return nil, err + } + for _, refResult := range refResults { + uniqueLocations = append(uniqueLocations, NewLocationFromImage(string(ref.RequestPath), refResult, r.img)) + } } } - return locations, nil + return uniqueLocations, nil } -func (r *allLayersResolver) AllLocations() <-chan Location { +func (r *imageAllLayersResolver) AllLocations() <-chan Location { results := make(chan Location) go func() { defer close(results) for _, layerIdx := range r.layers { tree := r.img.Layers[layerIdx].Tree - for _, ref := range tree.AllFiles(file.AllTypes...) { + for _, ref := range tree.AllFiles(file.AllTypes()...) { results <- NewLocationFromImage(string(ref.RealPath), ref, r.img) } } @@ -235,6 +247,6 @@ func (r *allLayersResolver) AllLocations() <-chan Location { return results } -func (r *allLayersResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { +func (r *imageAllLayersResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { return fileMetadataByLocation(r.img, location) } diff --git a/syft/source/all_layers_resolver_test.go b/syft/source/image_all_layers_resolver_test.go similarity index 63% rename from syft/source/all_layers_resolver_test.go rename to syft/source/image_all_layers_resolver_test.go index 2eb17b82b24..1ba11304ddb 100644 --- a/syft/source/all_layers_resolver_test.go +++ b/syft/source/image_all_layers_resolver_test.go @@ -2,8 +2,11 @@ package source import ( "io" + "sort" "testing" + "github.com/google/go-cmp/cmp" + "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -124,13 +127,9 @@ func TestAllLayersResolver_FilesByPath(t *testing.T) { t.Errorf("we should always prefer real paths over ones with links") } - entry, err := img.FileCatalog.Get(actual.ref) - if err != nil { - t.Fatalf("failed to get metadata: %+v", err) - } - - if entry.Layer.Metadata.Index != expected.layer { - t.Errorf("bad resolve layer: '%d'!='%d'", entry.Layer.Metadata.Index, expected.layer) + layer := img.FileCatalog.Layer(actual.ref) + if layer.Metadata.Index != expected.layer { + t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, expected.layer) } } }) @@ -231,13 +230,10 @@ func TestAllLayersResolver_FilesByGlob(t *testing.T) { t.Errorf("we should always prefer real paths over ones with links") } - entry, err := img.FileCatalog.Get(actual.ref) - if err != nil { - t.Fatalf("failed to get metadata: %+v", err) - } + layer := img.FileCatalog.Layer(actual.ref) - if entry.Layer.Metadata.Index != expected.layer { - t.Errorf("bad resolve layer: '%d'!='%d'", entry.Layer.Metadata.Index, expected.layer) + if layer.Metadata.Index != expected.layer { + t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, expected.layer) } } }) @@ -361,6 +357,30 @@ func TestAllLayersImageResolver_FilesContents(t *testing.T) { } } +func TestAllLayersImageResolver_FilesContents_errorOnDirRequest(t *testing.T) { + + img := imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := newAllLayersResolver(img) + assert.NoError(t, err) + + var dirLoc *Location + for loc := range resolver.AllLocations() { + entry, err := resolver.img.FileCatalog.Get(loc.ref) + require.NoError(t, err) + if entry.Metadata.IsDir { + 
dirLoc = &loc + break + } + } + + require.NotNil(t, dirLoc) + + reader, err := resolver.FileContentsByLocation(*dirLoc) + require.Error(t, err) + require.Nil(t, reader) +} + func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { tests := []struct { name string @@ -407,6 +427,8 @@ func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { }, VirtualPath: "/file-2.txt", }, + // note: we're de-duping the redundant access to file-3.txt + // ... (there would usually be two copies) { Coordinates: Coordinates{ RealPath: "/file-3.txt", @@ -437,7 +459,7 @@ func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { }, }, { - name: "by glob", + name: "by glob to links", runner: func(resolver FileResolver) []Location { // links are searched, but resolve to the real files actualLocations, err := resolver.FilesByGlob("*ink-*") @@ -473,6 +495,131 @@ func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { }, }, }, + { + name: "by basename", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-2.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // copy 1 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + // copy 2 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + }, + }, + { + name: "by basename glob", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-?.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/file-1.txt", + }, + // copy 1 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + // copy 2 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/file-3.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + // when we copy into the link path, the same file-4.txt is copied + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + }, + }, + { + name: "by extension", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/*.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/file-1.txt", + }, + // copy 1 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + // copy 2 + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/file-3.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + // when we copy into the link path, the same file-4.txt is copied + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + }, + }, { name: "by path to degree 1 link", runner: func(resolver FileResolver) []Location { @@ -531,13 +678,146 @@ 
func Test_imageAllLayersResolver_resolvesLinks(t *testing.T) { resolver, err := newAllLayersResolver(img) assert.NoError(t, err) - actualLocations := test.runner(resolver) - assert.Len(t, actualLocations, len(test.expected)) - for i, actual := range actualLocations { - assert.Equal(t, test.expected[i].RealPath, actual.RealPath) - assert.Equal(t, test.expected[i].VirtualPath, actual.VirtualPath) - } + actual := test.runner(resolver) + + compareLocations(t, test.expected, actual) }) } } + +func TestAllLayersResolver_AllLocations(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") + + resolver, err := newAllLayersResolver(img) + assert.NoError(t, err) + + paths := strset.New() + for loc := range resolver.AllLocations() { + paths.Add(loc.RealPath) + } + expected := []string{ + "/Dockerfile", + "/file-1.txt", + "/file-3.txt", + "/target", + "/target/file-2.txt", + + "/.wh.bin", + "/.wh.file-1.txt", + "/.wh.lib", + "/bin", + "/bin/arch", + "/bin/ash", + "/bin/base64", + "/bin/bbconfig", + "/bin/busybox", + "/bin/cat", + "/bin/chattr", + "/bin/chgrp", + "/bin/chmod", + "/bin/chown", + "/bin/cp", + "/bin/date", + "/bin/dd", + "/bin/df", + "/bin/dmesg", + "/bin/dnsdomainname", + "/bin/dumpkmap", + "/bin/echo", + "/bin/ed", + "/bin/egrep", + "/bin/false", + "/bin/fatattr", + "/bin/fdflush", + "/bin/fgrep", + "/bin/fsync", + "/bin/getopt", + "/bin/grep", + "/bin/gunzip", + "/bin/gzip", + "/bin/hostname", + "/bin/ionice", + "/bin/iostat", + "/bin/ipcalc", + "/bin/kbd_mode", + "/bin/kill", + "/bin/link", + "/bin/linux32", + "/bin/linux64", + "/bin/ln", + "/bin/login", + "/bin/ls", + "/bin/lsattr", + "/bin/lzop", + "/bin/makemime", + "/bin/mkdir", + "/bin/mknod", + "/bin/mktemp", + "/bin/more", + "/bin/mount", + "/bin/mountpoint", + "/bin/mpstat", + "/bin/mv", + "/bin/netstat", + "/bin/nice", + "/bin/pidof", + "/bin/ping", + "/bin/ping6", + "/bin/pipe_progress", + "/bin/printenv", + "/bin/ps", + "/bin/pwd", + "/bin/reformime", + "/bin/rev", + "/bin/rm", + "/bin/rmdir", + "/bin/run-parts", + "/bin/sed", + "/bin/setpriv", + "/bin/setserial", + "/bin/sh", + "/bin/sleep", + "/bin/stat", + "/bin/stty", + "/bin/su", + "/bin/sync", + "/bin/tar", + "/bin/touch", + "/bin/true", + "/bin/umount", + "/bin/uname", + "/bin/usleep", + "/bin/watch", + "/bin/zcat", + "/lib", + "/lib/apk", + "/lib/apk/db", + "/lib/apk/db/installed", + "/lib/apk/db/lock", + "/lib/apk/db/scripts.tar", + "/lib/apk/db/triggers", + "/lib/apk/exec", + "/lib/firmware", + "/lib/ld-musl-x86_64.so.1", + "/lib/libapk.so.3.12.0", + "/lib/libc.musl-x86_64.so.1", + "/lib/libcrypto.so.3", + "/lib/libssl.so.3", + "/lib/libz.so.1", + "/lib/libz.so.1.2.13", + "/lib/mdev", + "/lib/modules-load.d", + "/lib/sysctl.d", + "/lib/sysctl.d/00-alpine.conf", + } + + // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. + // this isn't important for the test, so we remove them. 
+ paths.Remove("/proc", "/sys", "/dev", "/etc") + + pathsList := paths.List() + sort.Strings(pathsList) + + assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) +} diff --git a/syft/source/image_squash_resolver.go b/syft/source/image_squash_resolver.go index 8977023a102..d62927b309c 100644 --- a/syft/source/image_squash_resolver.go +++ b/syft/source/image_squash_resolver.go @@ -1,7 +1,6 @@ package source import ( - "archive/tar" "fmt" "io" @@ -39,12 +38,11 @@ func (r *imageSquashResolver) FilesByPath(paths ...string) ([]Location, error) { uniqueLocations := make([]Location, 0) for _, path := range paths { - tree := r.img.SquashedTree() - _, ref, err := tree.File(file.Path(path), filetree.FollowBasenameLinks) + ref, err := r.img.SquashedSearchContext.SearchByPath(path, filetree.FollowBasenameLinks) if err != nil { return nil, err } - if ref == nil { + if !ref.HasReference() { // no file found, keep looking through layers continue } @@ -52,25 +50,26 @@ func (r *imageSquashResolver) FilesByPath(paths ...string) ([]Location, error) { // don't consider directories (special case: there is no path information for /) if ref.RealPath == "/" { continue - } else if r.img.FileCatalog.Exists(*ref) { - metadata, err := r.img.FileCatalog.Get(*ref) + } else if r.img.FileCatalog.Exists(*ref.Reference) { + metadata, err := r.img.FileCatalog.Get(*ref.Reference) if err != nil { return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", ref.RealPath, err) } + // don't consider directories if metadata.Metadata.IsDir { continue } } // a file may be a symlink, process it as such and resolve it - resolvedRef, err := r.img.ResolveLinkByImageSquash(*ref) + resolvedRef, err := r.img.ResolveLinkByImageSquash(*ref.Reference) if err != nil { return nil, fmt.Errorf("failed to resolve link from img (ref=%+v): %w", ref, err) } - if resolvedRef != nil && !uniqueFileIDs.Contains(*resolvedRef) { - uniqueFileIDs.Add(*resolvedRef) - uniqueLocations = append(uniqueLocations, NewLocationFromImage(path, *resolvedRef, r.img)) + if resolvedRef.HasReference() && !uniqueFileIDs.Contains(*resolvedRef.Reference) { + uniqueFileIDs.Add(*resolvedRef.Reference) + uniqueLocations = append(uniqueLocations, NewLocationFromImage(path, *resolvedRef.Reference, r.img)) } } @@ -78,41 +77,47 @@ func (r *imageSquashResolver) FilesByPath(paths ...string) ([]Location, error) { } // FilesByGlob returns all file.References that match the given path glob pattern within the squashed representation of the image. 
+// nolint:gocognit func (r *imageSquashResolver) FilesByGlob(patterns ...string) ([]Location, error) { uniqueFileIDs := file.NewFileReferenceSet() uniqueLocations := make([]Location, 0) for _, pattern := range patterns { - results, err := r.img.SquashedTree().FilesByGlob(pattern, filetree.FollowBasenameLinks) + results, err := r.img.SquashedSearchContext.SearchByGlob(pattern, filetree.FollowBasenameLinks) if err != nil { return nil, fmt.Errorf("failed to resolve files by glob (%s): %w", pattern, err) } for _, result := range results { + if !result.HasReference() { + continue + } // don't consider directories (special case: there is no path information for /) - if result.MatchPath == "/" { + if result.RealPath == "/" { continue } - if r.img.FileCatalog.Exists(result.Reference) { - metadata, err := r.img.FileCatalog.Get(result.Reference) + if r.img.FileCatalog.Exists(*result.Reference) { + metadata, err := r.img.FileCatalog.Get(*result.Reference) if err != nil { - return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", result.MatchPath, err) + return nil, fmt.Errorf("unable to get file metadata for path=%q: %w", result.RequestPath, err) } + // don't consider directories if metadata.Metadata.IsDir { continue } } - - resolvedLocations, err := r.FilesByPath(string(result.MatchPath)) + // TODO: alex: can't we just use the result.Reference here instead? + resolvedLocations, err := r.FilesByPath(string(result.RequestPath)) if err != nil { return nil, fmt.Errorf("failed to find files by path (result=%+v): %w", result, err) } for _, resolvedLocation := range resolvedLocations { - if !uniqueFileIDs.Contains(resolvedLocation.ref) { - uniqueFileIDs.Add(resolvedLocation.ref) - uniqueLocations = append(uniqueLocations, resolvedLocation) + if uniqueFileIDs.Contains(resolvedLocation.ref) { + continue } + uniqueFileIDs.Add(resolvedLocation.ref) + uniqueLocations = append(uniqueLocations, resolvedLocation) } } } @@ -143,8 +148,8 @@ func (r *imageSquashResolver) FileContentsByLocation(location Location) (io.Read return nil, fmt.Errorf("unable to get metadata for path=%q from file catalog: %w", location.RealPath, err) } - switch entry.Metadata.TypeFlag { - case tar.TypeSymlink, tar.TypeLink: + switch entry.Metadata.Type { + case file.TypeSymLink, file.TypeHardLink: // the location we are searching may be a symlink, we should always work with the resolved file locations, err := r.FilesByPath(location.RealPath) if err != nil { @@ -159,6 +164,8 @@ func (r *imageSquashResolver) FileContentsByLocation(location Location) (io.Read default: return nil, fmt.Errorf("link resolution resulted in multiple results while resolving content location: %+v", location) } + case file.TypeDirectory: + return nil, fmt.Errorf("unable to get file contents for directory: %+v", location) } return r.img.FileContentsByRef(location.ref) @@ -168,7 +175,7 @@ func (r *imageSquashResolver) AllLocations() <-chan Location { results := make(chan Location) go func() { defer close(results) - for _, ref := range r.img.SquashedTree().AllFiles(file.AllTypes...) { + for _, ref := range r.img.SquashedTree().AllFiles(file.AllTypes()...) { results <- NewLocationFromImage(string(ref.RealPath), ref, r.img) } }() @@ -176,17 +183,27 @@ func (r *imageSquashResolver) AllLocations() <-chan Location { } func (r *imageSquashResolver) FilesByMIMEType(types ...string) ([]Location, error) { - refs, err := r.img.FilesByMIMETypeFromSquash(types...) + refs, err := r.img.SquashedSearchContext.SearchByMIMEType(types...) 
if err != nil { return nil, err } - var locations []Location + uniqueFileIDs := file.NewFileReferenceSet() + uniqueLocations := make([]Location, 0) + for _, ref := range refs { - locations = append(locations, NewLocationFromImage(string(ref.RealPath), ref, r.img)) + if ref.HasReference() { + if uniqueFileIDs.Contains(*ref.Reference) { + continue + } + location := NewLocationFromImage(string(ref.RequestPath), *ref.Reference, r.img) + + uniqueFileIDs.Add(*ref.Reference) + uniqueLocations = append(uniqueLocations, location) + } } - return locations, nil + return uniqueLocations, nil } func (r *imageSquashResolver) FileMetadataByLocation(location Location) (FileMetadata, error) { diff --git a/syft/source/image_squash_resolver_test.go b/syft/source/image_squash_resolver_test.go index 7bb7c958769..cfbeff1677f 100644 --- a/syft/source/image_squash_resolver_test.go +++ b/syft/source/image_squash_resolver_test.go @@ -2,12 +2,16 @@ package source import ( "io" + "sort" "testing" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/imagetest" ) @@ -114,13 +118,10 @@ func TestImageSquashResolver_FilesByPath(t *testing.T) { t.Errorf("we should always prefer real paths over ones with links") } - entry, err := img.FileCatalog.Get(actual.ref) - if err != nil { - t.Fatalf("failed to get metadata: %+v", err) - } + layer := img.FileCatalog.Layer(actual.ref) - if entry.Layer.Metadata.Index != c.resolveLayer { - t.Errorf("bad resolve layer: '%d'!='%d'", entry.Layer.Metadata.Index, c.resolveLayer) + if layer.Metadata.Index != c.resolveLayer { + t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) } }) } @@ -219,13 +220,10 @@ func TestImageSquashResolver_FilesByGlob(t *testing.T) { t.Errorf("we should always prefer real paths over ones with links") } - entry, err := img.FileCatalog.Get(actual.ref) - if err != nil { - t.Fatalf("failed to get metadata: %+v", err) - } + layer := img.FileCatalog.Layer(actual.ref) - if entry.Layer.Metadata.Index != c.resolveLayer { - t.Errorf("bad resolve layer: '%d'!='%d'", entry.Layer.Metadata.Index, c.resolveLayer) + if layer.Metadata.Index != c.resolveLayer { + t.Errorf("bad resolve layer: '%d'!='%d'", layer.Metadata.Index, c.resolveLayer) } }) } @@ -296,26 +294,26 @@ func TestSquashImageResolver_FilesContents(t *testing.T) { tests := []struct { name string - fixture string + path string contents []string }{ { - name: "one degree", - fixture: "link-2", + name: "one degree", + path: "link-2", contents: []string{ "NEW file override!", // always from the squashed perspective }, }, { - name: "two degrees", - fixture: "link-indirect", + name: "two degrees", + path: "link-indirect", contents: []string{ "NEW file override!", // always from the squashed perspective }, }, { name: "dead link", - fixture: "link-dead", + path: "link-dead", contents: []string{}, }, } @@ -327,7 +325,7 @@ func TestSquashImageResolver_FilesContents(t *testing.T) { resolver, err := newImageSquashResolver(img) assert.NoError(t, err) - refs, err := resolver.FilesByPath(test.fixture) + refs, err := resolver.FilesByPath(test.path) require.NoError(t, err) assert.Len(t, refs, len(test.contents)) @@ -344,6 +342,31 @@ func TestSquashImageResolver_FilesContents(t *testing.T) { }) } } + +func TestSquashImageResolver_FilesContents_errorOnDirRequest(t *testing.T) { + + img := 
imagetest.GetFixtureImage(t, "docker-archive", "image-symlinks") + + resolver, err := newImageSquashResolver(img) + assert.NoError(t, err) + + var dirLoc *Location + for loc := range resolver.AllLocations() { + entry, err := resolver.img.FileCatalog.Get(loc.ref) + require.NoError(t, err) + if entry.Metadata.IsDir { + dirLoc = &loc + break + } + } + + require.NotNil(t, dirLoc) + + reader, err := resolver.FileContentsByLocation(*dirLoc) + require.Error(t, err) + require.Nil(t, reader) +} + func Test_imageSquashResolver_resolvesLinks(t *testing.T) { tests := []struct { name string @@ -404,7 +427,7 @@ func Test_imageSquashResolver_resolvesLinks(t *testing.T) { }, }, { - name: "by glob", + name: "by glob to links", runner: func(resolver FileResolver) []Location { // links are searched, but resolve to the real files actualLocations, err := resolver.FilesByGlob("*ink-*") @@ -412,23 +435,158 @@ func Test_imageSquashResolver_resolvesLinks(t *testing.T) { return actualLocations }, expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/link-1", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + }, + // though this is a link, and it matches to the file, the resolver de-duplicates files + // by the real path, so it is not included in the results + //{ + // Coordinates: Coordinates{ + // RealPath: "/file-2.txt", + // }, + // VirtualPath: "/link-indirect", + //}, { Coordinates: Coordinates{ RealPath: "/file-3.txt", }, VirtualPath: "/link-within", }, + }, + }, + { + name: "by basename", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-2.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + // this has two copies in the base image, which overwrites the same location { Coordinates: Coordinates{ RealPath: "/file-2.txt", }, - VirtualPath: "/link-2", + VirtualPath: "/file-2.txt", }, + }, + }, + { + name: "by basename glob", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/file-?.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/file-1.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/file-3.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", + }, + }, + }, + { + name: "by basename glob to links", + runner: func(resolver FileResolver) []Location { + actualLocations, err := resolver.FilesByGlob("**/link-*") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ { Coordinates: Coordinates{ RealPath: "/file-1.txt", }, VirtualPath: "/link-1", + ref: file.Reference{RealPath: "/file-1.txt"}, + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/link-2", + ref: file.Reference{RealPath: "/file-2.txt"}, + }, + // we already have this real file path via another link, so only one is returned + //{ + // Coordinates: Coordinates{ + // RealPath: "/file-2.txt", + // }, + // VirtualPath: "/link-indirect", + // ref: file.Reference{RealPath: "/file-2.txt"}, + //}, + { + Coordinates: Coordinates{ + RealPath: 
"/file-3.txt", + }, + VirtualPath: "/link-within", + ref: file.Reference{RealPath: "/file-3.txt"}, + }, + }, + }, + { + name: "by extension", + runner: func(resolver FileResolver) []Location { + // links are searched, but resolve to the real files + actualLocations, err := resolver.FilesByGlob("**/*.txt") + assert.NoError(t, err) + return actualLocations + }, + expected: []Location{ + { + Coordinates: Coordinates{ + RealPath: "/file-1.txt", + }, + VirtualPath: "/file-1.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-2.txt", + }, + VirtualPath: "/file-2.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/file-3.txt", + }, + VirtualPath: "/file-3.txt", + }, + { + Coordinates: Coordinates{ + RealPath: "/parent/file-4.txt", + }, + VirtualPath: "/parent/file-4.txt", }, }, }, @@ -478,23 +636,55 @@ func Test_imageSquashResolver_resolvesLinks(t *testing.T) { resolver, err := newImageSquashResolver(img) assert.NoError(t, err) - actualLocations := test.runner(resolver) - require.Len(t, actualLocations, len(test.expected)) + actual := test.runner(resolver) - // some operations on this resolver do not return stable results (order may be different across runs) + compareLocations(t, test.expected, actual) + }) + } - expectedMap := make(map[string]string) - for _, e := range test.expected { - expectedMap[e.VirtualPath] = e.RealPath - } +} - actualMap := make(map[string]string) - for _, a := range test.expected { - actualMap[a.VirtualPath] = a.RealPath - } +func compareLocations(t *testing.T, expected, actual []Location) { + t.Helper() + ignoreUnexported := cmpopts.IgnoreFields(Location{}, "ref") + ignoreFS := cmpopts.IgnoreFields(Coordinates{}, "FileSystemID") - assert.Equal(t, expectedMap, actualMap) - }) + sort.Sort(Locations(expected)) + sort.Sort(Locations(actual)) + + if d := cmp.Diff(expected, actual, + ignoreUnexported, + ignoreFS, + ); d != "" { + + t.Errorf("unexpected locations (-want +got):\n%s", d) } } + +func TestSquashResolver_AllLocations(t *testing.T) { + img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") + + resolver, err := newImageSquashResolver(img) + assert.NoError(t, err) + + paths := strset.New() + for loc := range resolver.AllLocations() { + paths.Add(loc.RealPath) + } + expected := []string{ + "/Dockerfile", + "/file-3.txt", + "/target", + "/target/file-2.txt", + } + + // depending on how the image is built (either from linux or mac), sys and proc might accidentally be added to the image. + // this isn't important for the test, so we remove them. + paths.Remove("/proc", "/sys", "/dev", "/etc") + + pathsList := paths.List() + sort.Strings(pathsList) + + assert.ElementsMatchf(t, expected, pathsList, "expected all paths to be indexed, but found different paths: \n%s", cmp.Diff(expected, paths.List())) +} diff --git a/syft/source/location.go b/syft/source/location.go index f131057d192..a284d8dacce 100644 --- a/syft/source/location.go +++ b/syft/source/location.go @@ -5,7 +5,6 @@ import ( "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/image" - "github.com/anchore/syft/internal/log" ) // Location represents a path relative to a particular filesystem resolved to a specific file.Reference. This struct is used as a key @@ -46,22 +45,11 @@ func NewLocationFromCoordinates(coordinates Coordinates) Location { // NewLocationFromImage creates a new Location representing the given path (extracted from the ref) relative to the given image. 
func NewLocationFromImage(virtualPath string, ref file.Reference, img *image.Image) Location { - entry, err := img.FileCatalog.Get(ref) - if err != nil { - log.Warnf("unable to find file catalog entry for ref=%+v", ref) - return Location{ - Coordinates: Coordinates{ - RealPath: string(ref.RealPath), - }, - VirtualPath: virtualPath, - ref: ref, - } - } - + layer := img.FileCatalog.Layer(ref) return Location{ Coordinates: Coordinates{ RealPath: string(ref.RealPath), - FileSystemID: entry.Layer.Metadata.Digest, + FileSystemID: layer.Metadata.Digest, }, VirtualPath: virtualPath, ref: ref, diff --git a/syft/source/mock_resolver.go b/syft/source/mock_resolver.go index 51d7edc4944..aad47abd94d 100644 --- a/syft/source/mock_resolver.go +++ b/syft/source/mock_resolver.go @@ -4,8 +4,11 @@ import ( "fmt" "io" "os" + "path" "github.com/bmatcuk/doublestar/v4" + + "github.com/anchore/stereoscope/pkg/file" ) var _ FileResolver = (*MockResolver)(nil) @@ -17,34 +20,53 @@ type MockResolver struct { locations []Location metadata map[Location]FileMetadata mimeTypeIndex map[string][]Location + extension map[string][]Location + basename map[string][]Location } // NewMockResolverForPaths creates a new MockResolver, where the only resolvable // files are those specified by the supplied paths. func NewMockResolverForPaths(paths ...string) *MockResolver { var locations []Location + extension := make(map[string][]Location) + basename := make(map[string][]Location) for _, p := range paths { - locations = append(locations, NewLocation(p)) + loc := NewLocation(p) + locations = append(locations, loc) + ext := path.Ext(p) + extension[ext] = append(extension[ext], loc) + bn := path.Base(p) + basename[bn] = append(basename[bn], loc) } return &MockResolver{ locations: locations, metadata: make(map[Location]FileMetadata), + extension: extension, + basename: basename, } } func NewMockResolverForPathsWithMetadata(metadata map[Location]FileMetadata) *MockResolver { var locations []Location var mimeTypeIndex = make(map[string][]Location) + extension := make(map[string][]Location) + basename := make(map[string][]Location) for l, m := range metadata { locations = append(locations, l) mimeTypeIndex[m.MIMEType] = append(mimeTypeIndex[m.MIMEType], l) + ext := path.Ext(l.RealPath) + extension[ext] = append(extension[ext], l) + bn := path.Base(l.RealPath) + basename[bn] = append(basename[bn], l) } return &MockResolver{ locations: locations, metadata: metadata, mimeTypeIndex: mimeTypeIndex, + extension: extension, + basename: basename, } } @@ -139,9 +161,9 @@ func (r MockResolver) FileMetadataByLocation(l Location) (FileMetadata, error) { } // other types not supported - ty := RegularFile + ty := file.TypeRegular if info.IsDir() { - ty = Directory + ty = file.TypeDirectory } return FileMetadata{ @@ -160,3 +182,24 @@ func (r MockResolver) FilesByMIMEType(types ...string) ([]Location, error) { } return locations, nil } + +func (r MockResolver) FilesByExtension(extensions ...string) ([]Location, error) { + var results []Location + for _, ext := range extensions { + results = append(results, r.extension[ext]...) + } + return results, nil +} + +func (r MockResolver) FilesByBasename(filenames ...string) ([]Location, error) { + var results []Location + for _, filename := range filenames { + results = append(results, r.basename[filename]...) 
+ } + return results, nil +} + +func (r MockResolver) FilesByBasenameGlob(globs ...string) ([]Location, error) { + // TODO implement me + panic("implement me") +} diff --git a/syft/source/source.go b/syft/source/source.go index ed96dd11061..76b5470638e 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -518,7 +518,7 @@ func getImageExclusionFunction(exclusions []string) func(string) bool { } } -func getDirectoryExclusionFunctions(root string, exclusions []string) ([]pathFilterFn, error) { +func getDirectoryExclusionFunctions(root string, exclusions []string) ([]pathIndexVisitor, error) { if len(exclusions) == 0 { return nil, nil } @@ -551,20 +551,23 @@ func getDirectoryExclusionFunctions(root string, exclusions []string) ([]pathFil return nil, fmt.Errorf("invalid exclusion pattern(s): '%s' (must start with one of: './', '*/', or '**/')", strings.Join(errors, "', '")) } - return []pathFilterFn{ - func(path string, _ os.FileInfo) bool { + return []pathIndexVisitor{ + func(path string, info os.FileInfo, _ error) error { for _, exclusion := range exclusions { // this is required to handle Windows filepaths path = filepath.ToSlash(path) matches, err := doublestar.Match(exclusion, path) if err != nil { - return false + return nil } if matches { - return true + if info != nil && info.IsDir() { + return filepath.SkipDir + } + return errSkipPath } } - return false + return nil }, }, nil } diff --git a/syft/source/source_test.go b/syft/source/source_test.go index c172a3572c5..971dfde9854 100644 --- a/syft/source/source_test.go +++ b/syft/source/source_test.go @@ -5,15 +5,19 @@ package source import ( "io" + "io/fs" "io/ioutil" "os" "os/exec" "path" "path/filepath" + "sort" "strings" "syscall" "testing" + "time" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -455,7 +459,7 @@ func TestDirectoryExclusions(t *testing.T) { desc string input string glob string - expected int + expected []string exclusions []string err bool }{ @@ -463,86 +467,128 @@ func TestDirectoryExclusions(t *testing.T) { input: "test-fixtures/system_paths", desc: "exclude everything", glob: "**", - expected: 0, + expected: nil, exclusions: []string{"**/*"}, }, { - input: "test-fixtures/image-simple", - desc: "a single path excluded", - glob: "**", - expected: 3, + input: "test-fixtures/image-simple", + desc: "a single path excluded", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + }, exclusions: []string{"**/target/**"}, }, { - input: "test-fixtures/image-simple", - desc: "exclude explicit directory relative to the root", - glob: "**", - expected: 3, + input: "test-fixtures/image-simple", + desc: "exclude explicit directory relative to the root", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + //"target/really/nested/file-3.txt", // explicitly skipped + }, exclusions: []string{"./target"}, }, { - input: "test-fixtures/image-simple", - desc: "exclude explicit file relative to the root", - glob: "**", - expected: 3, + input: "test-fixtures/image-simple", + desc: "exclude explicit file relative to the root", + glob: "**", + expected: []string{ + "Dockerfile", + //"file-1.txt", // explicitly skipped + "file-2.txt", + "target/really/nested/file-3.txt", + }, exclusions: []string{"./file-1.txt"}, }, { - input: "test-fixtures/image-simple", - desc: "exclude wildcard relative to the root", - glob: "**", - expected: 2, + input: "test-fixtures/image-simple", + desc: "exclude wildcard relative 
to the root", + glob: "**", + expected: []string{ + "Dockerfile", + //"file-1.txt", // explicitly skipped + //"file-2.txt", // explicitly skipped + "target/really/nested/file-3.txt", + }, exclusions: []string{"./*.txt"}, }, { - input: "test-fixtures/image-simple", - desc: "exclude files deeper", - glob: "**", - expected: 3, + input: "test-fixtures/image-simple", + desc: "exclude files deeper", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + //"target/really/nested/file-3.txt", // explicitly skipped + }, exclusions: []string{"**/really/**"}, }, { - input: "test-fixtures/image-simple", - desc: "files excluded with extension", - glob: "**", - expected: 1, + input: "test-fixtures/image-simple", + desc: "files excluded with extension", + glob: "**", + expected: []string{ + "Dockerfile", + //"file-1.txt", // explicitly skipped + //"file-2.txt", // explicitly skipped + //"target/really/nested/file-3.txt", // explicitly skipped + }, exclusions: []string{"**/*.txt"}, }, { - input: "test-fixtures/image-simple", - desc: "keep files with different extensions", - glob: "**", - expected: 4, + input: "test-fixtures/image-simple", + desc: "keep files with different extensions", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + "target/really/nested/file-3.txt", + }, exclusions: []string{"**/target/**/*.jar"}, }, { - input: "test-fixtures/path-detected", - desc: "file directly excluded", - glob: "**", - expected: 1, + input: "test-fixtures/path-detected", + desc: "file directly excluded", + glob: "**", + expected: []string{ + ".vimrc", + }, exclusions: []string{"**/empty"}, }, { - input: "test-fixtures/path-detected", - desc: "pattern error containing **/", - glob: "**", - expected: 1, + input: "test-fixtures/path-detected", + desc: "pattern error containing **/", + glob: "**", + expected: []string{ + ".vimrc", + }, exclusions: []string{"/**/empty"}, err: true, }, { - input: "test-fixtures/path-detected", - desc: "pattern error incorrect start", - glob: "**", - expected: 1, + input: "test-fixtures/path-detected", + desc: "pattern error incorrect start", + glob: "**", + expected: []string{ + ".vimrc", + }, exclusions: []string{"empty"}, err: true, }, { - input: "test-fixtures/path-detected", - desc: "pattern error starting with /", - glob: "**", - expected: 1, + input: "test-fixtures/path-detected", + desc: "pattern error starting with /", + glob: "**", + expected: []string{ + ".vimrc", + }, exclusions: []string{"/empty"}, err: true, }, @@ -570,13 +616,19 @@ func TestDirectoryExclusions(t *testing.T) { if err != nil { t.Errorf("could not get resolver error: %+v", err) } - contents, err := resolver.FilesByGlob(test.glob) + locations, err := resolver.FilesByGlob(test.glob) if err != nil { t.Errorf("could not get files by glob: %s+v", err) } - if len(contents) != test.expected { - t.Errorf("wrong number of files after exclusions (%s): %d != %d", test.glob, len(contents), test.expected) + var actual []string + for _, l := range locations { + actual = append(actual, l.RealPath) } + + sort.Strings(test.expected) + sort.Strings(actual) + + assert.Equal(t, test.expected, actual, "diff \n"+cmp.Diff(test.expected, actual)) }) } } @@ -667,59 +719,118 @@ func TestImageExclusions(t *testing.T) { } } +type dummyInfo struct { + isDir bool +} + +func (d dummyInfo) Name() string { + //TODO implement me + panic("implement me") +} + +func (d dummyInfo) Size() int64 { + //TODO implement me + panic("implement me") +} + +func (d dummyInfo) Mode() 
fs.FileMode { + //TODO implement me + panic("implement me") +} + +func (d dummyInfo) ModTime() time.Time { + //TODO implement me + panic("implement me") +} + +func (d dummyInfo) IsDir() bool { + return d.isDir +} + +func (d dummyInfo) Sys() any { + //TODO implement me + panic("implement me") +} + func Test_crossPlatformExclusions(t *testing.T) { testCases := []struct { - desc string - root string - path string - exclude string - match bool + desc string + root string + path string + finfo os.FileInfo + exclude string + walkHint error }{ { - desc: "linux doublestar", - root: "/usr", - path: "/usr/var/lib/etc.txt", - exclude: "**/*.txt", - match: true, + desc: "directory exclusion", + root: "/", + path: "/usr/var/lib", + exclude: "**/var/lib", + finfo: dummyInfo{isDir: true}, + walkHint: fs.SkipDir, + }, + { + desc: "no file info", + root: "/", + path: "/usr/var/lib", + exclude: "**/var/lib", + walkHint: errSkipPath, + }, + // linux specific tests... + { + desc: "linux doublestar", + root: "/usr", + path: "/usr/var/lib/etc.txt", + exclude: "**/*.txt", + finfo: dummyInfo{isDir: false}, + walkHint: errSkipPath, }, { desc: "linux relative", root: "/usr/var/lib", path: "/usr/var/lib/etc.txt", exclude: "./*.txt", - match: true, + finfo: dummyInfo{isDir: false}, + + walkHint: errSkipPath, }, { - desc: "linux one level", - root: "/usr", - path: "/usr/var/lib/etc.txt", - exclude: "*/*.txt", - match: false, + desc: "linux one level", + root: "/usr", + path: "/usr/var/lib/etc.txt", + exclude: "*/*.txt", + finfo: dummyInfo{isDir: false}, + walkHint: nil, }, // NOTE: since these tests will run in linux and macOS, the windows paths will be // considered relative if they do not start with a forward slash and paths with backslashes // won't be modified by the filepath.ToSlash call, so these are emulating the result of // filepath.ToSlash usage + + // windows specific tests... { - desc: "windows doublestar", - root: "/C:/User/stuff", - path: "/C:/User/stuff/thing.txt", - exclude: "**/*.txt", - match: true, + desc: "windows doublestar", + root: "/C:/User/stuff", + path: "/C:/User/stuff/thing.txt", + exclude: "**/*.txt", + finfo: dummyInfo{isDir: false}, + walkHint: errSkipPath, }, { - desc: "windows relative", - root: "/C:/User/stuff", - path: "/C:/User/stuff/thing.txt", - exclude: "./*.txt", - match: true, + desc: "windows relative", + root: "/C:/User/stuff", + path: "/C:/User/stuff/thing.txt", + exclude: "./*.txt", + finfo: dummyInfo{isDir: false}, + walkHint: errSkipPath, }, { - desc: "windows one level", - root: "/C:/User/stuff", - path: "/C:/User/stuff/thing.txt", - exclude: "*/*.txt", - match: false, + desc: "windows one level", + root: "/C:/User/stuff", + path: "/C:/User/stuff/thing.txt", + exclude: "*/*.txt", + finfo: dummyInfo{isDir: false}, + walkHint: nil, }, } @@ -729,8 +840,8 @@ func Test_crossPlatformExclusions(t *testing.T) { require.NoError(t, err) for _, f := range fns { - result := f(test.path, nil) - require.Equal(t, test.match, result) + result := f(test.path, test.finfo, nil) + require.Equal(t, test.walkHint, result) } }) } diff --git a/syft/source/test-fixtures/image-files-deleted/Dockerfile b/syft/source/test-fixtures/image-files-deleted/Dockerfile new file mode 100644 index 00000000000..5c5755194f3 --- /dev/null +++ b/syft/source/test-fixtures/image-files-deleted/Dockerfile @@ -0,0 +1,6 @@ +FROM alpine:3.17.1 as tools +FROM scratch +COPY --from=tools /bin /bin +COPY --from=tools /lib /lib +ADD . . 
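+# note: deleting paths that exist in earlier layers produces whiteout entries
+# (e.g. /.wh.file-1.txt, /.wh.bin, /.wh.lib), which the resolver tests expect to see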
+RUN rm -rf file-1.txt /bin /lib diff --git a/syft/source/test-fixtures/image-files-deleted/file-1.txt b/syft/source/test-fixtures/image-files-deleted/file-1.txt new file mode 100644 index 00000000000..985d3408e98 --- /dev/null +++ b/syft/source/test-fixtures/image-files-deleted/file-1.txt @@ -0,0 +1 @@ +this file has contents \ No newline at end of file diff --git a/syft/source/test-fixtures/image-files-deleted/file-3.txt b/syft/source/test-fixtures/image-files-deleted/file-3.txt new file mode 120000 index 00000000000..4036f0fd4d3 --- /dev/null +++ b/syft/source/test-fixtures/image-files-deleted/file-3.txt @@ -0,0 +1 @@ +file-1.txt \ No newline at end of file diff --git a/syft/source/test-fixtures/image-files-deleted/target/file-2.txt b/syft/source/test-fixtures/image-files-deleted/target/file-2.txt new file mode 100644 index 00000000000..396d08bbc72 --- /dev/null +++ b/syft/source/test-fixtures/image-files-deleted/target/file-2.txt @@ -0,0 +1 @@ +file-2 contents! \ No newline at end of file diff --git a/syft/source/test-fixtures/image-symlinks/Dockerfile b/syft/source/test-fixtures/image-symlinks/Dockerfile index afb2a30b32b..f05f29a1380 100644 --- a/syft/source/test-fixtures/image-symlinks/Dockerfile +++ b/syft/source/test-fixtures/image-symlinks/Dockerfile @@ -1,5 +1,5 @@ # LAYER 0: -FROM busybox:1.34.0 +FROM busybox:1.34.0@sha256:e8e5cca392e3cf056fcdb3093e7ac2bf83fcf28b3bcf5818fe8ae71cf360c231 # LAYER 1: ADD file-1.txt . @@ -9,6 +9,7 @@ RUN ln -s ./file-1.txt link-1 # LAYER 3: link with future data RUN ln -s ./file-2.txt link-2 + # LAYER 4: ADD file-2.txt . @@ -31,4 +32,18 @@ ADD parent /parent RUN ln -s /parent parent-link # LAYER 11: parent is a symlink and the child target is overridden -COPY new-file-4.txt /parent-link/file-4.txt \ No newline at end of file +COPY new-file-4.txt /parent-link/file-4.txt + +# squash representation +# . 
+# ├── file-1.txt +# ├── file-2.txt +# ├── file-3.txt +# ├── link-1 -> ./file-1.txt +# ├── link-2 -> ./file-2.txt +# ├── link-dead -> [./i-dont-exist.txt] (dead link) +# ├── link-indirect -> ./link-2 +# ├── link-within -> ./file-3.txt +# ├── parent +# │ └── file-4.txt +# └── parent-link -> /parent diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-1.txt b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-1.txt new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-1.txt @@ -0,0 +1 @@ +bogus diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-2.txt b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-2.txt new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-2.txt @@ -0,0 +1 @@ +bogus diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-3.txt b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-3.txt new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/file-3.txt @@ -0,0 +1 @@ +bogus diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-1 b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-1 new file mode 120000 index 00000000000..4036f0fd4d3 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-1 @@ -0,0 +1 @@ +file-1.txt \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-2 b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-2 new file mode 120000 index 00000000000..5042efb8f3b --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-2 @@ -0,0 +1 @@ +file-2.txt \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-dead b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-dead new file mode 120000 index 00000000000..e41fb17fb39 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-dead @@ -0,0 +1 @@ +./i-dont-exist.txt \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-indirect b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-indirect new file mode 120000 index 00000000000..c3e9a545ed5 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-indirect @@ -0,0 +1 @@ +./link-2 \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-within b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-within new file mode 120000 index 00000000000..89dfefbca44 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/link-within @@ -0,0 +1 @@ +file-3.txt \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/parent-link b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/parent-link new file mode 120000 index 00000000000..25dd2a9fd67 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/parent-link @@ -0,0 +1 @@ +parent \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/parent/file-4.txt 
b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/parent/file-4.txt new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/source/test-fixtures/symlinks-from-image-symlinks-fixture/parent/file-4.txt @@ -0,0 +1 @@ +bogus diff --git a/syft/source/test-fixtures/symlinks-prune-indexing/before-path b/syft/source/test-fixtures/symlinks-prune-indexing/before-path new file mode 120000 index 00000000000..b9dd30f5dc6 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-prune-indexing/before-path @@ -0,0 +1 @@ +path \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-prune-indexing/c-file.txt b/syft/source/test-fixtures/symlinks-prune-indexing/c-file.txt new file mode 120000 index 00000000000..d3e5e99ca68 --- /dev/null +++ b/syft/source/test-fixtures/symlinks-prune-indexing/c-file.txt @@ -0,0 +1 @@ +before-path/file.txt \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-prune-indexing/c-path b/syft/source/test-fixtures/symlinks-prune-indexing/c-path new file mode 120000 index 00000000000..065736a4e5c --- /dev/null +++ b/syft/source/test-fixtures/symlinks-prune-indexing/c-path @@ -0,0 +1 @@ +before-path \ No newline at end of file diff --git a/syft/source/test-fixtures/symlinks-prune-indexing/path/1/2/3/4/dont-index-me-twice.txt b/syft/source/test-fixtures/symlinks-prune-indexing/path/1/2/3/4/dont-index-me-twice.txt new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/source/test-fixtures/symlinks-prune-indexing/path/1/2/3/4/dont-index-me-twice.txt @@ -0,0 +1 @@ +bogus diff --git a/syft/source/test-fixtures/symlinks-prune-indexing/path/5/6/7/8/dont-index-me-twice-either.txt b/syft/source/test-fixtures/symlinks-prune-indexing/path/5/6/7/8/dont-index-me-twice-either.txt new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/source/test-fixtures/symlinks-prune-indexing/path/5/6/7/8/dont-index-me-twice-either.txt @@ -0,0 +1 @@ +bogus diff --git a/syft/source/test-fixtures/symlinks-prune-indexing/path/file.txt b/syft/source/test-fixtures/symlinks-prune-indexing/path/file.txt new file mode 100644 index 00000000000..5ffba7b57dc --- /dev/null +++ b/syft/source/test-fixtures/symlinks-prune-indexing/path/file.txt @@ -0,0 +1 @@ +bogus diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index df73d14ed1e..2251e2fab46 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -67,7 +67,7 @@ func TestPkgCoverageImage(t *testing.T) { definedLanguages.Remove(pkg.Rust.String()) definedLanguages.Remove(pkg.Dart.String()) definedLanguages.Remove(pkg.Dotnet.String()) - definedLanguages.Remove(string(pkg.Swift.String())) + definedLanguages.Remove(pkg.Swift.String()) definedLanguages.Remove(pkg.CPP.String()) definedLanguages.Remove(pkg.Haskell.String()) definedLanguages.Remove(pkg.Erlang.String())