Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

931: binary cataloger exclusion defaults #1948

Merged
merged 18 commits into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,10 @@ default-image-pull-source: ""
# - "./out/**/*.json"
exclude: []

# allows users to exclude synthetic binary packages from the sbom
# these packages are removed if an overlap with a non-synthetic package is found
exclude-overlap-by-ownership: true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


# os and/or architecture to use when referencing container images (e.g. "windows/armv6" or "arm64")
# same as --platform; SYFT_PLATFORM env var
platform: ""
Expand Down
57 changes: 30 additions & 27 deletions internal/config/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,31 +42,32 @@ type Application struct {
ConfigPath string `yaml:"configPath,omitempty" json:"configPath" mapstructure:"config"`
Verbosity uint `yaml:"verbosity,omitempty" json:"verbosity" mapstructure:"verbosity"`
// -q, indicates to not show any status output to stderr (ETUI or logging UI)
Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"`
Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output
OutputTemplatePath string `yaml:"output-template-path" json:"output-template-path" mapstructure:"output-template-path"` // -t template file to use for output
File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to
CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
Dev development `yaml:"dev" json:"dev" mapstructure:"dev"`
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"`
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"`
Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"`
Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"`
Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
Platform string `yaml:"platform" json:"platform" mapstructure:"platform"`
Name string `yaml:"name" json:"name" mapstructure:"name"`
Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"`
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source
BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths
Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"`
Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output
OutputTemplatePath string `yaml:"output-template-path" json:"output-template-path" mapstructure:"output-template-path"` // -t template file to use for output
File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to
CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
Dev development `yaml:"dev" json:"dev" mapstructure:"dev"`
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"`
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"`
Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"`
Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"`
Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
Platform string `yaml:"platform" json:"platform" mapstructure:"platform"`
Name string `yaml:"name" json:"name" mapstructure:"name"`
Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"`
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source
BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths
ExcludeBinaryOverlapByOwnership bool `yaml:"exclude-binary-overlap-by-ownership" json:"exclude-binary-overlap-by-ownership" mapstructure:"exclude-binary-overlap-by-ownership"` // exclude synthetic binary packages owned by os package files
}

func (cfg Application) ToCatalogerConfig() cataloger.Config {
Expand All @@ -76,8 +77,9 @@ func (cfg Application) ToCatalogerConfig() cataloger.Config {
IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives,
Scope: cfg.Package.Cataloger.ScopeOpt,
},
Catalogers: cfg.Catalogers,
Parallelism: cfg.Parallelism,
Catalogers: cfg.Catalogers,
Parallelism: cfg.Parallelism,
ExcludeBinaryOverlapByOwnership: cfg.ExcludeBinaryOverlapByOwnership,
Golang: golangCataloger.NewGoCatalogerOpts().
WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses).
WithLocalModCacheDir(cfg.Golang.LocalModCacheDir).
Expand Down Expand Up @@ -221,6 +223,7 @@ func loadDefaultValues(v *viper.Viper) {
v.SetDefault("catalogers", nil)
v.SetDefault("parallelism", 1)
v.SetDefault("default-image-pull-source", "")
v.SetDefault("exclude-binary-overlap-by-ownership", true)

// for each field in the configuration struct, see if the field implements the defaultValueLoader interface and invoke it if it does
value := reflect.ValueOf(Application{})
Expand Down
25 changes: 24 additions & 1 deletion syft/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,34 @@ func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection,

catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...)

relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
// apply exclusions to the package catalog
// default config value for this is true
// https://github.com/anchore/syft/issues/931
if cfg.ExcludeBinaryOverlapByOwnership {
for _, r := range relationships {
if cataloger.Exclude(r, catalog) {
catalog.Delete(r.To.ID())
relationships = removeRelationshipsByID(relationships, r.To.ID())
}
}
}

// no need to consider source relationships for os -> binary exclusions
relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
return catalog, relationships, release, err
}

func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship {
// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
filtered := relationships[:0]
spiffcs marked this conversation as resolved.
Show resolved Hide resolved
for _, r := range relationships {
if r.To.ID() != id && r.From.ID() != id {
filtered = append(filtered, r)
}
}
return filtered
}

func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship {
relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method?
for p := range c.Enumerate() {
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/alpm/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "alpmdb-cataloger"
const CatalogerName = "alpmdb-cataloger"

func NewAlpmdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseAlpmDB, pkg.AlpmDBGlob)
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/apkdb/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "apkdb-cataloger"
const CatalogerName = "apkdb-cataloger"

// NewApkdbCataloger returns a new Alpine DB cataloger object.
func NewApkdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseApkDB, pkg.ApkDBGlob)
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/binary/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/anchore/syft/syft/pkg"
)

const catalogerName = "binary-cataloger"
const CatalogerName = "binary-cataloger"

func NewCataloger() *Cataloger {
return &Cataloger{}
Expand All @@ -22,7 +22,7 @@ type Cataloger struct{}

// Name returns a string that uniquely describes the Cataloger
func (c Cataloger) Name() string {
return catalogerName
return CatalogerName
}

// Catalog is given an object to resolve file references and content, this function returns any discovered Packages
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/binary/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func newPackage(classifier classifier, location file.Location, matchMetadata map
),
Type: pkg.BinaryPkg,
CPEs: cpes,
FoundBy: catalogerName,
FoundBy: CatalogerName,
MetadataType: pkg.BinaryMetadataType,
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
Expand Down
23 changes: 7 additions & 16 deletions syft/pkg/cataloger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,14 @@ import (
)

// TODO: these field naming vs helper function naming schemes are inconsistent.

type Config struct {
Search SearchConfig
Golang golang.GoCatalogerOpts
LinuxKernel kernel.LinuxCatalogerConfig
Python python.CatalogerConfig
Catalogers []string
Parallelism int
}

func DefaultConfig() Config {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why delete the DefaultConfig method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function was only used as a part of *_test.go files. It was moved here:

func defaultConfig() cataloger.Config {
return cataloger.Config{
Search: cataloger.DefaultSearchConfig(),
Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
ExcludeBinaryOverlapByOwnership: true,
}
}

Apologies for the boy scout change on an unrelated PR - my IDE was yelling about this being deadcode and I could not figure out why - the refactor over to test resolved that issue

return Config{
Search: DefaultSearchConfig(),
Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
}
Search SearchConfig
Golang golang.GoCatalogerOpts
LinuxKernel kernel.LinuxCatalogerConfig
Python python.CatalogerConfig
Catalogers []string
Parallelism int
ExcludeBinaryOverlapByOwnership bool
}

func (c Config) Java() java.Config {
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/deb/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "dpkgdb-cataloger"
const CatalogerName = "dpkgdb-cataloger"

// NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files.
func NewDpkgdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
// note: these globs have been intentionally split up in order to improve search performance,
// please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}"
WithParserByGlobs(parseDpkgDB, "**/var/lib/dpkg/status", "**/var/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status")
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/nix/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

const (
catalogerName = "nix-store-cataloger"
CatalogerName = "nix-store-cataloger"
nixStoreGlob = "**/nix/store/*"
)

Expand All @@ -24,7 +24,7 @@ func NewStoreCataloger() *StoreCataloger {
}

func (c *StoreCataloger) Name() string {
return catalogerName
return CatalogerName
}

func (c *StoreCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/nix/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestCataloger_Catalog(t *testing.T) {
Version: "2.34-210",
PURL: "pkg:nix/glibc@2.34-210?output=bin&outputhash=h0cnbmfcn93xm5dg2x27ixhag1cwndga",
Locations: file.NewLocationSet(file.NewLocation("nix/store/h0cnbmfcn93xm5dg2x27ixhag1cwndga-glibc-2.34-210-bin")),
FoundBy: catalogerName,
FoundBy: CatalogerName,
Type: pkg.NixPkg,
MetadataType: pkg.NixStoreMetadataType,
Metadata: pkg.NixStoreMetadata{
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/nix/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func newNixStorePackage(storePath nixStorePath, locations ...file.Location) pkg.
p := pkg.Package{
Name: storePath.name,
Version: storePath.version,
FoundBy: catalogerName,
FoundBy: CatalogerName,
Locations: file.NewLocationSet(locations...),
Type: pkg.NixPkg,
PURL: packageURL(storePath),
Expand Down
55 changes: 55 additions & 0 deletions syft/pkg/cataloger/package_exclusions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package cataloger

import (
"golang.org/x/exp/slices"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/alpm"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
"github.com/anchore/syft/syft/pkg/cataloger/deb"
"github.com/anchore/syft/syft/pkg/cataloger/nix"
"github.com/anchore/syft/syft/pkg/cataloger/rpm"
)

var (
osCatalogerTypes = []string{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the filtering should be based on the package type, not the cataloger names.

apkdb.CatalogerName,
alpm.CatalogerName,
deb.CatalogerName,
nix.CatalogerName,
rpm.DBCatalogerName,
rpm.FileCatalogerName,
}
binaryCatalogerTypes = []string{binary.CatalogerName}
)

// Exclude will remove packages from a collection given the following properties are true
// 1) the relationship between packages is OwnershipByFileOverlap
// 2) the parent is an "os" package
// 3) the child is a synthetic package generated by the binary cataloger
// 4) the package names are identical
// This exclude was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
func Exclude(r artifact.Relationship, c *pkg.Collection) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function seems very specific, but has a very generic name. I think the name should probably be tweaked to be a little more specific.

if artifact.OwnershipByFileOverlapRelationship != r.Type {
return false
}

parent := c.Package(r.From.ID())
if parent == nil {
return false
}

parentInExclusion := slices.Contains(osCatalogerTypes, parent.FoundBy)
if !parentInExclusion {
return false
}

child := c.Package(r.To.ID())
if child == nil {
return false
}

return slices.Contains(binaryCatalogerTypes, child.FoundBy)
}
78 changes: 78 additions & 0 deletions syft/pkg/cataloger/package_exclusions_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package cataloger

import (
"testing"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
)

func TestExclude(t *testing.T) {
packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg, FoundBy: apkdb.CatalogerName}
packageB := pkg.Package{Name: "package-a", Type: pkg.PythonPkg, FoundBy: "language-cataloger"}
packageC := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
packageD := pkg.Package{Name: "package-d", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD} {
p := p
p.SetID()
}

tests := []struct {
name string
relationship artifact.Relationship
packages *pkg.Collection
shouldExclude bool
}{
{
name: "no exclusions from os -> python",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageB,
},
packages: pkg.NewCollection(packageA, packageB),
shouldExclude: false,
},
{
name: "exclusions from os -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageC,
},
packages: pkg.NewCollection(packageA, packageC),
shouldExclude: true,
},
{
name: "no exclusions from python -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageB,
To: packageC,
},
packages: pkg.NewCollection(packageB, packageC),
shouldExclude: false,
},
{
name: "no exclusions for different package names",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageD,
},
packages: pkg.NewCollection(packageA, packageD),
shouldExclude: false,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
if !Exclude(test.relationship, test.packages) && test.shouldExclude {
t.Errorf("expected to exclude relationship %+v", test.relationship)
}
})

}
}
Loading
Loading