Skip to content

Commit

Permalink
feat: add nodejs-binary package classifier (#1296)
Browse files Browse the repository at this point in the history
  • Loading branch information
spiffcs committed Oct 31, 2022
1 parent 919c929 commit edeba9c
Show file tree
Hide file tree
Showing 20 changed files with 209 additions and 12 deletions.
1 change: 1 addition & 0 deletions internal/mimetype_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ var (
"application/x-elf",
"application/x-sharedlib",
"application/vnd.microsoft.portable-executable",
"application/x-executable",
}...,
)
)
Expand Down
6 changes: 4 additions & 2 deletions syft/file/classification_cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases(t *testing.T) {
location: "[", // note: busybox is a link to [
expected: []Classification{
{
Class: "busybox-binary",
Class: "busybox-binary",
VirtualPath: "busybox",
Metadata: map[string]string{
"version": "3.33.3",
},
Expand Down Expand Up @@ -148,7 +149,8 @@ func TestClassifierCataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T
location: "/bin/[",
expected: []Classification{
{
Class: "busybox-binary",
Class: "busybox-binary",
VirtualPath: "/bin/busybox",
Metadata: map[string]string{
"version": "1.35.0",
},
Expand Down
24 changes: 18 additions & 6 deletions syft/file/classifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ var DefaultClassifiers = []Classifier{
`(?m)go(?P<version>[0-9]+\.[0-9]+(\.[0-9]+|beta[0-9]+|alpha[0-9]+|rc[0-9]+)?)`,
},
},
{
Class: "nodejs-binary",
FilepathPatterns: []*regexp.Regexp{
regexp.MustCompile(`(.*/|^)node$`),
},
EvidencePatternTemplates: []string{
// regex that matches node.js/vx.y.z
`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`,
},
},
{
Class: "go-binary-hint",
FilepathPatterns: []*regexp.Regexp{
Expand Down Expand Up @@ -67,12 +77,13 @@ type Classifier struct {
}

type Classification struct {
Class string `json:"class"`
Metadata map[string]string `json:"metadata"`
Class string `json:"class"`
VirtualPath string `json:"virtual_path"`
Metadata map[string]string `json:"metadata"`
}

func (c Classifier) Classify(resolver source.FileResolver, location source.Location) (*Classification, error) {
doesFilepathMatch, filepathNamedGroupValues := filepathMatches(c.FilepathPatterns, location)
doesFilepathMatch, filepathNamedGroupValues := FilepathMatches(c.FilepathPatterns, location)
if !doesFilepathMatch {
return nil, nil
}
Expand Down Expand Up @@ -114,8 +125,9 @@ func (c Classifier) Classify(resolver source.FileResolver, location source.Locat
matchMetadata := internal.MatchNamedCaptureGroups(pattern, string(contents))
if result == nil {
result = &Classification{
Class: c.Class,
Metadata: matchMetadata,
Class: c.Class,
VirtualPath: location.VirtualPath,
Metadata: matchMetadata,
}
} else {
for key, value := range matchMetadata {
Expand All @@ -126,7 +138,7 @@ func (c Classifier) Classify(resolver source.FileResolver, location source.Locat
return result, nil
}

func filepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
func FilepathMatches(patterns []*regexp.Regexp, location source.Location) (bool, map[string]string) {
for _, path := range []string{location.RealPath, location.VirtualPath} {
if path == "" {
continue
Expand Down
2 changes: 1 addition & 1 deletion syft/file/classifier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func TestFilepathMatches(t *testing.T) {
for _, p := range test.patterns {
patterns = append(patterns, regexp.MustCompile(p))
}
actualMatches, actualNamedGroups := filepathMatches(patterns, test.location)
actualMatches, actualNamedGroups := FilepathMatches(patterns, test.location)
assert.Equal(t, test.expectedMatches, actualMatches)
assert.Equal(t, test.expectedNamedGroups, actualNamedGroups)
})
Expand Down
8 changes: 8 additions & 0 deletions syft/formats/common/spdxhelpers/source_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,14 @@ func Test_SourceInfo(t *testing.T) {
"from cabal or stack manifest files",
},
},
{
input: pkg.Package{
Type: pkg.BinaryPkg,
},
expected: []string{
"acquired package info from the following paths",
},
},
}
var pkgTypes []pkg.Type
for _, test := range tests {
Expand Down
7 changes: 7 additions & 0 deletions syft/pkg/binary_metadata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package pkg

// BinaryMetadata describes a package that was discovered by classifying a
// binary file rather than by reading package-manager data.
type BinaryMetadata struct {
	// Classifier is the name of the classifier that identified the binary.
	Classifier string
	// RealPath and VirtualPath are the two paths of the location at which
	// the binary was found.
	RealPath, VirtualPath string
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..

for _, p := range packages {
// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
p.CPEs = cpe.Generate(p)
// the package may already carry CPEs assigned by a binary classifier, so append the generated CPEs instead of overwriting them
p.CPEs = append(p.CPEs, cpe.Generate(p)...)

// generate PURL (note: this is excluded from package ID, so is safe to mutate)
p.PURL = pkg.URL(p, release)
Expand All @@ -85,7 +86,6 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers ..
} else {
allRelationships = append(allRelationships, owningRelationships...)
}

// add to catalog
catalog.Add(p)
}
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/cataloger/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ func ImageCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
php.NewPHPComposerInstalledCataloger(),
javascript.NewJavascriptPackageCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmdbCataloger(),
java.NewJavaCataloger(cfg.Java()),
Expand All @@ -58,6 +59,7 @@ func DirectoryCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
php.NewPHPComposerLockCataloger(),
javascript.NewJavascriptLockCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmdbCataloger(),
rpm.NewFileCataloger(),
Expand Down Expand Up @@ -86,6 +88,7 @@ func AllCatalogers(cfg Config) []pkg.Cataloger {
python.NewPythonPackageCataloger(),
javascript.NewJavascriptLockCataloger(),
javascript.NewJavascriptPackageCataloger(),
javascript.NewNodeBinaryCataloger(),
deb.NewDpkgdbCataloger(),
rpm.NewRpmdbCataloger(),
rpm.NewFileCataloger(),
Expand Down
5 changes: 5 additions & 0 deletions syft/pkg/cataloger/common/cpe/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ func candidateVendors(p pkg.Package) []string {
vendors := newFieldCandidateSet(candidateProducts(p)...)

switch p.Language {
case pkg.JavaScript:
// for JavaScript if we find node.js as a package then the vendor is "nodejs"
if p.Name == "node.js" {
vendors.addValue("nodejs")
}
case pkg.Ruby:
vendors.addValue("ruby-lang")
case pkg.Go:
Expand Down
87 changes: 87 additions & 0 deletions syft/pkg/cataloger/generic/classifier.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package generic

import (
"fmt"
"io"
"path"
"regexp"

"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/unionreader"
"github.com/anchore/syft/syft/source"
)

// Classifier is a generic package classifier that can be used to match a package definition
// to a file that meets the given content criteria of the EvidencePatterns.
type Classifier struct {
// Package is the name of the package this classifier detects. It is recorded as the
// BinaryMetadata.Classifier value on produced packages and is quoted in error messages.
Package string
// FilepathPatterns is a list of regular expressions that will be used to match against the file path of a given
// source location. If any of the patterns match, the file will be considered a candidate for parsing.
// If no patterns are provided, the reader is automatically considered a candidate.
FilepathPatterns []*regexp.Regexp
// EvidencePatterns is a list of regular expressions that will be used to match against the file contents of a
// given file in the source location. The first pattern that matches produces the package; the value of its
// "version" named capture group is used as the package version.
EvidencePatterns []*regexp.Regexp
// CPEs are the CPEs assigned to every package produced by this classifier
CPEs []pkg.CPE
}

// Examine checks the reader's location and contents against the classifier and
// returns the package described by the first evidence pattern that matches.
//
// An error is returned when FilepathPatterns is non-empty and the location
// matches none of them, or when the file contents cannot be read. When the
// location is acceptable but no evidence pattern matches, the package is nil
// and so is the error. The relationship result is always nil.
func (c Classifier) Examine(reader source.LocationReadCloser) (p *pkg.Package, r *artifact.Relationship, err error) {
	// with no filepath patterns configured every location is a candidate
	if len(c.FilepathPatterns) > 0 {
		if matched, _ := file.FilepathMatches(c.FilepathPatterns, reader.Location); !matched {
			return nil, nil, fmt.Errorf("location: %s did not match any patterns for package=%q", reader.Location, c.Package)
		}
	}

	data, err := getContents(reader)
	if err != nil {
		return nil, nil, fmt.Errorf("unable to get read contents for file: %+v", err)
	}

	for _, evidence := range c.EvidencePatterns {
		if !evidence.Match(data) {
			continue
		}

		// only the first matching pattern is used; later patterns are never consulted
		captured := internal.MatchNamedCaptureGroups(evidence, string(data))
		return &pkg.Package{
			Name:         path.Base(reader.VirtualPath),
			Version:      captured["version"],
			Language:     pkg.Binary,
			Locations:    source.NewLocationSet(reader.Location),
			Type:         pkg.BinaryPkg,
			CPEs:         c.CPEs,
			MetadataType: pkg.BinaryMetadataType,
			Metadata: pkg.BinaryMetadata{
				Classifier:  c.Package,
				RealPath:    reader.RealPath,
				VirtualPath: reader.VirtualPath,
			},
		}, nil, nil
	}

	// no evidence pattern matched the contents
	return nil, nil, nil
}

// getContents reads the entire file behind the given location reader into
// memory, going through a union reader obtained from the underlying ReadCloser.
// Failures from either step are reported with a message naming the step.
func getContents(reader source.LocationReadCloser) ([]byte, error) {
	ur, openErr := unionreader.GetUnionReader(reader.ReadCloser)
	if openErr != nil {
		return nil, fmt.Errorf("unable to get union reader for file: %+v", openErr)
	}

	data, readErr := io.ReadAll(ur)
	if readErr != nil {
		return nil, fmt.Errorf("unable to get contents for file: %+v", readErr)
	}
	return data, nil
}
7 changes: 7 additions & 0 deletions syft/pkg/cataloger/javascript/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import (
"path"
"strings"

"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)

Expand All @@ -35,6 +37,11 @@ func NewJavascriptLockCataloger() *common.GenericCataloger {
return common.NewGenericCataloger(nil, globParsers, "javascript-lock-cataloger", addLicenses)
}

func NewNodeBinaryCataloger() *generic.Cataloger {
return generic.NewCataloger("node-binary-cataloger").
WithParserByMimeTypes(parseNodeBinary, internal.ExecutableMIMETypeSet.List()...)
}

func addLicenses(resolver source.FileResolver, location source.Location, p *pkg.Package) error {
dir := path.Dir(location.RealPath)
pkgPath := []string{dir, "node_modules"}
Expand Down
43 changes: 43 additions & 0 deletions syft/pkg/cataloger/javascript/parse_node_binary.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package javascript

import (
"regexp"

"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/source"
)

// nodeClassifier identifies node.js binaries: files whose path ends in a "node" element
// and whose contents contain a "node.js/vX.Y.Z" version string.
var nodeClassifier = generic.Classifier{
Package: "node.js", // Note: this purposely matches the "node.js" string to aid nvd vuln matching
FilepathPatterns: []*regexp.Regexp{
// note: should we just parse all files resolved with executable mimetypes?
// regexp that matches a path that is exactly "node" or ends in "/node"
regexp.MustCompile(`(.*/|^)node$`),
},
EvidencePatterns: []*regexp.Regexp{
// regex that matches node.js/vx.y.z and captures x.y.z as the "version" group
regexp.MustCompile(`(?m)node\.js\/v(?P<version>[0-9]+\.[0-9]+\.[0-9]+)`),
},
// CPE with a wildcard version that is attached to packages produced by this classifier
CPEs: []pkg.CPE{
pkg.MustCPE("cpe:2.3:a:nodejs:node.js:*:*:*:*:*:*:*:*"),
},
}

// parseNodeBinary runs the node.js classifier against the given file and, when
// the classifier produces a package, returns it as a single JavaScript package
// with its ID set.
//
// Classifier errors are logged at trace level and otherwise swallowed so that
// executables which are not node do not generate warning noise. A file that
// yields no package results in all-nil return values. No relationships are
// ever returned.
func parseNodeBinary(_ source.FileResolver, _ *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	p, _, err := nodeClassifier.Examine(reader)
	if err != nil {
		// NOTE(review): the message contains a format verb but is passed to log.Trace;
		// confirm whether a formatting variant of the logger call is intended.
		log.Trace("failed to find node.js package: %+v", err)
		return nil, nil, nil // we can silently fail here to reduce warning noise
	}

	// the classifier found no evidence in this file: nothing to report, and there
	// is no package to call SetID on
	if p == nil {
		return nil, nil, nil
	}

	// TODO add node specific metadata to the packages to help with vulnerability matching
	p.Language = pkg.JavaScript
	// p is non-nil here; set the ID before the value is copied into the result so
	// that the returned package carries it
	p.SetID()
	return []pkg.Package{*p}, nil, nil
}
1 change: 1 addition & 0 deletions syft/pkg/language.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const (
Swift Language = "swift"
CPP Language = "c++"
Haskell Language = "haskell"
Binary Language = "binary"
)

// AllLanguages is a set of all programming languages detected by syft.
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const (
UnknownMetadataType MetadataType = "UnknownMetadata"
ApkMetadataType MetadataType = "ApkMetadata"
AlpmMetadataType MetadataType = "AlpmMetadata"
BinaryMetadataType MetadataType = "BinaryMetadata"
DpkgMetadataType MetadataType = "DpkgMetadata"
GemMetadataType MetadataType = "GemMetadata"
JavaMetadataType MetadataType = "JavaMetadata"
Expand All @@ -35,6 +36,7 @@ const (
var AllMetadataTypes = []MetadataType{
ApkMetadataType,
AlpmMetadataType,
BinaryMetadataType,
DpkgMetadataType,
GemMetadataType,
JavaMetadataType,
Expand All @@ -57,6 +59,7 @@ var AllMetadataTypes = []MetadataType{
var MetadataTypeByName = map[MetadataType]reflect.Type{
ApkMetadataType: reflect.TypeOf(ApkMetadata{}),
AlpmMetadataType: reflect.TypeOf(AlpmMetadata{}),
BinaryMetadataType: reflect.TypeOf(BinaryMetadata{}),
DpkgMetadataType: reflect.TypeOf(DpkgMetadata{}),
GemMetadataType: reflect.TypeOf(GemMetadata{}),
JavaMetadataType: reflect.TypeOf(JavaMetadata{}),
Expand Down
2 changes: 2 additions & 0 deletions syft/pkg/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ type Type string
const (
// the full set of supported packages
UnknownPkg Type = "UnknownPackage"
BinaryPkg Type = "binary"
ApkPkg Type = "apk"
AlpmPkg Type = "alpm"
GemPkg Type = "gem"
Expand All @@ -33,6 +34,7 @@ const (
var AllPkgs = []Type{
ApkPkg,
AlpmPkg,
BinaryPkg,
GemPkg,
DebPkg,
RpmPkg,
Expand Down
4 changes: 3 additions & 1 deletion syft/pkg/type_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,12 @@ func TestTypeFromPURL(t *testing.T) {
expectedTypes.Add(string(ty))
}

// testing microsoft packages and jenkins-plugins is not valid for purl at this time
// testing microsoft packages and jenkins-plugins and custom binary type
// is not valid for purl at this time
expectedTypes.Remove(string(KbPkg))
expectedTypes.Remove(string(JenkinsPluginPkg))
expectedTypes.Remove(string(PortagePkg))
expectedTypes.Remove(string(BinaryPkg))

for _, test := range tests {
t.Run(string(test.expected), func(t *testing.T) {
Expand Down
1 change: 1 addition & 0 deletions syft/pkg/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ func TestPackageURL(t *testing.T) {
expectedTypes.Remove(string(DebPkg))
expectedTypes.Remove(string(GoModulePkg))
expectedTypes.Remove(string(HackagePkg))
expectedTypes.Remove(string(BinaryPkg))

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
Expand Down
Loading

0 comments on commit edeba9c

Please sign in to comment.