Skip to content

Commit

Permalink
feat(nodejs): parse licenses in yarn projects (#4652)
Browse files Browse the repository at this point in the history
* feat(nodejs): parse licenses in yarn projects

* close the zip file

* use fsutils.WalkDir

* refactor: extract traverseFunc

* update tests

* update required

* improve required fn

* handle error

* fix required

* fix required

* fix required

* update test

* fix after review

* simplify test data

* fix path

* rename fn

* update docs

* update docs

* simplify required fn

* skip an empty license

* improve required

* improve required

* update golden

* classify license file

* fix path

* fix path

* improve license parsing from cache

* classify the license file from zip

* refactor

* refactor

* fix lint

* fix after review

* fix test

* mv files

* mv files

* fix dbg message

* refactor: use zip.Reader as fs.FS

* refactor: pass io.Reader

* refactor: use fs.Sub

* refactor: add a struct for license traversing

* refactor: use lo.Some

* feat: bump the yarn analyzer version

* go mod tidy

* fix: sort imports

* use multierror

---------

Co-authored-by: knqyf263 <knqyf263@gmail.com>
  • Loading branch information
nikpivkin and knqyf263 committed Aug 23, 2023
1 parent 3114c87 commit ec5d8be
Show file tree
Hide file tree
Showing 46 changed files with 1,525 additions and 47 deletions.
3 changes: 2 additions & 1 deletion docs/docs/coverage/language/nodejs.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ By default, Trivy doesn't report development dependencies. Use the `--include-de

### Yarn
Trivy parses `yarn.lock`, which doesn't contain information about development dependencies.
To exclude devDependencies, `package.json` also needs to be present next to `yarn.lock`.
To exclude devDependencies, `package.json` also needs to be present next to `yarn.lock`.
Trivy analyzes the `.yarn` (Yarn 2+) or `node_modules` (Yarn Classic) folder next to the `yarn.lock` file to detect licenses.

By default, Trivy doesn't report development dependencies. Use the `--include-dev-deps` flag to include them.

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions integration/testdata/yarn.json.golden
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
"ID": "jquery@3.2.1",
"Name": "jquery",
"Version": "3.2.1",
"Licenses": [
"MIT"
],
"Layer": {},
"Locations": [
{
Expand Down
9 changes: 7 additions & 2 deletions pkg/fanal/analyzer/language/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/log"
xio "github.com/aquasecurity/trivy/pkg/x/io"
)

// Analyze returns an analysis result of the lock file
Expand Down Expand Up @@ -44,8 +45,12 @@ func AnalyzePackage(fileType, filePath string, r dio.ReadSeekerAt, parser godept
}

// Parse returns a parsed result of the lock file
func Parse(fileType, filePath string, r dio.ReadSeekerAt, parser godeptypes.Parser) (*types.Application, error) {
parsedLibs, parsedDependencies, err := parser.Parse(r)
func Parse(fileType, filePath string, r io.Reader, parser godeptypes.Parser) (*types.Application, error) {
rr, err := xio.NewReadSeekerAt(r)
if err != nil {
return nil, xerrors.Errorf("reader error: %w", err)
}
parsedLibs, parsedDependencies, err := parser.Parse(rr)
if err != nil {
return nil, xerrors.Errorf("failed to parse %s: %w", filePath, err)
}
Expand Down
6 changes: 2 additions & 4 deletions pkg/fanal/analyzer/language/golang/mod/mod.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (a *gomodAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalys
return filepath.Base(path) == types.GoMod
}

err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r dio.ReadSeekerAt) error {
err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, _ io.Reader) error {
// Parse go.mod
gomod, err := parse(input.FS, path, a.modParser)
if err != nil {
Expand Down Expand Up @@ -298,9 +298,7 @@ func findLicense(dir string, classifierConfidenceLevel float64) ([]string, error
return nil, nil
}

return lo.Map(license.Findings, func(finding types.LicenseFinding, _ int) string {
return finding.Name
}), nil
return license.Findings.Names(), nil
}

// normalizeModName escapes upper characters
Expand Down
105 changes: 105 additions & 0 deletions pkg/fanal/analyzer/language/nodejs/license/license.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package license

import (
"errors"
"io"
"io/fs"
"path"
"strings"

"golang.org/x/xerrors"

"github.com/aquasecurity/go-dep-parser/pkg/nodejs/packagejson"
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/utils/fsutils"
)

// License collects license names for Node.js packages by reading their
// package.json files and, when needed, classifying a referenced license file.
type License struct {
// parser parses the package.json files visited by Traverse.
parser *packagejson.Parser
// classifierConfidenceLevel is forwarded to licensing.Classify when a
// license file has to be classified.
classifierConfidenceLevel float64
}

// NewLicense returns a License that uses a fresh package.json parser and the
// given confidence level for license file classification.
func NewLicense(classifierConfidenceLevel float64) *License {
	l := new(License)
	l.parser = packagejson.NewParser()
	l.classifierConfidenceLevel = classifierConfidenceLevel
	return l
}

// Traverse walks fsys from root, parses every file selected by
// fsutils.RequiredFile(types.NpmPkg) as a package.json and returns a map
// from package ID to its license names.
//
// When the "license" field names a license, that value is stored as is.
// When it refers to a file (see IsLicenseRefToFile), the file next to the
// package.json is classified instead; packages for which nothing could be
// classified are left out of the result.
func (l *License) Traverse(fsys fs.FS, root string) (map[string][]string, error) {
	found := make(map[string][]string)

	visit := func(manifestPath string, _ fs.DirEntry, r io.Reader) error {
		pkg, err := l.parser.Parse(r)
		if err != nil {
			return xerrors.Errorf("unable to parse %q: %w", manifestPath, err)
		}

		isFileRef, fileName := IsLicenseRefToFile(pkg.License)
		if !isFileRef {
			// The field already holds a license name or expression.
			found[pkg.ID] = []string{pkg.License}
			return nil
		}

		log.Logger.Debugf("License names are missing in %q, an attempt to find them in the %q file", manifestPath, fileName)
		// The referenced file is resolved relative to the package.json directory.
		licensePath := path.Join(path.Dir(manifestPath), fileName)

		findings, err := classifyLicense(licensePath, l.classifierConfidenceLevel, fsys)
		if err != nil {
			return xerrors.Errorf("unable to classify the license: %w", err)
		}
		if len(findings) == 0 {
			log.Logger.Debugf("The license file %q was not found or the license could not be classified", licensePath)
			return nil
		}

		// License found
		found[pkg.ID] = findings.Names()
		return nil
	}

	err := fsutils.WalkDir(fsys, root, fsutils.RequiredFile(types.NpmPkg), visit)
	if err != nil {
		return nil, xerrors.Errorf("walk error: %w", err)
	}

	return found, nil
}

// IsLicenseRefToFile reports whether the "license" field of a package.json
// refers to a license file instead of naming a license, and returns the name
// of that file.
//
// Recognized forms:
//   - "" (no license given): falls back to the conventional "LICENSE" file;
//   - "LicenseRef-<filename>": everything after the prefix is the file name;
//   - "SEE LICENSE IN <filename>": the last space-separated word is the file name.
//
// Any other value is treated as a license name or expression and yields
// (false, ""). A recognized prefix with nothing after it also yields (false, "").
//
// https://docs.npmjs.com/cli/v9/configuring-npm/package-json
func IsLicenseRefToFile(maybeLicense string) (bool, string) {
	if maybeLicense == "" {
		// trying to find at least the LICENSE file
		return true, "LICENSE"
	}

	var licenseFileName string
	switch {
	case strings.HasPrefix(maybeLicense, "LicenseRef-"):
		// LicenseRef-<filename>
		// Strip only the prefix: splitting on every "-" would truncate
		// hyphenated names such as "LicenseRef-my-license.txt" to "my".
		licenseFileName = strings.TrimPrefix(maybeLicense, "LicenseRef-")
	case strings.HasPrefix(maybeLicense, "SEE LICENSE IN "):
		// SEE LICENSE IN <filename>
		parts := strings.Split(maybeLicense, " ")
		licenseFileName = parts[len(parts)-1]
	}

	return licenseFileName != "", licenseFileName
}

// classifyLicense opens filePath in fsys and passes its contents to
// licensing.Classify with the given confidence level.
//
// It returns (nil, nil) when the file does not exist or when the classifier
// returns no result; other open or classification failures are returned as
// wrapped errors.
func classifyLicense(filePath string, classifierConfidenceLevel float64, fsys fs.FS) (types.LicenseFindings, error) {
	file, err := fsys.Open(filePath)
	switch {
	case errors.Is(err, fs.ErrNotExist):
		// A missing license file is not an error: there is simply nothing to classify.
		return nil, nil
	case err != nil:
		return nil, xerrors.Errorf("file open error: %w", err)
	}
	defer file.Close()

	classified, err := licensing.Classify(filePath, file, classifierConfidenceLevel)
	if err != nil {
		return nil, xerrors.Errorf("license classify error: %w", err)
	}
	if classified == nil {
		return nil, nil
	}

	return classified.Findings, nil
}
98 changes: 98 additions & 0 deletions pkg/fanal/analyzer/language/nodejs/license/license_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package license_test

import (
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/aquasecurity/trivy/pkg/fanal/analyzer/language/nodejs/license"
"github.com/aquasecurity/trivy/pkg/mapfs"
)

// Test_ParseLicenses copies a testdata directory into an in-memory filesystem,
// runs License.Traverse over it and compares the resulting package-to-licenses
// map with the expected one.
func Test_ParseLicenses(t *testing.T) {
	type testCase struct {
		name    string
		dir     string
		want    map[string][]string
		wantErr string
	}

	cases := []testCase{
		{
			name: "happy",
			dir:  filepath.Join("testdata", "happy"),
			want: map[string][]string{
				"package-a@0.0.1": {"CC-BY-SA-4.0"},
				"package-b@0.0.1": {"MIT"},
				"package-c@0.0.1": {"BSD-3-Clause"},
				"package-d@0.0.1": {"BSD-3-Clause"},
				"package-e@0.0.1": {"(GPL-3.0 OR LGPL-3.0 OR MPL-1.1 OR SEE LICENSE IN LICENSE)"},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			memFS := mapfs.New()
			require.NoError(t, memFS.CopyFilesUnder(tc.dir))

			traverser := license.NewLicense(0.9)
			got, err := traverser.Traverse(memFS, ".")
			if tc.wantErr != "" {
				assert.ErrorContainsf(t, err, tc.wantErr, tc.name)
				return
			}

			require.NoError(t, err)
			assert.Equal(t, tc.want, got)
		})
	}
}

// Test_IsLicenseRefToFile checks which values of the package.json "license"
// field are recognized as references to a license file and which file name is
// extracted from them.
func Test_IsLicenseRefToFile(t *testing.T) {
	tests := []struct {
		name         string
		input        string
		wantOk       bool
		wantFileName string
	}{
		{
			name:  "no ref to file",
			input: "MIT",
		},
		{
			name:         "empty input",
			wantOk:       true,
			wantFileName: "LICENSE",
		},
		{
			name:         "happy `SEE LICENSE IN`",
			input:        "SEE LICENSE IN LICENSE.md",
			wantOk:       true,
			wantFileName: "LICENSE.md",
		},
		{
			name:   "sad `SEE LICENSE IN`",
			input:  "SEE LICENSE IN ",
			wantOk: false,
		},
		{
			name:         "happy `LicenseRef-`",
			input:        "LicenseRef-LICENSE.txt",
			wantOk:       true,
			wantFileName: "LICENSE.txt",
		},
		{
			name:   "sad `LicenseRef-`",
			input:  "LicenseRef-",
			wantOk: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ok, licenseFileName := license.IsLicenseRefToFile(tt.input)
			// testify expects (expected, actual); keeping that order makes
			// failure output label the two values correctly.
			assert.Equal(t, tt.wantOk, ok)
			assert.Equal(t, tt.wantFileName, licenseFileName)
		})
	}
}
Loading

0 comments on commit ec5d8be

Please sign in to comment.