Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): parse licenses from dist-info folder #4724

Merged
merged 20 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 127 additions & 30 deletions pkg/fanal/analyzer/language/python/packaging/packaging.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,41 @@ import (
"archive/zip"
"bytes"
"context"
"errors"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"strings"

"github.com/samber/lo"
"golang.org/x/xerrors"

dio "github.com/aquasecurity/go-dep-parser/pkg/io"
"github.com/aquasecurity/go-dep-parser/pkg/python/packaging"
godeptypes "github.com/aquasecurity/go-dep-parser/pkg/types"
"github.com/aquasecurity/trivy/pkg/fanal/analyzer"
"github.com/aquasecurity/trivy/pkg/fanal/analyzer/language"
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/utils/fsutils"
)

func init() {
analyzer.RegisterAnalyzer(&packagingAnalyzer{})
analyzer.RegisterPostAnalyzer(analyzer.TypePythonPkg, newPackagingAnalyzer)
}

const version = 1

// newPackagingAnalyzer constructs the packaging post-analyzer.
//
// The analyzer is given a fresh packaging parser and the license classifier
// confidence level found in opt.LicenseScannerOption. The returned error is
// always nil.
func newPackagingAnalyzer(opt analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) {
	pa := &packagingAnalyzer{
		pkgParser: packaging.NewParser(),
	}
	pa.licenseClassifierConfidenceLevel = opt.LicenseScannerOption.ClassifierConfidenceLevel
	return pa, nil
}

var (
requiredFiles = []string{
// .egg format
Expand All @@ -34,35 +50,117 @@ var (
// https://setuptools.readthedocs.io/en/latest/deprecated/python_eggs.html#eggs-and-their-formats
".egg-info",
".egg-info/PKG-INFO",

// wheel
".dist-info/METADATA",
}
)

type packagingAnalyzer struct{}
type packagingAnalyzer struct {
pkgParser godeptypes.Parser
licenseClassifierConfidenceLevel float64
}

// Analyze analyzes egg and wheel files.
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
func (a packagingAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
r := input.Content
func (a packagingAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {

var apps []types.Application

required := func(path string, _ fs.DirEntry) bool {
return strings.Contains(path, ".dist-info") || required(path)
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
}

err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r dio.ReadSeekerAt) error {

// An .egg file is a zip archive, so PKG-INFO needs to be extracted from it.
if strings.HasSuffix(path, ".egg") {
info, _ := d.Info()
pkginfoInZip, err := a.analyzeEggZip(r, info.Size())
if err != nil {
return xerrors.Errorf("egg analysis error: %w", err)
}

// An .egg file is a zip archive, so PKG-INFO needs to be extracted from it.
if strings.HasSuffix(input.FilePath, ".egg") {
pkginfoInZip, err := a.analyzeEggZip(input.Content, input.Info.Size())
// An egg archive may not contain the required files, in which case we get nil. Skip such archives.
if pkginfoInZip == nil {
return nil
}

r = pkginfoInZip
}
app, err := a.parse(path, r)
if err != nil {
return nil, xerrors.Errorf("egg analysis error: %w", err)
return xerrors.Errorf("parse error: %w", err)
} else if app == nil {
return nil
}

// An egg archive may not contain the required files, in which case we get nil. Skip such archives.
if pkginfoInZip == nil {
return nil, nil
if err := a.fillAdditionalData(input.FS, path, app); err != nil {
log.Logger.Warnf("Unable to collect additional info: %s", err)
}

apps = append(apps, *app)
return nil
})

if err != nil {
return nil, xerrors.Errorf("python package walk error: %w", err)
}
return &analyzer.AnalysisResult{
Applications: apps,
}, nil
}

func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, filePath string, app *types.Application) error {

if len(app.Libraries) > 0 {
var licenses []string
for _, lic := range app.Libraries[0].Licenses {
if !strings.HasPrefix(lic, "file://") {
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
licenses = append(licenses, lic)
continue
}
licenseFielPath := filepath.Base(strings.TrimPrefix(lic, "file://"))
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved

findings, err := classifyLicense(filePath, licenseFielPath, a.licenseClassifierConfidenceLevel, fsys)
if err != nil {
return err
}
// License found
if len(findings) > 0 {
foundLicenses := lo.Map(findings, func(finding types.LicenseFinding, _ int) string {
return finding.Name
})
licenses = append(licenses, foundLicenses...)
}
}

r = pkginfoInZip
app.Libraries[0].Licenses = licenses
}
return nil
}

func classifyLicense(dir string, licPath string, classifierConfidenceLevel float64, fsys fs.FS) (types.LicenseFindings, error) {
// Note that fs.FS is always slashed regardless of the platform,
// and path.Join should be used rather than filepath.Join.
f, err := fsys.Open(path.Join(filepath.Dir(dir), licPath))
if errors.Is(err, fs.ErrNotExist) {
return nil, nil
} else if err != nil {
return nil, xerrors.Errorf("file open error: %w", err)
}
defer f.Close()

l, err := licensing.Classify(licPath, f, classifierConfidenceLevel)
if err != nil {
return nil, xerrors.Errorf("license classify error: %w", err)
}

if l == nil {
return nil, nil
}

p := packaging.NewParser()
return language.AnalyzePackage(types.PythonPkg, input.FilePath, r, p, input.Options.FileChecksum)
return l.Findings, nil
}

func (a packagingAnalyzer) parse(path string, r dio.ReadSeekerAt) (*types.Application, error) {
return language.Parse(types.PythonPkg, path, r, a.pkgParser)
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
}

func (a packagingAnalyzer) analyzeEggZip(r io.ReaderAt, size int64) (dio.ReadSeekerAt, error) {
Expand All @@ -71,14 +169,12 @@ func (a packagingAnalyzer) analyzeEggZip(r io.ReaderAt, size int64) (dio.ReadSee
return nil, xerrors.Errorf("zip reader error: %w", err)
}

for _, file := range zr.File {
if !a.Required(file.Name, nil) {
continue
}

return a.open(file)
finded, ok := lo.Find(zr.File, func(f *zip.File) bool {
return required(f.Name)
})
if ok {
return a.open(finded)
}

return nil, nil
}

Expand All @@ -98,12 +194,13 @@ func (a packagingAnalyzer) open(file *zip.File) (dio.ReadSeekerAt, error) {
}

func (a packagingAnalyzer) Required(filePath string, _ os.FileInfo) bool {
for _, r := range requiredFiles {
if strings.HasSuffix(filePath, r) {
return true
}
}
return false
return strings.Contains(filePath, ".dist-info") || required(filePath)
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
}

func required(filePath string) bool {
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
return lo.SomeBy(requiredFiles, func(fileName string) bool {
return strings.HasSuffix(filePath, fileName)
})
}

func (a packagingAnalyzer) Type() analyzer.Type {
Expand Down
Loading