Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): parse licenses from dist-info folder #4724

Merged
merged 20 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 134 additions & 32 deletions pkg/fanal/analyzer/language/python/packaging/packaging.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,43 @@ import (
"archive/zip"
"bytes"
"context"
"errors"
"io"
"io/fs"
"os"
"path"
"path/filepath"
"strings"

"github.com/samber/lo"
"golang.org/x/xerrors"

dio "github.com/aquasecurity/go-dep-parser/pkg/io"
"github.com/aquasecurity/go-dep-parser/pkg/python/packaging"
godeptypes "github.com/aquasecurity/go-dep-parser/pkg/types"
"github.com/aquasecurity/trivy/pkg/fanal/analyzer"
"github.com/aquasecurity/trivy/pkg/fanal/analyzer/language"
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/licensing"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/utils/fsutils"
)

// init registers the Python packaging analyzer as a post-analyzer.
//
// Only the factory-based registration is kept: newPackagingAnalyzer fills in
// the parser and the license classifier confidence level, whereas a zero-value
// &packagingAnalyzer{} would carry a nil pkgParser.
func init() {
	analyzer.RegisterPostAnalyzer(analyzer.TypePythonPkg, newPackagingAnalyzer)
}

// version is the version number of this analyzer.
// NOTE(review): presumably the value reported by the analyzer's Version method,
// which is outside this view — confirm.
const version = 1

// newPackagingAnalyzer is the factory handed to analyzer.RegisterPostAnalyzer.
// It builds a packagingAnalyzer with a fresh packaging parser and the license
// classifier confidence level taken from the license scanner options.
// The returned error is always nil.
func newPackagingAnalyzer(opt analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) {
	a := &packagingAnalyzer{}
	a.pkgParser = packaging.NewParser()
	a.licenseClassifierConfidenceLevel = opt.LicenseScannerOption.ClassifierConfidenceLevel
	return a, nil
}

var (
requiredFiles = []string{
eggFiles = []string{
// .egg format
// https://setuptools.readthedocs.io/en/latest/deprecated/python_eggs.html#eggs-and-their-formats
".egg", // zip format
Expand All @@ -34,35 +50,122 @@ var (
// https://setuptools.readthedocs.io/en/latest/deprecated/python_eggs.html#eggs-and-their-formats
".egg-info",
".egg-info/PKG-INFO",

// wheel
".dist-info/METADATA",
}
)

type packagingAnalyzer struct{}
type packagingAnalyzer struct {
pkgParser godeptypes.Parser
licenseClassifierConfidenceLevel float64
}

// PostAnalyze analyzes egg and wheel files.
func (a packagingAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {

var apps []types.Application

required := func(path string, _ fs.DirEntry) bool {
return filepath.Base(path) == "METADATA" || isEggFile(path)
}

err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r dio.ReadSeekerAt) error {

// .egg file is zip format and PKG-INFO needs to be extracted from the zip file.
if strings.HasSuffix(path, ".egg") {
info, _ := d.Info()
pkginfoInZip, err := a.analyzeEggZip(r, info.Size())
if err != nil {
return xerrors.Errorf("egg analysis error: %w", err)
}

// Analyze analyzes egg and wheel files.
func (a packagingAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
r := input.Content
// Egg archive may not contain required files, then we will get nil. Skip this archives
if pkginfoInZip == nil {
return nil
}

// .egg file is zip format and PKG-INFO needs to be extracted from the zip file.
if strings.HasSuffix(input.FilePath, ".egg") {
pkginfoInZip, err := a.analyzeEggZip(input.Content, input.Info.Size())
r = pkginfoInZip
}
app, err := a.parse(path, r)
if err != nil {
return nil, xerrors.Errorf("egg analysis error: %w", err)
return xerrors.Errorf("parse error: %w", err)
} else if app == nil {
return nil
}

// Egg archive may not contain required files, then we will get nil. Skip this archives
if pkginfoInZip == nil {
return nil, nil
apps = append(apps, *app)
return nil
})

for i := range apps {
if err := a.fillAdditionalData(input.FS, &apps[i]); err != nil {
log.Logger.Warnf("Unable to collect additional info: %s", err)
}
}
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved

r = pkginfoInZip
if err != nil {
return nil, xerrors.Errorf("python package walk error: %w", err)
}
return &analyzer.AnalysisResult{
Applications: apps,
}, nil
}

p := packaging.NewParser()
return language.AnalyzePackage(types.PythonPkg, input.FilePath, r, p, input.Options.FileChecksum)
func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application) error {

if len(app.Libraries) > 0 {
var licenses []string
for _, lic := range app.Libraries[0].Licenses {
// Parser adds `file://` prefix to filepath from `License-File` field
// We need to read this file to find licenses
// Otherwise, this is the name of the license
if !strings.HasPrefix(lic, "file://") {
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
licenses = append(licenses, lic)
continue
}
licenseFielPath := filepath.Base(strings.TrimPrefix(lic, "file://"))
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved

findings, err := classifyLicense(app.FilePath, licenseFielPath, a.licenseClassifierConfidenceLevel, fsys)
if err != nil {
return err
}
// License found
if len(findings) > 0 {
foundLicenses := lo.Map(findings, func(finding types.LicenseFinding, _ int) string {
return finding.Name
})
licenses = append(licenses, foundLicenses...)
}
}

app.Libraries[0].Licenses = licenses
}
return nil
}

// classifyLicense opens the license file licPath that sits in the same
// directory as dir and runs the license classifier on it.
//
// dir is the fs.FS path of the package metadata file (callers pass the
// application's file path); only its parent directory is used. licPath is the
// license file name relative to that directory.
//
// It returns nil findings and a nil error when the file does not exist or the
// classifier yields no result, and a wrapped error when the file cannot be
// opened or classified.
func classifyLicense(dir string, licPath string, classifierConfidenceLevel float64, fsys fs.FS) (types.LicenseFindings, error) {
	// Note that fs.FS is always slashed regardless of the platform, so
	// path.Dir and path.Join must be used rather than their filepath
	// counterparts (filepath.Dir would produce backslashes on Windows).
	f, err := fsys.Open(path.Join(path.Dir(dir), licPath))
	if err != nil {
		// A missing license file is not an error: there is simply nothing to classify.
		if errors.Is(err, fs.ErrNotExist) {
			return nil, nil
		}
		return nil, xerrors.Errorf("file open error: %w", err)
	}
	defer f.Close()

	l, err := licensing.Classify(licPath, f, classifierConfidenceLevel)
	if err != nil {
		return nil, xerrors.Errorf("license classify error: %w", err)
	}
	if l == nil {
		return nil, nil
	}

	return l.Findings, nil
}

func (a packagingAnalyzer) parse(path string, r dio.ReadSeekerAt) (*types.Application, error) {
return language.Parse(types.PythonPkg, path, r, a.pkgParser)
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
}

func (a packagingAnalyzer) analyzeEggZip(r io.ReaderAt, size int64) (dio.ReadSeekerAt, error) {
Expand All @@ -71,14 +174,12 @@ func (a packagingAnalyzer) analyzeEggZip(r io.ReaderAt, size int64) (dio.ReadSee
return nil, xerrors.Errorf("zip reader error: %w", err)
}

for _, file := range zr.File {
if !a.Required(file.Name, nil) {
continue
}

return a.open(file)
finded, ok := lo.Find(zr.File, func(f *zip.File) bool {
return isEggFile(f.Name)
})
if ok {
return a.open(finded)
}

return nil, nil
}

Expand All @@ -98,12 +199,13 @@ func (a packagingAnalyzer) open(file *zip.File) (dio.ReadSeekerAt, error) {
}

// Required reports whether filePath should be handed to this analyzer.
// It accepts every path containing ".dist-info" (so all files of a dist-info
// directory, not only METADATA) and every egg file (see isEggFile).
func (a packagingAnalyzer) Required(filePath string, _ os.FileInfo) bool {
	return strings.Contains(filePath, ".dist-info") || isEggFile(filePath)
}

func isEggFile(filePath string) bool {
return lo.SomeBy(eggFiles, func(fileName string) bool {
return strings.HasSuffix(filePath, fileName)
})
}

func (a packagingAnalyzer) Type() analyzer.Type {
Expand Down
Loading