Skip to content

Commit

Permalink
add flag to toggle getting deps.dev dependencies (#1382)
Browse files Browse the repository at this point in the history
* add flag to toggle getting deps.dev dependencies

Signed-off-by: Marco Deicas <mdeicas@google.com>

* respond to comments

Signed-off-by: Marco Deicas <mdeicas@google.com>

* move retrieve-dependencies flag

Signed-off-by: Marco Deicas <mdeicas@google.com>

* make concurrent calls properly

Signed-off-by: Marco Deicas <mdeicas@google.com>

---------

Signed-off-by: Marco Deicas <mdeicas@google.com>
  • Loading branch information
mdeicas committed Oct 13, 2023
1 parent d18327b commit c225a8e
Show file tree
Hide file tree
Showing 6 changed files with 285 additions and 31 deletions.
1 change: 1 addition & 0 deletions cmd/README.md
Expand Up @@ -59,6 +59,7 @@ services:
- colsub addr
- collector/certifier name
- polling options
- flag to toggle retrieving deps

## Collectors and Certifiers

Expand Down
19 changes: 17 additions & 2 deletions cmd/guaccollect/cmd/deps_dev.go
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"time"

"github.com/guacsec/guac/pkg/cli"
"github.com/guacsec/guac/pkg/collectsub/client"
csubclient "github.com/guacsec/guac/pkg/collectsub/client"
"github.com/guacsec/guac/pkg/collectsub/datasource"
Expand All @@ -40,6 +41,8 @@ type depsDevOptions struct {
natsAddr string
// run as poll collector
poll bool
// query for dependencies
retrieveDependencies bool
}

var depsDevCmd = &cobra.Command{
Expand All @@ -57,6 +60,7 @@ var depsDevCmd = &cobra.Command{
viper.GetBool("csub-tls-skip-verify"),
viper.GetBool("use-csub"),
viper.GetBool("service-poll"),
viper.GetBool("retrieve-dependencies"),
args)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -65,7 +69,7 @@ var depsDevCmd = &cobra.Command{
}

// Register collector
depsDevCollector, err := deps_dev.NewDepsCollector(ctx, opts.dataSource, opts.poll, 30*time.Second)
depsDevCollector, err := deps_dev.NewDepsCollector(ctx, opts.dataSource, opts.poll, opts.retrieveDependencies, 30*time.Second)
if err != nil {
logger.Errorf("unable to register oci collector: %v", err)
}
Expand All @@ -78,10 +82,11 @@ var depsDevCmd = &cobra.Command{
},
}

func validateDepsDevFlags(natsAddr string, csubAddr string, csubTls bool, csubTlsSkipVerify bool, useCsub bool, poll bool, args []string) (depsDevOptions, error) {
func validateDepsDevFlags(natsAddr string, csubAddr string, csubTls bool, csubTlsSkipVerify bool, useCsub bool, poll bool, retrieveDependencies bool, args []string) (depsDevOptions, error) {
var opts depsDevOptions
opts.natsAddr = natsAddr
opts.poll = poll
opts.retrieveDependencies = retrieveDependencies

if useCsub {
csubOpts, err := client.ValidateCsubClientFlags(csubAddr, csubTls, csubTlsSkipVerify)
Expand Down Expand Up @@ -119,5 +124,15 @@ func validateDepsDevFlags(natsAddr string, csubAddr string, csubTls bool, csubTl
}

// init wires the deps_dev command into the CLI: it builds the
// "retrieve-dependencies" flag, attaches it to the command's persistent flags,
// binds those flags to viper and finally registers the command on rootCmd.
// Any failure while setting up the flags is reported on stderr and the process
// exits with status 1.
func init() {
	set, err := cli.BuildFlags([]string{"retrieve-dependencies"})
	if err != nil {
		// terminate the message with a newline so it is not glued to the shell prompt
		fmt.Fprintf(os.Stderr, "failed to setup flag: %v\n", err)
		os.Exit(1)
	}
	depsDevCmd.PersistentFlags().AddFlagSet(set)
	if err := viper.BindPFlags(depsDevCmd.PersistentFlags()); err != nil {
		fmt.Fprintf(os.Stderr, "failed to bind flags: %v\n", err)
		os.Exit(1)
	}
	rootCmd.AddCommand(depsDevCmd)
}
80 changes: 80 additions & 0 deletions internal/testing/testdata/testdata.go
Expand Up @@ -1981,6 +1981,86 @@ var (
"UpdateTime":"2022-11-21T17:45:50.52Z"
}`

// CollectedForeignTypesNoDeps is a JSON fixture describing the cargo package
// foreign-types 0.3.2 with only its CurrentPackage, Scorecard, Source and
// UpdateTime entries.
// NOTE(review): judging by the name, this is presumably the document expected
// when dependency retrieval is turned off — confirm against the tests using it.
CollectedForeignTypesNoDeps = `{
"CurrentPackage":{
"name":"foreign-types",
"namespace":"",
"qualifiers":null,
"subpath":"",
"type":"cargo",
"version":"0.3.2"
},
"Scorecard":{
"aggregateScore":4.599999904632568,
"checks":[
{
"check":"Maintained",
"score":5
},
{
"check":"CII-Best-Practices",
"score":0
},
{
"check":"Signed-Releases",
"score":-1
},
{
"check":"Packaging",
"score":-1
},
{
"check":"Dangerous-Workflow",
"score":10
},
{
"check":"Binary-Artifacts",
"score":10
},
{
"check":"Token-Permissions",
"score":0
},
{
"check":"Pinned-Dependencies",
"score":7
},
{
"check":"Fuzzing",
"score":0
},
{
"check":"Vulnerabilities",
"score":10
},
{
"check":"Branch-Protection",
"score":0
},
{
"check":"License",
"score":10
},
{
"check":"Security-Policy",
"score":0
}
],
"collector":"",
"origin":"",
"scorecardCommit":"6c5de2c32a4b8f60211e8e8eb94f8d3370a11b93",
"scorecardVersion":"v4.10.5-77-g6c5de2c",
"timeScanned":"2022-11-21T17:45:50.52Z"
},
"Source":{
"commit":null,
"name":"foreign-types",
"namespace":"github.com/sfackler",
"tag":null,
"type":"git"
},
"UpdateTime":"2022-11-21T17:45:50.52Z"
}`
CollectedForeignTypes = `{
"CurrentPackage":{
"name":"foreign-types",
Expand Down
3 changes: 3 additions & 0 deletions pkg/cli/store.go
Expand Up @@ -75,6 +75,9 @@ func init() {

set.Bool("service-poll", true, "sets the collector or certifier to polling mode")
set.BoolP("poll", "p", false, "sets the collector or certifier to polling mode")

set.Bool("retrieve-dependencies", true, "enable the deps.dev collector to retrieve package dependencies")

set.StringP("interval", "i", "5m", "if polling set interval, m, h, s, etc.")

set.BoolP("cert-good", "g", false, "enable to certifyGood, otherwise defaults to certifyBad")
Expand Down
173 changes: 154 additions & 19 deletions pkg/handler/collector/deps_dev/deps_dev.go
Expand Up @@ -24,6 +24,7 @@ import (
"time"

jsoniter "github.com/json-iterator/go"
"golang.org/x/exp/maps"

model "github.com/guacsec/guac/pkg/assembler/clients/generated"
"github.com/guacsec/guac/pkg/assembler/helpers"
Expand Down Expand Up @@ -62,18 +63,19 @@ type PackageComponent struct {
}

type depsCollector struct {
collectDataSource datasource.CollectSource
client pb.InsightsClient
poll bool
interval time.Duration
checkedPurls map[string]*PackageComponent
ingestedSource map[string]*model.SourceInputSpec
projectInfoMap map[string]*pb.Project
versions map[string]*pb.Version
dependencies map[string]*pb.Dependencies
collectDataSource datasource.CollectSource
client pb.InsightsClient
poll bool
retrieveDependencies bool
interval time.Duration
checkedPurls map[string]*PackageComponent
ingestedSource map[string]*model.SourceInputSpec
projectInfoMap map[string]*pb.Project
versions map[string]*pb.Version
dependencies map[string]*pb.Dependencies
}

func NewDepsCollector(ctx context.Context, collectDataSource datasource.CollectSource, poll bool, interval time.Duration) (*depsCollector, error) {
func NewDepsCollector(ctx context.Context, collectDataSource datasource.CollectSource, poll bool, retrieveDependencies bool, interval time.Duration) (*depsCollector, error) {
// Get the system certificates.
sysPool, err := x509.SystemCertPool()
if err != nil {
Expand All @@ -93,15 +95,16 @@ func NewDepsCollector(ctx context.Context, collectDataSource datasource.CollectS
client := pb.NewInsightsClient(conn)

return &depsCollector{
collectDataSource: collectDataSource,
client: client,
poll: poll,
interval: interval,
checkedPurls: map[string]*PackageComponent{},
ingestedSource: map[string]*model.SourceInputSpec{},
projectInfoMap: map[string]*pb.Project{},
versions: map[string]*pb.Version{},
dependencies: map[string]*pb.Dependencies{},
collectDataSource: collectDataSource,
client: client,
poll: poll,
retrieveDependencies: retrieveDependencies,
interval: interval,
checkedPurls: map[string]*PackageComponent{},
ingestedSource: map[string]*model.SourceInputSpec{},
projectInfoMap: map[string]*pb.Project{},
versions: map[string]*pb.Version{},
dependencies: map[string]*pb.Dependencies{},
}, nil
}

Expand Down Expand Up @@ -133,6 +136,16 @@ func (d *depsCollector) populatePurls(ctx context.Context, docChannel chan<- *pr
if err != nil {
return fmt.Errorf("unable to retrieve datasource: %w", err)
}

if !d.retrieveDependencies {
// validate and convert the purls once here, so that the next two calls do not repeat that work
versionKeys, pkgInputs := d.validatePurls(ctx, ds.PurlDataSources)

d.retrieveVersionsAndProjects(ctx, maps.Values(versionKeys))
d.collectMetadata(ctx, docChannel, pkgInputs)
return nil
}

start := time.Now()
err = d.getAllDependencies(ctx, ds.PurlDataSources)
if err != nil {
Expand All @@ -150,6 +163,125 @@ func (d *depsCollector) populatePurls(ctx context.Context, docChannel chan<- *pr
return nil
}

// validatePurls converts the purls of the given datasources into the inputs
// needed to query deps.dev.
//
// It returns two maps keyed by purl: the VersionKey and the PkgInputSpec of
// every purl that passed validation. A purl is logged and left out when it:
//   - has already been queried (it is present in d.checkedPurls)
//   - errors when converting to PkgInputSpec
//   - doesn't contain a version
//   - errors when converting to VersionKey
func (d *depsCollector) validatePurls(ctx context.Context, datasources []datasource.Source) (map[string]*pb.VersionKey, map[string]*model.PkgInputSpec) {
	logger := logging.FromContext(ctx)

	// at most one entry per datasource ends up in each map
	validVersionKeys := make(map[string]*pb.VersionKey, len(datasources))
	validPackageInputs := make(map[string]*model.PkgInputSpec, len(datasources))

	for _, ds := range datasources {
		purl := ds.Value

		if _, ok := d.checkedPurls[purl]; ok {
			logger.Infof("purl %s already queried", purl)
			continue
		}

		packageInput, err := helpers.PurlToPkg(purl)
		if err != nil {
			// keep the underlying error so a malformed purl can be diagnosed from the log
			logger.Infof("failed to parse purl to pkg: %s, err: %v", purl, err)
			continue
		}

		// if version is not specified, cannot obtain accurate information from deps.dev. Log as info and skip the purl.
		// Version is dereferenced below, so a nil pointer is treated like an empty version instead of panicking.
		if packageInput.Version == nil || *packageInput.Version == "" {
			logger.Infof("purl does not contain version, skipping deps.dev query: %s", purl)
			continue
		}

		versionKey, err := getVersionKey(packageInput.Type, packageInput.Namespace, packageInput.Name, packageInput.Version)
		if err != nil {
			logger.Debugf("failed to get VersionKey for purl %s with the following error: %v", purl, err)
			continue
		}

		validPackageInputs[purl] = packageInput
		validVersionKeys[purl] = versionKey
	}

	return validVersionKeys, validPackageInputs
}

// retrieveVersionsAndProjects fetches the version and project information for
// all given version keys, with the version and project lookups running in
// separate goroutines. The results are stored in d.versions and
// d.projectInfoMap, and the call returns only after both lookups have finished.
func (d *depsCollector) retrieveVersionsAndProjects(ctx context.Context, versionKeys []*pb.VersionKey) {
	// queues feeding the inputs to the two goroutines below
	versionQueue := make(chan *pb.VersionKey)
	projectQueue := make(chan *pb.ProjectKey)

	// closed by the goroutines once their results have been stored
	versionsStored := make(chan struct{})
	projectsStored := make(chan struct{})

	// The version goroutine is started first: getVersions also receives the
	// project queue, into which it pushes the projects that have to be fetched
	// for the versions it reads from the version queue.
	go func() {
		d.versions = d.getVersions(ctx, versionQueue, projectQueue) // results are stored in the versions map
		close(versionsStored)
	}()

	// The project goroutine fetches the projects for every key arriving on the
	// project queue.
	go func() {
		d.projectInfoMap = d.getProjects(ctx, projectQueue) // results are stored in the projectInfoMap map
		close(projectsStored)
	}()

	for i := range versionKeys {
		versionQueue <- versionKeys[i]
	}

	// Shut down in order: the project queue may only be closed after the version
	// goroutine has finished, since that goroutine is the one writing to it.
	close(versionQueue)
	<-versionsStored
	close(projectQueue)
	<-projectsStored
}

// collectMetadata generates, for each purl, a document containing scorecard and
// source metadata and writes it to docChannel. Purls for which the metadata
// lookup or the JSON encoding fails are logged and skipped.
//
// For performance, retrieveVersionsAndProjects should be called beforehand to
// populate d.versions and d.projectInfoMap. Otherwise, blocking calls to
// deps.dev will be made for each purl.
func (d *depsCollector) collectMetadata(ctx context.Context, docChannel chan<- *processor.Document, purls map[string]*model.PkgInputSpec) {
	logger := logging.FromContext(ctx)

	for purl, pkgInput := range purls {
		entry := &PackageComponent{CurrentPackage: pkgInput}

		if err := d.collectAdditionalMetadata(ctx, pkgInput.Type, pkgInput.Namespace, pkgInput.Name, pkgInput.Version, entry); err != nil {
			logger.Debugf("failed to get additional metadata for package: %s, err: %v", purl, err)
			continue
		}

		logger.Infof("obtained additional metadata for package: %s", purl)
		// the purl is recorded as checked before the component is encoded
		d.checkedPurls[purl] = entry

		encoded, err := json.Marshal(entry)
		if err != nil {
			logger.Errorf("Error marshalling component to json: %s", err)
			continue
		}

		docChannel <- &processor.Document{
			Blob:   encoded,
			Type:   processor.DocumentDepsDev,
			Format: processor.FormatJSON,
			SourceInformation: processor.SourceInformation{
				Collector: DepsCollector,
				Source:    DepsCollector,
			},
		}
	}
}

// getAllDependencies gets all the dependencies for the purls provided in a concurrent manner.
func (d *depsCollector) getAllDependencies(ctx context.Context, purls []datasource.Source) error {
// channels to signal when the project and version info have been fetched
Expand Down Expand Up @@ -212,6 +344,7 @@ func (d *depsCollector) getAllDependencies(ctx context.Context, purls []datasour
logger.Debugf("failed to get dependencies %v", err)
return nil
}
logger.Infof("Retrieved dependencies for %s", purl)
d.dependencies[versionKey.String()] = deps

for i, node := range deps.Nodes {
Expand Down Expand Up @@ -240,6 +373,7 @@ func (d *depsCollector) getAllDependencies(ctx context.Context, purls []datasour
versionChan <- depsVersionKey
}
}

close(versionChan)
<-versionDone
close(projectChan)
Expand Down Expand Up @@ -297,6 +431,7 @@ func (d *depsCollector) fetchDependencies(ctx context.Context, purl string, docC
logger.Debugf("failed to get dependencies: %v", err)
return nil
}
logger.Infof("Retrieved dependencies for %s", purl)
d.dependencies[versionKey.String()] = deps
}

Expand Down

0 comments on commit c225a8e

Please sign in to comment.