Skip to content

Commit

Permalink
Populate SourceInformation.DocumentRef in collectors (#1847)
Browse files Browse the repository at this point in the history
* Add store-blob-url CLI flag to all collectors

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Import package only once in file

- This clears up a go-staticcheck warning

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Make use of setBlobURL flag in all collectors

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Remove impossible conditional clause

- This clears up a nilness analyzer warning

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Import package only once in file

- This clears up a go-staticcheck warning

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Tweak error message to not end with punctuation

- This clears up up a go-staticcheck warning

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Add test for collector storeBlobURL flags

- TODO: GitHub

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Fix typo in filename

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Add some TODOs to the GitHub collector test.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Rename flag more appropriately

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Always set DocumentRef to blob key

- Since we now have the DocumentRef field, there is
  no reason to gate its usage, and this simplifies
  the CLI command code.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Fix broken tests (minor)

- I forgot to include the DocumentRef values in my
  want docs.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Minor refactor

- Since we no longer branch based on whether or not we
  want to store blob keys (we always do it), the getDocRef()
  methods have become funcs instead.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Fix S3 collected doc Source field

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Fix SourceInformation assertion

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

* Create helper wrapper func to aid readability

- This allows me to stop repeating a call with a comment
  explaining it multiple time throughout the collector
  code. It's  very minor change.

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>

---------

Signed-off-by: Narsimham Chelluri (Narsa) <narsa@kusari.dev>
  • Loading branch information
nchelluri committed Apr 23, 2024
1 parent d908792 commit 3577d4d
Show file tree
Hide file tree
Showing 28 changed files with 219 additions and 124 deletions.
16 changes: 13 additions & 3 deletions cmd/guaccollect/cmd/deps_dev.go
Expand Up @@ -82,9 +82,9 @@ you have access to read and write to the respective blob store.`,
viper.GetBool("use-csub"),
viper.GetBool("service-poll"),
viper.GetBool("retrieve-dependencies"),
args,
viper.GetBool("enable-prometheus"),
viper.GetInt("prometheus-port"),
args,
)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand Down Expand Up @@ -114,8 +114,18 @@ you have access to read and write to the respective blob store.`,
},
}

func validateDepsDevFlags(pubsubAddr string, blobAddr string, csubAddr string, csubTls bool, csubTlsSkipVerify bool, useCsub bool, poll bool, retrieveDependencies bool, args []string,
enablePrometheus bool, prometheusPort int,
func validateDepsDevFlags(
pubsubAddr,
blobAddr,
csubAddr string,
csubTls,
csubTlsSkipVerify,
useCsub,
poll,
retrieveDependencies,
enablePrometheus bool,
prometheusPort int,
args []string,
) (depsDevOptions, error) {
var opts depsDevOptions
opts.pubsubAddr = pubsubAddr
Expand Down
19 changes: 2 additions & 17 deletions cmd/guaccollect/cmd/files.go
Expand Up @@ -26,7 +26,6 @@ import (
"time"

"github.com/guacsec/guac/pkg/blob"
"github.com/guacsec/guac/pkg/cli"
"github.com/guacsec/guac/pkg/emitter"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/file"
Expand All @@ -45,8 +44,6 @@ type filesOptions struct {
blobAddr string
// poll location
poll bool
// use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location)
useBlobURL bool
}

var filesCmd = &cobra.Command{
Expand All @@ -73,7 +70,6 @@ you have access to read and write to the respective blob store.`,
viper.GetString("pubsub-addr"),
viper.GetString("blob-addr"),
viper.GetBool("service-poll"),
viper.GetBool("use-blob-url"),
args)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -85,7 +81,7 @@ you have access to read and write to the respective blob store.`,
logger := logging.FromContext(ctx)

// Register collector
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, opts.useBlobURL)
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second)
err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector)
if err != nil {
logger.Fatalf("unable to register file collector: %v", err)
Expand All @@ -95,13 +91,12 @@ you have access to read and write to the respective blob store.`,
},
}

func validateFilesFlags(pubsubAddr, blobAddr string, poll, useBlobURL bool, args []string) (filesOptions, error) {
func validateFilesFlags(pubsubAddr, blobAddr string, poll bool, args []string) (filesOptions, error) {
var opts filesOptions

opts.pubsubAddr = pubsubAddr
opts.blobAddr = blobAddr
opts.poll = poll
opts.useBlobURL = useBlobURL

if len(args) != 1 {
return opts, fmt.Errorf("expected positional argument for file_path")
Expand Down Expand Up @@ -193,15 +188,5 @@ func initializeNATsandCollector(ctx context.Context, pubsubAddr string, blobAddr
}

func init() {
set, err := cli.BuildFlags([]string{"use-blob-url"})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err)
os.Exit(1)
}
filesCmd.PersistentFlags().AddFlagSet(set)
if err := viper.BindPFlags(filesCmd.PersistentFlags()); err != nil {
fmt.Fprintf(os.Stderr, "failed to bind flags: %v", err)
os.Exit(1)
}
rootCmd.AddCommand(filesCmd)
}
25 changes: 17 additions & 8 deletions cmd/guaccollect/cmd/gcs.go
@@ -1,11 +1,12 @@
package cmd

import (
"cloud.google.com/go/storage"
"context"
"fmt"
"os"

"cloud.google.com/go/storage"
"github.com/guacsec/guac/pkg/cli"
"github.com/guacsec/guac/pkg/collectsub/client"
csub_client "github.com/guacsec/guac/pkg/collectsub/client"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/gcs"
Expand All @@ -14,14 +15,13 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/viper"
"google.golang.org/api/option"
"os"
)

type gcsOptions struct {
pubSubAddr string
blobAddr string
graphqlEndpoint string
csubClientOptions client.CsubClientOptions
csubClientOptions csub_client.CsubClientOptions
bucket string
}

Expand All @@ -41,9 +41,9 @@ var gcsCmd = &cobra.Command{
viper.GetString("blob-addr"),
viper.GetString("gql-addr"),
viper.GetString("csub-addr"),
viper.GetString(gcsCredentialsPathFlag),
viper.GetBool("csub-tls"),
viper.GetBool("csub-tls-skip-verify"),
viper.GetString(gcsCredentialsPathFlag),
args)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -66,7 +66,7 @@ var gcsCmd = &cobra.Command{
logger.Fatalf("creating client: %v", err)
}

// Register collector by providing a new GCS Client and bucket name
// Register collector
gcsCollector, err := gcs.NewGCSCollector(gcs.WithBucket(opts.bucket), gcs.WithClient(client))
if err != nil {
logger.Fatalf("unable to create gcs client: %v", err)
Expand All @@ -90,14 +90,23 @@ var gcsCmd = &cobra.Command{
},
}

func validateGCSFlags(pubSubAddr, blobAddr, gqlEndpoint string, csubAddr string, csubTls bool, csubTlsSkipVerify bool, credentialsPath string, args []string) (gcsOptions, error) {
func validateGCSFlags(
pubSubAddr,
blobAddr,
gqlEndpoint,
csubAddr,
credentialsPath string,
csubTls,
csubTlsSkipVerify bool,
args []string,
) (gcsOptions, error) {
opts := gcsOptions{
pubSubAddr: pubSubAddr,
blobAddr: blobAddr,
graphqlEndpoint: gqlEndpoint,
}

csubOpts, err := client.ValidateCsubClientFlags(csubAddr, csubTls, csubTlsSkipVerify)
csubOpts, err := csub_client.ValidateCsubClientFlags(csubAddr, csubTls, csubTlsSkipVerify)
if err != nil {
return opts, fmt.Errorf("unable to validate csub client flags: %w", err)
}
Expand Down
21 changes: 17 additions & 4 deletions cmd/guaccollect/cmd/github.go
Expand Up @@ -18,13 +18,14 @@ package cmd
import (
"context"
"fmt"
csubclient "github.com/guacsec/guac/pkg/collectsub/client"
"github.com/guacsec/guac/pkg/collectsub/datasource/csubsource"
"github.com/guacsec/guac/pkg/collectsub/datasource/inmemsource"
"os"
"strings"
"time"

csubclient "github.com/guacsec/guac/pkg/collectsub/client"
"github.com/guacsec/guac/pkg/collectsub/datasource/csubsource"
"github.com/guacsec/guac/pkg/collectsub/datasource/inmemsource"

"github.com/guacsec/guac/pkg/cli"

"github.com/guacsec/guac/internal/client/githubclient"
Expand Down Expand Up @@ -153,7 +154,19 @@ you have access to read and write to the respective blob store.`,
},
}

func validateGithubFlags(pubsubAddr, blobAddr, csubAddr, githubMode, sbomName, workflowFileName string, csubTls, csubTlsSkipVerify, useCsub, poll bool, args []string) (githubOptions, error) {
func validateGithubFlags(
pubsubAddr,
blobAddr,
csubAddr,
githubMode,
sbomName,
workflowFileName string,
csubTls,
csubTlsSkipVerify,
useCsub,
poll bool,
args []string,
) (githubOptions, error) {
var opts githubOptions
opts.pubsubAddr = pubsubAddr
opts.blobAddr = blobAddr
Expand Down
11 changes: 10 additions & 1 deletion cmd/guaccollect/cmd/oci.go
Expand Up @@ -95,7 +95,16 @@ you have access to read and write to the respective blob store.`,
},
}

func validateOCIFlags(pubsubAddr string, blobAddr string, csubAddr string, csubTls bool, csubTlsSkipVerify bool, useCsub bool, poll bool, args []string) (ociOptions, error) {
func validateOCIFlags(
pubsubAddr,
blobAddr,
csubAddr string,
csubTls,
csubTlsSkipVerify,
useCsub,
poll bool,
args []string,
) (ociOptions, error) {
var opts ociOptions
opts.pubsubAddr = pubsubAddr
opts.blobAddr = blobAddr
Expand Down
9 changes: 8 additions & 1 deletion cmd/guaccollect/cmd/root.go
Expand Up @@ -29,7 +29,14 @@ import (
func init() {
cobra.OnInitialize(cli.InitConfig)

set, err := cli.BuildFlags([]string{"pubsub-addr", "blob-addr", "csub-addr", "use-csub", "service-poll", "enable-prometheus"})
set, err := cli.BuildFlags([]string{
"pubsub-addr",
"blob-addr",
"csub-addr",
"use-csub",
"service-poll",
"enable-prometheus",
})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err)
os.Exit(1)
Expand Down
28 changes: 22 additions & 6 deletions cmd/guaccollect/cmd/s3.go
Expand Up @@ -3,16 +3,17 @@ package cmd
import (
"context"
"fmt"
"os"
"os/signal"
"syscall"

"github.com/guacsec/guac/pkg/cli"
csub_client "github.com/guacsec/guac/pkg/collectsub/client"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/s3"
"github.com/guacsec/guac/pkg/logging"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"os"
"os/signal"
"syscall"
)

// s3Options flags for configuring the command
Expand Down Expand Up @@ -65,15 +66,15 @@ $ guacone collect s3 --s3-url http://localhost:9000 --s3-bucket guac-test --poll
viper.GetString("blob-addr"),
viper.GetString("gql-addr"),
viper.GetString("csub-addr"),
viper.GetBool("csub-tls"),
viper.GetBool("csub-tls-skip-verify"),
viper.GetString("s3-url"),
viper.GetString("s3-bucket"),
viper.GetString("s3-region"),
viper.GetString("s3-item"),
viper.GetString("s3-mp"),
viper.GetString("s3-mp-endpoint"),
viper.GetString("s3-queues"),
viper.GetBool("csub-tls"),
viper.GetBool("csub-tls-skip-verify"),
viper.GetBool("poll"),
)
if err != nil {
Expand Down Expand Up @@ -112,7 +113,22 @@ $ guacone collect s3 --s3-url http://localhost:9000 --s3-bucket guac-test --poll
},
}

func validateS3Opts(pubSubAddr, blobAddr, graphqlEndpoint, csubAddr string, csubTls, csubTlsSkipVerify bool, s3url, s3bucket, region, s3item, mp, mpEndpoint, queues string, poll bool) (s3Options, error) {
func validateS3Opts(
pubSubAddr,
blobAddr,
graphqlEndpoint,
csubAddr,
s3url,
s3bucket,
region,
s3item,
mp,
mpEndpoint,
queues string,
csubTls,
csubTlsSkipVerify,
poll bool,
) (s3Options, error) {
var opts s3Options

if poll {
Expand Down
2 changes: 1 addition & 1 deletion cmd/guacone/cmd/files.go
Expand Up @@ -99,7 +99,7 @@ var filesCmd = &cobra.Command{
}

// Register collector
fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second, false)
fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second)
err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector)
if err != nil {
logger.Fatalf("unable to register file collector: %v", err)
Expand Down
2 changes: 1 addition & 1 deletion internal/testing/cmd/pubsub_test/cmd/files.go
Expand Up @@ -75,7 +75,7 @@ var filesCmd = &cobra.Command{
logger := logging.FromContext(ctx)

// Register collector
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, false)
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second)
err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector)
if err != nil {
logger.Errorf("unable to register file collector: %v", err)
Expand Down
2 changes: 1 addition & 1 deletion internal/testing/dochelper/dochelper.go
Expand Up @@ -80,7 +80,7 @@ func DocTreeEqual(a, b processor.DocumentTree) bool {
}
}

return true
return reflect.DeepEqual(a.Document.SourceInformation, b.Document.SourceInformation)
}

// ConsistentJsonBytes makes sure that the blob byte comparison
Expand Down
3 changes: 0 additions & 3 deletions pkg/cli/store.go
Expand Up @@ -129,9 +129,6 @@ func init() {
set.String("github-sbom", "", "name of sbom file to look for in github release.")
set.String("github-workflow-file", "", "name of workflow file to look for in github workflow. \nThis will be the name of the actual file, not the workflow name (i.e. ci.yaml).")

// Files collector options
set.Bool("use-blob-url", false, "use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location)")

set.String("header-file", "", "a text file containing HTTP headers to send to the GQL server, in RFC 822 format")

set.VisitAll(func(f *pflag.Flag) {
Expand Down
5 changes: 5 additions & 0 deletions pkg/events/events.go
Expand Up @@ -64,6 +64,11 @@ func GetKey(blob []byte) string {
return fmt.Sprintf("sha256:%s", generatedHash)
}

// GetDocRef returns the Document Reference of a blob; i.e. the blob store key for this blob.
func GetDocRef(blob []byte) string {
return GetKey(blob)
}

func getHash(data []byte) string {
sha256sum := sha256.Sum256(data)
return hex.EncodeToString(sha256sum[:])
Expand Down
12 changes: 7 additions & 5 deletions pkg/handler/collector/collector_test.go
Expand Up @@ -20,12 +20,13 @@ import (
"context"
"errors"
"fmt"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"reflect"
"testing"
"time"

"go.uber.org/zap"
"go.uber.org/zap/zapcore"

uuid "github.com/gofrs/uuid"
"github.com/guacsec/guac/internal/testing/dochelper"
nats_test "github.com/guacsec/guac/internal/testing/nats"
Expand Down Expand Up @@ -54,14 +55,15 @@ func TestCollect(t *testing.T) {
want []*processor.Document
}{{
name: "file collector file",
collector: file.NewFileCollector(ctx, "./testdata", false, time.Second, false),
collector: file.NewFileCollector(ctx, "./testdata", false, time.Second),
want: []*processor.Document{{
Blob: []byte("hello\n"),
Type: processor.DocumentUnknown,
Format: processor.FormatUnknown,
SourceInformation: processor.SourceInformation{
Collector: string(file.FileCollector),
Source: "file:///testdata/hello",
Collector: string(file.FileCollector),
Source: "file:///testdata/hello",
DocumentRef: events.GetDocRef([]byte("hello\n")),
}},
},
wantErr: false,
Expand Down

0 comments on commit 3577d4d

Please sign in to comment.