From f06c19476aa1d736bd2344336b2f25d4cca37e18 Mon Sep 17 00:00:00 2001 From: "Narsimham Chelluri (Narsa)" Date: Wed, 3 Apr 2024 16:24:43 -0300 Subject: [PATCH 1/3] Add guaccollect files option to set origin to blob path Signed-off-by: Narsimham Chelluri (Narsa) --- cmd/guaccollect/cmd/files.go | 19 +++++++++++++++-- cmd/guacone/cmd/files.go | 2 +- internal/testing/cmd/pubsub_test/cmd/files.go | 2 +- pkg/cli/store.go | 3 +++ pkg/handler/collector/collector_test.go | 2 +- pkg/handler/collector/file/file.go | 18 +++++++++++----- pkg/handler/collector/file/file_test.go | 21 +++++++++++++++++++ pkg/handler/collector/git/git.go | 2 +- 8 files changed, 58 insertions(+), 11 deletions(-) diff --git a/cmd/guaccollect/cmd/files.go b/cmd/guaccollect/cmd/files.go index e2806839f6..e27a5d3b1d 100644 --- a/cmd/guaccollect/cmd/files.go +++ b/cmd/guaccollect/cmd/files.go @@ -26,6 +26,7 @@ import ( "time" "github.com/guacsec/guac/pkg/blob" + "github.com/guacsec/guac/pkg/cli" "github.com/guacsec/guac/pkg/emitter" "github.com/guacsec/guac/pkg/handler/collector" "github.com/guacsec/guac/pkg/handler/collector/file" @@ -44,6 +45,8 @@ type filesOptions struct { blobAddr string // poll location poll bool + // use blob path for origin instead of source path + useBlobPath bool } var filesCmd = &cobra.Command{ @@ -70,6 +73,7 @@ you have access to read and write to the respective blob store.`, viper.GetString("pubsub-addr"), viper.GetString("blob-addr"), viper.GetBool("service-poll"), + viper.GetBool("use-blob-path"), args) if err != nil { fmt.Printf("unable to validate flags: %v\n", err) @@ -81,7 +85,7 @@ you have access to read and write to the respective blob store.`, logger := logging.FromContext(ctx) // Register collector - fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second) + fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, opts.useBlobPath) err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector) if err != nil { logger.Fatalf("unable to register file collector: %v", err) @@ -91,12 +95,13 @@ you have access to read and write to the respective blob store.`, }, } -func validateFilesFlags(pubsubAddr string, blobAddr string, poll bool, args []string) (filesOptions, error) { +func validateFilesFlags(pubsubAddr string, blobAddr string, poll bool, useBlobPath bool, args []string) (filesOptions, error) { var opts filesOptions opts.pubsubAddr = pubsubAddr opts.blobAddr = blobAddr opts.poll = poll + opts.useBlobPath = useBlobPath if len(args) != 1 { return opts, fmt.Errorf("expected positional argument for file_path") @@ -186,5 +191,15 @@ func initializeNATsandCollector(ctx context.Context, pubsubAddr string, blobAddr } func init() { + set, err := cli.BuildFlags([]string{"use-blob-path"}) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err) + os.Exit(1) + } + filesCmd.PersistentFlags().AddFlagSet(set) + if err := viper.BindPFlags(filesCmd.PersistentFlags()); err != nil { + fmt.Fprintf(os.Stderr, "failed to bind flags: %v", err) + os.Exit(1) + } rootCmd.AddCommand(filesCmd) } diff --git a/cmd/guacone/cmd/files.go b/cmd/guacone/cmd/files.go index 7a4d87f703..54f2cf3db3 100644 --- a/cmd/guacone/cmd/files.go +++ b/cmd/guacone/cmd/files.go @@ -99,7 +99,7 @@ var filesCmd = &cobra.Command{ } // Register collector - fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second) + fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second, false) err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector) if err != nil { logger.Fatalf("unable to register file collector: %v", err) diff --git a/internal/testing/cmd/pubsub_test/cmd/files.go b/internal/testing/cmd/pubsub_test/cmd/files.go index ccea1dc802..7544f2d754 100644 --- a/internal/testing/cmd/pubsub_test/cmd/files.go +++ b/internal/testing/cmd/pubsub_test/cmd/files.go @@ -75,7 +75,7 @@ var filesCmd = &cobra.Command{ logger := logging.FromContext(ctx) // Register collector - fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second) + fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, false) err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector) if err != nil { logger.Errorf("unable to register file collector: %v", err) diff --git a/pkg/cli/store.go b/pkg/cli/store.go index 489c63c888..968395bc25 100644 --- a/pkg/cli/store.go +++ b/pkg/cli/store.go @@ -129,6 +129,9 @@ func init() { set.String("github-sbom", "", "name of sbom file to look for in github release.") set.String("github-workflow-file", "", "name of workflow file to look for in github workflow. \nThis will be the name of the actual file, not the workflow name (i.e. ci.yaml).") + // Files collector options + set.Bool("use-blob-path", false, "use blob path for origin instead of source path") + set.VisitAll(func(f *pflag.Flag) { flagStore[f.Name] = f }) diff --git a/pkg/handler/collector/collector_test.go b/pkg/handler/collector/collector_test.go index f610b042dc..23ed051ba2 100644 --- a/pkg/handler/collector/collector_test.go +++ b/pkg/handler/collector/collector_test.go @@ -51,7 +51,7 @@ func TestCollect(t *testing.T) { want []*processor.Document }{{ name: "file collector file", - collector: file.NewFileCollector(ctx, "./testdata", false, time.Second), + collector: file.NewFileCollector(ctx, "./testdata", false, time.Second, false), want: []*processor.Document{{ Blob: []byte("hello\n"), Type: processor.DocumentUnknown, diff --git a/pkg/handler/collector/file/file.go b/pkg/handler/collector/file/file.go index 97a13ec397..26b790435d 100644 --- a/pkg/handler/collector/file/file.go +++ b/pkg/handler/collector/file/file.go @@ -23,6 +23,7 @@ import ( "path/filepath" "time" + "github.com/guacsec/guac/pkg/events" "github.com/guacsec/guac/pkg/handler/processor" ) @@ -35,13 +36,15 @@ type fileCollector struct { lastChecked time.Time poll bool interval time.Duration + useBlobPath bool } -func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration) *fileCollector { +func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration, useBlobPath bool) *fileCollector { return &fileCollector{ - path: path, - poll: poll, - interval: interval, + path: path, + poll: poll, + interval: interval, + useBlobPath: useBlobPath, } } @@ -87,13 +90,18 @@ func (f *fileCollector) RetrieveArtifacts(ctx context.Context, docChannel chan<- return fmt.Errorf("error reading file: %s, err: %w", path, err) } + source := fmt.Sprintf("file:///%s", path) + if f.useBlobPath { + source = events.GetKey(blob) // this is the blob store path + } + doc := &processor.Document{ Blob: blob, Type: processor.DocumentUnknown, Format: processor.FormatUnknown, SourceInformation: processor.SourceInformation{ Collector: string(FileCollector), - Source: fmt.Sprintf("file:///%s", path), + Source: source, }, } diff --git a/pkg/handler/collector/file/file_test.go b/pkg/handler/collector/file/file_test.go index 344722a4e1..5e0332f0de 100644 --- a/pkg/handler/collector/file/file_test.go +++ b/pkg/handler/collector/file/file_test.go @@ -32,6 +32,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { lastChecked time.Time poll bool interval time.Duration + useBlobPath bool } tests := []struct { name string @@ -66,6 +67,25 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { }}, }, wantErr: false, + }, { + name: "found file with useBlobPath", + fields: fields{ + path: "./testdata", + lastChecked: time.Date(2009, 11, 17, 20, 34, 58, 651387237, time.UTC), + poll: false, + interval: 0, + useBlobPath: true, + }, + want: []*processor.Document{{ + Blob: []byte("hello\n"), + Type: processor.DocumentUnknown, + Format: processor.FormatUnknown, + SourceInformation: processor.SourceInformation{ + Collector: string(FileCollector), + Source: "sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03", + }}, + }, + wantErr: false, }, { name: "with canceled poll", fields: fields{ @@ -92,6 +112,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { lastChecked: tt.fields.lastChecked, poll: tt.fields.poll, interval: tt.fields.interval, + useBlobPath: tt.fields.useBlobPath, } // NOTE: Below is one of the simplest ways to validate the context getting canceled() // This is still brittle if a test for some reason takes longer than a second. diff --git a/pkg/handler/collector/git/git.go b/pkg/handler/collector/git/git.go index 60ad14f04b..f195220e0a 100644 --- a/pkg/handler/collector/git/git.go +++ b/pkg/handler/collector/git/git.go @@ -47,7 +47,7 @@ type gitDocumentCollector struct { } func NewGitDocumentCollector(ctx context.Context, url string, dir string, poll bool, interval time.Duration) *gitDocumentCollector { - fileCollector := file.NewFileCollector(ctx, dir, false, time.Second) + fileCollector := file.NewFileCollector(ctx, dir, false, time.Second, false) return &gitDocumentCollector{ url: url, From 05bcb32b39fd202e6fa9c6e5935b9cd65319a4ca Mon Sep 17 00:00:00 2001 From: "Narsimham Chelluri (Narsa)" Date: Thu, 4 Apr 2024 09:10:04 -0300 Subject: [PATCH 2/3] Change nomenclature: "blob path" -> "blob URL" - They are actually blob URLs, so this is slightly more accurate, and a touch shorter. Signed-off-by: Narsimham Chelluri (Narsa) --- cmd/guaccollect/cmd/files.go | 14 +++++++------- pkg/cli/store.go | 2 +- pkg/handler/collector/file/file.go | 14 +++++++------- pkg/handler/collector/file/file_test.go | 8 ++++---- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cmd/guaccollect/cmd/files.go b/cmd/guaccollect/cmd/files.go index e27a5d3b1d..449b72a679 100644 --- a/cmd/guaccollect/cmd/files.go +++ b/cmd/guaccollect/cmd/files.go @@ -45,8 +45,8 @@ type filesOptions struct { blobAddr string // poll location poll bool - // use blob path for origin instead of source path - useBlobPath bool + // use blob URL for origin instead of source URL + useBlobURL bool } var filesCmd = &cobra.Command{ @@ -73,7 +73,7 @@ you have access to read and write to the respective blob store.`, viper.GetString("pubsub-addr"), viper.GetString("blob-addr"), viper.GetBool("service-poll"), - viper.GetBool("use-blob-path"), + viper.GetBool("use-blob-url"), args) if err != nil { fmt.Printf("unable to validate flags: %v\n", err) @@ -85,7 +85,7 @@ you have access to read and write to the respective blob store.`, logger := logging.FromContext(ctx) // Register collector - fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, opts.useBlobPath) + fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, opts.useBlobURL) err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector) if err != nil { logger.Fatalf("unable to register file collector: %v", err) @@ -95,13 +95,13 @@ you have access to read and write to the respective blob store.`, }, } -func validateFilesFlags(pubsubAddr string, blobAddr string, poll bool, useBlobPath bool, args []string) (filesOptions, error) { +func validateFilesFlags(pubsubAddr, blobAddr string, poll, useBlobURL bool, args []string) (filesOptions, error) { var opts filesOptions opts.pubsubAddr = pubsubAddr opts.blobAddr = blobAddr opts.poll = poll - opts.useBlobPath = useBlobPath + opts.useBlobURL = useBlobURL if len(args) != 1 { return opts, fmt.Errorf("expected positional argument for file_path") @@ -191,7 +191,7 @@ func initializeNATsandCollector(ctx context.Context, pubsubAddr string, blobAddr } func init() { - set, err := cli.BuildFlags([]string{"use-blob-path"}) + set, err := cli.BuildFlags([]string{"use-blob-url"}) if err != nil { fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err) os.Exit(1) diff --git a/pkg/cli/store.go b/pkg/cli/store.go index 968395bc25..214870916b 100644 --- a/pkg/cli/store.go +++ b/pkg/cli/store.go @@ -130,7 +130,7 @@ func init() { set.String("github-workflow-file", "", "name of workflow file to look for in github workflow. \nThis will be the name of the actual file, not the workflow name (i.e. ci.yaml).") // Files collector options - set.Bool("use-blob-path", false, "use blob path for origin instead of source path") + set.Bool("use-blob-url", false, "use blob URL for origin instead of source URL") set.VisitAll(func(f *pflag.Flag) { flagStore[f.Name] = f diff --git a/pkg/handler/collector/file/file.go b/pkg/handler/collector/file/file.go index 26b790435d..a28749a807 100644 --- a/pkg/handler/collector/file/file.go +++ b/pkg/handler/collector/file/file.go @@ -36,15 +36,15 @@ type fileCollector struct { lastChecked time.Time poll bool interval time.Duration - useBlobPath bool + useBlobURL bool } -func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration, useBlobPath bool) *fileCollector { +func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration, useBlobURL bool) *fileCollector { return &fileCollector{ - path: path, - poll: poll, - interval: interval, - useBlobPath: useBlobPath, + path: path, + poll: poll, + interval: interval, + useBlobURL: useBlobURL, } } @@ -91,7 +91,7 @@ func (f *fileCollector) RetrieveArtifacts(ctx context.Context, docChannel chan<- } source := fmt.Sprintf("file:///%s", path) - if f.useBlobPath { + if f.useBlobURL { source = events.GetKey(blob) // this is the blob store path } diff --git a/pkg/handler/collector/file/file_test.go b/pkg/handler/collector/file/file_test.go index 5e0332f0de..415cd62d6e 100644 --- a/pkg/handler/collector/file/file_test.go +++ b/pkg/handler/collector/file/file_test.go @@ -32,7 +32,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { lastChecked time.Time poll bool interval time.Duration - useBlobPath bool + useBlobURL bool } tests := []struct { name string @@ -68,13 +68,13 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { }, wantErr: false, }, { - name: "found file with useBlobPath", + name: "found file with useBlobURL", fields: fields{ path: "./testdata", lastChecked: time.Date(2009, 11, 17, 20, 34, 58, 651387237, time.UTC), poll: false, interval: 0, - useBlobPath: true, + useBlobURL: true, }, want: []*processor.Document{{ Blob: []byte("hello\n"), @@ -112,7 +112,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { lastChecked: tt.fields.lastChecked, poll: tt.fields.poll, interval: tt.fields.interval, - useBlobPath: tt.fields.useBlobPath, + useBlobURL: tt.fields.useBlobURL, } // NOTE: Below is one of the simplest ways to validate the context getting canceled() // This is still brittle if a test for some reason takes longer than a second. From 2046085814078590c6357233cf607da7b5d8883d Mon Sep 17 00:00:00 2001 From: "Narsimham Chelluri (Narsa)" Date: Thu, 4 Apr 2024 15:51:16 -0300 Subject: [PATCH 3/3] Update field comment and CLI arg desc to be more useful Signed-off-by: Narsimham Chelluri (Narsa) --- cmd/guaccollect/cmd/files.go | 2 +- pkg/cli/store.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/guaccollect/cmd/files.go b/cmd/guaccollect/cmd/files.go index 449b72a679..beba66f300 100644 --- a/cmd/guaccollect/cmd/files.go +++ b/cmd/guaccollect/cmd/files.go @@ -45,7 +45,7 @@ type filesOptions struct { blobAddr string // poll location poll bool - // use blob URL for origin instead of source URL + // use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location) useBlobURL bool } diff --git a/pkg/cli/store.go b/pkg/cli/store.go index 214870916b..e5c8d75671 100644 --- a/pkg/cli/store.go +++ b/pkg/cli/store.go @@ -130,7 +130,7 @@ func init() { set.String("github-workflow-file", "", "name of workflow file to look for in github workflow. \nThis will be the name of the actual file, not the workflow name (i.e. ci.yaml).") // Files collector options - set.Bool("use-blob-url", false, "use blob URL for origin instead of source URL") + set.Bool("use-blob-url", false, "use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location)") set.VisitAll(func(f *pflag.Flag) { flagStore[f.Name] = f