diff --git a/cmd/guaccollect/cmd/files.go b/cmd/guaccollect/cmd/files.go index e2806839f6..beba66f300 100644 --- a/cmd/guaccollect/cmd/files.go +++ b/cmd/guaccollect/cmd/files.go @@ -26,6 +26,7 @@ import ( "time" "github.com/guacsec/guac/pkg/blob" + "github.com/guacsec/guac/pkg/cli" "github.com/guacsec/guac/pkg/emitter" "github.com/guacsec/guac/pkg/handler/collector" "github.com/guacsec/guac/pkg/handler/collector/file" @@ -44,6 +45,8 @@ type filesOptions struct { blobAddr string // poll location poll bool + // use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location) + useBlobURL bool } var filesCmd = &cobra.Command{ @@ -70,6 +73,7 @@ you have access to read and write to the respective blob store.`, viper.GetString("pubsub-addr"), viper.GetString("blob-addr"), viper.GetBool("service-poll"), + viper.GetBool("use-blob-url"), args) if err != nil { fmt.Printf("unable to validate flags: %v\n", err) @@ -81,7 +85,7 @@ you have access to read and write to the respective blob store.`, logger := logging.FromContext(ctx) // Register collector - fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second) + fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, opts.useBlobURL) err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector) if err != nil { logger.Fatalf("unable to register file collector: %v", err) @@ -91,12 +95,13 @@ you have access to read and write to the respective blob store.`, }, } -func validateFilesFlags(pubsubAddr string, blobAddr string, poll bool, args []string) (filesOptions, error) { +func validateFilesFlags(pubsubAddr, blobAddr string, poll, useBlobURL bool, args []string) (filesOptions, error) { var opts filesOptions opts.pubsubAddr = pubsubAddr opts.blobAddr = blobAddr opts.poll = poll + opts.useBlobURL = useBlobURL if len(args) != 1 { return opts, fmt.Errorf("expected positional argument for file_path") @@ -186,5 +191,15 @@ func initializeNATsandCollector(ctx context.Context, pubsubAddr string, blobAddr } func init() { + set, err := cli.BuildFlags([]string{"use-blob-url"}) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err) + os.Exit(1) + } + filesCmd.PersistentFlags().AddFlagSet(set) + if err := viper.BindPFlags(filesCmd.PersistentFlags()); err != nil { + fmt.Fprintf(os.Stderr, "failed to bind flags: %v", err) + os.Exit(1) + } rootCmd.AddCommand(filesCmd) } diff --git a/cmd/guacone/cmd/files.go b/cmd/guacone/cmd/files.go index 7a4d87f703..54f2cf3db3 100644 --- a/cmd/guacone/cmd/files.go +++ b/cmd/guacone/cmd/files.go @@ -99,7 +99,7 @@ var filesCmd = &cobra.Command{ } // Register collector - fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second) + fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second, false) err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector) if err != nil { logger.Fatalf("unable to register file collector: %v", err) diff --git a/internal/testing/cmd/pubsub_test/cmd/files.go b/internal/testing/cmd/pubsub_test/cmd/files.go index ccea1dc802..7544f2d754 100644 --- a/internal/testing/cmd/pubsub_test/cmd/files.go +++ b/internal/testing/cmd/pubsub_test/cmd/files.go @@ -75,7 +75,7 @@ var filesCmd = &cobra.Command{ logger := logging.FromContext(ctx) // Register collector - fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second) + fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, false) err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector) if err != nil { logger.Errorf("unable to register file collector: %v", err) diff --git a/pkg/cli/store.go b/pkg/cli/store.go index 489c63c888..e5c8d75671 100644 --- a/pkg/cli/store.go +++ b/pkg/cli/store.go @@ -129,6 +129,9 @@ func init() { set.String("github-sbom", "", "name of sbom file to look for in github release.") set.String("github-workflow-file", "", "name of workflow file to look for in github workflow. \nThis will be the name of the actual file, not the workflow name (i.e. ci.yaml).") + // Files collector options + set.Bool("use-blob-url", false, "use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location)") + set.VisitAll(func(f *pflag.Flag) { flagStore[f.Name] = f }) diff --git a/pkg/handler/collector/collector_test.go b/pkg/handler/collector/collector_test.go index f610b042dc..23ed051ba2 100644 --- a/pkg/handler/collector/collector_test.go +++ b/pkg/handler/collector/collector_test.go @@ -51,7 +51,7 @@ func TestCollect(t *testing.T) { want []*processor.Document }{{ name: "file collector file", - collector: file.NewFileCollector(ctx, "./testdata", false, time.Second), + collector: file.NewFileCollector(ctx, "./testdata", false, time.Second, false), want: []*processor.Document{{ Blob: []byte("hello\n"), Type: processor.DocumentUnknown, diff --git a/pkg/handler/collector/file/file.go b/pkg/handler/collector/file/file.go index 97a13ec397..a28749a807 100644 --- a/pkg/handler/collector/file/file.go +++ b/pkg/handler/collector/file/file.go @@ -23,6 +23,7 @@ import ( "path/filepath" "time" + "github.com/guacsec/guac/pkg/events" "github.com/guacsec/guac/pkg/handler/processor" ) @@ -35,13 +36,15 @@ type fileCollector struct { lastChecked time.Time poll bool interval time.Duration + useBlobURL bool } -func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration) *fileCollector { +func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration, useBlobURL bool) *fileCollector { return &fileCollector{ - path: path, - poll: poll, - interval: interval, + path: path, + poll: poll, + interval: interval, + useBlobURL: useBlobURL, } } @@ -87,13 +90,18 @@ func (f *fileCollector) RetrieveArtifacts(ctx context.Context, docChannel chan<- return fmt.Errorf("error reading file: %s, err: %w", path, err) } + source := fmt.Sprintf("file:///%s", path) + if f.useBlobURL { + source = events.GetKey(blob) // this is the blob store path + } + doc := &processor.Document{ Blob: blob, Type: processor.DocumentUnknown, Format: processor.FormatUnknown, SourceInformation: processor.SourceInformation{ Collector: string(FileCollector), - Source: fmt.Sprintf("file:///%s", path), + Source: source, }, } diff --git a/pkg/handler/collector/file/file_test.go b/pkg/handler/collector/file/file_test.go index 344722a4e1..415cd62d6e 100644 --- a/pkg/handler/collector/file/file_test.go +++ b/pkg/handler/collector/file/file_test.go @@ -32,6 +32,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { lastChecked time.Time poll bool interval time.Duration + useBlobURL bool } tests := []struct { name string @@ -66,6 +67,25 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { }}, }, wantErr: false, + }, { + name: "found file with useBlobURL", + fields: fields{ + path: "./testdata", + lastChecked: time.Date(2009, 11, 17, 20, 34, 58, 651387237, time.UTC), + poll: false, + interval: 0, + useBlobURL: true, + }, + want: []*processor.Document{{ + Blob: []byte("hello\n"), + Type: processor.DocumentUnknown, + Format: processor.FormatUnknown, + SourceInformation: processor.SourceInformation{ + Collector: string(FileCollector), + Source: "sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03", + }}, + }, + wantErr: false, }, { name: "with canceled poll", fields: fields{ @@ -92,6 +112,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) { lastChecked: tt.fields.lastChecked, poll: tt.fields.poll, interval: tt.fields.interval, + useBlobURL: tt.fields.useBlobURL, } // NOTE: Below is one of the simplest ways to validate the context getting canceled() // This is still brittle if a test for some reason takes longer than a second. diff --git a/pkg/handler/collector/git/git.go b/pkg/handler/collector/git/git.go index 60ad14f04b..f195220e0a 100644 --- a/pkg/handler/collector/git/git.go +++ b/pkg/handler/collector/git/git.go @@ -47,7 +47,7 @@ type gitDocumentCollector struct { } func NewGitDocumentCollector(ctx context.Context, url string, dir string, poll bool, interval time.Duration) *gitDocumentCollector { - fileCollector := file.NewFileCollector(ctx, dir, false, time.Second) + fileCollector := file.NewFileCollector(ctx, dir, false, time.Second, false) return &gitDocumentCollector{ url: url,