Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add guaccollect files option to set origin to blob path #1811

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 17 additions & 2 deletions cmd/guaccollect/cmd/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"time"

"github.com/guacsec/guac/pkg/blob"
"github.com/guacsec/guac/pkg/cli"
"github.com/guacsec/guac/pkg/emitter"
"github.com/guacsec/guac/pkg/handler/collector"
"github.com/guacsec/guac/pkg/handler/collector/file"
Expand All @@ -44,6 +45,8 @@ type filesOptions struct {
blobAddr string
// poll location
poll bool
// use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location)
useBlobURL bool
}

var filesCmd = &cobra.Command{
Expand All @@ -70,6 +73,7 @@ you have access to read and write to the respective blob store.`,
viper.GetString("pubsub-addr"),
viper.GetString("blob-addr"),
viper.GetBool("service-poll"),
viper.GetBool("use-blob-url"),
args)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand All @@ -81,7 +85,7 @@ you have access to read and write to the respective blob store.`,
logger := logging.FromContext(ctx)

// Register collector
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second)
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, opts.useBlobURL)
err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector)
if err != nil {
logger.Fatalf("unable to register file collector: %v", err)
Expand All @@ -91,12 +95,13 @@ you have access to read and write to the respective blob store.`,
},
}

func validateFilesFlags(pubsubAddr string, blobAddr string, poll bool, args []string) (filesOptions, error) {
func validateFilesFlags(pubsubAddr, blobAddr string, poll, useBlobURL bool, args []string) (filesOptions, error) {
var opts filesOptions

opts.pubsubAddr = pubsubAddr
opts.blobAddr = blobAddr
opts.poll = poll
opts.useBlobURL = useBlobURL

if len(args) != 1 {
return opts, fmt.Errorf("expected positional argument for file_path")
Expand Down Expand Up @@ -186,5 +191,15 @@ func initializeNATsandCollector(ctx context.Context, pubsubAddr string, blobAddr
}

// init wires the files command into the CLI. It builds the flag set for the
// "use-blob-url" flag via cli.BuildFlags, adds it to filesCmd's persistent
// flags, binds those persistent flags to viper, and finally attaches filesCmd
// to rootCmd. If building or binding the flags fails, the error is written to
// stderr and the process exits with status 1.
func init() {
	set, err := cli.BuildFlags([]string{"use-blob-url"})
	if err != nil {
		// End the message with a newline so it does not run into whatever
		// is printed next on the terminal (e.g. the shell prompt).
		fmt.Fprintf(os.Stderr, "failed to setup flag: %v\n", err)
		os.Exit(1)
	}
	filesCmd.PersistentFlags().AddFlagSet(set)
	if err := viper.BindPFlags(filesCmd.PersistentFlags()); err != nil {
		fmt.Fprintf(os.Stderr, "failed to bind flags: %v\n", err)
		os.Exit(1)
	}
	rootCmd.AddCommand(filesCmd)
}
2 changes: 1 addition & 1 deletion cmd/guacone/cmd/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ var filesCmd = &cobra.Command{
}

// Register collector
fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second)
fileCollector := file.NewFileCollector(ctx, opts.path, false, time.Second, false)
err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector)
if err != nil {
logger.Fatalf("unable to register file collector: %v", err)
Expand Down
2 changes: 1 addition & 1 deletion internal/testing/cmd/pubsub_test/cmd/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ var filesCmd = &cobra.Command{
logger := logging.FromContext(ctx)

// Register collector
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second)
fileCollector := file.NewFileCollector(ctx, opts.path, opts.poll, 30*time.Second, false)
err = collector.RegisterDocumentCollector(fileCollector, file.FileCollector)
if err != nil {
logger.Errorf("unable to register file collector: %v", err)
Expand Down
3 changes: 3 additions & 0 deletions pkg/cli/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ func init() {
set.String("github-sbom", "", "name of sbom file to look for in github release.")
set.String("github-workflow-file", "", "name of workflow file to look for in github workflow. \nThis will be the name of the actual file, not the workflow name (i.e. ci.yaml).")

// Files collector options
set.Bool("use-blob-url", false, "use blob URL for origin instead of source URL (useful if the blob store is persistent and we want to store the blob source location)")

set.VisitAll(func(f *pflag.Flag) {
flagStore[f.Name] = f
})
Expand Down
2 changes: 1 addition & 1 deletion pkg/handler/collector/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func TestCollect(t *testing.T) {
want []*processor.Document
}{{
name: "file collector file",
collector: file.NewFileCollector(ctx, "./testdata", false, time.Second),
collector: file.NewFileCollector(ctx, "./testdata", false, time.Second, false),
want: []*processor.Document{{
Blob: []byte("hello\n"),
Type: processor.DocumentUnknown,
Expand Down
18 changes: 13 additions & 5 deletions pkg/handler/collector/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"path/filepath"
"time"

"github.com/guacsec/guac/pkg/events"
"github.com/guacsec/guac/pkg/handler/processor"
)

Expand All @@ -35,13 +36,15 @@ type fileCollector struct {
lastChecked time.Time
poll bool
interval time.Duration
useBlobURL bool
}

func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration) *fileCollector {
func NewFileCollector(ctx context.Context, path string, poll bool, interval time.Duration, useBlobURL bool) *fileCollector {
return &fileCollector{
path: path,
poll: poll,
interval: interval,
path: path,
poll: poll,
interval: interval,
useBlobURL: useBlobURL,
}
}

Expand Down Expand Up @@ -87,13 +90,18 @@ func (f *fileCollector) RetrieveArtifacts(ctx context.Context, docChannel chan<-
return fmt.Errorf("error reading file: %s, err: %w", path, err)
}

source := fmt.Sprintf("file:///%s", path)
if f.useBlobURL {
source = events.GetKey(blob) // this is the blob store path
}

doc := &processor.Document{
Blob: blob,
Type: processor.DocumentUnknown,
Format: processor.FormatUnknown,
SourceInformation: processor.SourceInformation{
Collector: string(FileCollector),
Source: fmt.Sprintf("file:///%s", path),
Source: source,
},
}

Expand Down
21 changes: 21 additions & 0 deletions pkg/handler/collector/file/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) {
lastChecked time.Time
poll bool
interval time.Duration
useBlobURL bool
}
tests := []struct {
name string
Expand Down Expand Up @@ -66,6 +67,25 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) {
}},
},
wantErr: false,
}, {
name: "found file with useBlobURL",
fields: fields{
path: "./testdata",
lastChecked: time.Date(2009, 11, 17, 20, 34, 58, 651387237, time.UTC),
poll: false,
interval: 0,
useBlobURL: true,
},
want: []*processor.Document{{
Blob: []byte("hello\n"),
Type: processor.DocumentUnknown,
Format: processor.FormatUnknown,
SourceInformation: processor.SourceInformation{
Collector: string(FileCollector),
Source: "sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03",
}},
},
wantErr: false,
}, {
name: "with canceled poll",
fields: fields{
Expand All @@ -92,6 +112,7 @@ func Test_fileCollector_RetrieveArtifacts(t *testing.T) {
lastChecked: tt.fields.lastChecked,
poll: tt.fields.poll,
interval: tt.fields.interval,
useBlobURL: tt.fields.useBlobURL,
}
// NOTE: Below is one of the simplest ways to validate that the context gets canceled.
// This is still brittle if a test for some reason takes longer than a second.
Expand Down
2 changes: 1 addition & 1 deletion pkg/handler/collector/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ type gitDocumentCollector struct {
}

func NewGitDocumentCollector(ctx context.Context, url string, dir string, poll bool, interval time.Duration) *gitDocumentCollector {
fileCollector := file.NewFileCollector(ctx, dir, false, time.Second)
fileCollector := file.NewFileCollector(ctx, dir, false, time.Second, false)

return &gitDocumentCollector{
url: url,
Expand Down