From 12ec0dfe9ebfc746bdd1db0956055cfea600450f Mon Sep 17 00:00:00 2001 From: Nikita Pivkin Date: Fri, 19 Apr 2024 13:22:31 +0700 Subject: [PATCH] feat(misconf): loading embedded checks as a fallback (#6502) --- pkg/cloud/report/convert.go | 3 +- pkg/commands/artifact/run.go | 8 +- pkg/iac/rego/embed.go | 6 +- pkg/iac/rego/load.go | 130 ++++++++++++++---- pkg/iac/rego/load_test.go | 115 +++++++++++++++- pkg/iac/rego/metadata.go | 13 ++ pkg/iac/rego/scanner.go | 15 +- pkg/iac/rego/testdata/embedded/bad-check.rego | 9 ++ pkg/iac/rego/testdata/embedded/my-check1.rego | 11 ++ pkg/scanner/local/scan.go | 3 +- 10 files changed, 271 insertions(+), 42 deletions(-) create mode 100644 pkg/iac/rego/testdata/embedded/bad-check.rego create mode 100644 pkg/iac/rego/testdata/embedded/my-check1.rego diff --git a/pkg/cloud/report/convert.go b/pkg/cloud/report/convert.go index 2f6917d1855..ac8517380cb 100644 --- a/pkg/cloud/report/convert.go +++ b/pkg/cloud/report/convert.go @@ -8,6 +8,7 @@ import ( "github.com/aws/aws-sdk-go-v2/aws/arn" ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" + "github.com/aquasecurity/trivy/pkg/iac/rego" "github.com/aquasecurity/trivy/pkg/iac/scan" "github.com/aquasecurity/trivy/pkg/types" ) @@ -57,7 +58,7 @@ func ConvertResults(results scan.Results, provider string, scoped []string) map[ // empty namespace implies a go rule from defsec, "builtin" refers to a built-in rego rule // this ensures we don't generate bad links for custom policies - if result.RegoNamespace() == "" || strings.HasPrefix(result.RegoNamespace(), "builtin.") { + if result.RegoNamespace() == "" || rego.IsBuiltinNamespace(result.RegoNamespace()) { primaryURL = fmt.Sprintf("https://avd.aquasec.com/misconfig/%s", strings.ToLower(result.Rule().AVDID)) } diff --git a/pkg/commands/artifact/run.go b/pkg/commands/artifact/run.go index d50a39c65ca..f9e06a30f0a 100644 --- a/pkg/commands/artifact/run.go +++ b/pkg/commands/artifact/run.go @@ -21,6 +21,7 @@ import ( ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" "github.com/aquasecurity/trivy/pkg/fanal/walker" "github.com/aquasecurity/trivy/pkg/flag" + "github.com/aquasecurity/trivy/pkg/iac/rego" "github.com/aquasecurity/trivy/pkg/javadb" "github.com/aquasecurity/trivy/pkg/log" "github.com/aquasecurity/trivy/pkg/misconf" @@ -50,11 +51,6 @@ const ( ) var ( - defaultPolicyNamespaces = []string{ - "appshield", - "defsec", - "builtin", - } SkipScan = errors.New("skip subsequent processes") ) @@ -598,7 +594,7 @@ func initScannerConfig(opts flag.Options, cacheClient cache.Cache) (ScannerConfi configScannerOptions = misconf.ScannerOption{ Debug: opts.Debug, Trace: opts.Trace, - Namespaces: append(opts.PolicyNamespaces, defaultPolicyNamespaces...), + Namespaces: append(opts.PolicyNamespaces, rego.BuiltinNamespaces()...), PolicyPaths: append(opts.PolicyPaths, downloadedPolicyPaths...), DataPaths: opts.DataPaths, HelmValues: opts.HelmValues, diff --git a/pkg/iac/rego/embed.go b/pkg/iac/rego/embed.go index 16eff7345cc..d554ebf087a 100644 --- a/pkg/iac/rego/embed.go +++ b/pkg/iac/rego/embed.go @@ -8,7 +8,7 @@ import ( "github.com/open-policy-agent/opa/ast" - rules2 "github.com/aquasecurity/trivy-policies" + checks "github.com/aquasecurity/trivy-policies" "github.com/aquasecurity/trivy/pkg/iac/rules" ) @@ -62,11 +62,11 @@ func RegisterRegoRules(modules map[string]*ast.Module) { } func LoadEmbeddedPolicies() (map[string]*ast.Module, error) { - return LoadPoliciesFromDirs(rules2.EmbeddedPolicyFileSystem, ".") + return LoadPoliciesFromDirs(checks.EmbeddedPolicyFileSystem, ".") } func LoadEmbeddedLibraries() (map[string]*ast.Module, error) { - return LoadPoliciesFromDirs(rules2.EmbeddedLibraryFileSystem, ".") + return LoadPoliciesFromDirs(checks.EmbeddedLibraryFileSystem, ".") } func LoadPoliciesFromDirs(target fs.FS, paths ...string) (map[string]*ast.Module, error) { diff --git a/pkg/iac/rego/load.go b/pkg/iac/rego/load.go index aeef8014447..43f8fd76be6 100644 --- a/pkg/iac/rego/load.go +++ b/pkg/iac/rego/load.go @@ -9,8 +9,25 @@ import ( "github.com/open-policy-agent/opa/ast" "github.com/open-policy-agent/opa/bundle" + "github.com/samber/lo" ) +var builtinNamespaces = map[string]struct{}{ + "builtin": {}, + "defsec": {}, + "appshield": {}, +} + +func BuiltinNamespaces() []string { + return lo.Keys(builtinNamespaces) +} + +func IsBuiltinNamespace(namespace string) bool { + return lo.ContainsBy(BuiltinNamespaces(), func(ns string) bool { + return strings.HasPrefix(namespace, ns+".") + }) +} + func IsRegoFile(name string) bool { return strings.HasSuffix(name, bundle.RegoExt) && !strings.HasSuffix(name, "_test"+bundle.RegoExt) } @@ -38,28 +55,20 @@ func (s *Scanner) loadPoliciesFromReaders(readers []io.Reader) (map[string]*ast. return modules, nil } -func (s *Scanner) loadEmbedded(enableEmbeddedLibraries, enableEmbeddedPolicies bool) error { - if enableEmbeddedLibraries { - loadedLibs, errLoad := LoadEmbeddedLibraries() - if errLoad != nil { - return fmt.Errorf("failed to load embedded rego libraries: %w", errLoad) - } - for name, policy := range loadedLibs { - s.policies[name] = policy - } - s.debug.Log("Loaded %d embedded libraries.", len(loadedLibs)) +func (s *Scanner) loadEmbedded() error { + loaded, err := LoadEmbeddedLibraries() + if err != nil { + return fmt.Errorf("failed to load embedded rego libraries: %w", err) } + s.embeddedLibs = loaded + s.debug.Log("Loaded %d embedded libraries.", len(loaded)) - if enableEmbeddedPolicies { - loaded, err := LoadEmbeddedPolicies() - if err != nil { - return fmt.Errorf("failed to load embedded rego policies: %w", err) - } - for name, policy := range loaded { - s.policies[name] = policy - } - s.debug.Log("Loaded %d embedded policies.", len(loaded)) + loaded, err = LoadEmbeddedPolicies() + if err != nil { + return fmt.Errorf("failed to load embedded rego policies: %w", err) } + s.embeddedChecks = loaded + s.debug.Log("Loaded %d embedded policies.", len(loaded)) return nil } @@ -71,14 +80,22 @@ func (s *Scanner) LoadPolicies(enableEmbeddedLibraries, enableEmbeddedPolicies b } if s.policyFS != nil { - s.debug.Log("Overriding filesystem for policies!") + s.debug.Log("Overriding filesystem for checks!") srcFS = s.policyFS } - if err := s.loadEmbedded(enableEmbeddedLibraries, enableEmbeddedPolicies); err != nil { + if err := s.loadEmbedded(); err != nil { return err } + if enableEmbeddedPolicies { + s.policies = lo.Assign(s.policies, s.embeddedChecks) + } + + if enableEmbeddedLibraries { + s.policies = lo.Assign(s.policies, s.embeddedLibs) + } + var err error if len(paths) > 0 { loaded, err := LoadPoliciesFromDirs(srcFS, paths...) @@ -94,12 +111,12 @@ func (s *Scanner) LoadPolicies(enableEmbeddedLibraries, enableEmbeddedPolicies b if len(readers) > 0 { loaded, err := s.loadPoliciesFromReaders(readers) if err != nil { - return fmt.Errorf("failed to load rego policies from reader(s): %w", err) + return fmt.Errorf("failed to load rego checks from reader(s): %w", err) } for name, policy := range loaded { s.policies[name] = policy } - s.debug.Log("Loaded %d policies from reader(s).", len(loaded)) + s.debug.Log("Loaded %d checks from reader(s).", len(loaded)) } // gather namespaces @@ -127,9 +144,73 @@ func (s *Scanner) LoadPolicies(enableEmbeddedLibraries, enableEmbeddedPolicies b return s.compilePolicies(srcFS, paths) } +func (s *Scanner) fallbackChecks(compiler *ast.Compiler) { + + var excludedFiles []string + + for _, e := range compiler.Errors { + loc := e.Location.File + + if lo.Contains(excludedFiles, loc) { + continue + } + + badPolicy, exists := s.policies[loc] + if !exists || badPolicy == nil { + continue + } + + if !IsBuiltinNamespace(getModuleNamespace(badPolicy)) { + continue + } + + s.debug.Log("Error occurred while parsing: %s, %s. Trying to fallback to embedded check.", loc, e.Error()) + + embedded := s.findMatchedEmbeddedCheck(badPolicy) + if embedded == nil { + s.debug.Log("Failed to find embedded check: %s", loc) + continue + } + + s.debug.Log("Found embedded check: %s", embedded.Package.Location.File) + delete(s.policies, loc) // remove bad policy + s.policies[embedded.Package.Location.File] = embedded + delete(s.embeddedChecks, embedded.Package.Location.File) // avoid infinite loop if embedded check contains ref error + excludedFiles = append(excludedFiles, e.Location.File) + } + + compiler.Errors = lo.Filter(compiler.Errors, func(e *ast.Error, _ int) bool { + return !lo.Contains(excludedFiles, e.Location.File) + }) +} + +func (s *Scanner) findMatchedEmbeddedCheck(badPolicy *ast.Module) *ast.Module { + for _, embeddedCheck := range s.embeddedChecks { + if embeddedCheck.Package.Path.String() == badPolicy.Package.Path.String() { + return embeddedCheck + } + } + + badPolicyMeta, err := metadataFromRegoModule(badPolicy) + if err != nil { + return nil + } + + for _, embeddedCheck := range s.embeddedChecks { + meta, err := metadataFromRegoModule(embeddedCheck) + if err != nil { + continue + } + if badPolicyMeta.AVDID != "" && badPolicyMeta.AVDID == meta.AVDID { + return embeddedCheck + } + } + return nil +} + func (s *Scanner) prunePoliciesWithError(compiler *ast.Compiler) error { if len(compiler.Errors) > s.regoErrorLimit { - s.debug.Log("Error(s) occurred while loading policies") + s.debug.Log("Error(s) occurred while loading checks") return compiler.Errors } @@ -157,6 +238,7 @@ func (s *Scanner) compilePolicies(srcFS fs.FS, paths []string) error { compiler.Compile(s.policies) if compiler.Failed() { + s.fallbackChecks(compiler) if err := s.prunePoliciesWithError(compiler); err != nil { return err } diff --git a/pkg/iac/rego/load_test.go b/pkg/iac/rego/load_test.go index ae6669a6fb0..fdd56a56347 100644 --- a/pkg/iac/rego/load_test.go +++ b/pkg/iac/rego/load_test.go @@ -8,6 +8,7 @@ import ( "testing" "testing/fstest" + trivy_policies "github.com/aquasecurity/trivy-policies" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -19,6 +20,9 @@ import ( //go:embed all:testdata/policies var testEmbedFS embed.FS +//go:embed testdata/embedded +var embeddedChecksFS embed.FS + func Test_RegoScanning_WithSomeInvalidPolicies(t *testing.T) { t.Run("allow no errors", func(t *testing.T) { var debugBuf bytes.Buffer @@ -30,7 +34,7 @@ func Test_RegoScanning_WithSomeInvalidPolicies(t *testing.T) { err := scanner.LoadPolicies(false, false, testEmbedFS, []string{"."}, nil) require.ErrorContains(t, err, `want (one of): ["Cmd" "EndLine" "Flags" "JSON" "Original" "Path" "Stage" "StartLine" "SubCmd" "Value"]`) - assert.Contains(t, debugBuf.String(), "Error(s) occurred while loading policies") + assert.Contains(t, debugBuf.String(), "Error(s) occurred while loading checks") }) t.Run("allow up to max 1 error", func(t *testing.T) { @@ -95,3 +99,112 @@ deny { }) } + +func Test_FallbackToEmbedded(t *testing.T) { + tests := []struct { + name string + files map[string]*fstest.MapFile + expectedErr string + }{ + { + name: "match by namespace", + files: map[string]*fstest.MapFile{ + "policies/my-check2.rego": { + Data: []byte(`# METADATA +# schemas: +# - input: schema["fooschema"] + +package builtin.test + +deny { + input.evil == "foo bar" +}`, + ), + }, + }, + }, + { + name: "match by check ID", + files: map[string]*fstest.MapFile{ + "policies/my-check2.rego": { + Data: []byte(`# METADATA +# schemas: +# - input: schema["fooschema"] +# custom: +# avd_id: test-001 +package builtin.test2 + +deny { + input.evil == "foo bar" +}`, + ), + }, + }, + }, + { + name: "bad embedded check", + files: map[string]*fstest.MapFile{ + "policies/my-check2.rego": { + Data: []byte(`# METADATA +# schemas: +# - input: schema["fooschema"] +package builtin.bad.test + +deny { + input.evil == "foo bar" +}`, + ), + }, + }, + expectedErr: "testdata/embedded/bad-check.rego:8: rego_type_error: undefined ref", + }, + { + name: "with non existent function", + files: map[string]*fstest.MapFile{ + "policies/my-check2.rego": { + Data: []byte(`# METADATA +# schemas: +# - input: schema["fooschema"] +package builtin.test + +deny { + input.foo == fn.is_foo("foo") +}`, + ), + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scanner := rego.NewScanner( + types.SourceDockerfile, + options.ScannerWithRegoErrorLimits(0), + options.ScannerWithEmbeddedPolicies(false), + ) + + tt.files["schemas/fooschema.json"] = &fstest.MapFile{ + Data: []byte(`{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "foo": { + "type": "string" + } + } + }`), + } + + fsys := fstest.MapFS(tt.files) + trivy_policies.EmbeddedPolicyFileSystem = embeddedChecksFS + err := scanner.LoadPolicies(false, false, fsys, []string{"."}, nil) + + if tt.expectedErr != "" { + assert.ErrorContains(t, err, tt.expectedErr) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/pkg/iac/rego/metadata.go b/pkg/iac/rego/metadata.go index f2a6505910d..141bb407056 100644 --- a/pkg/iac/rego/metadata.go +++ b/pkg/iac/rego/metadata.go @@ -393,3 +393,16 @@ func (m *MetadataRetriever) queryInputOptions(ctx context.Context, module *ast.M func getModuleNamespace(module *ast.Module) string { return strings.TrimPrefix(module.Package.Path.String(), "data.") } + +func metadataFromRegoModule(module *ast.Module) (*StaticMetadata, error) { + meta := new(StaticMetadata) + for _, annotation := range module.Annotations { + if annotation.Scope == "package" { + if err := meta.FromAnnotations(annotation); err != nil { + return nil, err + } + break + } + } + return meta, nil +} diff --git a/pkg/iac/rego/scanner.go b/pkg/iac/rego/scanner.go index 6969f957fa5..aff5813d191 100644 --- a/pkg/iac/rego/scanner.go +++ b/pkg/iac/rego/scanner.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "io/fs" + "maps" "strings" "github.com/open-policy-agent/opa/ast" @@ -41,6 +42,9 @@ type Scanner struct { spec string inputSchema interface{} // unmarshalled into this from a json schema document sourceType types.Source + + embeddedLibs map[string]*ast.Module + embeddedChecks map[string]*ast.Module } func (s *Scanner) SetUseEmbeddedLibraries(b bool) { @@ -135,13 +139,12 @@ func NewScanner(source types.Source, opts ...options.ScannerOption) *Scanner { s := &Scanner{ regoErrorLimit: ast.CompileErrorLimitDefault, sourceType: source, - ruleNamespaces: map[string]struct{}{ - "builtin": {}, - "appshield": {}, - "defsec": {}, - }, - runtimeValues: addRuntimeValues(), + ruleNamespaces: make(map[string]struct{}), + runtimeValues: addRuntimeValues(), } + + maps.Copy(s.ruleNamespaces, builtinNamespaces) + for _, opt := range opts { opt(s) } diff --git a/pkg/iac/rego/testdata/embedded/bad-check.rego b/pkg/iac/rego/testdata/embedded/bad-check.rego new file mode 100644 index 00000000000..f30348989af --- /dev/null +++ b/pkg/iac/rego/testdata/embedded/bad-check.rego @@ -0,0 +1,9 @@ +# METADATA +# schemas: +# - input: schema["fooschema"] + +package builtin.bad.test + +deny { + input.evil == "foo bar" +} \ No newline at end of file diff --git a/pkg/iac/rego/testdata/embedded/my-check1.rego b/pkg/iac/rego/testdata/embedded/my-check1.rego new file mode 100644 index 00000000000..a087e9376ab --- /dev/null +++ b/pkg/iac/rego/testdata/embedded/my-check1.rego @@ -0,0 +1,11 @@ +# METADATA +# schemas: +# - input: schema["fooschema"] +# custom: +# avd_id: test-001 + +package builtin.test + +deny { + input.foo == "foo bar" +} \ No newline at end of file diff --git a/pkg/scanner/local/scan.go b/pkg/scanner/local/scan.go index 19f673f5415..02687b319a8 100644 --- a/pkg/scanner/local/scan.go +++ b/pkg/scanner/local/scan.go @@ -17,6 +17,7 @@ import ( "github.com/aquasecurity/trivy/pkg/fanal/analyzer" "github.com/aquasecurity/trivy/pkg/fanal/applier" ftypes "github.com/aquasecurity/trivy/pkg/fanal/types" + "github.com/aquasecurity/trivy/pkg/iac/rego" "github.com/aquasecurity/trivy/pkg/licensing" "github.com/aquasecurity/trivy/pkg/log" "github.com/aquasecurity/trivy/pkg/scanner/langpkg" @@ -383,7 +384,7 @@ func toDetectedMisconfiguration(res ftypes.MisconfResult, defaultSeverity dbType // empty namespace implies a go rule from defsec, "builtin" refers to a built-in rego rule // this ensures we don't generate bad links for custom policies - if res.Namespace == "" || strings.HasPrefix(res.Namespace, "builtin.") { + if res.Namespace == "" || rego.IsBuiltinNamespace(res.Namespace) { primaryURL = fmt.Sprintf("https://avd.aquasec.com/misconfig/%s", strings.ToLower(res.ID)) res.References = append(res.References, primaryURL) }