From 941cd5252fbb18fd8add648b4dc11df8ca0814a4 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Mon, 13 Apr 2026 20:00:07 -0700 Subject: [PATCH 01/14] Add Go policy engine library for PBAC evaluation Go library implementing the PBAC purpose-overlap evaluation algorithm and fideslang policy rule evaluation. Provides two packages: - pkg/pbac: Purpose evaluation (set intersection of consumer vs dataset purposes) - pkg/fideslang: Fideslang policy rule evaluation (taxonomy hierarchy matching) - cmd/fides-evaluate: CLI binary for stdin/file-based evaluation The library is designed to be imported by the fidesplus sidecar for high-throughput HTTP evaluation (~13x faster than Python). --- policy-engine/cmd/fides-evaluate/main.go | 76 ++++++ policy-engine/go.mod | 3 + policy-engine/pkg/fideslang/evaluate.go | 167 ++++++++++++ policy-engine/pkg/fideslang/evaluate_test.go | 261 +++++++++++++++++++ policy-engine/pkg/fideslang/taxonomy.go | 64 +++++ policy-engine/pkg/fideslang/types.go | 79 ++++++ policy-engine/pkg/pbac/evaluate.go | 138 ++++++++++ policy-engine/pkg/pbac/evaluate_test.go | 213 +++++++++++++++ policy-engine/pkg/pbac/types.go | 133 ++++++++++ 9 files changed, 1134 insertions(+) create mode 100644 policy-engine/cmd/fides-evaluate/main.go create mode 100644 policy-engine/go.mod create mode 100644 policy-engine/pkg/fideslang/evaluate.go create mode 100644 policy-engine/pkg/fideslang/evaluate_test.go create mode 100644 policy-engine/pkg/fideslang/taxonomy.go create mode 100644 policy-engine/pkg/fideslang/types.go create mode 100644 policy-engine/pkg/pbac/evaluate.go create mode 100644 policy-engine/pkg/pbac/evaluate_test.go create mode 100644 policy-engine/pkg/pbac/types.go diff --git a/policy-engine/cmd/fides-evaluate/main.go b/policy-engine/cmd/fides-evaluate/main.go new file mode 100644 index 00000000000..3fd34f419b2 --- /dev/null +++ b/policy-engine/cmd/fides-evaluate/main.go @@ -0,0 +1,76 @@ +// fides-evaluate is a CLI tool for running the Fides policy 
evaluation engine. +// +// Usage: +// +// echo '{"consumer": {...}, "datasets": {...}}' | fides-evaluate purpose +// echo '{"taxonomy": {...}, "policy_rule": {...}, "privacy_declaration": {...}}' | fides-evaluate policy-rule +package main + +import ( + "encoding/json" + "fmt" + "io" + "os" + + "github.com/ethyca/fides/policy-engine/pkg/fideslang" + "github.com/ethyca/fides/policy-engine/pkg/pbac" +) + +func main() { + if len(os.Args) < 2 { + fmt.Fprintf(os.Stderr, "Usage: fides-evaluate [file]\n") + os.Exit(1) + } + + command := os.Args[1] + + var reader io.Reader = os.Stdin + if len(os.Args) > 2 { + f, err := os.Open(os.Args[2]) + if err != nil { + fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) + os.Exit(1) + } + defer f.Close() + reader = f + } + + input, err := io.ReadAll(reader) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading input: %v\n", err) + os.Exit(1) + } + + switch command { + case "purpose": + var req pbac.EvaluatePurposeRequest + if err := json.Unmarshal(input, &req); err != nil { + fmt.Fprintf(os.Stderr, "Error parsing JSON: %v\n", err) + os.Exit(1) + } + result := pbac.EvaluatePurpose(req.Consumer, req.Datasets, req.Collections) + writeJSON(result) + + case "policy-rule": + var req fideslang.EvaluateRequest + if err := json.Unmarshal(input, &req); err != nil { + fmt.Fprintf(os.Stderr, "Error parsing JSON: %v\n", err) + os.Exit(1) + } + result := fideslang.Evaluate(&req) + writeJSON(result) + + default: + fmt.Fprintf(os.Stderr, "Unknown command: %s\nUse 'purpose' or 'policy-rule'\n", command) + os.Exit(1) + } +} + +func writeJSON(v interface{}) { + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(v); err != nil { + fmt.Fprintf(os.Stderr, "Error encoding JSON: %v\n", err) + os.Exit(1) + } +} diff --git a/policy-engine/go.mod b/policy-engine/go.mod new file mode 100644 index 00000000000..9703fd57ec4 --- /dev/null +++ b/policy-engine/go.mod @@ -0,0 +1,3 @@ +module github.com/ethyca/fides/policy-engine + 
+go 1.23 diff --git a/policy-engine/pkg/fideslang/evaluate.go b/policy-engine/pkg/fideslang/evaluate.go new file mode 100644 index 00000000000..013224c5469 --- /dev/null +++ b/policy-engine/pkg/fideslang/evaluate.go @@ -0,0 +1,167 @@ +package fideslang + +import ( + "fmt" + "time" +) + +// EvaluatePolicyRule evaluates a single policy rule against a privacy declaration. +// Returns violations if the declaration violates the rule, empty slice otherwise. +func EvaluatePolicyRule(idx *TaxonomyIndex, rule *PolicyRule, decl *PrivacyDeclaration) []Violation { + // Build hierarchies for data categories + categoryHierarchies := make([][]string, 0, len(decl.DataCategories)) + for _, cat := range decl.DataCategories { + hierarchy := idx.GetCategoryHierarchy(cat) + categoryHierarchies = append(categoryHierarchies, hierarchy) + } + + // Evaluate data categories + categoryViolations := compareRuleToDeclaration( + rule.DataCategories.Values, + categoryHierarchies, + rule.DataCategories.Matches, + ) + + // Build hierarchy for data use (single value, wrapped in slice of slices) + useHierarchies := [][]string{idx.GetUseHierarchy(decl.DataUse)} + + // Evaluate data uses + useViolations := compareRuleToDeclaration( + rule.DataUses.Values, + useHierarchies, + rule.DataUses.Matches, + ) + + // Build hierarchies for data subjects + subjectHierarchies := make([][]string, 0, len(decl.DataSubjects)) + for _, subj := range decl.DataSubjects { + subjectHierarchies = append(subjectHierarchies, idx.GetSubjectHierarchy(subj)) + } + + // Evaluate data subjects + subjectViolations := compareRuleToDeclaration( + rule.DataSubjects.Values, + subjectHierarchies, + rule.DataSubjects.Matches, + ) + + // Violation occurs only if ALL THREE dimensions match (AND logic) + if len(categoryViolations) > 0 && len(useViolations) > 0 && len(subjectViolations) > 0 { + return []Violation{ + { + Detail: fmt.Sprintf( + "Declaration violates rule '%s'. 
Violated usage of data categories (%v) for data uses (%v) and subjects (%v)", + rule.Name, + categoryViolations, + useViolations, + subjectViolations, + ), + ViolatingAttributes: ViolatingAttributes{ + DataCategories: categoryViolations, + DataUses: useViolations, + DataSubjects: subjectViolations, + }, + }, + } + } + + return nil +} + +// compareRuleToDeclaration implements the match mode logic. +// +// Match modes: +// - ANY: violation if ANY rule_type appears in ANY hierarchy +// - ALL: violation if ALL declaration types are covered by rule types +// - NONE: violation if NO matches occur (returns the non-matching types) +// - OTHER: violation if declaration types are NOT in the rule (returns non-matching) +func compareRuleToDeclaration(ruleTypes []string, declarationHierarchies [][]string, matchMode MatchMode) []string { + ruleTypeSet := make(map[string]bool, len(ruleTypes)) + for _, rt := range ruleTypes { + ruleTypeSet[rt] = true + } + + matched := make(map[string]bool) + mismatched := make(map[string]bool) + + for _, hierarchy := range declarationHierarchies { + if len(hierarchy) == 0 { + continue + } + declaredType := hierarchy[0] // The leaf node (actual declared type) + foundMatch := false + + for _, h := range hierarchy { + if ruleTypeSet[h] { + foundMatch = true + break + } + } + + if foundMatch { + matched[declaredType] = true + } else { + mismatched[declaredType] = true + } + } + + matchedList := mapKeysToSlice(matched) + mismatchedList := mapKeysToSlice(mismatched) + + switch matchMode { + case MatchAny: + return matchedList + + case MatchAll: + if len(matched) == len(declarationHierarchies) && len(declarationHierarchies) > 0 { + return matchedList + } + return nil + + case MatchNone: + if len(matched) == 0 && len(mismatched) > 0 { + return mismatchedList + } + return nil + + case MatchOther: + return mismatchedList + + default: + return nil + } +} + +// Evaluate performs a full policy evaluation and returns the response. 
+func Evaluate(req *EvaluateRequest) *EvaluateResponse { + start := time.Now() + + idx := NewTaxonomyIndex(&req.Taxonomy) + violations := EvaluatePolicyRule(idx, &req.PolicyRule, &req.PrivacyDeclaration) + + status := "PASS" + if len(violations) > 0 { + status = "FAIL" + } + + if violations == nil { + violations = []Violation{} + } + + return &EvaluateResponse{ + Status: status, + Violations: violations, + EvaluationTimeUs: time.Since(start).Microseconds(), + } +} + +func mapKeysToSlice(m map[string]bool) []string { + if len(m) == 0 { + return nil + } + result := make([]string, 0, len(m)) + for k := range m { + result = append(result, k) + } + return result +} diff --git a/policy-engine/pkg/fideslang/evaluate_test.go b/policy-engine/pkg/fideslang/evaluate_test.go new file mode 100644 index 00000000000..de9463a4841 --- /dev/null +++ b/policy-engine/pkg/fideslang/evaluate_test.go @@ -0,0 +1,261 @@ +package fideslang + +import ( + "testing" +) + +func strPtr(s string) *string { return &s } + +func buildTestTaxonomy() Taxonomy { + return Taxonomy{ + DataCategory: []TaxonomyEntry{ + {FidesKey: "user", ParentKey: nil}, + {FidesKey: "user.contact", ParentKey: strPtr("user")}, + {FidesKey: "user.contact.email", ParentKey: strPtr("user.contact")}, + {FidesKey: "user.financial", ParentKey: strPtr("user")}, + {FidesKey: "user.financial.bank_account", ParentKey: strPtr("user.financial")}, + {FidesKey: "system", ParentKey: nil}, + {FidesKey: "system.operations", ParentKey: strPtr("system")}, + }, + DataUse: []TaxonomyEntry{ + {FidesKey: "marketing", ParentKey: nil}, + {FidesKey: "marketing.advertising", ParentKey: strPtr("marketing")}, + {FidesKey: "analytics", ParentKey: nil}, + {FidesKey: "essential", ParentKey: nil}, + {FidesKey: "essential.service", ParentKey: strPtr("essential")}, + }, + DataSubject: []TaxonomyEntry{ + {FidesKey: "customer", ParentKey: nil}, + {FidesKey: "employee", ParentKey: nil}, + }, + } +} + +func TestEvaluate_PASS_NoViolation(t *testing.T) { + req := 
&EvaluateRequest{ + Taxonomy: buildTestTaxonomy(), + PolicyRule: PolicyRule{ + Name: "block-marketing-email", + DataCategories: PolicyRuleTarget{ + Values: []string{"user.financial"}, + Matches: MatchAny, + }, + DataUses: PolicyRuleTarget{ + Values: []string{"marketing"}, + Matches: MatchAny, + }, + DataSubjects: PolicyRuleTarget{ + Values: []string{"customer"}, + Matches: MatchAny, + }, + }, + PrivacyDeclaration: PrivacyDeclaration{ + DataCategories: []string{"user.contact.email"}, + DataUse: "essential.service", + DataSubjects: []string{"customer"}, + }, + } + + resp := Evaluate(req) + + if resp.Status != "PASS" { + t.Errorf("expected PASS, got %s", resp.Status) + } + if len(resp.Violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(resp.Violations)) + } +} + +func TestEvaluate_FAIL_AllDimensionsMatch(t *testing.T) { + req := &EvaluateRequest{ + Taxonomy: buildTestTaxonomy(), + PolicyRule: PolicyRule{ + Name: "block-marketing-pii", + DataCategories: PolicyRuleTarget{ + Values: []string{"user"}, + Matches: MatchAny, + }, + DataUses: PolicyRuleTarget{ + Values: []string{"marketing"}, + Matches: MatchAny, + }, + DataSubjects: PolicyRuleTarget{ + Values: []string{"customer"}, + Matches: MatchAny, + }, + }, + PrivacyDeclaration: PrivacyDeclaration{ + DataCategories: []string{"user.contact.email"}, + DataUse: "marketing.advertising", + DataSubjects: []string{"customer"}, + }, + } + + resp := Evaluate(req) + + if resp.Status != "FAIL" { + t.Errorf("expected FAIL, got %s", resp.Status) + } + if len(resp.Violations) != 1 { + t.Fatalf("expected 1 violation, got %d", len(resp.Violations)) + } +} + +func TestEvaluate_PASS_UseMismatch(t *testing.T) { + // Rule targets marketing, but declaration uses essential — no violation + req := &EvaluateRequest{ + Taxonomy: buildTestTaxonomy(), + PolicyRule: PolicyRule{ + Name: "block-marketing-user-data", + DataCategories: PolicyRuleTarget{ + Values: []string{"user"}, + Matches: MatchAny, + }, + DataUses: PolicyRuleTarget{ 
+ Values: []string{"marketing"}, + Matches: MatchAny, + }, + DataSubjects: PolicyRuleTarget{ + Values: []string{"customer"}, + Matches: MatchAny, + }, + }, + PrivacyDeclaration: PrivacyDeclaration{ + DataCategories: []string{"user.contact.email"}, + DataUse: "essential.service", + DataSubjects: []string{"customer"}, + }, + } + + resp := Evaluate(req) + + if resp.Status != "PASS" { + t.Errorf("expected PASS, got %s", resp.Status) + } +} + +func TestHierarchyMatching_ParentMatchesChild(t *testing.T) { + tax := buildTestTaxonomy() + idx := NewTaxonomyIndex(&tax) + + // "user" should match "user.contact.email" via hierarchy + h := idx.GetCategoryHierarchy("user.contact.email") + if len(h) != 3 { + t.Fatalf("expected hierarchy of length 3, got %d: %v", len(h), h) + } + if h[0] != "user.contact.email" || h[1] != "user.contact" || h[2] != "user" { + t.Errorf("unexpected hierarchy: %v", h) + } +} + +func TestMatchMode_ALL(t *testing.T) { + // ALL: violation only if every declared type is covered + tax := buildTestTaxonomy() + idx := NewTaxonomyIndex(&tax) + + rule := &PolicyRule{ + Name: "test-all", + DataCategories: PolicyRuleTarget{ + Values: []string{"user"}, + Matches: MatchAll, + }, + DataUses: PolicyRuleTarget{ + Values: []string{"marketing"}, + Matches: MatchAny, + }, + DataSubjects: PolicyRuleTarget{ + Values: []string{"customer"}, + Matches: MatchAny, + }, + } + + // Both categories are under "user" → ALL matched → violation + decl := &PrivacyDeclaration{ + DataCategories: []string{"user.contact.email", "user.financial.bank_account"}, + DataUse: "marketing.advertising", + DataSubjects: []string{"customer"}, + } + + violations := EvaluatePolicyRule(idx, rule, decl) + if len(violations) == 0 { + t.Error("expected violation when ALL categories match") + } + + // Mix of user + system → not ALL user → no category violation → PASS + decl2 := &PrivacyDeclaration{ + DataCategories: []string{"user.contact.email", "system.operations"}, + DataUse: "marketing.advertising", + 
DataSubjects: []string{"customer"}, + } + + violations2 := EvaluatePolicyRule(idx, rule, decl2) + if len(violations2) != 0 { + t.Error("expected no violation when not ALL categories match") + } +} + +func TestMatchMode_NONE(t *testing.T) { + tax := buildTestTaxonomy() + idx := NewTaxonomyIndex(&tax) + + rule := &PolicyRule{ + Name: "test-none", + DataCategories: PolicyRuleTarget{ + Values: []string{"user.financial"}, + Matches: MatchNone, + }, + DataUses: PolicyRuleTarget{ + Values: []string{"marketing"}, + Matches: MatchAny, + }, + DataSubjects: PolicyRuleTarget{ + Values: []string{"customer"}, + Matches: MatchAny, + }, + } + + // No financial data → NONE matches (violation because no overlap is the problem) + decl := &PrivacyDeclaration{ + DataCategories: []string{"user.contact.email"}, + DataUse: "marketing.advertising", + DataSubjects: []string{"customer"}, + } + + violations := EvaluatePolicyRule(idx, rule, decl) + if len(violations) == 0 { + t.Error("expected violation when NONE mode has no matches") + } +} + +func TestMatchMode_OTHER(t *testing.T) { + tax := buildTestTaxonomy() + idx := NewTaxonomyIndex(&tax) + + rule := &PolicyRule{ + Name: "test-other", + DataCategories: PolicyRuleTarget{ + Values: []string{"user.contact"}, + Matches: MatchOther, + }, + DataUses: PolicyRuleTarget{ + Values: []string{"marketing"}, + Matches: MatchAny, + }, + DataSubjects: PolicyRuleTarget{ + Values: []string{"customer"}, + Matches: MatchAny, + }, + } + + // system.operations is NOT under user.contact → OTHER catches it + decl := &PrivacyDeclaration{ + DataCategories: []string{"user.contact.email", "system.operations"}, + DataUse: "marketing.advertising", + DataSubjects: []string{"customer"}, + } + + violations := EvaluatePolicyRule(idx, rule, decl) + if len(violations) == 0 { + t.Error("expected violation when OTHER mode finds non-matching types") + } +} diff --git a/policy-engine/pkg/fideslang/taxonomy.go b/policy-engine/pkg/fideslang/taxonomy.go new file mode 100644 index 
00000000000..952fc045aba --- /dev/null +++ b/policy-engine/pkg/fideslang/taxonomy.go @@ -0,0 +1,64 @@ +package fideslang + +// TaxonomyIndex provides fast lookups for taxonomy hierarchies. +type TaxonomyIndex struct { + dataCategory map[string]*TaxonomyEntry + dataUse map[string]*TaxonomyEntry + dataSubject map[string]*TaxonomyEntry +} + +// NewTaxonomyIndex creates an indexed taxonomy for efficient lookups. +func NewTaxonomyIndex(t *Taxonomy) *TaxonomyIndex { + idx := &TaxonomyIndex{ + dataCategory: make(map[string]*TaxonomyEntry, len(t.DataCategory)), + dataUse: make(map[string]*TaxonomyEntry, len(t.DataUse)), + dataSubject: make(map[string]*TaxonomyEntry, len(t.DataSubject)), + } + for i := range t.DataCategory { + idx.dataCategory[t.DataCategory[i].FidesKey] = &t.DataCategory[i] + } + for i := range t.DataUse { + idx.dataUse[t.DataUse[i].FidesKey] = &t.DataUse[i] + } + for i := range t.DataSubject { + idx.dataSubject[t.DataSubject[i].FidesKey] = &t.DataSubject[i] + } + return idx +} + +// GetCategoryHierarchy returns the hierarchy from the given key to root. +// Example: "user.contact.email" -> ["user.contact.email", "user.contact", "user"] +func (idx *TaxonomyIndex) GetCategoryHierarchy(fidesKey string) []string { + return idx.getHierarchy(fidesKey, idx.dataCategory) +} + +// GetUseHierarchy returns the hierarchy from the given key to root. +func (idx *TaxonomyIndex) GetUseHierarchy(fidesKey string) []string { + return idx.getHierarchy(fidesKey, idx.dataUse) +} + +// GetSubjectHierarchy returns the hierarchy for a data subject. +// Subjects don't have a hierarchical structure, so this returns just the key. +func (idx *TaxonomyIndex) GetSubjectHierarchy(fidesKey string) []string { + return []string{fidesKey} +} + +// getHierarchy traverses the parent chain and builds the hierarchy list. 
+func (idx *TaxonomyIndex) getHierarchy(fidesKey string, entries map[string]*TaxonomyEntry) []string { + hierarchy := make([]string, 0, 4) + currentKey := fidesKey + visited := make(map[string]bool) + + for currentKey != "" && !visited[currentKey] { + visited[currentKey] = true + hierarchy = append(hierarchy, currentKey) + + entry, ok := entries[currentKey] + if !ok || entry.ParentKey == nil { + break + } + currentKey = *entry.ParentKey + } + + return hierarchy +} diff --git a/policy-engine/pkg/fideslang/types.go b/policy-engine/pkg/fideslang/types.go new file mode 100644 index 00000000000..266edd91fbf --- /dev/null +++ b/policy-engine/pkg/fideslang/types.go @@ -0,0 +1,79 @@ +// Package fideslang implements the Fideslang policy rule evaluation algorithm. +// +// It evaluates whether a privacy declaration violates a policy rule by matching +// data categories, data uses, and data subjects against the fideslang taxonomy +// hierarchy. This is the Go translation of the evaluation engine used in +// fides/src/fides/api/util/ and benchmarked in Adrian's sidecar POC. +package fideslang + +// MatchMode represents the matching mode for policy rules. +type MatchMode string + +const ( + MatchAny MatchMode = "ANY" + MatchAll MatchMode = "ALL" + MatchNone MatchMode = "NONE" + MatchOther MatchMode = "OTHER" +) + +// PolicyRuleTarget represents a rule's constraint on a specific dimension +// (categories, uses, or subjects). +type PolicyRuleTarget struct { + Values []string `json:"values"` + Matches MatchMode `json:"matches"` +} + +// PolicyRule represents a single rule within a policy. +type PolicyRule struct { + Name string `json:"name"` + DataCategories PolicyRuleTarget `json:"data_categories"` + DataUses PolicyRuleTarget `json:"data_uses"` + DataSubjects PolicyRuleTarget `json:"data_subjects"` +} + +// PrivacyDeclaration represents a system's declaration of data usage. 
+type PrivacyDeclaration struct { + DataCategories []string `json:"data_categories"` + DataUse string `json:"data_use"` + DataSubjects []string `json:"data_subjects"` +} + +// TaxonomyEntry represents a single entry in a taxonomy hierarchy. +type TaxonomyEntry struct { + FidesKey string `json:"fides_key"` + ParentKey *string `json:"parent_key,omitempty"` +} + +// Taxonomy holds the taxonomy data for evaluation. +type Taxonomy struct { + DataCategory []TaxonomyEntry `json:"data_category"` + DataUse []TaxonomyEntry `json:"data_use"` + DataSubject []TaxonomyEntry `json:"data_subject"` +} + +// ViolatingAttributes captures the specific attributes that violated a rule. +type ViolatingAttributes struct { + DataCategories []string `json:"data_categories"` + DataUses []string `json:"data_uses"` + DataSubjects []string `json:"data_subjects"` +} + +// Violation represents a policy violation. +type Violation struct { + Detail string `json:"detail"` + ViolatingAttributes ViolatingAttributes `json:"violating_attributes"` +} + +// EvaluateRequest is the input for policy evaluation. +type EvaluateRequest struct { + Taxonomy Taxonomy `json:"taxonomy"` + PolicyRule PolicyRule `json:"policy_rule"` + PrivacyDeclaration PrivacyDeclaration `json:"privacy_declaration"` +} + +// EvaluateResponse is the output of policy evaluation. +type EvaluateResponse struct { + Status string `json:"status"` // "PASS" or "FAIL" + Violations []Violation `json:"violations"` + EvaluationTimeUs int64 `json:"evaluation_time_us"` +} diff --git a/policy-engine/pkg/pbac/evaluate.go b/policy-engine/pkg/pbac/evaluate.go new file mode 100644 index 00000000000..396ca48c064 --- /dev/null +++ b/policy-engine/pkg/pbac/evaluate.go @@ -0,0 +1,138 @@ +package pbac + +// EvaluatePurpose checks dataset accesses against purpose assignments. +// +// Rules (matching the Python engine in fides/service/pbac/evaluate.py): +// +// 1. 
If the consumer has NO declared purposes, every dataset access is +// recorded as an identity gap (not a violation). +// 2. If a dataset has declared purposes AND the consumer's purposes do not +// intersect with the dataset's effective purposes, it is a violation. +// 3. If a dataset has NO declared purposes, it is recorded as a dataset +// gap (not a violation). +func EvaluatePurpose( + consumer ConsumerPurposes, + datasets map[string]DatasetPurposes, + collections map[string][]string, +) PurposeEvaluationResult { + if collections == nil { + collections = map[string][]string{} + } + + var violations []PurposeViolation + var gaps []EvaluationGap + totalAccesses := 0 + + consumerPurposeSet := toSet(consumer.PurposeKeys) + + // Rule 1: consumer has no purposes — record as identity gap + if len(consumer.PurposeKeys) == 0 { + for datasetKey := range datasets { + totalAccesses++ + dk := datasetKey + gaps = append(gaps, EvaluationGap{ + GapType: GapUnresolvedIdentity, + Identifier: consumer.ConsumerID, + DatasetKey: &dk, + Reason: "Consumer has no declared purposes", + }) + } + return PurposeEvaluationResult{ + Violations: ensureViolations(violations), + Gaps: ensureGaps(gaps), + TotalAccesses: totalAccesses, + } + } + + for datasetKey, dsPurposes := range datasets { + dk := datasetKey + datasetCollections := collections[datasetKey] + + if len(datasetCollections) > 0 { + for _, collection := range datasetCollections { + totalAccesses++ + col := collection + result := checkAccess(consumerPurposeSet, consumer, &dsPurposes, dk, &col) + switch r := result.(type) { + case PurposeViolation: + violations = append(violations, r) + case EvaluationGap: + gaps = append(gaps, r) + } + } + } else { + totalAccesses++ + result := checkAccess(consumerPurposeSet, consumer, &dsPurposes, dk, nil) + switch r := result.(type) { + case PurposeViolation: + violations = append(violations, r) + case EvaluationGap: + gaps = append(gaps, r) + } + } + } + + return PurposeEvaluationResult{ + 
Violations: ensureViolations(violations), + Gaps: ensureGaps(gaps), + TotalAccesses: totalAccesses, + } +} + +// checkAccess checks a single dataset/collection access against consumer purposes. +// Returns a PurposeViolation, EvaluationGap, or nil (compliant). +func checkAccess( + consumerPurposeSet map[string]bool, + consumer ConsumerPurposes, + dsPurposes *DatasetPurposes, + datasetKey string, + collection *string, +) interface{} { + col := "" + if collection != nil { + col = *collection + } + effective := dsPurposes.EffectivePurposes(col) + + // Rule 3: no effective purposes → dataset gap + if len(effective) == 0 { + dk := datasetKey + return EvaluationGap{ + GapType: GapUnconfiguredDataset, + Identifier: datasetKey, + DatasetKey: &dk, + Reason: "Dataset has no declared purposes", + } + } + + // Rule 2: no overlap → violation + if !intersects(consumerPurposeSet, effective) { + return PurposeViolation{ + ConsumerID: consumer.ConsumerID, + ConsumerName: consumer.ConsumerName, + DatasetKey: datasetKey, + Collection: collection, + ConsumerPurposes: sortedKeys(consumerPurposeSet), + DatasetPurposes: sortedKeys(effective), + Reason: violationReason(consumerPurposeSet, effective), + } + } + + // Compliant + return nil +} + +// Ensure non-nil slices for JSON serialization. 
+func ensureViolations(v []PurposeViolation) []PurposeViolation { + if v == nil { + return []PurposeViolation{} + } + return v +} + +func ensureGaps(g []EvaluationGap) []EvaluationGap { + if g == nil { + return []EvaluationGap{} + } + return g +} diff --git a/policy-engine/pkg/pbac/evaluate_test.go b/policy-engine/pkg/pbac/evaluate_test.go new file mode 100644 index 00000000000..c772ebf71b1 --- /dev/null +++ b/policy-engine/pkg/pbac/evaluate_test.go @@ -0,0 +1,213 @@ +package pbac + +import ( + "testing" +) + +func strPtr(s string) *string { return &s } + +func TestRule1_NoConsumerPurposes_ProducesGap(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "consumer-1", + ConsumerName: "test-consumer", + PurposeKeys: []string{}, + } + datasets := map[string]DatasetPurposes{ + "ds_billing": { + DatasetKey: "ds_billing", + PurposeKeys: []string{"billing"}, + }, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + if len(result.Violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(result.Violations)) + } + if len(result.Gaps) != 1 { + t.Fatalf("expected 1 gap, got %d", len(result.Gaps)) + } + if result.Gaps[0].GapType != GapUnresolvedIdentity { + t.Errorf("expected gap type %s, got %s", GapUnresolvedIdentity, result.Gaps[0].GapType) + } + if result.TotalAccesses != 1 { + t.Errorf("expected 1 total access, got %d", result.TotalAccesses) + } +} + +func TestRule2_NoOverlap_ProducesViolation(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "consumer-1", + ConsumerName: "Analytics Pipeline", + PurposeKeys: []string{"analytics"}, + } + datasets := map[string]DatasetPurposes{ + "billing_db": { + DatasetKey: "billing_db", + PurposeKeys: []string{"billing"}, + }, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + if len(result.Violations) != 1 { + t.Fatalf("expected 1 violation, got %d", len(result.Violations)) + } + v := result.Violations[0] + if v.ConsumerID != "consumer-1" { + t.Errorf("expected consumer_id 'consumer-1', 
got '%s'", v.ConsumerID) + } + if v.DatasetKey != "billing_db" { + t.Errorf("expected dataset_key 'billing_db', got '%s'", v.DatasetKey) + } + if len(result.Gaps) != 0 { + t.Errorf("expected 0 gaps, got %d", len(result.Gaps)) + } +} + +func TestRule2_Overlap_Compliant(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "consumer-1", + ConsumerName: "Billing Pipeline", + PurposeKeys: []string{"billing", "analytics"}, + } + datasets := map[string]DatasetPurposes{ + "billing_db": { + DatasetKey: "billing_db", + PurposeKeys: []string{"billing"}, + }, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + if len(result.Violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(result.Violations)) + } + if len(result.Gaps) != 0 { + t.Errorf("expected 0 gaps, got %d", len(result.Gaps)) + } + if result.TotalAccesses != 1 { + t.Errorf("expected 1 total access, got %d", result.TotalAccesses) + } +} + +func TestRule3_NoDatasetPurposes_ProducesGap(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "consumer-1", + ConsumerName: "test-consumer", + PurposeKeys: []string{"analytics"}, + } + datasets := map[string]DatasetPurposes{ + "unknown_db": { + DatasetKey: "unknown_db", + PurposeKeys: []string{}, + }, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + if len(result.Violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(result.Violations)) + } + if len(result.Gaps) != 1 { + t.Fatalf("expected 1 gap, got %d", len(result.Gaps)) + } + if result.Gaps[0].GapType != GapUnconfiguredDataset { + t.Errorf("expected gap type %s, got %s", GapUnconfiguredDataset, result.Gaps[0].GapType) + } +} + +func TestCollectionPurposes_AdditiveInheritance(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "consumer-1", + ConsumerName: "Billing Pipeline", + PurposeKeys: []string{"accounting"}, + } + datasets := map[string]DatasetPurposes{ + "billing_db": { + DatasetKey: "billing_db", + PurposeKeys: []string{"billing"}, + 
CollectionPurposes: map[string][]string{ + "invoices": {"accounting"}, + }, + }, + } + collections := map[string][]string{ + "billing_db": {"invoices"}, + } + + result := EvaluatePurpose(consumer, datasets, collections) + + // "accounting" is in invoices collection purposes, which is unioned + // with dataset "billing" purposes. Consumer has "accounting" which + // overlaps with effective {"billing", "accounting"} → compliant. + if len(result.Violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(result.Violations)) + } + if len(result.Gaps) != 0 { + t.Errorf("expected 0 gaps, got %d", len(result.Gaps)) + } +} + +func TestCollectionPurposes_NoOverlap_Violation(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "consumer-1", + ConsumerName: "Marketing Pipeline", + PurposeKeys: []string{"marketing"}, + } + datasets := map[string]DatasetPurposes{ + "billing_db": { + DatasetKey: "billing_db", + PurposeKeys: []string{"billing"}, + CollectionPurposes: map[string][]string{ + "invoices": {"accounting"}, + }, + }, + } + collections := map[string][]string{ + "billing_db": {"invoices"}, + } + + result := EvaluatePurpose(consumer, datasets, collections) + + if len(result.Violations) != 1 { + t.Fatalf("expected 1 violation, got %d", len(result.Violations)) + } + if result.Violations[0].Collection == nil || *result.Violations[0].Collection != "invoices" { + t.Errorf("expected collection 'invoices'") + } +} + +func TestMultipleDatasets_MixedResults(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "consumer-1", + ConsumerName: "Analytics", + PurposeKeys: []string{"analytics"}, + } + datasets := map[string]DatasetPurposes{ + "analytics_db": { + DatasetKey: "analytics_db", + PurposeKeys: []string{"analytics"}, + }, + "billing_db": { + DatasetKey: "billing_db", + PurposeKeys: []string{"billing"}, + }, + "unknown_db": { + DatasetKey: "unknown_db", + PurposeKeys: []string{}, + }, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + if 
len(result.Violations) != 1 { + t.Errorf("expected 1 violation, got %d", len(result.Violations)) + } + if len(result.Gaps) != 1 { + t.Errorf("expected 1 gap, got %d", len(result.Gaps)) + } + if result.TotalAccesses != 3 { + t.Errorf("expected 3 total accesses, got %d", result.TotalAccesses) + } +} diff --git a/policy-engine/pkg/pbac/types.go b/policy-engine/pkg/pbac/types.go new file mode 100644 index 00000000000..c272d7ccff7 --- /dev/null +++ b/policy-engine/pkg/pbac/types.go @@ -0,0 +1,133 @@ +// Package pbac implements purpose-based access control evaluation. +// +// It mirrors the Python evaluation engine in fides/service/pbac/ and is the +// canonical Go implementation of the PBAC purpose-overlap algorithm. +package pbac + +import ( + "fmt" + "sort" + "strings" +) + +// GapType classifies PBAC coverage gaps. +type GapType string + +const ( + GapUnresolvedIdentity GapType = "unresolved_identity" + GapUnconfiguredConsumer GapType = "unconfigured_consumer" + GapUnconfiguredDataset GapType = "unconfigured_dataset" +) + +// ConsumerPurposes holds the declared purposes for a data consumer. +type ConsumerPurposes struct { + ConsumerID string `json:"consumer_id"` + ConsumerName string `json:"consumer_name"` + PurposeKeys []string `json:"purpose_keys"` +} + +// DatasetPurposes holds the declared purposes for a dataset, including +// per-collection purposes. Purpose inheritance is additive: a collection's +// effective purposes are purpose_keys | collection_purposes[collection]. +type DatasetPurposes struct { + DatasetKey string `json:"dataset_key"` + PurposeKeys []string `json:"purpose_keys"` + CollectionPurposes map[string][]string `json:"collection_purposes,omitempty"` +} + +// EffectivePurposes returns the effective purposes for a collection (additive +// inheritance). If collection is empty, returns dataset-level purposes only. 
+func (d *DatasetPurposes) EffectivePurposes(collection string) map[string]bool { + result := make(map[string]bool, len(d.PurposeKeys)) + for _, k := range d.PurposeKeys { + result[k] = true + } + if collection != "" { + if cp, ok := d.CollectionPurposes[collection]; ok { + for _, k := range cp { + result[k] = true + } + } + } + return result +} + +// PurposeViolation represents a purpose-based access violation. +type PurposeViolation struct { + ConsumerID string `json:"consumer_id"` + ConsumerName string `json:"consumer_name"` + DatasetKey string `json:"dataset_key"` + Collection *string `json:"collection,omitempty"` + ConsumerPurposes []string `json:"consumer_purposes"` + DatasetPurposes []string `json:"dataset_purposes"` + Reason string `json:"reason"` + DataUse *string `json:"data_use,omitempty"` + Control *string `json:"control,omitempty"` +} + +// EvaluationGap represents a gap in PBAC coverage — incomplete configuration, +// not a policy violation. +type EvaluationGap struct { + GapType GapType `json:"gap_type"` + Identifier string `json:"identifier"` + DatasetKey *string `json:"dataset_key,omitempty"` + Reason string `json:"reason"` +} + +// PurposeEvaluationResult is the output from EvaluatePurpose. +type PurposeEvaluationResult struct { + Violations []PurposeViolation `json:"violations"` + Gaps []EvaluationGap `json:"gaps"` + TotalAccesses int `json:"total_accesses"` +} + +// EvaluatePurposeRequest is the JSON request body for purpose evaluation. +type EvaluatePurposeRequest struct { + Consumer ConsumerPurposes `json:"consumer"` + Datasets map[string]DatasetPurposes `json:"datasets"` + Collections map[string][]string `json:"collections,omitempty"` +} + +// Helper: sorted keys from a set for deterministic output. +func sortedKeys(m map[string]bool) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} + +// Helper: build a set from a slice. 
+func toSet(s []string) map[string]bool { + m := make(map[string]bool, len(s)) + for _, v := range s { + m[v] = true + } + return m +} + +// Helper: check if two sets intersect. +func intersects(a, b map[string]bool) bool { + // iterate the smaller set + if len(a) > len(b) { + a, b = b, a + } + for k := range a { + if b[k] { + return true + } + } + return false +} + +// violationReason builds a human-readable reason string matching the Python format. +func violationReason(consumerPurposes, datasetPurposes map[string]bool) string { + cp := sortedKeys(consumerPurposes) + dp := sortedKeys(datasetPurposes) + return fmt.Sprintf( + "Consumer purposes [%s] do not overlap with dataset purposes [%s]", + strings.Join(cp, ", "), + strings.Join(dp, ", "), + ) +} From 1bf8c7f482c8e84412fc05ef626cb312f88755df Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Mon, 13 Apr 2026 20:03:49 -0700 Subject: [PATCH 02/14] =?UTF-8?q?Remove=20pkg/fideslang=20=E2=80=94=20depr?= =?UTF-8?q?ecated=20policy=20evaluation=20algorithm?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fideslang package implemented the old policy rule evaluation (taxonomy hierarchy matching against privacy declarations). This is replaced by the Access Policy v2 system which will be added to pkg/pbac/ as the next step in the PBAC pipeline. 
--- policy-engine/cmd/fides-evaluate/main.go | 17 +- policy-engine/pkg/fideslang/evaluate.go | 167 ------------ policy-engine/pkg/fideslang/evaluate_test.go | 261 ------------------- policy-engine/pkg/fideslang/taxonomy.go | 64 ----- policy-engine/pkg/fideslang/types.go | 79 ------ 5 files changed, 3 insertions(+), 585 deletions(-) delete mode 100644 policy-engine/pkg/fideslang/evaluate.go delete mode 100644 policy-engine/pkg/fideslang/evaluate_test.go delete mode 100644 policy-engine/pkg/fideslang/taxonomy.go delete mode 100644 policy-engine/pkg/fideslang/types.go diff --git a/policy-engine/cmd/fides-evaluate/main.go b/policy-engine/cmd/fides-evaluate/main.go index 3fd34f419b2..87a19a97c58 100644 --- a/policy-engine/cmd/fides-evaluate/main.go +++ b/policy-engine/cmd/fides-evaluate/main.go @@ -1,9 +1,8 @@ -// fides-evaluate is a CLI tool for running the Fides policy evaluation engine. +// fides-evaluate is a CLI tool for running the Fides PBAC evaluation engine. // // Usage: // // echo '{"consumer": {...}, "datasets": {...}}' | fides-evaluate purpose -// echo '{"taxonomy": {...}, "policy_rule": {...}, "privacy_declaration": {...}}' | fides-evaluate policy-rule package main import ( @@ -12,13 +11,12 @@ import ( "io" "os" - "github.com/ethyca/fides/policy-engine/pkg/fideslang" "github.com/ethyca/fides/policy-engine/pkg/pbac" ) func main() { if len(os.Args) < 2 { - fmt.Fprintf(os.Stderr, "Usage: fides-evaluate [file]\n") + fmt.Fprintf(os.Stderr, "Usage: fides-evaluate [file]\n") os.Exit(1) } @@ -51,17 +49,8 @@ func main() { result := pbac.EvaluatePurpose(req.Consumer, req.Datasets, req.Collections) writeJSON(result) - case "policy-rule": - var req fideslang.EvaluateRequest - if err := json.Unmarshal(input, &req); err != nil { - fmt.Fprintf(os.Stderr, "Error parsing JSON: %v\n", err) - os.Exit(1) - } - result := fideslang.Evaluate(&req) - writeJSON(result) - default: - fmt.Fprintf(os.Stderr, "Unknown command: %s\nUse 'purpose' or 'policy-rule'\n", command) + 
fmt.Fprintf(os.Stderr, "Unknown command: %s\nUse 'purpose'\n", command) os.Exit(1) } } diff --git a/policy-engine/pkg/fideslang/evaluate.go b/policy-engine/pkg/fideslang/evaluate.go deleted file mode 100644 index 013224c5469..00000000000 --- a/policy-engine/pkg/fideslang/evaluate.go +++ /dev/null @@ -1,167 +0,0 @@ -package fideslang - -import ( - "fmt" - "time" -) - -// EvaluatePolicyRule evaluates a single policy rule against a privacy declaration. -// Returns violations if the declaration violates the rule, empty slice otherwise. -func EvaluatePolicyRule(idx *TaxonomyIndex, rule *PolicyRule, decl *PrivacyDeclaration) []Violation { - // Build hierarchies for data categories - categoryHierarchies := make([][]string, 0, len(decl.DataCategories)) - for _, cat := range decl.DataCategories { - hierarchy := idx.GetCategoryHierarchy(cat) - categoryHierarchies = append(categoryHierarchies, hierarchy) - } - - // Evaluate data categories - categoryViolations := compareRuleToDeclaration( - rule.DataCategories.Values, - categoryHierarchies, - rule.DataCategories.Matches, - ) - - // Build hierarchy for data use (single value, wrapped in slice of slices) - useHierarchies := [][]string{idx.GetUseHierarchy(decl.DataUse)} - - // Evaluate data uses - useViolations := compareRuleToDeclaration( - rule.DataUses.Values, - useHierarchies, - rule.DataUses.Matches, - ) - - // Build hierarchies for data subjects - subjectHierarchies := make([][]string, 0, len(decl.DataSubjects)) - for _, subj := range decl.DataSubjects { - subjectHierarchies = append(subjectHierarchies, idx.GetSubjectHierarchy(subj)) - } - - // Evaluate data subjects - subjectViolations := compareRuleToDeclaration( - rule.DataSubjects.Values, - subjectHierarchies, - rule.DataSubjects.Matches, - ) - - // Violation occurs only if ALL THREE dimensions match (AND logic) - if len(categoryViolations) > 0 && len(useViolations) > 0 && len(subjectViolations) > 0 { - return []Violation{ - { - Detail: fmt.Sprintf( - "Declaration 
violates rule '%s'. Violated usage of data categories (%v) for data uses (%v) and subjects (%v)", - rule.Name, - categoryViolations, - useViolations, - subjectViolations, - ), - ViolatingAttributes: ViolatingAttributes{ - DataCategories: categoryViolations, - DataUses: useViolations, - DataSubjects: subjectViolations, - }, - }, - } - } - - return nil -} - -// compareRuleToDeclaration implements the match mode logic. -// -// Match modes: -// - ANY: violation if ANY rule_type appears in ANY hierarchy -// - ALL: violation if ALL declaration types are covered by rule types -// - NONE: violation if NO matches occur (returns the non-matching types) -// - OTHER: violation if declaration types are NOT in the rule (returns non-matching) -func compareRuleToDeclaration(ruleTypes []string, declarationHierarchies [][]string, matchMode MatchMode) []string { - ruleTypeSet := make(map[string]bool, len(ruleTypes)) - for _, rt := range ruleTypes { - ruleTypeSet[rt] = true - } - - matched := make(map[string]bool) - mismatched := make(map[string]bool) - - for _, hierarchy := range declarationHierarchies { - if len(hierarchy) == 0 { - continue - } - declaredType := hierarchy[0] // The leaf node (actual declared type) - foundMatch := false - - for _, h := range hierarchy { - if ruleTypeSet[h] { - foundMatch = true - break - } - } - - if foundMatch { - matched[declaredType] = true - } else { - mismatched[declaredType] = true - } - } - - matchedList := mapKeysToSlice(matched) - mismatchedList := mapKeysToSlice(mismatched) - - switch matchMode { - case MatchAny: - return matchedList - - case MatchAll: - if len(matched) == len(declarationHierarchies) && len(declarationHierarchies) > 0 { - return matchedList - } - return nil - - case MatchNone: - if len(matched) == 0 && len(mismatched) > 0 { - return mismatchedList - } - return nil - - case MatchOther: - return mismatchedList - - default: - return nil - } -} - -// Evaluate performs a full policy evaluation and returns the response. 
-func Evaluate(req *EvaluateRequest) *EvaluateResponse { - start := time.Now() - - idx := NewTaxonomyIndex(&req.Taxonomy) - violations := EvaluatePolicyRule(idx, &req.PolicyRule, &req.PrivacyDeclaration) - - status := "PASS" - if len(violations) > 0 { - status = "FAIL" - } - - if violations == nil { - violations = []Violation{} - } - - return &EvaluateResponse{ - Status: status, - Violations: violations, - EvaluationTimeUs: time.Since(start).Microseconds(), - } -} - -func mapKeysToSlice(m map[string]bool) []string { - if len(m) == 0 { - return nil - } - result := make([]string, 0, len(m)) - for k := range m { - result = append(result, k) - } - return result -} diff --git a/policy-engine/pkg/fideslang/evaluate_test.go b/policy-engine/pkg/fideslang/evaluate_test.go deleted file mode 100644 index de9463a4841..00000000000 --- a/policy-engine/pkg/fideslang/evaluate_test.go +++ /dev/null @@ -1,261 +0,0 @@ -package fideslang - -import ( - "testing" -) - -func strPtr(s string) *string { return &s } - -func buildTestTaxonomy() Taxonomy { - return Taxonomy{ - DataCategory: []TaxonomyEntry{ - {FidesKey: "user", ParentKey: nil}, - {FidesKey: "user.contact", ParentKey: strPtr("user")}, - {FidesKey: "user.contact.email", ParentKey: strPtr("user.contact")}, - {FidesKey: "user.financial", ParentKey: strPtr("user")}, - {FidesKey: "user.financial.bank_account", ParentKey: strPtr("user.financial")}, - {FidesKey: "system", ParentKey: nil}, - {FidesKey: "system.operations", ParentKey: strPtr("system")}, - }, - DataUse: []TaxonomyEntry{ - {FidesKey: "marketing", ParentKey: nil}, - {FidesKey: "marketing.advertising", ParentKey: strPtr("marketing")}, - {FidesKey: "analytics", ParentKey: nil}, - {FidesKey: "essential", ParentKey: nil}, - {FidesKey: "essential.service", ParentKey: strPtr("essential")}, - }, - DataSubject: []TaxonomyEntry{ - {FidesKey: "customer", ParentKey: nil}, - {FidesKey: "employee", ParentKey: nil}, - }, - } -} - -func TestEvaluate_PASS_NoViolation(t *testing.T) { - 
req := &EvaluateRequest{ - Taxonomy: buildTestTaxonomy(), - PolicyRule: PolicyRule{ - Name: "block-marketing-email", - DataCategories: PolicyRuleTarget{ - Values: []string{"user.financial"}, - Matches: MatchAny, - }, - DataUses: PolicyRuleTarget{ - Values: []string{"marketing"}, - Matches: MatchAny, - }, - DataSubjects: PolicyRuleTarget{ - Values: []string{"customer"}, - Matches: MatchAny, - }, - }, - PrivacyDeclaration: PrivacyDeclaration{ - DataCategories: []string{"user.contact.email"}, - DataUse: "essential.service", - DataSubjects: []string{"customer"}, - }, - } - - resp := Evaluate(req) - - if resp.Status != "PASS" { - t.Errorf("expected PASS, got %s", resp.Status) - } - if len(resp.Violations) != 0 { - t.Errorf("expected 0 violations, got %d", len(resp.Violations)) - } -} - -func TestEvaluate_FAIL_AllDimensionsMatch(t *testing.T) { - req := &EvaluateRequest{ - Taxonomy: buildTestTaxonomy(), - PolicyRule: PolicyRule{ - Name: "block-marketing-pii", - DataCategories: PolicyRuleTarget{ - Values: []string{"user"}, - Matches: MatchAny, - }, - DataUses: PolicyRuleTarget{ - Values: []string{"marketing"}, - Matches: MatchAny, - }, - DataSubjects: PolicyRuleTarget{ - Values: []string{"customer"}, - Matches: MatchAny, - }, - }, - PrivacyDeclaration: PrivacyDeclaration{ - DataCategories: []string{"user.contact.email"}, - DataUse: "marketing.advertising", - DataSubjects: []string{"customer"}, - }, - } - - resp := Evaluate(req) - - if resp.Status != "FAIL" { - t.Errorf("expected FAIL, got %s", resp.Status) - } - if len(resp.Violations) != 1 { - t.Fatalf("expected 1 violation, got %d", len(resp.Violations)) - } -} - -func TestEvaluate_PASS_UseMismatch(t *testing.T) { - // Rule targets marketing, but declaration uses essential — no violation - req := &EvaluateRequest{ - Taxonomy: buildTestTaxonomy(), - PolicyRule: PolicyRule{ - Name: "block-marketing-user-data", - DataCategories: PolicyRuleTarget{ - Values: []string{"user"}, - Matches: MatchAny, - }, - DataUses: 
PolicyRuleTarget{ - Values: []string{"marketing"}, - Matches: MatchAny, - }, - DataSubjects: PolicyRuleTarget{ - Values: []string{"customer"}, - Matches: MatchAny, - }, - }, - PrivacyDeclaration: PrivacyDeclaration{ - DataCategories: []string{"user.contact.email"}, - DataUse: "essential.service", - DataSubjects: []string{"customer"}, - }, - } - - resp := Evaluate(req) - - if resp.Status != "PASS" { - t.Errorf("expected PASS, got %s", resp.Status) - } -} - -func TestHierarchyMatching_ParentMatchesChild(t *testing.T) { - tax := buildTestTaxonomy() - idx := NewTaxonomyIndex(&tax) - - // "user" should match "user.contact.email" via hierarchy - h := idx.GetCategoryHierarchy("user.contact.email") - if len(h) != 3 { - t.Fatalf("expected hierarchy of length 3, got %d: %v", len(h), h) - } - if h[0] != "user.contact.email" || h[1] != "user.contact" || h[2] != "user" { - t.Errorf("unexpected hierarchy: %v", h) - } -} - -func TestMatchMode_ALL(t *testing.T) { - // ALL: violation only if every declared type is covered - tax := buildTestTaxonomy() - idx := NewTaxonomyIndex(&tax) - - rule := &PolicyRule{ - Name: "test-all", - DataCategories: PolicyRuleTarget{ - Values: []string{"user"}, - Matches: MatchAll, - }, - DataUses: PolicyRuleTarget{ - Values: []string{"marketing"}, - Matches: MatchAny, - }, - DataSubjects: PolicyRuleTarget{ - Values: []string{"customer"}, - Matches: MatchAny, - }, - } - - // Both categories are under "user" → ALL matched → violation - decl := &PrivacyDeclaration{ - DataCategories: []string{"user.contact.email", "user.financial.bank_account"}, - DataUse: "marketing.advertising", - DataSubjects: []string{"customer"}, - } - - violations := EvaluatePolicyRule(idx, rule, decl) - if len(violations) == 0 { - t.Error("expected violation when ALL categories match") - } - - // Mix of user + system → not ALL user → no category violation → PASS - decl2 := &PrivacyDeclaration{ - DataCategories: []string{"user.contact.email", "system.operations"}, - DataUse: 
"marketing.advertising", - DataSubjects: []string{"customer"}, - } - - violations2 := EvaluatePolicyRule(idx, rule, decl2) - if len(violations2) != 0 { - t.Error("expected no violation when not ALL categories match") - } -} - -func TestMatchMode_NONE(t *testing.T) { - tax := buildTestTaxonomy() - idx := NewTaxonomyIndex(&tax) - - rule := &PolicyRule{ - Name: "test-none", - DataCategories: PolicyRuleTarget{ - Values: []string{"user.financial"}, - Matches: MatchNone, - }, - DataUses: PolicyRuleTarget{ - Values: []string{"marketing"}, - Matches: MatchAny, - }, - DataSubjects: PolicyRuleTarget{ - Values: []string{"customer"}, - Matches: MatchAny, - }, - } - - // No financial data → NONE matches (violation because no overlap is the problem) - decl := &PrivacyDeclaration{ - DataCategories: []string{"user.contact.email"}, - DataUse: "marketing.advertising", - DataSubjects: []string{"customer"}, - } - - violations := EvaluatePolicyRule(idx, rule, decl) - if len(violations) == 0 { - t.Error("expected violation when NONE mode has no matches") - } -} - -func TestMatchMode_OTHER(t *testing.T) { - tax := buildTestTaxonomy() - idx := NewTaxonomyIndex(&tax) - - rule := &PolicyRule{ - Name: "test-other", - DataCategories: PolicyRuleTarget{ - Values: []string{"user.contact"}, - Matches: MatchOther, - }, - DataUses: PolicyRuleTarget{ - Values: []string{"marketing"}, - Matches: MatchAny, - }, - DataSubjects: PolicyRuleTarget{ - Values: []string{"customer"}, - Matches: MatchAny, - }, - } - - // system.operations is NOT under user.contact → OTHER catches it - decl := &PrivacyDeclaration{ - DataCategories: []string{"user.contact.email", "system.operations"}, - DataUse: "marketing.advertising", - DataSubjects: []string{"customer"}, - } - - violations := EvaluatePolicyRule(idx, rule, decl) - if len(violations) == 0 { - t.Error("expected violation when OTHER mode finds non-matching types") - } -} diff --git a/policy-engine/pkg/fideslang/taxonomy.go b/policy-engine/pkg/fideslang/taxonomy.go 
deleted file mode 100644 index 952fc045aba..00000000000 --- a/policy-engine/pkg/fideslang/taxonomy.go +++ /dev/null @@ -1,64 +0,0 @@ -package fideslang - -// TaxonomyIndex provides fast lookups for taxonomy hierarchies. -type TaxonomyIndex struct { - dataCategory map[string]*TaxonomyEntry - dataUse map[string]*TaxonomyEntry - dataSubject map[string]*TaxonomyEntry -} - -// NewTaxonomyIndex creates an indexed taxonomy for efficient lookups. -func NewTaxonomyIndex(t *Taxonomy) *TaxonomyIndex { - idx := &TaxonomyIndex{ - dataCategory: make(map[string]*TaxonomyEntry, len(t.DataCategory)), - dataUse: make(map[string]*TaxonomyEntry, len(t.DataUse)), - dataSubject: make(map[string]*TaxonomyEntry, len(t.DataSubject)), - } - for i := range t.DataCategory { - idx.dataCategory[t.DataCategory[i].FidesKey] = &t.DataCategory[i] - } - for i := range t.DataUse { - idx.dataUse[t.DataUse[i].FidesKey] = &t.DataUse[i] - } - for i := range t.DataSubject { - idx.dataSubject[t.DataSubject[i].FidesKey] = &t.DataSubject[i] - } - return idx -} - -// GetCategoryHierarchy returns the hierarchy from the given key to root. -// Example: "user.contact.email" -> ["user.contact.email", "user.contact", "user"] -func (idx *TaxonomyIndex) GetCategoryHierarchy(fidesKey string) []string { - return idx.getHierarchy(fidesKey, idx.dataCategory) -} - -// GetUseHierarchy returns the hierarchy from the given key to root. -func (idx *TaxonomyIndex) GetUseHierarchy(fidesKey string) []string { - return idx.getHierarchy(fidesKey, idx.dataUse) -} - -// GetSubjectHierarchy returns the hierarchy for a data subject. -// Subjects don't have a hierarchical structure, so this returns just the key. -func (idx *TaxonomyIndex) GetSubjectHierarchy(fidesKey string) []string { - return []string{fidesKey} -} - -// getHierarchy traverses the parent chain and builds the hierarchy list. 
-func (idx *TaxonomyIndex) getHierarchy(fidesKey string, entries map[string]*TaxonomyEntry) []string { - hierarchy := make([]string, 0, 4) - currentKey := fidesKey - visited := make(map[string]bool) - - for currentKey != "" && !visited[currentKey] { - visited[currentKey] = true - hierarchy = append(hierarchy, currentKey) - - entry, ok := entries[currentKey] - if !ok || entry.ParentKey == nil { - break - } - currentKey = *entry.ParentKey - } - - return hierarchy -} diff --git a/policy-engine/pkg/fideslang/types.go b/policy-engine/pkg/fideslang/types.go deleted file mode 100644 index 266edd91fbf..00000000000 --- a/policy-engine/pkg/fideslang/types.go +++ /dev/null @@ -1,79 +0,0 @@ -// Package fideslang implements the Fideslang policy rule evaluation algorithm. -// -// It evaluates whether a privacy declaration violates a policy rule by matching -// data categories, data uses, and data subjects against the fideslang taxonomy -// hierarchy. This is the Go translation of the evaluation engine used in -// fides/src/fides/api/util/ and benchmarked in Adrian's sidecar POC. -package fideslang - -// MatchMode represents the matching mode for policy rules. -type MatchMode string - -const ( - MatchAny MatchMode = "ANY" - MatchAll MatchMode = "ALL" - MatchNone MatchMode = "NONE" - MatchOther MatchMode = "OTHER" -) - -// PolicyRuleTarget represents a rule's constraint on a specific dimension -// (categories, uses, or subjects). -type PolicyRuleTarget struct { - Values []string `json:"values"` - Matches MatchMode `json:"matches"` -} - -// PolicyRule represents a single rule within a policy. -type PolicyRule struct { - Name string `json:"name"` - DataCategories PolicyRuleTarget `json:"data_categories"` - DataUses PolicyRuleTarget `json:"data_uses"` - DataSubjects PolicyRuleTarget `json:"data_subjects"` -} - -// PrivacyDeclaration represents a system's declaration of data usage. 
-type PrivacyDeclaration struct { - DataCategories []string `json:"data_categories"` - DataUse string `json:"data_use"` - DataSubjects []string `json:"data_subjects"` -} - -// TaxonomyEntry represents a single entry in a taxonomy hierarchy. -type TaxonomyEntry struct { - FidesKey string `json:"fides_key"` - ParentKey *string `json:"parent_key,omitempty"` -} - -// Taxonomy holds the taxonomy data for evaluation. -type Taxonomy struct { - DataCategory []TaxonomyEntry `json:"data_category"` - DataUse []TaxonomyEntry `json:"data_use"` - DataSubject []TaxonomyEntry `json:"data_subject"` -} - -// ViolatingAttributes captures the specific attributes that violated a rule. -type ViolatingAttributes struct { - DataCategories []string `json:"data_categories"` - DataUses []string `json:"data_uses"` - DataSubjects []string `json:"data_subjects"` -} - -// Violation represents a policy violation. -type Violation struct { - Detail string `json:"detail"` - ViolatingAttributes ViolatingAttributes `json:"violating_attributes"` -} - -// EvaluateRequest is the input for policy evaluation. -type EvaluateRequest struct { - Taxonomy Taxonomy `json:"taxonomy"` - PolicyRule PolicyRule `json:"policy_rule"` - PrivacyDeclaration PrivacyDeclaration `json:"privacy_declaration"` -} - -// EvaluateResponse is the output of policy evaluation. -type EvaluateResponse struct { - Status string `json:"status"` // "PASS" or "FAIL" - Violations []Violation `json:"violations"` - EvaluationTimeUs int64 `json:"evaluation_time_us"` -} From 802c12bb76bbaa697a0a29512be5efdc57106158 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Mon, 13 Apr 2026 20:06:40 -0700 Subject: [PATCH 03/14] Add Access Policy v2 evaluation to PBAC package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the policy evaluation algorithm from IMPLEMENTATION_GUIDE.md as step 7 of the PBAC pipeline — filtering violations through access policies created via the UI. 
Features: - Priority-ordered, first-decisive-match-wins evaluation - Match blocks with any/all operators and taxonomy hierarchy matching - Unless conditions: consent, geo_location, data_flow (AND logic) - ALLOW + unless triggered = DENY (decisive) - DENY + unless triggered = SUPPRESSED (continues to next policy) - Audit trail of evaluated policies 25 tests covering priority ordering, match modes, all three unless condition types, multi-constraint AND logic, and taxonomy matching. --- policy-engine/pkg/pbac/policy_evaluate.go | 304 ++++++++++++ .../pkg/pbac/policy_evaluate_test.go | 434 ++++++++++++++++++ policy-engine/pkg/pbac/policy_types.go | 116 +++++ 3 files changed, 854 insertions(+) create mode 100644 policy-engine/pkg/pbac/policy_evaluate.go create mode 100644 policy-engine/pkg/pbac/policy_evaluate_test.go create mode 100644 policy-engine/pkg/pbac/policy_types.go diff --git a/policy-engine/pkg/pbac/policy_evaluate.go b/policy-engine/pkg/pbac/policy_evaluate.go new file mode 100644 index 00000000000..56c747436c8 --- /dev/null +++ b/policy-engine/pkg/pbac/policy_evaluate.go @@ -0,0 +1,304 @@ +package pbac + +import ( + "sort" + "strings" +) + +// EvaluatePolicies evaluates a list of access policies against a request. +// +// Algorithm (from IMPLEMENTATION_GUIDE.md): +// 1. Sort enabled policies by priority (highest first) +// 2. For each policy, check if the match block applies to the request +// 3. If matched, evaluate unless conditions +// 4. Unless triggered + ALLOW → DENY (decisive, stop) +// 5. Unless triggered + DENY → SUPPRESSED (not decisive, continue) +// 6. Unless not triggered → decision stands as-is (decisive, stop) +// 7. 
No policy matched → NO_DECISION +func EvaluatePolicies(policies []AccessPolicy, request *AccessEvaluationRequest) *PolicyEvaluationResult { + // Filter to enabled policies and sort by priority descending + enabled := make([]AccessPolicy, 0, len(policies)) + for _, p := range policies { + if p.Enabled { + enabled = append(enabled, p) + } + } + sort.Slice(enabled, func(i, j int) bool { + return enabled[i].Priority > enabled[j].Priority + }) + + evaluated := make([]EvaluatedPolicyInfo, 0) + + for _, policy := range enabled { + if !matchesRequest(&policy.Match, request) { + continue + } + + unlessTriggered := evaluateUnless(policy.Unless, request) + + if unlessTriggered { + if policy.Decision == PolicyAllow { + // ALLOW inverted to DENY — decisive, stop + key := policy.Key + priority := policy.Priority + return &PolicyEvaluationResult{ + Decision: PolicyDeny, + DecisivePolicyKey: &key, + DecisivePolicyPriority: &priority, + UnlessTriggered: true, + Action: policy.Action, + EvaluatedPolicies: evaluated, + } + } + // DENY suppressed — not decisive, continue + evaluated = append(evaluated, EvaluatedPolicyInfo{ + PolicyKey: policy.Key, + Priority: policy.Priority, + Matched: true, + Result: "SUPPRESSED", + UnlessTriggered: true, + }) + continue + } + + // Decision stands as-is — decisive, stop + key := policy.Key + priority := policy.Priority + var action *PolicyAction + if policy.Decision == PolicyDeny { + action = policy.Action + } + return &PolicyEvaluationResult{ + Decision: policy.Decision, + DecisivePolicyKey: &key, + DecisivePolicyPriority: &priority, + UnlessTriggered: false, + Action: action, + EvaluatedPolicies: evaluated, + } + } + + // No policy matched + return &PolicyEvaluationResult{ + Decision: PolicyNoDecision, + EvaluatedPolicies: evaluated, + } +} + +// matchesRequest checks if a policy's match block applies to the request. +// An empty match block matches everything (catch-all policy). 
+func matchesRequest(match *MatchBlock, req *AccessEvaluationRequest) bool { + if match.DataUse != nil && !matchesDimension(match.DataUse, req.DataUses) { + return false + } + if match.DataCategory != nil && !matchesDimension(match.DataCategory, req.DataCategories) { + return false + } + if match.DataSubject != nil && !matchesDimension(match.DataSubject, req.DataSubjects) { + return false + } + return true +} + +// matchesDimension checks if request values satisfy a match dimension's any/all operators. +// Uses taxonomy prefix matching: "user.contact" matches "user.contact.email". +func matchesDimension(dim *MatchDimension, requestValues []string) bool { + // "any" — at least one match value must appear in request values + if len(dim.Any) > 0 { + found := false + for _, matchVal := range dim.Any { + if taxonomyMatchesAny(matchVal, requestValues) { + found = true + break + } + } + if !found { + return false + } + } + + // "all" — every match value must appear in request values + if len(dim.All) > 0 { + for _, matchVal := range dim.All { + if !taxonomyMatchesAny(matchVal, requestValues) { + return false + } + } + } + + return true +} + +// taxonomyMatchesAny checks if a taxonomy key matches any of the request values. +// A match key "user.contact" matches request value "user.contact.email" because +// it's a parent in the taxonomy hierarchy (prefix match). +func taxonomyMatchesAny(matchKey string, requestValues []string) bool { + for _, reqVal := range requestValues { + if taxonomyMatch(matchKey, reqVal) { + return true + } + } + return false +} + +// taxonomyMatch checks if matchKey is equal to or a parent of requestValue. +// "user.contact" matches "user.contact.email" (prefix + dot boundary). +// "user.contact" matches "user.contact" (exact match). +// "user" does NOT match "user_data" (must be a dot boundary). 
+func taxonomyMatch(matchKey, requestValue string) bool { + if matchKey == requestValue { + return true + } + return strings.HasPrefix(requestValue, matchKey+".") +} + +// evaluateUnless evaluates all unless conditions. All must trigger (AND logic) +// for the unless block to fire. +func evaluateUnless(constraints []Constraint, req *AccessEvaluationRequest) bool { + if len(constraints) == 0 { + return false + } + + for _, c := range constraints { + if !evaluateConstraint(&c, req) { + return false + } + } + return true +} + +// evaluateConstraint evaluates a single unless condition against the request context. +func evaluateConstraint(c *Constraint, req *AccessEvaluationRequest) bool { + switch c.Type { + case ConstraintConsent: + return evaluateConsentConstraint(c, req) + case ConstraintGeoLocation: + return evaluateGeoConstraint(c, req) + case ConstraintDataFlow: + return evaluateDataFlowConstraint(c, req) + default: + return false + } +} + +// evaluateConsentConstraint checks a consent-based unless condition. +// Looks up consent status in request.Context["consent"][privacy_notice_key]. +func evaluateConsentConstraint(c *Constraint, req *AccessEvaluationRequest) bool { + if req.Context == nil { + return false + } + + consentMap, ok := req.Context["consent"].(map[string]interface{}) + if !ok { + return false + } + + status, ok := consentMap[c.PrivacyNoticeKey].(string) + if !ok { + return false + } + + switch c.Requirement { + case "opt_in": + return status == "opt_in" + case "opt_out": + return status == "opt_out" + case "not_opt_in": + return status != "opt_in" + case "not_opt_out": + return status != "opt_out" + default: + return false + } +} + +// evaluateGeoConstraint checks a geo_location-based unless condition. +// Resolves the field path in request.Context and checks against values. 
+func evaluateGeoConstraint(c *Constraint, req *AccessEvaluationRequest) bool { + if req.Context == nil { + return false + } + + value := resolveContextField(req.Context, c.Field) + if value == "" { + return false + } + + valueSet := make(map[string]bool, len(c.Values)) + for _, v := range c.Values { + valueSet[v] = true + } + + switch c.Operator { + case "in": + return valueSet[value] + case "not_in": + return !valueSet[value] + default: + return false + } +} + +// evaluateDataFlowConstraint checks a data_flow-based unless condition. +// Looks up system data flows in request.Context["data_flows"][direction]. +func evaluateDataFlowConstraint(c *Constraint, req *AccessEvaluationRequest) bool { + if req.Context == nil { + return false + } + + flowsMap, ok := req.Context["data_flows"].(map[string]interface{}) + if !ok { + return false + } + + directionFlows, ok := flowsMap[c.Direction].([]interface{}) + if !ok { + return false + } + + systemSet := make(map[string]bool, len(directionFlows)) + for _, s := range directionFlows { + if str, ok := s.(string); ok { + systemSet[str] = true + } + } + + switch c.Operator { + case "any_of": + for _, sys := range c.Systems { + if systemSet[sys] { + return true + } + } + return false + case "none_of": + for _, sys := range c.Systems { + if systemSet[sys] { + return false + } + } + return true + default: + return false + } +} + +// resolveContextField traverses a dotted field path in the context map. +// e.g. 
"environment.geo_location" → context["environment"]["geo_location"] +func resolveContextField(ctx map[string]interface{}, field string) string { + parts := strings.Split(field, ".") + var current interface{} = ctx + + for _, part := range parts { + m, ok := current.(map[string]interface{}) + if !ok { + return "" + } + current = m[part] + } + + if str, ok := current.(string); ok { + return str + } + return "" +} diff --git a/policy-engine/pkg/pbac/policy_evaluate_test.go b/policy-engine/pkg/pbac/policy_evaluate_test.go new file mode 100644 index 00000000000..a0c94c4cb4f --- /dev/null +++ b/policy-engine/pkg/pbac/policy_evaluate_test.go @@ -0,0 +1,434 @@ +package pbac + +import ( + "testing" +) + +func intPtr(i int) *int { return &i } + +func basePolicies() []AccessPolicy { + return []AccessPolicy{ + { + ID: "p1", Key: "allow-marketing", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{ + DataUse: &MatchDimension{Any: []string{"marketing"}}, + }, + }, + { + ID: "p2", Key: "deny-financial", Priority: 200, Enabled: true, + Decision: PolicyDeny, + Match: MatchBlock{ + DataCategory: &MatchDimension{Any: []string{"user.financial"}}, + }, + Action: &PolicyAction{Message: "Financial data access denied"}, + }, + { + ID: "p3", Key: "catch-all-deny", Priority: 0, Enabled: true, + Decision: PolicyDeny, + Match: MatchBlock{}, // empty = matches everything + Action: &PolicyAction{Message: "Default deny"}, + }, + } +} + +func TestEvaluatePolicies_HighestPriorityWins(t *testing.T) { + policies := basePolicies() + req := &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + DataCategories: []string{"user.financial.bank_account"}, + } + + result := EvaluatePolicies(policies, req) + + // Priority 200 (deny-financial) should win over 100 (allow-marketing) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY, got %s", result.Decision) + } + if result.DecisivePolicyKey == nil || *result.DecisivePolicyKey != "deny-financial" { + 
t.Errorf("expected decisive policy 'deny-financial'") + } +} + +func TestEvaluatePolicies_AllowWhenMatched(t *testing.T) { + policies := basePolicies() + req := &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + DataCategories: []string{"user.contact.email"}, + } + + result := EvaluatePolicies(policies, req) + + // deny-financial doesn't match (no user.financial), allow-marketing matches + if result.Decision != PolicyAllow { + t.Errorf("expected ALLOW, got %s", result.Decision) + } + if result.DecisivePolicyKey == nil || *result.DecisivePolicyKey != "allow-marketing" { + t.Errorf("expected decisive policy 'allow-marketing'") + } +} + +func TestEvaluatePolicies_CatchAllDeny(t *testing.T) { + policies := basePolicies() + req := &AccessEvaluationRequest{ + DataUses: []string{"essential.service"}, + DataCategories: []string{"system.operations"}, + } + + result := EvaluatePolicies(policies, req) + + // Nothing specific matches, catch-all (priority 0) kicks in + if result.Decision != PolicyDeny { + t.Errorf("expected DENY, got %s", result.Decision) + } + if result.DecisivePolicyKey == nil || *result.DecisivePolicyKey != "catch-all-deny" { + t.Errorf("expected decisive policy 'catch-all-deny'") + } +} + +func TestEvaluatePolicies_NoDecisionWhenNoPolicies(t *testing.T) { + result := EvaluatePolicies([]AccessPolicy{}, &AccessEvaluationRequest{}) + + if result.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION, got %s", result.Decision) + } +} + +func TestEvaluatePolicies_DisabledPoliciesSkipped(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "disabled", Priority: 100, Enabled: false, + Decision: PolicyDeny, + Match: MatchBlock{}, + }, + } + + result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) + + if result.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION, got %s", result.Decision) + } +} + +// ── Unless condition tests ─────────────────────────────────────────── + +func 
TestUnless_ConsentOptOut_InvertsAllow(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-optout", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{ + DataUse: &MatchDimension{Any: []string{"marketing"}}, + }, + Unless: []Constraint{ + { + Type: ConstraintConsent, + PrivacyNoticeKey: "do_not_sell", + Requirement: "opt_out", + }, + }, + Action: &PolicyAction{Message: "User opted out"}, + }, + } + + req := &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + Context: map[string]interface{}{ + "consent": map[string]interface{}{ + "do_not_sell": "opt_out", + }, + }, + } + + result := EvaluatePolicies(policies, req) + + // ALLOW + unless triggered → DENY + if result.Decision != PolicyDeny { + t.Errorf("expected DENY (inverted ALLOW), got %s", result.Decision) + } + if !result.UnlessTriggered { + t.Error("expected unless_triggered=true") + } +} + +func TestUnless_ConsentNotTriggered_AllowStands(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-optout", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{ + DataUse: &MatchDimension{Any: []string{"marketing"}}, + }, + Unless: []Constraint{ + { + Type: ConstraintConsent, + PrivacyNoticeKey: "do_not_sell", + Requirement: "opt_out", + }, + }, + }, + } + + req := &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + Context: map[string]interface{}{ + "consent": map[string]interface{}{ + "do_not_sell": "opt_in", // NOT opt_out, so unless doesn't trigger + }, + }, + } + + result := EvaluatePolicies(policies, req) + + if result.Decision != PolicyAllow { + t.Errorf("expected ALLOW, got %s", result.Decision) + } + if result.UnlessTriggered { + t.Error("expected unless_triggered=false") + } +} + +func TestUnless_DenySuppressed_ContinuesToNext(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "deny-unless-geo", Priority: 200, Enabled: true, + Decision: PolicyDeny, + 
Match: MatchBlock{}, + Unless: []Constraint{ + { + Type: ConstraintGeoLocation, + Field: "environment.geo_location", + Operator: "in", + Values: []string{"US-CA"}, + }, + }, + }, + { + ID: "p2", Key: "fallback-allow", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{}, + }, + } + + req := &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "environment": map[string]interface{}{ + "geo_location": "US-CA", + }, + }, + } + + result := EvaluatePolicies(policies, req) + + // DENY + unless triggered → SUPPRESSED, continue to fallback-allow + if result.Decision != PolicyAllow { + t.Errorf("expected ALLOW (fallback), got %s", result.Decision) + } + if result.DecisivePolicyKey == nil || *result.DecisivePolicyKey != "fallback-allow" { + t.Errorf("expected decisive policy 'fallback-allow'") + } + // The suppressed policy should be in the audit trail + if len(result.EvaluatedPolicies) != 1 { + t.Fatalf("expected 1 evaluated policy (suppressed), got %d", len(result.EvaluatedPolicies)) + } + if result.EvaluatedPolicies[0].Result != "SUPPRESSED" { + t.Errorf("expected SUPPRESSED, got %s", result.EvaluatedPolicies[0].Result) + } +} + +func TestUnless_GeoNotIn(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "deny-outside-ca", Priority: 100, Enabled: true, + Decision: PolicyDeny, + Match: MatchBlock{}, + Unless: []Constraint{ + { + Type: ConstraintGeoLocation, + Field: "environment.geo_location", + Operator: "not_in", + Values: []string{"US-CA", "US-VA"}, + }, + }, + }, + } + + // User is in US-CA → not_in returns false → unless NOT triggered → DENY stands + req := &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "environment": map[string]interface{}{ + "geo_location": "US-CA", + }, + }, + } + + result := EvaluatePolicies(policies, req) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY, got %s", result.Decision) + } + + // User is in EU-DE → not_in returns true → unless triggered → DENY suppressed + 
req2 := &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "environment": map[string]interface{}{ + "geo_location": "EU-DE", + }, + }, + } + + result2 := EvaluatePolicies(policies, req2) + if result2.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION (deny suppressed, no fallback), got %s", result2.Decision) + } +} + +func TestUnless_DataFlow(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-egress", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{}, + Unless: []Constraint{ + { + Type: ConstraintDataFlow, + Direction: "egress", + Operator: "any_of", + Systems: []string{"external_vendor"}, + }, + }, + }, + } + + // Egress to external_vendor → unless triggers → ALLOW inverted to DENY + req := &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "data_flows": map[string]interface{}{ + "egress": []interface{}{"external_vendor", "partner_api"}, + }, + }, + } + + result := EvaluatePolicies(policies, req) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY (inverted), got %s", result.Decision) + } +} + +func TestUnless_MultipleConstraints_AllMustTrigger(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-both", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{}, + Unless: []Constraint{ + { + Type: ConstraintConsent, + PrivacyNoticeKey: "do_not_sell", + Requirement: "opt_out", + }, + { + Type: ConstraintGeoLocation, + Field: "environment.geo_location", + Operator: "in", + Values: []string{"US-CA"}, + }, + }, + }, + } + + // Only consent triggers, not geo → unless does NOT trigger → ALLOW stands + req := &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{ + "do_not_sell": "opt_out", + }, + "environment": map[string]interface{}{ + "geo_location": "US-NY", + }, + }, + } + + result := EvaluatePolicies(policies, req) + if result.Decision != PolicyAllow { + t.Errorf("expected 
ALLOW (only one constraint triggered), got %s", result.Decision) + } + + // Both trigger → ALLOW inverted to DENY + req2 := &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{ + "do_not_sell": "opt_out", + }, + "environment": map[string]interface{}{ + "geo_location": "US-CA", + }, + }, + } + + result2 := EvaluatePolicies(policies, req2) + if result2.Decision != PolicyDeny { + t.Errorf("expected DENY (both constraints triggered), got %s", result2.Decision) + } +} + +// ── Taxonomy matching tests ────────────────────────────────────────── + +func TestTaxonomyMatch_ExactMatch(t *testing.T) { + if !taxonomyMatch("user.contact", "user.contact") { + t.Error("expected exact match") + } +} + +func TestTaxonomyMatch_ParentMatchesChild(t *testing.T) { + if !taxonomyMatch("user.contact", "user.contact.email") { + t.Error("expected parent to match child") + } +} + +func TestTaxonomyMatch_ChildDoesNotMatchParent(t *testing.T) { + if taxonomyMatch("user.contact.email", "user.contact") { + t.Error("child should not match parent") + } +} + +func TestTaxonomyMatch_NoDotBoundaryFalsePositive(t *testing.T) { + if taxonomyMatch("user", "user_data") { + t.Error("should not match across non-dot boundary") + } +} + +func TestMatchDimension_Any(t *testing.T) { + dim := &MatchDimension{Any: []string{"marketing", "analytics"}} + if !matchesDimension(dim, []string{"marketing.advertising"}) { + t.Error("expected any to match") + } + if matchesDimension(dim, []string{"essential.service"}) { + t.Error("expected any to not match") + } +} + +func TestMatchDimension_All(t *testing.T) { + dim := &MatchDimension{All: []string{"user.contact", "user.financial"}} + + // Both present + if !matchesDimension(dim, []string{"user.contact.email", "user.financial.bank_account"}) { + t.Error("expected all to match when both present") + } + + // Only one present + if matchesDimension(dim, []string{"user.contact.email"}) { + t.Error("expected all to not match when 
one missing") + } +} + +func TestMatchBlock_EmptyMatchesEverything(t *testing.T) { + match := &MatchBlock{} + req := &AccessEvaluationRequest{ + DataUses: []string{"marketing"}, + DataCategories: []string{"user.contact"}, + DataSubjects: []string{"customer"}, + } + if !matchesRequest(match, req) { + t.Error("empty match block should match everything") + } +} diff --git a/policy-engine/pkg/pbac/policy_types.go b/policy-engine/pkg/pbac/policy_types.go new file mode 100644 index 00000000000..dc32a7777eb --- /dev/null +++ b/policy-engine/pkg/pbac/policy_types.go @@ -0,0 +1,116 @@ +package pbac + +// PolicyDecision is the outcome of an access policy evaluation. +type PolicyDecision string + +const ( + PolicyAllow PolicyDecision = "ALLOW" + PolicyDeny PolicyDecision = "DENY" + PolicyNoDecision PolicyDecision = "NO_DECISION" +) + +// AccessPolicy represents a parsed YAML access policy ready for evaluation. +type AccessPolicy struct { + ID string `json:"id"` + Key string `json:"key"` + Priority int `json:"priority"` + Enabled bool `json:"enabled"` + Decision PolicyDecision `json:"decision"` // ALLOW or DENY + Match MatchBlock `json:"match"` + Unless []Constraint `json:"unless,omitempty"` + Action *PolicyAction `json:"action,omitempty"` +} + +// MatchBlock declares which taxonomy dimensions a policy applies to. +// An empty MatchBlock matches everything (catch-all). +type MatchBlock struct { + DataUse *MatchDimension `json:"data_use,omitempty"` + DataCategory *MatchDimension `json:"data_category,omitempty"` + DataSubject *MatchDimension `json:"data_subject,omitempty"` +} + +// MatchDimension specifies the any/all operators for one taxonomy dimension. +type MatchDimension struct { + Any []string `json:"any,omitempty"` // at least one must match + All []string `json:"all,omitempty"` // all must match +} + +// ConstraintType identifies the kind of unless condition. 
type ConstraintType string

// Supported constraint kinds. Each kind consults a different group of
// Constraint fields:
//
//   - consent:      PrivacyNoticeKey, Requirement
//   - geo_location: Field, Operator ("in"/"not_in"), Values
//   - data_flow:    Direction, Operator ("any_of"/"none_of"), Systems
const (
	ConstraintConsent     ConstraintType = "consent"
	ConstraintGeoLocation ConstraintType = "geo_location"
	ConstraintDataFlow    ConstraintType = "data_flow"
)

// Constraint is one condition in an unless block.
// All constraints in a block are AND'd — all must trigger for the unless to fire.
//
// The struct is a flat union: only the fields belonging to Type are read, the
// rest are left at their zero value and omitted from JSON.
type Constraint struct {
	Type ConstraintType `json:"type"`

	// Consent fields
	PrivacyNoticeKey string `json:"privacy_notice_key,omitempty"` // per the tests, the key looked up under Context["consent"]
	Requirement      string `json:"requirement,omitempty"`        // opt_in, opt_out, not_opt_in, not_opt_out

	// Geo location fields (Operator is shared with the data flow fields)
	Field    string   `json:"field,omitempty"`    // dotted path into the request Context, e.g. "environment.geo_location"
	Operator string   `json:"operator,omitempty"` // geo_location: "in", "not_in"; data_flow: "any_of", "none_of"; anything else never triggers
	Values   []string `json:"values,omitempty"`   // candidates the string resolved from Field is tested against

	// Data flow fields
	Direction string   `json:"direction,omitempty"` // "ingress", "egress"; used as the key under Context["data_flows"]
	Systems   []string `json:"systems,omitempty"`   // system keys tested against the flows recorded for Direction
}

// PolicyAction is the action block from a decisive policy.
type PolicyAction struct {
	Message string `json:"message,omitempty"` // free-form text, e.g. the reason shown for a denial
}

// AccessEvaluationRequest is the context provided to the policy evaluator
// after a PBAC purpose violation is detected.
type AccessEvaluationRequest struct {
	// From PBAC violation
	ConsumerID       string   `json:"consumer_id"`
	ConsumerName     string   `json:"consumer_name"`
	ConsumerPurposes []string `json:"consumer_purposes"`
	DatasetKey       string   `json:"dataset_key"`
	DatasetPurposes  []string `json:"dataset_purposes"`
	Collection       *string  `json:"collection,omitempty"`

	// For policy match resolution — from the consumer's system declarations.
	// These are the values a policy's MatchBlock dimensions are compared
	// against; matching is hierarchical (a policy key "marketing" matches a
	// request value "marketing.advertising").
	SystemFidesKey string   `json:"system_fides_key,omitempty"`
	DataUses       []string `json:"data_uses,omitempty"`
	DataCategories []string `json:"data_categories,omitempty"`
	DataSubjects   []string `json:"data_subjects,omitempty"`

	// Runtime context for unless conditions
	//
	// Shapes read by the constraint evaluators:
	//   - geo_location: any nested maps addressed by Constraint.Field, ending
	//     in a string, e.g. {"environment": {"geo_location": "US-CA"}}
	//   - data_flow: {"data_flows": {"<direction>": ["system_key", ...]}}
	//   - consent: per the tests, {"consent": {"<privacy_notice_key>": "opt_out"}}
	// A nil Context means geo_location and data_flow constraints never trigger.
	Context map[string]interface{} `json:"context,omitempty"`
}

// EvaluatedPolicyInfo is the audit trail for a single policy evaluation.
type EvaluatedPolicyInfo struct {
	PolicyKey       string `json:"policy_key"`
	Priority        int    `json:"priority"`
	Matched         bool   `json:"matched"`
	Result          string `json:"result"`           // "ALLOW", "DENY", "SUPPRESSED" (a DENY whose unless block triggered)
	UnlessTriggered bool   `json:"unless_triggered"` // whether this policy's unless block fired
}

// PolicyEvaluationResult is the output of evaluating access policies.
//
// Decision is NO_DECISION when no enabled policy matched or every matching
// DENY was suppressed by its unless block. The tests expect EvaluatedPolicies
// to be a non-nil (possibly empty) slice so it serializes as [] rather than
// null.
type PolicyEvaluationResult struct {
	Decision               PolicyDecision        `json:"decision"`
	DecisivePolicyKey      *string               `json:"decisive_policy_key,omitempty"`      // key of the policy that produced Decision; presumably nil for NO_DECISION — confirm in the evaluator
	DecisivePolicyPriority *int                  `json:"decisive_policy_priority,omitempty"` // priority of that policy
	UnlessTriggered        bool                  `json:"unless_triggered"`                   // true when the decisive ALLOW was inverted to DENY by its unless block
	EvaluatedPolicies      []EvaluatedPolicyInfo `json:"evaluated_policies"`
	Action                 *PolicyAction         `json:"action,omitempty"`
	Reason                 *string               `json:"reason,omitempty"`
}

// EvaluatePoliciesRequest is the JSON request body for the policy evaluation endpoint.
+type EvaluatePoliciesRequest struct { + Policies []AccessPolicy `json:"policies"` + Request AccessEvaluationRequest `json:"request"` +} From 9459ac7f1dee7262baa9fc555a52f401cac963c2 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Mon, 13 Apr 2026 21:04:15 -0700 Subject: [PATCH 04/14] Add comprehensive tests for PBAC library and CLI passthrough Library tests (pkg/pbac/): - edge_cases_test.go: empty datasets, nil collections, multiple collections counted separately, duplicate purpose keys, sorted output, non-nil slices for JSON, EffectivePurposes inheritance, policy catch-all, match all/any combined, unless without context, deny action only on deny, context field resolution (nested, missing, non-string) CLI integration tests (cmd/fides-evaluate/): - Builds the binary once, exercises both purpose and policies commands via stdin and file input - Purpose: compliant, violation, gap, collections, multiple datasets - Policies: allow, deny, no-decision, unless-inverts, priority ordering - Error handling: no args, unknown command, invalid JSON, missing file, empty input, output is valid JSON Also adds 'policies' subcommand to the CLI. 63 tests total, all passing. 
--- policy-engine/cmd/fides-evaluate/main.go | 14 +- policy-engine/cmd/fides-evaluate/main_test.go | 492 ++++++++++++++++++ policy-engine/pkg/pbac/edge_cases_test.go | 370 +++++++++++++ 3 files changed, 874 insertions(+), 2 deletions(-) create mode 100644 policy-engine/cmd/fides-evaluate/main_test.go create mode 100644 policy-engine/pkg/pbac/edge_cases_test.go diff --git a/policy-engine/cmd/fides-evaluate/main.go b/policy-engine/cmd/fides-evaluate/main.go index 87a19a97c58..9e29b9872b4 100644 --- a/policy-engine/cmd/fides-evaluate/main.go +++ b/policy-engine/cmd/fides-evaluate/main.go @@ -3,6 +3,7 @@ // Usage: // // echo '{"consumer": {...}, "datasets": {...}}' | fides-evaluate purpose +// echo '{"policies": [...], "request": {...}}' | fides-evaluate policies package main import ( @@ -16,7 +17,7 @@ import ( func main() { if len(os.Args) < 2 { - fmt.Fprintf(os.Stderr, "Usage: fides-evaluate [file]\n") + fmt.Fprintf(os.Stderr, "Usage: fides-evaluate [file]\n") os.Exit(1) } @@ -49,8 +50,17 @@ func main() { result := pbac.EvaluatePurpose(req.Consumer, req.Datasets, req.Collections) writeJSON(result) + case "policies": + var req pbac.EvaluatePoliciesRequest + if err := json.Unmarshal(input, &req); err != nil { + fmt.Fprintf(os.Stderr, "Error parsing JSON: %v\n", err) + os.Exit(1) + } + result := pbac.EvaluatePolicies(req.Policies, &req.Request) + writeJSON(result) + default: - fmt.Fprintf(os.Stderr, "Unknown command: %s\nUse 'purpose'\n", command) + fmt.Fprintf(os.Stderr, "Unknown command: %s\nUse 'purpose' or 'policies'\n", command) os.Exit(1) } } diff --git a/policy-engine/cmd/fides-evaluate/main_test.go b/policy-engine/cmd/fides-evaluate/main_test.go new file mode 100644 index 00000000000..513871955c9 --- /dev/null +++ b/policy-engine/cmd/fides-evaluate/main_test.go @@ -0,0 +1,492 @@ +package main + +import ( + "bytes" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "testing" +) + +var binaryPath string + +func TestMain(m *testing.M) { + // Build the binary 
once before all tests + dir, err := os.MkdirTemp("", "fides-evaluate-test") + if err != nil { + panic(err) + } + defer os.RemoveAll(dir) + + binaryPath = filepath.Join(dir, "fides-evaluate") + cmd := exec.Command("go", "build", "-o", binaryPath, ".") + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + panic("failed to build binary: " + err.Error()) + } + + os.Exit(m.Run()) +} + +func runCLI(t *testing.T, command string, input string) (string, string, int) { + t.Helper() + cmd := exec.Command(binaryPath, command) + cmd.Stdin = bytes.NewBufferString(input) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + exitCode := 0 + if err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + exitCode = exitErr.ExitCode() + } else { + t.Fatalf("unexpected error running CLI: %v", err) + } + } + return stdout.String(), stderr.String(), exitCode +} + +func runCLIWithFile(t *testing.T, command string, content string) (string, string, int) { + t.Helper() + f, err := os.CreateTemp("", "fides-evaluate-*.json") + if err != nil { + t.Fatal(err) + } + defer os.Remove(f.Name()) + if _, err := f.WriteString(content); err != nil { + t.Fatal(err) + } + f.Close() + + cmd := exec.Command(binaryPath, command, f.Name()) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err = cmd.Run() + exitCode := 0 + if err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + exitCode = exitErr.ExitCode() + } else { + t.Fatalf("unexpected error running CLI: %v", err) + } + } + return stdout.String(), stderr.String(), exitCode +} + +// ── purpose command tests ──────────────────────────────────────────── + +func TestCLI_Purpose_Compliant(t *testing.T) { + input := `{ + "consumer": {"consumer_id": "c1", "consumer_name": "Billing", "purpose_keys": ["billing"]}, + "datasets": {"billing_db": {"dataset_key": "billing_db", "purpose_keys": ["billing"]}} + }` + + stdout, _, exitCode := runCLI(t, "purpose", input) 
+ if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + if err := json.Unmarshal([]byte(stdout), &result); err != nil { + t.Fatalf("invalid JSON output: %v\nstdout: %s", err, stdout) + } + + violations := result["violations"].([]interface{}) + if len(violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(violations)) + } + if result["total_accesses"].(float64) != 1 { + t.Errorf("expected 1 total access") + } +} + +func TestCLI_Purpose_Violation(t *testing.T) { + input := `{ + "consumer": {"consumer_id": "c1", "consumer_name": "Analytics", "purpose_keys": ["analytics"]}, + "datasets": {"billing_db": {"dataset_key": "billing_db", "purpose_keys": ["billing"]}} + }` + + stdout, _, exitCode := runCLI(t, "purpose", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + violations := result["violations"].([]interface{}) + if len(violations) != 1 { + t.Errorf("expected 1 violation, got %d", len(violations)) + } +} + +func TestCLI_Purpose_Gap(t *testing.T) { + input := `{ + "consumer": {"consumer_id": "c1", "consumer_name": "Unknown", "purpose_keys": []}, + "datasets": {"db1": {"dataset_key": "db1", "purpose_keys": ["billing"]}} + }` + + stdout, _, exitCode := runCLI(t, "purpose", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + gaps := result["gaps"].([]interface{}) + if len(gaps) != 1 { + t.Errorf("expected 1 gap, got %d", len(gaps)) + } + gap := gaps[0].(map[string]interface{}) + if gap["gap_type"] != "unresolved_identity" { + t.Errorf("expected unresolved_identity, got %s", gap["gap_type"]) + } +} + +func TestCLI_Purpose_WithCollections(t *testing.T) { + input := `{ + "consumer": {"consumer_id": "c1", "consumer_name": "Accountant", "purpose_keys": ["accounting"]}, + "datasets": { + 
"billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + "collection_purposes": {"invoices": ["accounting"]} + } + }, + "collections": {"billing_db": ["invoices"]} + }` + + stdout, _, exitCode := runCLI(t, "purpose", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + // accounting overlaps with billing_db.invoices effective purposes {billing, accounting} + violations := result["violations"].([]interface{}) + if len(violations) != 0 { + t.Errorf("expected 0 violations (collection purpose overlap), got %d", len(violations)) + } +} + +func TestCLI_Purpose_FromFile(t *testing.T) { + input := `{ + "consumer": {"consumer_id": "c1", "consumer_name": "Test", "purpose_keys": ["analytics"]}, + "datasets": {"db1": {"dataset_key": "db1", "purpose_keys": ["analytics"]}} + }` + + stdout, _, exitCode := runCLIWithFile(t, "purpose", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + if err := json.Unmarshal([]byte(stdout), &result); err != nil { + t.Fatalf("invalid JSON: %v", err) + } + + violations := result["violations"].([]interface{}) + if len(violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(violations)) + } +} + +func TestCLI_Purpose_MultipleDatasets(t *testing.T) { + input := `{ + "consumer": {"consumer_id": "c1", "consumer_name": "Analyst", "purpose_keys": ["analytics"]}, + "datasets": { + "analytics_db": {"dataset_key": "analytics_db", "purpose_keys": ["analytics"]}, + "billing_db": {"dataset_key": "billing_db", "purpose_keys": ["billing"]}, + "empty_db": {"dataset_key": "empty_db", "purpose_keys": []} + } + }` + + stdout, _, exitCode := runCLI(t, "purpose", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + if 
result["total_accesses"].(float64) != 3 { + t.Errorf("expected 3 total accesses, got %v", result["total_accesses"]) + } + + violations := result["violations"].([]interface{}) + gaps := result["gaps"].([]interface{}) + if len(violations) != 1 { + t.Errorf("expected 1 violation (billing_db), got %d", len(violations)) + } + if len(gaps) != 1 { + t.Errorf("expected 1 gap (empty_db), got %d", len(gaps)) + } +} + +// ── policies command tests ─────────────────────────────────────────── + +func TestCLI_Policies_Allow(t *testing.T) { + input := `{ + "policies": [ + { + "id": "p1", "key": "allow-marketing", "priority": 100, "enabled": true, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}} + } + ], + "request": { + "consumer_id": "c1", "consumer_name": "Marketing", + "data_uses": ["marketing.advertising"] + } + }` + + stdout, _, exitCode := runCLI(t, "policies", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + if result["decision"] != "ALLOW" { + t.Errorf("expected ALLOW, got %s", result["decision"]) + } + if result["decisive_policy_key"] != "allow-marketing" { + t.Errorf("expected decisive key 'allow-marketing', got %v", result["decisive_policy_key"]) + } +} + +func TestCLI_Policies_Deny(t *testing.T) { + input := `{ + "policies": [ + { + "id": "p1", "key": "deny-all", "priority": 0, "enabled": true, + "decision": "DENY", + "match": {}, + "action": {"message": "Access denied by default"} + } + ], + "request": { + "consumer_id": "c1", "consumer_name": "Anyone", + "data_uses": ["essential"] + } + }` + + stdout, _, exitCode := runCLI(t, "policies", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + if result["decision"] != "DENY" { + t.Errorf("expected DENY, got %s", result["decision"]) + } + action := 
result["action"].(map[string]interface{}) + if action["message"] != "Access denied by default" { + t.Errorf("expected action message") + } +} + +func TestCLI_Policies_NoDecision(t *testing.T) { + input := `{ + "policies": [ + { + "id": "p1", "key": "deny-financial", "priority": 100, "enabled": true, + "decision": "DENY", + "match": {"data_category": {"any": ["user.financial"]}} + } + ], + "request": { + "consumer_id": "c1", "consumer_name": "Test", + "data_categories": ["system.operations"] + } + }` + + stdout, _, exitCode := runCLI(t, "policies", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + if result["decision"] != "NO_DECISION" { + t.Errorf("expected NO_DECISION, got %s", result["decision"]) + } +} + +func TestCLI_Policies_UnlessInverts(t *testing.T) { + input := `{ + "policies": [ + { + "id": "p1", "key": "allow-unless-optout", "priority": 100, "enabled": true, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + "unless": [ + {"type": "consent", "privacy_notice_key": "do_not_sell", "requirement": "opt_out"} + ], + "action": {"message": "User opted out"} + } + ], + "request": { + "consumer_id": "c1", "consumer_name": "Marketing", + "data_uses": ["marketing.advertising"], + "context": { + "consent": {"do_not_sell": "opt_out"} + } + } + }` + + stdout, _, exitCode := runCLI(t, "policies", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + if result["decision"] != "DENY" { + t.Errorf("expected DENY (inverted ALLOW), got %s", result["decision"]) + } + if result["unless_triggered"] != true { + t.Error("expected unless_triggered=true") + } +} + +func TestCLI_Policies_PriorityOrdering(t *testing.T) { + input := `{ + "policies": [ + { + "id": "p1", "key": "low-allow", "priority": 10, "enabled": true, + "decision": "ALLOW", 
+ "match": {} + }, + { + "id": "p2", "key": "high-deny", "priority": 200, "enabled": true, + "decision": "DENY", + "match": {}, + "action": {"message": "Highest priority wins"} + } + ], + "request": { + "consumer_id": "c1", "consumer_name": "Test", + "data_uses": ["marketing"] + } + }` + + stdout, _, exitCode := runCLI(t, "policies", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + if result["decision"] != "DENY" { + t.Errorf("expected DENY (high priority), got %s", result["decision"]) + } + if result["decisive_policy_key"] != "high-deny" { + t.Errorf("expected decisive key 'high-deny', got %v", result["decisive_policy_key"]) + } +} + +func TestCLI_Policies_FromFile(t *testing.T) { + input := `{ + "policies": [], + "request": {"consumer_id": "c1", "consumer_name": "Test"} + }` + + stdout, _, exitCode := runCLIWithFile(t, "policies", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + if result["decision"] != "NO_DECISION" { + t.Errorf("expected NO_DECISION, got %s", result["decision"]) + } +} + +// ── Error handling tests ───────────────────────────────────────────── + +func TestCLI_NoArgs_ExitsNonZero(t *testing.T) { + cmd := exec.Command(binaryPath) + err := cmd.Run() + if err == nil { + t.Error("expected non-zero exit with no args") + } +} + +func TestCLI_UnknownCommand_ExitsNonZero(t *testing.T) { + _, stderr, exitCode := runCLI(t, "bogus", "{}") + if exitCode == 0 { + t.Error("expected non-zero exit for unknown command") + } + if !bytes.Contains([]byte(stderr), []byte("Unknown command")) { + t.Errorf("expected 'Unknown command' in stderr, got: %s", stderr) + } +} + +func TestCLI_InvalidJSON_ExitsNonZero(t *testing.T) { + _, stderr, exitCode := runCLI(t, "purpose", "not valid json") + if exitCode == 0 { + t.Error("expected non-zero exit 
for invalid JSON") + } + if !bytes.Contains([]byte(stderr), []byte("Error parsing JSON")) { + t.Errorf("expected parse error in stderr, got: %s", stderr) + } +} + +func TestCLI_MissingFile_ExitsNonZero(t *testing.T) { + cmd := exec.Command(binaryPath, "purpose", "/tmp/nonexistent-file-fides-test.json") + err := cmd.Run() + if err == nil { + t.Error("expected non-zero exit for missing file") + } +} + +func TestCLI_EmptyInput_Purpose(t *testing.T) { + // Empty JSON object — consumer and datasets will have zero values + stdout, _, exitCode := runCLI(t, "purpose", "{}") + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + var result map[string]interface{} + json.Unmarshal([]byte(stdout), &result) + + // Empty datasets → 0 accesses, no violations, no gaps + if result["total_accesses"].(float64) != 0 { + t.Errorf("expected 0 accesses, got %v", result["total_accesses"]) + } +} + +func TestCLI_OutputIsValidJSON(t *testing.T) { + input := `{ + "consumer": {"consumer_id": "c1", "consumer_name": "Test", "purpose_keys": ["x"]}, + "datasets": {"d1": {"dataset_key": "d1", "purpose_keys": ["y"]}} + }` + + stdout, _, exitCode := runCLI(t, "purpose", input) + if exitCode != 0 { + t.Fatalf("expected exit 0, got %d", exitCode) + } + + // Verify it's valid JSON that can be round-tripped + var parsed interface{} + if err := json.Unmarshal([]byte(stdout), &parsed); err != nil { + t.Errorf("output is not valid JSON: %v\noutput: %s", err, stdout) + } +} diff --git a/policy-engine/pkg/pbac/edge_cases_test.go b/policy-engine/pkg/pbac/edge_cases_test.go new file mode 100644 index 00000000000..3bd520e3a9d --- /dev/null +++ b/policy-engine/pkg/pbac/edge_cases_test.go @@ -0,0 +1,370 @@ +package pbac + +import ( + "testing" +) + +// ── Purpose evaluation edge cases ──────────────────────────────────── + +func TestPurpose_EmptyDatasets_ZeroAccesses(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "c1", ConsumerName: "Test", PurposeKeys: []string{"billing"}, + 
} + result := EvaluatePurpose(consumer, map[string]DatasetPurposes{}, nil) + + if result.TotalAccesses != 0 { + t.Errorf("expected 0 accesses, got %d", result.TotalAccesses) + } + if len(result.Violations) != 0 { + t.Errorf("expected 0 violations, got %d", len(result.Violations)) + } + if len(result.Gaps) != 0 { + t.Errorf("expected 0 gaps, got %d", len(result.Gaps)) + } +} + +func TestPurpose_NilCollections_NoPanic(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "c1", ConsumerName: "Test", PurposeKeys: []string{"billing"}, + } + datasets := map[string]DatasetPurposes{ + "db1": {DatasetKey: "db1", PurposeKeys: []string{"billing"}}, + } + + // Should not panic with nil collections + result := EvaluatePurpose(consumer, datasets, nil) + if result.TotalAccesses != 1 { + t.Errorf("expected 1 access, got %d", result.TotalAccesses) + } +} + +func TestPurpose_MultipleCollections_CountedSeparately(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "c1", ConsumerName: "Test", PurposeKeys: []string{"billing"}, + } + datasets := map[string]DatasetPurposes{ + "db1": { + DatasetKey: "db1", + PurposeKeys: []string{"billing"}, + CollectionPurposes: map[string][]string{ + "invoices": {"billing"}, + "payments": {"billing"}, + }, + }, + } + collections := map[string][]string{ + "db1": {"invoices", "payments"}, + } + + result := EvaluatePurpose(consumer, datasets, collections) + + if result.TotalAccesses != 2 { + t.Errorf("expected 2 accesses (one per collection), got %d", result.TotalAccesses) + } +} + +func TestPurpose_DuplicatePurposeKeys_StillWorks(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "c1", ConsumerName: "Test", + PurposeKeys: []string{"billing", "billing", "analytics"}, + } + datasets := map[string]DatasetPurposes{ + "db1": {DatasetKey: "db1", PurposeKeys: []string{"billing"}}, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + // Should still be compliant + if len(result.Violations) != 0 { + t.Errorf("expected 0 
violations, got %d", len(result.Violations)) + } +} + +func TestPurpose_EmptyCollectionPurposes_InheritsDatasetOnly(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "c1", ConsumerName: "Test", PurposeKeys: []string{"billing"}, + } + datasets := map[string]DatasetPurposes{ + "db1": { + DatasetKey: "db1", + PurposeKeys: []string{"billing"}, + CollectionPurposes: map[string][]string{}, + }, + } + collections := map[string][]string{ + "db1": {"unknown_collection"}, + } + + result := EvaluatePurpose(consumer, datasets, collections) + + // Collection has no extra purposes, but dataset-level "billing" still applies + if len(result.Violations) != 0 { + t.Errorf("expected 0 violations (dataset purpose inherited), got %d", len(result.Violations)) + } +} + +func TestPurpose_ViolationsHaveSortedPurposes(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "c1", ConsumerName: "Test", + PurposeKeys: []string{"z_purpose", "a_purpose"}, + } + datasets := map[string]DatasetPurposes{ + "db1": {DatasetKey: "db1", PurposeKeys: []string{"other"}}, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + if len(result.Violations) != 1 { + t.Fatalf("expected 1 violation, got %d", len(result.Violations)) + } + + // Purposes should be sorted in the violation for deterministic output + cp := result.Violations[0].ConsumerPurposes + if len(cp) != 2 || cp[0] != "a_purpose" || cp[1] != "z_purpose" { + t.Errorf("expected sorted consumer purposes [a_purpose, z_purpose], got %v", cp) + } +} + +func TestPurpose_GapsAndViolationsNeverNil(t *testing.T) { + consumer := ConsumerPurposes{ + ConsumerID: "c1", ConsumerName: "Test", PurposeKeys: []string{"billing"}, + } + datasets := map[string]DatasetPurposes{ + "db1": {DatasetKey: "db1", PurposeKeys: []string{"billing"}}, + } + + result := EvaluatePurpose(consumer, datasets, nil) + + // Slices should be empty, not nil (for clean JSON serialization) + if result.Violations == nil { + t.Error("violations should be empty slice, 
not nil") + } + if result.Gaps == nil { + t.Error("gaps should be empty slice, not nil") + } +} + +// ── EffectivePurposes edge cases ───────────────────────────────────── + +func TestEffectivePurposes_EmptyCollection_ReturnsDatasetOnly(t *testing.T) { + ds := DatasetPurposes{ + DatasetKey: "db1", + PurposeKeys: []string{"billing"}, + } + + effective := ds.EffectivePurposes("") + if len(effective) != 1 || !effective["billing"] { + t.Errorf("expected {billing}, got %v", effective) + } +} + +func TestEffectivePurposes_UnknownCollection_ReturnsDatasetOnly(t *testing.T) { + ds := DatasetPurposes{ + DatasetKey: "db1", + PurposeKeys: []string{"billing"}, + CollectionPurposes: map[string][]string{ + "invoices": {"accounting"}, + }, + } + + effective := ds.EffectivePurposes("nonexistent") + if len(effective) != 1 || !effective["billing"] { + t.Errorf("expected {billing}, got %v", effective) + } +} + +func TestEffectivePurposes_Additive(t *testing.T) { + ds := DatasetPurposes{ + DatasetKey: "db1", + PurposeKeys: []string{"billing"}, + CollectionPurposes: map[string][]string{ + "invoices": {"accounting", "reporting"}, + }, + } + + effective := ds.EffectivePurposes("invoices") + if len(effective) != 3 { + t.Errorf("expected 3 effective purposes, got %d: %v", len(effective), effective) + } + for _, key := range []string{"billing", "accounting", "reporting"} { + if !effective[key] { + t.Errorf("expected %s in effective purposes", key) + } + } +} + +// ── Policy evaluation edge cases ───────────────────────────────────── + +func TestPolicy_EmptyPoliciesList(t *testing.T) { + result := EvaluatePolicies(nil, &AccessEvaluationRequest{}) + if result.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION, got %s", result.Decision) + } + if result.EvaluatedPolicies == nil { + t.Error("evaluated_policies should be empty slice, not nil") + } +} + +func TestPolicy_AllDisabled(t *testing.T) { + policies := []AccessPolicy{ + {ID: "p1", Key: "disabled-1", Priority: 100, Enabled: 
false, Decision: PolicyDeny, Match: MatchBlock{}}, + {ID: "p2", Key: "disabled-2", Priority: 200, Enabled: false, Decision: PolicyAllow, Match: MatchBlock{}}, + } + + result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) + if result.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION, got %s", result.Decision) + } +} + +func TestPolicy_NoMatchDimensions_CatchAll(t *testing.T) { + // An empty MatchBlock should match any request, even with empty data_uses + policies := []AccessPolicy{ + {ID: "p1", Key: "catch-all", Priority: 1, Enabled: true, Decision: PolicyDeny, Match: MatchBlock{}}, + } + + result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY (catch-all), got %s", result.Decision) + } +} + +func TestPolicy_MatchAll_RequiresEveryValue(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "require-both", Priority: 100, Enabled: true, + Decision: PolicyDeny, + Match: MatchBlock{ + DataCategory: &MatchDimension{ + All: []string{"user.contact", "user.financial"}, + }, + }, + }, + } + + // Only one category → no match + req1 := &AccessEvaluationRequest{ + DataCategories: []string{"user.contact.email"}, + } + result1 := EvaluatePolicies(policies, req1) + if result1.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION (only one category), got %s", result1.Decision) + } + + // Both categories → match → DENY + req2 := &AccessEvaluationRequest{ + DataCategories: []string{"user.contact.email", "user.financial.bank_account"}, + } + result2 := EvaluatePolicies(policies, req2) + if result2.Decision != PolicyDeny { + t.Errorf("expected DENY (both categories), got %s", result2.Decision) + } +} + +func TestPolicy_MatchAnyAndAll_Combined(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "combined", Priority: 100, Enabled: true, + Decision: PolicyDeny, + Match: MatchBlock{ + DataUse: &MatchDimension{Any: []string{"marketing"}}, + DataCategory: 
&MatchDimension{All: []string{"user.contact", "user.financial"}}, + }, + }, + } + + // Marketing use + both categories → match + req := &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + DataCategories: []string{"user.contact.email", "user.financial.bank_account"}, + } + result := EvaluatePolicies(policies, req) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY, got %s", result.Decision) + } + + // Marketing use + only one category → no match + req2 := &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + DataCategories: []string{"user.contact.email"}, + } + result2 := EvaluatePolicies(policies, req2) + if result2.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION, got %s", result2.Decision) + } +} + +func TestPolicy_UnlessNoContext_DoesNotTrigger(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{}, + Unless: []Constraint{ + {Type: ConstraintConsent, PrivacyNoticeKey: "notice", Requirement: "opt_out"}, + }, + }, + } + + // No context → unless can't evaluate → doesn't trigger → ALLOW stands + result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) + if result.Decision != PolicyAllow { + t.Errorf("expected ALLOW (no context), got %s", result.Decision) + } +} + +func TestPolicy_DenyAction_OnlyReturnedForDeny(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-with-action", Priority: 100, Enabled: true, + Decision: PolicyAllow, + Match: MatchBlock{}, + Action: &PolicyAction{Message: "this should not appear"}, + }, + } + + result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) + if result.Decision != PolicyAllow { + t.Errorf("expected ALLOW, got %s", result.Decision) + } + if result.Action != nil { + t.Error("action should be nil for ALLOW decisions") + } +} + +func TestPolicy_ResolveContextField_Nested(t *testing.T) { + ctx := map[string]interface{}{ + 
"a": map[string]interface{}{ + "b": map[string]interface{}{ + "c": "deep_value", + }, + }, + } + + val := resolveContextField(ctx, "a.b.c") + if val != "deep_value" { + t.Errorf("expected 'deep_value', got '%s'", val) + } +} + +func TestPolicy_ResolveContextField_MissingPath(t *testing.T) { + ctx := map[string]interface{}{ + "a": map[string]interface{}{"b": "value"}, + } + + val := resolveContextField(ctx, "a.b.c.d") + if val != "" { + t.Errorf("expected empty string for missing path, got '%s'", val) + } +} + +func TestPolicy_ResolveContextField_NonStringValue(t *testing.T) { + ctx := map[string]interface{}{ + "a": map[string]interface{}{"b": 42}, + } + + val := resolveContextField(ctx, "a.b") + if val != "" { + t.Errorf("expected empty string for non-string value, got '%s'", val) + } +} From 8ff362a8bac74ca978896bb910887ff1a933d402 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 13:38:05 -0700 Subject: [PATCH 05/14] Replace Go CLI with fides pbac Python commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the standalone Go binary (cmd/fides-evaluate/) and adds PBAC evaluation to the existing fides CLI: fides pbac evaluate-purpose — purpose overlap check (stdin/file) fides pbac evaluate-policies — access policy evaluation (stdin/file) The CLI calls the Python evaluation engine directly. Performance is not the concern here — the Go sidecar handles API throughput. The CLI is for local testing and debugging. 
Adds: - src/fides/cli/commands/pbac.py — Click command group - src/fides/service/pbac/policies/evaluate.py — Python policy v2 engine (mirrors Go implementation in policy-engine/pkg/pbac/) - tests for both the evaluation engine and CLI passthrough --- policy-engine/cmd/fides-evaluate/main.go | 75 --- policy-engine/cmd/fides-evaluate/main_test.go | 492 ------------------ src/fides/cli/__init__.py | 3 +- src/fides/cli/commands/pbac.py | 161 ++++++ src/fides/service/pbac/policies/evaluate.py | 204 ++++++++ .../policies/test_evaluate_access_policies.py | 391 ++++++++++++++ tests/service/pbac/policies/test_pbac_cli.py | 301 +++++++++++ 7 files changed, 1059 insertions(+), 568 deletions(-) delete mode 100644 policy-engine/cmd/fides-evaluate/main.go delete mode 100644 policy-engine/cmd/fides-evaluate/main_test.go create mode 100644 src/fides/cli/commands/pbac.py create mode 100644 src/fides/service/pbac/policies/evaluate.py create mode 100644 tests/service/pbac/policies/test_evaluate_access_policies.py create mode 100644 tests/service/pbac/policies/test_pbac_cli.py diff --git a/policy-engine/cmd/fides-evaluate/main.go b/policy-engine/cmd/fides-evaluate/main.go deleted file mode 100644 index 9e29b9872b4..00000000000 --- a/policy-engine/cmd/fides-evaluate/main.go +++ /dev/null @@ -1,75 +0,0 @@ -// fides-evaluate is a CLI tool for running the Fides PBAC evaluation engine. 
-// -// Usage: -// -// echo '{"consumer": {...}, "datasets": {...}}' | fides-evaluate purpose -// echo '{"policies": [...], "request": {...}}' | fides-evaluate policies -package main - -import ( - "encoding/json" - "fmt" - "io" - "os" - - "github.com/ethyca/fides/policy-engine/pkg/pbac" -) - -func main() { - if len(os.Args) < 2 { - fmt.Fprintf(os.Stderr, "Usage: fides-evaluate [file]\n") - os.Exit(1) - } - - command := os.Args[1] - - var reader io.Reader = os.Stdin - if len(os.Args) > 2 { - f, err := os.Open(os.Args[2]) - if err != nil { - fmt.Fprintf(os.Stderr, "Error opening file: %v\n", err) - os.Exit(1) - } - defer f.Close() - reader = f - } - - input, err := io.ReadAll(reader) - if err != nil { - fmt.Fprintf(os.Stderr, "Error reading input: %v\n", err) - os.Exit(1) - } - - switch command { - case "purpose": - var req pbac.EvaluatePurposeRequest - if err := json.Unmarshal(input, &req); err != nil { - fmt.Fprintf(os.Stderr, "Error parsing JSON: %v\n", err) - os.Exit(1) - } - result := pbac.EvaluatePurpose(req.Consumer, req.Datasets, req.Collections) - writeJSON(result) - - case "policies": - var req pbac.EvaluatePoliciesRequest - if err := json.Unmarshal(input, &req); err != nil { - fmt.Fprintf(os.Stderr, "Error parsing JSON: %v\n", err) - os.Exit(1) - } - result := pbac.EvaluatePolicies(req.Policies, &req.Request) - writeJSON(result) - - default: - fmt.Fprintf(os.Stderr, "Unknown command: %s\nUse 'purpose' or 'policies'\n", command) - os.Exit(1) - } -} - -func writeJSON(v interface{}) { - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - if err := enc.Encode(v); err != nil { - fmt.Fprintf(os.Stderr, "Error encoding JSON: %v\n", err) - os.Exit(1) - } -} diff --git a/policy-engine/cmd/fides-evaluate/main_test.go b/policy-engine/cmd/fides-evaluate/main_test.go deleted file mode 100644 index 513871955c9..00000000000 --- a/policy-engine/cmd/fides-evaluate/main_test.go +++ /dev/null @@ -1,492 +0,0 @@ -package main - -import ( - "bytes" - "encoding/json" - 
"os" - "os/exec" - "path/filepath" - "testing" -) - -var binaryPath string - -func TestMain(m *testing.M) { - // Build the binary once before all tests - dir, err := os.MkdirTemp("", "fides-evaluate-test") - if err != nil { - panic(err) - } - defer os.RemoveAll(dir) - - binaryPath = filepath.Join(dir, "fides-evaluate") - cmd := exec.Command("go", "build", "-o", binaryPath, ".") - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - panic("failed to build binary: " + err.Error()) - } - - os.Exit(m.Run()) -} - -func runCLI(t *testing.T, command string, input string) (string, string, int) { - t.Helper() - cmd := exec.Command(binaryPath, command) - cmd.Stdin = bytes.NewBufferString(input) - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - exitCode := 0 - if err != nil { - if exitErr, ok := err.(*exec.ExitError); ok { - exitCode = exitErr.ExitCode() - } else { - t.Fatalf("unexpected error running CLI: %v", err) - } - } - return stdout.String(), stderr.String(), exitCode -} - -func runCLIWithFile(t *testing.T, command string, content string) (string, string, int) { - t.Helper() - f, err := os.CreateTemp("", "fides-evaluate-*.json") - if err != nil { - t.Fatal(err) - } - defer os.Remove(f.Name()) - if _, err := f.WriteString(content); err != nil { - t.Fatal(err) - } - f.Close() - - cmd := exec.Command(binaryPath, command, f.Name()) - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err = cmd.Run() - exitCode := 0 - if err != nil { - if exitErr, ok := err.(*exec.ExitError); ok { - exitCode = exitErr.ExitCode() - } else { - t.Fatalf("unexpected error running CLI: %v", err) - } - } - return stdout.String(), stderr.String(), exitCode -} - -// ── purpose command tests ──────────────────────────────────────────── - -func TestCLI_Purpose_Compliant(t *testing.T) { - input := `{ - "consumer": {"consumer_id": "c1", "consumer_name": "Billing", "purpose_keys": ["billing"]}, - "datasets": 
{"billing_db": {"dataset_key": "billing_db", "purpose_keys": ["billing"]}} - }` - - stdout, _, exitCode := runCLI(t, "purpose", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - if err := json.Unmarshal([]byte(stdout), &result); err != nil { - t.Fatalf("invalid JSON output: %v\nstdout: %s", err, stdout) - } - - violations := result["violations"].([]interface{}) - if len(violations) != 0 { - t.Errorf("expected 0 violations, got %d", len(violations)) - } - if result["total_accesses"].(float64) != 1 { - t.Errorf("expected 1 total access") - } -} - -func TestCLI_Purpose_Violation(t *testing.T) { - input := `{ - "consumer": {"consumer_id": "c1", "consumer_name": "Analytics", "purpose_keys": ["analytics"]}, - "datasets": {"billing_db": {"dataset_key": "billing_db", "purpose_keys": ["billing"]}} - }` - - stdout, _, exitCode := runCLI(t, "purpose", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - violations := result["violations"].([]interface{}) - if len(violations) != 1 { - t.Errorf("expected 1 violation, got %d", len(violations)) - } -} - -func TestCLI_Purpose_Gap(t *testing.T) { - input := `{ - "consumer": {"consumer_id": "c1", "consumer_name": "Unknown", "purpose_keys": []}, - "datasets": {"db1": {"dataset_key": "db1", "purpose_keys": ["billing"]}} - }` - - stdout, _, exitCode := runCLI(t, "purpose", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - gaps := result["gaps"].([]interface{}) - if len(gaps) != 1 { - t.Errorf("expected 1 gap, got %d", len(gaps)) - } - gap := gaps[0].(map[string]interface{}) - if gap["gap_type"] != "unresolved_identity" { - t.Errorf("expected unresolved_identity, got %s", gap["gap_type"]) - } -} - -func TestCLI_Purpose_WithCollections(t *testing.T) 
{ - input := `{ - "consumer": {"consumer_id": "c1", "consumer_name": "Accountant", "purpose_keys": ["accounting"]}, - "datasets": { - "billing_db": { - "dataset_key": "billing_db", - "purpose_keys": ["billing"], - "collection_purposes": {"invoices": ["accounting"]} - } - }, - "collections": {"billing_db": ["invoices"]} - }` - - stdout, _, exitCode := runCLI(t, "purpose", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - // accounting overlaps with billing_db.invoices effective purposes {billing, accounting} - violations := result["violations"].([]interface{}) - if len(violations) != 0 { - t.Errorf("expected 0 violations (collection purpose overlap), got %d", len(violations)) - } -} - -func TestCLI_Purpose_FromFile(t *testing.T) { - input := `{ - "consumer": {"consumer_id": "c1", "consumer_name": "Test", "purpose_keys": ["analytics"]}, - "datasets": {"db1": {"dataset_key": "db1", "purpose_keys": ["analytics"]}} - }` - - stdout, _, exitCode := runCLIWithFile(t, "purpose", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - if err := json.Unmarshal([]byte(stdout), &result); err != nil { - t.Fatalf("invalid JSON: %v", err) - } - - violations := result["violations"].([]interface{}) - if len(violations) != 0 { - t.Errorf("expected 0 violations, got %d", len(violations)) - } -} - -func TestCLI_Purpose_MultipleDatasets(t *testing.T) { - input := `{ - "consumer": {"consumer_id": "c1", "consumer_name": "Analyst", "purpose_keys": ["analytics"]}, - "datasets": { - "analytics_db": {"dataset_key": "analytics_db", "purpose_keys": ["analytics"]}, - "billing_db": {"dataset_key": "billing_db", "purpose_keys": ["billing"]}, - "empty_db": {"dataset_key": "empty_db", "purpose_keys": []} - } - }` - - stdout, _, exitCode := runCLI(t, "purpose", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", 
exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - if result["total_accesses"].(float64) != 3 { - t.Errorf("expected 3 total accesses, got %v", result["total_accesses"]) - } - - violations := result["violations"].([]interface{}) - gaps := result["gaps"].([]interface{}) - if len(violations) != 1 { - t.Errorf("expected 1 violation (billing_db), got %d", len(violations)) - } - if len(gaps) != 1 { - t.Errorf("expected 1 gap (empty_db), got %d", len(gaps)) - } -} - -// ── policies command tests ─────────────────────────────────────────── - -func TestCLI_Policies_Allow(t *testing.T) { - input := `{ - "policies": [ - { - "id": "p1", "key": "allow-marketing", "priority": 100, "enabled": true, - "decision": "ALLOW", - "match": {"data_use": {"any": ["marketing"]}} - } - ], - "request": { - "consumer_id": "c1", "consumer_name": "Marketing", - "data_uses": ["marketing.advertising"] - } - }` - - stdout, _, exitCode := runCLI(t, "policies", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - if result["decision"] != "ALLOW" { - t.Errorf("expected ALLOW, got %s", result["decision"]) - } - if result["decisive_policy_key"] != "allow-marketing" { - t.Errorf("expected decisive key 'allow-marketing', got %v", result["decisive_policy_key"]) - } -} - -func TestCLI_Policies_Deny(t *testing.T) { - input := `{ - "policies": [ - { - "id": "p1", "key": "deny-all", "priority": 0, "enabled": true, - "decision": "DENY", - "match": {}, - "action": {"message": "Access denied by default"} - } - ], - "request": { - "consumer_id": "c1", "consumer_name": "Anyone", - "data_uses": ["essential"] - } - }` - - stdout, _, exitCode := runCLI(t, "policies", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - if result["decision"] != "DENY" { 
- t.Errorf("expected DENY, got %s", result["decision"]) - } - action := result["action"].(map[string]interface{}) - if action["message"] != "Access denied by default" { - t.Errorf("expected action message") - } -} - -func TestCLI_Policies_NoDecision(t *testing.T) { - input := `{ - "policies": [ - { - "id": "p1", "key": "deny-financial", "priority": 100, "enabled": true, - "decision": "DENY", - "match": {"data_category": {"any": ["user.financial"]}} - } - ], - "request": { - "consumer_id": "c1", "consumer_name": "Test", - "data_categories": ["system.operations"] - } - }` - - stdout, _, exitCode := runCLI(t, "policies", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - if result["decision"] != "NO_DECISION" { - t.Errorf("expected NO_DECISION, got %s", result["decision"]) - } -} - -func TestCLI_Policies_UnlessInverts(t *testing.T) { - input := `{ - "policies": [ - { - "id": "p1", "key": "allow-unless-optout", "priority": 100, "enabled": true, - "decision": "ALLOW", - "match": {"data_use": {"any": ["marketing"]}}, - "unless": [ - {"type": "consent", "privacy_notice_key": "do_not_sell", "requirement": "opt_out"} - ], - "action": {"message": "User opted out"} - } - ], - "request": { - "consumer_id": "c1", "consumer_name": "Marketing", - "data_uses": ["marketing.advertising"], - "context": { - "consent": {"do_not_sell": "opt_out"} - } - } - }` - - stdout, _, exitCode := runCLI(t, "policies", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - if result["decision"] != "DENY" { - t.Errorf("expected DENY (inverted ALLOW), got %s", result["decision"]) - } - if result["unless_triggered"] != true { - t.Error("expected unless_triggered=true") - } -} - -func TestCLI_Policies_PriorityOrdering(t *testing.T) { - input := `{ - "policies": [ - { - "id": "p1", 
"key": "low-allow", "priority": 10, "enabled": true, - "decision": "ALLOW", - "match": {} - }, - { - "id": "p2", "key": "high-deny", "priority": 200, "enabled": true, - "decision": "DENY", - "match": {}, - "action": {"message": "Highest priority wins"} - } - ], - "request": { - "consumer_id": "c1", "consumer_name": "Test", - "data_uses": ["marketing"] - } - }` - - stdout, _, exitCode := runCLI(t, "policies", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - if result["decision"] != "DENY" { - t.Errorf("expected DENY (high priority), got %s", result["decision"]) - } - if result["decisive_policy_key"] != "high-deny" { - t.Errorf("expected decisive key 'high-deny', got %v", result["decisive_policy_key"]) - } -} - -func TestCLI_Policies_FromFile(t *testing.T) { - input := `{ - "policies": [], - "request": {"consumer_id": "c1", "consumer_name": "Test"} - }` - - stdout, _, exitCode := runCLIWithFile(t, "policies", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - if result["decision"] != "NO_DECISION" { - t.Errorf("expected NO_DECISION, got %s", result["decision"]) - } -} - -// ── Error handling tests ───────────────────────────────────────────── - -func TestCLI_NoArgs_ExitsNonZero(t *testing.T) { - cmd := exec.Command(binaryPath) - err := cmd.Run() - if err == nil { - t.Error("expected non-zero exit with no args") - } -} - -func TestCLI_UnknownCommand_ExitsNonZero(t *testing.T) { - _, stderr, exitCode := runCLI(t, "bogus", "{}") - if exitCode == 0 { - t.Error("expected non-zero exit for unknown command") - } - if !bytes.Contains([]byte(stderr), []byte("Unknown command")) { - t.Errorf("expected 'Unknown command' in stderr, got: %s", stderr) - } -} - -func TestCLI_InvalidJSON_ExitsNonZero(t *testing.T) { - _, stderr, exitCode := runCLI(t, 
"purpose", "not valid json") - if exitCode == 0 { - t.Error("expected non-zero exit for invalid JSON") - } - if !bytes.Contains([]byte(stderr), []byte("Error parsing JSON")) { - t.Errorf("expected parse error in stderr, got: %s", stderr) - } -} - -func TestCLI_MissingFile_ExitsNonZero(t *testing.T) { - cmd := exec.Command(binaryPath, "purpose", "/tmp/nonexistent-file-fides-test.json") - err := cmd.Run() - if err == nil { - t.Error("expected non-zero exit for missing file") - } -} - -func TestCLI_EmptyInput_Purpose(t *testing.T) { - // Empty JSON object — consumer and datasets will have zero values - stdout, _, exitCode := runCLI(t, "purpose", "{}") - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - var result map[string]interface{} - json.Unmarshal([]byte(stdout), &result) - - // Empty datasets → 0 accesses, no violations, no gaps - if result["total_accesses"].(float64) != 0 { - t.Errorf("expected 0 accesses, got %v", result["total_accesses"]) - } -} - -func TestCLI_OutputIsValidJSON(t *testing.T) { - input := `{ - "consumer": {"consumer_id": "c1", "consumer_name": "Test", "purpose_keys": ["x"]}, - "datasets": {"d1": {"dataset_key": "d1", "purpose_keys": ["y"]}} - }` - - stdout, _, exitCode := runCLI(t, "purpose", input) - if exitCode != 0 { - t.Fatalf("expected exit 0, got %d", exitCode) - } - - // Verify it's valid JSON that can be round-tripped - var parsed interface{} - if err := json.Unmarshal([]byte(stdout), &parsed); err != nil { - t.Errorf("output is not valid JSON: %v\noutput: %s", err, stdout) - } -} diff --git a/src/fides/cli/__init__.py b/src/fides/cli/__init__.py index 3f09532fed1..1bfab20e598 100644 --- a/src/fides/cli/__init__.py +++ b/src/fides/cli/__init__.py @@ -19,6 +19,7 @@ from .commands.db import database from .commands.deploy import deploy from .commands.generate import generate +from .commands.pbac import pbac from .commands.pull import pull from .commands.scan import scan from .commands.ungrouped import ( @@ -38,7 
+39,7 @@ from .exceptions import LocalModeException CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]} -LOCAL_COMMANDS = [deploy, evaluate, generate, init, scan, parse, view, webserver] +LOCAL_COMMANDS = [deploy, evaluate, generate, init, pbac, scan, parse, view, webserver] LOCAL_COMMAND_NAMES = {command.name for command in LOCAL_COMMANDS} API_COMMANDS = [ annotate, diff --git a/src/fides/cli/commands/pbac.py b/src/fides/cli/commands/pbac.py new file mode 100644 index 00000000000..c607094a3fe --- /dev/null +++ b/src/fides/cli/commands/pbac.py @@ -0,0 +1,161 @@ +"""PBAC CLI commands for the Fides privacy engineering platform. + +Exposes the PBAC evaluation engine via the command line: + fides pbac evaluate-purpose — Check consumer/dataset purpose overlap + fides pbac evaluate-policies — Run access policies against a violation +""" + +from __future__ import annotations + +import json +import sys + +import rich_click as click + +from fides.service.pbac.evaluate import evaluate_purpose +from fides.service.pbac.types import ConsumerPurposes, DatasetPurposes + + +@click.group(name="pbac") +@click.pass_context +def pbac(ctx: click.Context) -> None: + """ + Policy-Based Access Control evaluation commands. + """ + + +@pbac.command(name="evaluate-purpose") +@click.argument("input_file", type=click.File("r"), default="-") +def evaluate_purpose_cmd(input_file: click.utils.LazyFile) -> None: + """Evaluate purpose overlap between a consumer and datasets. + + Reads JSON from INPUT_FILE (or stdin if omitted). 
+ + \b + Expected JSON schema: + { + "consumer": { + "consumer_id": "...", + "consumer_name": "...", + "purpose_keys": ["billing", "analytics"] + }, + "datasets": { + "dataset_key": { + "dataset_key": "...", + "purpose_keys": ["billing"], + "collection_purposes": {"collection_name": ["purpose1"]} + } + }, + "collections": {"dataset_key": ["collection1", "collection2"]} + } + """ + try: + data = json.load(input_file) + except json.JSONDecodeError as e: + click.echo(f"Error parsing JSON: {e}", err=True) + sys.exit(1) + + consumer_data = data.get("consumer", {}) + consumer = ConsumerPurposes( + consumer_id=consumer_data.get("consumer_id", ""), + consumer_name=consumer_data.get("consumer_name", ""), + purpose_keys=frozenset(consumer_data.get("purpose_keys", [])), + ) + + datasets: dict[str, DatasetPurposes] = {} + for key, ds_data in data.get("datasets", {}).items(): + collection_purposes = { + col: frozenset(purposes) + for col, purposes in ds_data.get("collection_purposes", {}).items() + } + datasets[key] = DatasetPurposes( + dataset_key=ds_data.get("dataset_key", key), + purpose_keys=frozenset(ds_data.get("purpose_keys", [])), + collection_purposes=collection_purposes, + ) + + collections_raw = data.get("collections", {}) + collections: dict[str, tuple[str, ...]] | None = ( + {k: tuple(v) for k, v in collections_raw.items()} if collections_raw else None + ) + + result = evaluate_purpose(consumer, datasets, collections=collections) + + output = { + "violations": [ + { + "consumer_id": v.consumer_id, + "consumer_name": v.consumer_name, + "dataset_key": v.dataset_key, + "collection": v.collection, + "consumer_purposes": sorted(v.consumer_purposes), + "dataset_purposes": sorted(v.dataset_purposes), + "reason": v.reason, + } + for v in result.violations + ], + "gaps": [ + { + "gap_type": g.gap_type.value, + "identifier": g.identifier, + "dataset_key": g.dataset_key, + "reason": g.reason, + } + for g in result.gaps + ], + "total_accesses": result.total_accesses, + } + + 
click.echo(json.dumps(output, indent=2)) + + +@pbac.command(name="evaluate-policies") +@click.argument("input_file", type=click.File("r"), default="-") +def evaluate_policies_cmd(input_file: click.utils.LazyFile) -> None: + """Evaluate access policies against a PBAC violation. + + Reads JSON from INPUT_FILE (or stdin if omitted). + + \b + Expected JSON schema: + { + "policies": [ + { + "key": "allow-marketing", + "priority": 100, + "enabled": true, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + "unless": [{"type": "consent", "privacy_notice_key": "...", "requirement": "opt_out"}], + "action": {"message": "..."} + } + ], + "request": { + "consumer_id": "...", + "consumer_name": "...", + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email"], + "data_subjects": ["customer"], + "context": {"consent": {"do_not_sell": "opt_out"}} + } + } + + \b + This is the same evaluation the Go sidecar performs at API speed. + The CLI runs it through Python for convenience — use the sidecar + for production throughput. + """ + try: + data = json.load(input_file) + except json.JSONDecodeError as e: + click.echo(f"Error parsing JSON: {e}", err=True) + sys.exit(1) + + from fides.service.pbac.policies.evaluate import evaluate_access_policies + + policies = data.get("policies", []) + request = data.get("request", {}) + + result = evaluate_access_policies(policies, request) + + click.echo(json.dumps(result, indent=2)) diff --git a/src/fides/service/pbac/policies/evaluate.py b/src/fides/service/pbac/policies/evaluate.py new file mode 100644 index 00000000000..70f2e802288 --- /dev/null +++ b/src/fides/service/pbac/policies/evaluate.py @@ -0,0 +1,204 @@ +"""Access Policy v2 evaluation engine — Python implementation. + +Mirrors the Go implementation in policy-engine/pkg/pbac/policy_evaluate.go. +Used by the CLI (fides pbac evaluate-policies) and as the reference +implementation. The Go sidecar is the production path for API throughput. 
+ +Algorithm (from IMPLEMENTATION_GUIDE.md): + 1. Sort enabled policies by priority (highest first) + 2. For each policy, check if match block applies + 3. If matched, evaluate unless conditions + 4. ALLOW + unless triggered → DENY (decisive, stop) + 5. DENY + unless triggered → SUPPRESSED (continue) + 6. Decision stands as-is → decisive, stop + 7. No policy matched → NO_DECISION +""" + +from __future__ import annotations + +from typing import Any + + +def evaluate_access_policies( + policies: list[dict[str, Any]], + request: dict[str, Any], +) -> dict[str, Any]: + """Evaluate a list of access policies against a request. + + Takes and returns plain dicts for easy JSON round-tripping from the CLI. + """ + enabled = [p for p in policies if p.get("enabled", True)] + enabled.sort(key=lambda p: p.get("priority", 0), reverse=True) + + evaluated: list[dict[str, Any]] = [] + + for policy in enabled: + if not _matches_request(policy.get("match", {}), request): + continue + + unless_triggered = _evaluate_unless(policy.get("unless", []), request) + decision = policy.get("decision", "DENY") + + if unless_triggered: + if decision == "ALLOW": + return { + "decision": "DENY", + "decisive_policy_key": policy.get("key"), + "decisive_policy_priority": policy.get("priority"), + "unless_triggered": True, + "action": policy.get("action"), + "evaluated_policies": evaluated, + } + # DENY suppressed + evaluated.append({ + "policy_key": policy.get("key"), + "priority": policy.get("priority"), + "matched": True, + "result": "SUPPRESSED", + "unless_triggered": True, + }) + continue + + # Decision stands + action = policy.get("action") if decision == "DENY" else None + return { + "decision": decision, + "decisive_policy_key": policy.get("key"), + "decisive_policy_priority": policy.get("priority"), + "unless_triggered": False, + "action": action, + "evaluated_policies": evaluated, + } + + return { + "decision": "NO_DECISION", + "evaluated_policies": evaluated, + } + + +def 
_matches_request(match: dict[str, Any], request: dict[str, Any]) -> bool: + """Check if a policy's match block applies to the request.""" + for dimension, field in [ + ("data_use", "data_uses"), + ("data_category", "data_categories"), + ("data_subject", "data_subjects"), + ]: + dim = match.get(dimension) + if dim is not None: + values = request.get(field, []) + if not _matches_dimension(dim, values): + return False + return True + + +def _matches_dimension(dim: dict[str, Any], request_values: list[str]) -> bool: + """Check if request values satisfy a match dimension's any/all operators.""" + any_values = dim.get("any", []) + if any_values: + if not any(_taxonomy_matches_any(mv, request_values) for mv in any_values): + return False + + all_values = dim.get("all", []) + if all_values: + if not all(_taxonomy_matches_any(mv, request_values) for mv in all_values): + return False + + return True + + +def _taxonomy_matches_any(match_key: str, request_values: list[str]) -> bool: + """Check if a taxonomy key matches any request value via prefix matching.""" + return any(_taxonomy_match(match_key, rv) for rv in request_values) + + +def _taxonomy_match(match_key: str, request_value: str) -> bool: + """Check if match_key equals or is a parent of request_value. + + "user.contact" matches "user.contact.email" (prefix + dot boundary). + "user" does NOT match "user_data". 
+ """ + if match_key == request_value: + return True + return request_value.startswith(match_key + ".") + + +def _evaluate_unless( + constraints: list[dict[str, Any]], request: dict[str, Any] +) -> bool: + """All constraints must trigger (AND logic) for the unless to fire.""" + if not constraints: + return False + return all(_evaluate_constraint(c, request) for c in constraints) + + +def _evaluate_constraint(constraint: dict[str, Any], request: dict[str, Any]) -> bool: + """Evaluate a single unless condition.""" + ctype = constraint.get("type") + context = request.get("context", {}) + + if ctype == "consent": + return _eval_consent(constraint, context) + if ctype == "geo_location": + return _eval_geo(constraint, context) + if ctype == "data_flow": + return _eval_data_flow(constraint, context) + return False + + +def _eval_consent(constraint: dict[str, Any], context: dict[str, Any]) -> bool: + consent_map = context.get("consent", {}) + status = consent_map.get(constraint.get("privacy_notice_key")) + if status is None: + return False + + requirement = constraint.get("requirement") + if requirement == "opt_in": + return status == "opt_in" + if requirement == "opt_out": + return status == "opt_out" + if requirement == "not_opt_in": + return status != "opt_in" + if requirement == "not_opt_out": + return status != "opt_out" + return False + + +def _eval_geo(constraint: dict[str, Any], context: dict[str, Any]) -> bool: + field_path = constraint.get("field", "") + value = _resolve_field(context, field_path) + if value is None: + return False + + values_set = set(constraint.get("values", [])) + operator = constraint.get("operator") + + if operator == "in": + return value in values_set + if operator == "not_in": + return value not in values_set + return False + + +def _eval_data_flow(constraint: dict[str, Any], context: dict[str, Any]) -> bool: + flows_map = context.get("data_flows", {}) + direction_flows = flows_map.get(constraint.get("direction"), []) + system_set = 
set(direction_flows) + + operator = constraint.get("operator") + systems = constraint.get("systems", []) + + if operator == "any_of": + return any(s in system_set for s in systems) + if operator == "none_of": + return all(s not in system_set for s in systems) + return False + + +def _resolve_field(context: dict[str, Any], field_path: str) -> str | None: + """Traverse a dotted path in the context dict.""" + current: Any = context + for part in field_path.split("."): + if not isinstance(current, dict): + return None + current = current.get(part) + return current if isinstance(current, str) else None diff --git a/tests/service/pbac/policies/test_evaluate_access_policies.py b/tests/service/pbac/policies/test_evaluate_access_policies.py new file mode 100644 index 00000000000..8d4d851bbcd --- /dev/null +++ b/tests/service/pbac/policies/test_evaluate_access_policies.py @@ -0,0 +1,391 @@ +"""Tests for the Access Policy v2 evaluation engine (Python implementation). + +These mirror the Go tests in policy-engine/pkg/pbac/policy_evaluate_test.go +to ensure both implementations produce identical results. 
+""" + +import pytest + +from fides.service.pbac.policies.evaluate import evaluate_access_policies + + +class TestPriorityOrdering: + def test_highest_priority_wins(self): + policies = [ + { + "key": "low-allow", + "priority": 10, + "enabled": True, + "decision": "ALLOW", + "match": {}, + }, + { + "key": "high-deny", + "priority": 200, + "enabled": True, + "decision": "DENY", + "match": {}, + "action": {"message": "Highest priority wins"}, + }, + ] + request = {"data_uses": ["marketing"]} + + result = evaluate_access_policies(policies, request) + + assert result["decision"] == "DENY" + assert result["decisive_policy_key"] == "high-deny" + + def test_allow_when_matched(self): + policies = [ + { + "key": "deny-financial", + "priority": 200, + "enabled": True, + "decision": "DENY", + "match": {"data_category": {"any": ["user.financial"]}}, + }, + { + "key": "allow-marketing", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + }, + ] + request = { + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email"], + } + + result = evaluate_access_policies(policies, request) + + assert result["decision"] == "ALLOW" + assert result["decisive_policy_key"] == "allow-marketing" + + def test_catch_all_deny(self): + policies = [ + { + "key": "catch-all", + "priority": 0, + "enabled": True, + "decision": "DENY", + "match": {}, + "action": {"message": "Default deny"}, + }, + ] + + result = evaluate_access_policies(policies, {"data_uses": ["essential"]}) + + assert result["decision"] == "DENY" + assert result["decisive_policy_key"] == "catch-all" + assert result["action"]["message"] == "Default deny" + + +class TestNoDecision: + def test_empty_policies(self): + result = evaluate_access_policies([], {}) + assert result["decision"] == "NO_DECISION" + + def test_disabled_policies_skipped(self): + policies = [ + {"key": "disabled", "priority": 100, "enabled": False, "decision": "DENY", "match": {}}, + ] 
+ result = evaluate_access_policies(policies, {}) + assert result["decision"] == "NO_DECISION" + + def test_no_match(self): + policies = [ + { + "key": "deny-financial", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {"data_category": {"any": ["user.financial"]}}, + }, + ] + result = evaluate_access_policies( + policies, {"data_categories": ["system.operations"]} + ) + assert result["decision"] == "NO_DECISION" + + +class TestTaxonomyMatching: + def test_parent_matches_child(self): + policies = [ + { + "key": "deny-user", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {"data_category": {"any": ["user"]}}, + }, + ] + result = evaluate_access_policies( + policies, {"data_categories": ["user.contact.email"]} + ) + assert result["decision"] == "DENY" + + def test_child_does_not_match_parent(self): + policies = [ + { + "key": "deny-child", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {"data_category": {"any": ["user.contact.email"]}}, + }, + ] + result = evaluate_access_policies( + policies, {"data_categories": ["user.contact"]} + ) + assert result["decision"] == "NO_DECISION" + + def test_no_dot_boundary_false_positive(self): + policies = [ + { + "key": "deny-user", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {"data_category": {"any": ["user"]}}, + }, + ] + result = evaluate_access_policies( + policies, {"data_categories": ["user_data"]} + ) + assert result["decision"] == "NO_DECISION" + + def test_match_all_requires_every_value(self): + policies = [ + { + "key": "require-both", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": { + "data_category": { + "all": ["user.contact", "user.financial"], + } + }, + }, + ] + + # Only one → no match + result1 = evaluate_access_policies( + policies, {"data_categories": ["user.contact.email"]} + ) + assert result1["decision"] == "NO_DECISION" + + # Both → match + result2 = evaluate_access_policies( + policies, + 
{"data_categories": ["user.contact.email", "user.financial.bank_account"]}, + ) + assert result2["decision"] == "DENY" + + +class TestUnlessConsent: + def test_opt_out_inverts_allow(self): + policies = [ + { + "key": "allow-unless-optout", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + "unless": [ + { + "type": "consent", + "privacy_notice_key": "do_not_sell", + "requirement": "opt_out", + } + ], + "action": {"message": "User opted out"}, + }, + ] + request = { + "data_uses": ["marketing.advertising"], + "context": {"consent": {"do_not_sell": "opt_out"}}, + } + + result = evaluate_access_policies(policies, request) + + assert result["decision"] == "DENY" + assert result["unless_triggered"] is True + + def test_consent_not_triggered_allow_stands(self): + policies = [ + { + "key": "allow-unless-optout", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + "unless": [ + { + "type": "consent", + "privacy_notice_key": "do_not_sell", + "requirement": "opt_out", + } + ], + }, + ] + request = { + "data_uses": ["marketing.advertising"], + "context": {"consent": {"do_not_sell": "opt_in"}}, + } + + result = evaluate_access_policies(policies, request) + assert result["decision"] == "ALLOW" + assert result["unless_triggered"] is False + + +class TestUnlessGeo: + def test_deny_suppressed_continues_to_next(self): + policies = [ + { + "key": "deny-unless-geo", + "priority": 200, + "enabled": True, + "decision": "DENY", + "match": {}, + "unless": [ + { + "type": "geo_location", + "field": "environment.geo_location", + "operator": "in", + "values": ["US-CA"], + } + ], + }, + { + "key": "fallback-allow", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + }, + ] + request = { + "context": {"environment": {"geo_location": "US-CA"}}, + } + + result = evaluate_access_policies(policies, request) + + assert result["decision"] == "ALLOW" + 
assert result["decisive_policy_key"] == "fallback-allow" + assert len(result["evaluated_policies"]) == 1 + assert result["evaluated_policies"][0]["result"] == "SUPPRESSED" + + +class TestUnlessDataFlow: + def test_egress_triggers_unless(self): + policies = [ + { + "key": "allow-unless-egress", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "unless": [ + { + "type": "data_flow", + "direction": "egress", + "operator": "any_of", + "systems": ["external_vendor"], + } + ], + }, + ] + request = { + "context": { + "data_flows": {"egress": ["external_vendor", "partner_api"]}, + }, + } + + result = evaluate_access_policies(policies, request) + assert result["decision"] == "DENY" + + +class TestUnlessMultipleConstraints: + def test_all_must_trigger(self): + policies = [ + { + "key": "allow-unless-both", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "unless": [ + { + "type": "consent", + "privacy_notice_key": "do_not_sell", + "requirement": "opt_out", + }, + { + "type": "geo_location", + "field": "environment.geo_location", + "operator": "in", + "values": ["US-CA"], + }, + ], + }, + ] + + # Only consent triggers → ALLOW stands + result1 = evaluate_access_policies(policies, { + "context": { + "consent": {"do_not_sell": "opt_out"}, + "environment": {"geo_location": "US-NY"}, + }, + }) + assert result1["decision"] == "ALLOW" + + # Both trigger → DENY + result2 = evaluate_access_policies(policies, { + "context": { + "consent": {"do_not_sell": "opt_out"}, + "environment": {"geo_location": "US-CA"}, + }, + }) + assert result2["decision"] == "DENY" + + +class TestEdgeCases: + def test_no_context_unless_does_not_trigger(self): + policies = [ + { + "key": "allow-unless", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "unless": [ + {"type": "consent", "privacy_notice_key": "x", "requirement": "opt_out"} + ], + }, + ] + result = evaluate_access_policies(policies, {}) + assert result["decision"] 
== "ALLOW" + + def test_deny_action_only_on_deny(self): + policies = [ + { + "key": "allow-with-action", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "action": {"message": "should not appear"}, + }, + ] + result = evaluate_access_policies(policies, {}) + assert result["decision"] == "ALLOW" + assert result.get("action") is None + + def test_empty_match_catches_everything(self): + policies = [ + {"key": "catch-all", "priority": 1, "enabled": True, "decision": "DENY", "match": {}}, + ] + result = evaluate_access_policies(policies, {}) + assert result["decision"] == "DENY" diff --git a/tests/service/pbac/policies/test_pbac_cli.py b/tests/service/pbac/policies/test_pbac_cli.py new file mode 100644 index 00000000000..23bcdcae230 --- /dev/null +++ b/tests/service/pbac/policies/test_pbac_cli.py @@ -0,0 +1,301 @@ +"""Tests for the fides pbac CLI commands. + +Uses Click's CliRunner to invoke the commands directly without +needing a running server or Go sidecar. +""" + +import json +import os +import tempfile + +import pytest +from click.testing import CliRunner + +from fides.cli.commands.pbac import pbac + + +@pytest.fixture +def runner(): + return CliRunner() + + +class TestEvaluatePurposeCLI: + def test_compliant_via_stdin(self, runner): + input_json = json.dumps({ + "consumer": { + "consumer_id": "c1", + "consumer_name": "Billing", + "purpose_keys": ["billing"], + }, + "datasets": { + "billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + }, + }, + }) + + result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["violations"] == [] + assert output["total_accesses"] == 1 + + def test_violation_via_stdin(self, runner): + input_json = json.dumps({ + "consumer": { + "consumer_id": "c1", + "consumer_name": "Analytics", + "purpose_keys": ["analytics"], + }, + "datasets": { + "billing_db": { + "dataset_key": "billing_db", + 
"purpose_keys": ["billing"], + }, + }, + }) + + result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert len(output["violations"]) == 1 + assert output["violations"][0]["dataset_key"] == "billing_db" + + def test_gap_no_consumer_purposes(self, runner): + input_json = json.dumps({ + "consumer": { + "consumer_id": "c1", + "consumer_name": "Unknown", + "purpose_keys": [], + }, + "datasets": { + "db1": {"dataset_key": "db1", "purpose_keys": ["billing"]}, + }, + }) + + result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert len(output["gaps"]) == 1 + assert output["gaps"][0]["gap_type"] == "unresolved_identity" + + def test_with_collections(self, runner): + input_json = json.dumps({ + "consumer": { + "consumer_id": "c1", + "consumer_name": "Accountant", + "purpose_keys": ["accounting"], + }, + "datasets": { + "billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + "collection_purposes": {"invoices": ["accounting"]}, + }, + }, + "collections": {"billing_db": ["invoices"]}, + }) + + result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["violations"] == [] + + def test_from_file(self, runner): + data = { + "consumer": { + "consumer_id": "c1", + "consumer_name": "Test", + "purpose_keys": ["analytics"], + }, + "datasets": { + "db1": {"dataset_key": "db1", "purpose_keys": ["analytics"]}, + }, + } + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as f: + json.dump(data, f) + f.flush() + tmppath = f.name + + try: + result = runner.invoke(pbac, ["evaluate-purpose", tmppath]) + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["violations"] == [] + finally: + os.unlink(tmppath) + + def test_multiple_datasets(self, 
runner): + input_json = json.dumps({ + "consumer": { + "consumer_id": "c1", + "consumer_name": "Analyst", + "purpose_keys": ["analytics"], + }, + "datasets": { + "analytics_db": { + "dataset_key": "analytics_db", + "purpose_keys": ["analytics"], + }, + "billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + }, + "empty_db": {"dataset_key": "empty_db", "purpose_keys": []}, + }, + }) + + result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["total_accesses"] == 3 + assert len(output["violations"]) == 1 + assert len(output["gaps"]) == 1 + + def test_invalid_json(self, runner): + result = runner.invoke(pbac, ["evaluate-purpose"], input="not json") + assert result.exit_code != 0 + + def test_output_is_valid_json(self, runner): + input_json = json.dumps({ + "consumer": {"consumer_id": "c1", "consumer_name": "T", "purpose_keys": ["x"]}, + "datasets": {"d1": {"dataset_key": "d1", "purpose_keys": ["y"]}}, + }) + result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) + assert result.exit_code == 0 + json.loads(result.output) # should not raise + + +class TestEvaluatePoliciesCLI: + def test_allow(self, runner): + input_json = json.dumps({ + "policies": [ + { + "key": "allow-marketing", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + }, + ], + "request": { + "consumer_id": "c1", + "consumer_name": "Marketing", + "data_uses": ["marketing.advertising"], + }, + }) + + result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["decision"] == "ALLOW" + assert output["decisive_policy_key"] == "allow-marketing" + + def test_deny_with_action(self, runner): + input_json = json.dumps({ + "policies": [ + { + "key": "deny-all", + "priority": 0, + "enabled": True, + "decision": "DENY", + 
"match": {}, + "action": {"message": "Access denied"}, + }, + ], + "request": {"consumer_id": "c1", "consumer_name": "Anyone"}, + }) + + result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["decision"] == "DENY" + assert output["action"]["message"] == "Access denied" + + def test_no_decision(self, runner): + input_json = json.dumps({ + "policies": [], + "request": {"consumer_id": "c1", "consumer_name": "Test"}, + }) + + result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["decision"] == "NO_DECISION" + + def test_unless_inverts(self, runner): + input_json = json.dumps({ + "policies": [ + { + "key": "allow-unless-optout", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + "unless": [ + { + "type": "consent", + "privacy_notice_key": "do_not_sell", + "requirement": "opt_out", + } + ], + }, + ], + "request": { + "data_uses": ["marketing.advertising"], + "context": {"consent": {"do_not_sell": "opt_out"}}, + }, + }) + + result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["decision"] == "DENY" + assert output["unless_triggered"] is True + + def test_priority_ordering(self, runner): + input_json = json.dumps({ + "policies": [ + {"key": "low-allow", "priority": 10, "enabled": True, "decision": "ALLOW", "match": {}}, + { + "key": "high-deny", + "priority": 200, + "enabled": True, + "decision": "DENY", + "match": {}, + }, + ], + "request": {"data_uses": ["marketing"]}, + }) + + result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert output["decision"] == "DENY" + assert output["decisive_policy_key"] == "high-deny" + + 
def test_invalid_json(self, runner): + result = runner.invoke(pbac, ["evaluate-policies"], input="bad json") + assert result.exit_code != 0 + + +class TestPbacGroupHelp: + def test_help(self, runner): + result = runner.invoke(pbac, ["--help"]) + assert result.exit_code == 0 + assert "evaluate-purpose" in result.output + assert "evaluate-policies" in result.output From 74d86ee2bb1da35c456e3a6071acf9e8a010b685 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 14:54:16 -0700 Subject: [PATCH 06/14] Fill test coverage gaps for all PBAC engine rules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Go library (50 tests) — adds: - data_subject match dimension - three-dimension match (use + category + subject) - consent not_opt_in / not_opt_out requirements - data_flow none_of operator Python evaluation (42 tests) — adds: - data_subject match + three-dimension match - match any+all combined across dimensions - consent not_opt_in / not_opt_out - geo not_in operator - data_flow none_of operator - nested context field resolution 92 automated tests total across Go and Python, covering every rule, match mode, unless condition type, and operator. 
--- policy-engine/pkg/pbac/edge_cases_test.go | 168 +++++++++++++ .../policies/test_evaluate_access_policies.py | 231 ++++++++++++++++++ 2 files changed, 399 insertions(+) diff --git a/policy-engine/pkg/pbac/edge_cases_test.go b/policy-engine/pkg/pbac/edge_cases_test.go index 3bd520e3a9d..556c1a8f8cd 100644 --- a/policy-engine/pkg/pbac/edge_cases_test.go +++ b/policy-engine/pkg/pbac/edge_cases_test.go @@ -368,3 +368,171 @@ func TestPolicy_ResolveContextField_NonStringValue(t *testing.T) { t.Errorf("expected empty string for non-string value, got '%s'", val) } } + +// ── Missing coverage: data_subject dimension ───────────────────────── + +func TestPolicy_MatchDataSubject(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "deny-employee-data", Priority: 100, Enabled: true, + Decision: PolicyDeny, + Match: MatchBlock{ + DataSubject: &MatchDimension{Any: []string{"employee"}}, + }, + }, + } + + // Matches + result := EvaluatePolicies(policies, &AccessEvaluationRequest{ + DataSubjects: []string{"employee"}, + }) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY, got %s", result.Decision) + } + + // Doesn't match + result2 := EvaluatePolicies(policies, &AccessEvaluationRequest{ + DataSubjects: []string{"customer"}, + }) + if result2.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION, got %s", result2.Decision) + } +} + +func TestPolicy_MatchThreeDimensions(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "specific-deny", Priority: 100, Enabled: true, + Decision: PolicyDeny, + Match: MatchBlock{ + DataUse: &MatchDimension{Any: []string{"marketing"}}, + DataCategory: &MatchDimension{Any: []string{"user.contact"}}, + DataSubject: &MatchDimension{Any: []string{"customer"}}, + }, + }, + } + + // All three match + result := EvaluatePolicies(policies, &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + DataCategories: []string{"user.contact.email"}, + DataSubjects: []string{"customer"}, + }) + 
if result.Decision != PolicyDeny { + t.Errorf("expected DENY (all dimensions match), got %s", result.Decision) + } + + // Two of three match — subject doesn't + result2 := EvaluatePolicies(policies, &AccessEvaluationRequest{ + DataUses: []string{"marketing.advertising"}, + DataCategories: []string{"user.contact.email"}, + DataSubjects: []string{"employee"}, + }) + if result2.Decision != PolicyNoDecision { + t.Errorf("expected NO_DECISION (subject mismatch), got %s", result2.Decision) + } +} + +// ── Missing coverage: consent not_opt_in / not_opt_out ─────────────── + +func TestUnless_ConsentNotOptIn(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-not-optin", Priority: 100, Enabled: true, + Decision: PolicyAllow, Match: MatchBlock{}, + Unless: []Constraint{ + {Type: ConstraintConsent, PrivacyNoticeKey: "notice", Requirement: "not_opt_in"}, + }, + }, + } + + // Status is "opt_out" → not_opt_in is true → unless triggers + result := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{"notice": "opt_out"}, + }, + }) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY (not_opt_in triggered), got %s", result.Decision) + } + + // Status is "opt_in" → not_opt_in is false → unless doesn't trigger + result2 := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{"notice": "opt_in"}, + }, + }) + if result2.Decision != PolicyAllow { + t.Errorf("expected ALLOW (opt_in, not_opt_in is false), got %s", result2.Decision) + } +} + +func TestUnless_ConsentNotOptOut(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-not-optout", Priority: 100, Enabled: true, + Decision: PolicyAllow, Match: MatchBlock{}, + Unless: []Constraint{ + {Type: ConstraintConsent, PrivacyNoticeKey: "notice", Requirement: "not_opt_out"}, + }, + }, + } + + // Status is "opt_in" → not_opt_out is true → 
unless triggers + result := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{"notice": "opt_in"}, + }, + }) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY (not_opt_out triggered), got %s", result.Decision) + } + + // Status is "opt_out" → not_opt_out is false → unless doesn't trigger + result2 := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{"notice": "opt_out"}, + }, + }) + if result2.Decision != PolicyAllow { + t.Errorf("expected ALLOW (opt_out, not_opt_out is false), got %s", result2.Decision) + } +} + +// ── Missing coverage: data_flow none_of ────────────────────────────── + +func TestUnless_DataFlowNoneOf(t *testing.T) { + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-egress-to-vendor", Priority: 100, Enabled: true, + Decision: PolicyAllow, Match: MatchBlock{}, + Unless: []Constraint{ + {Type: ConstraintDataFlow, Direction: "egress", Operator: "none_of", Systems: []string{"trusted_partner"}}, + }, + }, + } + + // trusted_partner is present → none_of is false → unless doesn't trigger + result := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "data_flows": map[string]interface{}{ + "egress": []interface{}{"trusted_partner", "analytics"}, + }, + }, + }) + if result.Decision != PolicyAllow { + t.Errorf("expected ALLOW (trusted partner present, none_of false), got %s", result.Decision) + } + + // trusted_partner NOT present → none_of is true → unless triggers + result2 := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "data_flows": map[string]interface{}{ + "egress": []interface{}{"unknown_vendor"}, + }, + }, + }) + if result2.Decision != PolicyDeny { + t.Errorf("expected DENY (trusted partner absent, none_of true), got %s", result2.Decision) + } +} diff --git 
a/tests/service/pbac/policies/test_evaluate_access_policies.py b/tests/service/pbac/policies/test_evaluate_access_policies.py index 8d4d851bbcd..99646223cb3 100644 --- a/tests/service/pbac/policies/test_evaluate_access_policies.py +++ b/tests/service/pbac/policies/test_evaluate_access_policies.py @@ -351,6 +351,213 @@ def test_all_must_trigger(self): assert result2["decision"] == "DENY" +class TestMatchDataSubject: + def test_data_subject_matches(self): + policies = [ + { + "key": "deny-employee", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {"data_subject": {"any": ["employee"]}}, + }, + ] + result = evaluate_access_policies( + policies, {"data_subjects": ["employee"]} + ) + assert result["decision"] == "DENY" + + def test_data_subject_no_match(self): + policies = [ + { + "key": "deny-employee", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {"data_subject": {"any": ["employee"]}}, + }, + ] + result = evaluate_access_policies( + policies, {"data_subjects": ["customer"]} + ) + assert result["decision"] == "NO_DECISION" + + def test_three_dimensions_all_must_match(self): + policies = [ + { + "key": "specific", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": { + "data_use": {"any": ["marketing"]}, + "data_category": {"any": ["user.contact"]}, + "data_subject": {"any": ["customer"]}, + }, + }, + ] + # All three match + result = evaluate_access_policies(policies, { + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email"], + "data_subjects": ["customer"], + }) + assert result["decision"] == "DENY" + + # Subject doesn't match + result2 = evaluate_access_policies(policies, { + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email"], + "data_subjects": ["employee"], + }) + assert result2["decision"] == "NO_DECISION" + + +class TestMatchCombined: + def test_any_and_all_on_different_dimensions(self): + policies = [ + { + "key": "combined", + 
"priority": 100, + "enabled": True, + "decision": "DENY", + "match": { + "data_use": {"any": ["marketing"]}, + "data_category": {"all": ["user.contact", "user.financial"]}, + }, + }, + ] + # Both categories present → match + result = evaluate_access_policies(policies, { + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email", "user.financial.bank_account"], + }) + assert result["decision"] == "DENY" + + # Only one category → no match + result2 = evaluate_access_policies(policies, { + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email"], + }) + assert result2["decision"] == "NO_DECISION" + + +class TestConsentVariants: + def test_not_opt_in(self): + policies = [ + { + "key": "allow-unless", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "unless": [ + {"type": "consent", "privacy_notice_key": "n", "requirement": "not_opt_in"}, + ], + }, + ] + # opt_out → not_opt_in is true → DENY + result = evaluate_access_policies( + policies, {"context": {"consent": {"n": "opt_out"}}} + ) + assert result["decision"] == "DENY" + + # opt_in → not_opt_in is false → ALLOW + result2 = evaluate_access_policies( + policies, {"context": {"consent": {"n": "opt_in"}}} + ) + assert result2["decision"] == "ALLOW" + + def test_not_opt_out(self): + policies = [ + { + "key": "allow-unless", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "unless": [ + {"type": "consent", "privacy_notice_key": "n", "requirement": "not_opt_out"}, + ], + }, + ] + # opt_in → not_opt_out is true → DENY + result = evaluate_access_policies( + policies, {"context": {"consent": {"n": "opt_in"}}} + ) + assert result["decision"] == "DENY" + + # opt_out → not_opt_out is false → ALLOW + result2 = evaluate_access_policies( + policies, {"context": {"consent": {"n": "opt_out"}}} + ) + assert result2["decision"] == "ALLOW" + + +class TestGeoNotIn: + def test_not_in_operator(self): + policies = [ + { + "key": 
"deny-unless-outside", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {}, + "unless": [ + { + "type": "geo_location", + "field": "environment.geo_location", + "operator": "not_in", + "values": ["US-CA"], + } + ], + }, + ] + # In CA → not_in false → unless doesn't trigger → DENY + result = evaluate_access_policies(policies, { + "context": {"environment": {"geo_location": "US-CA"}}, + }) + assert result["decision"] == "DENY" + + # In DE → not_in true → unless triggers → DENY suppressed → NO_DECISION + result2 = evaluate_access_policies(policies, { + "context": {"environment": {"geo_location": "EU-DE"}}, + }) + assert result2["decision"] == "NO_DECISION" + + +class TestDataFlowNoneOf: + def test_none_of_operator(self): + policies = [ + { + "key": "allow-unless", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "unless": [ + { + "type": "data_flow", + "direction": "egress", + "operator": "none_of", + "systems": ["trusted_partner"], + } + ], + }, + ] + # trusted_partner present → none_of false → ALLOW + result = evaluate_access_policies(policies, { + "context": {"data_flows": {"egress": ["trusted_partner"]}}, + }) + assert result["decision"] == "ALLOW" + + # trusted_partner absent → none_of true → DENY + result2 = evaluate_access_policies(policies, { + "context": {"data_flows": {"egress": ["unknown_vendor"]}}, + }) + assert result2["decision"] == "DENY" + + class TestEdgeCases: def test_no_context_unless_does_not_trigger(self): policies = [ @@ -389,3 +596,27 @@ def test_empty_match_catches_everything(self): ] result = evaluate_access_policies(policies, {}) assert result["decision"] == "DENY" + + def test_context_nested_field_resolution(self): + policies = [ + { + "key": "deny-unless-nested", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {}, + "unless": [ + { + "type": "geo_location", + "field": "a.b.c", + "operator": "in", + "values": ["deep_value"], + } + ], + }, + ] + result = 
evaluate_access_policies(policies, { + "context": {"a": {"b": {"c": "deep_value"}}}, + }) + # Unless triggers → DENY suppressed → NO_DECISION + assert result["decision"] == "NO_DECISION" From 57edc45e8c2c50814eda970bdb140da732fd616f Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 15:15:36 -0700 Subject: [PATCH 07/14] Add CI workflow for Go policy engine Runs go build, go vet, and go test on pull_request/push when policy-engine/ files change. Uses setup-go with module caching. --- .github/workflows/policy_engine_checks.yml | 46 ++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 .github/workflows/policy_engine_checks.yml diff --git a/.github/workflows/policy_engine_checks.yml b/.github/workflows/policy_engine_checks.yml new file mode 100644 index 00000000000..894bdf3d48f --- /dev/null +++ b/.github/workflows/policy_engine_checks.yml @@ -0,0 +1,46 @@ +name: Policy Engine (Go) Checks + +on: + workflow_dispatch: + pull_request: + paths: + - "policy-engine/**" + - ".github/workflows/policy_engine_checks.yml" + merge_group: + types: [checks_requested] + push: + branches: + - "main" + - "release-**" + paths: + - "policy-engine/**" + - ".github/workflows/policy_engine_checks.yml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +jobs: + go-checks: + runs-on: ubuntu-latest + defaults: + run: + working-directory: policy-engine + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.23" + cache-dependency-path: policy-engine/go.mod + + - name: Build + run: go build ./... + + - name: Vet + run: go vet ./... + + - name: Test + run: go test ./... 
-v -count=1 From 31a9128e5e75afa924150b6b81272630c9c8079a Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 15:35:09 -0700 Subject: [PATCH 08/14] Address PR review: fix CI blockers, logic divergences, and missing coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI blockers: - Fix mypy: click.utils.LazyFile → typing.TextIO in pbac.py - Run ruff format on all Python files Logic fixes: - Deterministic output: sort dataset keys before iterating Go map - Stable sort: sort.Slice → sort.SliceStable for policy priority ties - Enabled default: *bool pointer so nil (omitted) defaults to true, matching Python's p.get("enabled", True) - Complete audit trail: decisive policy now included in EvaluatedPolicies for both ALLOW-inverted-to-DENY and normal decisive paths - Empty taxonomy guard: taxonomyMatch("", x) returns false in both Go and Python to prevent accidental catch-all Code smells: - Replace interface{} return from checkAccess with typed accessCheckResult - Remove unused GapUnconfiguredConsumer from Go (service-layer concern) - Document Constraint.Operator field: which operators apply to which types Missing tests (Go + Python): - MatchDimension with both Any and All on same dimension - Duplicate Unless constraints (AND of identical = no-op) - Empty taxonomy match key never matches - Enabled defaults to true when omitted 54 Go tests + 48 Python tests, all passing. 
--- policy-engine/pkg/pbac/edge_cases_test.go | 107 ++++++- policy-engine/pkg/pbac/evaluate.go | 54 ++-- policy-engine/pkg/pbac/policy_evaluate.go | 26 +- .../pkg/pbac/policy_evaluate_test.go | 35 +- policy-engine/pkg/pbac/policy_types.go | 22 +- policy-engine/pkg/pbac/types.go | 8 +- src/fides/cli/commands/pbac.py | 5 +- src/fides/service/pbac/policies/evaluate.py | 37 ++- .../policies/test_evaluate_access_policies.py | 254 +++++++++++---- tests/service/pbac/policies/test_pbac_cli.py | 300 ++++++++++-------- 10 files changed, 587 insertions(+), 261 deletions(-) diff --git a/policy-engine/pkg/pbac/edge_cases_test.go b/policy-engine/pkg/pbac/edge_cases_test.go index 556c1a8f8cd..07e1aab5323 100644 --- a/policy-engine/pkg/pbac/edge_cases_test.go +++ b/policy-engine/pkg/pbac/edge_cases_test.go @@ -207,8 +207,8 @@ func TestPolicy_EmptyPoliciesList(t *testing.T) { func TestPolicy_AllDisabled(t *testing.T) { policies := []AccessPolicy{ - {ID: "p1", Key: "disabled-1", Priority: 100, Enabled: false, Decision: PolicyDeny, Match: MatchBlock{}}, - {ID: "p2", Key: "disabled-2", Priority: 200, Enabled: false, Decision: PolicyAllow, Match: MatchBlock{}}, + {ID: "p1", Key: "disabled-1", Priority: 100, Enabled: boolPtr(false), Decision: PolicyDeny, Match: MatchBlock{}}, + {ID: "p2", Key: "disabled-2", Priority: 200, Enabled: boolPtr(false), Decision: PolicyAllow, Match: MatchBlock{}}, } result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) @@ -220,7 +220,7 @@ func TestPolicy_AllDisabled(t *testing.T) { func TestPolicy_NoMatchDimensions_CatchAll(t *testing.T) { // An empty MatchBlock should match any request, even with empty data_uses policies := []AccessPolicy{ - {ID: "p1", Key: "catch-all", Priority: 1, Enabled: true, Decision: PolicyDeny, Match: MatchBlock{}}, + {ID: "p1", Key: "catch-all", Priority: 1, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{}}, } result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) @@ -232,7 +232,7 @@ func 
TestPolicy_NoMatchDimensions_CatchAll(t *testing.T) { func TestPolicy_MatchAll_RequiresEveryValue(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "require-both", Priority: 100, Enabled: true, + ID: "p1", Key: "require-both", Priority: 100, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{ DataCategory: &MatchDimension{ @@ -264,7 +264,7 @@ func TestPolicy_MatchAll_RequiresEveryValue(t *testing.T) { func TestPolicy_MatchAnyAndAll_Combined(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "combined", Priority: 100, Enabled: true, + ID: "p1", Key: "combined", Priority: 100, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{ DataUse: &MatchDimension{Any: []string{"marketing"}}, @@ -297,7 +297,7 @@ func TestPolicy_MatchAnyAndAll_Combined(t *testing.T) { func TestPolicy_UnlessNoContext_DoesNotTrigger(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-unless", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, Unless: []Constraint{ @@ -316,7 +316,7 @@ func TestPolicy_UnlessNoContext_DoesNotTrigger(t *testing.T) { func TestPolicy_DenyAction_OnlyReturnedForDeny(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-with-action", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-with-action", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, Action: &PolicyAction{Message: "this should not appear"}, @@ -374,7 +374,7 @@ func TestPolicy_ResolveContextField_NonStringValue(t *testing.T) { func TestPolicy_MatchDataSubject(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "deny-employee-data", Priority: 100, Enabled: true, + ID: "p1", Key: "deny-employee-data", Priority: 100, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{ DataSubject: &MatchDimension{Any: []string{"employee"}}, @@ -402,7 +402,7 @@ func TestPolicy_MatchDataSubject(t 
*testing.T) { func TestPolicy_MatchThreeDimensions(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "specific-deny", Priority: 100, Enabled: true, + ID: "p1", Key: "specific-deny", Priority: 100, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{ DataUse: &MatchDimension{Any: []string{"marketing"}}, @@ -438,7 +438,7 @@ func TestPolicy_MatchThreeDimensions(t *testing.T) { func TestUnless_ConsentNotOptIn(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-unless-not-optin", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless-not-optin", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, Unless: []Constraint{ {Type: ConstraintConsent, PrivacyNoticeKey: "notice", Requirement: "not_opt_in"}, @@ -470,7 +470,7 @@ func TestUnless_ConsentNotOptIn(t *testing.T) { func TestUnless_ConsentNotOptOut(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-unless-not-optout", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless-not-optout", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, Unless: []Constraint{ {Type: ConstraintConsent, PrivacyNoticeKey: "notice", Requirement: "not_opt_out"}, @@ -504,7 +504,7 @@ func TestUnless_ConsentNotOptOut(t *testing.T) { func TestUnless_DataFlowNoneOf(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-unless-egress-to-vendor", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless-egress-to-vendor", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, Unless: []Constraint{ {Type: ConstraintDataFlow, Direction: "egress", Operator: "none_of", Systems: []string{"trusted_partner"}}, @@ -536,3 +536,86 @@ func TestUnless_DataFlowNoneOf(t *testing.T) { t.Errorf("expected DENY (trusted partner absent, none_of true), got %s", result2.Decision) } } + +// ── Review items #12, #13, #14 ─────────────────────────────────────── + +func 
TestMatchDimension_BothAnyAndAll(t *testing.T) { + // A single dimension with both Any and All populated. + // Both conditions must pass for the dimension to match. + dim := &MatchDimension{ + Any: []string{"marketing", "analytics"}, + All: []string{"user.contact", "user.financial"}, + } + + // Any matches (marketing) AND all matches (both categories) → true + if !matchesDimension(dim, []string{"marketing.advertising", "user.contact.email", "user.financial.bank_account"}) { + t.Error("expected match when both any and all are satisfied") + } + + // Any matches but all doesn't (missing financial) → false + if matchesDimension(dim, []string{"marketing.advertising", "user.contact.email"}) { + t.Error("expected no match when all is not satisfied") + } + + // All matches but any doesn't → false + if matchesDimension(dim, []string{"essential.service", "user.contact.email", "user.financial.bank_account"}) { + t.Error("expected no match when any is not satisfied") + } +} + +func TestUnless_DuplicateConstraints(t *testing.T) { + // Duplicate constraints should be a no-op (AND of identical = same condition) + policies := []AccessPolicy{ + { + ID: "p1", Key: "allow-unless-dup", Priority: 100, Enabled: boolPtr(true), + Decision: PolicyAllow, Match: MatchBlock{}, + Unless: []Constraint{ + {Type: ConstraintConsent, PrivacyNoticeKey: "notice", Requirement: "opt_out"}, + {Type: ConstraintConsent, PrivacyNoticeKey: "notice", Requirement: "opt_out"}, + }, + }, + } + + // Both trigger → DENY + result := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{"notice": "opt_out"}, + }, + }) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY, got %s", result.Decision) + } + + // Neither triggers → ALLOW + result2 := EvaluatePolicies(policies, &AccessEvaluationRequest{ + Context: map[string]interface{}{ + "consent": map[string]interface{}{"notice": "opt_in"}, + }, + }) + if result2.Decision != PolicyAllow { + 
t.Errorf("expected ALLOW, got %s", result2.Decision) + } +} + +func TestTaxonomyMatch_EmptyKey_NeverMatches(t *testing.T) { + if taxonomyMatch("", "anything") { + t.Error("empty match key should never match") + } + if taxonomyMatch("", "") { + t.Error("empty match key should not match empty value") + } + if taxonomyMatch("", ".prefixed") { + t.Error("empty match key should not match dot-prefixed values") + } +} + +func TestPolicy_EnabledDefaultsToTrue(t *testing.T) { + // Enabled=nil (omitted from JSON) should be treated as enabled + policies := []AccessPolicy{ + {ID: "p1", Key: "no-enabled-field", Priority: 100, Decision: PolicyDeny, Match: MatchBlock{}}, + } + result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) + if result.Decision != PolicyDeny { + t.Errorf("expected DENY (nil enabled = active), got %s", result.Decision) + } +} diff --git a/policy-engine/pkg/pbac/evaluate.go b/policy-engine/pkg/pbac/evaluate.go index 396ca48c064..f4b2d7bb9a9 100644 --- a/policy-engine/pkg/pbac/evaluate.go +++ b/policy-engine/pkg/pbac/evaluate.go @@ -1,5 +1,7 @@ package pbac +import "sort" + // EvaluatePurpose checks dataset accesses against purpose assignments. // // Rules (matching the Python engine in fides/service/pbac/evaluate.py): @@ -19,6 +21,15 @@ func EvaluatePurpose( collections = map[string][]string{} } + // Sort dataset keys for deterministic iteration order. + // Go map iteration is randomized; sorting ensures stable output + // for audit trails, diff-ability, and test reliability. 
+ datasetKeys := make([]string, 0, len(datasets)) + for k := range datasets { + datasetKeys = append(datasetKeys, k) + } + sort.Strings(datasetKeys) + var violations []PurposeViolation var gaps []EvaluationGap totalAccesses := 0 @@ -27,7 +38,7 @@ func EvaluatePurpose( // Rule 1: consumer has no purposes — record as identity gap if len(consumer.PurposeKeys) == 0 { - for datasetKey := range datasets { + for _, datasetKey := range datasetKeys { totalAccesses++ dk := datasetKey gaps = append(gaps, EvaluationGap{ @@ -44,7 +55,8 @@ func EvaluatePurpose( } } - for datasetKey, dsPurposes := range datasets { + for _, datasetKey := range datasetKeys { + dsPurposes := datasets[datasetKey] dk := datasetKey datasetCollections := collections[datasetKey] @@ -53,21 +65,19 @@ func EvaluatePurpose( totalAccesses++ col := collection result := checkAccess(consumerPurposeSet, consumer, &dsPurposes, dk, &col) - switch r := result.(type) { - case PurposeViolation: - violations = append(violations, r) - case EvaluationGap: - gaps = append(gaps, r) + if result.Violation != nil { + violations = append(violations, *result.Violation) + } else if result.Gap != nil { + gaps = append(gaps, *result.Gap) } } } else { totalAccesses++ result := checkAccess(consumerPurposeSet, consumer, &dsPurposes, dk, nil) - switch r := result.(type) { - case PurposeViolation: - violations = append(violations, r) - case EvaluationGap: - gaps = append(gaps, r) + if result.Violation != nil { + violations = append(violations, *result.Violation) + } else if result.Gap != nil { + gaps = append(gaps, *result.Gap) } } } @@ -79,15 +89,21 @@ func EvaluatePurpose( } } +// accessCheckResult holds the outcome of a single dataset/collection check. +// Exactly one of Violation or Gap is set, or both are nil (compliant). +type accessCheckResult struct { + Violation *PurposeViolation + Gap *EvaluationGap +} + // checkAccess checks a single dataset/collection access against consumer purposes. 
-// Returns a PurposeViolation, EvaluationGap, or nil (compliant). func checkAccess( consumerPurposeSet map[string]bool, consumer ConsumerPurposes, dsPurposes *DatasetPurposes, datasetKey string, collection *string, -) interface{} { +) accessCheckResult { col := "" if collection != nil { col = *collection @@ -97,17 +113,17 @@ func checkAccess( // Rule 3: no effective purposes → dataset gap if len(effective) == 0 { dk := datasetKey - return EvaluationGap{ + return accessCheckResult{Gap: &EvaluationGap{ GapType: GapUnconfiguredDataset, Identifier: datasetKey, DatasetKey: &dk, Reason: "Dataset has no declared purposes", - } + }} } // Rule 2: no overlap → violation if !intersects(consumerPurposeSet, effective) { - return PurposeViolation{ + return accessCheckResult{Violation: &PurposeViolation{ ConsumerID: consumer.ConsumerID, ConsumerName: consumer.ConsumerName, DatasetKey: datasetKey, @@ -115,11 +131,11 @@ func checkAccess( ConsumerPurposes: sortedKeys(consumerPurposeSet), DatasetPurposes: sortedKeys(effective), Reason: violationReason(consumerPurposeSet, effective), - } + }} } // Compliant - return nil + return accessCheckResult{} } // Ensure non-nil slices for JSON serialization. diff --git a/policy-engine/pkg/pbac/policy_evaluate.go b/policy-engine/pkg/pbac/policy_evaluate.go index 56c747436c8..15f05ec4b29 100644 --- a/policy-engine/pkg/pbac/policy_evaluate.go +++ b/policy-engine/pkg/pbac/policy_evaluate.go @@ -16,14 +16,16 @@ import ( // 6. Unless not triggered → decision stands as-is (decisive, stop) // 7. No policy matched → NO_DECISION func EvaluatePolicies(policies []AccessPolicy, request *AccessEvaluationRequest) *PolicyEvaluationResult { - // Filter to enabled policies and sort by priority descending + // Filter to enabled policies and sort by priority descending. + // Enabled defaults to true when omitted (nil pointer) to match + // the Python implementation and database schema default. 
enabled := make([]AccessPolicy, 0, len(policies)) for _, p := range policies { - if p.Enabled { + if p.Enabled == nil || *p.Enabled { enabled = append(enabled, p) } } - sort.Slice(enabled, func(i, j int) bool { + sort.SliceStable(enabled, func(i, j int) bool { return enabled[i].Priority > enabled[j].Priority }) @@ -41,6 +43,13 @@ func EvaluatePolicies(policies []AccessPolicy, request *AccessEvaluationRequest) // ALLOW inverted to DENY — decisive, stop key := policy.Key priority := policy.Priority + evaluated = append(evaluated, EvaluatedPolicyInfo{ + PolicyKey: policy.Key, + Priority: policy.Priority, + Matched: true, + Result: "DENY", + UnlessTriggered: true, + }) return &PolicyEvaluationResult{ Decision: PolicyDeny, DecisivePolicyKey: &key, @@ -68,6 +77,13 @@ func EvaluatePolicies(policies []AccessPolicy, request *AccessEvaluationRequest) if policy.Decision == PolicyDeny { action = policy.Action } + evaluated = append(evaluated, EvaluatedPolicyInfo{ + PolicyKey: policy.Key, + Priority: policy.Priority, + Matched: true, + Result: string(policy.Decision), + UnlessTriggered: false, + }) return &PolicyEvaluationResult{ Decision: policy.Decision, DecisivePolicyKey: &key, @@ -145,7 +161,11 @@ func taxonomyMatchesAny(matchKey string, requestValues []string) bool { // "user.contact" matches "user.contact.email" (prefix + dot boundary). // "user.contact" matches "user.contact" (exact match). // "user" does NOT match "user_data" (must be a dot boundary). +// Empty matchKey never matches — prevents accidental catch-all. 
func taxonomyMatch(matchKey, requestValue string) bool { + if matchKey == "" { + return false + } if matchKey == requestValue { return true } diff --git a/policy-engine/pkg/pbac/policy_evaluate_test.go b/policy-engine/pkg/pbac/policy_evaluate_test.go index a0c94c4cb4f..7ff53b4e5b4 100644 --- a/policy-engine/pkg/pbac/policy_evaluate_test.go +++ b/policy-engine/pkg/pbac/policy_evaluate_test.go @@ -4,19 +4,19 @@ import ( "testing" ) -func intPtr(i int) *int { return &i } +func boolPtr(b bool) *bool { return &b } func basePolicies() []AccessPolicy { return []AccessPolicy{ { - ID: "p1", Key: "allow-marketing", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-marketing", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{ DataUse: &MatchDimension{Any: []string{"marketing"}}, }, }, { - ID: "p2", Key: "deny-financial", Priority: 200, Enabled: true, + ID: "p2", Key: "deny-financial", Priority: 200, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{ DataCategory: &MatchDimension{Any: []string{"user.financial"}}, @@ -24,7 +24,7 @@ func basePolicies() []AccessPolicy { Action: &PolicyAction{Message: "Financial data access denied"}, }, { - ID: "p3", Key: "catch-all-deny", Priority: 0, Enabled: true, + ID: "p3", Key: "catch-all-deny", Priority: 0, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{}, // empty = matches everything Action: &PolicyAction{Message: "Default deny"}, @@ -97,7 +97,7 @@ func TestEvaluatePolicies_NoDecisionWhenNoPolicies(t *testing.T) { func TestEvaluatePolicies_DisabledPoliciesSkipped(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "disabled", Priority: 100, Enabled: false, + ID: "p1", Key: "disabled", Priority: 100, Enabled: boolPtr(false), Decision: PolicyDeny, Match: MatchBlock{}, }, @@ -115,7 +115,7 @@ func TestEvaluatePolicies_DisabledPoliciesSkipped(t *testing.T) { func TestUnless_ConsentOptOut_InvertsAllow(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: 
"allow-unless-optout", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless-optout", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{ DataUse: &MatchDimension{Any: []string{"marketing"}}, @@ -154,7 +154,7 @@ func TestUnless_ConsentOptOut_InvertsAllow(t *testing.T) { func TestUnless_ConsentNotTriggered_AllowStands(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-unless-optout", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless-optout", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{ DataUse: &MatchDimension{Any: []string{"marketing"}}, @@ -191,7 +191,7 @@ func TestUnless_ConsentNotTriggered_AllowStands(t *testing.T) { func TestUnless_DenySuppressed_ContinuesToNext(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "deny-unless-geo", Priority: 200, Enabled: true, + ID: "p1", Key: "deny-unless-geo", Priority: 200, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{}, Unless: []Constraint{ @@ -204,7 +204,7 @@ func TestUnless_DenySuppressed_ContinuesToNext(t *testing.T) { }, }, { - ID: "p2", Key: "fallback-allow", Priority: 100, Enabled: true, + ID: "p2", Key: "fallback-allow", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, }, @@ -227,19 +227,22 @@ func TestUnless_DenySuppressed_ContinuesToNext(t *testing.T) { if result.DecisivePolicyKey == nil || *result.DecisivePolicyKey != "fallback-allow" { t.Errorf("expected decisive policy 'fallback-allow'") } - // The suppressed policy should be in the audit trail - if len(result.EvaluatedPolicies) != 1 { - t.Fatalf("expected 1 evaluated policy (suppressed), got %d", len(result.EvaluatedPolicies)) + // Audit trail: suppressed deny + decisive allow + if len(result.EvaluatedPolicies) != 2 { + t.Fatalf("expected 2 evaluated policies, got %d", len(result.EvaluatedPolicies)) } if result.EvaluatedPolicies[0].Result != "SUPPRESSED" { - t.Errorf("expected SUPPRESSED, got 
%s", result.EvaluatedPolicies[0].Result) + t.Errorf("expected first policy SUPPRESSED, got %s", result.EvaluatedPolicies[0].Result) + } + if result.EvaluatedPolicies[1].Result != "ALLOW" { + t.Errorf("expected second policy ALLOW, got %s", result.EvaluatedPolicies[1].Result) } } func TestUnless_GeoNotIn(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "deny-outside-ca", Priority: 100, Enabled: true, + ID: "p1", Key: "deny-outside-ca", Priority: 100, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{}, Unless: []Constraint{ @@ -285,7 +288,7 @@ func TestUnless_GeoNotIn(t *testing.T) { func TestUnless_DataFlow(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-unless-egress", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless-egress", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, Unless: []Constraint{ @@ -317,7 +320,7 @@ func TestUnless_DataFlow(t *testing.T) { func TestUnless_MultipleConstraints_AllMustTrigger(t *testing.T) { policies := []AccessPolicy{ { - ID: "p1", Key: "allow-unless-both", Priority: 100, Enabled: true, + ID: "p1", Key: "allow-unless-both", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}, Unless: []Constraint{ diff --git a/policy-engine/pkg/pbac/policy_types.go b/policy-engine/pkg/pbac/policy_types.go index dc32a7777eb..4c07b677281 100644 --- a/policy-engine/pkg/pbac/policy_types.go +++ b/policy-engine/pkg/pbac/policy_types.go @@ -10,11 +10,15 @@ const ( ) // AccessPolicy represents a parsed YAML access policy ready for evaluation. +// +// Enabled defaults to true (via pointer) to match the Python implementation +// and the database schema default. A policy omitting the enabled field is +// treated as active. 
type AccessPolicy struct { ID string `json:"id"` Key string `json:"key"` Priority int `json:"priority"` - Enabled bool `json:"enabled"` + Enabled *bool `json:"enabled,omitempty"` Decision PolicyDecision `json:"decision"` // ALLOW or DENY Match MatchBlock `json:"match"` Unless []Constraint `json:"unless,omitempty"` @@ -49,16 +53,20 @@ const ( type Constraint struct { Type ConstraintType `json:"type"` - // Consent fields + // Consent fields (type=consent) PrivacyNoticeKey string `json:"privacy_notice_key,omitempty"` Requirement string `json:"requirement,omitempty"` // opt_in, opt_out, not_opt_in, not_opt_out - // Geo location fields - Field string `json:"field,omitempty"` // e.g. "environment.geo_location" - Operator string `json:"operator,omitempty"` // "in", "not_in", "any_of", "none_of" - Values []string `json:"values,omitempty"` + // Geo location fields (type=geo_location) + Field string `json:"field,omitempty"` // dotted context path, e.g. "environment.geo_location" + Values []string `json:"values,omitempty"` - // Data flow fields + // Operator is shared between geo_location and data_flow constraints: + // geo_location: "in", "not_in" + // data_flow: "any_of", "none_of" + Operator string `json:"operator,omitempty"` + + // Data flow fields (type=data_flow) Direction string `json:"direction,omitempty"` // "ingress", "egress" Systems []string `json:"systems,omitempty"` } diff --git a/policy-engine/pkg/pbac/types.go b/policy-engine/pkg/pbac/types.go index c272d7ccff7..a16ebf15378 100644 --- a/policy-engine/pkg/pbac/types.go +++ b/policy-engine/pkg/pbac/types.go @@ -15,8 +15,12 @@ type GapType string const ( GapUnresolvedIdentity GapType = "unresolved_identity" - GapUnconfiguredConsumer GapType = "unconfigured_consumer" - GapUnconfiguredDataset GapType = "unconfigured_dataset" + GapUnconfiguredDataset GapType = "unconfigured_dataset" + // GapUnconfiguredConsumer ("unconfigured_consumer") is produced by the + // Python service layer (step 5) when reclassifying 
identity gaps for + // consumers that exist but have no purposes. The Go engine does not + // produce this gap type — it's a service-layer concern, not an engine + // concern. ) // ConsumerPurposes holds the declared purposes for a data consumer. diff --git a/src/fides/cli/commands/pbac.py b/src/fides/cli/commands/pbac.py index c607094a3fe..2942552b5e9 100644 --- a/src/fides/cli/commands/pbac.py +++ b/src/fides/cli/commands/pbac.py @@ -9,6 +9,7 @@ import json import sys +from typing import TextIO import rich_click as click @@ -26,7 +27,7 @@ def pbac(ctx: click.Context) -> None: @pbac.command(name="evaluate-purpose") @click.argument("input_file", type=click.File("r"), default="-") -def evaluate_purpose_cmd(input_file: click.utils.LazyFile) -> None: +def evaluate_purpose_cmd(input_file: TextIO) -> None: """Evaluate purpose overlap between a consumer and datasets. Reads JSON from INPUT_FILE (or stdin if omitted). @@ -111,7 +112,7 @@ def evaluate_purpose_cmd(input_file: click.utils.LazyFile) -> None: @pbac.command(name="evaluate-policies") @click.argument("input_file", type=click.File("r"), default="-") -def evaluate_policies_cmd(input_file: click.utils.LazyFile) -> None: +def evaluate_policies_cmd(input_file: TextIO) -> None: """Evaluate access policies against a PBAC violation. Reads JSON from INPUT_FILE (or stdin if omitted). 
diff --git a/src/fides/service/pbac/policies/evaluate.py b/src/fides/service/pbac/policies/evaluate.py index 70f2e802288..f5c6b2d8db9 100644 --- a/src/fides/service/pbac/policies/evaluate.py +++ b/src/fides/service/pbac/policies/evaluate.py @@ -41,6 +41,15 @@ def evaluate_access_policies( if unless_triggered: if decision == "ALLOW": + evaluated.append( + { + "policy_key": policy.get("key"), + "priority": policy.get("priority"), + "matched": True, + "result": "DENY", + "unless_triggered": True, + } + ) return { "decision": "DENY", "decisive_policy_key": policy.get("key"), @@ -50,17 +59,28 @@ def evaluate_access_policies( "evaluated_policies": evaluated, } # DENY suppressed - evaluated.append({ - "policy_key": policy.get("key"), - "priority": policy.get("priority"), - "matched": True, - "result": "SUPPRESSED", - "unless_triggered": True, - }) + evaluated.append( + { + "policy_key": policy.get("key"), + "priority": policy.get("priority"), + "matched": True, + "result": "SUPPRESSED", + "unless_triggered": True, + } + ) continue # Decision stands action = policy.get("action") if decision == "DENY" else None + evaluated.append( + { + "policy_key": policy.get("key"), + "priority": policy.get("priority"), + "matched": True, + "result": decision, + "unless_triggered": False, + } + ) return { "decision": decision, "decisive_policy_key": policy.get("key"), @@ -116,7 +136,10 @@ def _taxonomy_match(match_key: str, request_value: str) -> bool: "user.contact" matches "user.contact.email" (prefix + dot boundary). "user" does NOT match "user_data". + Empty match_key never matches — prevents accidental catch-all. 
""" + if not match_key: + return False if match_key == request_value: return True return request_value.startswith(match_key + ".") diff --git a/tests/service/pbac/policies/test_evaluate_access_policies.py b/tests/service/pbac/policies/test_evaluate_access_policies.py index 99646223cb3..cfc25a6645e 100644 --- a/tests/service/pbac/policies/test_evaluate_access_policies.py +++ b/tests/service/pbac/policies/test_evaluate_access_policies.py @@ -88,7 +88,13 @@ def test_empty_policies(self): def test_disabled_policies_skipped(self): policies = [ - {"key": "disabled", "priority": 100, "enabled": False, "decision": "DENY", "match": {}}, + { + "key": "disabled", + "priority": 100, + "enabled": False, + "decision": "DENY", + "match": {}, + }, ] result = evaluate_access_policies(policies, {}) assert result["decision"] == "NO_DECISION" @@ -150,9 +156,7 @@ def test_no_dot_boundary_false_positive(self): "match": {"data_category": {"any": ["user"]}}, }, ] - result = evaluate_access_policies( - policies, {"data_categories": ["user_data"]} - ) + result = evaluate_access_policies(policies, {"data_categories": ["user_data"]}) assert result["decision"] == "NO_DECISION" def test_match_all_requires_every_value(self): @@ -274,8 +278,9 @@ def test_deny_suppressed_continues_to_next(self): assert result["decision"] == "ALLOW" assert result["decisive_policy_key"] == "fallback-allow" - assert len(result["evaluated_policies"]) == 1 + assert len(result["evaluated_policies"]) == 2 assert result["evaluated_policies"][0]["result"] == "SUPPRESSED" + assert result["evaluated_policies"][1]["result"] == "ALLOW" class TestUnlessDataFlow: @@ -333,21 +338,27 @@ def test_all_must_trigger(self): ] # Only consent triggers → ALLOW stands - result1 = evaluate_access_policies(policies, { - "context": { - "consent": {"do_not_sell": "opt_out"}, - "environment": {"geo_location": "US-NY"}, + result1 = evaluate_access_policies( + policies, + { + "context": { + "consent": {"do_not_sell": "opt_out"}, + "environment": 
{"geo_location": "US-NY"}, + }, }, - }) + ) assert result1["decision"] == "ALLOW" # Both trigger → DENY - result2 = evaluate_access_policies(policies, { - "context": { - "consent": {"do_not_sell": "opt_out"}, - "environment": {"geo_location": "US-CA"}, + result2 = evaluate_access_policies( + policies, + { + "context": { + "consent": {"do_not_sell": "opt_out"}, + "environment": {"geo_location": "US-CA"}, + }, }, - }) + ) assert result2["decision"] == "DENY" @@ -362,9 +373,7 @@ def test_data_subject_matches(self): "match": {"data_subject": {"any": ["employee"]}}, }, ] - result = evaluate_access_policies( - policies, {"data_subjects": ["employee"]} - ) + result = evaluate_access_policies(policies, {"data_subjects": ["employee"]}) assert result["decision"] == "DENY" def test_data_subject_no_match(self): @@ -377,9 +386,7 @@ def test_data_subject_no_match(self): "match": {"data_subject": {"any": ["employee"]}}, }, ] - result = evaluate_access_policies( - policies, {"data_subjects": ["customer"]} - ) + result = evaluate_access_policies(policies, {"data_subjects": ["customer"]}) assert result["decision"] == "NO_DECISION" def test_three_dimensions_all_must_match(self): @@ -397,19 +404,25 @@ def test_three_dimensions_all_must_match(self): }, ] # All three match - result = evaluate_access_policies(policies, { - "data_uses": ["marketing.advertising"], - "data_categories": ["user.contact.email"], - "data_subjects": ["customer"], - }) + result = evaluate_access_policies( + policies, + { + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email"], + "data_subjects": ["customer"], + }, + ) assert result["decision"] == "DENY" # Subject doesn't match - result2 = evaluate_access_policies(policies, { - "data_uses": ["marketing.advertising"], - "data_categories": ["user.contact.email"], - "data_subjects": ["employee"], - }) + result2 = evaluate_access_policies( + policies, + { + "data_uses": ["marketing.advertising"], + "data_categories": 
["user.contact.email"], + "data_subjects": ["employee"], + }, + ) assert result2["decision"] == "NO_DECISION" @@ -428,17 +441,26 @@ def test_any_and_all_on_different_dimensions(self): }, ] # Both categories present → match - result = evaluate_access_policies(policies, { - "data_uses": ["marketing.advertising"], - "data_categories": ["user.contact.email", "user.financial.bank_account"], - }) + result = evaluate_access_policies( + policies, + { + "data_uses": ["marketing.advertising"], + "data_categories": [ + "user.contact.email", + "user.financial.bank_account", + ], + }, + ) assert result["decision"] == "DENY" # Only one category → no match - result2 = evaluate_access_policies(policies, { - "data_uses": ["marketing.advertising"], - "data_categories": ["user.contact.email"], - }) + result2 = evaluate_access_policies( + policies, + { + "data_uses": ["marketing.advertising"], + "data_categories": ["user.contact.email"], + }, + ) assert result2["decision"] == "NO_DECISION" @@ -452,7 +474,11 @@ def test_not_opt_in(self): "decision": "ALLOW", "match": {}, "unless": [ - {"type": "consent", "privacy_notice_key": "n", "requirement": "not_opt_in"}, + { + "type": "consent", + "privacy_notice_key": "n", + "requirement": "not_opt_in", + }, ], }, ] @@ -477,7 +503,11 @@ def test_not_opt_out(self): "decision": "ALLOW", "match": {}, "unless": [ - {"type": "consent", "privacy_notice_key": "n", "requirement": "not_opt_out"}, + { + "type": "consent", + "privacy_notice_key": "n", + "requirement": "not_opt_out", + }, ], }, ] @@ -514,15 +544,21 @@ def test_not_in_operator(self): }, ] # In CA → not_in false → unless doesn't trigger → DENY - result = evaluate_access_policies(policies, { - "context": {"environment": {"geo_location": "US-CA"}}, - }) + result = evaluate_access_policies( + policies, + { + "context": {"environment": {"geo_location": "US-CA"}}, + }, + ) assert result["decision"] == "DENY" # In DE → not_in true → unless triggers → DENY suppressed → NO_DECISION - result2 = 
evaluate_access_policies(policies, { - "context": {"environment": {"geo_location": "EU-DE"}}, - }) + result2 = evaluate_access_policies( + policies, + { + "context": {"environment": {"geo_location": "EU-DE"}}, + }, + ) assert result2["decision"] == "NO_DECISION" @@ -546,15 +582,21 @@ def test_none_of_operator(self): }, ] # trusted_partner present → none_of false → ALLOW - result = evaluate_access_policies(policies, { - "context": {"data_flows": {"egress": ["trusted_partner"]}}, - }) + result = evaluate_access_policies( + policies, + { + "context": {"data_flows": {"egress": ["trusted_partner"]}}, + }, + ) assert result["decision"] == "ALLOW" # trusted_partner absent → none_of true → DENY - result2 = evaluate_access_policies(policies, { - "context": {"data_flows": {"egress": ["unknown_vendor"]}}, - }) + result2 = evaluate_access_policies( + policies, + { + "context": {"data_flows": {"egress": ["unknown_vendor"]}}, + }, + ) assert result2["decision"] == "DENY" @@ -568,7 +610,11 @@ def test_no_context_unless_does_not_trigger(self): "decision": "ALLOW", "match": {}, "unless": [ - {"type": "consent", "privacy_notice_key": "x", "requirement": "opt_out"} + { + "type": "consent", + "privacy_notice_key": "x", + "requirement": "opt_out", + } ], }, ] @@ -592,7 +638,13 @@ def test_deny_action_only_on_deny(self): def test_empty_match_catches_everything(self): policies = [ - {"key": "catch-all", "priority": 1, "enabled": True, "decision": "DENY", "match": {}}, + { + "key": "catch-all", + "priority": 1, + "enabled": True, + "decision": "DENY", + "match": {}, + }, ] result = evaluate_access_policies(policies, {}) assert result["decision"] == "DENY" @@ -615,8 +667,94 @@ def test_context_nested_field_resolution(self): ], }, ] - result = evaluate_access_policies(policies, { - "context": {"a": {"b": {"c": "deep_value"}}}, - }) + result = evaluate_access_policies( + policies, + { + "context": {"a": {"b": {"c": "deep_value"}}}, + }, + ) # Unless triggers → DENY suppressed → NO_DECISION 
assert result["decision"] == "NO_DECISION" + + def test_taxonomy_empty_key_never_matches(self): + policies = [ + { + "key": "empty-key", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": {"data_use": {"any": [""]}}, + }, + ] + result = evaluate_access_policies(policies, {"data_uses": ["marketing"]}) + assert result["decision"] == "NO_DECISION" + + def test_enabled_defaults_to_true(self): + # Policy without "enabled" field should be treated as active + policies = [ + {"key": "no-enabled", "priority": 100, "decision": "DENY", "match": {}}, + ] + result = evaluate_access_policies(policies, {}) + assert result["decision"] == "DENY" + + def test_duplicate_unless_constraints(self): + policies = [ + { + "key": "allow-unless-dup", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {}, + "unless": [ + { + "type": "consent", + "privacy_notice_key": "n", + "requirement": "opt_out", + }, + { + "type": "consent", + "privacy_notice_key": "n", + "requirement": "opt_out", + }, + ], + }, + ] + # Both trigger → DENY + result = evaluate_access_policies( + policies, {"context": {"consent": {"n": "opt_out"}}} + ) + assert result["decision"] == "DENY" + + # Neither triggers → ALLOW + result2 = evaluate_access_policies( + policies, {"context": {"consent": {"n": "opt_in"}}} + ) + assert result2["decision"] == "ALLOW" + + def test_match_dimension_both_any_and_all(self): + policies = [ + { + "key": "both-ops", + "priority": 100, + "enabled": True, + "decision": "DENY", + "match": { + "data_category": { + "any": ["user.contact", "user.financial"], + "all": ["user.contact", "user.financial"], + } + }, + }, + ] + # Both satisfied + result = evaluate_access_policies( + policies, + {"data_categories": ["user.contact.email", "user.financial.bank_account"]}, + ) + assert result["decision"] == "DENY" + + # Any satisfied but not all + result2 = evaluate_access_policies( + policies, {"data_categories": ["user.contact.email"]} + ) + assert result2["decision"] 
== "NO_DECISION" diff --git a/tests/service/pbac/policies/test_pbac_cli.py b/tests/service/pbac/policies/test_pbac_cli.py index 23bcdcae230..1a17167c65a 100644 --- a/tests/service/pbac/policies/test_pbac_cli.py +++ b/tests/service/pbac/policies/test_pbac_cli.py @@ -21,19 +21,21 @@ def runner(): class TestEvaluatePurposeCLI: def test_compliant_via_stdin(self, runner): - input_json = json.dumps({ - "consumer": { - "consumer_id": "c1", - "consumer_name": "Billing", - "purpose_keys": ["billing"], - }, - "datasets": { - "billing_db": { - "dataset_key": "billing_db", + input_json = json.dumps( + { + "consumer": { + "consumer_id": "c1", + "consumer_name": "Billing", "purpose_keys": ["billing"], }, - }, - }) + "datasets": { + "billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + }, + }, + } + ) result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) @@ -43,19 +45,21 @@ def test_compliant_via_stdin(self, runner): assert output["total_accesses"] == 1 def test_violation_via_stdin(self, runner): - input_json = json.dumps({ - "consumer": { - "consumer_id": "c1", - "consumer_name": "Analytics", - "purpose_keys": ["analytics"], - }, - "datasets": { - "billing_db": { - "dataset_key": "billing_db", - "purpose_keys": ["billing"], + input_json = json.dumps( + { + "consumer": { + "consumer_id": "c1", + "consumer_name": "Analytics", + "purpose_keys": ["analytics"], }, - }, - }) + "datasets": { + "billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + }, + }, + } + ) result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) @@ -65,16 +69,18 @@ def test_violation_via_stdin(self, runner): assert output["violations"][0]["dataset_key"] == "billing_db" def test_gap_no_consumer_purposes(self, runner): - input_json = json.dumps({ - "consumer": { - "consumer_id": "c1", - "consumer_name": "Unknown", - "purpose_keys": [], - }, - "datasets": { - "db1": {"dataset_key": "db1", "purpose_keys": ["billing"]}, - }, - }) + 
input_json = json.dumps( + { + "consumer": { + "consumer_id": "c1", + "consumer_name": "Unknown", + "purpose_keys": [], + }, + "datasets": { + "db1": {"dataset_key": "db1", "purpose_keys": ["billing"]}, + }, + } + ) result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) @@ -84,21 +90,23 @@ def test_gap_no_consumer_purposes(self, runner): assert output["gaps"][0]["gap_type"] == "unresolved_identity" def test_with_collections(self, runner): - input_json = json.dumps({ - "consumer": { - "consumer_id": "c1", - "consumer_name": "Accountant", - "purpose_keys": ["accounting"], - }, - "datasets": { - "billing_db": { - "dataset_key": "billing_db", - "purpose_keys": ["billing"], - "collection_purposes": {"invoices": ["accounting"]}, + input_json = json.dumps( + { + "consumer": { + "consumer_id": "c1", + "consumer_name": "Accountant", + "purpose_keys": ["accounting"], }, - }, - "collections": {"billing_db": ["invoices"]}, - }) + "datasets": { + "billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + "collection_purposes": {"invoices": ["accounting"]}, + }, + }, + "collections": {"billing_db": ["invoices"]}, + } + ) result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) @@ -118,9 +126,7 @@ def test_from_file(self, runner): }, } - with tempfile.NamedTemporaryFile( - mode="w", suffix=".json", delete=False - ) as f: + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: json.dump(data, f) f.flush() tmppath = f.name @@ -134,24 +140,26 @@ def test_from_file(self, runner): os.unlink(tmppath) def test_multiple_datasets(self, runner): - input_json = json.dumps({ - "consumer": { - "consumer_id": "c1", - "consumer_name": "Analyst", - "purpose_keys": ["analytics"], - }, - "datasets": { - "analytics_db": { - "dataset_key": "analytics_db", + input_json = json.dumps( + { + "consumer": { + "consumer_id": "c1", + "consumer_name": "Analyst", "purpose_keys": ["analytics"], }, - "billing_db": { - "dataset_key": 
"billing_db", - "purpose_keys": ["billing"], + "datasets": { + "analytics_db": { + "dataset_key": "analytics_db", + "purpose_keys": ["analytics"], + }, + "billing_db": { + "dataset_key": "billing_db", + "purpose_keys": ["billing"], + }, + "empty_db": {"dataset_key": "empty_db", "purpose_keys": []}, }, - "empty_db": {"dataset_key": "empty_db", "purpose_keys": []}, - }, - }) + } + ) result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) @@ -166,10 +174,16 @@ def test_invalid_json(self, runner): assert result.exit_code != 0 def test_output_is_valid_json(self, runner): - input_json = json.dumps({ - "consumer": {"consumer_id": "c1", "consumer_name": "T", "purpose_keys": ["x"]}, - "datasets": {"d1": {"dataset_key": "d1", "purpose_keys": ["y"]}}, - }) + input_json = json.dumps( + { + "consumer": { + "consumer_id": "c1", + "consumer_name": "T", + "purpose_keys": ["x"], + }, + "datasets": {"d1": {"dataset_key": "d1", "purpose_keys": ["y"]}}, + } + ) result = runner.invoke(pbac, ["evaluate-purpose"], input=input_json) assert result.exit_code == 0 json.loads(result.output) # should not raise @@ -177,22 +191,24 @@ def test_output_is_valid_json(self, runner): class TestEvaluatePoliciesCLI: def test_allow(self, runner): - input_json = json.dumps({ - "policies": [ - { - "key": "allow-marketing", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {"data_use": {"any": ["marketing"]}}, + input_json = json.dumps( + { + "policies": [ + { + "key": "allow-marketing", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + }, + ], + "request": { + "consumer_id": "c1", + "consumer_name": "Marketing", + "data_uses": ["marketing.advertising"], }, - ], - "request": { - "consumer_id": "c1", - "consumer_name": "Marketing", - "data_uses": ["marketing.advertising"], - }, - }) + } + ) result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) @@ -202,19 +218,21 @@ def test_allow(self, runner): 
assert output["decisive_policy_key"] == "allow-marketing" def test_deny_with_action(self, runner): - input_json = json.dumps({ - "policies": [ - { - "key": "deny-all", - "priority": 0, - "enabled": True, - "decision": "DENY", - "match": {}, - "action": {"message": "Access denied"}, - }, - ], - "request": {"consumer_id": "c1", "consumer_name": "Anyone"}, - }) + input_json = json.dumps( + { + "policies": [ + { + "key": "deny-all", + "priority": 0, + "enabled": True, + "decision": "DENY", + "match": {}, + "action": {"message": "Access denied"}, + }, + ], + "request": {"consumer_id": "c1", "consumer_name": "Anyone"}, + } + ) result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) @@ -224,10 +242,12 @@ def test_deny_with_action(self, runner): assert output["action"]["message"] == "Access denied" def test_no_decision(self, runner): - input_json = json.dumps({ - "policies": [], - "request": {"consumer_id": "c1", "consumer_name": "Test"}, - }) + input_json = json.dumps( + { + "policies": [], + "request": {"consumer_id": "c1", "consumer_name": "Test"}, + } + ) result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) @@ -236,28 +256,30 @@ def test_no_decision(self, runner): assert output["decision"] == "NO_DECISION" def test_unless_inverts(self, runner): - input_json = json.dumps({ - "policies": [ - { - "key": "allow-unless-optout", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {"data_use": {"any": ["marketing"]}}, - "unless": [ - { - "type": "consent", - "privacy_notice_key": "do_not_sell", - "requirement": "opt_out", - } - ], + input_json = json.dumps( + { + "policies": [ + { + "key": "allow-unless-optout", + "priority": 100, + "enabled": True, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + "unless": [ + { + "type": "consent", + "privacy_notice_key": "do_not_sell", + "requirement": "opt_out", + } + ], + }, + ], + "request": { + "data_uses": ["marketing.advertising"], + "context": {"consent": 
{"do_not_sell": "opt_out"}}, }, - ], - "request": { - "data_uses": ["marketing.advertising"], - "context": {"consent": {"do_not_sell": "opt_out"}}, - }, - }) + } + ) result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) @@ -267,19 +289,27 @@ def test_unless_inverts(self, runner): assert output["unless_triggered"] is True def test_priority_ordering(self, runner): - input_json = json.dumps({ - "policies": [ - {"key": "low-allow", "priority": 10, "enabled": True, "decision": "ALLOW", "match": {}}, - { - "key": "high-deny", - "priority": 200, - "enabled": True, - "decision": "DENY", - "match": {}, - }, - ], - "request": {"data_uses": ["marketing"]}, - }) + input_json = json.dumps( + { + "policies": [ + { + "key": "low-allow", + "priority": 10, + "enabled": True, + "decision": "ALLOW", + "match": {}, + }, + { + "key": "high-deny", + "priority": 200, + "enabled": True, + "decision": "DENY", + "match": {}, + }, + ], + "request": {"data_uses": ["marketing"]}, + } + ) result = runner.invoke(pbac, ["evaluate-policies"], input=input_json) From 5d7980c34b9e4cd68151d441e149bd6b87242f67 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 15:40:31 -0700 Subject: [PATCH 09/14] Add changelog entry for #7926 --- changelog/7926-go-pbac-policy-engine.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelog/7926-go-pbac-policy-engine.yaml diff --git a/changelog/7926-go-pbac-policy-engine.yaml b/changelog/7926-go-pbac-policy-engine.yaml new file mode 100644 index 00000000000..0d6662017e9 --- /dev/null +++ b/changelog/7926-go-pbac-policy-engine.yaml @@ -0,0 +1,4 @@ +type: Added +description: Go PBAC policy engine library for high-throughput evaluation, plus `fides pbac` CLI commands for purpose and access policy evaluation +pr: 7926 +labels: [] From cb0c34e7af35d82a9b50abaf77d422124600eb46 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 15:54:47 -0700 Subject: [PATCH 10/14] Address architecture review: implement 
AccessPolicyEvaluator Protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Critical fixes: - evaluate.py now uses typed dataclasses (ParsedPolicy, AccessEvaluationRequest, PolicyEvaluationResult) instead of raw dicts - InProcessAccessPolicyEvaluator conforms to the existing AccessPolicyEvaluator Protocol from interface.py - JSON conversion (parsed_policy_from_dict, request_from_dict, result_to_dict) pushed to CLI boundary, not inside the service - CLI evaluate-policies now constructs typed objects from JSON, matching the pattern used by evaluate-purpose - Lazy import removed — all imports at module level Significant fixes: - CI: add go mod tidy verification step - Go: document EvaluatePoliciesRequest/EvaluatePurposeRequest as sidecar types (used by fidesplus companion PR) - Go + Python: add priority tie-breaking test (stable sort preserves insertion order) 55 Go tests + 51 Python tests, all passing. --- .github/workflows/policy_engine_checks.yml | 3 + policy-engine/pkg/pbac/edge_cases_test.go | 15 + policy-engine/pkg/pbac/policy_types.go | 3 +- policy-engine/pkg/pbac/types.go | 3 +- src/fides/cli/commands/pbac.py | 22 +- src/fides/service/pbac/policies/evaluate.py | 265 +++-- .../policies/test_evaluate_access_policies.py | 905 +++++++++--------- 7 files changed, 669 insertions(+), 547 deletions(-) diff --git a/.github/workflows/policy_engine_checks.yml b/.github/workflows/policy_engine_checks.yml index 894bdf3d48f..8d106ac14fe 100644 --- a/.github/workflows/policy_engine_checks.yml +++ b/.github/workflows/policy_engine_checks.yml @@ -36,6 +36,9 @@ jobs: go-version: "1.23" cache-dependency-path: policy-engine/go.mod + - name: Verify module + run: go mod tidy && git diff --exit-code go.mod go.sum + - name: Build run: go build ./... 
diff --git a/policy-engine/pkg/pbac/edge_cases_test.go b/policy-engine/pkg/pbac/edge_cases_test.go index 07e1aab5323..9f231119150 100644 --- a/policy-engine/pkg/pbac/edge_cases_test.go +++ b/policy-engine/pkg/pbac/edge_cases_test.go @@ -619,3 +619,18 @@ func TestPolicy_EnabledDefaultsToTrue(t *testing.T) { t.Errorf("expected DENY (nil enabled = active), got %s", result.Decision) } } + +func TestPolicy_PriorityTie_PreservesInsertionOrder(t *testing.T) { + // SliceStable preserves original order for equal priorities. + policies := []AccessPolicy{ + {ID: "p1", Key: "first-allow", Priority: 100, Enabled: boolPtr(true), Decision: PolicyAllow, Match: MatchBlock{}}, + {ID: "p2", Key: "second-deny", Priority: 100, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{}}, + } + result := EvaluatePolicies(policies, &AccessEvaluationRequest{}) + if result.Decision != PolicyAllow { + t.Errorf("expected ALLOW (first in insertion order), got %s", result.Decision) + } + if result.DecisivePolicyKey == nil || *result.DecisivePolicyKey != "first-allow" { + t.Errorf("expected decisive policy 'first-allow'") + } +} diff --git a/policy-engine/pkg/pbac/policy_types.go b/policy-engine/pkg/pbac/policy_types.go index 4c07b677281..f6670b4a938 100644 --- a/policy-engine/pkg/pbac/policy_types.go +++ b/policy-engine/pkg/pbac/policy_types.go @@ -117,7 +117,8 @@ type PolicyEvaluationResult struct { Reason *string `json:"reason,omitempty"` } -// EvaluatePoliciesRequest is the JSON request body for the policy evaluation endpoint. +// EvaluatePoliciesRequest is the JSON request body used by the fidesplus +// sidecar HTTP handler for POST /v1/evaluate-policies. 
type EvaluatePoliciesRequest struct { Policies []AccessPolicy `json:"policies"` Request AccessEvaluationRequest `json:"request"` diff --git a/policy-engine/pkg/pbac/types.go b/policy-engine/pkg/pbac/types.go index a16ebf15378..a55ba1f9bd4 100644 --- a/policy-engine/pkg/pbac/types.go +++ b/policy-engine/pkg/pbac/types.go @@ -85,7 +85,8 @@ type PurposeEvaluationResult struct { TotalAccesses int `json:"total_accesses"` } -// EvaluatePurposeRequest is the JSON request body for purpose evaluation. +// EvaluatePurposeRequest is the JSON request body used by the fidesplus +// sidecar HTTP handler for POST /v1/evaluate-purpose. type EvaluatePurposeRequest struct { Consumer ConsumerPurposes `json:"consumer"` Datasets map[string]DatasetPurposes `json:"datasets"` diff --git a/src/fides/cli/commands/pbac.py b/src/fides/cli/commands/pbac.py index 2942552b5e9..a10444a0026 100644 --- a/src/fides/cli/commands/pbac.py +++ b/src/fides/cli/commands/pbac.py @@ -9,11 +9,18 @@ import json import sys +from dataclasses import asdict from typing import TextIO import rich_click as click from fides.service.pbac.evaluate import evaluate_purpose +from fides.service.pbac.policies.evaluate import ( + evaluate_policies, + parsed_policy_from_dict, + request_from_dict, + result_to_dict, +) from fides.service.pbac.types import ConsumerPurposes, DatasetPurposes @@ -140,11 +147,6 @@ def evaluate_policies_cmd(input_file: TextIO) -> None: "context": {"consent": {"do_not_sell": "opt_out"}} } } - - \b - This is the same evaluation the Go sidecar performs at API speed. - The CLI runs it through Python for convenience — use the sidecar - for production throughput. 
""" try: data = json.load(input_file) @@ -152,11 +154,9 @@ def evaluate_policies_cmd(input_file: TextIO) -> None: click.echo(f"Error parsing JSON: {e}", err=True) sys.exit(1) - from fides.service.pbac.policies.evaluate import evaluate_access_policies - - policies = data.get("policies", []) - request = data.get("request", {}) + policies = [parsed_policy_from_dict(p) for p in data.get("policies", [])] + request = request_from_dict(data.get("request", {})) - result = evaluate_access_policies(policies, request) + result = evaluate_policies(policies, request) - click.echo(json.dumps(result, indent=2)) + click.echo(json.dumps(result_to_dict(result), indent=2)) diff --git a/src/fides/service/pbac/policies/evaluate.py b/src/fides/service/pbac/policies/evaluate.py index f5c6b2d8db9..1d765027cea 100644 --- a/src/fides/service/pbac/policies/evaluate.py +++ b/src/fides/service/pbac/policies/evaluate.py @@ -1,8 +1,11 @@ """Access Policy v2 evaluation engine — Python implementation. +Implements the AccessPolicyEvaluator Protocol defined in interface.py, +using the existing typed dataclasses for inputs and outputs. + Mirrors the Go implementation in policy-engine/pkg/pbac/policy_evaluate.go. -Used by the CLI (fides pbac evaluate-policies) and as the reference -implementation. The Go sidecar is the production path for API throughput. +The Go sidecar is the production path for API throughput; this Python +implementation is used by the CLI and as the in-process fallback. Algorithm (from IMPLEMENTATION_GUIDE.md): 1. 
Sort enabled policies by priority (highest first) @@ -16,97 +19,215 @@ from __future__ import annotations +from dataclasses import dataclass, field from typing import Any +from fides.service.pbac.policies.interface import ( + AccessEvaluationRequest, + EvaluatedPolicyInfo, + PolicyAction, + PolicyDecision, + PolicyEvaluationResult, +) + + +# ── Policy representation (parsed from YAML + DB metadata) ──────────── + + +@dataclass +class ParsedPolicy: + """A policy ready for evaluation. + + Constructed from the DB entity + parsed YAML by the service layer + or from JSON by the CLI. + """ + + key: str + priority: int = 0 + enabled: bool = True + decision: str = "DENY" # "ALLOW" or "DENY" + match: dict[str, Any] = field(default_factory=dict) + unless: list[dict[str, Any]] = field(default_factory=list) + action: PolicyAction | None = None -def evaluate_access_policies( - policies: list[dict[str, Any]], - request: dict[str, Any], -) -> dict[str, Any]: - """Evaluate a list of access policies against a request. - Takes and returns plain dicts for easy JSON round-tripping from the CLI. +# ── Protocol-conformant evaluator ───────────────────────────────────── + + +class InProcessAccessPolicyEvaluator: + """Evaluates access policies in-process using the Python engine. + + Conforms to the AccessPolicyEvaluator Protocol from interface.py. + Injected into InProcessPBACEvaluationService or + SidecarPBACEvaluationService as the policy_evaluator. 
""" - enabled = [p for p in policies if p.get("enabled", True)] - enabled.sort(key=lambda p: p.get("priority", 0), reverse=True) - evaluated: list[dict[str, Any]] = [] + def __init__(self, policies: list[ParsedPolicy] | None = None) -> None: + self._policies = policies or [] + + def set_policies(self, policies: list[ParsedPolicy]) -> None: + """Update the policy set (e.g., after a reload from DB).""" + self._policies = policies + + def evaluate(self, request: AccessEvaluationRequest) -> PolicyEvaluationResult: + """Evaluate access policies against a PBAC violation.""" + return evaluate_policies(self._policies, request) + + +# ── Core evaluation function ────────────────────────────────────────── + + +def evaluate_policies( + policies: list[ParsedPolicy], + request: AccessEvaluationRequest, +) -> PolicyEvaluationResult: + """Evaluate a list of parsed policies against a typed request. + + This is the pure evaluation function — no I/O, no DB access. + """ + enabled = [p for p in policies if p.enabled] + enabled.sort(key=lambda p: p.priority, reverse=True) + + evaluated: list[EvaluatedPolicyInfo] = [] for policy in enabled: - if not _matches_request(policy.get("match", {}), request): + if not _matches_request(policy.match, request): continue - unless_triggered = _evaluate_unless(policy.get("unless", []), request) - decision = policy.get("decision", "DENY") + unless_triggered = _evaluate_unless(policy.unless, request) if unless_triggered: - if decision == "ALLOW": + if policy.decision == "ALLOW": evaluated.append( - { - "policy_key": policy.get("key"), - "priority": policy.get("priority"), - "matched": True, - "result": "DENY", - "unless_triggered": True, - } + EvaluatedPolicyInfo( + policy_key=policy.key, + priority=policy.priority, + matched=True, + result="DENY", + unless_triggered=True, + ) + ) + return PolicyEvaluationResult( + decision=PolicyDecision.DENY, + decisive_policy_key=policy.key, + decisive_policy_priority=policy.priority, + unless_triggered=True, + 
action=policy.action, + evaluated_policies=evaluated, ) - return { - "decision": "DENY", - "decisive_policy_key": policy.get("key"), - "decisive_policy_priority": policy.get("priority"), - "unless_triggered": True, - "action": policy.get("action"), - "evaluated_policies": evaluated, - } # DENY suppressed evaluated.append( - { - "policy_key": policy.get("key"), - "priority": policy.get("priority"), - "matched": True, - "result": "SUPPRESSED", - "unless_triggered": True, - } + EvaluatedPolicyInfo( + policy_key=policy.key, + priority=policy.priority, + matched=True, + result="SUPPRESSED", + unless_triggered=True, + ) ) continue # Decision stands - action = policy.get("action") if decision == "DENY" else None + action = policy.action if policy.decision == "DENY" else None evaluated.append( - { - "policy_key": policy.get("key"), - "priority": policy.get("priority"), - "matched": True, - "result": decision, - "unless_triggered": False, - } + EvaluatedPolicyInfo( + policy_key=policy.key, + priority=policy.priority, + matched=True, + result=policy.decision, + unless_triggered=False, + ) + ) + return PolicyEvaluationResult( + decision=PolicyDecision(policy.decision), + decisive_policy_key=policy.key, + decisive_policy_priority=policy.priority, + unless_triggered=False, + action=action, + evaluated_policies=evaluated, ) - return { - "decision": decision, - "decisive_policy_key": policy.get("key"), - "decisive_policy_priority": policy.get("priority"), - "unless_triggered": False, - "action": action, - "evaluated_policies": evaluated, - } - return { - "decision": "NO_DECISION", - "evaluated_policies": evaluated, + return PolicyEvaluationResult( + decision=PolicyDecision.NO_DECISION, + evaluated_policies=evaluated, + ) + + +# ── JSON conversion helpers (CLI boundary) ──────────────────────────── + + +def parsed_policy_from_dict(data: dict[str, Any]) -> ParsedPolicy: + """Construct a ParsedPolicy from a JSON dict (CLI/API boundary).""" + action_data = data.get("action") + action 
= PolicyAction(message=action_data.get("message")) if action_data else None + return ParsedPolicy( + key=data.get("key", ""), + priority=data.get("priority", 0), + enabled=data.get("enabled", True), + decision=data.get("decision", "DENY"), + match=data.get("match", {}), + unless=data.get("unless", []), + action=action, + ) + + +def request_from_dict(data: dict[str, Any]) -> AccessEvaluationRequest: + """Construct an AccessEvaluationRequest from a JSON dict (CLI/API boundary).""" + return AccessEvaluationRequest( + consumer_id=data.get("consumer_id", ""), + consumer_name=data.get("consumer_name", ""), + consumer_purposes=frozenset(data.get("consumer_purposes", [])), + dataset_key=data.get("dataset_key", ""), + dataset_purposes=frozenset(data.get("dataset_purposes", [])), + collection=data.get("collection"), + system_fides_key=data.get("system_fides_key"), + data_uses=tuple(data.get("data_uses", [])), + data_categories=tuple(data.get("data_categories", [])), + data_subjects=tuple(data.get("data_subjects", [])), + context=data.get("context", {}), + ) + + +def result_to_dict(result: PolicyEvaluationResult) -> dict[str, Any]: + """Serialize a PolicyEvaluationResult to a JSON-safe dict (CLI boundary).""" + output: dict[str, Any] = { + "decision": result.decision.value, } + if result.decisive_policy_key is not None: + output["decisive_policy_key"] = result.decisive_policy_key + if result.decisive_policy_priority is not None: + output["decisive_policy_priority"] = result.decisive_policy_priority + output["unless_triggered"] = result.unless_triggered + if result.action: + output["action"] = {"message": result.action.message} + else: + output["action"] = None + output["evaluated_policies"] = [ + { + "policy_key": ep.policy_key, + "priority": ep.priority, + "matched": ep.matched, + "result": ep.result, + "unless_triggered": ep.unless_triggered, + } + for ep in result.evaluated_policies + ] + return output + + +# ── Match evaluation (private) 
──────────────────────────────────────── -def _matches_request(match: dict[str, Any], request: dict[str, Any]) -> bool: +def _matches_request(match: dict[str, Any], request: AccessEvaluationRequest) -> bool: """Check if a policy's match block applies to the request.""" - for dimension, field in [ - ("data_use", "data_uses"), - ("data_category", "data_categories"), - ("data_subject", "data_subjects"), + for dimension, values in [ + ("data_use", request.data_uses), + ("data_category", request.data_categories), + ("data_subject", request.data_subjects), ]: dim = match.get(dimension) if dim is not None: - values = request.get(field, []) - if not _matches_dimension(dim, values): + if not _matches_dimension(dim, list(values)): return False return True @@ -127,15 +248,12 @@ def _matches_dimension(dim: dict[str, Any], request_values: list[str]) -> bool: def _taxonomy_matches_any(match_key: str, request_values: list[str]) -> bool: - """Check if a taxonomy key matches any request value via prefix matching.""" return any(_taxonomy_match(match_key, rv) for rv in request_values) def _taxonomy_match(match_key: str, request_value: str) -> bool: - """Check if match_key equals or is a parent of request_value. + """Taxonomy prefix match with dot boundary guard. - "user.contact" matches "user.contact.email" (prefix + dot boundary). - "user" does NOT match "user_data". Empty match_key never matches — prevents accidental catch-all. 
""" if not match_key: @@ -145,8 +263,11 @@ def _taxonomy_match(match_key: str, request_value: str) -> bool: return request_value.startswith(match_key + ".") +# ── Unless evaluation (private) ─────────────────────────────────────── + + def _evaluate_unless( - constraints: list[dict[str, Any]], request: dict[str, Any] + constraints: list[dict[str, Any]], request: AccessEvaluationRequest ) -> bool: """All constraints must trigger (AND logic) for the unless to fire.""" if not constraints: @@ -154,10 +275,11 @@ def _evaluate_unless( return all(_evaluate_constraint(c, request) for c in constraints) -def _evaluate_constraint(constraint: dict[str, Any], request: dict[str, Any]) -> bool: - """Evaluate a single unless condition.""" +def _evaluate_constraint( + constraint: dict[str, Any], request: AccessEvaluationRequest +) -> bool: ctype = constraint.get("type") - context = request.get("context", {}) + context = request.context if ctype == "consent": return _eval_consent(constraint, context) @@ -218,7 +340,6 @@ def _eval_data_flow(constraint: dict[str, Any], context: dict[str, Any]) -> bool def _resolve_field(context: dict[str, Any], field_path: str) -> str | None: - """Traverse a dotted path in the context dict.""" current: Any = context for part in field_path.split("."): if not isinstance(current, dict): diff --git a/tests/service/pbac/policies/test_evaluate_access_policies.py b/tests/service/pbac/policies/test_evaluate_access_policies.py index cfc25a6645e..ad1317cb7e0 100644 --- a/tests/service/pbac/policies/test_evaluate_access_policies.py +++ b/tests/service/pbac/policies/test_evaluate_access_policies.py @@ -6,254 +6,246 @@ import pytest -from fides.service.pbac.policies.evaluate import evaluate_access_policies +from fides.service.pbac.policies.evaluate import ( + InProcessAccessPolicyEvaluator, + ParsedPolicy, + evaluate_policies, + parsed_policy_from_dict, + request_from_dict, + result_to_dict, +) +from fides.service.pbac.policies.interface import ( + 
AccessEvaluationRequest, + PolicyAction, + PolicyDecision, +) + + +def req(**kwargs) -> AccessEvaluationRequest: + """Shorthand for building AccessEvaluationRequest with defaults.""" + defaults = { + "consumer_id": "", + "consumer_name": "", + "consumer_purposes": frozenset(), + "dataset_key": "", + "dataset_purposes": frozenset(), + } + defaults.update(kwargs) + return AccessEvaluationRequest(**defaults) class TestPriorityOrdering: def test_highest_priority_wins(self): policies = [ - { - "key": "low-allow", - "priority": 10, - "enabled": True, - "decision": "ALLOW", - "match": {}, - }, - { - "key": "high-deny", - "priority": 200, - "enabled": True, - "decision": "DENY", - "match": {}, - "action": {"message": "Highest priority wins"}, - }, + ParsedPolicy(key="low-allow", priority=10, decision="ALLOW"), + ParsedPolicy( + key="high-deny", + priority=200, + decision="DENY", + action=PolicyAction(message="Highest priority wins"), + ), ] - request = {"data_uses": ["marketing"]} - - result = evaluate_access_policies(policies, request) + result = evaluate_policies(policies, req(data_uses=("marketing",))) - assert result["decision"] == "DENY" - assert result["decisive_policy_key"] == "high-deny" + assert result.decision == PolicyDecision.DENY + assert result.decisive_policy_key == "high-deny" def test_allow_when_matched(self): policies = [ - { - "key": "deny-financial", - "priority": 200, - "enabled": True, - "decision": "DENY", - "match": {"data_category": {"any": ["user.financial"]}}, - }, - { - "key": "allow-marketing", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {"data_use": {"any": ["marketing"]}}, - }, + ParsedPolicy( + key="deny-financial", + priority=200, + decision="DENY", + match={"data_category": {"any": ["user.financial"]}}, + ), + ParsedPolicy( + key="allow-marketing", + priority=100, + decision="ALLOW", + match={"data_use": {"any": ["marketing"]}}, + ), ] - request = { - "data_uses": ["marketing.advertising"], - "data_categories": 
["user.contact.email"], - } - - result = evaluate_access_policies(policies, request) + result = evaluate_policies( + policies, + req( + data_uses=("marketing.advertising",), + data_categories=("user.contact.email",), + ), + ) - assert result["decision"] == "ALLOW" - assert result["decisive_policy_key"] == "allow-marketing" + assert result.decision == PolicyDecision.ALLOW + assert result.decisive_policy_key == "allow-marketing" def test_catch_all_deny(self): policies = [ - { - "key": "catch-all", - "priority": 0, - "enabled": True, - "decision": "DENY", - "match": {}, - "action": {"message": "Default deny"}, - }, + ParsedPolicy( + key="catch-all", + priority=0, + decision="DENY", + action=PolicyAction(message="Default deny"), + ), ] - result = evaluate_access_policies(policies, {"data_uses": ["essential"]}) + result = evaluate_policies(policies, req(data_uses=("essential",))) - assert result["decision"] == "DENY" - assert result["decisive_policy_key"] == "catch-all" - assert result["action"]["message"] == "Default deny" + assert result.decision == PolicyDecision.DENY + assert result.decisive_policy_key == "catch-all" + assert result.action is not None + assert result.action.message == "Default deny" class TestNoDecision: def test_empty_policies(self): - result = evaluate_access_policies([], {}) - assert result["decision"] == "NO_DECISION" + result = evaluate_policies([], req()) + assert result.decision == PolicyDecision.NO_DECISION def test_disabled_policies_skipped(self): policies = [ - { - "key": "disabled", - "priority": 100, - "enabled": False, - "decision": "DENY", - "match": {}, - }, + ParsedPolicy(key="disabled", priority=100, enabled=False, decision="DENY") ] - result = evaluate_access_policies(policies, {}) - assert result["decision"] == "NO_DECISION" + result = evaluate_policies(policies, req()) + assert result.decision == PolicyDecision.NO_DECISION def test_no_match(self): policies = [ - { - "key": "deny-financial", - "priority": 100, - "enabled": True, - 
"decision": "DENY", - "match": {"data_category": {"any": ["user.financial"]}}, - }, + ParsedPolicy( + key="deny-financial", + priority=100, + decision="DENY", + match={"data_category": {"any": ["user.financial"]}}, + ), ] - result = evaluate_access_policies( - policies, {"data_categories": ["system.operations"]} + result = evaluate_policies( + policies, req(data_categories=("system.operations",)) ) - assert result["decision"] == "NO_DECISION" + assert result.decision == PolicyDecision.NO_DECISION class TestTaxonomyMatching: def test_parent_matches_child(self): policies = [ - { - "key": "deny-user", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {"data_category": {"any": ["user"]}}, - }, + ParsedPolicy( + key="deny-user", + priority=100, + decision="DENY", + match={"data_category": {"any": ["user"]}}, + ), ] - result = evaluate_access_policies( - policies, {"data_categories": ["user.contact.email"]} + result = evaluate_policies( + policies, req(data_categories=("user.contact.email",)) ) - assert result["decision"] == "DENY" + assert result.decision == PolicyDecision.DENY def test_child_does_not_match_parent(self): policies = [ - { - "key": "deny-child", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {"data_category": {"any": ["user.contact.email"]}}, - }, + ParsedPolicy( + key="deny-child", + priority=100, + decision="DENY", + match={"data_category": {"any": ["user.contact.email"]}}, + ), ] - result = evaluate_access_policies( - policies, {"data_categories": ["user.contact"]} - ) - assert result["decision"] == "NO_DECISION" + result = evaluate_policies(policies, req(data_categories=("user.contact",))) + assert result.decision == PolicyDecision.NO_DECISION def test_no_dot_boundary_false_positive(self): policies = [ - { - "key": "deny-user", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {"data_category": {"any": ["user"]}}, - }, + ParsedPolicy( + key="deny-user", + priority=100, + 
decision="DENY", + match={"data_category": {"any": ["user"]}}, + ), ] - result = evaluate_access_policies(policies, {"data_categories": ["user_data"]}) - assert result["decision"] == "NO_DECISION" + result = evaluate_policies(policies, req(data_categories=("user_data",))) + assert result.decision == PolicyDecision.NO_DECISION def test_match_all_requires_every_value(self): policies = [ - { - "key": "require-both", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": { - "data_category": { - "all": ["user.contact", "user.financial"], - } - }, - }, + ParsedPolicy( + key="require-both", + priority=100, + decision="DENY", + match={"data_category": {"all": ["user.contact", "user.financial"]}}, + ), ] - # Only one → no match - result1 = evaluate_access_policies( - policies, {"data_categories": ["user.contact.email"]} + result1 = evaluate_policies( + policies, req(data_categories=("user.contact.email",)) ) - assert result1["decision"] == "NO_DECISION" + assert result1.decision == PolicyDecision.NO_DECISION - # Both → match - result2 = evaluate_access_policies( + result2 = evaluate_policies( policies, - {"data_categories": ["user.contact.email", "user.financial.bank_account"]}, + req(data_categories=("user.contact.email", "user.financial.bank_account")), ) - assert result2["decision"] == "DENY" + assert result2.decision == PolicyDecision.DENY class TestUnlessConsent: def test_opt_out_inverts_allow(self): policies = [ - { - "key": "allow-unless-optout", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {"data_use": {"any": ["marketing"]}}, - "unless": [ + ParsedPolicy( + key="allow-unless-optout", + priority=100, + decision="ALLOW", + match={"data_use": {"any": ["marketing"]}}, + unless=[ { "type": "consent", "privacy_notice_key": "do_not_sell", "requirement": "opt_out", } ], - "action": {"message": "User opted out"}, - }, + action=PolicyAction(message="User opted out"), + ), ] - request = { - "data_uses": ["marketing.advertising"], - 
"context": {"consent": {"do_not_sell": "opt_out"}}, - } - - result = evaluate_access_policies(policies, request) + result = evaluate_policies( + policies, + req( + data_uses=("marketing.advertising",), + context={"consent": {"do_not_sell": "opt_out"}}, + ), + ) - assert result["decision"] == "DENY" - assert result["unless_triggered"] is True + assert result.decision == PolicyDecision.DENY + assert result.unless_triggered is True def test_consent_not_triggered_allow_stands(self): policies = [ - { - "key": "allow-unless-optout", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {"data_use": {"any": ["marketing"]}}, - "unless": [ + ParsedPolicy( + key="allow-unless-optout", + priority=100, + decision="ALLOW", + match={"data_use": {"any": ["marketing"]}}, + unless=[ { "type": "consent", "privacy_notice_key": "do_not_sell", "requirement": "opt_out", } ], - }, + ), ] - request = { - "data_uses": ["marketing.advertising"], - "context": {"consent": {"do_not_sell": "opt_in"}}, - } - - result = evaluate_access_policies(policies, request) - assert result["decision"] == "ALLOW" - assert result["unless_triggered"] is False + result = evaluate_policies( + policies, + req( + data_uses=("marketing.advertising",), + context={"consent": {"do_not_sell": "opt_in"}}, + ), + ) + assert result.decision == PolicyDecision.ALLOW + assert result.unless_triggered is False class TestUnlessGeo: def test_deny_suppressed_continues_to_next(self): policies = [ - { - "key": "deny-unless-geo", - "priority": 200, - "enabled": True, - "decision": "DENY", - "match": {}, - "unless": [ + ParsedPolicy( + key="deny-unless-geo", + priority=200, + decision="DENY", + unless=[ { "type": "geo_location", "field": "environment.geo_location", @@ -261,38 +253,28 @@ def test_deny_suppressed_continues_to_next(self): "values": ["US-CA"], } ], - }, - { - "key": "fallback-allow", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - }, + ), + 
ParsedPolicy(key="fallback-allow", priority=100, decision="ALLOW"), ] - request = { - "context": {"environment": {"geo_location": "US-CA"}}, - } - - result = evaluate_access_policies(policies, request) + result = evaluate_policies( + policies, req(context={"environment": {"geo_location": "US-CA"}}) + ) - assert result["decision"] == "ALLOW" - assert result["decisive_policy_key"] == "fallback-allow" - assert len(result["evaluated_policies"]) == 2 - assert result["evaluated_policies"][0]["result"] == "SUPPRESSED" - assert result["evaluated_policies"][1]["result"] == "ALLOW" + assert result.decision == PolicyDecision.ALLOW + assert result.decisive_policy_key == "fallback-allow" + assert len(result.evaluated_policies) == 2 + assert result.evaluated_policies[0].result == "SUPPRESSED" + assert result.evaluated_policies[1].result == "ALLOW" class TestUnlessDataFlow: def test_egress_triggers_unless(self): policies = [ - { - "key": "allow-unless-egress", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "unless": [ + ParsedPolicy( + key="allow-unless-egress", + priority=100, + decision="ALLOW", + unless=[ { "type": "data_flow", "direction": "egress", @@ -300,28 +282,23 @@ def test_egress_triggers_unless(self): "systems": ["external_vendor"], } ], - }, + ), ] - request = { - "context": { - "data_flows": {"egress": ["external_vendor", "partner_api"]}, - }, - } - - result = evaluate_access_policies(policies, request) - assert result["decision"] == "DENY" + result = evaluate_policies( + policies, + req(context={"data_flows": {"egress": ["external_vendor", "partner_api"]}}), + ) + assert result.decision == PolicyDecision.DENY class TestUnlessMultipleConstraints: def test_all_must_trigger(self): policies = [ - { - "key": "allow-unless-both", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "unless": [ + ParsedPolicy( + key="allow-unless-both", + priority=100, + decision="ALLOW", + unless=[ { "type": "consent", 
"privacy_notice_key": "do_not_sell", @@ -334,206 +311,177 @@ def test_all_must_trigger(self): "values": ["US-CA"], }, ], - }, + ), ] - # Only consent triggers → ALLOW stands - result1 = evaluate_access_policies( + result1 = evaluate_policies( policies, - { - "context": { + req( + context={ "consent": {"do_not_sell": "opt_out"}, "environment": {"geo_location": "US-NY"}, - }, - }, + } + ), ) - assert result1["decision"] == "ALLOW" + assert result1.decision == PolicyDecision.ALLOW - # Both trigger → DENY - result2 = evaluate_access_policies( + result2 = evaluate_policies( policies, - { - "context": { + req( + context={ "consent": {"do_not_sell": "opt_out"}, "environment": {"geo_location": "US-CA"}, - }, - }, + } + ), ) - assert result2["decision"] == "DENY" + assert result2.decision == PolicyDecision.DENY class TestMatchDataSubject: def test_data_subject_matches(self): policies = [ - { - "key": "deny-employee", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {"data_subject": {"any": ["employee"]}}, - }, + ParsedPolicy( + key="deny-employee", + priority=100, + decision="DENY", + match={"data_subject": {"any": ["employee"]}}, + ), ] - result = evaluate_access_policies(policies, {"data_subjects": ["employee"]}) - assert result["decision"] == "DENY" + result = evaluate_policies(policies, req(data_subjects=("employee",))) + assert result.decision == PolicyDecision.DENY def test_data_subject_no_match(self): policies = [ - { - "key": "deny-employee", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {"data_subject": {"any": ["employee"]}}, - }, + ParsedPolicy( + key="deny-employee", + priority=100, + decision="DENY", + match={"data_subject": {"any": ["employee"]}}, + ), ] - result = evaluate_access_policies(policies, {"data_subjects": ["customer"]}) - assert result["decision"] == "NO_DECISION" + result = evaluate_policies(policies, req(data_subjects=("customer",))) + assert result.decision == PolicyDecision.NO_DECISION def 
test_three_dimensions_all_must_match(self): policies = [ - { - "key": "specific", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": { + ParsedPolicy( + key="specific", + priority=100, + decision="DENY", + match={ "data_use": {"any": ["marketing"]}, "data_category": {"any": ["user.contact"]}, "data_subject": {"any": ["customer"]}, }, - }, + ), ] - # All three match - result = evaluate_access_policies( + result = evaluate_policies( policies, - { - "data_uses": ["marketing.advertising"], - "data_categories": ["user.contact.email"], - "data_subjects": ["customer"], - }, + req( + data_uses=("marketing.advertising",), + data_categories=("user.contact.email",), + data_subjects=("customer",), + ), ) - assert result["decision"] == "DENY" + assert result.decision == PolicyDecision.DENY - # Subject doesn't match - result2 = evaluate_access_policies( + result2 = evaluate_policies( policies, - { - "data_uses": ["marketing.advertising"], - "data_categories": ["user.contact.email"], - "data_subjects": ["employee"], - }, + req( + data_uses=("marketing.advertising",), + data_categories=("user.contact.email",), + data_subjects=("employee",), + ), ) - assert result2["decision"] == "NO_DECISION" + assert result2.decision == PolicyDecision.NO_DECISION class TestMatchCombined: def test_any_and_all_on_different_dimensions(self): policies = [ - { - "key": "combined", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": { + ParsedPolicy( + key="combined", + priority=100, + decision="DENY", + match={ "data_use": {"any": ["marketing"]}, "data_category": {"all": ["user.contact", "user.financial"]}, }, - }, + ), ] - # Both categories present → match - result = evaluate_access_policies( + result = evaluate_policies( policies, - { - "data_uses": ["marketing.advertising"], - "data_categories": [ - "user.contact.email", - "user.financial.bank_account", - ], - }, + req( + data_uses=("marketing.advertising",), + data_categories=("user.contact.email", 
"user.financial.bank_account"), + ), ) - assert result["decision"] == "DENY" + assert result.decision == PolicyDecision.DENY - # Only one category → no match - result2 = evaluate_access_policies( + result2 = evaluate_policies( policies, - { - "data_uses": ["marketing.advertising"], - "data_categories": ["user.contact.email"], - }, + req( + data_uses=("marketing.advertising",), + data_categories=("user.contact.email",), + ), ) - assert result2["decision"] == "NO_DECISION" + assert result2.decision == PolicyDecision.NO_DECISION class TestConsentVariants: def test_not_opt_in(self): policies = [ - { - "key": "allow-unless", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "unless": [ + ParsedPolicy( + key="allow-unless", + priority=100, + decision="ALLOW", + unless=[ { "type": "consent", "privacy_notice_key": "n", "requirement": "not_opt_in", - }, + } ], - }, + ), ] - # opt_out → not_opt_in is true → DENY - result = evaluate_access_policies( - policies, {"context": {"consent": {"n": "opt_out"}}} - ) - assert result["decision"] == "DENY" + result = evaluate_policies(policies, req(context={"consent": {"n": "opt_out"}})) + assert result.decision == PolicyDecision.DENY - # opt_in → not_opt_in is false → ALLOW - result2 = evaluate_access_policies( - policies, {"context": {"consent": {"n": "opt_in"}}} - ) - assert result2["decision"] == "ALLOW" + result2 = evaluate_policies(policies, req(context={"consent": {"n": "opt_in"}})) + assert result2.decision == PolicyDecision.ALLOW def test_not_opt_out(self): policies = [ - { - "key": "allow-unless", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "unless": [ + ParsedPolicy( + key="allow-unless", + priority=100, + decision="ALLOW", + unless=[ { "type": "consent", "privacy_notice_key": "n", "requirement": "not_opt_out", - }, + } ], - }, + ), ] - # opt_in → not_opt_out is true → DENY - result = evaluate_access_policies( - policies, {"context": {"consent": {"n": "opt_in"}}} - 
) - assert result["decision"] == "DENY" + result = evaluate_policies(policies, req(context={"consent": {"n": "opt_in"}})) + assert result.decision == PolicyDecision.DENY - # opt_out → not_opt_out is false → ALLOW - result2 = evaluate_access_policies( - policies, {"context": {"consent": {"n": "opt_out"}}} + result2 = evaluate_policies( + policies, req(context={"consent": {"n": "opt_out"}}) ) - assert result2["decision"] == "ALLOW" + assert result2.decision == PolicyDecision.ALLOW class TestGeoNotIn: def test_not_in_operator(self): policies = [ - { - "key": "deny-unless-outside", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {}, - "unless": [ + ParsedPolicy( + key="deny-unless-outside", + priority=100, + decision="DENY", + unless=[ { "type": "geo_location", "field": "environment.geo_location", @@ -541,37 +489,27 @@ def test_not_in_operator(self): "values": ["US-CA"], } ], - }, + ), ] - # In CA → not_in false → unless doesn't trigger → DENY - result = evaluate_access_policies( - policies, - { - "context": {"environment": {"geo_location": "US-CA"}}, - }, + result = evaluate_policies( + policies, req(context={"environment": {"geo_location": "US-CA"}}) ) - assert result["decision"] == "DENY" + assert result.decision == PolicyDecision.DENY - # In DE → not_in true → unless triggers → DENY suppressed → NO_DECISION - result2 = evaluate_access_policies( - policies, - { - "context": {"environment": {"geo_location": "EU-DE"}}, - }, + result2 = evaluate_policies( + policies, req(context={"environment": {"geo_location": "EU-DE"}}) ) - assert result2["decision"] == "NO_DECISION" + assert result2.decision == PolicyDecision.NO_DECISION class TestDataFlowNoneOf: def test_none_of_operator(self): policies = [ - { - "key": "allow-unless", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "unless": [ + ParsedPolicy( + key="allow-unless", + priority=100, + decision="ALLOW", + unless=[ { "type": "data_flow", "direction": "egress", @@ 
-579,85 +517,63 @@ def test_none_of_operator(self): "systems": ["trusted_partner"], } ], - }, + ), ] - # trusted_partner present → none_of false → ALLOW - result = evaluate_access_policies( - policies, - { - "context": {"data_flows": {"egress": ["trusted_partner"]}}, - }, + result = evaluate_policies( + policies, req(context={"data_flows": {"egress": ["trusted_partner"]}}) ) - assert result["decision"] == "ALLOW" + assert result.decision == PolicyDecision.ALLOW - # trusted_partner absent → none_of true → DENY - result2 = evaluate_access_policies( - policies, - { - "context": {"data_flows": {"egress": ["unknown_vendor"]}}, - }, + result2 = evaluate_policies( + policies, req(context={"data_flows": {"egress": ["unknown_vendor"]}}) ) - assert result2["decision"] == "DENY" + assert result2.decision == PolicyDecision.DENY class TestEdgeCases: def test_no_context_unless_does_not_trigger(self): policies = [ - { - "key": "allow-unless", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "unless": [ + ParsedPolicy( + key="allow-unless", + priority=100, + decision="ALLOW", + unless=[ { "type": "consent", "privacy_notice_key": "x", "requirement": "opt_out", } ], - }, + ), ] - result = evaluate_access_policies(policies, {}) - assert result["decision"] == "ALLOW" + result = evaluate_policies(policies, req()) + assert result.decision == PolicyDecision.ALLOW def test_deny_action_only_on_deny(self): policies = [ - { - "key": "allow-with-action", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "action": {"message": "should not appear"}, - }, + ParsedPolicy( + key="allow-with-action", + priority=100, + decision="ALLOW", + action=PolicyAction(message="should not appear"), + ), ] - result = evaluate_access_policies(policies, {}) - assert result["decision"] == "ALLOW" - assert result.get("action") is None + result = evaluate_policies(policies, req()) + assert result.decision == PolicyDecision.ALLOW + assert result.action is None 
def test_empty_match_catches_everything(self): - policies = [ - { - "key": "catch-all", - "priority": 1, - "enabled": True, - "decision": "DENY", - "match": {}, - }, - ] - result = evaluate_access_policies(policies, {}) - assert result["decision"] == "DENY" + policies = [ParsedPolicy(key="catch-all", priority=1, decision="DENY")] + result = evaluate_policies(policies, req()) + assert result.decision == PolicyDecision.DENY def test_context_nested_field_resolution(self): policies = [ - { - "key": "deny-unless-nested", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {}, - "unless": [ + ParsedPolicy( + key="deny-unless-nested", + priority=100, + decision="DENY", + unless=[ { "type": "geo_location", "field": "a.b.c", @@ -665,47 +581,37 @@ def test_context_nested_field_resolution(self): "values": ["deep_value"], } ], - }, + ), ] - result = evaluate_access_policies( - policies, - { - "context": {"a": {"b": {"c": "deep_value"}}}, - }, + result = evaluate_policies( + policies, req(context={"a": {"b": {"c": "deep_value"}}}) ) - # Unless triggers → DENY suppressed → NO_DECISION - assert result["decision"] == "NO_DECISION" + assert result.decision == PolicyDecision.NO_DECISION def test_taxonomy_empty_key_never_matches(self): policies = [ - { - "key": "empty-key", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": {"data_use": {"any": [""]}}, - }, + ParsedPolicy( + key="empty-key", + priority=100, + decision="DENY", + match={"data_use": {"any": [""]}}, + ), ] - result = evaluate_access_policies(policies, {"data_uses": ["marketing"]}) - assert result["decision"] == "NO_DECISION" + result = evaluate_policies(policies, req(data_uses=("marketing",))) + assert result.decision == PolicyDecision.NO_DECISION def test_enabled_defaults_to_true(self): - # Policy without "enabled" field should be treated as active - policies = [ - {"key": "no-enabled", "priority": 100, "decision": "DENY", "match": {}}, - ] - result = 
evaluate_access_policies(policies, {}) - assert result["decision"] == "DENY" + policies = [ParsedPolicy(key="default-enabled", priority=100, decision="DENY")] + result = evaluate_policies(policies, req()) + assert result.decision == PolicyDecision.DENY def test_duplicate_unless_constraints(self): policies = [ - { - "key": "allow-unless-dup", - "priority": 100, - "enabled": True, - "decision": "ALLOW", - "match": {}, - "unless": [ + ParsedPolicy( + key="allow-unless-dup", + priority=100, + decision="ALLOW", + unless=[ { "type": "consent", "privacy_notice_key": "n", @@ -717,44 +623,119 @@ def test_duplicate_unless_constraints(self): "requirement": "opt_out", }, ], - }, + ), ] - # Both trigger → DENY - result = evaluate_access_policies( - policies, {"context": {"consent": {"n": "opt_out"}}} - ) - assert result["decision"] == "DENY" + result = evaluate_policies(policies, req(context={"consent": {"n": "opt_out"}})) + assert result.decision == PolicyDecision.DENY - # Neither triggers → ALLOW - result2 = evaluate_access_policies( - policies, {"context": {"consent": {"n": "opt_in"}}} - ) - assert result2["decision"] == "ALLOW" + result2 = evaluate_policies(policies, req(context={"consent": {"n": "opt_in"}})) + assert result2.decision == PolicyDecision.ALLOW def test_match_dimension_both_any_and_all(self): policies = [ - { - "key": "both-ops", - "priority": 100, - "enabled": True, - "decision": "DENY", - "match": { + ParsedPolicy( + key="both-ops", + priority=100, + decision="DENY", + match={ "data_category": { "any": ["user.contact", "user.financial"], "all": ["user.contact", "user.financial"], } }, - }, + ), ] - # Both satisfied - result = evaluate_access_policies( + result = evaluate_policies( policies, - {"data_categories": ["user.contact.email", "user.financial.bank_account"]}, + req(data_categories=("user.contact.email", "user.financial.bank_account")), + ) + assert result.decision == PolicyDecision.DENY + + result2 = evaluate_policies( + policies, 
req(data_categories=("user.contact.email",)) + ) + assert result2.decision == PolicyDecision.NO_DECISION + + def test_priority_tie_preserves_insertion_order(self): + """Stable sort: policies at the same priority keep their original order.""" + policies = [ + ParsedPolicy(key="first-allow", priority=100, decision="ALLOW"), + ParsedPolicy(key="second-deny", priority=100, decision="DENY"), + ] + result = evaluate_policies(policies, req()) + assert result.decision == PolicyDecision.ALLOW + assert result.decisive_policy_key == "first-allow" + + +class TestProtocolConformance: + def test_evaluator_conforms_to_protocol(self): + evaluator = InProcessAccessPolicyEvaluator( + policies=[ParsedPolicy(key="deny-all", priority=0, decision="DENY")] ) - assert result["decision"] == "DENY" + result = evaluator.evaluate(req()) + assert result.decision == PolicyDecision.DENY + + def test_evaluator_set_policies(self): + evaluator = InProcessAccessPolicyEvaluator() + result = evaluator.evaluate(req()) + assert result.decision == PolicyDecision.NO_DECISION - # Any satisfied but not all - result2 = evaluate_access_policies( - policies, {"data_categories": ["user.contact.email"]} + evaluator.set_policies( + [ParsedPolicy(key="deny-all", priority=0, decision="DENY")] ) - assert result2["decision"] == "NO_DECISION" + result = evaluator.evaluate(req()) + assert result.decision == PolicyDecision.DENY + + +class TestJsonConversion: + def test_parsed_policy_from_dict(self): + p = parsed_policy_from_dict( + { + "key": "test", + "priority": 50, + "enabled": False, + "decision": "ALLOW", + "match": {"data_use": {"any": ["marketing"]}}, + "action": {"message": "hello"}, + } + ) + assert p.key == "test" + assert p.priority == 50 + assert p.enabled is False + assert p.decision == "ALLOW" + assert p.action is not None + assert p.action.message == "hello" + + def test_parsed_policy_from_dict_defaults(self): + p = parsed_policy_from_dict({}) + assert p.key == "" + assert p.enabled is True + assert 
p.decision == "DENY" + + def test_request_from_dict(self): + r = request_from_dict( + { + "consumer_id": "c1", + "data_uses": ["marketing"], + "context": {"consent": {"n": "opt_out"}}, + } + ) + assert r.consumer_id == "c1" + assert r.data_uses == ("marketing",) + assert r.context == {"consent": {"n": "opt_out"}} + + def test_result_round_trip(self): + policies = [ + ParsedPolicy( + key="deny-all", + priority=0, + decision="DENY", + action=PolicyAction(message="denied"), + ) + ] + result = evaluate_policies(policies, req()) + d = result_to_dict(result) + assert d["decision"] == "DENY" + assert d["decisive_policy_key"] == "deny-all" + assert d["action"]["message"] == "denied" + assert len(d["evaluated_policies"]) == 1 From 3a9b82c6d5a24446dcdfe4105a84419ec78d4990 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 16:03:10 -0700 Subject: [PATCH 11/14] Fix Go CI, ruff lint, and type-safety for ParsedPolicy.decision - Go CI: remove go.sum from git diff check (no external deps, file doesn't exist) - Ruff: fix import block formatting (extra blank line) - ParsedPolicy.decision: change from str to PolicyDecision enum so invalid values fail at construction time rather than evaluation time - .gitignore: add test_report.xml (pytest junitxml artifact) --- .github/workflows/policy_engine_checks.yml | 2 +- .gitignore | 1 + src/fides/service/pbac/policies/evaluate.py | 19 +++++++++++-------- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/policy_engine_checks.yml b/.github/workflows/policy_engine_checks.yml index 8d106ac14fe..4e88bd6e1c0 100644 --- a/.github/workflows/policy_engine_checks.yml +++ b/.github/workflows/policy_engine_checks.yml @@ -37,7 +37,7 @@ jobs: cache-dependency-path: policy-engine/go.mod - name: Verify module - run: go mod tidy && git diff --exit-code go.mod go.sum + run: go mod tidy && git diff --exit-code go.mod - name: Build run: go build ./... 
diff --git a/.gitignore b/.gitignore index a573e41206a..13cff9b21b4 100644 --- a/.gitignore +++ b/.gitignore @@ -148,6 +148,7 @@ htmlcov/ .cache nosetests.xml coverage.xml +test_report.xml *.cover .hypothesis/ .pytest_cache/ diff --git a/src/fides/service/pbac/policies/evaluate.py b/src/fides/service/pbac/policies/evaluate.py index 1d765027cea..79a3203ffe4 100644 --- a/src/fides/service/pbac/policies/evaluate.py +++ b/src/fides/service/pbac/policies/evaluate.py @@ -30,7 +30,6 @@ PolicyEvaluationResult, ) - # ── Policy representation (parsed from YAML + DB metadata) ──────────── @@ -45,7 +44,7 @@ class ParsedPolicy: key: str priority: int = 0 enabled: bool = True - decision: str = "DENY" # "ALLOW" or "DENY" + decision: PolicyDecision = PolicyDecision.DENY match: dict[str, Any] = field(default_factory=dict) unless: list[dict[str, Any]] = field(default_factory=list) action: PolicyAction | None = None @@ -97,7 +96,7 @@ def evaluate_policies( unless_triggered = _evaluate_unless(policy.unless, request) if unless_triggered: - if policy.decision == "ALLOW": + if policy.decision == PolicyDecision.ALLOW: evaluated.append( EvaluatedPolicyInfo( policy_key=policy.key, @@ -128,18 +127,18 @@ def evaluate_policies( continue # Decision stands - action = policy.action if policy.decision == "DENY" else None + action = policy.action if policy.decision == PolicyDecision.DENY else None evaluated.append( EvaluatedPolicyInfo( policy_key=policy.key, priority=policy.priority, matched=True, - result=policy.decision, + result=policy.decision.value, unless_triggered=False, ) ) return PolicyEvaluationResult( - decision=PolicyDecision(policy.decision), + decision=policy.decision, decisive_policy_key=policy.key, decisive_policy_priority=policy.priority, unless_triggered=False, @@ -157,14 +156,18 @@ def evaluate_policies( def parsed_policy_from_dict(data: dict[str, Any]) -> ParsedPolicy: - """Construct a ParsedPolicy from a JSON dict (CLI/API boundary).""" + """Construct a ParsedPolicy from a JSON 
dict (CLI/API boundary). + + Validates the decision field at construction time rather than + deferring to evaluation time. + """ action_data = data.get("action") action = PolicyAction(message=action_data.get("message")) if action_data else None return ParsedPolicy( key=data.get("key", ""), priority=data.get("priority", 0), enabled=data.get("enabled", True), - decision=data.get("decision", "DENY"), + decision=PolicyDecision(data.get("decision", "DENY")), match=data.get("match", {}), unless=data.get("unless", []), action=action, From 330807ad796868df11e4ac07542bf6dd1709d06d Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Tue, 14 Apr 2026 16:13:28 -0700 Subject: [PATCH 12/14] Fix tests: use PolicyDecision enum instead of string literals Tests were constructing ParsedPolicy with decision="ALLOW"/"DENY" strings, but the field is now PolicyDecision. This caused 'str' object has no attribute 'value' in evaluate_policies. --- .../policies/test_evaluate_access_policies.py | 97 +++++++++++-------- 1 file changed, 57 insertions(+), 40 deletions(-) diff --git a/tests/service/pbac/policies/test_evaluate_access_policies.py b/tests/service/pbac/policies/test_evaluate_access_policies.py index ad1317cb7e0..9e5e9eb428d 100644 --- a/tests/service/pbac/policies/test_evaluate_access_policies.py +++ b/tests/service/pbac/policies/test_evaluate_access_policies.py @@ -37,11 +37,11 @@ def req(**kwargs) -> AccessEvaluationRequest: class TestPriorityOrdering: def test_highest_priority_wins(self): policies = [ - ParsedPolicy(key="low-allow", priority=10, decision="ALLOW"), + ParsedPolicy(key="low-allow", priority=10, decision=PolicyDecision.ALLOW), ParsedPolicy( key="high-deny", priority=200, - decision="DENY", + decision=PolicyDecision.DENY, action=PolicyAction(message="Highest priority wins"), ), ] @@ -55,13 +55,13 @@ def test_allow_when_matched(self): ParsedPolicy( key="deny-financial", priority=200, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_category": {"any": 
["user.financial"]}}, ), ParsedPolicy( key="allow-marketing", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, match={"data_use": {"any": ["marketing"]}}, ), ] @@ -81,7 +81,7 @@ def test_catch_all_deny(self): ParsedPolicy( key="catch-all", priority=0, - decision="DENY", + decision=PolicyDecision.DENY, action=PolicyAction(message="Default deny"), ), ] @@ -101,7 +101,12 @@ def test_empty_policies(self): def test_disabled_policies_skipped(self): policies = [ - ParsedPolicy(key="disabled", priority=100, enabled=False, decision="DENY") + ParsedPolicy( + key="disabled", + priority=100, + enabled=False, + decision=PolicyDecision.DENY, + ) ] result = evaluate_policies(policies, req()) assert result.decision == PolicyDecision.NO_DECISION @@ -111,7 +116,7 @@ def test_no_match(self): ParsedPolicy( key="deny-financial", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_category": {"any": ["user.financial"]}}, ), ] @@ -127,7 +132,7 @@ def test_parent_matches_child(self): ParsedPolicy( key="deny-user", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_category": {"any": ["user"]}}, ), ] @@ -141,7 +146,7 @@ def test_child_does_not_match_parent(self): ParsedPolicy( key="deny-child", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_category": {"any": ["user.contact.email"]}}, ), ] @@ -153,7 +158,7 @@ def test_no_dot_boundary_false_positive(self): ParsedPolicy( key="deny-user", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_category": {"any": ["user"]}}, ), ] @@ -165,7 +170,7 @@ def test_match_all_requires_every_value(self): ParsedPolicy( key="require-both", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_category": {"all": ["user.contact", "user.financial"]}}, ), ] @@ -188,7 +193,7 @@ def test_opt_out_inverts_allow(self): ParsedPolicy( key="allow-unless-optout", priority=100, - decision="ALLOW", + 
decision=PolicyDecision.ALLOW, match={"data_use": {"any": ["marketing"]}}, unless=[ { @@ -216,7 +221,7 @@ def test_consent_not_triggered_allow_stands(self): ParsedPolicy( key="allow-unless-optout", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, match={"data_use": {"any": ["marketing"]}}, unless=[ { @@ -244,7 +249,7 @@ def test_deny_suppressed_continues_to_next(self): ParsedPolicy( key="deny-unless-geo", priority=200, - decision="DENY", + decision=PolicyDecision.DENY, unless=[ { "type": "geo_location", @@ -254,7 +259,9 @@ def test_deny_suppressed_continues_to_next(self): } ], ), - ParsedPolicy(key="fallback-allow", priority=100, decision="ALLOW"), + ParsedPolicy( + key="fallback-allow", priority=100, decision=PolicyDecision.ALLOW + ), ] result = evaluate_policies( policies, req(context={"environment": {"geo_location": "US-CA"}}) @@ -273,7 +280,7 @@ def test_egress_triggers_unless(self): ParsedPolicy( key="allow-unless-egress", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, unless=[ { "type": "data_flow", @@ -297,7 +304,7 @@ def test_all_must_trigger(self): ParsedPolicy( key="allow-unless-both", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, unless=[ { "type": "consent", @@ -343,7 +350,7 @@ def test_data_subject_matches(self): ParsedPolicy( key="deny-employee", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_subject": {"any": ["employee"]}}, ), ] @@ -355,7 +362,7 @@ def test_data_subject_no_match(self): ParsedPolicy( key="deny-employee", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_subject": {"any": ["employee"]}}, ), ] @@ -367,7 +374,7 @@ def test_three_dimensions_all_must_match(self): ParsedPolicy( key="specific", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={ "data_use": {"any": ["marketing"]}, "data_category": {"any": ["user.contact"]}, @@ -402,7 +409,7 @@ def test_any_and_all_on_different_dimensions(self): 
ParsedPolicy( key="combined", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={ "data_use": {"any": ["marketing"]}, "data_category": {"all": ["user.contact", "user.financial"]}, @@ -434,7 +441,7 @@ def test_not_opt_in(self): ParsedPolicy( key="allow-unless", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, unless=[ { "type": "consent", @@ -455,7 +462,7 @@ def test_not_opt_out(self): ParsedPolicy( key="allow-unless", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, unless=[ { "type": "consent", @@ -480,7 +487,7 @@ def test_not_in_operator(self): ParsedPolicy( key="deny-unless-outside", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, unless=[ { "type": "geo_location", @@ -508,7 +515,7 @@ def test_none_of_operator(self): ParsedPolicy( key="allow-unless", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, unless=[ { "type": "data_flow", @@ -536,7 +543,7 @@ def test_no_context_unless_does_not_trigger(self): ParsedPolicy( key="allow-unless", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, unless=[ { "type": "consent", @@ -554,7 +561,7 @@ def test_deny_action_only_on_deny(self): ParsedPolicy( key="allow-with-action", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, action=PolicyAction(message="should not appear"), ), ] @@ -563,7 +570,9 @@ def test_deny_action_only_on_deny(self): assert result.action is None def test_empty_match_catches_everything(self): - policies = [ParsedPolicy(key="catch-all", priority=1, decision="DENY")] + policies = [ + ParsedPolicy(key="catch-all", priority=1, decision=PolicyDecision.DENY) + ] result = evaluate_policies(policies, req()) assert result.decision == PolicyDecision.DENY @@ -572,7 +581,7 @@ def test_context_nested_field_resolution(self): ParsedPolicy( key="deny-unless-nested", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, unless=[ { "type": "geo_location", @@ -593,7 +602,7 @@ def 
test_taxonomy_empty_key_never_matches(self): ParsedPolicy( key="empty-key", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={"data_use": {"any": [""]}}, ), ] @@ -601,7 +610,11 @@ def test_taxonomy_empty_key_never_matches(self): assert result.decision == PolicyDecision.NO_DECISION def test_enabled_defaults_to_true(self): - policies = [ParsedPolicy(key="default-enabled", priority=100, decision="DENY")] + policies = [ + ParsedPolicy( + key="default-enabled", priority=100, decision=PolicyDecision.DENY + ) + ] result = evaluate_policies(policies, req()) assert result.decision == PolicyDecision.DENY @@ -610,7 +623,7 @@ def test_duplicate_unless_constraints(self): ParsedPolicy( key="allow-unless-dup", priority=100, - decision="ALLOW", + decision=PolicyDecision.ALLOW, unless=[ { "type": "consent", @@ -636,7 +649,7 @@ def test_match_dimension_both_any_and_all(self): ParsedPolicy( key="both-ops", priority=100, - decision="DENY", + decision=PolicyDecision.DENY, match={ "data_category": { "any": ["user.contact", "user.financial"], @@ -659,8 +672,10 @@ def test_match_dimension_both_any_and_all(self): def test_priority_tie_preserves_insertion_order(self): """Stable sort: policies at the same priority keep their original order.""" policies = [ - ParsedPolicy(key="first-allow", priority=100, decision="ALLOW"), - ParsedPolicy(key="second-deny", priority=100, decision="DENY"), + ParsedPolicy( + key="first-allow", priority=100, decision=PolicyDecision.ALLOW + ), + ParsedPolicy(key="second-deny", priority=100, decision=PolicyDecision.DENY), ] result = evaluate_policies(policies, req()) assert result.decision == PolicyDecision.ALLOW @@ -670,7 +685,9 @@ def test_priority_tie_preserves_insertion_order(self): class TestProtocolConformance: def test_evaluator_conforms_to_protocol(self): evaluator = InProcessAccessPolicyEvaluator( - policies=[ParsedPolicy(key="deny-all", priority=0, decision="DENY")] + policies=[ + ParsedPolicy(key="deny-all", priority=0, 
decision=PolicyDecision.DENY) + ] ) result = evaluator.evaluate(req()) assert result.decision == PolicyDecision.DENY @@ -681,7 +698,7 @@ def test_evaluator_set_policies(self): assert result.decision == PolicyDecision.NO_DECISION evaluator.set_policies( - [ParsedPolicy(key="deny-all", priority=0, decision="DENY")] + [ParsedPolicy(key="deny-all", priority=0, decision=PolicyDecision.DENY)] ) result = evaluator.evaluate(req()) assert result.decision == PolicyDecision.DENY @@ -702,7 +719,7 @@ def test_parsed_policy_from_dict(self): assert p.key == "test" assert p.priority == 50 assert p.enabled is False - assert p.decision == "ALLOW" + assert p.decision == PolicyDecision.ALLOW assert p.action is not None assert p.action.message == "hello" @@ -710,7 +727,7 @@ def test_parsed_policy_from_dict_defaults(self): p = parsed_policy_from_dict({}) assert p.key == "" assert p.enabled is True - assert p.decision == "DENY" + assert p.decision == PolicyDecision.DENY def test_request_from_dict(self): r = request_from_dict( @@ -729,7 +746,7 @@ def test_result_round_trip(self): ParsedPolicy( key="deny-all", priority=0, - decision="DENY", + decision=PolicyDecision.DENY, action=PolicyAction(message="denied"), ) ] From 38e63fef21827e25c5440979086e5668bf689496 Mon Sep 17 00:00:00 2001 From: Thabo Fletcher Date: Wed, 15 Apr 2026 10:00:44 -0700 Subject: [PATCH 13/14] Address PR review comments - CI: add gofmt check and include go.sum in module verification diff - Fix gofmt alignment drift in Constraint and EvaluatedPolicyInfo structs - parsed_policy_from_dict: raise InvalidPolicyError with friendly message on bad decision values (instead of raw ValueError) - CLI: catch InvalidPolicyError for clean error output on bad JSON - result_to_dict: omit null action (matches Go omitempty behavior) --- .github/workflows/policy_engine_checks.yml | 5 +++- policy-engine/pkg/pbac/edge_cases_test.go | 2 +- policy-engine/pkg/pbac/policy_types.go | 26 +++++++++---------- src/fides/cli/commands/pbac.py | 9 
+++++-- src/fides/service/pbac/policies/evaluate.py | 22 +++++++++++++--- .../policies/test_evaluate_access_policies.py | 20 ++++++++++++++ 6 files changed, 63 insertions(+), 21 deletions(-) diff --git a/.github/workflows/policy_engine_checks.yml b/.github/workflows/policy_engine_checks.yml index 4e88bd6e1c0..e792c0ef5ad 100644 --- a/.github/workflows/policy_engine_checks.yml +++ b/.github/workflows/policy_engine_checks.yml @@ -37,7 +37,10 @@ jobs: cache-dependency-path: policy-engine/go.mod - name: Verify module - run: go mod tidy && git diff --exit-code go.mod + run: go mod tidy && git diff --exit-code go.mod go.sum + + - name: Format + run: test -z "$(gofmt -l .)" - name: Build run: go build ./... diff --git a/policy-engine/pkg/pbac/edge_cases_test.go b/policy-engine/pkg/pbac/edge_cases_test.go index 9f231119150..86213587623 100644 --- a/policy-engine/pkg/pbac/edge_cases_test.go +++ b/policy-engine/pkg/pbac/edge_cases_test.go @@ -267,7 +267,7 @@ func TestPolicy_MatchAnyAndAll_Combined(t *testing.T) { ID: "p1", Key: "combined", Priority: 100, Enabled: boolPtr(true), Decision: PolicyDeny, Match: MatchBlock{ - DataUse: &MatchDimension{Any: []string{"marketing"}}, + DataUse: &MatchDimension{Any: []string{"marketing"}}, DataCategory: &MatchDimension{All: []string{"user.contact", "user.financial"}}, }, }, diff --git a/policy-engine/pkg/pbac/policy_types.go b/policy-engine/pkg/pbac/policy_types.go index f6670b4a938..4a818c6dd2e 100644 --- a/policy-engine/pkg/pbac/policy_types.go +++ b/policy-engine/pkg/pbac/policy_types.go @@ -58,7 +58,7 @@ type Constraint struct { Requirement string `json:"requirement,omitempty"` // opt_in, opt_out, not_opt_in, not_opt_out // Geo location fields (type=geo_location) - Field string `json:"field,omitempty"` // dotted context path, e.g. "environment.geo_location" + Field string `json:"field,omitempty"` // dotted context path, e.g. 
"environment.geo_location" Values []string `json:"values,omitempty"` // Operator is shared between geo_location and data_flow constraints: @@ -99,22 +99,22 @@ type AccessEvaluationRequest struct { // EvaluatedPolicyInfo is the audit trail for a single policy evaluation. type EvaluatedPolicyInfo struct { - PolicyKey string `json:"policy_key"` - Priority int `json:"priority"` - Matched bool `json:"matched"` - Result string `json:"result"` // "ALLOW", "DENY", "SUPPRESSED" - UnlessTriggered bool `json:"unless_triggered"` + PolicyKey string `json:"policy_key"` + Priority int `json:"priority"` + Matched bool `json:"matched"` + Result string `json:"result"` // "ALLOW", "DENY", "SUPPRESSED" + UnlessTriggered bool `json:"unless_triggered"` } // PolicyEvaluationResult is the output of evaluating access policies. type PolicyEvaluationResult struct { - Decision PolicyDecision `json:"decision"` - DecisivePolicyKey *string `json:"decisive_policy_key,omitempty"` - DecisivePolicyPriority *int `json:"decisive_policy_priority,omitempty"` - UnlessTriggered bool `json:"unless_triggered"` - EvaluatedPolicies []EvaluatedPolicyInfo `json:"evaluated_policies"` - Action *PolicyAction `json:"action,omitempty"` - Reason *string `json:"reason,omitempty"` + Decision PolicyDecision `json:"decision"` + DecisivePolicyKey *string `json:"decisive_policy_key,omitempty"` + DecisivePolicyPriority *int `json:"decisive_policy_priority,omitempty"` + UnlessTriggered bool `json:"unless_triggered"` + EvaluatedPolicies []EvaluatedPolicyInfo `json:"evaluated_policies"` + Action *PolicyAction `json:"action,omitempty"` + Reason *string `json:"reason,omitempty"` } // EvaluatePoliciesRequest is the JSON request body used by the fidesplus diff --git a/src/fides/cli/commands/pbac.py b/src/fides/cli/commands/pbac.py index a10444a0026..737eaecd56f 100644 --- a/src/fides/cli/commands/pbac.py +++ b/src/fides/cli/commands/pbac.py @@ -16,6 +16,7 @@ from fides.service.pbac.evaluate import evaluate_purpose from 
fides.service.pbac.policies.evaluate import ( + InvalidPolicyError, evaluate_policies, parsed_policy_from_dict, request_from_dict, @@ -154,9 +155,13 @@ def evaluate_policies_cmd(input_file: TextIO) -> None: click.echo(f"Error parsing JSON: {e}", err=True) sys.exit(1) - policies = [parsed_policy_from_dict(p) for p in data.get("policies", [])] - request = request_from_dict(data.get("request", {})) + try: + policies = [parsed_policy_from_dict(p) for p in data.get("policies", [])] + except InvalidPolicyError as e: + click.echo(f"Error: {e}", err=True) + sys.exit(1) + request = request_from_dict(data.get("request", {})) result = evaluate_policies(policies, request) click.echo(json.dumps(result_to_dict(result), indent=2)) diff --git a/src/fides/service/pbac/policies/evaluate.py b/src/fides/service/pbac/policies/evaluate.py index 79a3203ffe4..d105818bebd 100644 --- a/src/fides/service/pbac/policies/evaluate.py +++ b/src/fides/service/pbac/policies/evaluate.py @@ -155,19 +155,34 @@ def evaluate_policies( # ── JSON conversion helpers (CLI boundary) ──────────────────────────── +class InvalidPolicyError(ValueError): + """Raised when a policy dict contains invalid values at the JSON boundary.""" + + def parsed_policy_from_dict(data: dict[str, Any]) -> ParsedPolicy: """Construct a ParsedPolicy from a JSON dict (CLI/API boundary). Validates the decision field at construction time rather than - deferring to evaluation time. + deferring to evaluation time. Raises InvalidPolicyError with a + friendly message on bad input instead of an ugly enum ValueError. """ + decision_raw = data.get("decision", "DENY") + try: + decision = PolicyDecision(decision_raw) + except ValueError: + valid = ", ".join(d.value for d in PolicyDecision) + raise InvalidPolicyError( + f"Invalid decision value {decision_raw!r} in policy " + f"{data.get('key', '')!r}. 
Must be one of: {valid}" + ) from None + action_data = data.get("action") action = PolicyAction(message=action_data.get("message")) if action_data else None return ParsedPolicy( key=data.get("key", ""), priority=data.get("priority", 0), enabled=data.get("enabled", True), - decision=PolicyDecision(data.get("decision", "DENY")), + decision=decision, match=data.get("match", {}), unless=data.get("unless", []), action=action, @@ -203,8 +218,7 @@ def result_to_dict(result: PolicyEvaluationResult) -> dict[str, Any]: output["unless_triggered"] = result.unless_triggered if result.action: output["action"] = {"message": result.action.message} - else: - output["action"] = None + # Omit "action" entirely when unset (matches Go's omitempty behavior) output["evaluated_policies"] = [ { "policy_key": ep.policy_key, diff --git a/tests/service/pbac/policies/test_evaluate_access_policies.py b/tests/service/pbac/policies/test_evaluate_access_policies.py index 9e5e9eb428d..c7a97286b4e 100644 --- a/tests/service/pbac/policies/test_evaluate_access_policies.py +++ b/tests/service/pbac/policies/test_evaluate_access_policies.py @@ -8,6 +8,7 @@ from fides.service.pbac.policies.evaluate import ( InProcessAccessPolicyEvaluator, + InvalidPolicyError, ParsedPolicy, evaluate_policies, parsed_policy_from_dict, @@ -756,3 +757,22 @@ def test_result_round_trip(self): assert d["decisive_policy_key"] == "deny-all" assert d["action"]["message"] == "denied" assert len(d["evaluated_policies"]) == 1 + + def test_result_omits_null_action(self): + """Matches Go's omitempty: ALLOW decisions have no action, so the key is absent.""" + policies = [ + ParsedPolicy(key="allow-all", priority=0, decision=PolicyDecision.ALLOW), + ] + result = evaluate_policies(policies, req()) + d = result_to_dict(result) + assert d["decision"] == "ALLOW" + assert "action" not in d + + def test_invalid_decision_raises_friendly_error(self): + """Bad decision strings produce InvalidPolicyError, not raw ValueError.""" + with 
pytest.raises(InvalidPolicyError) as exc: + parsed_policy_from_dict({"key": "bad", "decision": "MAYBE"}) + msg = str(exc.value) + assert "MAYBE" in msg + assert "bad" in msg + assert "ALLOW" in msg and "DENY" in msg # lists valid values From 1d27def0ba6389ebbb47beb056c6a622b9b5ced0 Mon Sep 17 00:00:00 2001 From: Adrian Galvan Date: Wed, 15 Apr 2026 15:28:08 -0700 Subject: [PATCH 14/14] Add fides-pbac standalone CLI and demo fixtures (reference) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reference-only bundle showing how a single Go binary can evaluate SQL queries against a YAML PBAC config directory with no dependency on the fides Python stack. Adds: - policy-engine/cmd/fides-pbac/ — the CLI (flag parsing, statement splitter, output shape matching EvaluationRecord) - policy-engine/pkg/sqlextract/ — regex-based table extractor plus StripComments helper (31 subtests) - policy-engine/pkg/fixtures/ — YAML loaders for consumers, purposes, datasets, policies; builds the collection-name -> dataset_key index - policy-engine/README.md — module-level docs - pbac/ — demo fixture directory used by the CLI Extends pbac types minimally to support the CLI: - yaml tags on pbac.AccessPolicy and its nested types (non-breaking) - PurposeViolation.SuppressedByPolicy / SuppressedByAction so suppressions are auditable inline instead of being filtered out Removes an unused asdict import from src/fides/cli/commands/pbac.py. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- pbac/README.md | 162 ++++++++ pbac/consumers/analytics-team.yml | 8 + pbac/consumers/marketing-team.yml | 8 + pbac/consumers/onboarding-unconfigured.yml | 8 + pbac/datasets/campaigns.yml | 24 ++ pbac/datasets/events.yml | 24 ++ pbac/datasets/sales.yml | 48 +++ pbac/entries/alice.txt | 19 + pbac/entries/bob.txt | 6 + pbac/entries/carol.txt | 6 + pbac/entries/dave.txt | 6 + .../allow-analytics-on-billing-data.yml | 16 + pbac/purposes/analytics.yml | 7 + pbac/purposes/billing.yml | 8 + pbac/purposes/marketing.yml | 8 + policy-engine/README.md | 185 +++++++++ policy-engine/cmd/fides-pbac/main.go | 384 ++++++++++++++++++ policy-engine/go.mod | 2 + policy-engine/go.sum | 4 + policy-engine/pkg/fixtures/fixtures.go | 272 +++++++++++++ policy-engine/pkg/pbac/policy_types.go | 44 +- policy-engine/pkg/pbac/types.go | 26 +- policy-engine/pkg/sqlextract/extract.go | 155 +++++++ policy-engine/pkg/sqlextract/extract_test.go | 217 ++++++++++ src/fides/cli/commands/pbac.py | 1 - 25 files changed, 1616 insertions(+), 32 deletions(-) create mode 100644 pbac/README.md create mode 100644 pbac/consumers/analytics-team.yml create mode 100644 pbac/consumers/marketing-team.yml create mode 100644 pbac/consumers/onboarding-unconfigured.yml create mode 100644 pbac/datasets/campaigns.yml create mode 100644 pbac/datasets/events.yml create mode 100644 pbac/datasets/sales.yml create mode 100644 pbac/entries/alice.txt create mode 100644 pbac/entries/bob.txt create mode 100644 pbac/entries/carol.txt create mode 100644 pbac/entries/dave.txt create mode 100644 pbac/policies/allow-analytics-on-billing-data.yml create mode 100644 pbac/purposes/analytics.yml create mode 100644 pbac/purposes/billing.yml create mode 100644 pbac/purposes/marketing.yml create mode 100644 policy-engine/README.md create mode 100644 policy-engine/cmd/fides-pbac/main.go create mode 100644 policy-engine/go.sum create mode 100644 policy-engine/pkg/fixtures/fixtures.go create mode 
100644 policy-engine/pkg/sqlextract/extract.go create mode 100644 policy-engine/pkg/sqlextract/extract_test.go diff --git a/pbac/README.md b/pbac/README.md new file mode 100644 index 00000000000..5ea4683278e --- /dev/null +++ b/pbac/README.md @@ -0,0 +1,162 @@ +# PBAC demo fixtures + +Sample data for the `fides-pbac` CLI (Go standalone binary, see +`policy-engine/README.md`). Each `.txt` file in `entries/` is one +identity's SQL queries; the CLI is told which identity via +`--identity`, extracts table references from the SQL with +`pkg/sqlextract`, and runs them through the full PBAC pipeline +(purpose evaluation + policy filtering). + +All domains use RFC 2606 reserved `.example` suffixes so this is safe +to commit to the public repo. + +## Cast + +| Identity | Consumer | Purposes | +|---|---|---| +| `alice@demo.example`, `priya@demo.example` | Analytics Team | `analytics` | +| `bob@demo.example`, `maria@demo.example` | Marketing Team | `marketing` | +| `dave@demo.example` | Onboarding | *none declared* | +| `carol@demo.example` | *not registered* | | + +| Purpose | `data_use` | +|---|---| +| `analytics` | `analytics.reporting` | +| `marketing` | `marketing.advertising` | +| `billing` | `essential.service.payment_processing` | + +| Dataset (`fides_key`) | Dataset `data_purposes` | Collections | +|---|---|---| +| `sales` | `billing` | `orders`, `invoices` (+ `analytics` at collection level) | +| `events` | `analytics` | `page_views` | +| `marketing` | `marketing` | `campaigns` | + +Tables are assumed to be **globally unique across datasets**, so the CLI +resolves queries by bare table name. `SELECT ... FROM orders` and +`SELECT ... FROM warehouse.archive.orders` both resolve to whichever +dataset declares an `orders` collection. A query naming a table that +isn't a declared collection (e.g. `cold_storage`) produces an +`UNCONFIGURED_DATASET` gap identified by the query's qualified name. 
+ +## Purposes at three levels + +`data_purposes` can appear on the dataset, each collection, and each +field. They stack additively: + +``` +effective_purposes(dataset.collection) + = dataset.data_purposes + ∪ collection.data_purposes + ∪ union(field.data_purposes for each field in collection) +``` + +The engine currently evaluates at collection granularity (the CLI +extracts tables, not individual columns from SELECT lists), so +field-level purposes fold into their owning collection's effective +set. A column-aware extractor would let field-level purposes gate +individual SELECTs, but that's out of scope today. + +`sales.invoices` demonstrates the collection layer: the dataset is +`billing`, the collection adds `analytics`, so analytics-team queries +against invoices pass the purpose check at the engine without needing +any policy override. + +## Access policies + +`policies/allow-analytics-on-billing-data.yml` shows a realistic ALLOW +override. It matches any violation where the dataset's purposes resolve +to `essential.service.payment_processing` (the `billing` purpose's +`data_use`) and suppresses the violation. + +Policy evaluation only runs on purpose violations. Compliant queries +and coverage gaps pass through unchanged — gaps represent missing +configuration, not access decisions. 
+ +## File layout + +``` +pbac/ + consumers/ one YAML per consumer (top-level key: consumer:) + purposes/ one YAML per purpose (top-level key: purpose:) + datasets/ fideslang Dataset YAML (top-level key: dataset:) + policies/ one YAML per policy (top-level key: policy:) + entries/ one .txt per identity, raw SQL separated by semicolons +``` + +## Invocation + +```bash +fides-pbac --config pbac/ --identity alice@demo.example pbac/entries/alice.txt +fides-pbac --config pbac/ --identity bob@demo.example pbac/entries/bob.txt +fides-pbac --config pbac/ --identity carol@demo.example pbac/entries/carol.txt +fides-pbac --config pbac/ --identity dave@demo.example pbac/entries/dave.txt +``` + +## Expected outcomes + +| File | Query | Outcome | +|---|---|---| +| `alice.txt` | `SELECT ... FROM page_views ...` | **compliant** (analytics ∩ analytics) | +| `alice.txt` | `SELECT ... FROM orders ...` | violation **suppressed** by `allow-analytics-on-billing-data` | +| `alice.txt` | `SELECT ... FROM invoices ...` | **compliant** via collection-level `analytics` on `sales.invoices` | +| `alice.txt` | `SELECT ... FROM campaigns ...` | **violation stands** — no matching policy | +| `bob.txt` | `SELECT ... FROM cold_storage ...` | **gap** `UNCONFIGURED_DATASET` | +| `carol.txt` | `SELECT ... FROM page_views` | **gap** `UNRESOLVED_IDENTITY` | +| `dave.txt` | `SELECT ... FROM page_views ...` | **gap** `UNCONFIGURED_CONSUMER` | + +## Schema notes + +**Consumer YAML**. A consumer represents a group (or individual) that +accesses data, with a list of identities under `members`. Every member +email resolves to the same consumer, so an identity match in the CLI is +"`identity` appears in some consumer's `members` list." + +```yaml +consumer: +- name: Analytics Team + members: + - alice@demo.example + - priya@demo.example + purposes: [analytics] +``` + +If the same identity appears in multiple consumers, the last one loaded +wins. 
+ +**Purpose YAML** mirrors `fidesplus/seed/pbac/data.py::PURPOSES`: + +```yaml +purpose: +- fides_key: analytics + name: Product Analytics + data_use: analytics.reporting + data_categories: [user.behavior] +``` + +**Dataset YAML** is standard fideslang. `data_purposes` can be declared +at the dataset, collection, and field levels; they stack additively +(see "Purposes at three levels" above). `sales.invoices` demonstrates +the collection layer — `sales` is `billing` at the dataset level, +`invoices` adds `analytics` at the collection level, so analytics-team +queries against `invoices` pass the purpose check directly. + +**Policy YAML** matches `pbac.AccessPolicy`: + +```yaml +policy: +- key: allow-analytics-on-billing-data + priority: 100 + enabled: true + decision: ALLOW + match: + data_use: + any: + - essential.service.payment_processing + unless: [] + action: + message: ... +``` + +Match blocks key on the `data_use` of the dataset being accessed +(the CLI resolves dataset purposes to their `data_use` via the +purposes/ directory before calling the policy engine). diff --git a/pbac/consumers/analytics-team.yml b/pbac/consumers/analytics-team.yml new file mode 100644 index 00000000000..6099b1030c9 --- /dev/null +++ b/pbac/consumers/analytics-team.yml @@ -0,0 +1,8 @@ +consumer: +- name: Analytics Team + description: Product analysts running reporting queries. + members: + - alice@demo.example + - priya@demo.example + purposes: + - analytics diff --git a/pbac/consumers/marketing-team.yml b/pbac/consumers/marketing-team.yml new file mode 100644 index 00000000000..4871e5774d1 --- /dev/null +++ b/pbac/consumers/marketing-team.yml @@ -0,0 +1,8 @@ +consumer: +- name: Marketing Team + description: Marketing team managing advertising campaigns. 
+ members: + - bob@demo.example + - maria@demo.example + purposes: + - marketing diff --git a/pbac/consumers/onboarding-unconfigured.yml b/pbac/consumers/onboarding-unconfigured.yml new file mode 100644 index 00000000000..fe424e7f9c2 --- /dev/null +++ b/pbac/consumers/onboarding-unconfigured.yml @@ -0,0 +1,8 @@ +consumer: +- name: Onboarding + description: Registered consumer with no declared purposes. Any member + of this consumer produces UNCONFIGURED_CONSUMER gaps until purposes + are declared. + members: + - dave@demo.example + purposes: [] diff --git a/pbac/datasets/campaigns.yml b/pbac/datasets/campaigns.yml new file mode 100644 index 00000000000..f399a5dc4b5 --- /dev/null +++ b/pbac/datasets/campaigns.yml @@ -0,0 +1,24 @@ +dataset: +- fides_key: marketing + organization_fides_key: default_organization + name: Marketing Campaigns + description: Campaign definitions and targeting rules. + data_categories: + - user.contact + data_purposes: + - marketing + collections: + - name: campaigns + description: Campaign definitions. + data_categories: + - user.contact + fields: + - name: campaign_id + data_categories: + - system.operations + - name: name + data_categories: + - system.operations + - name: audience_rule + data_categories: + - user.contact diff --git a/pbac/datasets/events.yml b/pbac/datasets/events.yml new file mode 100644 index 00000000000..04280a00913 --- /dev/null +++ b/pbac/datasets/events.yml @@ -0,0 +1,24 @@ +dataset: +- fides_key: events + organization_fides_key: default_organization + name: Product Events + description: Page views and behavioral events. + data_categories: + - user.behavior + data_purposes: + - analytics + collections: + - name: page_views + description: Page view events from the web application. 
+ data_categories: + - user.behavior + fields: + - name: user_id + data_categories: + - user.unique_id + - name: page_path + data_categories: + - user.behavior + - name: event_date + data_categories: + - system.operations diff --git a/pbac/datasets/sales.yml b/pbac/datasets/sales.yml new file mode 100644 index 00000000000..f42a2f4538a --- /dev/null +++ b/pbac/datasets/sales.yml @@ -0,0 +1,48 @@ +dataset: +- fides_key: sales + organization_fides_key: default_organization + name: Sales + description: Order and invoice records. + data_categories: + - user.financial + data_purposes: + - billing + collections: + - name: orders + description: Customer order records. + data_categories: + - user.financial + fields: + - name: order_id + data_categories: + - system.operations + - name: customer_id + data_categories: + - user.unique_id + - name: total + data_categories: + - user.financial + - name: order_date + data_categories: + - system.operations + - name: invoices + description: Invoice records tied to orders. Also used by the + Analytics Team for revenue reconciliation, so the collection + declares analytics as an additional purpose beyond the dataset's + billing default. + data_categories: + - user.financial + data_purposes: + - analytics + fields: + - name: invoice_id + data_categories: + - system.operations + - name: order_id + data_categories: + - system.operations + - name: amount + data_categories: + - user.financial + data_purposes: + - analytics diff --git a/pbac/entries/alice.txt b/pbac/entries/alice.txt new file mode 100644 index 00000000000..3e7807f0eb5 --- /dev/null +++ b/pbac/entries/alice.txt @@ -0,0 +1,19 @@ +-- alice@demo.example is registered as the Analytics Team (purpose: analytics). 
+-- +-- Compliant: analytics ∩ events.data_purposes = {analytics} +SELECT user_id, page_path FROM page_views WHERE event_date = '2026-04-14'; + +-- Purpose mismatch at dataset level (analytics vs billing) — SUPPRESSED +-- by the allow-analytics-on-billing-data policy (matches data_use +-- essential.service.payment_processing). +SELECT customer_id, total FROM orders WHERE order_date = '2026-04-14'; + +-- Compliant via COLLECTION-level purpose: the sales dataset is billing +-- at the dataset level, but the invoices collection adds analytics. Any +-- analytics-team query against invoices passes the purpose check directly +-- without needing a policy override. +SELECT invoice_id, amount FROM invoices WHERE amount > 100; + +-- Purpose mismatch (analytics vs marketing) — NO matching policy, so +-- this violation stands. +SELECT campaign_id, name FROM campaigns LIMIT 10; diff --git a/pbac/entries/bob.txt b/pbac/entries/bob.txt new file mode 100644 index 00000000000..3e442c89a4e --- /dev/null +++ b/pbac/entries/bob.txt @@ -0,0 +1,6 @@ +-- bob@demo.example is registered as the Marketing Team (purpose: marketing). +-- +-- Gap (UNCONFIGURED_DATASET): cold_storage is not a declared collection +-- in any dataset under pbac/datasets/, so table resolution falls through +-- to the qualified name and the engine records a dataset gap. +SELECT archive_key FROM cold_storage LIMIT 10; diff --git a/pbac/entries/carol.txt b/pbac/entries/carol.txt new file mode 100644 index 00000000000..9cfac288c21 --- /dev/null +++ b/pbac/entries/carol.txt @@ -0,0 +1,6 @@ +-- carol@demo.example is NOT registered as a consumer. +-- +-- Gap (UNRESOLVED_IDENTITY): no consumer in pbac/consumers/ lists +-- carol in its members, so identity resolution returns nothing and +-- every dataset access is recorded as an identity gap. 
+SELECT COUNT(*) FROM page_views; diff --git a/pbac/entries/dave.txt b/pbac/entries/dave.txt new file mode 100644 index 00000000000..23f3bf0d3ce --- /dev/null +++ b/pbac/entries/dave.txt @@ -0,0 +1,6 @@ +-- dave@demo.example is registered (Onboarding) but declares no purposes. +-- +-- Gap (UNCONFIGURED_CONSUMER): the engine emits UNRESOLVED_IDENTITY and +-- the CLI reclassifies it to UNCONFIGURED_CONSUMER because the consumer +-- was found but its purposes list is empty. +SELECT page_path FROM page_views LIMIT 100; diff --git a/pbac/policies/allow-analytics-on-billing-data.yml b/pbac/policies/allow-analytics-on-billing-data.yml new file mode 100644 index 00000000000..d0101cc42e8 --- /dev/null +++ b/pbac/policies/allow-analytics-on-billing-data.yml @@ -0,0 +1,16 @@ +policy: +- key: allow-analytics-on-billing-data + priority: 100 + enabled: true + decision: ALLOW + match: + data_use: + any: + - essential.service.payment_processing + unless: [] + action: + message: > + Analytics team is permitted to query payment data for reconciliation and + finance reporting. Overrides the purpose mismatch that PBAC would otherwise + raise when analytics (analytics.reporting) touches billing data + (essential.service.payment_processing). diff --git a/pbac/purposes/analytics.yml b/pbac/purposes/analytics.yml new file mode 100644 index 00000000000..44fb9ec5d89 --- /dev/null +++ b/pbac/purposes/analytics.yml @@ -0,0 +1,7 @@ +purpose: +- fides_key: analytics + name: Product Analytics + data_use: analytics.reporting + data_categories: + - user.behavior + description: Measuring product usage and engagement. 
diff --git a/pbac/purposes/billing.yml b/pbac/purposes/billing.yml new file mode 100644 index 00000000000..300703da841 --- /dev/null +++ b/pbac/purposes/billing.yml @@ -0,0 +1,8 @@ +purpose: +- fides_key: billing + name: Billing Operations + data_use: essential.service.payment_processing + data_subject: customer + data_categories: + - user.financial + description: Processing payments, invoices, and receipts. diff --git a/pbac/purposes/marketing.yml b/pbac/purposes/marketing.yml new file mode 100644 index 00000000000..48b96c55b67 --- /dev/null +++ b/pbac/purposes/marketing.yml @@ -0,0 +1,8 @@ +purpose: +- fides_key: marketing + name: Marketing Advertising + data_use: marketing.advertising + data_subject: customer + data_categories: + - user.contact + description: Promotional campaigns and advertising. diff --git a/policy-engine/README.md b/policy-engine/README.md new file mode 100644 index 00000000000..890b182dcbf --- /dev/null +++ b/policy-engine/README.md @@ -0,0 +1,185 @@ +# policy-engine + +A Go implementation of the Fides Purpose-Based Access Control (PBAC) evaluation engine, plus a standalone `fides-pbac` CLI for evaluating raw SQL queries against a PBAC config directory. + +## Why this exists + +The Python PBAC engine lives inside the `fides` package, which drags in FastAPI, Celery, every SaaS connector, Alembic, and the rest of the Fides framework. Any CLI that imports from `fides` inherits that startup cost (multiple seconds before emitting output). + +The `fides-pbac` binary: + +- Starts and loads its config in **~3 ms**. +- Is a **single 3.5 MB static binary** with one external Go dep (`gopkg.in/yaml.v3`). +- Has **no server, no database, no Redis, no Python runtime** requirement. +- Runs in CI, on a laptop, in an airgapped environment, wherever you can copy a file. 
+ +## Module layout + +``` +policy-engine/ +├── cmd/ +│ └── fides-pbac/ Standalone CLI binary (main package) +├── pkg/ +│ ├── pbac/ Purpose and policy evaluation engine +│ ├── sqlextract/ Regex-based SQL table extractor +│ └── fixtures/ YAML loaders for consumers/purposes/datasets/policies +├── go.mod +└── go.sum +``` + +## fides-pbac CLI + +### Install from a local checkout + +```bash +cd policy-engine +go install ./cmd/fides-pbac +``` + +`go install` builds from whatever is in your working tree (no commit or publish required). The binary lands in `$(go env GOBIN)` or `~/go/bin`. Add that to your `PATH` if it isn't already: + +```bash +export PATH="$HOME/go/bin:$PATH" # add to ~/.zshrc for persistence +``` + +Verify: + +```bash +fides-pbac --help +``` + +### Fixture layout + +`fides-pbac` reads a directory with this structure: + +``` +<config>/ +├── consumers/*.yml top-level key: consumer: +├── purposes/*.yml top-level key: purpose: +├── datasets/*.yml fideslang Dataset YAML, top-level key: dataset: +└── policies/*.yml top-level key: policy: +``` + +Reference fixtures live at `pbac/` at the repo root, including a README describing the cast of identities, datasets, policies, and expected outcomes. + +### Usage + +```bash +fides-pbac --config <dir> --identity <email> [FILE] +``` + +If `FILE` is omitted or `-`, the CLI reads SQL from stdin. Each top-level statement (split on `;` outside strings and comments) becomes one `EvaluationRecord`. All statements in one invocation are attributed to `--identity`. + +### Examples + +```bash +# Against a fixture file +fides-pbac --config pbac/ \ + --identity alice@demo.example \ + pbac/entries/alice.txt + +# Single query from stdin +echo "SELECT customer_id, total FROM orders" | \ + fides-pbac --config pbac/ --identity alice@demo.example - + +# Pipe a batch of SQL lines +cat production-queries.sql | \ + fides-pbac --config pbac/ --identity bob@demo.example - +``` + +Output is JSON on stdout.
The record shape mirrors `EvaluationRecord` from `src/fides/service/pbac/types.py`: one `records` array with `query_id`, `identity`, resolved `consumer`, resolved `dataset_keys`, `is_compliant`, `violations`, `gaps`, `total_accesses`, and `query_text` per SQL statement. When a policy ALLOWs a violation, the suppression is recorded inline on the violation via `suppressed_by_policy` and (when present) `suppressed_by_action`. + +### Evaluation pipeline + +Each SQL statement runs through: + +1. **Table extraction** — `sqlextract` pulls 1-3 part qualified identifiers out of the SQL text. +2. **Identity resolution** — `--identity` looked up against `consumers/` by member email. A consumer with N members is reachable via any of them. +3. **Dataset resolution** — each extracted table is looked up in the collection-name index built from `datasets/*.yml` (`collections[].name`). Tables are assumed globally unique across datasets. Unknown tables fall through to their qualified name so the resulting gap identifies the missing table. +4. **Purpose evaluation** — consumer's `purposes` ∩ effective purposes at (dataset, collection). Effective purposes are the union of dataset `data_purposes`, collection `data_purposes`, and the union of every field's `data_purposes` in that collection. Non-overlap produces a `PurposeViolation`; missing configuration produces an `EvaluationGap`. +5. **Gap reclassification** — when the consumer exists but has no purposes, `UNRESOLVED_IDENTITY` gaps become `UNCONFIGURED_CONSUMER`. +6. **Policy filtering** — each violation's dataset purposes are mapped to their `data_use` strings via `purposes/`, then the access policy engine evaluates the loaded `policies/`. An `ALLOW` decision sets `suppressed_by_policy` (and `suppressed_by_action` when the policy has an `action.message`) on the violation, which remains in `violations` for auditability. + +Gaps do not flow through policy filtering — they represent missing config, not access decisions. 
A record is compliant when every violation is suppressed and no gaps were recorded. + +## Packages + +### `pkg/pbac` + +The canonical Go implementation of the Fides PBAC purpose and policy engines. Mirrors `src/fides/service/pbac/evaluate.py` and `src/fides/service/pbac/policies/evaluate.py` rule-for-rule. + +Key entry points: + +- `pbac.EvaluatePurpose(consumer, datasets, collections) PurposeEvaluationResult` +- `pbac.EvaluatePolicies(policies, request) *PolicyEvaluationResult` + +This package has no dependencies outside the Go standard library. + +### `pkg/sqlextract` + +A deliberately simple regex-based extractor that pulls `TableRef{Catalog, Schema, Table}` values out of SQL text. + +Handles: + +- 1, 2, and 3-part qualified identifiers (`orders`, `sales.orders`, `prod.sales.orders`) +- Backtick and double-quote identifier wrapping +- Line (`--`) and block (`/* */`) comments +- Case-insensitive `FROM` / `JOIN` +- CTE names (filtered out via a `WITH ... AS (` scan) +- Subqueries (naturally skipped since `(` isn't an identifier char) +- Deduplication (case-insensitive, by qualified name) + +Known limitations: + +- Views are not expanded. A query that reads from a view reports the view name, not the underlying tables. +- Wildcard tables (BigQuery `events_*`) are returned verbatim. +- Tables read inside UDFs or stored procedures are missed. +- Old-style comma joins (`FROM a, b, c`) only pick up the first table. +- `FROM` tokens inside string literals (`'FROM something' AS x`) produce false positives. + +For exact resolution, use the platform's audit API (BigQuery Jobs, Snowflake `ACCESS_HISTORY`, Databricks query history). sqlextract is a best-effort helper for when you only have SQL text. + +### `pkg/fixtures` + +YAML loaders for `consumers/`, `purposes/`, `datasets/`, and `policies/`. 
Returns: + +- `map[member_email]Consumer` — a consumer with N members appears N times, once per member, all pointing at the same `Consumer` +- `map[fides_key]Purpose` — used to resolve dataset purposes to `data_use` strings before policy eval +- `Datasets{ Purposes, Tables }` — `Purposes` is keyed by dataset `fides_key` and fed to the engine; `Tables` is a lowercase collection-name index (`collections[].name → fides_key`) for SQL-driven dataset resolution +- `[]pbac.AccessPolicy` — disabled policies (`enabled: false`) are filtered out at load time + +## What's intentionally not here + +- **Column-aware extraction.** `sqlextract` pulls tables, not individual columns from SELECT lists. Field-level purposes therefore fold into their owning collection's effective set (any field's purpose broadens the whole collection). A column-aware extractor would let field-level purposes gate individual SELECTs, but that's out of scope today. +- **Runtime context for `unless` constraints.** The CLI populates `data_uses` on the policy request but not `context` (consent state, geo location, data flows). Policies that gate their ALLOW decision on consent or geo will not fire correctly until the CLI grows a way to inject that context. + +## Build and test + +```bash +# From policy-engine/ +go build ./... +go test ./... +go vet ./... + +# Build just the CLI for your platform +go build -o fides-pbac ./cmd/fides-pbac + +# Cross-compile +GOOS=linux GOARCH=amd64 go build -o fides-pbac-linux-amd64 ./cmd/fides-pbac +GOOS=darwin GOARCH=arm64 go build -o fides-pbac-darwin-arm64 ./cmd/fides-pbac +GOOS=windows GOARCH=amd64 go build -o fides-pbac-windows-amd64.exe ./cmd/fides-pbac +``` + +Tests cover the SQL extractor (24 extraction subtests + 7 `StripComments` subtests) and the purpose / policy engines. `go vet` is clean. 
+ +## Performance + +Measured on Apple Silicon, 50 warm runs per case: + +| Invocation | Median | p95 | +|---|---|---| +| Empty stdin (startup + config load only) | 2.9 ms | 3.4 ms | +| Single query from stdin | 2.8 ms | 3.2 ms | +| 2-query file from disk | 2.9 ms | 3.3 ms | + +The floor is process launch and Go runtime init. The engine and YAML loading are sub-millisecond. diff --git a/policy-engine/cmd/fides-pbac/main.go b/policy-engine/cmd/fides-pbac/main.go new file mode 100644 index 00000000000..f1ce6be2c62 --- /dev/null +++ b/policy-engine/cmd/fides-pbac/main.go @@ -0,0 +1,384 @@ +// Command fides-pbac evaluates raw SQL queries against a PBAC config. +// +// Usage: +// +// fides-pbac --config pbac/ --identity alice@demo.example pbac/entries/alice.txt +// echo "SELECT * FROM prod.events.page_views" | \ +// fides-pbac --config pbac/ --identity alice@demo.example +// +// A standalone Go binary with no server dependency: parses SQL, resolves +// datasets, evaluates purpose overlap, and filters violations through +// access policies. +// +// Output is JSON to stdout: one record per SQL statement. +package main + +import ( + "crypto/rand" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/ethyca/fides/policy-engine/pkg/fixtures" + "github.com/ethyca/fides/policy-engine/pkg/pbac" + "github.com/ethyca/fides/policy-engine/pkg/sqlextract" +) + +// record is the JSON output shape, one per SQL statement. Mirrors +// fides.service.pbac.types.EvaluationRecord, with one extension: +// PurposeViolation entries carry SuppressedByPolicy / SuppressedByAction +// when an ALLOW policy overrode the violation, so suppressions remain +// auditable. A record is compliant when no violation stands (every +// violation has a SuppressedByPolicy) and no gaps were recorded. 
+type record struct { + QueryID string `json:"query_id"` + Identity string `json:"identity"` + Consumer *string `json:"consumer"` + DatasetKeys []string `json:"dataset_keys"` + IsCompliant bool `json:"is_compliant"` + Violations []pbac.PurposeViolation `json:"violations"` + Gaps []pbac.EvaluationGap `json:"gaps"` + TotalAccesses int `json:"total_accesses"` + QueryText string `json:"query_text"` +} + +type output struct { + Records []record `json:"records"` +} + +func main() { + var ( + configDir = flag.String("config", "", + "Directory containing consumers/, purposes/, datasets/, policies/ YAML fixtures (required)") + identity = flag.String("identity", "", + "User identity to attribute every query in the input to (required)") + ) + flag.Usage = func() { + fmt.Fprintf(os.Stderr, "Usage: %s --config DIR --identity EMAIL [FILE]\n\n", os.Args[0]) + fmt.Fprintln(os.Stderr, "Evaluate SQL queries against PBAC config. Reads SQL from FILE or stdin.") + fmt.Fprintln(os.Stderr, "Each top-level statement (split on ;) becomes one EvaluationRecord.") + fmt.Fprintln(os.Stderr) + flag.PrintDefaults() + } + flag.Parse() + + if *configDir == "" || *identity == "" { + flag.Usage() + os.Exit(2) + } + + consumers, err := fixtures.LoadConsumers(filepath.Join(*configDir, "consumers")) + if err != nil { + die(err) + } + purposes, err := fixtures.LoadPurposes(filepath.Join(*configDir, "purposes")) + if err != nil { + die(err) + } + datasets, err := fixtures.LoadDatasets(filepath.Join(*configDir, "datasets")) + if err != nil { + die(err) + } + policies, err := fixtures.LoadPolicies(filepath.Join(*configDir, "policies")) + if err != nil { + die(err) + } + + sqlText, err := readInput(flag.Args()) + if err != nil { + die(err) + } + + statements := splitStatements(sqlText) + records := make([]record, 0, len(statements)) + for _, stmt := range statements { + records = append(records, evaluateStatement(stmt, *identity, consumers, purposes, datasets, policies)) + } + + enc := 
json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(output{Records: records}); err != nil { + die(err) + } +} + +// evaluateStatement runs one SQL statement through the full pipeline: +// table extraction → dataset resolution → identity resolution → purpose +// evaluation → gap reclassification → policy filtering of violations. +func evaluateStatement( + sql string, + identity string, + consumers map[string]fixtures.Consumer, + purposes map[string]fixtures.Purpose, + datasets fixtures.Datasets, + policies []pbac.AccessPolicy, +) record { + tables := sqlextract.Extract(sql) + + // Dataset resolution by table name. Assumes table names are unique + // across all datasets. Tables not present in any dataset's + // collections are keyed by their qualified name so the resulting + // UNCONFIGURED_DATASET gap points at the specific unknown table. + // + // Each resolved table is also recorded as a collection access on + // its owning dataset so the engine can apply collection- and + // field-level purposes (via DatasetPurposes.CollectionPurposes).
+ datasetKeys := make([]string, 0, len(tables)) + collections := map[string][]string{} + for _, t := range tables { + name := strings.ToLower(t.Table) + if key, ok := datasets.Tables[name]; ok { + datasetKeys = append(datasetKeys, key) + collections[key] = append(collections[key], name) + } else { + datasetKeys = append(datasetKeys, t.QualifiedName()) + } + } + + consumer, consumerFound := consumers[identity] + var consumerPurposes pbac.ConsumerPurposes + if consumerFound { + consumerPurposes = pbac.ConsumerPurposes{ + ConsumerID: consumer.Name, + ConsumerName: consumer.Name, + PurposeKeys: consumer.Purposes, + } + } else { + consumerPurposes = pbac.ConsumerPurposes{ + ConsumerID: identity, + ConsumerName: identity, + PurposeKeys: nil, + } + } + + dsMap := make(map[string]pbac.DatasetPurposes, len(datasetKeys)) + for _, key := range datasetKeys { + if dp, ok := datasets.Purposes[key]; ok { + dsMap[key] = dp + } else { + dsMap[key] = pbac.DatasetPurposes{DatasetKey: key, PurposeKeys: nil} + } + } + + result := pbac.EvaluatePurpose(consumerPurposes, dsMap, collections) + + // Gap reclassification: when the consumer was found but has no + // declared purposes, the engine's UNRESOLVED_IDENTITY gaps are + // reclassified to UNCONFIGURED_CONSUMER. 
+ if consumerFound && len(consumer.Purposes) == 0 { + for i, g := range result.Gaps { + if g.GapType == pbac.GapUnresolvedIdentity { + result.Gaps[i] = pbac.EvaluationGap{ + GapType: "unconfigured_consumer", + Identifier: consumer.Name, + DatasetKey: g.DatasetKey, + Reason: "Consumer has no declared purposes", + } + } + } + } + + violations := applyPolicySuppression(result.Violations, purposes, policies) + + var consumerName *string + if consumerFound { + name := consumer.Name + consumerName = &name + } + + return record{ + QueryID: newQueryID(), + Identity: identity, + Consumer: consumerName, + DatasetKeys: datasetKeys, + IsCompliant: isCompliant(violations, result.Gaps), + Violations: violations, + Gaps: result.Gaps, + TotalAccesses: result.TotalAccesses, + QueryText: sqlextract.StripComments(sql), + } +} + +// isCompliant is true when no violation is still standing (every +// violation has been suppressed by a policy) and no gaps were recorded. +func isCompliant(violations []pbac.PurposeViolation, gaps []pbac.EvaluationGap) bool { + if len(gaps) > 0 { + return false + } + for _, v := range violations { + if v.SuppressedByPolicy == nil { + return false + } + } + return true +} + +// applyPolicySuppression runs each purpose violation through the access +// policy evaluator. Violations the policy engine resolves to ALLOW are +// marked in-place with SuppressedByPolicy and (when present) the +// decisive policy's action. Non-suppressed violations are returned +// unchanged. The returned slice contains every input violation in +// input order. +// +// Populates AccessEvaluationRequest.DataUses by mapping each dataset +// purpose key through the loaded Purpose map to its data_use — this is +// how policy match blocks on data_use find data to match against. 
+func applyPolicySuppression( + violations []pbac.PurposeViolation, + purposes map[string]fixtures.Purpose, + policies []pbac.AccessPolicy, +) []pbac.PurposeViolation { + if len(violations) == 0 || len(policies) == 0 { + return violations + } + + out := make([]pbac.PurposeViolation, 0, len(violations)) + for _, v := range violations { + req := pbac.AccessEvaluationRequest{ + ConsumerID: v.ConsumerID, + ConsumerName: v.ConsumerName, + ConsumerPurposes: v.ConsumerPurposes, + DatasetKey: v.DatasetKey, + DatasetPurposes: v.DatasetPurposes, + Collection: v.Collection, + DataUses: dataUsesForPurposes(v.DatasetPurposes, purposes), + } + res := pbac.EvaluatePolicies(policies, &req) + if res != nil && res.Decision == pbac.PolicyAllow && res.DecisivePolicyKey != nil { + key := *res.DecisivePolicyKey + v.SuppressedByPolicy = &key + if res.Action != nil { + action := *res.Action + v.SuppressedByAction = &action + } + } + out = append(out, v) + } + return out +} + +// dataUsesForPurposes maps purpose fides_keys to their data_use strings +// via the loaded Purpose map. Purposes with no configured data_use are +// skipped. Result is deduplicated while preserving input order. +func dataUsesForPurposes(purposeKeys []string, purposes map[string]fixtures.Purpose) []string { + if len(purposeKeys) == 0 || len(purposes) == 0 { + return nil + } + seen := map[string]bool{} + out := make([]string, 0, len(purposeKeys)) + for _, pk := range purposeKeys { + p, ok := purposes[pk] + if !ok || p.DataUse == "" { + continue + } + if seen[p.DataUse] { + continue + } + seen[p.DataUse] = true + out = append(out, p.DataUse) + } + return out +} + +// readInput returns the SQL text from the first positional arg, or stdin +// if no arg (or "-") is given. 
+func readInput(args []string) (string, error) { + if len(args) == 0 || args[0] == "-" { + b, err := io.ReadAll(os.Stdin) + if err != nil { + return "", fmt.Errorf("read stdin: %w", err) + } + return string(b), nil + } + b, err := os.ReadFile(args[0]) + if err != nil { + return "", fmt.Errorf("read %s: %w", args[0], err) + } + return string(b), nil +} + +// splitStatements splits SQL on top-level semicolons, preserving the +// original text of each statement (including comments). Respects single- +// quoted strings and both SQL comment styles so semicolons inside them +// are not treated as separators. +// +// Known limitation: does not handle backslash-escaped quotes, doubled +// single quotes ('' = '), or dollar-quoted strings (Postgres). Good +// enough for the demo fixtures. +func splitStatements(sql string) []string { + var stmts []string + var buf strings.Builder + var ( + inSingleQuote bool + inLineComment bool + inBlockComment bool + ) + + flush := func() { + s := strings.TrimSpace(buf.String()) + if s != "" { + stmts = append(stmts, s) + } + buf.Reset() + } + + for i := 0; i < len(sql); i++ { + c := sql[i] + switch { + case inLineComment: + buf.WriteByte(c) + if c == '\n' { + inLineComment = false + } + case inBlockComment: + buf.WriteByte(c) + if c == '*' && i+1 < len(sql) && sql[i+1] == '/' { + buf.WriteByte(sql[i+1]) + i++ + inBlockComment = false + } + case inSingleQuote: + buf.WriteByte(c) + if c == '\'' { + inSingleQuote = false + } + case c == '-' && i+1 < len(sql) && sql[i+1] == '-': + inLineComment = true + buf.WriteByte(c) + case c == '/' && i+1 < len(sql) && sql[i+1] == '*': + inBlockComment = true + buf.WriteByte(c) + buf.WriteByte(sql[i+1]) + i++ + case c == '\'': + inSingleQuote = true + buf.WriteByte(c) + case c == ';': + flush() + default: + buf.WriteByte(c) + } + } + flush() + return stmts +} + +// newQueryID returns a UUIDv4-formatted string using crypto/rand. 
+func newQueryID() string { + b := make([]byte, 16) + if _, err := rand.Read(b); err != nil { + panic(fmt.Sprintf("crypto/rand: %v", err)) + } + b[6] = (b[6] & 0x0f) | 0x40 // version 4 + b[8] = (b[8] & 0x3f) | 0x80 // variant 10 + return fmt.Sprintf("%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) +} + +func die(err error) { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) +} diff --git a/policy-engine/go.mod b/policy-engine/go.mod index 9703fd57ec4..99df04a04a1 100644 --- a/policy-engine/go.mod +++ b/policy-engine/go.mod @@ -1,3 +1,5 @@ module github.com/ethyca/fides/policy-engine go 1.23 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/policy-engine/go.sum b/policy-engine/go.sum new file mode 100644 index 00000000000..a62c313c5b0 --- /dev/null +++ b/policy-engine/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/policy-engine/pkg/fixtures/fixtures.go b/policy-engine/pkg/fixtures/fixtures.go new file mode 100644 index 00000000000..9907fa93d41 --- /dev/null +++ b/policy-engine/pkg/fixtures/fixtures.go @@ -0,0 +1,272 @@ +// Package fixtures loads PBAC YAML config directories into shapes the +// engine can consume. +// +// The directory layout matches pbac/ at the repo root: +// +// <dir>/consumers/*.yml top-level key: consumer: +// <dir>/purposes/*.yml top-level key: purpose: +// <dir>/datasets/*.yml fideslang Dataset YAML, top-level key: dataset: +// <dir>/policies/*.yml top-level key: policy: +package fixtures + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/ethyca/fides/policy-engine/pkg/pbac" + "gopkg.in/yaml.v3" +) + +// Consumer is a data consumer loaded from YAML.
Members is the list of +// identities (typically email addresses) that resolve to this consumer. +type Consumer struct { + Name string `yaml:"name"` + Description string `yaml:"description,omitempty"` + Members []string `yaml:"members"` + Purposes []string `yaml:"purposes"` +} + +type consumerFile struct { + Consumer []Consumer `yaml:"consumer"` +} + +// Purpose is a declared purpose loaded from YAML. +type Purpose struct { + FidesKey string `yaml:"fides_key"` + Name string `yaml:"name"` + DataUse string `yaml:"data_use"` + DataSubject string `yaml:"data_subject,omitempty"` + DataCategories []string `yaml:"data_categories,omitempty"` + Description string `yaml:"description,omitempty"` +} + +type purposeFile struct { + Purpose []Purpose `yaml:"purpose"` +} + +// dataset / collection / field carry data_purposes at each level. The +// engine reads dataset-level purposes directly and reads collection +// purposes via DatasetPurposes.CollectionPurposes. Field purposes are +// unioned into their owning collection's CollectionPurposes since the +// CLI extracts tables (collections) from SQL, not individual columns. +type dataset struct { + FidesKey string `yaml:"fides_key"` + Name string `yaml:"name,omitempty"` + DataPurposes []string `yaml:"data_purposes,omitempty"` + Collections []collection `yaml:"collections,omitempty"` +} + +type collection struct { + Name string `yaml:"name"` + DataPurposes []string `yaml:"data_purposes,omitempty"` + Fields []field `yaml:"fields,omitempty"` +} + +type field struct { + Name string `yaml:"name"` + DataPurposes []string `yaml:"data_purposes,omitempty"` +} + +type datasetFile struct { + Dataset []dataset `yaml:"dataset"` +} + +// Datasets bundles everything LoadDatasets returns: the per-dataset +// purpose map (engine input) and the table-name -> dataset_key index +// (table resolution). +type Datasets struct { + // Purposes is keyed by dataset fides_key and fed to the engine. 
+ Purposes map[string]pbac.DatasetPurposes + // Tables maps a lowercase table name to its owning dataset's + // fides_key. Assumes table names are globally unique across + // datasets; on collision the last one loaded wins. + Tables map[string]string +} + +// LoadConsumers walks dir for *.yml files and returns a map from member +// identity to Consumer. A consumer with N members appears N times in the +// map, once per member, all pointing to the same Consumer value. +// +// When two consumers list the same member, the last one loaded wins. +// Returns an empty map (not an error) if dir doesn't exist. +func LoadConsumers(dir string) (map[string]Consumer, error) { + out := map[string]Consumer{} + if _, err := os.Stat(dir); os.IsNotExist(err) { + return out, nil + } + matches, err := filepath.Glob(filepath.Join(dir, "*.yml")) + if err != nil { + return nil, err + } + for _, path := range matches { + var f consumerFile + if err := readYAML(path, &f); err != nil { + return nil, fmt.Errorf("consumers: %s: %w", path, err) + } + for _, c := range f.Consumer { + for _, member := range c.Members { + if member != "" { + out[member] = c + } + } + } + } + return out, nil +} + +// LoadPurposes walks dir for *.yml files and returns a map keyed by FidesKey. +func LoadPurposes(dir string) (map[string]Purpose, error) { + out := map[string]Purpose{} + if _, err := os.Stat(dir); os.IsNotExist(err) { + return out, nil + } + matches, err := filepath.Glob(filepath.Join(dir, "*.yml")) + if err != nil { + return nil, err + } + for _, path := range matches { + var f purposeFile + if err := readYAML(path, &f); err != nil { + return nil, fmt.Errorf("purposes: %s: %w", path, err) + } + for _, p := range f.Purpose { + if p.FidesKey != "" { + out[p.FidesKey] = p + } + } + } + return out, nil +} + +// LoadDatasets walks dir for fideslang Dataset YAML files and returns +// both the per-dataset purpose map (for the engine) and a table -> +// dataset_key index built from collections[].name. 
+// +// Table resolution assumes collection names are globally unique across +// datasets, so SELECT ... FROM warehouse.orders and +// SELECT ... FROM archive.orders both resolve to whichever dataset +// declares a collection named "orders". +// +// Purposes are unioned across the three taxonomy levels: +// +// - dataset.data_purposes → DatasetPurposes.PurposeKeys +// - collection.data_purposes ┐ +// - union(field.data_purposes for fields) ┴ → CollectionPurposes[name] +// +// The engine's EffectivePurposes(collection) then unions PurposeKeys +// with CollectionPurposes[collection], so the final effective set for +// a query on . is the union of all three levels. +func LoadDatasets(dir string) (Datasets, error) { + result := Datasets{ + Purposes: map[string]pbac.DatasetPurposes{}, + Tables: map[string]string{}, + } + if _, err := os.Stat(dir); os.IsNotExist(err) { + return result, nil + } + matches, err := filepath.Glob(filepath.Join(dir, "*.yml")) + if err != nil { + return result, err + } + for _, path := range matches { + var f datasetFile + if err := readYAML(path, &f); err != nil { + return result, fmt.Errorf("datasets: %s: %w", path, err) + } + for _, ds := range f.Dataset { + if ds.FidesKey == "" { + continue + } + dp := pbac.DatasetPurposes{ + DatasetKey: ds.FidesKey, + PurposeKeys: ds.DataPurposes, + CollectionPurposes: map[string][]string{}, + } + for _, c := range ds.Collections { + if c.Name == "" { + continue + } + name := strings.ToLower(c.Name) + effective := collectionEffectivePurposes(c) + if len(effective) > 0 { + dp.CollectionPurposes[name] = effective + } + result.Tables[name] = ds.FidesKey + } + result.Purposes[ds.FidesKey] = dp + } + } + return result, nil +} + +// collectionEffectivePurposes unions a collection's own data_purposes +// with every one of its fields' data_purposes. Result is deduplicated +// and sorted for deterministic output. 
+func collectionEffectivePurposes(c collection) []string { + set := map[string]bool{} + for _, p := range c.DataPurposes { + if p != "" { + set[p] = true + } + } + for _, fd := range c.Fields { + for _, p := range fd.DataPurposes { + if p != "" { + set[p] = true + } + } + } + if len(set) == 0 { + return nil + } + out := make([]string, 0, len(set)) + for p := range set { + out = append(out, p) + } + sort.Strings(out) + return out +} + +type policyFile struct { + Policy []pbac.AccessPolicy `yaml:"policy"` +} + +// LoadPolicies walks dir for *.yml files and returns the enabled access +// policies. Disabled policies (enabled: false) are filtered out so the +// CLI doesn't have to care about the enabled flag later. +// +// Returns an empty slice (not an error) if dir doesn't exist. +func LoadPolicies(dir string) ([]pbac.AccessPolicy, error) { + out := []pbac.AccessPolicy{} + if _, err := os.Stat(dir); os.IsNotExist(err) { + return out, nil + } + matches, err := filepath.Glob(filepath.Join(dir, "*.yml")) + if err != nil { + return nil, err + } + for _, path := range matches { + var f policyFile + if err := readYAML(path, &f); err != nil { + return nil, fmt.Errorf("policies: %s: %w", path, err) + } + for _, p := range f.Policy { + if p.Enabled != nil && !*p.Enabled { + continue + } + out = append(out, p) + } + } + return out, nil +} + +func readYAML(path string, into interface{}) error { + b, err := os.ReadFile(path) + if err != nil { + return err + } + return yaml.Unmarshal(b, into) +} diff --git a/policy-engine/pkg/pbac/policy_types.go b/policy-engine/pkg/pbac/policy_types.go index 4a818c6dd2e..bd4220edfc2 100644 --- a/policy-engine/pkg/pbac/policy_types.go +++ b/policy-engine/pkg/pbac/policy_types.go @@ -15,28 +15,28 @@ const ( // and the database schema default. A policy omitting the enabled field is // treated as active. 
type AccessPolicy struct { - ID string `json:"id"` - Key string `json:"key"` - Priority int `json:"priority"` - Enabled *bool `json:"enabled,omitempty"` - Decision PolicyDecision `json:"decision"` // ALLOW or DENY - Match MatchBlock `json:"match"` - Unless []Constraint `json:"unless,omitempty"` - Action *PolicyAction `json:"action,omitempty"` + ID string `json:"id" yaml:"id,omitempty"` + Key string `json:"key" yaml:"key"` + Priority int `json:"priority" yaml:"priority"` + Enabled *bool `json:"enabled,omitempty" yaml:"enabled,omitempty"` + Decision PolicyDecision `json:"decision" yaml:"decision"` // ALLOW or DENY + Match MatchBlock `json:"match" yaml:"match"` + Unless []Constraint `json:"unless,omitempty" yaml:"unless,omitempty"` + Action *PolicyAction `json:"action,omitempty" yaml:"action,omitempty"` } // MatchBlock declares which taxonomy dimensions a policy applies to. // An empty MatchBlock matches everything (catch-all). type MatchBlock struct { - DataUse *MatchDimension `json:"data_use,omitempty"` - DataCategory *MatchDimension `json:"data_category,omitempty"` - DataSubject *MatchDimension `json:"data_subject,omitempty"` + DataUse *MatchDimension `json:"data_use,omitempty" yaml:"data_use,omitempty"` + DataCategory *MatchDimension `json:"data_category,omitempty" yaml:"data_category,omitempty"` + DataSubject *MatchDimension `json:"data_subject,omitempty" yaml:"data_subject,omitempty"` } // MatchDimension specifies the any/all operators for one taxonomy dimension. type MatchDimension struct { - Any []string `json:"any,omitempty"` // at least one must match - All []string `json:"all,omitempty"` // all must match + Any []string `json:"any,omitempty" yaml:"any,omitempty"` // at least one must match + All []string `json:"all,omitempty" yaml:"all,omitempty"` // all must match } // ConstraintType identifies the kind of unless condition. @@ -51,29 +51,29 @@ const ( // Constraint is one condition in an unless block. 
// All constraints in a block are AND'd — all must trigger for the unless to fire. type Constraint struct { - Type ConstraintType `json:"type"` + Type ConstraintType `json:"type" yaml:"type"` // Consent fields (type=consent) - PrivacyNoticeKey string `json:"privacy_notice_key,omitempty"` - Requirement string `json:"requirement,omitempty"` // opt_in, opt_out, not_opt_in, not_opt_out + PrivacyNoticeKey string `json:"privacy_notice_key,omitempty" yaml:"privacy_notice_key,omitempty"` + Requirement string `json:"requirement,omitempty" yaml:"requirement,omitempty"` // opt_in, opt_out, not_opt_in, not_opt_out // Geo location fields (type=geo_location) - Field string `json:"field,omitempty"` // dotted context path, e.g. "environment.geo_location" - Values []string `json:"values,omitempty"` + Field string `json:"field,omitempty" yaml:"field,omitempty"` // dotted context path, e.g. "environment.geo_location" + Values []string `json:"values,omitempty" yaml:"values,omitempty"` // Operator is shared between geo_location and data_flow constraints: // geo_location: "in", "not_in" // data_flow: "any_of", "none_of" - Operator string `json:"operator,omitempty"` + Operator string `json:"operator,omitempty" yaml:"operator,omitempty"` // Data flow fields (type=data_flow) - Direction string `json:"direction,omitempty"` // "ingress", "egress" - Systems []string `json:"systems,omitempty"` + Direction string `json:"direction,omitempty" yaml:"direction,omitempty"` // "ingress", "egress" + Systems []string `json:"systems,omitempty" yaml:"systems,omitempty"` } // PolicyAction is the action block from a decisive policy. 
type PolicyAction struct { - Message string `json:"message,omitempty"` + Message string `json:"message,omitempty" yaml:"message,omitempty"` } // AccessEvaluationRequest is the context provided to the policy evaluator diff --git a/policy-engine/pkg/pbac/types.go b/policy-engine/pkg/pbac/types.go index a55ba1f9bd4..5a2be855ae1 100644 --- a/policy-engine/pkg/pbac/types.go +++ b/policy-engine/pkg/pbac/types.go @@ -57,16 +57,24 @@ func (d *DatasetPurposes) EffectivePurposes(collection string) map[string]bool { } // PurposeViolation represents a purpose-based access violation. +// +// DataUse and Control are set by the service layer during enrichment. +// SuppressedByPolicy and SuppressedByAction are set when an ALLOW +// policy matched during the post-engine policy filter — the violation +// is kept in the record for auditability, but a caller treating +// suppressed violations as compliant should check these fields. type PurposeViolation struct { - ConsumerID string `json:"consumer_id"` - ConsumerName string `json:"consumer_name"` - DatasetKey string `json:"dataset_key"` - Collection *string `json:"collection,omitempty"` - ConsumerPurposes []string `json:"consumer_purposes"` - DatasetPurposes []string `json:"dataset_purposes"` - Reason string `json:"reason"` - DataUse *string `json:"data_use,omitempty"` - Control *string `json:"control,omitempty"` + ConsumerID string `json:"consumer_id"` + ConsumerName string `json:"consumer_name"` + DatasetKey string `json:"dataset_key"` + Collection *string `json:"collection,omitempty"` + ConsumerPurposes []string `json:"consumer_purposes"` + DatasetPurposes []string `json:"dataset_purposes"` + Reason string `json:"reason"` + DataUse *string `json:"data_use,omitempty"` + Control *string `json:"control,omitempty"` + SuppressedByPolicy *string `json:"suppressed_by_policy,omitempty"` + SuppressedByAction *PolicyAction `json:"suppressed_by_action,omitempty"` } // EvaluationGap represents a gap in PBAC coverage — incomplete configuration, 
diff --git a/policy-engine/pkg/sqlextract/extract.go b/policy-engine/pkg/sqlextract/extract.go new file mode 100644 index 00000000000..f7e6df42d03 --- /dev/null +++ b/policy-engine/pkg/sqlextract/extract.go @@ -0,0 +1,155 @@ +// Package sqlextract pulls table references out of SQL text with regex. +// +// This is a deliberately simple extractor intended for the standalone +// fides-pbac CLI, where platform audit APIs (BigQuery Jobs, Snowflake +// ACCESS_HISTORY, Databricks query history) aren't available to hand +// back structured table references. +// +// Known limitations: +// - Views are not expanded (a query against a view reports the view +// name, not the underlying tables). +// - Wildcard tables (BigQuery `events_*`) are returned verbatim. +// - Tables referenced inside UDFs or stored procedures are missed. +// - Old-style comma joins (FROM a, b, c) only pick up the first table. +// - Identifiers with inner dots inside a single quoted span +// ("schema.table" as one name) are treated as two parts. +// +// For exact resolution, use the platform's native audit API. Use this +// extractor when you only have raw SQL text and need a best-effort answer. +package sqlextract + +import ( + "regexp" + "strings" +) + +// TableRef references a table in an external data platform. +// +// Catalog maps to a BigQuery project, Snowflake database, or Databricks +// catalog. Schema maps to a BQ dataset, Snowflake schema, or Databricks +// schema. Table is the table name. +type TableRef struct { + Catalog string `json:"catalog"` + Schema string `json:"schema"` + Table string `json:"table"` +} + +// QualifiedName returns the dot-separated identifier, skipping empty parts. 
+func (t TableRef) QualifiedName() string { + parts := make([]string, 0, 3) + if t.Catalog != "" { + parts = append(parts, t.Catalog) + } + if t.Schema != "" { + parts = append(parts, t.Schema) + } + if t.Table != "" { + parts = append(parts, t.Table) + } + return strings.Join(parts, ".") +} + +var ( + // Strip /* ... */ block comments. (?s) makes . match newlines. + // Nested comments are rare enough that we don't handle them. + blockCommentRe = regexp.MustCompile(`(?s)/\*.*?\*/`) + + // Strip -- line comments through end of line. + lineCommentRe = regexp.MustCompile(`--[^\n]*`) + + // Match FROM/JOIN followed by a 1-3 part identifier. Each part may + // optionally be wrapped in backticks or double quotes. A `(` + // following FROM/JOIN (subquery) doesn't match because it's not a + // word character. + tableRe = regexp.MustCompile( + "(?i)\\b(?:FROM|JOIN)\\s+" + + "([`\"]?\\w+[`\"]?" + + "(?:\\s*\\.\\s*[`\"]?\\w+[`\"]?){0,2})", + ) + + // Collect CTE names from WITH clauses: `WITH name AS (` or `, name AS (`. + // These are filtered from the results since they aren't real tables. + cteRe = regexp.MustCompile(`(?is)(?:WITH|,)\s+([a-zA-Z_]\w*)\s+AS\s*\(`) +) + +// StripComments returns the SQL with /* */ and -- comments removed. +// Lines that become empty after stripping are dropped, and leading and +// trailing whitespace is trimmed. Internal whitespace is otherwise +// preserved so multi-line queries keep their shape. +func StripComments(sql string) string { + s := blockCommentRe.ReplaceAllString(sql, " ") + s = lineCommentRe.ReplaceAllString(s, "") + lines := strings.Split(s, "\n") + kept := lines[:0] + for _, l := range lines { + if strings.TrimSpace(l) != "" { + kept = append(kept, l) + } + } + return strings.TrimSpace(strings.Join(kept, "\n")) +} + +// Extract returns the distinct list of table references found in a SQL +// string, in the order they first appear. +// +// Comments are stripped before extraction. 
CTE names declared in WITH +// clauses are filtered out. +func Extract(sql string) []TableRef { + stripped := blockCommentRe.ReplaceAllString(sql, " ") + stripped = lineCommentRe.ReplaceAllString(stripped, "") + + ctes := collectCTENames(stripped) + + seen := make(map[string]bool) + refs := make([]TableRef, 0) + for _, m := range tableRe.FindAllStringSubmatch(stripped, -1) { + ref := parseIdentifier(m[1]) + if ref.Table == "" { + continue + } + // Skip unqualified CTE references. + if ref.Schema == "" && ref.Catalog == "" && + ctes[strings.ToLower(ref.Table)] { + continue + } + key := strings.ToLower(ref.QualifiedName()) + if seen[key] { + continue + } + seen[key] = true + refs = append(refs, ref) + } + return refs +} + +func collectCTENames(sql string) map[string]bool { + ctes := make(map[string]bool) + for _, m := range cteRe.FindAllStringSubmatch(sql, -1) { + ctes[strings.ToLower(m[1])] = true + } + return ctes +} + +// parseIdentifier splits a 1-3 part identifier into a TableRef. +// Strips surrounding backticks and double quotes from each part. 
+func parseIdentifier(s string) TableRef { + parts := strings.Split(s, ".") + cleaned := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + p = strings.Trim(p, "`\"") + if p != "" { + cleaned = append(cleaned, p) + } + } + switch len(cleaned) { + case 1: + return TableRef{Table: cleaned[0]} + case 2: + return TableRef{Schema: cleaned[0], Table: cleaned[1]} + case 3: + return TableRef{Catalog: cleaned[0], Schema: cleaned[1], Table: cleaned[2]} + default: + return TableRef{} + } +} diff --git a/policy-engine/pkg/sqlextract/extract_test.go b/policy-engine/pkg/sqlextract/extract_test.go new file mode 100644 index 00000000000..e1dc7136ccc --- /dev/null +++ b/policy-engine/pkg/sqlextract/extract_test.go @@ -0,0 +1,217 @@ +package sqlextract + +import ( + "reflect" + "testing" +) + +func TestStripComments(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + {"empty", "", ""}, + {"no comments", "SELECT 1", "SELECT 1"}, + { + "leading line comment", + "-- hi\nSELECT 1", + "SELECT 1", + }, + { + "trailing line comment", + "SELECT 1 -- hi", + "SELECT 1", + }, + { + "block comment stripped, whitespace preserved", + "/* prelude */ SELECT x FROM y", + "SELECT x FROM y", + }, + { + "multiple comment lines collapsed", + "-- one\n--\n-- two\nSELECT 1\n-- three\nFROM t", + "SELECT 1\nFROM t", + }, + { + "multiline block comment", + "SELECT 1\n/* multi\n line\n comment */\nFROM t", + "SELECT 1\nFROM t", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := StripComments(tc.in) + if got != tc.want { + t.Errorf("StripComments(%q)\n got: %q\n want: %q", tc.in, got, tc.want) + } + }) + } +} + +func TestQualifiedName(t *testing.T) { + cases := []struct { + name string + ref TableRef + want string + }{ + {"three parts", TableRef{"prod", "sales", "orders"}, "prod.sales.orders"}, + {"two parts", TableRef{"", "sales", "orders"}, "sales.orders"}, + {"one part", TableRef{"", "", "orders"}, 
"orders"}, + {"empty", TableRef{}, ""}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := tc.ref.QualifiedName(); got != tc.want { + t.Errorf("QualifiedName() = %q, want %q", got, tc.want) + } + }) + } +} + +func TestExtract(t *testing.T) { + cases := []struct { + name string + sql string + want []TableRef + }{ + { + name: "empty", + sql: "", + want: []TableRef{}, + }, + { + name: "simple from, 3-part", + sql: "SELECT * FROM prod.sales.orders", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "simple from, 2-part", + sql: "SELECT * FROM sales.orders", + want: []TableRef{{"", "sales", "orders"}}, + }, + { + name: "simple from, 1-part", + sql: "SELECT * FROM orders", + want: []TableRef{{"", "", "orders"}}, + }, + { + name: "basic join", + sql: "SELECT * FROM prod.sales.orders o JOIN prod.customers.profiles c ON o.customer_id = c.id", + want: []TableRef{ + {"prod", "sales", "orders"}, + {"prod", "customers", "profiles"}, + }, + }, + { + name: "case insensitive keywords", + sql: "select * from prod.sales.orders inner Join prod.events.page_views pv on pv.user_id = 1", + want: []TableRef{ + {"prod", "sales", "orders"}, + {"prod", "events", "page_views"}, + }, + }, + { + name: "backtick quoted parts", + sql: "SELECT * FROM `prod`.`sales`.`orders`", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "double quoted parts", + sql: `SELECT * FROM "prod"."sales"."orders"`, + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "mixed quoting", + sql: "SELECT * FROM `prod`.sales.\"orders\"", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "block comments stripped", + sql: "/* this is prod.ignore.me */ SELECT * FROM prod.sales.orders", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "multiline block comment stripped", + sql: "SELECT *\n/* from ignore.me\n still comment */\nFROM prod.sales.orders", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "line 
comments stripped", + sql: "SELECT * -- from ignore.me\nFROM prod.sales.orders -- trailing", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "cte filtered out", + sql: `WITH recent_orders AS ( + SELECT * FROM prod.sales.orders WHERE created_at > now() - interval '7 days' + ) + SELECT * FROM recent_orders`, + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "multiple ctes filtered out", + sql: `WITH a AS (SELECT * FROM prod.sales.orders), + b AS (SELECT * FROM prod.events.page_views) + SELECT * FROM a JOIN b ON a.user_id = b.user_id`, + want: []TableRef{ + {"prod", "sales", "orders"}, + {"prod", "events", "page_views"}, + }, + }, + { + name: "subquery does not match", + sql: "SELECT * FROM (SELECT id FROM prod.sales.orders) sub JOIN prod.customers.profiles c ON sub.id = c.id", + want: []TableRef{ + {"prod", "sales", "orders"}, + {"prod", "customers", "profiles"}, + }, + }, + { + name: "deduplication", + sql: "SELECT * FROM prod.sales.orders o1 JOIN prod.sales.orders o2 ON o1.id = o2.parent_id", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "deduplication is case insensitive", + sql: "SELECT * FROM prod.sales.orders o1 JOIN PROD.SALES.ORDERS o2 ON o1.id = o2.id", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "multiple statements", + sql: "SELECT * FROM prod.sales.orders; SELECT * FROM prod.events.page_views;", + want: []TableRef{ + {"prod", "sales", "orders"}, + {"prod", "events", "page_views"}, + }, + }, + { + name: "whitespace variations around dots", + sql: "SELECT * FROM prod . sales . orders", + want: []TableRef{{"prod", "sales", "orders"}}, + }, + { + name: "from keyword inside string literal is ignored as long as it's not FROM syntax", + sql: "SELECT 'FROM something' AS x FROM prod.sales.orders", + // The literal 'FROM something' will match because we don't + // track string boundaries. Document: quoted content can + // produce false positives. 
This test locks in current + // behavior so a regression is visible. + want: []TableRef{ + {"", "", "something"}, + {"prod", "sales", "orders"}, + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := Extract(tc.sql) + if len(got) == 0 && len(tc.want) == 0 { + return + } + if !reflect.DeepEqual(got, tc.want) { + t.Errorf("Extract()\n got: %+v\n want: %+v", got, tc.want) + } + }) + } +} diff --git a/src/fides/cli/commands/pbac.py b/src/fides/cli/commands/pbac.py index 737eaecd56f..6d7b3b8f457 100644 --- a/src/fides/cli/commands/pbac.py +++ b/src/fides/cli/commands/pbac.py @@ -9,7 +9,6 @@ import json import sys -from dataclasses import asdict from typing import TextIO import rich_click as click