From d76e8a105e63c980f9d5d8155c616ae81bd63371 Mon Sep 17 00:00:00 2001 From: Gene Zhang Date: Tue, 27 Jan 2026 14:27:24 -0800 Subject: [PATCH] refactor(match-clause): Complete architecture overhaul - split monolithic file into modules This PR refactors the 4,333-line match_clause.rs monolith into a well-organized module structure: ## Module Structure - traversal.rs (1,159 lines) - Core MATCH pattern traversal and evaluation - view_scan.rs (899 lines) - ViewScan generation for nodes and relationships - type_inference.rs (570 lines) - Node label and relationship type inference - helpers.rs (536 lines) - 14 reusable helper functions - tests.rs (1,560 lines) - Unit tests - mod.rs (49 lines) - Public API - errors.rs (7 lines) - Error types ## Key Improvements - Eliminated 500+ lines of code duplication via helper extraction - Clear separation of concerns with single-responsibility modules - Improved testability and maintainability - Renamed legacy.rs to traversal.rs (final naming) - Updated method names for graph_catalog compatibility All 801 tests passing. No functional changes - pure refactoring. --- .../MATCH_CLAUSE_ARCHITECTURE_PROPOSAL.md | 533 ++ .../logical_plan/match_clause.rs | 4338 ----------------- .../logical_plan/match_clause/errors.rs | 7 + .../logical_plan/match_clause/helpers.rs | 536 ++ .../logical_plan/match_clause/mod.rs | 47 + .../logical_plan/match_clause/tests.rs | 1554 ++++++ .../logical_plan/match_clause/traversal.rs | 1174 +++++ .../match_clause/type_inference.rs | 572 +++ .../logical_plan/match_clause/view_scan.rs | 895 ++++ .../logical_plan/return_clause.rs | 9 +- 10 files changed, 5324 insertions(+), 4341 deletions(-) create mode 100644 docs/audits/MATCH_CLAUSE_ARCHITECTURE_PROPOSAL.md delete mode 100644 src/query_planner/logical_plan/match_clause.rs create mode 100644 src/query_planner/logical_plan/match_clause/errors.rs create mode 100644 src/query_planner/logical_plan/match_clause/helpers.rs create mode 100644 src/query_planner/logical_plan/match_clause/mod.rs create mode 100644 src/query_planner/logical_plan/match_clause/tests.rs create mode 100644 src/query_planner/logical_plan/match_clause/traversal.rs create mode 100644 src/query_planner/logical_plan/match_clause/type_inference.rs create mode 100644 src/query_planner/logical_plan/match_clause/view_scan.rs diff --git a/docs/audits/MATCH_CLAUSE_ARCHITECTURE_PROPOSAL.md b/docs/audits/MATCH_CLAUSE_ARCHITECTURE_PROPOSAL.md new file mode 100644 index 00000000..832389da --- /dev/null +++ b/docs/audits/MATCH_CLAUSE_ARCHITECTURE_PROPOSAL.md @@ -0,0 +1,533 @@ +# Match Clause Architecture Improvement Proposal + +**Date**: January 27, 2026 +**Author**: GitHub Copilot (based on code audit) +**Status**: PROPOSAL - Awaiting Review + +## Executive Summary + +The `match_clause.rs` file (4,333 lines) is the heart of ClickGraph's Cypher processing. While functional, it has accumulated significant architectural debt that makes it fragile, hard to reason about, and risky to extend. This proposal outlines a refactoring strategy that: + +1. **Leverages existing infrastructure** (PatternSchemaContext, TypedVariable) that is underutilized +2. **Separates concerns** using proven design patterns +3. **Reduces cyclomatic complexity** from 50+ to <10 per function +4. **Improves testability** by creating isolated, testable units + +**Key Insight**: The infrastructure already exists in `pattern_schema.rs` - but `match_clause.rs` doesn't use it! The fix is integration, not reinvention. + +--- + +## 1. Root Cause Analysis + +### Why is match_clause.rs Complex? + +Your hypothesis is correct: **schema variations are the primary complexity driver**. But the real problem is HOW schema variations are handled: + +``` +Current State: Schema logic MIXED with traversal logic +───────────────────────────────────────────────────── +match_clause.rs (4,333 lines) +├── Pattern parsing (simple) +├── VLP detection (simple) +├── Schema lookup × 10+ times (SCATTERED) +│ ├── Line 450: classify_edge_table_pattern() +│ ├── Line 620: edge_has_node_properties() +│ ├── Line 890: is_node_denormalized_on_edge() +│ ├── Line 1120: classify_edge_table_pattern() AGAIN +│ └── ... repeated everywhere +├── Schema-specific SQL generation (MIXED INTO TRAVERSAL) +│ ├── Denormalized node handling in 5 places +│ ├── Multi-source UNION in 3 places +│ └── Polymorphic edge handling in 4 places +└── ViewScan construction × 8 times (DUPLICATED) +``` + +**Existing Infrastructure Not Used:** +```rust +// pattern_schema.rs has ALL this ready: +pub enum NodeAccessStrategy { + OwnTable { table, id_column, properties }, // Regular node + EmbeddedInEdge { edge_alias, properties, .. }, // Denormalized + Virtual { label }, // Polymorphic +} + +pub enum EdgeAccessStrategy { + SeparateTable { table, from_id, to_id, .. }, // Regular edge + Polymorphic { type_column, type_values, .. }, // Multi-type edge + FkEdge { node_table, fk_column }, // FK-based edge +} + +pub enum JoinStrategy { + SingleTableScan { .. }, // Denormalized - no JOIN needed + Traditional { .. }, // Standard node-edge-node + EdgeToEdge { .. }, // Multi-hop denormalized + CoupledSameRow { .. }, // Coupled optimization +} +``` + +**But match_clause.rs computes all this manually, repeatedly!** + +--- + +## 2. Proposed Architecture + +### 2.1 Strategy Pattern for Traversal Modes + +Currently `traverse_connected_pattern_with_mode()` is 1,137 lines handling: +- Regular traversal +- Variable-length paths (VLP) +- Shortest path (single) +- All shortest paths + +**Proposed**: Use Strategy pattern to separate these: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ TraversalStrategyFactory │ +│ Analyzes pattern → Returns appropriate TraversalStrategy │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ + ┌────────────────────┼────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌────────────────┐ ┌────────────────┐ ┌────────────────────┐ +│ RegularStrategy│ │ VLPStrategy │ │ ShortestPathStrategy│ +│ (~200 lines) │ │ (~300 lines) │ │ (~250 lines) │ +└────────────────┘ └────────────────┘ └────────────────────┘ +``` + +```rust +// New: src/query_planner/logical_plan/match_clause/traversal_strategy.rs + +pub trait TraversalStrategy { + /// Generate logical plan elements for this traversal + fn generate_plan( + &self, + pattern: &PatternContext, + schema_ctx: &PatternSchemaContext, + plan_ctx: &mut PlanCtx, + ) -> Result; +} + +pub struct RegularTraversalStrategy; +pub struct VLPTraversalStrategy { bounds: (Option, Option) } +pub struct ShortestPathStrategy { find_all: bool } + +impl TraversalStrategyFactory { + pub fn create(pattern: &GraphPattern) -> Box { + if pattern.is_shortest_path() { + Box::new(ShortestPathStrategy { find_all: pattern.find_all_shortest }) + } else if pattern.is_vlp() { + Box::new(VLPTraversalStrategy { bounds: pattern.length_bounds }) + } else { + Box::new(RegularTraversalStrategy) + } + } +} +``` + +### 2.2 Builder Pattern for ViewScan Construction + +Currently ViewScan construction is duplicated 8+ times with slight variations: + +```rust +// Current: Duplicated everywhere with subtle differences +ViewScan { + label: ..., + alias: ..., + id_column: ..., // Sometimes computed, sometimes passed + property_mappings: ..., // Different logic in each location + filter_expression: ..., // Sometimes None, sometimes complex + union_sources: ..., // Multi-table handling scattered +} +``` + +**Proposed**: Builder that uses PatternSchemaContext: + +```rust +// New: src/query_planner/logical_plan/match_clause/view_scan_builder.rs + +pub struct ViewScanBuilder<'a> { + schema_ctx: &'a PatternSchemaContext, + graph_schema: &'a GraphSchema, + alias: String, +} + +impl<'a> ViewScanBuilder<'a> { + /// Create ViewScan directly from PatternSchemaContext + pub fn for_node(schema_ctx: &'a PatternSchemaContext, position: NodePosition) -> Self { ... } + pub fn for_edge(schema_ctx: &'a PatternSchemaContext) -> Self { ... } + + pub fn with_alias(mut self, alias: &str) -> Self { ... } + pub fn with_property_filter(mut self, filters: Vec) -> Self { ... } + + pub fn build(self) -> Result { + // Exhaustive match on schema_ctx.join_strategy - ALL cases handled + match &self.schema_ctx.join_strategy { + JoinStrategy::SingleTableScan { table, .. } => { + // Denormalized: single source, properties from edge + self.build_denormalized_scan() + } + JoinStrategy::Traditional { left_node, edge, right_node } => { + // Standard: separate node table + self.build_traditional_scan() + } + JoinStrategy::EdgeToEdge { .. } => { + // Multi-hop denormalized + self.build_edge_to_edge_scan() + } + JoinStrategy::CoupledSameRow { .. } => { + // Coupled optimization + self.build_coupled_scan() + } + } + } +} +``` + +### 2.3 Integration Points with Existing Infrastructure + +The key improvement is USING what already exists: + +```rust +// Current flow (BAD - doesn't use infrastructure): +fn traverse_connected_pattern_with_mode(...) { + // 1. Manually compute schema info (500+ lines) + let edge_pattern = classify_edge_table_pattern(...); + let is_denorm = is_node_denormalized_on_edge(...); + let edge_has_props = edge_has_node_properties(...); + // ... repeated for every decision point + + // 2. Manually build ViewScans (duplicated 8x) + let view_scan = ViewScan { ... }; +} + +// Proposed flow (GOOD - uses PatternSchemaContext): +fn traverse_connected_pattern_with_mode(...) { + // 1. Compute PatternSchemaContext ONCE + let schema_ctx = PatternSchemaContext::analyze( + &left_node, &right_node, &edge, &graph_schema + ); + + // 2. Select strategy based on traversal mode + let strategy = TraversalStrategyFactory::create(&pattern); + + // 3. Generate plan - strategy uses schema_ctx internally + let result = strategy.generate_plan(&pattern, &schema_ctx, plan_ctx)?; + + // 4. ViewScanBuilder uses schema_ctx for ALL variations + let view_scan = ViewScanBuilder::for_node(&schema_ctx, NodePosition::Left) + .with_alias(&left_alias) + .build()?; +} +``` + +--- + +## 3. Module Structure + +### Proposed Directory Layout + +``` +src/query_planner/logical_plan/ +├── mod.rs # LogicalPlan enum (unchanged) +├── match_clause/ # NEW: Module directory +│ ├── mod.rs # Main entry point (~300 lines) +│ │ └── pub fn process_match_clause(...) +│ │ +│ ├── pattern_analyzer.rs # Pattern parsing & analysis (~200 lines) +│ │ └── Extract nodes, edges, detect VLP/shortest path +│ │ +│ ├── traversal_strategy.rs # Strategy trait + implementations (~600 lines) +│ │ ├── trait TraversalStrategy +│ │ ├── RegularTraversalStrategy +│ │ ├── VLPTraversalStrategy +│ │ └── ShortestPathStrategy +│ │ +│ ├── view_scan_builder.rs # ViewScan construction (~250 lines) +│ │ └── ViewScanBuilder (uses PatternSchemaContext) +│ │ +│ ├── type_inference.rs # Relationship type inference (~200 lines) +│ │ └── Extracted from infer_relationship_type_from_nodes() +│ │ +│ └── errors.rs # Match-specific errors (~50 lines) +│ +├── return_clause.rs # (unchanged) +├── with_clause.rs # (unchanged) +└── ... +``` + +### Benefits of This Structure + +| Aspect | Before | After | +|--------|--------|-------| +| **Single file size** | 4,333 lines | ~300 lines (mod.rs) | +| **Largest function** | 1,137 lines | ~100 lines | +| **Schema lookups** | 10+ scattered | 1 (PatternSchemaContext) | +| **ViewScan construction** | 8 duplicate sites | 1 (ViewScanBuilder) | +| **Testability** | Requires full integration | Each strategy testable in isolation | +| **New schema support** | Edit 10+ places | Add case to enum + builder | + +--- + +## 4. Implementation Phases + +### Phase 1: Foundation (1-2 days) +**Goal**: Create structure without breaking existing code + +1. Create `match_clause/` module directory +2. Move `match_clause.rs` → `match_clause/legacy.rs` (temporary during refactoring) +3. Create new `match_clause/mod.rs` that re-exports legacy +4. Verify all 794 tests still pass +5. **Final step**: Rename `legacy.rs` → `traversal.rs` (implemented) + +**Risk**: Zero - purely structural change + +### Phase 2: ViewScanBuilder (2-3 days) +**Goal**: Centralize ViewScan construction + +1. Implement `ViewScanBuilder` using existing `PatternSchemaContext` +2. Find all 8 ViewScan construction sites in legacy code +3. Replace ONE SITE AT A TIME with ViewScanBuilder +4. Test after each replacement + +**Key Insight**: ViewScanBuilder can handle ALL schema variations by delegating to PatternSchemaContext: + +```rust +impl ViewScanBuilder { + fn build_from_node_access_strategy(&self) -> ViewScan { + match &self.node_access { + NodeAccessStrategy::OwnTable { table, id_column, properties } => { + // Standard node table + ViewScan::single_source(table, id_column, properties, &self.alias) + } + NodeAccessStrategy::EmbeddedInEdge { edge_alias, properties, .. } => { + // Denormalized - properties from edge table + ViewScan::embedded(edge_alias, properties, &self.alias) + } + NodeAccessStrategy::Virtual { label } => { + // Polymorphic - handled by type column + ViewScan::virtual_node(label, &self.alias) + } + } + } +} +``` + +### Phase 3: TraversalStrategy (3-4 days) +**Goal**: Separate traversal modes + +1. Extract `RegularTraversalStrategy` (simplest case) +2. Verify regular traversal tests pass +3. Extract `VLPTraversalStrategy` +4. Extract `ShortestPathStrategy` +5. Replace monster function with factory dispatch + +**Key Insight**: The three modes share setup/teardown but differ in core logic: + +``` +Regular: Node → Edge → Node (simple JOIN) +VLP: Node → CTE(recursive) → Node +ShortestPath: Node → CTE(bfs/limited) → Node +``` + +### Phase 4: Type Inference (1 day) +**Goal**: Extract `infer_relationship_type_from_nodes()` + +1. Create `type_inference.rs` module +2. Move 209-line function as-is +3. Add proper tests +4. Consider caching (many patterns repeat type inference) + +### Phase 5: Cleanup (1 day) +**Goal**: Remove legacy code, update documentation + +1. Rename `legacy.rs` → `traversal.rs` (completed - final module name) +2. Update all imports +3. Run full test suite +4. Update architecture documentation + +--- + +## 5. Design Patterns Used + +### 5.1 Strategy Pattern +**Where**: Traversal mode selection +**Why**: Different traversal modes (regular, VLP, shortest path) have same interface but completely different implementations + +```rust +trait TraversalStrategy { + fn generate_plan(...) -> Result; +} +``` + +### 5.2 Builder Pattern +**Where**: ViewScan construction +**Why**: ViewScans have many optional fields and complex validation rules + +```rust +ViewScanBuilder::for_node(&schema_ctx, NodePosition::Left) + .with_alias("u") + .with_filter(expr) + .build()? +``` + +### 5.3 Facade Pattern +**Where**: `match_clause/mod.rs` +**Why**: Present simple API while hiding complexity of strategies and builders + +```rust +pub fn process_match_clause( + pattern: &MatchPattern, + graph_schema: &GraphSchema, + plan_ctx: &mut PlanCtx, +) -> Result { + // Complex orchestration hidden behind simple function +} +``` + +### 5.4 Template Method Pattern +**Where**: Base traversal logic +**Why**: All traversal modes share common steps (extract nodes, validate, register variables) + +```rust +trait TraversalStrategy { + // Template method - common algorithm + fn execute(&self, ctx: &TraversalContext) -> Result<...> { + self.validate_pattern(ctx)?; // Common + self.prepare_variables(ctx)?; // Common + self.generate_core_plan(ctx)?; // Varies by strategy + self.finalize(ctx)?; // Common + } + + // Hook methods - overridden by strategies + fn generate_core_plan(&self, ctx: &TraversalContext) -> Result<...>; +} +``` + +--- + +## 6. Quality Metrics + +### Before Refactoring +| Metric | Value | Grade | +|--------|-------|-------| +| File size | 4,333 lines | ❌ F | +| Max function size | 1,137 lines | ❌ F | +| Cyclomatic complexity (max) | 50+ | ❌ F | +| Code duplication | 8 ViewScan sites | ❌ D | +| Schema lookups | 10+ scattered | ❌ D | +| Test coverage (unit) | Low - hard to test | ⚠️ C | + +### After Refactoring (Target) +| Metric | Value | Grade | +|--------|-------|-------| +| Module total | ~1,600 lines | ✅ B | +| Largest file | ~600 lines | ✅ A- | +| Max function size | ~100 lines | ✅ A | +| Cyclomatic complexity (max) | <10 | ✅ A | +| Code duplication | 1 ViewScan builder | ✅ A | +| Schema lookups | 1 PatternSchemaContext | ✅ A | +| Test coverage (unit) | High - strategies testable | ✅ A | + +--- + +## 7. Risk Mitigation + +### Risk 1: Breaking Existing Functionality +**Mitigation**: +- Phase 1 is purely structural (move files, no logic changes) +- Each subsequent phase replaces ONE component at a time +- Full test suite (794 tests) run after each change +- Keep legacy code until all phases complete + +### Risk 2: Edge Cases Not Covered +**Mitigation**: +- PatternSchemaContext already handles all schema variations +- Exhaustive `match` on enums catches all cases at compile time +- Unit tests for each strategy in isolation +- Integration tests for combined behavior + +### Risk 3: Performance Regression +**Mitigation**: +- PatternSchemaContext computed ONCE per pattern (vs 10+ lookups now) +- ViewScanBuilder avoids redundant property mapping computation +- Benchmark suite validates no regression + +--- + +## 8. Decision Points for Review + +**Question 1**: Should we keep legacy code as fallback? +- Option A: Delete after refactor (cleaner, but no safety net) +- Option B: Keep behind feature flag for 1 release cycle +- **Recommendation**: Option B - feature flag allows rollback + +**Question 2**: How granular should strategies be? +- Option A: 3 strategies (Regular, VLP, ShortestPath) +- Option B: 5 strategies (split VLP into bounded/unbounded, shortest into single/all) +- **Recommendation**: Option A first, split later if needed + +**Question 3**: Should ViewScanBuilder validate against schema? +- Option A: Builder validates (fail fast, clearer errors) +- Option B: Validation happens earlier in pipeline +- **Recommendation**: Option A - builder owns construction concerns + +--- + +## 9. Conclusion + +The match_clause.rs complexity is a solved problem in disguise. The `pattern_schema.rs` module already provides: +- Complete schema variation classification (NodeAccessStrategy, EdgeAccessStrategy) +- JOIN strategy selection (JoinStrategy enum) +- Property mapping resolution + +The refactoring doesn't require inventing new abstractions - it requires **using the existing ones**. The Strategy and Builder patterns provide clean integration points. + +**Estimated Total Effort**: 8-11 days +**Expected Grade Improvement**: C+ → A- + +--- + +## 10. Next Steps + +1. **Review this proposal** - Any concerns with the approach? +2. **Prioritize phases** - All phases? Subset for immediate wins? +3. **Create branch** - `refactor/match-clause-architecture` +4. **Begin Phase 1** - Structural changes, zero risk + +--- + +## Appendix A: Code Mapping + +Current locations → Proposed locations: + +| Current (match_clause.rs) | Lines | Proposed Location | +|--------------------------|-------|-------------------| +| `traverse_connected_pattern_with_mode()` | 762-1898 | `traversal_strategy.rs` | +| VLP handling | 1200-1500 | `VLPTraversalStrategy` | +| Shortest path | 1500-1700 | `ShortestPathStrategy` | +| `try_generate_view_scan()` | 418-1052 | `view_scan_builder.rs` | +| `infer_relationship_type_from_nodes()` | 2330-2538 | `type_inference.rs` | +| Pattern extraction | 100-417 | `pattern_analyzer.rs` | +| Entry point | scattered | `mod.rs` | + +## Appendix B: PatternSchemaContext Integration Points + +Current code that should use PatternSchemaContext but doesn't: + +```rust +// Line ~450: Manual schema classification +let edge_pattern = classify_edge_table_pattern(...); +// Should be: schema_ctx.join_strategy + +// Line ~620: Manual denormalized check +let is_denorm = edge_has_node_properties(...); +// Should be: matches!(schema_ctx.left_node, NodeAccessStrategy::EmbeddedInEdge { .. }) + +// Line ~890: Manual property resolution +let props = node_schema.property_mappings.get(prop); +// Should be: schema_ctx.left_node.get_property_column(prop) +``` + diff --git a/src/query_planner/logical_plan/match_clause.rs b/src/query_planner/logical_plan/match_clause.rs deleted file mode 100644 index 61388b72..00000000 --- a/src/query_planner/logical_plan/match_clause.rs +++ /dev/null @@ -1,4338 +0,0 @@ -use std::sync::Arc; - -use crate::graph_catalog::expression_parser::PropertyValue; -use crate::{ - open_cypher_parser::ast, - query_planner::{ - logical_expr::{ - LogicalExpr, Operator, OperatorApplication, Property, PropertyAccess, TableAlias, - }, - logical_plan::{ - errors::LogicalPlanError, - plan_builder::LogicalPlanResult, - { - CartesianProduct, GraphNode, GraphRel, LogicalPlan, ShortestPathMode, Union, - VariableLengthSpec, - }, - }, - plan_ctx::{PlanCtx, TableCtx}, - }, -}; - -use super::{generate_id, ViewScan}; -use crate::graph_catalog::graph_schema::GraphSchema; -use std::collections::HashMap; - -/// Maximum number of inferred types allowed before requiring explicit specification. -/// This prevents accidentally generating huge UNION queries from ambiguous patterns. -/// For example, `()-[r]->()` on a schema with 50 relationship types would need 50 UNION branches. -const MAX_INFERRED_TYPES: usize = 4; - -/// Infer node label for standalone nodes when label is not specified. -/// -/// Handles single-schema inference: If schema has only one node type, use it. -/// - Query: `MATCH (n) RETURN n` -/// - Schema: Only one node type defined (e.g., User) -/// - Result: n inferred as :User -/// -/// Returns: -/// - `Ok(Some(label))` - Successfully inferred label -/// - `Ok(None)` - Cannot infer (multiple node types or no nodes in schema) -/// - `Err(TooManyInferredTypes)` - Too many matches, user must specify explicit type -fn infer_node_label_from_schema( - schema: &GraphSchema, - plan_ctx: &PlanCtx, -) -> LogicalPlanResult> { - let node_schemas = schema.all_node_schemas(); - - // Case 1: Single node type in schema - use it - if node_schemas.len() == 1 { - let node_type = node_schemas - .keys() - .next() - .ok_or_else(|| { - LogicalPlanError::QueryPlanningError( - "Schema has exactly 1 node type but keys().next() returned None".to_string(), - ) - })? - .clone(); - log::info!( - "Node inference: Schema has only one node type '{}', using it", - node_type - ); - return Ok(Some(node_type)); - } - - // Case 2: No nodes in schema - if node_schemas.is_empty() { - log::debug!("Node inference: Schema has no node types defined, cannot infer"); - return Ok(None); - } - - // Case 3: Multiple node types - check if within limit for UNION generation - let node_count = node_schemas.len(); - if node_count <= plan_ctx.max_inferred_types { - // Could potentially generate UNION of all types, but for now just log info - log::info!( - "Node inference: Schema has {} node types ({:?}), would need UNION for all", - node_count, - node_schemas.keys().collect::>() - ); - // For now, don't auto-generate UNION - require explicit label - return Ok(None); - } - - // Case 4: Too many node types - let types_preview: Vec<_> = node_schemas.keys().take(5).cloned().collect(); - let types_str = if node_count > 5 { - format!("{}, ...", types_preview.join(", ")) - } else { - node_schemas.keys().cloned().collect::>().join(", ") - }; - - log::info!( - "Node inference: Schema has {} node types [{}], too many for auto-inference", - node_count, - types_str - ); - - // Don't error - just return None to indicate no inference possible - // User should specify an explicit label - Ok(None) -} - -/// Infer node labels from relationship schema when nodes are unlabeled. -/// -/// For example: -/// - Query: `()-[r:FLIGHT]->()` -/// - Schema: FLIGHT has from_node=Airport, to_node=Airport -/// - Result: Both nodes inferred as Airport -/// -/// REMOVED: Old parsing-time label inference (now handled by TypeInference analyzer pass) -/// The TypeInference pass runs after parsing and provides more robust inference -/// that works across WITH boundaries and handles both node labels and edge types. -/// -/// Infer relationship type from typed node labels when edge is untyped. -/// -/// Handles two cases: -/// 1. **Single-schema inference**: If schema has only one relationship, use it -/// - Query: `()-[r]->()` → infer r:ONLY_REL if only one relationship in schema -/// -/// 2. **Node-type inference**: If nodes are typed, find relationships that match -/// - Query: `(a:Airport)-[r]->()` → infer r:FLIGHT if FLIGHT is the only edge with from_node=Airport -/// - Query: `()-[r]->(a:Airport)` → infer r:FLIGHT if FLIGHT is the only edge with to_node=Airport -/// - Query: `(a:User)-[r]->(b:Post)` → infer r:LIKES if LIKES is the only User→Post edge -/// -/// Returns: -/// - `Ok(Some(types))` - Successfully inferred relationship types -/// - `Ok(None)` - Cannot infer (both nodes untyped with multi-schema, or no matches) -/// - `Err(TooManyInferredTypes)` - Too many matches, user must specify explicit type -fn infer_relationship_type_from_nodes( - start_label: &Option, - end_label: &Option, - direction: &ast::Direction, - schema: &GraphSchema, - plan_ctx: &PlanCtx, -) -> LogicalPlanResult>> { - let rel_schemas = schema.get_relationships_schemas(); - - // Case 1: Single relationship in schema - use it regardless of node types - if rel_schemas.len() == 1 { - let rel_type = rel_schemas - .keys() - .next() - .ok_or_else(|| { - LogicalPlanError::QueryPlanningError( - "Schema has exactly 1 relationship type but keys().next() returned None" - .to_string(), - ) - })? - .clone(); - log::info!( - "Relationship inference: Schema has only one relationship type '{}', using it", - rel_type - ); - return Ok(Some(vec![rel_type])); - } - - // Case 2: At least one node is typed - filter relationships by node type compatibility - if start_label.is_none() && end_label.is_none() { - log::debug!("Relationship inference: Both nodes untyped and schema has {} relationships, cannot infer", - rel_schemas.len()); - return Ok(None); - } - - // Find relationships that match the typed node(s) - let matching_types: Vec = rel_schemas - .iter() - .filter(|(_, rel_schema)| { - // Check compatibility based on direction - match direction { - ast::Direction::Outgoing => { - // start→end: from_node=start, to_node=end - let from_ok = start_label - .as_ref() - .map(|l| { - // Check both from_node and from_label_values for polymorphic support - if l == &rel_schema.from_node { - return true; - } - if let Some(values) = &rel_schema.from_label_values { - return values.contains(l); - } - false - }) - .unwrap_or(true); - let to_ok = end_label - .as_ref() - .map(|l| { - if l == &rel_schema.to_node { - return true; - } - if let Some(values) = &rel_schema.to_label_values { - return values.contains(l); - } - false - }) - .unwrap_or(true); - from_ok && to_ok - } - ast::Direction::Incoming => { - // start←end: from_node=end, to_node=start - let from_ok = end_label - .as_ref() - .map(|l| { - if l == &rel_schema.from_node { - return true; - } - if let Some(values) = &rel_schema.from_label_values { - return values.contains(l); - } - false - }) - .unwrap_or(true); - let to_ok = start_label - .as_ref() - .map(|l| { - if l == &rel_schema.to_node { - return true; - } - if let Some(values) = &rel_schema.to_label_values { - return values.contains(l); - } - false - }) - .unwrap_or(true); - from_ok && to_ok - } - ast::Direction::Either => { - // Could match in either direction - let outgoing_ok = { - let from_ok = start_label - .as_ref() - .map(|l| { - l == &rel_schema.from_node - || rel_schema - .from_label_values - .as_ref() - .map(|v| v.contains(l)) - .unwrap_or(false) - }) - .unwrap_or(true); - let to_ok = end_label - .as_ref() - .map(|l| { - l == &rel_schema.to_node - || rel_schema - .to_label_values - .as_ref() - .map(|v| v.contains(l)) - .unwrap_or(false) - }) - .unwrap_or(true); - from_ok && to_ok - }; - let incoming_ok = { - let from_ok = end_label - .as_ref() - .map(|l| { - l == &rel_schema.from_node - || rel_schema - .from_label_values - .as_ref() - .map(|v| v.contains(l)) - .unwrap_or(false) - }) - .unwrap_or(true); - let to_ok = start_label - .as_ref() - .map(|l| { - l == &rel_schema.to_node - || rel_schema - .to_label_values - .as_ref() - .map(|v| v.contains(l)) - .unwrap_or(false) - }) - .unwrap_or(true); - from_ok && to_ok - }; - outgoing_ok || incoming_ok - } - } - }) - .map(|(type_name, _)| type_name.clone()) - .collect(); - - if matching_types.is_empty() { - log::warn!( - "Relationship inference: No relationships match {:?}->{:?}", - start_label, - end_label - ); - return Ok(None); - } - - // Check if too many types would result in excessive UNION branches - if matching_types.len() > plan_ctx.max_inferred_types { - let types_preview: Vec<_> = matching_types.iter().take(5).cloned().collect(); - let types_str = if matching_types.len() > 5 { - format!("{}, ...", types_preview.join(", ")) - } else { - matching_types.join(", ") - }; - - log::error!( - "Relationship inference: Too many matching types ({}) for {:?}->{:?}: [{}]. Max allowed is {}.", - matching_types.len(), start_label, end_label, types_str, plan_ctx.max_inferred_types - ); - - return Err(LogicalPlanError::TooManyInferredTypes { - count: matching_types.len(), - max: plan_ctx.max_inferred_types, - types: types_str, - }); - } - - if matching_types.len() == 1 { - log::info!( - "Relationship inference: Inferred relationship type '{}' from node types {:?}->{:?}", - matching_types[0], - start_label, - end_label - ); - } else { - log::info!( - "Relationship inference: Multiple matching types {:?} for {:?}->{:?}, will expand to UNION", - matching_types, start_label, end_label - ); - } - - Ok(Some(matching_types)) -} - -/// Generate a scan operation for a node pattern -/// -/// This function creates a ViewScan using schema information from plan_ctx. -/// If the schema lookup fails, it returns an error since node labels should be validated -/// against the schema. -fn generate_scan( - alias: String, - label: Option, - plan_ctx: &PlanCtx, -) -> LogicalPlanResult> { - log::debug!( - "generate_scan called with alias='{}', label={:?}", - alias, - label - ); - - if let Some(label_str) = &label { - // Handle $any wildcard for polymorphic edges - if label_str == "$any" { - log::debug!("Label is $any (polymorphic wildcard), creating Empty plan"); - return Ok(Arc::new(LogicalPlan::Empty)); - } - - log::debug!("Trying to create ViewScan for label '{}'", label_str); - match try_generate_view_scan(&alias, &label_str, plan_ctx)? { - Some(view_scan) => { - log::info!("✓ Successfully created ViewScan for label '{}'", label_str); - Ok(view_scan) - } - None => { - // ViewScan creation failed - this is an error (schema not found) - Err(LogicalPlanError::NodeNotFound(label_str.to_string())) - } - } - } else { - log::debug!("No label provided - anonymous node, using Empty plan"); - // For anonymous nodes, use Empty plan - // The node label will be inferred from relationship context during analysis - Ok(Arc::new(LogicalPlan::Empty)) - } -} - -/// Helper function to check if a plan contains a denormalized ViewScan -fn is_denormalized_scan(plan: &Arc) -> bool { - let result = match plan.as_ref() { - LogicalPlan::ViewScan(view_scan) => { - crate::debug_print!( - "is_denormalized_scan: ViewScan.is_denormalized = {} for table '{}'", - view_scan.is_denormalized, - view_scan.source_table - ); - view_scan.is_denormalized - } - _ => { - crate::debug_print!("is_denormalized_scan: Not a ViewScan, returning false"); - false - } - }; - crate::debug_print!("is_denormalized_scan: returning {}", result); - result -} - -/// Check if a node label is denormalized by looking up the schema -/// Returns true if the node is denormalized (exists only in edge context) -fn is_label_denormalized(label: &Option, plan_ctx: &PlanCtx) -> bool { - if let Some(label_str) = label { - let schema = plan_ctx.schema(); - if let Ok(node_schema) = schema.node_schema(label_str) { - crate::debug_print!( - "is_label_denormalized: label '{}' is_denormalized = {}", - label_str, - node_schema.is_denormalized - ); - return node_schema.is_denormalized; - } - } - crate::debug_print!( - "is_label_denormalized: label {:?} not found or no label, returning false", - label - ); - false -} - -/// Try to generate a ViewScan for a node by looking up the label in the schema from plan_ctx -/// Returns None if schema is not available or label not found. -pub fn try_generate_view_scan( - _alias: &str, - label: &str, - plan_ctx: &PlanCtx, -) -> Result>, LogicalPlanError> { - log::debug!("try_generate_view_scan: label='{}'", label); - - // Use plan_ctx.schema() instead of GLOBAL_SCHEMAS - let schema = plan_ctx.schema(); - - // Look up the node schema for this label - let node_schema = match schema.node_schema(label) { - Ok(s) => s, - Err(e) => { - log::warn!("Could not find node schema for label '{}': {:?}", label, e); - return Ok(None); - } - }; - - // DENORMALIZED NODE-ONLY QUERIES: - // For denormalized nodes (virtual nodes that exist as columns on edge tables), - // we need to generate queries from the edge table itself. - // - // For nodes that appear in MULTIPLE edge tables (like IP in dns_log and conn_log), - // we create a UNION ALL of all possible sources. - // - // For each relationship where this node appears: - // - If node is FROM → ViewScan with from_node_properties from that edge table - // - If node is TO → ViewScan with to_node_properties from that edge table - if node_schema.is_denormalized { - log::info!( - "✓ Denormalized node-only query for label '{}' - checking all tables", - label - ); - - // Check if this node appears in multiple relationships/tables - if let Some(metadata) = schema.get_denormalized_node_metadata(label) { - let rel_types = metadata.get_relationship_types(); - - if rel_types.len() > 1 || metadata.id_sources.values().any(|v| v.len() > 1) { - // MULTI-TABLE CASE: Node appears in multiple tables/positions - log::info!( - "✓ Denormalized node '{}' appears in {} relationship type(s) - creating multi-table UNION", - label, rel_types.len() - ); - - let mut union_inputs: Vec> = Vec::new(); - - for rel_type in &rel_types { - if let Ok(rel_schema) = schema.get_rel_schema(rel_type) { - let full_table_name = rel_schema.full_table_name(); - - // Check if this node is in FROM position - if rel_schema.from_node == label { - if let Some(ref from_props) = rel_schema.from_node_properties { - log::debug!( - "✓ Adding FROM branch for '{}' from table '{}' (rel: {})", - label, - full_table_name, - rel_type - ); - log::info!( - "DEBUG: Adding FROM branch. union_inputs before push: len={}", - union_inputs.len() - ); - - // 🔧 FIX: Populate property_mapping from from_props so full node expansion works - let property_mapping: HashMap = from_props.iter() - .map(|(k, v)| (k.clone(), crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()))) - .collect(); - - // 🔧 FIX: Get the actual ID column name from node_id property - let id_prop_name = node_schema - .node_id - .columns() - .first() - .map(|s| s.to_string()) - .unwrap_or_else(|| "id".to_string()); - let id_column = from_props - .get(&id_prop_name) - .cloned() - .unwrap_or_else(|| id_prop_name.clone()); - - log::info!( - "✓ FROM branch for '{}': id_prop='{}', id_column='{}', {} properties", - label, id_prop_name, id_column, property_mapping.len() - ); - - let mut from_scan = ViewScan::new( - full_table_name.clone(), - None, - property_mapping.clone(), // Use actual property mappings - id_column, // Use actual column name - vec![], - vec![], - ); - from_scan.is_denormalized = true; - from_scan.from_node_properties = Some(property_mapping); - log::info!( - "DEBUG: FROM ViewScan properties: from={:?}, to={:?}", - from_scan - .from_node_properties - .as_ref() - .map(|p| p.keys().collect::>()), - from_scan - .to_node_properties - .as_ref() - .map(|p| p.keys().collect::>()) - ); - union_inputs - .push(Arc::new(LogicalPlan::ViewScan(Arc::new(from_scan)))); - log::info!( - "DEBUG: Added FROM branch. union_inputs after push: len={}", - union_inputs.len() - ); - } - } - - // Check if this node is in TO position - if rel_schema.to_node == label { - log::info!("DEBUG: Checking TO position. to_node='{}', label='{}', has to_node_properties: {}", rel_schema.to_node, label, rel_schema.to_node_properties.is_some()); - if let Some(ref to_props) = rel_schema.to_node_properties { - log::info!( - "DEBUG: TO props: {:?}", - to_props.keys().collect::>() - ); - log::debug!( - "✓ Adding TO branch for '{}' from table '{}' (rel: {})", - label, - full_table_name, - rel_type - ); - log::info!( - "DEBUG: Adding TO branch. union_inputs before push: len={}", - union_inputs.len() - ); - - // 🔧 FIX: Populate property_mapping from to_props so full node expansion works - let property_mapping: HashMap = to_props.iter() - .map(|(k, v)| (k.clone(), crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()))) - .collect(); - - // 🔧 FIX: Get the actual ID column name from node_id property - let id_prop_name = node_schema - .node_id - .columns() - .first() - .map(|s| s.to_string()) - .unwrap_or_else(|| "id".to_string()); - let id_column = to_props - .get(&id_prop_name) - .cloned() - .unwrap_or_else(|| id_prop_name.clone()); - - log::info!( - "✓ TO branch for '{}': id_prop='{}', id_column='{}', {} properties", - label, id_prop_name, id_column, property_mapping.len() - ); - - let mut to_scan = ViewScan::new( - full_table_name.clone(), - None, - property_mapping.clone(), // Use actual property mappings - id_column, // Use actual column name - vec![], - vec![], - ); - to_scan.is_denormalized = true; - to_scan.to_node_properties = Some(property_mapping); - log::info!( - "DEBUG: TO ViewScan properties: from={:?}, to={:?}", - to_scan - .from_node_properties - .as_ref() - .map(|p| p.keys().collect::>()), - to_scan - .to_node_properties - .as_ref() - .map(|p| p.keys().collect::>()) - ); - union_inputs - .push(Arc::new(LogicalPlan::ViewScan(Arc::new(to_scan)))); - log::info!( - "DEBUG: Added TO branch. union_inputs after push: len={}", - union_inputs.len() - ); - } - } - } - } - - if union_inputs.is_empty() { - log::error!("No ViewScans generated for denormalized node '{}'", label); - return Ok(None); - } - - if union_inputs.len() == 1 { - log::info!( - "✓ Single ViewScan for denormalized node '{}' (only one source)", - label - ); - // Safe: we just checked that union_inputs.len() == 1, so pop() must return Some. - let plan = union_inputs - .pop() - .expect("union_inputs.pop() must return Some when len() == 1"); - return Ok(Some(plan)); - } - - use crate::query_planner::logical_plan::{Union, UnionType}; - let union = Union { - inputs: union_inputs, - union_type: UnionType::All, - }; - - log::info!( - "✓ Created UNION ALL with {} branches for denormalized node '{}'", - union.inputs.len(), - label - ); - return Ok(Some(Arc::new(LogicalPlan::Union(union)))); - } - } - - // SINGLE-TABLE CASE: Fall through to existing logic - let has_from_props = node_schema.from_properties.is_some(); - let has_to_props = node_schema.to_properties.is_some(); - let source_table = node_schema - .denormalized_source_table - .as_ref() - .ok_or_else(|| { - log::error!("Denormalized node '{}' missing source table", label); - LogicalPlanError::InvalidSchema { - label: label.to_string(), - reason: "Denormalized node missing source table".to_string(), - } - })?; - - log::debug!( - "Denormalized node '{}': has_from_props={}, has_to_props={}, source_table={}", - label, - has_from_props, - has_to_props, - source_table - ); - - // source_table is already fully qualified (database.table) from config.rs - let full_table_name = source_table.clone(); - - // Case 3: BOTH from and to properties → UNION ALL of two ViewScans - if has_from_props && has_to_props { - log::info!( - "✓✓✓ SINGLE-TABLE CASE: Denormalized node '{}' has BOTH positions - creating UNION ALL ✓✓✓", - label - ); - - // Create FROM position ViewScan - let mut from_scan = ViewScan::new( - full_table_name.clone(), - None, - HashMap::new(), - String::new(), - vec![], - vec![], - ); - from_scan.is_denormalized = true; - from_scan.from_node_properties = node_schema.from_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column( - v.clone(), - ), - ) - }) - .collect() - }); - from_scan.schema_filter = node_schema.filter.clone(); - // Note: to_node_properties is None - this is the FROM branch - - // Create TO position ViewScan - let mut to_scan = ViewScan::new( - full_table_name, - None, - HashMap::new(), - String::new(), - vec![], - vec![], - ); - to_scan.is_denormalized = true; - to_scan.to_node_properties = node_schema.to_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column( - v.clone(), - ), - ) - }) - .collect() - }); - to_scan.schema_filter = node_schema.filter.clone(); - // Note: from_node_properties is None - this is the TO branch - - // Create Union of the two ViewScans - use crate::query_planner::logical_plan::{Union, UnionType}; - let union = Union { - inputs: vec![ - Arc::new(LogicalPlan::ViewScan(Arc::new(from_scan))), - Arc::new(LogicalPlan::ViewScan(Arc::new(to_scan))), - ], - union_type: UnionType::All, - }; - - log::info!( - ">>>SINGLE-TABLE CASE: Created UNION with 2 branches for '{}' <<<", - label - ); - return Ok(Some(Arc::new(LogicalPlan::Union(union)))); - } - - // Case 1 or 2: Only one position - single ViewScan - let mut view_scan = ViewScan::new( - full_table_name, - None, - HashMap::new(), - String::new(), - vec![], - vec![], - ); - - view_scan.is_denormalized = true; - view_scan.from_node_properties = node_schema.from_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()), - ) - }) - .collect() - }); - view_scan.to_node_properties = node_schema.to_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()), - ) - }) - .collect() - }); - view_scan.schema_filter = node_schema.filter.clone(); - - log::info!( - "✓ Created denormalized ViewScan for '{}' (single position)", - label - ); - - return Ok(Some(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan))))); - } - - // MULTI_TABLE_LABEL CHECK: Non-denormalized nodes with same label in multiple tables - // This happens when the config has multiple node definitions with the same label but different tables - let all_schemas_for_label = schema.get_all_node_schemas_for_label(label); - if all_schemas_for_label.len() > 1 { - log::info!( - "✓ MULTI_TABLE_LABEL: Found '{}' in {} different tables - creating UNION ALL", - label, - all_schemas_for_label.len() - ); - - let mut union_inputs: Vec> = Vec::new(); - - for (_composite_key, other_schema) in all_schemas_for_label { - let full_table_name = format!("{}.{}", other_schema.database, other_schema.table_name); - let id_column = other_schema - .node_id - .columns() - .first() - .map(|s| s.to_string()) - .unwrap_or_else(|| "id".to_string()); - - let mut view_scan = ViewScan::new( - full_table_name, - None, - other_schema.property_mappings.clone(), - id_column, - vec![], - vec![], - ); - - view_scan.schema_filter = other_schema.filter.clone(); - log::debug!( - "Added ViewScan for '{}' from table '{}.{}'", - label, - other_schema.database, - other_schema.table_name - ); - - union_inputs.push(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan)))); - } - - if union_inputs.len() > 1 { - use crate::query_planner::logical_plan::{Union, UnionType}; - let union = Union { - inputs: union_inputs, - union_type: UnionType::All, - }; - - log::info!( - "✓ Created MULTI_TABLE_LABEL UNION with {} branches for '{}'", - union.inputs.len(), - label - ); - return Ok(Some(Arc::new(LogicalPlan::Union(union)))); - } - } - - // SINGLE-TABLE CASE OR NON-DENORMALIZED: Use standard ViewScan logic - log::info!( - "✓ ViewScan: Resolved label '{}' to table '{}'", - label, - node_schema.table_name - ); - - // Use property mapping from schema directly (already PropertyValue) - // 🔧 FIX: For denormalized nodes, property_mappings is often empty because properties - // are stored in from_properties/to_properties. Merge them into property_mapping - // so that full node expansion (RETURN n) works correctly for MULTI_TABLE_LABEL schemas. - let mut property_mapping = node_schema.property_mappings.clone(); - - if node_schema.is_denormalized && property_mapping.is_empty() { - // Merge from_properties and to_properties into property_mapping - // This enables full node expansion to find the actual column names - if let Some(ref from_props) = node_schema.from_properties { - for (prop_name, col_name) in from_props.iter() { - property_mapping.insert( - prop_name.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column( - col_name.clone(), - ), - ); - } - } - if let Some(ref to_props) = node_schema.to_properties { - for (prop_name, col_name) in to_props.iter() { - // Only add if not already present (from_properties takes precedence) - property_mapping - .entry(prop_name.clone()) - .or_insert_with(|| { - crate::graph_catalog::expression_parser::PropertyValue::Column( - col_name.clone(), - ) - }); - } - } - - if !property_mapping.is_empty() { - log::info!( - "✓ Populated property_mapping for denormalized node '{}' with {} properties: {:?}", - label, - property_mapping.len(), - property_mapping.keys().collect::>() - ); - } - } - - // Create fully qualified table name (database.table) - let full_table_name = format!("{}.{}", node_schema.database, node_schema.table_name); - log::debug!("Using fully qualified table name: {}", full_table_name); - - // Get view parameter names from schema (if this is a parameterized view) - let view_parameter_names = node_schema.view_parameters.clone(); - - // Get view parameter values from PlanCtx (if provided) - let view_parameter_values = plan_ctx.view_parameter_values().cloned(); - - // Log parameter info - if let Some(ref param_names) = view_parameter_names { - log::debug!( - "ViewScan: Table '{}' expects parameters: {:?}", - node_schema.table_name, - param_names - ); - if let Some(ref param_values) = view_parameter_values { - log::debug!("ViewScan: Will use parameter values: {:?}", param_values); - } else { - log::warn!( - "ViewScan: Table '{}' is parameterized but no values provided!", - node_schema.table_name - ); - } - } - - // Create ViewScan with the actual table name from schema - // 🔧 FIX: For denormalized nodes, node_id refers to the property name (e.g., "ip"), - // but we need the actual column name (e.g., "id.orig_h") for SQL generation. - // Look it up from from_properties/to_properties for denormalized schemas. - let id_column = if node_schema.is_denormalized { - // Get the node_id property name first - let id_prop_name = node_schema - .node_id - .columns() - .first() - .map(|s| s.to_string()) - .unwrap_or_else(|| "id".to_string()); - - // Look up the actual column name from from_properties or to_properties - let actual_column = node_schema.from_properties - .as_ref() - .and_then(|props| props.get(&id_prop_name)) - .or_else(|| { - node_schema.to_properties - .as_ref() - .and_then(|props| props.get(&id_prop_name)) - }) - .cloned() - .unwrap_or_else(|| { - log::warn!( - "Denormalized node '{}' ID property '{}' not found in from/to_properties, using as-is", - label, - id_prop_name - ); - id_prop_name.clone() - }); - - log::info!( - "✓ Resolved denormalized node '{}' ID column: '{}' (property) → '{}' (column)", - label, - id_prop_name, - actual_column - ); - actual_column - } else { - // For non-denormalized nodes, node_id IS the actual column name - node_schema - .node_id - .columns() - .first() - .map(|s| s.to_string()) - .ok_or_else(|| { - log::error!("Node schema for '{}' has no ID columns defined", label); - // Don't hardcode "id" - this causes bugs with auto_discover_columns - // where the actual column might be user_id, object_id, etc. - // This should never happen in valid schemas. - LogicalPlanError::InvalidSchema { - label: label.to_string(), - reason: "No ID columns defined in node schema".to_string(), - } - })? - }; - - let mut view_scan = ViewScan::new( - full_table_name, // Use fully qualified table name (database.table) - None, // No filter condition yet - property_mapping, // Property mappings from schema - id_column, // ID column from schema (first for composite) - vec!["id".to_string()], // Basic output schema - vec![], // No projections yet - ); - - // Set view parameters if this is a parameterized view - view_scan.view_parameter_names = view_parameter_names.clone(); - view_scan.view_parameter_values = view_parameter_values.clone(); - log::debug!( - "ViewScan created for '{}': param_names={:?}, param_values={:?}", - label, - view_parameter_names, - view_parameter_values - ); - - // Set denormalized flag and properties from schema - view_scan.is_denormalized = node_schema.is_denormalized; - - // Populate denormalized node properties (for role-based mapping) - if node_schema.is_denormalized { - // Convert from HashMap to HashMap - view_scan.from_node_properties = node_schema.from_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()), - ) - }) - .collect() - }); - - view_scan.to_node_properties = node_schema.to_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()), - ) - }) - .collect() - }); - - log::debug!( - "ViewScan: Populated denormalized properties for label '{}' - from_props={:?}, to_props={:?}", - label, - view_scan.from_node_properties.as_ref().map(|p| p.keys().collect::>()), - view_scan.to_node_properties.as_ref().map(|p| p.keys().collect::>()) - ); - } - - log::debug!( - "ViewScan: Set is_denormalized={} for node label '{}' (table: {})", - node_schema.is_denormalized, - label, - node_schema.table_name - ); - - // Set schema-level filter if defined in schema - view_scan.schema_filter = node_schema.filter.clone(); - if view_scan.schema_filter.is_some() { - log::info!( - "ViewScan: Applied schema filter for label '{}': {:?}", - label, - node_schema.filter.as_ref().map(|f| &f.raw) - ); - } - - Ok(Some(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan))))) -} - -/// Try to generate a ViewScan for a relationship by looking up the relationship type in the schema from plan_ctx -pub fn try_generate_relationship_view_scan( - _alias: &str, - rel_type: &str, - left_node_label: Option<&str>, - right_node_label: Option<&str>, - plan_ctx: &PlanCtx, -) -> Option> { - log::debug!( - "try_generate_relationship_view_scan: rel_type='{}', left_node_label={:?}, right_node_label={:?}", - rel_type, - left_node_label, - right_node_label - ); - - // Use plan_ctx.schema() instead of GLOBAL_SCHEMAS - let schema = plan_ctx.schema(); - - // Look up the relationship schema for this type, using node labels for disambiguation - let rel_schema = - match schema.get_rel_schema_with_nodes(rel_type, left_node_label, right_node_label) { - Ok(s) => s, - Err(e) => { - log::warn!( - "Could not find relationship schema for type '{}' with nodes ({:?}, {:?}): {:?}", - rel_type, - left_node_label, - right_node_label, - e - ); - return None; - } - }; - - // Log successful resolution - log::info!( - "✓ Relationship ViewScan: Resolved type '{}' to table '{}'", - rel_type, - rel_schema.table_name - ); - - // Copy property mappings from schema so relationships can be expanded in RETURN - let property_mapping = rel_schema.property_mappings.clone(); - log::debug!( - "Relationship ViewScan: property_mapping has {} entries", - property_mapping.len() - ); - - // Create fully qualified table name (database.table) - let full_table_name = format!("{}.{}", rel_schema.database, rel_schema.table_name); - log::debug!( - "Using fully qualified relationship table name: {}", - full_table_name - ); - - // Get view parameter names from schema (if this is a parameterized view) - let view_parameter_names = rel_schema.view_parameters.clone(); - - // Get view parameter values from PlanCtx (if provided) - let view_parameter_values = plan_ctx.view_parameter_values().cloned(); - - // Log parameter info - if let Some(ref param_names) = view_parameter_names { - log::debug!( - "Relationship ViewScan: Table '{}' expects parameters: {:?}", - rel_schema.table_name, - param_names - ); - if let Some(ref param_values) = view_parameter_values { - log::debug!( - "Relationship ViewScan: Will use parameter values: {:?}", - param_values - ); - } else { - log::warn!( - "Relationship ViewScan: Table '{}' is parameterized but no values provided!", - rel_schema.table_name - ); - } - } - - // Create ViewScan for relationship with from/to columns - let mut view_scan = ViewScan::new_relationship( - full_table_name, // Use fully qualified table name (database.table) - None, // No filter condition yet - property_mapping, // Empty for now - rel_schema.from_id.clone(), // Use from_id as id_column for relationships - vec!["id".to_string()], // Output schema - relationships have "id" property - vec![], // No projections yet - rel_schema.from_id.clone(), // From column from schema - rel_schema.to_id.clone(), // To column from schema - ); - - // Set view parameters if this is a parameterized view - view_scan.view_parameter_names = view_parameter_names; - view_scan.view_parameter_values = view_parameter_values; - - // Populate polymorphic edge fields from schema - // Copy label columns even if type_column is None (fixed-endpoint pattern) - view_scan.type_column = rel_schema.type_column.clone(); - view_scan.from_label_column = rel_schema.from_label_column.clone(); - view_scan.to_label_column = rel_schema.to_label_column.clone(); - - if rel_schema.type_column.is_some() - || rel_schema.from_label_column.is_some() - || rel_schema.to_label_column.is_some() - { - log::debug!( - "ViewScan: Populated polymorphic fields for rel '{}' - type_column={:?}, from_label={:?}, to_label={:?}", - rel_type, - view_scan.type_column, - view_scan.from_label_column, - view_scan.to_label_column - ); - } - - // Set denormalized node properties from schema - // Convert HashMap to HashMap - view_scan.from_node_properties = rel_schema.from_node_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()), - ) - }) - .collect() - }); - view_scan.to_node_properties = rel_schema.to_node_properties.as_ref().map(|props| { - props - .iter() - .map(|(k, v)| { - ( - k.clone(), - crate::graph_catalog::expression_parser::PropertyValue::Column(v.clone()), - ) - }) - .collect() - }); - - if view_scan.from_node_properties.is_some() || view_scan.to_node_properties.is_some() { - log::debug!( - "ViewScan: Set denormalized node properties for rel '{}' - from_props={:?}, to_props={:?}", - rel_type, - view_scan.from_node_properties.as_ref().map(|p| p.keys().collect::>()), - view_scan.to_node_properties.as_ref().map(|p| p.keys().collect::>()) - ); - } - - // Set schema-level filter if defined in schema - view_scan.schema_filter = rel_schema.filter.clone(); - if view_scan.schema_filter.is_some() { - log::info!( - "ViewScan: Applied schema filter for relationship '{}': {:?}", - rel_type, - rel_schema.filter.as_ref().map(|f| &f.raw) - ); - } - - Some(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan)))) -} - -/// Generate a relationship center (ViewScan if possible, otherwise regular Scan) -fn generate_relationship_center( - rel_alias: &str, - rel_labels: &Option>, - _left_connection: &str, - _right_connection: &str, - left_node_label: &Option, - right_node_label: &Option, - plan_ctx: &PlanCtx, -) -> LogicalPlanResult> { - log::debug!( - "Creating relationship center for alias '{}', labels: {:?}, left_node_label: {:?}, right_node_label: {:?}", - rel_alias, - rel_labels, - left_node_label, - right_node_label - ); - // Try to generate a ViewScan for the relationship if we have a single type - if let Some(labels) = rel_labels { - log::debug!("Relationship has {} labels: {:?}", labels.len(), labels); - - // Deduplicate labels - [:FOLLOWS|FOLLOWS] should be treated as single type - let unique_labels: Vec<_> = { - let mut seen = std::collections::HashSet::new(); - labels.iter().filter(|l| seen.insert(*l)).cloned().collect() - }; - log::debug!( - "After deduplication: {} unique labels: {:?}", - unique_labels.len(), - unique_labels - ); - - if unique_labels.len() == 1 { - log::debug!( - "Trying to create Relationship ViewScan for type '{}'", - unique_labels[0] - ); - if let Some(view_scan) = try_generate_relationship_view_scan( - rel_alias, - &unique_labels[0], - left_node_label.as_ref().map(|s| s.as_str()), - right_node_label.as_ref().map(|s| s.as_str()), - plan_ctx, - ) { - log::info!( - "✓ Successfully created Relationship ViewScan for type '{}'", - unique_labels[0] - ); - return Ok(view_scan); - } else { - // ViewScan creation failed - this is an error - return Err(LogicalPlanError::RelationshipNotFound( - unique_labels[0].clone(), - )); - } - } else { - log::debug!( - "Multiple relationship types ({}), using Empty plan (CTE uses GraphRel.labels)", - unique_labels.len() - ); - // For multiple relationships, use Empty plan - // The actual UNION ALL CTE generation happens in render phase using GraphRel.labels - // No need for "rel_*" placeholder - it was never actually looked up - return Ok(Arc::new(LogicalPlan::Empty)); - } - } else { - log::debug!("No relationship labels specified, using Empty plan"); - // For relationships without labels, use Empty - // Type inference pass will fill in the relationship type - return Ok(Arc::new(LogicalPlan::Empty)); - } -} - -fn convert_properties( - props: Vec, - node_alias: &str, -) -> LogicalPlanResult> { - let mut extracted_props: Vec = vec![]; - - for prop in props { - match prop { - Property::PropertyKV(property_kvpair) => { - let op_app = LogicalExpr::OperatorApplicationExp(OperatorApplication { - operator: Operator::Equal, - operands: vec![ - LogicalExpr::PropertyAccessExp(PropertyAccess { - table_alias: TableAlias(node_alias.to_string()), - column: PropertyValue::Column(property_kvpair.key.to_string()), - }), - property_kvpair.value, - ], - }); - extracted_props.push(op_app); - } - Property::Param(_) => return Err(LogicalPlanError::FoundParamInProperties), - } - } - - Ok(extracted_props) -} - -fn convert_properties_to_operator_application(plan_ctx: &mut PlanCtx) -> LogicalPlanResult<()> { - for (alias, table_ctx) in plan_ctx.get_mut_alias_table_ctx_map().iter_mut() { - let mut extracted_props = convert_properties(table_ctx.get_and_clear_properties(), alias)?; - table_ctx.append_filters(&mut extracted_props); - } - Ok(()) -} - -// Wrapper for backwards compatibility -// Reserved for future use when non-optional traversal needs explicit mode -#[allow(dead_code)] -fn traverse_connected_pattern<'a>( - connected_patterns: &Vec>, - plan: Arc, - plan_ctx: &mut PlanCtx, - pathpattern_idx: usize, -) -> LogicalPlanResult> { - traverse_connected_pattern_with_mode( - connected_patterns, - plan, - plan_ctx, - pathpattern_idx, - None, - None, - false, - ) -} - -fn traverse_connected_pattern_with_mode<'a>( - connected_patterns: &Vec>, - mut plan: Arc, - plan_ctx: &mut PlanCtx, - pathpattern_idx: usize, - shortest_path_mode: Option, - path_variable: Option<&str>, - is_optional: bool, -) -> LogicalPlanResult> { - crate::debug_print!("\n╔════════════════════════════════════════"); - crate::debug_print!("║ traverse_connected_pattern_with_mode"); - crate::debug_print!("║ connected_patterns.len() = {}", connected_patterns.len()); - crate::debug_print!("║ Current plan type: {:?}", std::mem::discriminant(&*plan)); - crate::debug_print!("╚════════════════════════════════════════\n"); - - // === PRE-PROCESS: Assign consistent aliases to shared nodes === - // When patterns share nodes via Rc::clone() (e.g., ()-[r1]->()-[r2]->()), - // we need to ensure the shared node gets the same alias in both patterns. - // Use pointer equality to detect shared Rc instances. - use std::collections::HashMap; - - // Use usize from Rc::as_ptr() cast as the key for pointer-based identity - let mut node_alias_map: HashMap = HashMap::new(); - - for connected_pattern in connected_patterns.iter() { - // Check start_node - use address as key - let start_ptr = connected_pattern.start_node.as_ptr() as usize; - if !node_alias_map.contains_key(&start_ptr) { - let start_node_ref = connected_pattern.start_node.borrow(); - let alias = if let Some(name) = start_node_ref.name { - name.to_string() - } else { - generate_id() - }; - drop(start_node_ref); - node_alias_map.insert(start_ptr, alias); - } - - // Check end_node - use address as key - let end_ptr = connected_pattern.end_node.as_ptr() as usize; - if !node_alias_map.contains_key(&end_ptr) { - let end_node_ref = connected_pattern.end_node.borrow(); - let alias = if let Some(name) = end_node_ref.name { - name.to_string() - } else { - generate_id() - }; - drop(end_node_ref); - node_alias_map.insert(end_ptr, alias); - } - } - - crate::debug_print!( - "║ Pre-assigned {} node aliases for shared node detection", - node_alias_map.len() - ); - - for (pattern_idx, connected_pattern) in connected_patterns.iter().enumerate() { - crate::debug_print!("┌─ Processing connected_pattern #{}", pattern_idx); - - let start_node_ref = connected_pattern.start_node.borrow(); - let start_node_label_from_ast = start_node_ref.first_label().map(|val| val.to_string()); - // Use pre-assigned alias to ensure shared nodes get the same alias - let start_node_alias = node_alias_map - .get(&(connected_pattern.start_node.as_ptr() as usize)) - .cloned() - .unwrap_or_else(generate_id); - - // CRITICAL FIX: Label resolution order: - // 1. If AST has explicit label (Some(...)), use it - // 2. Else if node exists in plan_ctx with label, use that - // 3. Else None - // This fixes: MATCH (a)-[:R]->(b:B), (b)-[:S]->(c) - // where second pattern needs b's label from first pattern (AST returns None after first use) - let start_node_label = if start_node_label_from_ast.is_some() { - start_node_label_from_ast - } else if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&start_node_alias) { - if let Some(label) = table_ctx.get_label_opt() { - log::info!( - ">>> Found existing '{}' in plan_ctx with label: {}", - start_node_alias, - label - ); - Some(label) - } else { - None - } - } else { - None - }; - - crate::debug_print!( - "│ Start node: alias='{}', label={:?}", - start_node_alias, - start_node_label - ); - - let start_node_props = start_node_ref - .properties - .clone() - .map(|props| { - props - .into_iter() - .map(|p| Property::try_from(p)) - .collect::, _>>() - }) - .transpose() - .map_err(|e| { - LogicalPlanError::QueryPlanningError(format!( - "Failed to convert start node property: {}", - e - )) - })? - .unwrap_or_else(Vec::new); - - // Extract end node info early - needed for filtering anonymous edge types - let end_node_ref = connected_pattern.end_node.borrow(); - // Use pre-assigned alias to ensure shared nodes get the same alias - let end_node_alias = node_alias_map - .get(&(connected_pattern.end_node.as_ptr() as usize)) - .cloned() - .unwrap_or_else(generate_id); - let end_node_label_from_ast = end_node_ref.first_label().map(|val| val.to_string()); - - // CRITICAL FIX: Same label resolution order as start_node - let end_node_label = if end_node_label_from_ast.is_some() { - end_node_label_from_ast - } else if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&end_node_alias) { - if let Some(label) = table_ctx.get_label_opt() { - log::info!( - ">>> Found existing '{}' in plan_ctx with label: {}", - end_node_alias, - label - ); - Some(label) - } else { - None - } - } else { - None - }; - - let rel = &connected_pattern.relationship; - let rel_alias = if let Some(alias) = rel.name { - alias.to_string() - } else { - generate_id() - }; - - // Handle anonymous edge patterns: [] (no type specified) - // Expand relationship types using composite key index from schema - // Supports multiple relationships with same type name differentiated by from/to nodes - let rel_labels = match rel.labels.as_ref() { - Some(labels) => { - // Explicit labels provided: [:TYPE1|TYPE2] - // Look up relationship types using composite key index (O(1) lookup) - // Filters by node compatibility when node types are known - let graph_schema = plan_ctx.schema(); - let mut expanded_labels = Vec::new(); - - // Get node labels for semantic expansion - let from_label = start_node_label.as_deref(); - let to_label = end_node_label.as_deref(); - - for label in labels.iter() { - let variants = - graph_schema.expand_generic_relationship_type(label, from_label, to_label); - if variants.is_empty() { - // No expansion found, use original label (will fail later if truly missing) - expanded_labels.push(label.to_string()); - } else { - // Add all expanded variants - expanded_labels.extend(variants); - } - } - - // Deduplicate in case of overlapping expansions - let unique_labels: Vec = { - let mut seen = std::collections::HashSet::new(); - expanded_labels - .into_iter() - .filter(|l| seen.insert(l.clone())) - .collect() - }; - - Some(unique_labels) - } - None => { - // Anonymous edge pattern: [] (no type specified) - // Use smart inference to determine relationship type(s): - // 1. If schema has only one relationship, use it - // 2. If nodes are typed, find relationships that match those types - // 3. Otherwise, expand to all matching relationship types for UNION - let graph_schema = plan_ctx.schema(); - - infer_relationship_type_from_nodes( - &start_node_label, - &end_node_label, - &rel.direction, - graph_schema, - plan_ctx, - )? - } - }; - - // === LABEL INFERENCE === - // NOTE: Label and edge type inference is now handled by the TypeInference analyzer pass - // which runs after parsing. This provides more robust inference that works across - // WITH boundaries and handles both node labels AND edge types. - // The labels in start_node_label/end_node_label come from AST parsing or will be - // inferred by TypeInference pass. - - log::debug!( - "Pattern processing: start='{}' ({}), end='{}' ({})", - start_node_alias, - start_node_label - .as_ref() - .map(|s| s.as_str()) - .unwrap_or("None"), - end_node_alias, - end_node_label - .as_ref() - .map(|s| s.as_str()) - .unwrap_or("None") - ); - - // Polymorphic inference removed - TypeInference pass handles this - // (start_possible_labels and end_possible_labels were used for UNION generation) - - crate::debug_print!( - "│ Relationship: alias='{}', labels={:?}, direction={:?}", - rel_alias, - rel_labels, - rel.direction - ); - crate::debug_print!( - "│ After inference: start_label={:?}, end_label={:?}", - start_node_label, - end_node_label - ); - - log::debug!("Parsed relationship labels: {:?}", rel_labels); - let rel_properties = rel - .properties - .clone() - .map(|props| { - props - .into_iter() - .map(|p| Property::try_from(p)) - .collect::, _>>() - }) - .transpose() - .map_err(|e| { - LogicalPlanError::QueryPlanningError(format!( - "Failed to convert relationship property: {}", - e - )) - })? - .unwrap_or_else(Vec::new); - - crate::debug_print!( - "│ End node: alias='{}', label={:?}", - end_node_alias, - end_node_label - ); - - let end_node_props = end_node_ref - .properties - .clone() - .map(|props| { - props - .into_iter() - .map(|p| Property::try_from(p)) - .collect::, _>>() - }) - .transpose() - .map_err(|e| { - LogicalPlanError::QueryPlanningError(format!( - "Failed to convert end node property: {}", - e - )) - })? - .unwrap_or_else(Vec::new); - - // if start alias already present in ctx map, it means the current nested connected pattern's start node will be connecting at right side plan and end node will be at the left - if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&start_node_alias) { - if start_node_label.is_some() { - table_ctx.set_labels(start_node_label.clone().map(|l| vec![l])); - } - if !start_node_props.is_empty() { - table_ctx.append_properties(start_node_props); - } - - plan_ctx.insert_table_ctx( - end_node_alias.clone(), - TableCtx::build( - end_node_alias.clone(), - end_node_label.clone().map(|l| vec![l]), - end_node_props, - false, - end_node_ref.name.is_some(), - ), - ); - - let (left_conn, right_conn) = match rel.direction { - ast::Direction::Outgoing => (start_node_alias.clone(), end_node_alias.clone()), - ast::Direction::Incoming => (end_node_alias.clone(), start_node_alias.clone()), - ast::Direction::Either => (start_node_alias.clone(), end_node_alias.clone()), - }; - - // Compute left and right node labels based on direction for relationship lookup - let (left_node_label_for_rel, right_node_label_for_rel) = match rel.direction { - ast::Direction::Outgoing => (start_node_label.clone(), end_node_label.clone()), - ast::Direction::Incoming => (end_node_label.clone(), start_node_label.clone()), - ast::Direction::Either => (start_node_label.clone(), end_node_label.clone()), - }; - - // FIX: For multi-hop patterns, use the existing plan as LEFT to create nested structure - // This ensures (a)-[r1]->(b)-[r2]->(c) becomes GraphRel { left: GraphRel(a-r1-b), center: r2, right: c } - let (left_node, right_node) = match rel.direction { - ast::Direction::Outgoing => { - // (a)-[:r1]->(b)-[:r2]->(c): existing plan (a-r1-b) on left, new node (c) on right - - // Check if end_node is denormalized - if so, don't create a separate scan - let (scan, is_denorm) = if is_label_denormalized(&end_node_label, plan_ctx) { - crate::debug_print!( - "=== End node '{}' is DENORMALIZED, creating Empty scan ===", - end_node_alias - ); - (Arc::new(LogicalPlan::Empty), true) - } else { - let scan = generate_scan( - end_node_alias.clone(), - end_node_label.clone(), - plan_ctx, - )?; - let is_d = is_denormalized_scan(&scan); - (scan, is_d) - }; - - ( - plan.clone(), - Arc::new(LogicalPlan::GraphNode(GraphNode { - input: scan, - alias: end_node_alias.clone(), - label: end_node_label.clone().map(|s| s.to_string()), - is_denormalized: is_denorm, - projected_columns: None, - })), - ) - } - ast::Direction::Incoming => { - // (c)<-[:r2]-(b)<-[:r1]-(a): new node (c) on left, existing plan (b-r1-a) on right - - // Check if end_node is denormalized - if so, don't create a separate scan - let (scan, is_denorm) = if is_label_denormalized(&end_node_label, plan_ctx) { - crate::debug_print!( - "=== End node '{}' is DENORMALIZED, creating Empty scan ===", - end_node_alias - ); - (Arc::new(LogicalPlan::Empty), true) - } else { - let scan = generate_scan( - end_node_alias.clone(), - end_node_label.clone(), - plan_ctx, - )?; - let is_d = is_denormalized_scan(&scan); - (scan, is_d) - }; - - ( - Arc::new(LogicalPlan::GraphNode(GraphNode { - input: scan, - alias: end_node_alias.clone(), - label: end_node_label.clone().map(|s| s.to_string()), - is_denormalized: is_denorm, - projected_columns: None, - })), - plan.clone(), - ) - } - ast::Direction::Either => { - // Either direction: existing plan on left, new node on right - - // Check if end_node is denormalized - if so, don't create a separate scan - let (scan, is_denorm) = if is_label_denormalized(&end_node_label, plan_ctx) { - crate::debug_print!( - "=== End node '{}' is DENORMALIZED, creating Empty scan ===", - end_node_alias - ); - (Arc::new(LogicalPlan::Empty), true) - } else { - let scan = generate_scan( - end_node_alias.clone(), - end_node_label.clone(), - plan_ctx, - )?; - let is_d = is_denormalized_scan(&scan); - (scan, is_d) - }; - - ( - plan.clone(), - Arc::new(LogicalPlan::GraphNode(GraphNode { - input: scan, - alias: end_node_alias.clone(), - label: end_node_label.clone().map(|s| s.to_string()), - is_denormalized: is_denorm, - projected_columns: None, - })), - ) - } - }; - - // Determine anchor_connection for OPTIONAL MATCH - // The anchor is whichever node was already seen in the base MATCH - let anchor_connection = if is_optional { - let alias_map = plan_ctx.get_alias_table_ctx_map(); - if alias_map.contains_key(&left_conn) && !alias_map.contains_key(&right_conn) { - // left_conn exists, right_conn is new -> left_conn is anchor - Some(left_conn.clone()) - } else if alias_map.contains_key(&right_conn) && !alias_map.contains_key(&left_conn) - { - // right_conn exists, left_conn is new -> right_conn is anchor - Some(right_conn.clone()) - } else { - // Both exist or neither exists - shouldn't happen in normal OPTIONAL MATCH - // Fall back to None - crate::debug_print!("WARN: OPTIONAL MATCH could not determine anchor: left_conn={}, right_conn={}", left_conn, right_conn); - None - } - } else { - None - }; - - // Handle variable-length patterns and multi-type relationships: - // - Single-type *1: (a)-[:TYPE*1]->(b) → simplify to regular relationship - // - Multi-type *1: (a)-[:TYPE1|TYPE2*1]->(b) → keep VLP for polymorphic nodes - // - Multi-type no VLP: (a)-[:TYPE1|TYPE2]->(b) → ADD implicit *1 for polymorphic handling - let is_multi_type = rel_labels.as_ref().map_or(false, |labels| labels.len() > 1); - - let variable_length = if let Some(vlp) = rel.variable_length.clone() { - // Has explicit VLP spec - let spec: VariableLengthSpec = vlp.into(); - let is_exact_one_hop = spec.min_hops == Some(1) && spec.max_hops == Some(1); - - if is_exact_one_hop && !is_multi_type { - log::info!("Simplifying *1 single-type pattern to regular relationship"); - None // Remove *1 for single-type - treat as regular relationship - } else { - Some(spec) // Keep VLP for multi-type or ranges - } - } else if is_multi_type { - // Multi-type without VLP: add implicit *1 for proper polymorphic handling - log::info!("Adding implicit *1 for multi-type relationship (polymorphic end node)"); - Some(VariableLengthSpec { - min_hops: Some(1), - max_hops: Some(1), - }) - } else { - None // Single-type, no VLP - }; - - let graph_rel_node = GraphRel { - left: left_node, - center: generate_relationship_center( - &rel_alias, - &rel_labels, - &left_conn, - &right_conn, - &left_node_label_for_rel, - &right_node_label_for_rel, - plan_ctx, - )?, - right: right_node, - alias: rel_alias.clone(), - direction: rel.direction.clone().into(), - left_connection: left_conn, - right_connection: right_conn, - is_rel_anchor: false, - variable_length, - shortest_path_mode: shortest_path_mode.clone(), - path_variable: path_variable.map(|s| s.to_string()), - where_predicate: None, // Will be populated by filter pushdown optimization - labels: rel_labels.clone(), - is_optional: if is_optional { Some(true) } else { None }, - anchor_connection, - cte_references: std::collections::HashMap::new(), - }; - plan_ctx.insert_table_ctx( - rel_alias.clone(), - TableCtx::build( - rel_alias.clone(), - rel_labels, - rel_properties, - true, - rel.name.is_some(), - ), - ); - - // Set connected node labels for polymorphic relationship resolution - if let Some(rel_table_ctx) = plan_ctx.get_mut_table_ctx_opt(&rel_alias) { - rel_table_ctx.set_connected_nodes( - left_node_label_for_rel.clone(), - right_node_label_for_rel.clone(), - ); - } - - // Register path variable in PlanCtx with full TypedVariable::Path metadata - if let Some(path_var) = path_variable { - // Extract length bounds from graph_rel_node.variable_length for TypedVariable::Path - let length_bounds = graph_rel_node - .variable_length - .as_ref() - .map(|vlp| (vlp.min_hops, vlp.max_hops)); - - // First register TypedVariable::Path with full metadata - plan_ctx.define_path( - path_var.to_string(), - Some(graph_rel_node.left_connection.clone()), // start_node - Some(graph_rel_node.right_connection.clone()), // end_node - Some(rel_alias.clone()), // relationship - length_bounds, // length bounds from VLP spec - shortest_path_mode.is_some(), // is_shortest_path - ); - - // Then register TableCtx for backward compatibility with code that uses alias_table_ctx_map - // insert_table_ctx will skip variable registration since we already defined it - plan_ctx.insert_table_ctx( - path_var.to_string(), - TableCtx::build( - path_var.to_string(), - None, // Path variables don't have labels - vec![], // Path variables don't have properties - false, // Not a relationship - true, // Explicitly named by user - ), - ); - - log::info!( - "📍 Registered path variable '{}' with TypedVariable::Path (start={}, end={}, bounds={:?})", - path_var, graph_rel_node.left_connection, graph_rel_node.right_connection, length_bounds - ); - } - - plan = Arc::new(LogicalPlan::GraphRel(graph_rel_node)); - - crate::debug_print!("│ ✓ Created GraphRel (start node already in context)"); - crate::debug_print!("│ Plan is now: GraphRel"); - crate::debug_print!("└─ Pattern #{} complete\n", pattern_idx); - } - // if end alias already present in ctx map, it means the current nested connected pattern's end node will be connecting at right side plan and start node will be at the left - else if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&end_node_alias) { - log::info!( - ">>> Found existing TableCtx for '{}', updating with label: {:?}", - end_node_alias, - end_node_label - ); - if end_node_label.is_some() { - table_ctx.set_labels(end_node_label.clone().map(|l| vec![l])); - log::info!( - ">>> Updated '{}' with label: {}", - end_node_alias, - end_node_label - .as_ref() - .expect("end_node_label was checked to be Some") - ); - } else { - log::warn!( - ">>> end_node_label is None for '{}', cannot update TableCtx!", - end_node_alias - ); - } - if !end_node_props.is_empty() { - table_ctx.append_properties(end_node_props); - } - - let (start_scan, start_is_denorm) = - if is_label_denormalized(&start_node_label, plan_ctx) { - crate::debug_print!( - "=== Start node '{}' is DENORMALIZED, creating Empty scan ===", - start_node_alias - ); - (Arc::new(LogicalPlan::Empty), true) - } else { - let scan = generate_scan( - start_node_alias.clone(), - start_node_label.clone(), - plan_ctx, - )?; - let is_d = is_denormalized_scan(&scan); - (scan, is_d) - }; - - let start_graph_node = GraphNode { - input: start_scan, - alias: start_node_alias.clone(), - label: start_node_label.clone().map(|s| s.to_string()), - is_denormalized: start_is_denorm, - projected_columns: None, - }; - plan_ctx.insert_table_ctx( - start_node_alias.clone(), - TableCtx::build( - start_node_alias.clone(), - start_node_label.clone().map(|l| vec![l]), - start_node_props, - false, - start_node_ref.name.is_some(), - ), - ); - - // Compute left and right node labels based on direction for relationship lookup - let (left_node_label_for_rel, right_node_label_for_rel) = match rel.direction { - ast::Direction::Outgoing => (start_node_label.clone(), end_node_label.clone()), - ast::Direction::Incoming => (end_node_label.clone(), start_node_label.clone()), - ast::Direction::Either => (start_node_label.clone(), end_node_label.clone()), - }; - - let graph_rel_node = GraphRel { - left: Arc::new(LogicalPlan::GraphNode(start_graph_node)), - center: generate_relationship_center( - &rel_alias, - &rel_labels, - &start_node_alias, - &end_node_alias, - &start_node_label, - &end_node_label, - plan_ctx, - )?, - right: plan.clone(), - alias: rel_alias.clone(), - direction: rel.direction.clone().into(), - left_connection: start_node_alias.clone(), - right_connection: end_node_alias.clone(), - is_rel_anchor: false, - variable_length: { - let is_multi_type = - rel_labels.as_ref().map_or(false, |labels| labels.len() > 1); - if let Some(vlp) = rel.variable_length.clone() { - let spec: VariableLengthSpec = vlp.into(); - let is_exact_one_hop = spec.min_hops == Some(1) && spec.max_hops == Some(1); - if is_exact_one_hop && !is_multi_type { - None // *1 single-type is same as regular relationship - } else { - Some(spec) // Keep *1 for multi-type or ranges - } - } else if is_multi_type { - // Add implicit *1 for multi-type without VLP (polymorphic end node) - Some(VariableLengthSpec { - min_hops: Some(1), - max_hops: Some(1), - }) - } else { - None // Single-type, no VLP - } - }, - shortest_path_mode: shortest_path_mode.clone(), - path_variable: path_variable.map(|s| s.to_string()), - where_predicate: None, // Will be populated by filter pushdown optimization - labels: rel_labels.clone(), - is_optional: if plan_ctx.is_optional_match_mode() { - log::warn!( - "CREATING GraphRel with is_optional=Some(true), mode={}", - plan_ctx.is_optional_match_mode() - ); - Some(true) - } else { - log::warn!( - "CREATING GraphRel with is_optional=None, mode={}", - plan_ctx.is_optional_match_mode() - ); - None - }, - // For anchor traversals, the right connection (end_node) is the anchor from base MATCH - // The left connection (start_node) is newly introduced - anchor_connection: if plan_ctx.is_optional_match_mode() { - Some(end_node_alias.clone()) - } else { - None - }, - cte_references: std::collections::HashMap::new(), - }; - plan_ctx.insert_table_ctx( - rel_alias.clone(), - TableCtx::build( - rel_alias.clone(), - rel_labels, - rel_properties, - true, - rel.name.is_some(), - ), - ); - - // Set connected node labels for polymorphic relationship resolution - if let Some(rel_table_ctx) = plan_ctx.get_mut_table_ctx_opt(&rel_alias) { - rel_table_ctx.set_connected_nodes( - left_node_label_for_rel.clone(), - right_node_label_for_rel.clone(), - ); - } - - // Register path variable in PlanCtx with full TypedVariable::Path metadata - if let Some(path_var) = path_variable { - // Extract length bounds from graph_rel_node.variable_length for TypedVariable::Path - let length_bounds = graph_rel_node - .variable_length - .as_ref() - .map(|vlp| (vlp.min_hops, vlp.max_hops)); - - // First register TypedVariable::Path with full metadata - plan_ctx.define_path( - path_var.to_string(), - Some(start_node_alias.clone()), // start_node - Some(end_node_alias.clone()), // end_node - Some(rel_alias.clone()), // relationship - length_bounds, // length bounds from VLP spec - shortest_path_mode.is_some(), // is_shortest_path - ); - - // Then register TableCtx for backward compatibility with code that uses alias_table_ctx_map - plan_ctx.insert_table_ctx( - path_var.to_string(), - TableCtx::build( - path_var.to_string(), - None, // Path variables don't have labels - vec![], // Path variables don't have properties - false, // Not a relationship - true, // Explicitly named by user - ), - ); - - log::info!( - "📍 Registered path variable '{}' with TypedVariable::Path (start={}, end={}, bounds={:?})", - path_var, start_node_alias, end_node_alias, length_bounds - ); - } - - plan = Arc::new(LogicalPlan::GraphRel(graph_rel_node)); - - crate::debug_print!("│ ✓ Created GraphRel (end node already in context)"); - crate::debug_print!("│ Plan is now: GraphRel"); - crate::debug_print!("└─ Pattern #{} complete\n", pattern_idx); - } - // not connected with existing nodes - else { - // if two comma separated patterns found and they are not connected to each other i.e. there is no common node alias between them - // Allow this - it will create a CartesianProduct. - // If WHERE clause has predicates connecting them (e.g., srcip1.ip = srcip2.ip), those will be processed later - // and can be converted to proper JOINs by optimizer passes. - if pathpattern_idx > 0 { - log::info!( - "Disconnected comma pattern detected at index {}. Creating CartesianProduct. WHERE clause may contain connecting predicates.", - pathpattern_idx - ); - } - - crate::debug_print!("=== CHECKING EXISTING PLAN ==="); - crate::debug_print!( - "=== plan discriminant: {:?} ===", - std::mem::discriminant(&*plan) - ); - - // Check if we have a non-empty input plan (e.g., from WITH clause or previous MATCH) - // If so, we need to create a CartesianProduct to join the previous plan with this new pattern - let has_existing_plan = !matches!(plan.as_ref(), LogicalPlan::Empty); - - crate::debug_print!("=== has_existing_plan: {} ===", has_existing_plan); - - if has_existing_plan { - crate::debug_print!( - "=== DISCONNECTED PATTERN WITH EXISTING PLAN: Creating CartesianProduct ===" - ); - crate::debug_print!( - "=== Existing plan type: {:?} ===", - std::mem::discriminant(&*plan) - ); - } - - // we will keep start graph node at the right side and end at the left side - crate::debug_print!("=== DISCONNECTED PATTERN: About to create start_graph_node ==="); - - let (start_scan, start_is_denorm) = - if is_label_denormalized(&start_node_label, plan_ctx) { - crate::debug_print!( - "=== Start node '{}' is DENORMALIZED, creating Empty scan ===", - start_node_alias - ); - (Arc::new(LogicalPlan::Empty), true) - } else { - let scan = generate_scan( - start_node_alias.clone(), - start_node_label.clone(), - plan_ctx, - )?; - crate::debug_print!( - "=== DISCONNECTED: start_scan created, calling is_denormalized_scan ===" - ); - let is_d = is_denormalized_scan(&scan); - crate::debug_print!("=== DISCONNECTED: start_is_denorm = {} ===", is_d); - (scan, is_d) - }; - - let start_graph_node = GraphNode { - input: start_scan, - alias: start_node_alias.clone(), - label: start_node_label.clone().map(|s| s.to_string()), - is_denormalized: start_is_denorm, - projected_columns: None, - }; - crate::debug_print!( - "=== DISCONNECTED: start_graph_node created with is_denormalized={} ===", - start_graph_node.is_denormalized - ); - plan_ctx.insert_table_ctx( - start_node_alias.clone(), - TableCtx::build( - start_node_alias.clone(), - start_node_label.clone().map(|l| vec![l]), - start_node_props, - false, - start_node_ref.name.is_some(), - ), - ); - - let (end_scan, end_is_denorm) = if is_label_denormalized(&end_node_label, plan_ctx) { - crate::debug_print!( - "=== End node '{}' is DENORMALIZED, creating Empty scan ===", - end_node_alias - ); - (Arc::new(LogicalPlan::Empty), true) - } else { - let scan = generate_scan(end_node_alias.clone(), end_node_label.clone(), plan_ctx)?; - let is_d = is_denormalized_scan(&scan); - (scan, is_d) - }; - - let end_graph_node = GraphNode { - input: end_scan, - alias: end_node_alias.clone(), - label: end_node_label.clone().map(|s| s.to_string()), - is_denormalized: end_is_denorm, - projected_columns: None, - }; - plan_ctx.insert_table_ctx( - end_node_alias.clone(), - TableCtx::build( - end_node_alias.clone(), - end_node_label.clone().map(|l| vec![l]), - end_node_props, - false, - end_node_ref.name.is_some(), - ), - ); - - let (left_conn, right_conn) = match rel.direction { - ast::Direction::Outgoing => (start_node_alias.clone(), end_node_alias.clone()), - ast::Direction::Incoming => (end_node_alias.clone(), start_node_alias.clone()), - ast::Direction::Either => (start_node_alias.clone(), end_node_alias.clone()), - }; - - // Compute left and right node labels based on direction for relationship lookup - let (left_node_label_for_rel, right_node_label_for_rel) = match rel.direction { - ast::Direction::Outgoing => (start_node_label, end_node_label), - ast::Direction::Incoming => (end_node_label, start_node_label), - ast::Direction::Either => (start_node_label, end_node_label), - }; - - let (left_node, right_node) = match rel.direction { - ast::Direction::Outgoing => ( - Arc::new(LogicalPlan::GraphNode(start_graph_node)), - Arc::new(LogicalPlan::GraphNode(end_graph_node)), - ), - ast::Direction::Incoming => ( - Arc::new(LogicalPlan::GraphNode(end_graph_node)), - Arc::new(LogicalPlan::GraphNode(start_graph_node)), - ), - ast::Direction::Either => ( - Arc::new(LogicalPlan::GraphNode(start_graph_node)), - Arc::new(LogicalPlan::GraphNode(end_graph_node)), - ), - }; - - // Determine anchor_connection for OPTIONAL MATCH - // Check which connection already exists in alias_table_ctx_map - let anchor_connection = if is_optional { - let alias_map = plan_ctx.get_alias_table_ctx_map(); - if alias_map.contains_key(&left_conn) && !alias_map.contains_key(&right_conn) { - Some(left_conn.clone()) - } else if alias_map.contains_key(&right_conn) && !alias_map.contains_key(&left_conn) - { - Some(right_conn.clone()) - } else { - None - } - } else { - None - }; - - let graph_rel_node = GraphRel { - left: left_node, - center: generate_relationship_center( - &rel_alias, - &rel_labels, - &left_conn, - &right_conn, - &left_node_label_for_rel, - &right_node_label_for_rel, - plan_ctx, - )?, - right: right_node, - alias: rel_alias.clone(), - direction: rel.direction.clone().into(), - left_connection: left_conn.clone(), // Left node is the start node (left_conn for Outgoing) - right_connection: right_conn.clone(), // Right node is the end node (right_conn for Outgoing) - is_rel_anchor: false, - variable_length: { - let is_multi_type = - rel_labels.as_ref().map_or(false, |labels| labels.len() > 1); - if let Some(vlp) = rel.variable_length.clone() { - let spec: VariableLengthSpec = vlp.into(); - let is_exact_one_hop = spec.min_hops == Some(1) && spec.max_hops == Some(1); - if is_exact_one_hop && !is_multi_type { - None // *1 single-type is same as regular relationship - } else { - Some(spec) // Keep *1 for multi-type or ranges - } - } else if is_multi_type { - // Add implicit *1 for multi-type without VLP (polymorphic end node) - Some(VariableLengthSpec { - min_hops: Some(1), - max_hops: Some(1), - }) - } else { - None // Single-type, no VLP - } - }, - shortest_path_mode: shortest_path_mode.clone(), - path_variable: path_variable.map(|s| s.to_string()), - where_predicate: { - // 🔧 FIX: For VLP patterns (including shortestPath), extract filters/properties from bound nodes - // When nodes like (p1:Airport {code: 'LAX'}) are used with VLP patterns, their filters - // are in plan_ctx but not automatically merged into GraphRel.where_predicate - // This is needed for VLP CTE generation to apply correct filters with property mapping - if shortest_path_mode.is_some() || rel.variable_length.is_some() { - use crate::query_planner::logical_expr::{Operator, OperatorApplication}; - let mut node_filters = vec![]; - - // Extract filters/properties for left node - if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&left_conn) { - // Get both existing filters AND unconverted properties - node_filters.extend(table_ctx.get_filters().iter().cloned()); - - // Convert any remaining properties to filters - let props = table_ctx.get_and_clear_properties(); - if !props.is_empty() { - match convert_properties(props, &left_conn) { - Ok(mut prop_filters) => { - log::info!( - "🔧 VLP: Converted {} properties to filters for left node '{}'", - prop_filters.len(), - left_conn - ); - node_filters.append(&mut prop_filters); - } - Err(e) => { - log::warn!( - "Failed to convert properties for left node '{}': {:?}", - left_conn, - e - ); - } - } - } - } - - // Extract filters/properties for right node - if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&right_conn) { - // Get both existing filters AND unconverted properties - node_filters.extend(table_ctx.get_filters().iter().cloned()); - - // Convert any remaining properties to filters - let props = table_ctx.get_and_clear_properties(); - if !props.is_empty() { - match convert_properties(props, &right_conn) { - Ok(mut prop_filters) => { - log::info!( - "🔧 VLP: Converted {} properties to filters for right node '{}'", - prop_filters.len(), - right_conn - ); - node_filters.append(&mut prop_filters); - } - Err(e) => { - log::warn!( - "Failed to convert properties for right node '{}': {:?}", - right_conn, - e - ); - } - } - } - } - - // Combine all filters with AND - if !node_filters.is_empty() { - log::info!( - "🔧 VLP: Merged {} bound node filters into where_predicate for rel '{}'", - node_filters.len(), - rel_alias - ); - Some( - node_filters - .into_iter() - .reduce(|acc, filter| { - LogicalExpr::OperatorApplicationExp(OperatorApplication { - operator: Operator::And, - operands: vec![acc, filter], - }) - }) - .expect("node_filters is non-empty, reduce() must return Some"), - ) - } else { - None // No filters found - } - } else { - None // Will be populated by filter pushdown optimization for regular patterns - } - }, - labels: rel_labels.clone(), - is_optional: if is_optional { Some(true) } else { None }, - anchor_connection, - cte_references: std::collections::HashMap::new(), - }; - plan_ctx.insert_table_ctx( - rel_alias.clone(), - TableCtx::build( - rel_alias.clone(), - rel_labels, - rel_properties, - true, - rel.name.is_some(), - ), - ); - - // Set connected node labels for polymorphic relationship resolution - if let Some(rel_table_ctx) = plan_ctx.get_mut_table_ctx_opt(&rel_alias) { - rel_table_ctx.set_connected_nodes( - left_node_label_for_rel.clone(), - right_node_label_for_rel.clone(), - ); - } - - // Register path variable in PlanCtx with full TypedVariable::Path metadata - if let Some(path_var) = path_variable { - // Extract length bounds from variable_length in graph_rel_node - let length_bounds = graph_rel_node - .variable_length - .as_ref() - .map(|vlp| (vlp.min_hops, vlp.max_hops)); - - // First register TypedVariable::Path with full metadata - plan_ctx.define_path( - path_var.to_string(), - Some(left_conn.clone()), // start_node - Some(right_conn.clone()), // end_node - Some(rel_alias.clone()), // relationship - length_bounds, // length bounds from VLP spec - shortest_path_mode.is_some(), // is_shortest_path - ); - - // Then register TableCtx for backward compatibility with code that uses alias_table_ctx_map - plan_ctx.insert_table_ctx( - path_var.to_string(), - TableCtx::build( - path_var.to_string(), - None, // Path variables don't have labels - vec![], // Path variables don't have properties - false, // Not a relationship - true, // Explicitly named by user - ), - ); - - log::info!( - "📍 Registered path variable '{}' with TypedVariable::Path (start={}, end={}, bounds={:?})", - path_var, left_conn, right_conn, length_bounds - ); - } - - // Create the GraphRel for this pattern - let new_pattern = Arc::new(LogicalPlan::GraphRel(graph_rel_node)); - - // If we have an existing plan (e.g., from WITH clause), combine with CartesianProduct - if has_existing_plan { - // CRITICAL FIX: When existing plan is OPTIONAL and new pattern is REQUIRED, - // swap them so the required pattern becomes the anchor (FROM clause). - // This ensures correct SQL generation: - // OPTIONAL MATCH ... MATCH x → FROM x LEFT JOIN optional_pattern - // Instead of wrong: - // FROM optional_pattern CROSS JOIN x - let existing_is_optional = plan.is_optional_pattern(); - let (left, right, cp_is_optional) = if existing_is_optional && !is_optional { - // Swap: required pattern becomes left (anchor), optional becomes right - log::info!( - "🔄 CartesianProduct: Swapping left/right - existing plan is optional, new pattern is required" - ); - (new_pattern.clone(), plan.clone(), true) // is_optional=true means RIGHT is optional - } else { - // Normal case: existing plan is anchor - (plan.clone(), new_pattern.clone(), is_optional) - }; - - plan = Arc::new(LogicalPlan::CartesianProduct(CartesianProduct { - left, - right, - is_optional: cp_is_optional, - join_condition: None, // Will be populated by optimizer if WHERE bridges both sides - })); - crate::debug_print!( - "│ ✓ Created CartesianProduct (combining existing plan with new pattern)" - ); - crate::debug_print!( - "│ Plan is now: CartesianProduct(optional: {})", - cp_is_optional - ); - } else { - plan = new_pattern; - crate::debug_print!("│ ✓ Created GraphRel (first pattern - disconnected)"); - crate::debug_print!("│ Plan is now: GraphRel"); - } - crate::debug_print!("└─ Pattern #{} complete\n", pattern_idx); - } - } - - crate::debug_print!("╔════════════════════════════════════════"); - crate::debug_print!("║ traverse_connected_pattern_with_mode COMPLETE"); - crate::debug_print!("║ Final plan type: {:?}", std::mem::discriminant(&*plan)); - crate::debug_print!("╚════════════════════════════════════════\n"); - - Ok(plan) -} - -fn traverse_node_pattern( - node_pattern: &ast::NodePattern, - plan: Arc, - plan_ctx: &mut PlanCtx, -) -> LogicalPlanResult> { - // For now we are not supporting empty node. standalone node with name is supported. - let node_alias = node_pattern - .name - .ok_or(LogicalPlanError::EmptyNode)? - .to_string(); - let mut node_label: Option = node_pattern.first_label().map(|val| val.to_string()); - - // === SINGLE-NODE-SCHEMA INFERENCE === - // If no label provided and schema has only one node type, use it - if node_label.is_none() { - if let Ok(Some(inferred_label)) = infer_node_label_from_schema(plan_ctx.schema(), plan_ctx) - { - log::info!( - "Node '{}' label inferred as '{}' (single node type in schema)", - node_alias, - inferred_label - ); - node_label = Some(inferred_label); - } - } - - let node_props: Vec = node_pattern - .properties - .clone() - .map(|props| { - props - .into_iter() - .map(|p| Property::try_from(p).unwrap()) - .collect() - }) - .unwrap_or_default(); - - // if alias already present in ctx map then just add its conditions and do not add it in the logical plan - if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&node_alias) { - if node_label.is_some() { - table_ctx.set_labels(node_label.map(|l| vec![l])); - } - if !node_props.is_empty() { - table_ctx.append_properties(node_props); - } - Ok(plan) - } else { - // plan_ctx.alias_table_ctx_map.insert(node_alias.clone(), TableCtx { label: node_label, properties: node_props, filter_predicates: vec![], projection_items: vec![], is_rel: false, use_edge_list: false, explicit_alias: node_pattern.name.is_some() }); - plan_ctx.insert_table_ctx( - node_alias.clone(), - TableCtx::build( - node_alias.clone(), - node_label.clone().map(|l| vec![l]), // Clone here so we can use it below - node_props, - false, - node_pattern.name.is_some(), - ), - ); - - let scan = generate_scan(node_alias.clone(), node_label.clone(), plan_ctx)?; - - // Check if this is a Union (denormalized node with BOTH positions) - // In that case, wrap EACH branch in its own GraphNode, then return the Union - if let LogicalPlan::Union(union) = scan.as_ref() { - log::info!( - "✓ Wrapping Union branches in GraphNodes for alias '{}'", - node_alias - ); - let wrapped_inputs: Vec> = union - .inputs - .iter() - .map(|branch| { - let is_denorm = is_denormalized_scan(branch); - Arc::new(LogicalPlan::GraphNode(GraphNode { - input: branch.clone(), - alias: node_alias.clone(), - label: node_label.clone().map(|s| s.to_string()), - is_denormalized: is_denorm, - projected_columns: None, - })) - }) - .collect(); - - let wrapped_union = Union { - inputs: wrapped_inputs, - union_type: union.union_type.clone(), - }; - log::info!( - "✓✓✓ WRAPPING UNION: {} branches being wrapped in GraphNodes ✓✓✓", - wrapped_union.inputs.len() - ); - return Ok(Arc::new(LogicalPlan::Union(wrapped_union))); - } - - // Normal case: single ViewScan wrapped in GraphNode - let is_denorm = is_denormalized_scan(&scan); - let new_node_alias = node_alias.clone(); // Clone for logging - let graph_node = GraphNode { - input: scan, - alias: node_alias, - label: node_label.map(|s| s.to_string()), - is_denormalized: is_denorm, - projected_columns: None, - }; - let new_node_plan = Arc::new(LogicalPlan::GraphNode(graph_node)); - - // Check if we need to create a CartesianProduct - // For comma patterns like (a:User), (b:User), we need CROSS JOIN - let has_existing_plan = match plan.as_ref() { - LogicalPlan::Empty => false, - _ => true, - }; - - if has_existing_plan { - // CRITICAL FIX: When existing plan is OPTIONAL and new node is from REQUIRED MATCH, - // swap them so the required node becomes the anchor (FROM clause). - let existing_is_optional = plan.is_optional_pattern(); - let (left, right, cp_is_optional) = if existing_is_optional { - // Swap: required node becomes left (anchor), optional becomes right - log::info!( - "🔄 CartesianProduct (node): Swapping - existing plan is optional, node '{}' is required", - new_node_alias - ); - (new_node_plan.clone(), plan.clone(), true) // is_optional=true means RIGHT is optional - } else { - // Normal case: existing plan is anchor - (plan.clone(), new_node_plan.clone(), false) - }; - - log::info!( - "Creating CartesianProduct for comma pattern: existing plan + node '{}'", - new_node_alias - ); - Ok(Arc::new(LogicalPlan::CartesianProduct(CartesianProduct { - left, - right, - is_optional: cp_is_optional, - join_condition: None, - }))) - } else { - Ok(new_node_plan) - } - } -} - -pub fn evaluate_match_clause<'a>( - match_clause: &ast::MatchClause<'a>, - plan: Arc, - plan_ctx: &mut PlanCtx, -) -> LogicalPlanResult> { - evaluate_match_clause_with_optional(match_clause, plan, plan_ctx, false) -} - -/// Internal function that supports optional mode -pub fn evaluate_match_clause_with_optional<'a>( - match_clause: &ast::MatchClause<'a>, - mut plan: Arc, - plan_ctx: &mut PlanCtx, - is_optional: bool, -) -> LogicalPlanResult> { - for (idx, (path_variable, path_pattern)) in match_clause.path_patterns.iter().enumerate() { - match path_pattern { - ast::PathPattern::Node(node_pattern) => { - plan = traverse_node_pattern(node_pattern, plan, plan_ctx)?; - } - ast::PathPattern::ConnectedPattern(connected_patterns) => { - plan = traverse_connected_pattern_with_mode( - connected_patterns, - plan, - plan_ctx, - idx, - None, - *path_variable, - is_optional, - )?; - } - ast::PathPattern::ShortestPath(inner_pattern) => { - // Process inner pattern with shortest path mode enabled - plan = evaluate_single_path_pattern_with_mode( - inner_pattern.as_ref(), - plan, - plan_ctx, - idx, - Some(ShortestPathMode::Shortest), - *path_variable, - )?; - } - ast::PathPattern::AllShortestPaths(inner_pattern) => { - // Process inner pattern with all shortest paths mode enabled - plan = evaluate_single_path_pattern_with_mode( - inner_pattern.as_ref(), - plan, - plan_ctx, - idx, - Some(ShortestPathMode::AllShortest), - *path_variable, - )?; - } - } - } - - convert_properties_to_operator_application(plan_ctx)?; - - // Apply WHERE clause if present (OpenCypher grammar allows WHERE per MATCH) - if let Some(where_clause) = &match_clause.where_clause { - use crate::query_planner::logical_plan::where_clause::evaluate_where_clause; - plan = evaluate_where_clause(where_clause, plan)?; - } - - Ok(plan) -} - -// Helper function to evaluate a single path pattern with shortest path mode -fn evaluate_single_path_pattern_with_mode<'a>( - path_pattern: &ast::PathPattern<'a>, - plan: Arc, - plan_ctx: &mut PlanCtx, - idx: usize, - shortest_path_mode: Option, - path_variable: Option<&str>, -) -> LogicalPlanResult> { - match path_pattern { - ast::PathPattern::Node(node_pattern) => traverse_node_pattern(node_pattern, plan, plan_ctx), - ast::PathPattern::ConnectedPattern(connected_patterns) => { - traverse_connected_pattern_with_mode( - connected_patterns, - plan, - plan_ctx, - idx, - shortest_path_mode, - path_variable, - false, - ) - } - ast::PathPattern::ShortestPath(inner) => { - // Recursively unwrap with shortest path mode - evaluate_single_path_pattern_with_mode( - inner.as_ref(), - plan, - plan_ctx, - idx, - Some(ShortestPathMode::Shortest), - path_variable, - ) - } - ast::PathPattern::AllShortestPaths(inner) => { - // Recursively unwrap with all shortest paths mode - evaluate_single_path_pattern_with_mode( - inner.as_ref(), - plan, - plan_ctx, - idx, - Some(ShortestPathMode::AllShortest), - path_variable, - ) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::open_cypher_parser::ast; - use crate::query_planner::logical_expr::{Direction, Literal, LogicalExpr, PropertyKVPair}; - use std::cell::RefCell; - use std::rc::Rc; - - #[test] - fn test_convert_properties_with_kv_pairs() { - let properties = vec![ - Property::PropertyKV(PropertyKVPair { - key: "name".to_string(), - value: LogicalExpr::Literal(Literal::String("John".to_string())), - }), - Property::PropertyKV(PropertyKVPair { - key: "age".to_string(), - value: LogicalExpr::Literal(Literal::Integer(30)), - }), - ]; - - let result = convert_properties(properties, "n").unwrap(); - assert_eq!(result.len(), 2); - - // Check first property conversion - match &result[0] { - LogicalExpr::OperatorApplicationExp(op_app) => { - assert_eq!(op_app.operator, Operator::Equal); - assert_eq!(op_app.operands.len(), 2); - match &op_app.operands[0] { - LogicalExpr::PropertyAccessExp(prop) => { - assert_eq!(prop.table_alias.0, "n"); - match &prop.column { - PropertyValue::Column(col) => assert_eq!(col, "name"), - _ => panic!("Expected Column property"), - } - } - _ => panic!("Expected PropertyAccessExp"), - } - match &op_app.operands[1] { - LogicalExpr::Literal(Literal::String(s)) => assert_eq!(s, "John"), - _ => panic!("Expected String literal"), - } - } - _ => panic!("Expected OperatorApplication"), - } - - // Check second property conversion - match &result[1] { - LogicalExpr::OperatorApplicationExp(op_app) => { - assert_eq!(op_app.operator, Operator::Equal); - match &op_app.operands[1] { - LogicalExpr::Literal(Literal::Integer(age)) => assert_eq!(*age, 30), - _ => panic!("Expected Integer literal"), - } - } - _ => panic!("Expected OperatorApplication"), - } - } - - #[test] - fn test_convert_properties_with_param_returns_error() { - let properties = vec![ - Property::PropertyKV(PropertyKVPair { - key: "name".to_string(), - value: LogicalExpr::Literal(Literal::String("Alice".to_string())), - }), - Property::Param("param1".to_string()), - ]; - - let result = convert_properties(properties, "n"); - assert!(result.is_err()); - match result.unwrap_err() { - LogicalPlanError::FoundParamInProperties => (), // Expected error - _ => panic!("Expected FoundParamInProperties error"), - } - } - - #[test] - fn test_convert_properties_empty_list() { - let properties = vec![]; - let result = convert_properties(properties, "n").unwrap(); - assert_eq!(result.len(), 0); - } - - #[test] - fn test_generate_id_uniqueness() { - let id1 = generate_id(); - let id2 = generate_id(); - - // IDs should be unique - assert_ne!(id1, id2); - - // IDs should start with 't' (simple format: t1, t2, t3...) - assert!(id1.starts_with('t')); - assert!(id2.starts_with('t')); - - // IDs should be reasonable length (t1 to t999999+) - assert!(id1.len() >= 2 && id1.len() < 10); - assert!(id2.len() >= 2 && id2.len() < 10); - } - - #[test] - fn test_traverse_node_pattern_new_node() { - let graph_schema = create_test_schema_with_relationships(); - let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); - let initial_plan = Arc::new(LogicalPlan::Empty); - - let node_pattern = ast::NodePattern { - name: Some("customer"), - labels: Some(vec!["Person"]), - properties: Some(vec![ast::Property::PropertyKV(ast::PropertyKVPair { - key: "city", - value: ast::Expression::Literal(ast::Literal::String("Boston")), - })]), - }; - - let result = - traverse_node_pattern(&node_pattern, initial_plan.clone(), &mut plan_ctx).unwrap(); - - // Should return a GraphNode plan - match result.as_ref() { - LogicalPlan::GraphNode(graph_node) => { - assert_eq!(graph_node.alias, "customer"); - // Input should be a ViewScan - match graph_node.input.as_ref() { - LogicalPlan::ViewScan(_view_scan) => { - // ViewScan created successfully via try_generate_view_scan - // This happens when GLOBAL_GRAPH_SCHEMA is available - } - _ => panic!("Expected ViewScan as input"), - } - } - _ => panic!("Expected GraphNode"), - } - - // Should have added entry to plan context - let table_ctx = plan_ctx.get_table_ctx("customer").unwrap(); - assert_eq!(table_ctx.get_label_opt(), Some("Person".to_string())); - // Note: properties get moved to filters after convert_properties_to_operator_application - assert!(table_ctx.is_explicit_alias()); - } - - #[test] - fn test_traverse_node_pattern_existing_node() { - let mut plan_ctx = PlanCtx::default(); - let initial_plan = Arc::new(LogicalPlan::Empty); - - // Pre-populate plan context with existing node - plan_ctx.insert_table_ctx( - "customer".to_string(), - TableCtx::build( - "customer".to_string(), - Some("User".to_string()).map(|l| vec![l]), - vec![], - false, - true, - ), - ); - - let node_pattern = ast::NodePattern { - name: Some("customer"), - labels: Some(vec!["Person"]), // Different label - properties: Some(vec![ast::Property::PropertyKV(ast::PropertyKVPair { - key: "age", - value: ast::Expression::Literal(ast::Literal::Integer(25)), - })]), - }; - - let result = - traverse_node_pattern(&node_pattern, initial_plan.clone(), &mut plan_ctx).unwrap(); - - // Should return the same plan (not create new GraphNode) - assert_eq!(result, initial_plan); - - // Should have updated the existing table context - let table_ctx = plan_ctx.get_table_ctx("customer").unwrap(); - assert_eq!(table_ctx.get_label_opt(), Some("Person".to_string())); // Label should be updated - // Note: properties get moved to filters after convert_properties_to_operator_application - } - - #[test] - fn test_traverse_node_pattern_empty_node_error() { - let mut plan_ctx = PlanCtx::default(); - let initial_plan = Arc::new(LogicalPlan::Empty); - - let node_pattern = ast::NodePattern { - name: None, // Empty node - labels: Some(vec!["Person"]), - properties: None, - }; - - let result = traverse_node_pattern(&node_pattern, initial_plan, &mut plan_ctx); - assert!(result.is_err()); - match result.unwrap_err() { - LogicalPlanError::EmptyNode => (), // Expected error - _ => panic!("Expected EmptyNode error"), - } - } - - #[test] - fn test_traverse_connected_pattern_new_connection() { - let graph_schema = create_test_schema_with_relationships(); - let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); - let initial_plan = Arc::new(LogicalPlan::Empty); - - let start_node = ast::NodePattern { - name: Some("user"), - labels: Some(vec!["Person"]), - properties: None, - }; - - let end_node = ast::NodePattern { - name: Some("company"), - labels: Some(vec!["Organization"]), - properties: None, - }; - - let relationship = ast::RelationshipPattern { - name: Some("works_at"), - direction: ast::Direction::Outgoing, - labels: Some(vec!["WORKS_AT"]), - properties: None, - variable_length: None, - }; - - let connected_pattern = ast::ConnectedPattern { - start_node: Rc::new(RefCell::new(start_node)), - relationship, - end_node: Rc::new(RefCell::new(end_node)), - }; - - let connected_patterns = vec![connected_pattern]; - - let result = - traverse_connected_pattern(&connected_patterns, initial_plan, &mut plan_ctx, 0) - .unwrap(); - - // Should return a GraphRel plan - match result.as_ref() { - LogicalPlan::GraphRel(graph_rel) => { - assert_eq!(graph_rel.alias, "works_at"); - assert_eq!(graph_rel.direction, Direction::Outgoing); - assert_eq!(graph_rel.left_connection, "user"); // Left node is the start node (user) for outgoing relationships - assert_eq!(graph_rel.right_connection, "company"); // Right node is the end node (company) for outgoing relationships - assert!(!graph_rel.is_rel_anchor); - - // Check left side (start node for outgoing relationships) - match graph_rel.left.as_ref() { - LogicalPlan::GraphNode(left_node) => { - assert_eq!(left_node.alias, "user"); - } - _ => panic!("Expected GraphNode on left"), - } - - // Check right side (end node for outgoing relationships) - match graph_rel.right.as_ref() { - LogicalPlan::GraphNode(right_node) => { - assert_eq!(right_node.alias, "company"); - } - _ => panic!("Expected GraphNode on right"), - } - } - _ => panic!("Expected GraphRel"), - } - - // Should have added entries to plan context - assert!(plan_ctx.get_table_ctx("user").is_ok()); - assert!(plan_ctx.get_table_ctx("company").is_ok()); - assert!(plan_ctx.get_table_ctx("works_at").is_ok()); - - let rel_ctx = plan_ctx.get_table_ctx("works_at").unwrap(); - assert!(rel_ctx.is_relation()); - } - - #[test] - fn test_traverse_connected_pattern_with_existing_start_node() { - let graph_schema = create_test_schema_with_relationships(); - let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); - let initial_plan = Arc::new(LogicalPlan::Empty); - - // Pre-populate with existing start node - plan_ctx.insert_table_ctx( - "user".to_string(), - TableCtx::build( - "user".to_string(), - Some("Person".to_string()).map(|l| vec![l]), - vec![], - false, - true, - ), - ); - - let start_node = ast::NodePattern { - name: Some("user"), // This exists in plan_ctx - labels: Some(vec!["Employee"]), // Different label - properties: None, - }; - - let end_node = ast::NodePattern { - name: Some("project"), - labels: Some(vec!["Project"]), - properties: None, - }; - - let relationship = ast::RelationshipPattern { - name: Some("assigned_to"), - direction: ast::Direction::Incoming, - labels: Some(vec!["ASSIGNED_TO"]), - properties: None, - variable_length: None, - }; - - let connected_pattern = ast::ConnectedPattern { - start_node: Rc::new(RefCell::new(start_node)), - relationship, - end_node: Rc::new(RefCell::new(end_node)), - }; - - let connected_patterns = vec![connected_pattern]; - - let result = - traverse_connected_pattern(&connected_patterns, initial_plan, &mut plan_ctx, 0) - .unwrap(); - - // Should return a GraphRel plan with different structure - match result.as_ref() { - LogicalPlan::GraphRel(graph_rel) => { - assert_eq!(graph_rel.alias, "assigned_to"); - assert_eq!(graph_rel.direction, Direction::Incoming); - assert_eq!(graph_rel.left_connection, "project"); - assert_eq!(graph_rel.right_connection, "user"); - - // Left should be the new end node - match graph_rel.left.as_ref() { - LogicalPlan::GraphNode(left_node) => { - assert_eq!(left_node.alias, "project"); - } - _ => panic!("Expected GraphNode on left"), - } - } - _ => panic!("Expected GraphRel"), - } - - // Existing start node should have updated label - let user_ctx = plan_ctx.get_table_ctx("user").unwrap(); - assert_eq!(user_ctx.get_label_opt(), Some("Employee".to_string())); - } - - // Test removed: DisconnectedPatternFound error no longer exists - // as of commit b015cf0 which allows disconnected comma patterns - // with WHERE clause predicates for cross-table correlation - - #[test] - fn test_evaluate_match_clause_with_node_and_connected_pattern() { - let graph_schema = create_test_schema_with_relationships(); - let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); - let initial_plan = Arc::new(LogicalPlan::Empty); - - // Create a match clause with both node pattern and connected pattern - let node_pattern = ast::NodePattern { - name: Some("admin"), - labels: Some(vec!["User"]), - properties: Some(vec![ast::Property::PropertyKV(ast::PropertyKVPair { - key: "role", - value: ast::Expression::Literal(ast::Literal::String("administrator")), - })]), - }; - - let start_node = ast::NodePattern { - name: Some("admin"), // Same as above - should connect - labels: None, - properties: None, - }; - - let end_node = ast::NodePattern { - name: Some("system"), - labels: Some(vec!["System"]), - properties: None, - }; - - let relationship = ast::RelationshipPattern { - name: Some("manages"), - direction: ast::Direction::Outgoing, - labels: Some(vec!["MANAGES"]), - properties: None, - variable_length: None, - }; - - let connected_pattern = ast::ConnectedPattern { - start_node: Rc::new(RefCell::new(start_node)), - relationship, - end_node: Rc::new(RefCell::new(end_node)), - }; - - let match_clause = ast::MatchClause { - path_patterns: vec![ - (None, ast::PathPattern::Node(node_pattern)), - ( - None, - ast::PathPattern::ConnectedPattern(vec![connected_pattern]), - ), - ], - where_clause: None, - }; - - let result = evaluate_match_clause(&match_clause, initial_plan, &mut plan_ctx).unwrap(); - - // Should return a GraphRel plan - match result.as_ref() { - LogicalPlan::GraphRel(graph_rel) => { - assert_eq!(graph_rel.alias, "manages"); - assert_eq!(graph_rel.direction, Direction::Outgoing); - } - _ => panic!("Expected GraphRel at top level"), - } - - // Properties should have been converted to filters - let admin_ctx = plan_ctx.get_table_ctx("admin").unwrap(); - assert_eq!(admin_ctx.get_filters().len(), 1); - } - - #[test] - fn test_convert_properties_to_operator_application() { - let mut plan_ctx = PlanCtx::default(); - - // Add table context with properties - let properties = vec![Property::PropertyKV(PropertyKVPair { - key: "status".to_string(), - value: LogicalExpr::Literal(Literal::String("active".to_string())), - })]; - - let table_ctx = TableCtx::build( - "user".to_string(), - Some("Person".to_string()).map(|l| vec![l]), - properties, - false, - true, - ); - - plan_ctx.insert_table_ctx("user".to_string(), table_ctx); - - // Before conversion, table should have no filters - let table_ctx_before = plan_ctx.get_table_ctx("user").unwrap(); - assert_eq!(table_ctx_before.get_filters().len(), 0); - - // Convert properties - let result = convert_properties_to_operator_application(&mut plan_ctx); - assert!(result.is_ok()); - - // After conversion, properties should be moved to filters - let table_ctx_after = plan_ctx.get_table_ctx("user").unwrap(); - assert_eq!(table_ctx_after.get_filters().len(), 1); // Filter added - - // Check the filter predicate - match &table_ctx_after.get_filters()[0] { - LogicalExpr::OperatorApplicationExp(op_app) => { - assert_eq!(op_app.operator, Operator::Equal); - match &op_app.operands[0] { - LogicalExpr::PropertyAccessExp(prop_access) => { - assert_eq!(prop_access.table_alias.0, "user"); - assert_eq!(prop_access.column.raw(), "status"); - } - _ => panic!("Expected PropertyAccessExp"), - } - } - _ => panic!("Expected OperatorApplication"), - } - } - - #[test] - fn test_generate_scan() { - // Create schema with Customer node - use crate::graph_catalog::graph_schema::{GraphSchema, NodeIdSchema, NodeSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - nodes.insert( - "Customer".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "customers".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - - let schema = Arc::new(GraphSchema::build( - 1, - "test".to_string(), - nodes, - HashMap::new(), - )); - let plan_ctx = PlanCtx::new(schema); - - let scan = generate_scan( - "customers".to_string(), - Some("Customer".to_string()), - &plan_ctx, - ) - .unwrap(); - - match scan.as_ref() { - LogicalPlan::ViewScan(scan_plan) => { - assert_eq!(scan_plan.source_table, "test_db.customers"); - // The label is "Customer" but ViewScan doesn't store it directly - } - _ => panic!("Expected ViewScan plan"), - } - } - - // ========================================== - // Tests for relationship type inference - // ========================================== - - fn create_test_schema_with_relationships() -> GraphSchema { - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - nodes.insert( - "Airport".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "airports".to_string(), - column_names: vec!["id".to_string(), "code".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - nodes.insert( - "User".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "users".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - nodes.insert( - "Post".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "posts".to_string(), - column_names: vec!["id".to_string(), "title".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - - let mut rels = HashMap::new(); - rels.insert( - "FLIGHT".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "flights".to_string(), - column_names: vec!["from_airport".to_string(), "to_airport".to_string()], - from_node: "Airport".to_string(), - to_node: "Airport".to_string(), - from_node_table: "airports".to_string(), - to_node_table: "airports".to_string(), - from_id: "from_airport".to_string(), - to_id: "to_airport".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - rels.insert( - "LIKES".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "likes".to_string(), - column_names: vec!["user_id".to_string(), "post_id".to_string()], - from_node: "User".to_string(), - to_node: "Post".to_string(), - from_node_table: "users".to_string(), - to_node_table: "posts".to_string(), - from_id: "user_id".to_string(), - to_id: "post_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - rels.insert( - "FOLLOWS".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "follows".to_string(), - column_names: vec!["follower_id".to_string(), "followed_id".to_string()], - from_node: "User".to_string(), - to_node: "User".to_string(), - from_node_table: "users".to_string(), - to_node_table: "users".to_string(), - from_id: "follower_id".to_string(), - to_id: "followed_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - - // Add missing nodes for tests - nodes.insert( - "Person".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "persons".to_string(), - column_names: vec!["id".to_string(), "name".to_string(), "city".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - nodes.insert( - "Organization".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "organizations".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - nodes.insert( - "Employee".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "employees".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - nodes.insert( - "Project".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "projects".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - nodes.insert( - "System".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "systems".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - - // Add missing relationships for tests - rels.insert( - "WORKS_AT".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "works_at".to_string(), - column_names: vec!["person_id".to_string(), "org_id".to_string()], - from_node: "Person".to_string(), - to_node: "Organization".to_string(), - from_node_table: "persons".to_string(), - to_node_table: "organizations".to_string(), - from_id: "person_id".to_string(), - to_id: "org_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - rels.insert( - "ASSIGNED_TO".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "assigned_to".to_string(), - column_names: vec!["emp_id".to_string(), "proj_id".to_string()], - from_node: "Employee".to_string(), - to_node: "Project".to_string(), - from_node_table: "employees".to_string(), - to_node_table: "projects".to_string(), - from_id: "emp_id".to_string(), - to_id: "proj_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - rels.insert( - "MANAGES".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "manages".to_string(), - column_names: vec!["user_id".to_string(), "system_id".to_string()], - from_node: "User".to_string(), - to_node: "System".to_string(), - from_node_table: "users".to_string(), - to_node_table: "systems".to_string(), - from_id: "user_id".to_string(), - to_id: "system_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - - GraphSchema::build(1, "test_db".to_string(), nodes, rels) - } - - fn create_single_relationship_schema() -> GraphSchema { - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - nodes.insert( - "Person".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "persons".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - - let mut rels = HashMap::new(); - rels.insert( - "KNOWS".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "knows".to_string(), - column_names: vec!["person1_id".to_string(), "person2_id".to_string()], - from_node: "Person".to_string(), - to_node: "Person".to_string(), - from_node_table: "persons".to_string(), - to_node_table: "persons".to_string(), - from_id: "person1_id".to_string(), - to_id: "person2_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - - GraphSchema::build(1, "test_db".to_string(), nodes, rels) - } - - #[test] - fn test_infer_relationship_type_single_schema() { - // When schema has only one relationship, use it regardless of node types - let schema = create_single_relationship_schema(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &None, // untyped start - &None, // untyped end - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - assert!(result.is_some()); - let types = result.unwrap(); - assert_eq!(types.len(), 1); - assert_eq!(types[0], "KNOWS"); - } - - #[test] - fn test_infer_relationship_type_from_start_node() { - // (a:Airport)-[r]->() should infer FLIGHT (only edge from Airport) - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &Some("Airport".to_string()), - &None, - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - assert!(result.is_some()); - let types = result.unwrap(); - assert_eq!(types.len(), 1); - assert_eq!(types[0], "FLIGHT"); - } - - #[test] - fn test_infer_relationship_type_from_end_node() { - // ()-[r]->(p:Post) should infer LIKES (only edge to Post) - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &None, - &Some("Post".to_string()), - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - assert!(result.is_some()); - let types = result.unwrap(); - assert_eq!(types.len(), 1); - assert_eq!(types[0], "LIKES"); - } - - #[test] - fn test_infer_relationship_type_from_both_nodes() { - // (u:User)-[r]->(p:Post) should infer LIKES - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &Some("User".to_string()), - &Some("Post".to_string()), - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - assert!(result.is_some()); - let types = result.unwrap(); - assert_eq!(types.len(), 1); - assert_eq!(types[0], "LIKES"); - } - - #[test] - fn test_infer_relationship_type_multiple_matches() { - // (u:User)-[r]->() should return LIKES, FOLLOWS, and MANAGES (multiple edges from User) - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &Some("User".to_string()), - &None, - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - assert!(result.is_some()); - let types = result.unwrap(); - assert_eq!(types.len(), 3); // Now 3 relationships: LIKES, FOLLOWS, MANAGES - assert!(types.contains(&"LIKES".to_string())); - assert!(types.contains(&"FOLLOWS".to_string())); - assert!(types.contains(&"MANAGES".to_string())); - } - - #[test] - fn test_infer_relationship_type_incoming_direction() { - // ()<-[r]-(p:Post) should infer LIKES (reversed direction) - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &None, - &Some("Post".to_string()), - &ast::Direction::Incoming, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - // Incoming means: from=end(Post), to=start(None) - // LIKES has from=User, to=Post - // So we need to check: from_node=Post? No. LIKES doesn't match. - // Actually for incoming: from=end, to=start - // So Post is the end node, meaning we're looking for relationships with to_node=Post - // But incoming flips it: from_matches_end = "Post" == rel.from_node? No for LIKES - // Hmm, let me reconsider - for incoming, the arrow points to start - // So the relationship's to_node should be the pattern's start node - // And the relationship's from_node should be the pattern's end node - // In this case: ()<-[r]-(p:Post) means Post→anonymous - // So we want relationships where from_node=Post - but LIKES has from_node=User - // This should return None/empty - assert!(result.is_none() || result.as_ref().unwrap().is_empty()); - } - - #[test] - fn test_infer_relationship_type_incoming_correct() { - // (u:User)<-[r]-() should infer FOLLOWS (User is the to_node of FOLLOWS) - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &Some("User".to_string()), - &None, - &ast::Direction::Incoming, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - // Incoming: from=end(None), to=start(User) - // FOLLOWS: from=User, to=User - matches (to=User checks against start) - // LIKES: from=User, to=Post - doesn't match (to=Post != User) - assert!(result.is_some()); - let types = result.unwrap(); - assert_eq!(types.len(), 1); - assert_eq!(types[0], "FOLLOWS"); - } - - #[test] - fn test_infer_relationship_type_no_matches() { - // (a:Airport)-[r]->(u:User) should find no matching relationships - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &Some("Airport".to_string()), - &Some("User".to_string()), - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - // FLIGHT: Airport→Airport - doesn't match (to=Airport != User) - // LIKES: User→Post - doesn't match (from=User != Airport) - // FOLLOWS: User→User - doesn't match - assert!(result.is_none()); - } - - #[test] - fn test_infer_relationship_type_both_untyped_multi_schema() { - // ()-[r]->() with multiple relationships should return None - let schema = create_test_schema_with_relationships(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_relationship_type_from_nodes( - &None, - &None, - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("Should not error"); - - // Both nodes untyped and schema has 3 relationships - cannot infer - assert!(result.is_none()); - } - - // Tests for node label inference from relationship type - // Note: infer_node_labels_from_relationship function was removed - // These tests are commented out until the feature is reimplemented - - /* #[test] - fn test_infer_node_labels_from_typed_relationship() { - // ()-[r:FLIGHT]->() should infer both nodes as Airport - let schema = create_test_schema_with_relationships(); - - let (start, end, _, _) = infer_node_labels_from_relationship( - None, - None, - &Some(vec!["FLIGHT".to_string()]), - &ast::Direction::Outgoing, - &schema, - ); - - assert_eq!(start, Some("Airport".to_string())); - assert_eq!(end, Some("Airport".to_string())); - } - - #[test] - fn test_infer_node_labels_partial() { - // (u:User)-[r:LIKES]->() should infer end node as Post - let schema = create_test_schema_with_relationships(); - - let (start, end, _, _) = infer_node_labels_from_relationship( - Some("User".to_string()), - None, - &Some(vec!["LIKES".to_string()]), - &ast::Direction::Outgoing, - &schema, - ); - - // Start was already User, end should be inferred as Post - assert_eq!(start, Some("User".to_string())); - assert_eq!(end, Some("Post".to_string())); - } - - #[test] - fn test_infer_node_labels_incoming_direction() { - // ()<-[r:LIKES]-(u:User) should infer start as Post - let schema = create_test_schema_with_relationships(); - - let (start, end, _, _) = infer_node_labels_from_relationship( - None, - Some("User".to_string()), - &Some(vec!["LIKES".to_string()]), - &ast::Direction::Incoming, - &schema, - ); - - // For incoming: start is to_node (Post), end is from_node (User) - assert_eq!(start, Some("Post".to_string())); - assert_eq!(end, Some("User".to_string())); - } - */ - - #[test] - fn test_infer_relationship_type_too_many_matches_error() { - // Create a schema with many relationship types from User - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - nodes.insert( - "User".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "users".to_string(), - column_names: vec!["id".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - - let mut rels = HashMap::new(); - // Create 6 relationships from User to User (exceeds MAX_INFERRED_TYPES of 5) - for i in 1..=6 { - rels.insert( - format!("REL_{}", i), - RelationshipSchema { - database: "test_db".to_string(), - table_name: format!("rel_{}", i), - column_names: vec!["from_id".to_string(), "to_id".to_string()], - from_node: "User".to_string(), - to_node: "User".to_string(), - from_node_table: "users".to_string(), - to_node_table: "users".to_string(), - from_id: "from_id".to_string(), - to_id: "to_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: None, - to_label_column: None, - from_label_values: None, - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - } - - let schema = GraphSchema::build(1, "test_db".to_string(), nodes, rels); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - // (u:User)-[r]->() should fail with TooManyInferredTypes error - let result = infer_relationship_type_from_nodes( - &Some("User".to_string()), - &None, - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ); - - assert!(result.is_err()); - match result.unwrap_err() { - LogicalPlanError::TooManyInferredTypes { - count, - max, - types: _, - } => { - assert_eq!(count, 6); - assert_eq!(max, 5); // default max_inferred_types - } - other => panic!("Expected TooManyInferredTypes error, got: {:?}", other), - } - } - - // ======================================== - // Tests for infer_node_label_from_schema - // ======================================== - - fn create_single_node_schema() -> GraphSchema { - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - nodes.insert( - "Person".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "persons".to_string(), - column_names: vec!["id".to_string(), "name".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - - // No relationships needed for node-only inference tests - let rels = HashMap::new(); - - GraphSchema::build(1, "test_db".to_string(), nodes, rels) - } - - fn create_multi_node_schema() -> GraphSchema { - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - for node_type in &["User", "Post", "Comment"] { - nodes.insert( - node_type.to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: format!("{}s", node_type.to_lowercase()), - column_names: vec!["id".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - } - - let rels = HashMap::new(); - - GraphSchema::build(1, "test_db".to_string(), nodes, rels) - } - - fn create_empty_node_schema() -> GraphSchema { - use std::collections::HashMap; - - let nodes = HashMap::new(); - let rels = HashMap::new(); - - GraphSchema::build(1, "test_db".to_string(), nodes, rels) - } - - #[test] - fn test_infer_node_label_single_node_schema() { - // When schema has only one node type, infer it - let schema = create_single_node_schema(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); - - assert_eq!(result, Some("Person".to_string())); - } - - #[test] - fn test_infer_node_label_multi_node_schema() { - // When schema has multiple node types, cannot infer (returns None) - let schema = create_multi_node_schema(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); - - // Should not auto-infer when multiple types exist - assert_eq!(result, None); - } - - #[test] - fn test_infer_node_label_empty_schema() { - // When schema has no nodes, cannot infer - let schema = create_empty_node_schema(); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); - - assert_eq!(result, None); - } - - #[test] - fn test_infer_node_label_many_nodes_no_error() { - // When schema has many node types, should return None without error - // (unlike relationships, we don't generate UNION for standalone nodes yet) - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - for i in 1..=10 { - nodes.insert( - format!("Type{}", i), - NodeSchema { - database: "test_db".to_string(), - table_name: format!("type_{}", i), - column_names: vec!["id".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - } - - let schema = GraphSchema::build(1, "test_db".to_string(), nodes, HashMap::new()); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); - - // Should not auto-infer when many types exist (just return None, no error) - assert_eq!(result, None); - } - - #[test] - fn test_infer_node_label_denormalized_single_node() { - // Single denormalized node type should still be inferred - // The inference works at schema level - denormalized handling is done later - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - nodes.insert( - "Airport".to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: "flights".to_string(), // Edge table - column_names: vec!["Origin".to_string(), "Dest".to_string()], - primary_keys: "Origin".to_string(), - node_id: NodeIdSchema::single("Origin".to_string(), "String".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: true, // Denormalized node! - from_properties: Some({ - let mut m = HashMap::new(); - m.insert("code".to_string(), "Origin".to_string()); - m - }), - to_properties: Some({ - let mut m = HashMap::new(); - m.insert("code".to_string(), "Dest".to_string()); - m - }), - denormalized_source_table: Some("test_db.flights".to_string()), - label_column: None, - label_value: None, - }, - ); - - let schema = GraphSchema::build(1, "test_db".to_string(), nodes, HashMap::new()); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - // Should still infer the label - denormalized handling happens later - let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); - assert_eq!(result, Some("Airport".to_string())); - } - - #[test] - fn test_infer_relationship_type_polymorphic_edge() { - // Polymorphic edge with from_label_values should match typed nodes - use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; - use std::collections::HashMap; - - let mut nodes = HashMap::new(); - for node_type in &["User", "Group", "Resource"] { - nodes.insert( - node_type.to_string(), - NodeSchema { - database: "test_db".to_string(), - table_name: format!("{}s", node_type.to_lowercase()), - column_names: vec!["id".to_string()], - primary_keys: "id".to_string(), - node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - is_denormalized: false, - from_properties: None, - to_properties: None, - denormalized_source_table: None, - label_column: None, - label_value: None, - }, - ); - } - - let mut rels = HashMap::new(); - // Polymorphic MEMBER_OF: (User|Group) -> Group - rels.insert( - "MEMBER_OF".to_string(), - RelationshipSchema { - database: "test_db".to_string(), - table_name: "memberships".to_string(), - column_names: vec!["member_id".to_string(), "group_id".to_string()], - from_node: "$any".to_string(), // Polymorphic - to_node: "Group".to_string(), - from_node_table: "$any".to_string(), - to_node_table: "groups".to_string(), - from_id: "member_id".to_string(), - to_id: "group_id".to_string(), - from_node_id_dtype: "UInt64".to_string(), - to_node_id_dtype: "UInt64".to_string(), - property_mappings: HashMap::new(), - view_parameters: None, - engine: None, - use_final: None, - filter: None, - edge_id: None, - type_column: None, - from_label_column: Some("member_type".to_string()), - to_label_column: None, - from_label_values: Some(vec!["User".to_string(), "Group".to_string()]), // Polymorphic! - to_label_values: None, - from_node_properties: None, - to_node_properties: None, - is_fk_edge: false, - constraints: None, - }, - ); - - let schema = GraphSchema::build(1, "test_db".to_string(), nodes, rels); - let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); - - // (u:User)-[r]->(g:Group) should infer MEMBER_OF since User is in from_label_values - let result = infer_relationship_type_from_nodes( - &Some("User".to_string()), - &Some("Group".to_string()), - &ast::Direction::Outgoing, - &schema, - &plan_ctx, - ) - .expect("should not error"); - - assert_eq!(result, Some(vec!["MEMBER_OF".to_string()])); - } -} diff --git a/src/query_planner/logical_plan/match_clause/errors.rs b/src/query_planner/logical_plan/match_clause/errors.rs new file mode 100644 index 00000000..5a89dba6 --- /dev/null +++ b/src/query_planner/logical_plan/match_clause/errors.rs @@ -0,0 +1,7 @@ +//! Error types for MATCH clause processing. +//! +//! This module re-exports the main LogicalPlanError since match clause +//! errors are part of the broader logical plan error hierarchy. + +pub use crate::query_planner::logical_plan::errors::LogicalPlanError; +pub use crate::query_planner::logical_plan::plan_builder::LogicalPlanResult; diff --git a/src/query_planner/logical_plan/match_clause/helpers.rs b/src/query_planner/logical_plan/match_clause/helpers.rs new file mode 100644 index 00000000..e8e47e56 --- /dev/null +++ b/src/query_planner/logical_plan/match_clause/helpers.rs @@ -0,0 +1,536 @@ +//! Helper functions for MATCH clause processing. +//! +//! This module contains utility functions used during MATCH clause evaluation, +//! including property conversion, denormalization checks, and scan generation. + +use std::sync::Arc; + +use crate::graph_catalog::expression_parser::PropertyValue; +use crate::open_cypher_parser::ast; +use crate::query_planner::logical_expr::{ + LogicalExpr, Operator, OperatorApplication, Property, PropertyAccess, TableAlias, +}; +use crate::query_planner::logical_plan::{ + errors::LogicalPlanError, plan_builder::LogicalPlanResult, GraphRel, LogicalPlan, + ShortestPathMode, VariableLengthSpec, +}; +use crate::query_planner::plan_ctx::{PlanCtx, TableCtx}; + +/// Generate a scan operation for a node pattern. +/// +/// This function creates a ViewScan using schema information from plan_ctx. +/// If the schema lookup fails, it returns an error since node labels should be validated +/// against the schema. +/// +/// # Arguments +/// * `alias` - The variable alias for this node (e.g., "a", "user") +/// * `label` - Optional node label (e.g., Some("User"), None for unlabeled) +/// * `plan_ctx` - Planning context with schema information +/// +/// # Returns +/// * `Ok(Arc)` - ViewScan plan for the node +/// * `Err(LogicalPlanError)` - If node label not found in schema +pub fn generate_scan( + alias: String, + label: Option, + plan_ctx: &PlanCtx, +) -> LogicalPlanResult> { + log::debug!( + "generate_scan called with alias='{}', label={:?}", + alias, + label + ); + + if let Some(label_str) = &label { + // Handle $any wildcard for polymorphic edges + if label_str == "$any" { + log::debug!("Label is $any (polymorphic wildcard), creating Empty plan"); + return Ok(Arc::new(LogicalPlan::Empty)); + } + + log::debug!("Trying to create ViewScan for label '{}'", label_str); + match super::try_generate_view_scan(&alias, label_str, plan_ctx)? { + Some(view_scan) => { + log::info!("✓ Successfully created ViewScan for label '{}'", label_str); + Ok(view_scan) + } + None => { + // ViewScan creation failed - this is an error (schema not found) + Err(LogicalPlanError::NodeNotFound(label_str.to_string())) + } + } + } else { + log::debug!("No label provided - anonymous node, using Empty plan"); + // For anonymous nodes, use Empty plan + // The node label will be inferred from relationship context during analysis + Ok(Arc::new(LogicalPlan::Empty)) + } +} + +/// Check if a plan contains a denormalized ViewScan. +/// +/// Denormalized nodes are virtual nodes whose properties are stored on edge tables +/// rather than having their own dedicated table. +/// +/// # Arguments +/// * `plan` - The logical plan to check +/// +/// # Returns +/// * `true` if the plan is a ViewScan with `is_denormalized = true` +/// * `false` otherwise +pub fn is_denormalized_scan(plan: &Arc) -> bool { + let result = match plan.as_ref() { + LogicalPlan::ViewScan(view_scan) => { + crate::debug_print!( + "is_denormalized_scan: ViewScan.is_denormalized = {} for table '{}'", + view_scan.is_denormalized, + view_scan.source_table + ); + view_scan.is_denormalized + } + _ => { + crate::debug_print!("is_denormalized_scan: Not a ViewScan, returning false"); + false + } + }; + crate::debug_print!("is_denormalized_scan: returning {}", result); + result +} + +/// Check if a node label is denormalized by looking up the schema. +/// +/// Returns true if the node is denormalized (exists only in edge context). +/// +/// # Arguments +/// * `label` - Optional node label to check +/// * `plan_ctx` - Planning context with schema information +/// +/// # Returns +/// * `true` if the label exists in schema and is marked as denormalized +/// * `false` if label is None, not found, or not denormalized +pub fn is_label_denormalized(label: &Option, plan_ctx: &PlanCtx) -> bool { + if let Some(label_str) = label { + let schema = plan_ctx.schema(); + if let Ok(node_schema) = schema.node_schema(label_str) { + crate::debug_print!( + "is_label_denormalized: label '{}' is_denormalized = {}", + label_str, + node_schema.is_denormalized + ); + return node_schema.is_denormalized; + } + } + crate::debug_print!( + "is_label_denormalized: label {:?} not found or no label, returning false", + label + ); + false +} + +/// Convert property patterns from MATCH clauses into filter expressions. +/// +/// Property patterns like `{name: "Alice", age: 30}` are converted to +/// equality filter expressions like `n.name = "Alice" AND n.age = 30`. +/// +/// # Arguments +/// * `props` - Vector of Property values from the AST +/// * `node_alias` - The table alias to use for property access +/// +/// # Returns +/// * `Ok(Vec)` - Vector of equality filter expressions +/// * `Err(FoundParamInProperties)` - If a parameter reference is found +pub fn convert_properties( + props: Vec, + node_alias: &str, +) -> LogicalPlanResult> { + let mut extracted_props: Vec = vec![]; + + for prop in props { + match prop { + Property::PropertyKV(property_kvpair) => { + let op_app = LogicalExpr::OperatorApplicationExp(OperatorApplication { + operator: Operator::Equal, + operands: vec![ + LogicalExpr::PropertyAccessExp(PropertyAccess { + table_alias: TableAlias(node_alias.to_string()), + column: PropertyValue::Column(property_kvpair.key.to_string()), + }), + property_kvpair.value, + ], + }); + extracted_props.push(op_app); + } + Property::Param(_) => return Err(LogicalPlanError::FoundParamInProperties), + } + } + + Ok(extracted_props) +} + +/// Convert all property patterns in plan_ctx to filter expressions. +/// +/// Iterates through all table contexts in plan_ctx, extracts property patterns, +/// converts them to filter expressions, and appends them as filters. +/// +/// # Arguments +/// * `plan_ctx` - Mutable planning context +/// +/// # Returns +/// * `Ok(())` on success +/// * `Err(FoundParamInProperties)` if a parameter reference is found +pub fn convert_properties_to_operator_application(plan_ctx: &mut PlanCtx) -> LogicalPlanResult<()> { + for (alias, table_ctx) in plan_ctx.get_mut_alias_table_ctx_map().iter_mut() { + let mut extracted_props = convert_properties(table_ctx.get_and_clear_properties(), alias)?; + table_ctx.append_filters(&mut extracted_props); + } + Ok(()) +} + +/// Compute the variable_length specification for a relationship pattern. +/// +/// This normalizes VLP handling: +/// - `*1` on single-type relationship → None (same as regular) +/// - `*1` on multi-type relationship → Some(*1) (needed for polymorphic handling) +/// - Explicit VLP ranges → Some(spec) +/// - Multi-type without VLP → implicit *1 +/// - Single-type without VLP → None +/// +/// # Arguments +/// * `rel` - The relationship pattern from the AST +/// * `rel_labels` - The relationship type labels (if any) +pub fn compute_variable_length( + rel: &ast::RelationshipPattern, + rel_labels: &Option>, +) -> Option { + let is_multi_type = rel_labels.as_ref().map_or(false, |labels| labels.len() > 1); + + if let Some(vlp) = rel.variable_length.clone() { + let spec: VariableLengthSpec = vlp.into(); + let is_exact_one_hop = spec.min_hops == Some(1) && spec.max_hops == Some(1); + if is_exact_one_hop && !is_multi_type { + None // *1 single-type is same as regular relationship + } else { + Some(spec) // Keep *1 for multi-type or ranges + } + } else if is_multi_type { + // Add implicit *1 for multi-type without VLP (polymorphic end node) + Some(VariableLengthSpec { + min_hops: Some(1), + max_hops: Some(1), + }) + } else { + None // Single-type, no VLP + } +} + +/// Compute left/right node labels for relationship lookup based on direction. +/// +/// For relationship type inference and polymorphic resolution, we need the +/// labels of nodes in the order [from_node, to_node] regardless of how they +/// appear in the pattern. +/// +/// # Arguments +/// * `direction` - The relationship direction from the AST +/// * `start_label` - Label of the pattern's start node (left in pattern) +/// * `end_label` - Label of the pattern's end node (right in pattern) +/// +/// # Returns +/// Tuple of (left_node_label, right_node_label) where: +/// - left is the "from" node in the relationship definition +/// - right is the "to" node in the relationship definition +pub fn compute_rel_node_labels( + direction: &ast::Direction, + start_label: &Option, + end_label: &Option, +) -> (Option, Option) { + match direction { + ast::Direction::Outgoing => (start_label.clone(), end_label.clone()), + ast::Direction::Incoming => (end_label.clone(), start_label.clone()), + ast::Direction::Either => (start_label.clone(), end_label.clone()), + } +} + +/// Compute left/right connection aliases based on relationship direction. +/// +/// Similar to `compute_rel_node_labels` but for string aliases rather than Option labels. +/// Used to determine which node alias serves as the "from" and "to" connection for JOIN generation. +/// +/// # Arguments +/// * `direction` - The relationship direction from the AST +/// * `start_alias` - Alias of the pattern's start node (left in pattern) +/// * `end_alias` - Alias of the pattern's end node (right in pattern) +/// +/// # Returns +/// Tuple of (left_connection, right_connection) based on direction +pub fn compute_connection_aliases( + direction: &ast::Direction, + start_alias: &str, + end_alias: &str, +) -> (String, String) { + match direction { + ast::Direction::Outgoing => (start_alias.to_string(), end_alias.to_string()), + ast::Direction::Incoming => (end_alias.to_string(), start_alias.to_string()), + ast::Direction::Either => (start_alias.to_string(), end_alias.to_string()), + } +} + +/// Register a node in the planning context's table context map. +/// +/// This consolidates the common pattern of `plan_ctx.insert_table_ctx(alias, TableCtx::build(...))` +/// that appears multiple times in MATCH clause processing. +/// +/// # Arguments +/// * `plan_ctx` - The planning context +/// * `node_alias` - The node's alias +/// * `node_label` - The node's label (if any) +/// * `node_props` - Properties from the node pattern +/// * `is_explicitly_named` - Whether the node has an explicit name in the query +pub fn register_node_in_context( + plan_ctx: &mut PlanCtx, + node_alias: &str, + node_label: &Option, + node_props: Vec, + is_explicitly_named: bool, +) { + plan_ctx.insert_table_ctx( + node_alias.to_string(), + TableCtx::build( + node_alias.to_string(), + node_label.clone().map(|l| vec![l]), + node_props, + false, // is_rel + is_explicitly_named, + ), + ); +} + +/// Generate a scan for a node, handling denormalized cases. +/// +/// If the node label is denormalized (embedded in an edge table), returns an Empty scan. +/// Otherwise generates a regular ViewScan via `generate_scan()`. +/// +/// # Arguments +/// * `node_alias` - The node's alias +/// * `node_label` - The node's label (if any) +/// * `plan_ctx` - The planning context +/// +/// # Returns +/// Tuple of (scan_plan, is_denormalized) +pub fn generate_denormalization_aware_scan( + node_alias: &str, + node_label: &Option, + plan_ctx: &mut PlanCtx, +) -> LogicalPlanResult<(Arc, bool)> { + if is_label_denormalized(node_label, plan_ctx) { + crate::debug_print!( + "=== Node '{}' is DENORMALIZED, creating Empty scan ===", + node_alias + ); + Ok((Arc::new(LogicalPlan::Empty), true)) + } else { + let scan = generate_scan(node_alias.to_string(), node_label.clone(), plan_ctx)?; + let is_denorm = is_denormalized_scan(&scan); + Ok((scan, is_denorm)) + } +} + +/// Determine anchor connection for OPTIONAL MATCH patterns. +/// +/// For OPTIONAL MATCH, we need to identify which node serves as the "anchor" - +/// the node that already exists in the base MATCH pattern. The other node is +/// the one being optionally matched. +/// +/// # Arguments +/// * `plan_ctx` - The planning context +/// * `is_optional` - Whether this is an OPTIONAL MATCH pattern +/// * `left_conn` - The left connection alias +/// * `right_conn` - The right connection alias +/// +/// # Returns +/// Some(alias) of the anchor connection, or None if not OPTIONAL MATCH or +/// if anchor cannot be determined +pub fn determine_optional_anchor( + plan_ctx: &PlanCtx, + is_optional: bool, + left_conn: &str, + right_conn: &str, +) -> Option { + if !is_optional { + return None; + } + + let alias_map = plan_ctx.get_alias_table_ctx_map(); + if alias_map.contains_key(left_conn) && !alias_map.contains_key(right_conn) { + // left_conn exists, right_conn is new -> left_conn is anchor + Some(left_conn.to_string()) + } else if alias_map.contains_key(right_conn) && !alias_map.contains_key(left_conn) { + // right_conn exists, left_conn is new -> right_conn is anchor + Some(right_conn.to_string()) + } else { + // Both exist or neither exists - shouldn't happen in normal OPTIONAL MATCH + crate::debug_print!( + "WARN: OPTIONAL MATCH could not determine anchor: left_conn={}, right_conn={}", + left_conn, + right_conn + ); + None + } +} + +/// Register a path variable in the PlanCtx with full TypedVariable::Path metadata. +/// +/// This is extracted from the duplicated code in traverse_connected_pattern_with_mode. +/// It registers both the TypedVariable::Path and a TableCtx for backward compatibility. +/// +/// # Arguments +/// * `plan_ctx` - The planning context to register the path variable in +/// * `path_var` - The name of the path variable +/// * `graph_rel` - The GraphRel node containing the path information +/// * `rel_alias` - The relationship alias +/// * `shortest_path_mode` - Whether this is a shortest path query +pub fn register_path_variable( + plan_ctx: &mut PlanCtx, + path_var: &str, + graph_rel: &GraphRel, + rel_alias: &str, + shortest_path_mode: Option<&ShortestPathMode>, +) { + // Extract length bounds from graph_rel.variable_length for TypedVariable::Path + let length_bounds = graph_rel + .variable_length + .as_ref() + .map(|vlp| (vlp.min_hops, vlp.max_hops)); + + // First register TypedVariable::Path with full metadata + plan_ctx.define_path( + path_var.to_string(), + Some(graph_rel.left_connection.clone()), // start_node + Some(graph_rel.right_connection.clone()), // end_node + Some(rel_alias.to_string()), // relationship + length_bounds, // length bounds from VLP spec + shortest_path_mode.is_some(), // is_shortest_path + ); + + // Then register TableCtx for backward compatibility with code that uses alias_table_ctx_map + plan_ctx.insert_table_ctx( + path_var.to_string(), + TableCtx::build( + path_var.to_string(), + None, // Path variables don't have labels + vec![], // Path variables don't have properties + false, // Not a relationship + true, // Explicitly named by user + ), + ); + + log::info!( + "📍 Registered path variable '{}' with TypedVariable::Path (start={}, end={}, bounds={:?})", + path_var, + graph_rel.left_connection, + graph_rel.right_connection, + length_bounds + ); +} + +/// Register a relationship in the plan context with connected node labels. +/// +/// This consolidates the common pattern of: +/// 1. insert_table_ctx for the relationship +/// 2. set_connected_nodes for polymorphic resolution +/// 3. register_path_variable if path_variable is present +/// +/// # Arguments +/// * `plan_ctx` - The planning context +/// * `rel_alias` - The relationship alias +/// * `rel_labels` - The relationship type labels (if any) +/// * `rel_properties` - Properties from the relationship pattern +/// * `is_named` - Whether the relationship has an explicit name in the query +/// * `left_node_label` - Label of the left/from node (for polymorphic resolution) +/// * `right_node_label` - Label of the right/to node (for polymorphic resolution) +/// * `graph_rel` - The GraphRel node (for path variable registration) +/// * `path_variable` - Optional path variable name +/// * `shortest_path_mode` - Whether this is a shortest path query +pub fn register_relationship_in_context( + plan_ctx: &mut PlanCtx, + rel_alias: &str, + rel_labels: Option>, + rel_properties: Vec, + is_named: bool, + left_node_label: &Option, + right_node_label: &Option, + graph_rel: &GraphRel, + path_variable: Option<&str>, + shortest_path_mode: Option<&ShortestPathMode>, +) { + // 1. Register the relationship TableCtx + plan_ctx.insert_table_ctx( + rel_alias.to_string(), + TableCtx::build( + rel_alias.to_string(), + rel_labels, + rel_properties, + true, // is_relation + is_named, + ), + ); + + // 2. Set connected node labels for polymorphic relationship resolution + if let Some(rel_table_ctx) = plan_ctx.get_mut_table_ctx_opt(rel_alias) { + rel_table_ctx.set_connected_nodes(left_node_label.clone(), right_node_label.clone()); + } + + // 3. Register path variable if present + if let Some(path_var) = path_variable { + register_path_variable(plan_ctx, path_var, graph_rel, rel_alias, shortest_path_mode); + } +} +#[cfg(test)] +mod tests { + use super::*; + use crate::query_planner::logical_expr::{Literal, PropertyKVPair}; + + #[test] + fn test_convert_properties_with_kv_pairs() { + let properties = vec![ + Property::PropertyKV(PropertyKVPair { + key: "name".to_string(), + value: LogicalExpr::Literal(Literal::String("Alice".to_string())), + }), + Property::PropertyKV(PropertyKVPair { + key: "age".to_string(), + value: LogicalExpr::Literal(Literal::Integer(30)), + }), + ]; + + let result = convert_properties(properties, "n").unwrap(); + assert_eq!(result.len(), 2); + + // Check first property conversion + match &result[0] { + LogicalExpr::OperatorApplicationExp(op) => { + assert_eq!(op.operator, Operator::Equal); + assert_eq!(op.operands.len(), 2); + } + _ => panic!("Expected OperatorApplicationExp"), + } + } + + #[test] + fn test_convert_properties_with_param_returns_error() { + let properties = vec![Property::Param("param1".to_string())]; + + let result = convert_properties(properties, "n"); + assert!(result.is_err()); + match result.unwrap_err() { + LogicalPlanError::FoundParamInProperties => {} + _ => panic!("Expected FoundParamInProperties error"), + } + } + + #[test] + fn test_convert_properties_empty_list() { + let properties = vec![]; + let result = convert_properties(properties, "n").unwrap(); + assert_eq!(result.len(), 0); + } +} diff --git a/src/query_planner/logical_plan/match_clause/mod.rs b/src/query_planner/logical_plan/match_clause/mod.rs new file mode 100644 index 00000000..5a8f8f19 --- /dev/null +++ b/src/query_planner/logical_plan/match_clause/mod.rs @@ -0,0 +1,47 @@ +//! MATCH clause processing for Cypher queries. +//! +//! This module handles the translation of Cypher MATCH patterns into logical plans. +//! It supports: +//! - Node patterns: `(n:Label)` +//! - Relationship patterns: `(a)-[r:TYPE]->(b)` +//! - Variable-length paths: `(a)-[*1..3]->(b)` +//! - Shortest path: `shortestPath((a)-[*]->(b))` +//! +//! # Architecture +//! +//! The module is organized into focused submodules: +//! - `traversal.rs` - Core MATCH clause evaluation and pattern traversal +//! - `view_scan.rs` - ViewScan generation for nodes and relationships +//! - `type_inference.rs` - Node label and relationship type inference +//! - `helpers.rs` - Utility functions (property conversion, scan helpers, etc.) +//! - `errors.rs` - Error types for match clause processing +//! - `tests.rs` - Unit tests for match clause processing + +mod errors; +mod helpers; +mod traversal; +mod type_inference; +mod view_scan; + +#[cfg(test)] +mod tests; + +// Re-export all public items from traversal module +pub use traversal::*; + +// Re-export type inference functions +pub use type_inference::{infer_node_label_from_schema, infer_relationship_type_from_nodes}; + +// Re-export ViewScan generation functions +pub use view_scan::{ + generate_relationship_center, try_generate_relationship_view_scan, try_generate_view_scan, +}; + +// Re-export helper functions +pub use helpers::{ + compute_connection_aliases, compute_rel_node_labels, compute_variable_length, + convert_properties, convert_properties_to_operator_application, determine_optional_anchor, + generate_denormalization_aware_scan, generate_scan, is_denormalized_scan, + is_label_denormalized, register_node_in_context, register_path_variable, + register_relationship_in_context, +}; diff --git a/src/query_planner/logical_plan/match_clause/tests.rs b/src/query_planner/logical_plan/match_clause/tests.rs new file mode 100644 index 00000000..96d80a4e --- /dev/null +++ b/src/query_planner/logical_plan/match_clause/tests.rs @@ -0,0 +1,1554 @@ +//! Unit tests for MATCH clause processing. +//! +//! These tests cover: +//! - Property conversion to operator applications +//! - Node pattern traversal +//! - Connected pattern traversal +//! - Relationship type inference +//! - Node label inference from schema +//! - Polymorphic edge handling + +use std::cell::RefCell; +use std::rc::Rc; +use std::sync::Arc; + +use crate::graph_catalog::expression_parser::PropertyValue; +use crate::graph_catalog::graph_schema::GraphSchema; +use crate::open_cypher_parser::ast; +use crate::query_planner::logical_expr::{ + Direction, Literal, LogicalExpr, Operator, Property, PropertyKVPair, +}; +use crate::query_planner::logical_plan::{ + errors::LogicalPlanError, generate_id, GraphNode, GraphRel, LogicalPlan, ViewScan, +}; +use crate::query_planner::plan_ctx::{PlanCtx, TableCtx}; + +// Import from parent module (match_clause) +use super::{ + convert_properties, convert_properties_to_operator_application, generate_scan, + infer_node_label_from_schema, infer_relationship_type_from_nodes, +}; + +// Import internal functions from traversal module +use super::traversal::{evaluate_match_clause, traverse_connected_pattern, traverse_node_pattern}; + +#[test] +fn test_convert_properties_with_kv_pairs() { + let properties = vec![ + Property::PropertyKV(PropertyKVPair { + key: "name".to_string(), + value: LogicalExpr::Literal(Literal::String("John".to_string())), + }), + Property::PropertyKV(PropertyKVPair { + key: "age".to_string(), + value: LogicalExpr::Literal(Literal::Integer(30)), + }), + ]; + + let result = convert_properties(properties, "n").unwrap(); + assert_eq!(result.len(), 2); + + // Check first property conversion + match &result[0] { + LogicalExpr::OperatorApplicationExp(op_app) => { + assert_eq!(op_app.operator, Operator::Equal); + assert_eq!(op_app.operands.len(), 2); + match &op_app.operands[0] { + LogicalExpr::PropertyAccessExp(prop) => { + assert_eq!(prop.table_alias.0, "n"); + match &prop.column { + PropertyValue::Column(col) => assert_eq!(col, "name"), + _ => panic!("Expected Column property"), + } + } + _ => panic!("Expected PropertyAccessExp"), + } + match &op_app.operands[1] { + LogicalExpr::Literal(Literal::String(s)) => assert_eq!(s, "John"), + _ => panic!("Expected String literal"), + } + } + _ => panic!("Expected OperatorApplication"), + } + + // Check second property conversion + match &result[1] { + LogicalExpr::OperatorApplicationExp(op_app) => { + assert_eq!(op_app.operator, Operator::Equal); + match &op_app.operands[1] { + LogicalExpr::Literal(Literal::Integer(age)) => assert_eq!(*age, 30), + _ => panic!("Expected Integer literal"), + } + } + _ => panic!("Expected OperatorApplication"), + } +} + +#[test] +fn test_convert_properties_with_param_returns_error() { + let properties = vec![ + Property::PropertyKV(PropertyKVPair { + key: "name".to_string(), + value: LogicalExpr::Literal(Literal::String("Alice".to_string())), + }), + Property::Param("param1".to_string()), + ]; + + let result = convert_properties(properties, "n"); + assert!(result.is_err()); + match result.unwrap_err() { + LogicalPlanError::FoundParamInProperties => (), // Expected error + _ => panic!("Expected FoundParamInProperties error"), + } +} + +#[test] +fn test_convert_properties_empty_list() { + let properties = vec![]; + let result = convert_properties(properties, "n").unwrap(); + assert_eq!(result.len(), 0); +} + +#[test] +fn test_generate_id_uniqueness() { + let id1 = generate_id(); + let id2 = generate_id(); + + // IDs should be unique + assert_ne!(id1, id2); + + // IDs should start with 't' (simple format: t1, t2, t3...) + assert!(id1.starts_with('t')); + assert!(id2.starts_with('t')); + + // IDs should be reasonable length (t1 to t999999+) + assert!(id1.len() >= 2 && id1.len() < 10); + assert!(id2.len() >= 2 && id2.len() < 10); +} + +#[test] +fn test_traverse_node_pattern_new_node() { + let graph_schema = create_test_schema_with_relationships(); + let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); + let initial_plan = Arc::new(LogicalPlan::Empty); + + let node_pattern = ast::NodePattern { + name: Some("customer"), + labels: Some(vec!["Person"]), + properties: Some(vec![ast::Property::PropertyKV(ast::PropertyKVPair { + key: "city", + value: ast::Expression::Literal(ast::Literal::String("Boston")), + })]), + }; + + let result = traverse_node_pattern(&node_pattern, initial_plan.clone(), &mut plan_ctx).unwrap(); + + // Should return a GraphNode plan + match result.as_ref() { + LogicalPlan::GraphNode(graph_node) => { + assert_eq!(graph_node.alias, "customer"); + // Input should be a ViewScan + match graph_node.input.as_ref() { + LogicalPlan::ViewScan(_view_scan) => { + // ViewScan created successfully via try_generate_view_scan + // This happens when GLOBAL_GRAPH_SCHEMA is available + } + _ => panic!("Expected ViewScan as input"), + } + } + _ => panic!("Expected GraphNode"), + } + + // Should have added entry to plan context + let table_ctx = plan_ctx.get_table_ctx("customer").unwrap(); + assert_eq!(table_ctx.get_label_opt(), Some("Person".to_string())); + // Note: properties get moved to filters after convert_properties_to_operator_application + assert!(table_ctx.is_explicit_alias()); +} + +#[test] +fn test_traverse_node_pattern_existing_node() { + let mut plan_ctx = PlanCtx::default(); + let initial_plan = Arc::new(LogicalPlan::Empty); + + // Pre-populate plan context with existing node + plan_ctx.insert_table_ctx( + "customer".to_string(), + TableCtx::build( + "customer".to_string(), + Some("User".to_string()).map(|l| vec![l]), + vec![], + false, + true, + ), + ); + + let node_pattern = ast::NodePattern { + name: Some("customer"), + labels: Some(vec!["Person"]), // Different label + properties: Some(vec![ast::Property::PropertyKV(ast::PropertyKVPair { + key: "age", + value: ast::Expression::Literal(ast::Literal::Integer(25)), + })]), + }; + + let result = traverse_node_pattern(&node_pattern, initial_plan.clone(), &mut plan_ctx).unwrap(); + + // Should return the same plan (not create new GraphNode) + assert_eq!(result, initial_plan); + + // Should have updated the existing table context + let table_ctx = plan_ctx.get_table_ctx("customer").unwrap(); + assert_eq!(table_ctx.get_label_opt(), Some("Person".to_string())); // Label should be updated + // Note: properties get moved to filters after convert_properties_to_operator_application +} + +#[test] +fn test_traverse_node_pattern_empty_node_error() { + let mut plan_ctx = PlanCtx::default(); + let initial_plan = Arc::new(LogicalPlan::Empty); + + let node_pattern = ast::NodePattern { + name: None, // Empty node + labels: Some(vec!["Person"]), + properties: None, + }; + + let result = traverse_node_pattern(&node_pattern, initial_plan, &mut plan_ctx); + assert!(result.is_err()); + match result.unwrap_err() { + LogicalPlanError::EmptyNode => (), // Expected error + _ => panic!("Expected EmptyNode error"), + } +} + +#[test] +fn test_traverse_connected_pattern_new_connection() { + let graph_schema = create_test_schema_with_relationships(); + let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); + let initial_plan = Arc::new(LogicalPlan::Empty); + + let start_node = ast::NodePattern { + name: Some("user"), + labels: Some(vec!["Person"]), + properties: None, + }; + + let end_node = ast::NodePattern { + name: Some("company"), + labels: Some(vec!["Organization"]), + properties: None, + }; + + let relationship = ast::RelationshipPattern { + name: Some("works_at"), + direction: ast::Direction::Outgoing, + labels: Some(vec!["WORKS_AT"]), + properties: None, + variable_length: None, + }; + + let connected_pattern = ast::ConnectedPattern { + start_node: Rc::new(RefCell::new(start_node)), + relationship, + end_node: Rc::new(RefCell::new(end_node)), + }; + + let connected_patterns = vec![connected_pattern]; + + let result = + traverse_connected_pattern(&connected_patterns, initial_plan, &mut plan_ctx, 0).unwrap(); + + // Should return a GraphRel plan + match result.as_ref() { + LogicalPlan::GraphRel(graph_rel) => { + assert_eq!(graph_rel.alias, "works_at"); + assert_eq!(graph_rel.direction, Direction::Outgoing); + assert_eq!(graph_rel.left_connection, "user"); // Left node is the start node (user) for outgoing relationships + assert_eq!(graph_rel.right_connection, "company"); // Right node is the end node (company) for outgoing relationships + assert!(!graph_rel.is_rel_anchor); + + // Check left side (start node for outgoing relationships) + match graph_rel.left.as_ref() { + LogicalPlan::GraphNode(left_node) => { + assert_eq!(left_node.alias, "user"); + } + _ => panic!("Expected GraphNode on left"), + } + + // Check right side (end node for outgoing relationships) + match graph_rel.right.as_ref() { + LogicalPlan::GraphNode(right_node) => { + assert_eq!(right_node.alias, "company"); + } + _ => panic!("Expected GraphNode on right"), + } + } + _ => panic!("Expected GraphRel"), + } + + // Should have added entries to plan context + assert!(plan_ctx.get_table_ctx("user").is_ok()); + assert!(plan_ctx.get_table_ctx("company").is_ok()); + assert!(plan_ctx.get_table_ctx("works_at").is_ok()); + + let rel_ctx = plan_ctx.get_table_ctx("works_at").unwrap(); + assert!(rel_ctx.is_relation()); +} + +#[test] +fn test_traverse_connected_pattern_with_existing_start_node() { + let graph_schema = create_test_schema_with_relationships(); + let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); + let initial_plan = Arc::new(LogicalPlan::Empty); + + // Pre-populate with existing start node + plan_ctx.insert_table_ctx( + "user".to_string(), + TableCtx::build( + "user".to_string(), + Some("Person".to_string()).map(|l| vec![l]), + vec![], + false, + true, + ), + ); + + let start_node = ast::NodePattern { + name: Some("user"), // This exists in plan_ctx + labels: Some(vec!["Employee"]), // Different label + properties: None, + }; + + let end_node = ast::NodePattern { + name: Some("project"), + labels: Some(vec!["Project"]), + properties: None, + }; + + let relationship = ast::RelationshipPattern { + name: Some("assigned_to"), + direction: ast::Direction::Incoming, + labels: Some(vec!["ASSIGNED_TO"]), + properties: None, + variable_length: None, + }; + + let connected_pattern = ast::ConnectedPattern { + start_node: Rc::new(RefCell::new(start_node)), + relationship, + end_node: Rc::new(RefCell::new(end_node)), + }; + + let connected_patterns = vec![connected_pattern]; + + let result = + traverse_connected_pattern(&connected_patterns, initial_plan, &mut plan_ctx, 0).unwrap(); + + // Should return a GraphRel plan with different structure + match result.as_ref() { + LogicalPlan::GraphRel(graph_rel) => { + assert_eq!(graph_rel.alias, "assigned_to"); + assert_eq!(graph_rel.direction, Direction::Incoming); + assert_eq!(graph_rel.left_connection, "project"); + assert_eq!(graph_rel.right_connection, "user"); + + // Left should be the new end node + match graph_rel.left.as_ref() { + LogicalPlan::GraphNode(left_node) => { + assert_eq!(left_node.alias, "project"); + } + _ => panic!("Expected GraphNode on left"), + } + } + _ => panic!("Expected GraphRel"), + } + + // Existing start node should have updated label + let user_ctx = plan_ctx.get_table_ctx("user").unwrap(); + assert_eq!(user_ctx.get_label_opt(), Some("Employee".to_string())); +} + +// Test removed: DisconnectedPatternFound error no longer exists +// as of commit b015cf0 which allows disconnected comma patterns +// with WHERE clause predicates for cross-table correlation + +#[test] +fn test_evaluate_match_clause_with_node_and_connected_pattern() { + let graph_schema = create_test_schema_with_relationships(); + let mut plan_ctx = PlanCtx::new(Arc::new(graph_schema)); + let initial_plan = Arc::new(LogicalPlan::Empty); + + // Create a match clause with both node pattern and connected pattern + let node_pattern = ast::NodePattern { + name: Some("admin"), + labels: Some(vec!["User"]), + properties: Some(vec![ast::Property::PropertyKV(ast::PropertyKVPair { + key: "role", + value: ast::Expression::Literal(ast::Literal::String("administrator")), + })]), + }; + + let start_node = ast::NodePattern { + name: Some("admin"), // Same as above - should connect + labels: None, + properties: None, + }; + + let end_node = ast::NodePattern { + name: Some("system"), + labels: Some(vec!["System"]), + properties: None, + }; + + let relationship = ast::RelationshipPattern { + name: Some("manages"), + direction: ast::Direction::Outgoing, + labels: Some(vec!["MANAGES"]), + properties: None, + variable_length: None, + }; + + let connected_pattern = ast::ConnectedPattern { + start_node: Rc::new(RefCell::new(start_node)), + relationship, + end_node: Rc::new(RefCell::new(end_node)), + }; + + let match_clause = ast::MatchClause { + path_patterns: vec![ + (None, ast::PathPattern::Node(node_pattern)), + ( + None, + ast::PathPattern::ConnectedPattern(vec![connected_pattern]), + ), + ], + where_clause: None, + }; + + let result = evaluate_match_clause(&match_clause, initial_plan, &mut plan_ctx).unwrap(); + + // Should return a GraphRel plan + match result.as_ref() { + LogicalPlan::GraphRel(graph_rel) => { + assert_eq!(graph_rel.alias, "manages"); + assert_eq!(graph_rel.direction, Direction::Outgoing); + } + _ => panic!("Expected GraphRel at top level"), + } + + // Properties should have been converted to filters + let admin_ctx = plan_ctx.get_table_ctx("admin").unwrap(); + assert_eq!(admin_ctx.get_filters().len(), 1); +} + +#[test] +fn test_convert_properties_to_operator_application() { + let mut plan_ctx = PlanCtx::default(); + + // Add table context with properties + let properties = vec![Property::PropertyKV(PropertyKVPair { + key: "status".to_string(), + value: LogicalExpr::Literal(Literal::String("active".to_string())), + })]; + + let table_ctx = TableCtx::build( + "user".to_string(), + Some("Person".to_string()).map(|l| vec![l]), + properties, + false, + true, + ); + + plan_ctx.insert_table_ctx("user".to_string(), table_ctx); + + // Before conversion, table should have no filters + let table_ctx_before = plan_ctx.get_table_ctx("user").unwrap(); + assert_eq!(table_ctx_before.get_filters().len(), 0); + + // Convert properties + let result = convert_properties_to_operator_application(&mut plan_ctx); + assert!(result.is_ok()); + + // After conversion, properties should be moved to filters + let table_ctx_after = plan_ctx.get_table_ctx("user").unwrap(); + assert_eq!(table_ctx_after.get_filters().len(), 1); // Filter added + + // Check the filter predicate + match &table_ctx_after.get_filters()[0] { + LogicalExpr::OperatorApplicationExp(op_app) => { + assert_eq!(op_app.operator, Operator::Equal); + match &op_app.operands[0] { + LogicalExpr::PropertyAccessExp(prop_access) => { + assert_eq!(prop_access.table_alias.0, "user"); + assert_eq!(prop_access.column.raw(), "status"); + } + _ => panic!("Expected PropertyAccessExp"), + } + } + _ => panic!("Expected OperatorApplication"), + } +} + +#[test] +fn test_generate_scan() { + // Create schema with Customer node + use crate::graph_catalog::graph_schema::{GraphSchema, NodeIdSchema, NodeSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + nodes.insert( + "Customer".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "customers".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + let schema = Arc::new(GraphSchema::build( + 1, + "test".to_string(), + nodes, + HashMap::new(), + )); + let plan_ctx = PlanCtx::new(schema); + + let scan = generate_scan( + "customers".to_string(), + Some("Customer".to_string()), + &plan_ctx, + ) + .unwrap(); + + match scan.as_ref() { + LogicalPlan::ViewScan(scan_plan) => { + assert_eq!(scan_plan.source_table, "test_db.customers"); + // The label is "Customer" but ViewScan doesn't store it directly + } + _ => panic!("Expected ViewScan plan"), + } +} + +// ========================================== +// Tests for relationship type inference +// ========================================== + +fn create_test_schema_with_relationships() -> GraphSchema { + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + nodes.insert( + "Airport".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "airports".to_string(), + column_names: vec!["id".to_string(), "code".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + nodes.insert( + "User".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "users".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + nodes.insert( + "Post".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "posts".to_string(), + column_names: vec!["id".to_string(), "title".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + let mut rels = HashMap::new(); + rels.insert( + "FLIGHT".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "flights".to_string(), + column_names: vec!["from_airport".to_string(), "to_airport".to_string()], + from_node: "Airport".to_string(), + to_node: "Airport".to_string(), + from_node_table: "airports".to_string(), + to_node_table: "airports".to_string(), + from_id: "from_airport".to_string(), + to_id: "to_airport".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + rels.insert( + "LIKES".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "likes".to_string(), + column_names: vec!["user_id".to_string(), "post_id".to_string()], + from_node: "User".to_string(), + to_node: "Post".to_string(), + from_node_table: "users".to_string(), + to_node_table: "posts".to_string(), + from_id: "user_id".to_string(), + to_id: "post_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + rels.insert( + "FOLLOWS".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "follows".to_string(), + column_names: vec!["follower_id".to_string(), "followed_id".to_string()], + from_node: "User".to_string(), + to_node: "User".to_string(), + from_node_table: "users".to_string(), + to_node_table: "users".to_string(), + from_id: "follower_id".to_string(), + to_id: "followed_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + + // Add missing nodes for tests + nodes.insert( + "Person".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "persons".to_string(), + column_names: vec!["id".to_string(), "name".to_string(), "city".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + nodes.insert( + "Organization".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "organizations".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + nodes.insert( + "Employee".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "employees".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + nodes.insert( + "Project".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "projects".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + nodes.insert( + "System".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "systems".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + // Add missing relationships for tests + rels.insert( + "WORKS_AT".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "works_at".to_string(), + column_names: vec!["person_id".to_string(), "org_id".to_string()], + from_node: "Person".to_string(), + to_node: "Organization".to_string(), + from_node_table: "persons".to_string(), + to_node_table: "organizations".to_string(), + from_id: "person_id".to_string(), + to_id: "org_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + rels.insert( + "ASSIGNED_TO".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "assigned_to".to_string(), + column_names: vec!["emp_id".to_string(), "proj_id".to_string()], + from_node: "Employee".to_string(), + to_node: "Project".to_string(), + from_node_table: "employees".to_string(), + to_node_table: "projects".to_string(), + from_id: "emp_id".to_string(), + to_id: "proj_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + rels.insert( + "MANAGES".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "manages".to_string(), + column_names: vec!["user_id".to_string(), "system_id".to_string()], + from_node: "User".to_string(), + to_node: "System".to_string(), + from_node_table: "users".to_string(), + to_node_table: "systems".to_string(), + from_id: "user_id".to_string(), + to_id: "system_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + + GraphSchema::build(1, "test_db".to_string(), nodes, rels) +} + +fn create_single_relationship_schema() -> GraphSchema { + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + nodes.insert( + "Person".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "persons".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + let mut rels = HashMap::new(); + rels.insert( + "KNOWS".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "knows".to_string(), + column_names: vec!["person1_id".to_string(), "person2_id".to_string()], + from_node: "Person".to_string(), + to_node: "Person".to_string(), + from_node_table: "persons".to_string(), + to_node_table: "persons".to_string(), + from_id: "person1_id".to_string(), + to_id: "person2_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + + GraphSchema::build(1, "test_db".to_string(), nodes, rels) +} + +#[test] +fn test_infer_relationship_type_single_schema() { + // When schema has only one relationship, use it regardless of node types + let schema = create_single_relationship_schema(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &None, // untyped start + &None, // untyped end + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + assert!(result.is_some()); + let types = result.unwrap(); + assert_eq!(types.len(), 1); + assert_eq!(types[0], "KNOWS"); +} + +#[test] +fn test_infer_relationship_type_from_start_node() { + // (a:Airport)-[r]->() should infer FLIGHT (only edge from Airport) + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &Some("Airport".to_string()), + &None, + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + assert!(result.is_some()); + let types = result.unwrap(); + assert_eq!(types.len(), 1); + assert_eq!(types[0], "FLIGHT"); +} + +#[test] +fn test_infer_relationship_type_from_end_node() { + // ()-[r]->(p:Post) should infer LIKES (only edge to Post) + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &None, + &Some("Post".to_string()), + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + assert!(result.is_some()); + let types = result.unwrap(); + assert_eq!(types.len(), 1); + assert_eq!(types[0], "LIKES"); +} + +#[test] +fn test_infer_relationship_type_from_both_nodes() { + // (u:User)-[r]->(p:Post) should infer LIKES + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &Some("User".to_string()), + &Some("Post".to_string()), + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + assert!(result.is_some()); + let types = result.unwrap(); + assert_eq!(types.len(), 1); + assert_eq!(types[0], "LIKES"); +} + +#[test] +fn test_infer_relationship_type_multiple_matches() { + // (u:User)-[r]->() should return LIKES, FOLLOWS, and MANAGES (multiple edges from User) + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &Some("User".to_string()), + &None, + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + assert!(result.is_some()); + let types = result.unwrap(); + assert_eq!(types.len(), 3); // Now 3 relationships: LIKES, FOLLOWS, MANAGES + assert!(types.contains(&"LIKES".to_string())); + assert!(types.contains(&"FOLLOWS".to_string())); + assert!(types.contains(&"MANAGES".to_string())); +} + +#[test] +fn test_infer_relationship_type_incoming_direction() { + // ()<-[r]-(p:Post) should infer LIKES (reversed direction) + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &None, + &Some("Post".to_string()), + &ast::Direction::Incoming, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + // Incoming means: from=end(Post), to=start(None) + // LIKES has from=User, to=Post + // So we need to check: from_node=Post? No. LIKES doesn't match. + // Actually for incoming: from=end, to=start + // So Post is the end node, meaning we're looking for relationships with to_node=Post + // But incoming flips it: from_matches_end = "Post" == rel.from_node? No for LIKES + // Hmm, let me reconsider - for incoming, the arrow points to start + // So the relationship's to_node should be the pattern's start node + // And the relationship's from_node should be the pattern's end node + // In this case: ()<-[r]-(p:Post) means Post→anonymous + // So we want relationships where from_node=Post - but LIKES has from_node=User + // This should return None/empty + assert!(result.is_none() || result.as_ref().unwrap().is_empty()); +} + +#[test] +fn test_infer_relationship_type_incoming_correct() { + // (u:User)<-[r]-() should infer FOLLOWS (User is the to_node of FOLLOWS) + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &Some("User".to_string()), + &None, + &ast::Direction::Incoming, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + // Incoming: from=end(None), to=start(User) + // FOLLOWS: from=User, to=User - matches (to=User checks against start) + // LIKES: from=User, to=Post - doesn't match (to=Post != User) + assert!(result.is_some()); + let types = result.unwrap(); + assert_eq!(types.len(), 1); + assert_eq!(types[0], "FOLLOWS"); +} + +#[test] +fn test_infer_relationship_type_no_matches() { + // (a:Airport)-[r]->(u:User) should find no matching relationships + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &Some("Airport".to_string()), + &Some("User".to_string()), + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + // FLIGHT: Airport→Airport - doesn't match (to=Airport != User) + // LIKES: User→Post - doesn't match (from=User != Airport) + // FOLLOWS: User→User - doesn't match + assert!(result.is_none()); +} + +#[test] +fn test_infer_relationship_type_both_untyped_multi_schema() { + // ()-[r]->() with multiple relationships should return None + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &None, + &None, + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("Should not error"); + + // Both nodes untyped and schema has 3 relationships - cannot infer + assert!(result.is_none()); +} + +#[test] +fn test_infer_relationship_type_too_many_matches_error() { + // Create a schema with many relationship types from User + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + nodes.insert( + "User".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "users".to_string(), + column_names: vec!["id".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + let mut rels = HashMap::new(); + // Create 6 relationships from User to User (exceeds MAX_INFERRED_TYPES of 5) + for i in 1..=6 { + rels.insert( + format!("REL_{}", i), + RelationshipSchema { + database: "test_db".to_string(), + table_name: format!("rel_{}", i), + column_names: vec!["from_id".to_string(), "to_id".to_string()], + from_node: "User".to_string(), + to_node: "User".to_string(), + from_node_table: "users".to_string(), + to_node_table: "users".to_string(), + from_id: "from_id".to_string(), + to_id: "to_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + } + + let schema = GraphSchema::build(1, "test_db".to_string(), nodes, rels); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + // (u:User)-[r]->() should fail with TooManyInferredTypes error + let result = infer_relationship_type_from_nodes( + &Some("User".to_string()), + &None, + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ); + + assert!(result.is_err()); + match result.unwrap_err() { + LogicalPlanError::TooManyInferredTypes { + count, + max, + types: _, + } => { + assert_eq!(count, 6); + assert_eq!(max, 5); // default max_inferred_types + } + other => panic!("Expected TooManyInferredTypes error, got: {:?}", other), + } +} + +// ======================================== +// Tests for infer_node_label_from_schema +// ======================================== + +fn create_single_node_schema() -> GraphSchema { + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + nodes.insert( + "Person".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "persons".to_string(), + column_names: vec!["id".to_string(), "name".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + // No relationships needed for node-only inference tests + let rels = HashMap::new(); + + GraphSchema::build(1, "test_db".to_string(), nodes, rels) +} + +fn create_multi_node_schema() -> GraphSchema { + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + for node_type in &["User", "Post", "Comment"] { + nodes.insert( + node_type.to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: format!("{}s", node_type.to_lowercase()), + column_names: vec!["id".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + } + + let rels = HashMap::new(); + + GraphSchema::build(1, "test_db".to_string(), nodes, rels) +} + +fn create_empty_node_schema() -> GraphSchema { + use std::collections::HashMap; + + let nodes = HashMap::new(); + let rels = HashMap::new(); + + GraphSchema::build(1, "test_db".to_string(), nodes, rels) +} + +#[test] +fn test_infer_node_label_single_node_schema() { + // When schema has only one node type, infer it + let schema = create_single_node_schema(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); + + assert_eq!(result, Some("Person".to_string())); +} + +#[test] +fn test_infer_node_label_multi_node_schema() { + // When schema has multiple node types, cannot infer (returns None) + let schema = create_multi_node_schema(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); + + // Should not auto-infer when multiple types exist + assert_eq!(result, None); +} + +#[test] +fn test_infer_node_label_empty_schema() { + // When schema has no nodes, cannot infer + let schema = create_empty_node_schema(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); + + assert_eq!(result, None); +} + +#[test] +fn test_infer_node_label_many_nodes_no_error() { + // When schema has many node types, should return None without error + // (unlike relationships, we don't generate UNION for standalone nodes yet) + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + for i in 1..=10 { + nodes.insert( + format!("Type{}", i), + NodeSchema { + database: "test_db".to_string(), + table_name: format!("type_{}", i), + column_names: vec!["id".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + } + + let schema = GraphSchema::build(1, "test_db".to_string(), nodes, HashMap::new()); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); + + // Should not auto-infer when many types exist (just return None, no error) + assert_eq!(result, None); +} + +#[test] +fn test_infer_node_label_denormalized_single_node() { + // Single denormalized node type should still be inferred + // The inference works at schema level - denormalized handling is done later + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + nodes.insert( + "Airport".to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: "flights".to_string(), // Edge table + column_names: vec!["Origin".to_string(), "Dest".to_string()], + primary_keys: "Origin".to_string(), + node_id: NodeIdSchema::single("Origin".to_string(), "String".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: true, // Denormalized node! + from_properties: Some({ + let mut m = HashMap::new(); + m.insert("code".to_string(), "Origin".to_string()); + m + }), + to_properties: Some({ + let mut m = HashMap::new(); + m.insert("code".to_string(), "Dest".to_string()); + m + }), + denormalized_source_table: Some("test_db.flights".to_string()), + label_column: None, + label_value: None, + }, + ); + + let schema = GraphSchema::build(1, "test_db".to_string(), nodes, HashMap::new()); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + // Should still infer the label - denormalized handling happens later + let result = infer_node_label_from_schema(&schema, &plan_ctx).expect("should not error"); + assert_eq!(result, Some("Airport".to_string())); +} + +#[test] +fn test_infer_relationship_type_polymorphic_edge() { + // Polymorphic edge with from_label_values should match typed nodes + use crate::graph_catalog::graph_schema::{NodeIdSchema, NodeSchema, RelationshipSchema}; + use std::collections::HashMap; + + let mut nodes = HashMap::new(); + for node_type in &["User", "Group", "Resource"] { + nodes.insert( + node_type.to_string(), + NodeSchema { + database: "test_db".to_string(), + table_name: format!("{}s", node_type.to_lowercase()), + column_names: vec!["id".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + } + + let mut rels = HashMap::new(); + // Polymorphic MEMBER_OF: (User|Group) -> Group + rels.insert( + "MEMBER_OF".to_string(), + RelationshipSchema { + database: "test_db".to_string(), + table_name: "memberships".to_string(), + column_names: vec!["member_id".to_string(), "group_id".to_string()], + from_node: "$any".to_string(), // Polymorphic + to_node: "Group".to_string(), + from_node_table: "$any".to_string(), + to_node_table: "groups".to_string(), + from_id: "member_id".to_string(), + to_id: "group_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: Some("member_type".to_string()), + to_label_column: None, + from_label_values: Some(vec!["User".to_string(), "Group".to_string()]), // Polymorphic! + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + + let schema = GraphSchema::build(1, "test_db".to_string(), nodes, rels); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + // (u:User)-[r]->(g:Group) should infer MEMBER_OF since User is in from_label_values + let result = infer_relationship_type_from_nodes( + &Some("User".to_string()), + &Some("Group".to_string()), + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .expect("should not error"); + + assert_eq!(result, Some(vec!["MEMBER_OF".to_string()])); +} diff --git a/src/query_planner/logical_plan/match_clause/traversal.rs b/src/query_planner/logical_plan/match_clause/traversal.rs new file mode 100644 index 00000000..a40187b8 --- /dev/null +++ b/src/query_planner/logical_plan/match_clause/traversal.rs @@ -0,0 +1,1174 @@ +use std::sync::Arc; + +use crate::graph_catalog::expression_parser::PropertyValue; +use crate::{ + open_cypher_parser::ast, + query_planner::{ + logical_expr::{ + LogicalExpr, Operator, OperatorApplication, Property, PropertyAccess, TableAlias, + }, + logical_plan::{ + errors::LogicalPlanError, + plan_builder::LogicalPlanResult, + { + CartesianProduct, GraphNode, GraphRel, LogicalPlan, ShortestPathMode, Union, + VariableLengthSpec, + }, + }, + plan_ctx::{PlanCtx, TableCtx}, + }, +}; + +use crate::graph_catalog::graph_schema::GraphSchema; +use crate::query_planner::logical_plan::{generate_id, ViewScan}; +use std::collections::HashMap; + +// Import from sibling modules +use super::helpers::{ + compute_connection_aliases, compute_rel_node_labels, compute_variable_length, + convert_properties, convert_properties_to_operator_application, determine_optional_anchor, + generate_denormalization_aware_scan, generate_scan, is_denormalized_scan, + is_label_denormalized, register_node_in_context, register_path_variable, + register_relationship_in_context, +}; +use super::type_inference::{infer_node_label_from_schema, infer_relationship_type_from_nodes}; +use super::view_scan::{ + generate_relationship_center, try_generate_relationship_view_scan, try_generate_view_scan, +}; + +// Wrapper for backwards compatibility +// Reserved for future use when non-optional traversal needs explicit mode +#[allow(dead_code)] +pub(super) fn traverse_connected_pattern<'a>( + connected_patterns: &Vec>, + plan: Arc, + plan_ctx: &mut PlanCtx, + pathpattern_idx: usize, +) -> LogicalPlanResult> { + traverse_connected_pattern_with_mode( + connected_patterns, + plan, + plan_ctx, + pathpattern_idx, + None, + None, + false, + ) +} + +fn traverse_connected_pattern_with_mode<'a>( + connected_patterns: &Vec>, + mut plan: Arc, + plan_ctx: &mut PlanCtx, + pathpattern_idx: usize, + shortest_path_mode: Option, + path_variable: Option<&str>, + is_optional: bool, +) -> LogicalPlanResult> { + crate::debug_print!("\n╔════════════════════════════════════════"); + crate::debug_print!("║ traverse_connected_pattern_with_mode"); + crate::debug_print!("║ connected_patterns.len() = {}", connected_patterns.len()); + crate::debug_print!("║ Current plan type: {:?}", std::mem::discriminant(&*plan)); + crate::debug_print!("╚════════════════════════════════════════\n"); + + // === PRE-PROCESS: Assign consistent aliases to shared nodes === + // When patterns share nodes via Rc::clone() (e.g., ()-[r1]->()-[r2]->()), + // we need to ensure the shared node gets the same alias in both patterns. + // Use pointer equality to detect shared Rc instances. + // Note: HashMap is already imported at the top of this file. + + // Use usize from Rc::as_ptr() cast as the key for pointer-based identity + let mut node_alias_map: HashMap = HashMap::new(); + + for connected_pattern in connected_patterns.iter() { + // Check start_node - use address as key + let start_ptr = connected_pattern.start_node.as_ptr() as usize; + if !node_alias_map.contains_key(&start_ptr) { + let start_node_ref = connected_pattern.start_node.borrow(); + let alias = if let Some(name) = start_node_ref.name { + name.to_string() + } else { + generate_id() + }; + drop(start_node_ref); + node_alias_map.insert(start_ptr, alias); + } + + // Check end_node - use address as key + let end_ptr = connected_pattern.end_node.as_ptr() as usize; + if !node_alias_map.contains_key(&end_ptr) { + let end_node_ref = connected_pattern.end_node.borrow(); + let alias = if let Some(name) = end_node_ref.name { + name.to_string() + } else { + generate_id() + }; + drop(end_node_ref); + node_alias_map.insert(end_ptr, alias); + } + } + + crate::debug_print!( + "║ Pre-assigned {} node aliases for shared node detection", + node_alias_map.len() + ); + + for (pattern_idx, connected_pattern) in connected_patterns.iter().enumerate() { + crate::debug_print!("┌─ Processing connected_pattern #{}", pattern_idx); + + let start_node_ref = connected_pattern.start_node.borrow(); + let start_node_label_from_ast = start_node_ref.first_label().map(|val| val.to_string()); + // Use pre-assigned alias to ensure shared nodes get the same alias + let start_node_alias = node_alias_map + .get(&(connected_pattern.start_node.as_ptr() as usize)) + .cloned() + .unwrap_or_else(generate_id); + + // CRITICAL FIX: Label resolution order: + // 1. If AST has explicit label (Some(...)), use it + // 2. Else if node exists in plan_ctx with label, use that + // 3. Else None + // This fixes: MATCH (a)-[:R]->(b:B), (b)-[:S]->(c) + // where second pattern needs b's label from first pattern (AST returns None after first use) + let start_node_label = if start_node_label_from_ast.is_some() { + start_node_label_from_ast + } else if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&start_node_alias) { + if let Some(label) = table_ctx.get_label_opt() { + log::info!( + ">>> Found existing '{}' in plan_ctx with label: {}", + start_node_alias, + label + ); + Some(label) + } else { + None + } + } else { + None + }; + + crate::debug_print!( + "│ Start node: alias='{}', label={:?}", + start_node_alias, + start_node_label + ); + + let start_node_props = start_node_ref + .properties + .clone() + .map(|props| { + props + .into_iter() + .map(|p| Property::try_from(p)) + .collect::, _>>() + }) + .transpose() + .map_err(|e| { + LogicalPlanError::QueryPlanningError(format!( + "Failed to convert start node property: {}", + e + )) + })? + .unwrap_or_else(Vec::new); + + // Extract end node info early - needed for filtering anonymous edge types + let end_node_ref = connected_pattern.end_node.borrow(); + // Use pre-assigned alias to ensure shared nodes get the same alias + let end_node_alias = node_alias_map + .get(&(connected_pattern.end_node.as_ptr() as usize)) + .cloned() + .unwrap_or_else(generate_id); + let end_node_label_from_ast = end_node_ref.first_label().map(|val| val.to_string()); + + // CRITICAL FIX: Same label resolution order as start_node + let end_node_label = if end_node_label_from_ast.is_some() { + end_node_label_from_ast + } else if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&end_node_alias) { + if let Some(label) = table_ctx.get_label_opt() { + log::info!( + ">>> Found existing '{}' in plan_ctx with label: {}", + end_node_alias, + label + ); + Some(label) + } else { + None + } + } else { + None + }; + + let rel = &connected_pattern.relationship; + let rel_alias = if let Some(alias) = rel.name { + alias.to_string() + } else { + generate_id() + }; + + // Handle anonymous edge patterns: [] (no type specified) + // Expand relationship types using composite key index from schema + // Supports multiple relationships with same type name differentiated by from/to nodes + let rel_labels = match rel.labels.as_ref() { + Some(labels) => { + // Explicit labels provided: [:TYPE1|TYPE2] + // Look up relationship types using composite key index (O(1) lookup) + // Filters by node compatibility when node types are known + let graph_schema = plan_ctx.schema(); + let mut expanded_labels = Vec::new(); + + // Get node labels for semantic expansion + let from_label = start_node_label.as_deref(); + let to_label = end_node_label.as_deref(); + + for label in labels.iter() { + let variants = + graph_schema.expand_generic_relationship_type(label, from_label, to_label); + if variants.is_empty() { + // No expansion found, use original label (will fail later if truly missing) + expanded_labels.push(label.to_string()); + } else { + // Add all expanded variants + expanded_labels.extend(variants); + } + } + + // Deduplicate in case of overlapping expansions + let unique_labels: Vec = { + let mut seen = std::collections::HashSet::new(); + expanded_labels + .into_iter() + .filter(|l| seen.insert(l.clone())) + .collect() + }; + + Some(unique_labels) + } + None => { + // Anonymous edge pattern: [] (no type specified) + // Use smart inference to determine relationship type(s): + // 1. If schema has only one relationship, use it + // 2. If nodes are typed, find relationships that match those types + // 3. Otherwise, expand to all matching relationship types for UNION + let graph_schema = plan_ctx.schema(); + + infer_relationship_type_from_nodes( + &start_node_label, + &end_node_label, + &rel.direction, + graph_schema, + plan_ctx, + )? + } + }; + + // === LABEL INFERENCE === + // NOTE: Label and edge type inference is now handled by the TypeInference analyzer pass + // which runs after parsing. This provides more robust inference that works across + // WITH boundaries and handles both node labels AND edge types. + // The labels in start_node_label/end_node_label come from AST parsing or will be + // inferred by TypeInference pass. + + log::debug!( + "Pattern processing: start='{}' ({}), end='{}' ({})", + start_node_alias, + start_node_label + .as_ref() + .map(|s| s.as_str()) + .unwrap_or("None"), + end_node_alias, + end_node_label + .as_ref() + .map(|s| s.as_str()) + .unwrap_or("None") + ); + + // Polymorphic inference removed - TypeInference pass handles this + // (start_possible_labels and end_possible_labels were used for UNION generation) + + crate::debug_print!( + "│ Relationship: alias='{}', labels={:?}, direction={:?}", + rel_alias, + rel_labels, + rel.direction + ); + crate::debug_print!( + "│ After inference: start_label={:?}, end_label={:?}", + start_node_label, + end_node_label + ); + + log::debug!("Parsed relationship labels: {:?}", rel_labels); + let rel_properties = rel + .properties + .clone() + .map(|props| { + props + .into_iter() + .map(|p| Property::try_from(p)) + .collect::, _>>() + }) + .transpose() + .map_err(|e| { + LogicalPlanError::QueryPlanningError(format!( + "Failed to convert relationship property: {}", + e + )) + })? + .unwrap_or_else(Vec::new); + + crate::debug_print!( + "│ End node: alias='{}', label={:?}", + end_node_alias, + end_node_label + ); + + let end_node_props = end_node_ref + .properties + .clone() + .map(|props| { + props + .into_iter() + .map(|p| Property::try_from(p)) + .collect::, _>>() + }) + .transpose() + .map_err(|e| { + LogicalPlanError::QueryPlanningError(format!( + "Failed to convert end node property: {}", + e + )) + })? + .unwrap_or_else(Vec::new); + + // if start alias already present in ctx map, it means the current nested connected pattern's start node will be connecting at right side plan and end node will be at the left + if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&start_node_alias) { + if start_node_label.is_some() { + table_ctx.set_labels(start_node_label.clone().map(|l| vec![l])); + } + if !start_node_props.is_empty() { + table_ctx.append_properties(start_node_props); + } + + register_node_in_context( + plan_ctx, + &end_node_alias, + &end_node_label, + end_node_props, + end_node_ref.name.is_some(), + ); + + let (left_conn, right_conn) = + compute_connection_aliases(&rel.direction, &start_node_alias, &end_node_alias); + + // Compute left and right node labels based on direction for relationship lookup + let (left_node_label_for_rel, right_node_label_for_rel) = + compute_rel_node_labels(&rel.direction, &start_node_label, &end_node_label); + + // FIX: For multi-hop patterns, use the existing plan as LEFT to create nested structure + // This ensures (a)-[r1]->(b)-[r2]->(c) becomes GraphRel { left: GraphRel(a-r1-b), center: r2, right: c } + let (left_node, right_node) = match rel.direction { + ast::Direction::Outgoing => { + // (a)-[:r1]->(b)-[:r2]->(c): existing plan (a-r1-b) on left, new node (c) on right + let (scan, is_denorm) = generate_denormalization_aware_scan( + &end_node_alias, + &end_node_label, + plan_ctx, + )?; + + ( + plan.clone(), + Arc::new(LogicalPlan::GraphNode(GraphNode { + input: scan, + alias: end_node_alias.clone(), + label: end_node_label.clone().map(|s| s.to_string()), + is_denormalized: is_denorm, + projected_columns: None, + })), + ) + } + ast::Direction::Incoming => { + // (c)<-[:r2]-(b)<-[:r1]-(a): new node (c) on left, existing plan (b-r1-a) on right + let (scan, is_denorm) = generate_denormalization_aware_scan( + &end_node_alias, + &end_node_label, + plan_ctx, + )?; + + ( + Arc::new(LogicalPlan::GraphNode(GraphNode { + input: scan, + alias: end_node_alias.clone(), + label: end_node_label.clone().map(|s| s.to_string()), + is_denormalized: is_denorm, + projected_columns: None, + })), + plan.clone(), + ) + } + ast::Direction::Either => { + // Either direction: existing plan on left, new node on right + let (scan, is_denorm) = generate_denormalization_aware_scan( + &end_node_alias, + &end_node_label, + plan_ctx, + )?; + + ( + plan.clone(), + Arc::new(LogicalPlan::GraphNode(GraphNode { + input: scan, + alias: end_node_alias.clone(), + label: end_node_label.clone().map(|s| s.to_string()), + is_denormalized: is_denorm, + projected_columns: None, + })), + ) + } + }; + + // Determine anchor_connection for OPTIONAL MATCH + let anchor_connection = + determine_optional_anchor(plan_ctx, is_optional, &left_conn, &right_conn); + + // Handle variable-length patterns and multi-type relationships: + // - Single-type *1: (a)-[:TYPE*1]->(b) → simplify to regular relationship + // - Multi-type *1: (a)-[:TYPE1|TYPE2*1]->(b) → keep VLP for polymorphic nodes + // - Multi-type no VLP: (a)-[:TYPE1|TYPE2]->(b) → ADD implicit *1 for polymorphic handling + let is_multi_type = rel_labels.as_ref().map_or(false, |labels| labels.len() > 1); + + let variable_length = if let Some(vlp) = rel.variable_length.clone() { + // Has explicit VLP spec + let spec: VariableLengthSpec = vlp.into(); + let is_exact_one_hop = spec.min_hops == Some(1) && spec.max_hops == Some(1); + + if is_exact_one_hop && !is_multi_type { + log::info!("Simplifying *1 single-type pattern to regular relationship"); + None // Remove *1 for single-type - treat as regular relationship + } else { + Some(spec) // Keep VLP for multi-type or ranges + } + } else if is_multi_type { + // Multi-type without VLP: add implicit *1 for proper polymorphic handling + log::info!("Adding implicit *1 for multi-type relationship (polymorphic end node)"); + Some(VariableLengthSpec { + min_hops: Some(1), + max_hops: Some(1), + }) + } else { + None // Single-type, no VLP + }; + + let graph_rel_node = GraphRel { + left: left_node, + center: generate_relationship_center( + &rel_alias, + &rel_labels, + &left_conn, + &right_conn, + &left_node_label_for_rel, + &right_node_label_for_rel, + plan_ctx, + )?, + right: right_node, + alias: rel_alias.clone(), + direction: rel.direction.clone().into(), + left_connection: left_conn, + right_connection: right_conn, + is_rel_anchor: false, + variable_length, + shortest_path_mode: shortest_path_mode.clone(), + path_variable: path_variable.map(|s| s.to_string()), + where_predicate: None, // Will be populated by filter pushdown optimization + labels: rel_labels.clone(), + is_optional: if is_optional { Some(true) } else { None }, + anchor_connection, + cte_references: std::collections::HashMap::new(), + }; + + // Register relationship and path variable in context + register_relationship_in_context( + plan_ctx, + &rel_alias, + rel_labels, + rel_properties, + rel.name.is_some(), + &left_node_label_for_rel, + &right_node_label_for_rel, + &graph_rel_node, + path_variable, + shortest_path_mode.as_ref(), + ); + + plan = Arc::new(LogicalPlan::GraphRel(graph_rel_node)); + + crate::debug_print!("│ ✓ Created GraphRel (start node already in context)"); + crate::debug_print!("│ Plan is now: GraphRel"); + crate::debug_print!("└─ Pattern #{} complete\n", pattern_idx); + } + // if end alias already present in ctx map, it means the current nested connected pattern's end node will be connecting at right side plan and start node will be at the left + else if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&end_node_alias) { + log::info!( + ">>> Found existing TableCtx for '{}', updating with label: {:?}", + end_node_alias, + end_node_label + ); + if let Some(ref label) = end_node_label { + table_ctx.set_labels(end_node_label.clone().map(|l| vec![l])); + log::info!(">>> Updated '{}' with label: {}", end_node_alias, label); + } else { + log::warn!( + ">>> end_node_label is None for '{}', cannot update TableCtx!", + end_node_alias + ); + } + if !end_node_props.is_empty() { + table_ctx.append_properties(end_node_props); + } + + let (start_scan, start_is_denorm) = generate_denormalization_aware_scan( + &start_node_alias, + &start_node_label, + plan_ctx, + )?; + + let start_graph_node = GraphNode { + input: start_scan, + alias: start_node_alias.clone(), + label: start_node_label.clone().map(|s| s.to_string()), + is_denormalized: start_is_denorm, + projected_columns: None, + }; + register_node_in_context( + plan_ctx, + &start_node_alias, + &start_node_label, + start_node_props, + start_node_ref.name.is_some(), + ); + + // Compute left and right node labels based on direction for relationship lookup + let (left_node_label_for_rel, right_node_label_for_rel) = + compute_rel_node_labels(&rel.direction, &start_node_label, &end_node_label); + + let graph_rel_node = GraphRel { + left: Arc::new(LogicalPlan::GraphNode(start_graph_node)), + center: generate_relationship_center( + &rel_alias, + &rel_labels, + &start_node_alias, + &end_node_alias, + &start_node_label, + &end_node_label, + plan_ctx, + )?, + right: plan.clone(), + alias: rel_alias.clone(), + direction: rel.direction.clone().into(), + left_connection: start_node_alias.clone(), + right_connection: end_node_alias.clone(), + is_rel_anchor: false, + variable_length: compute_variable_length(rel, &rel_labels), + shortest_path_mode: shortest_path_mode.clone(), + path_variable: path_variable.map(|s| s.to_string()), + where_predicate: None, // Will be populated by filter pushdown optimization + labels: rel_labels.clone(), + is_optional: if plan_ctx.is_optional_match_mode() { + log::warn!( + "CREATING GraphRel with is_optional=Some(true), mode={}", + plan_ctx.is_optional_match_mode() + ); + Some(true) + } else { + log::warn!( + "CREATING GraphRel with is_optional=None, mode={}", + plan_ctx.is_optional_match_mode() + ); + None + }, + // For anchor traversals, the right connection (end_node) is the anchor from base MATCH + // The left connection (start_node) is newly introduced + anchor_connection: if plan_ctx.is_optional_match_mode() { + Some(end_node_alias.clone()) + } else { + None + }, + cte_references: std::collections::HashMap::new(), + }; + + // Register relationship and path variable in context + register_relationship_in_context( + plan_ctx, + &rel_alias, + rel_labels, + rel_properties, + rel.name.is_some(), + &left_node_label_for_rel, + &right_node_label_for_rel, + &graph_rel_node, + path_variable, + shortest_path_mode.as_ref(), + ); + + plan = Arc::new(LogicalPlan::GraphRel(graph_rel_node)); + + crate::debug_print!("│ ✓ Created GraphRel (end node already in context)"); + crate::debug_print!("│ Plan is now: GraphRel"); + crate::debug_print!("└─ Pattern #{} complete\n", pattern_idx); + } + // not connected with existing nodes + else { + // if two comma separated patterns found and they are not connected to each other i.e. there is no common node alias between them + // Allow this - it will create a CartesianProduct. + // If WHERE clause has predicates connecting them (e.g., srcip1.ip = srcip2.ip), those will be processed later + // and can be converted to proper JOINs by optimizer passes. + if pathpattern_idx > 0 { + log::info!( + "Disconnected comma pattern detected at index {}. Creating CartesianProduct. WHERE clause may contain connecting predicates.", + pathpattern_idx + ); + } + + crate::debug_print!("=== CHECKING EXISTING PLAN ==="); + crate::debug_print!( + "=== plan discriminant: {:?} ===", + std::mem::discriminant(&*plan) + ); + + // Check if we have a non-empty input plan (e.g., from WITH clause or previous MATCH) + // If so, we need to create a CartesianProduct to join the previous plan with this new pattern + let has_existing_plan = !matches!(plan.as_ref(), LogicalPlan::Empty); + + crate::debug_print!("=== has_existing_plan: {} ===", has_existing_plan); + + if has_existing_plan { + crate::debug_print!( + "=== DISCONNECTED PATTERN WITH EXISTING PLAN: Creating CartesianProduct ===" + ); + crate::debug_print!( + "=== Existing plan type: {:?} ===", + std::mem::discriminant(&*plan) + ); + } + + // we will keep start graph node at the right side and end at the left side + crate::debug_print!("=== DISCONNECTED PATTERN: About to create start_graph_node ==="); + + let (start_scan, start_is_denorm) = + if is_label_denormalized(&start_node_label, plan_ctx) { + crate::debug_print!( + "=== Start node '{}' is DENORMALIZED, creating Empty scan ===", + start_node_alias + ); + (Arc::new(LogicalPlan::Empty), true) + } else { + let scan = generate_scan( + start_node_alias.clone(), + start_node_label.clone(), + plan_ctx, + )?; + crate::debug_print!( + "=== DISCONNECTED: start_scan created, calling is_denormalized_scan ===" + ); + let is_d = is_denormalized_scan(&scan); + crate::debug_print!("=== DISCONNECTED: start_is_denorm = {} ===", is_d); + (scan, is_d) + }; + + let start_graph_node = GraphNode { + input: start_scan, + alias: start_node_alias.clone(), + label: start_node_label.clone().map(|s| s.to_string()), + is_denormalized: start_is_denorm, + projected_columns: None, + }; + crate::debug_print!( + "=== DISCONNECTED: start_graph_node created with is_denormalized={} ===", + start_graph_node.is_denormalized + ); + register_node_in_context( + plan_ctx, + &start_node_alias, + &start_node_label, + start_node_props, + start_node_ref.name.is_some(), + ); + + let (end_scan, end_is_denorm) = + generate_denormalization_aware_scan(&end_node_alias, &end_node_label, plan_ctx)?; + + let end_graph_node = GraphNode { + input: end_scan, + alias: end_node_alias.clone(), + label: end_node_label.clone().map(|s| s.to_string()), + is_denormalized: end_is_denorm, + projected_columns: None, + }; + register_node_in_context( + plan_ctx, + &end_node_alias, + &end_node_label, + end_node_props, + end_node_ref.name.is_some(), + ); + + let (left_conn, right_conn) = + compute_connection_aliases(&rel.direction, &start_node_alias, &end_node_alias); + + // Compute left and right node labels based on direction for relationship lookup + let (left_node_label_for_rel, right_node_label_for_rel) = + compute_rel_node_labels(&rel.direction, &start_node_label, &end_node_label); + + let (left_node, right_node) = match rel.direction { + ast::Direction::Outgoing => ( + Arc::new(LogicalPlan::GraphNode(start_graph_node)), + Arc::new(LogicalPlan::GraphNode(end_graph_node)), + ), + ast::Direction::Incoming => ( + Arc::new(LogicalPlan::GraphNode(end_graph_node)), + Arc::new(LogicalPlan::GraphNode(start_graph_node)), + ), + ast::Direction::Either => ( + Arc::new(LogicalPlan::GraphNode(start_graph_node)), + Arc::new(LogicalPlan::GraphNode(end_graph_node)), + ), + }; + + // Determine anchor_connection for OPTIONAL MATCH + let anchor_connection = + determine_optional_anchor(plan_ctx, is_optional, &left_conn, &right_conn); + + let graph_rel_node = GraphRel { + left: left_node, + center: generate_relationship_center( + &rel_alias, + &rel_labels, + &left_conn, + &right_conn, + &left_node_label_for_rel, + &right_node_label_for_rel, + plan_ctx, + )?, + right: right_node, + alias: rel_alias.clone(), + direction: rel.direction.clone().into(), + left_connection: left_conn.clone(), // Left node is the start node (left_conn for Outgoing) + right_connection: right_conn.clone(), // Right node is the end node (right_conn for Outgoing) + is_rel_anchor: false, + variable_length: compute_variable_length(rel, &rel_labels), + shortest_path_mode: shortest_path_mode.clone(), + path_variable: path_variable.map(|s| s.to_string()), + where_predicate: { + // 🔧 FIX: For VLP patterns (including shortestPath), extract filters/properties from bound nodes + // When nodes like (p1:Airport {code: 'LAX'}) are used with VLP patterns, their filters + // are in plan_ctx but not automatically merged into GraphRel.where_predicate + // This is needed for VLP CTE generation to apply correct filters with property mapping + if shortest_path_mode.is_some() || rel.variable_length.is_some() { + use crate::query_planner::logical_expr::{Operator, OperatorApplication}; + let mut node_filters = vec![]; + + // Extract filters/properties for left node + if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&left_conn) { + // Get both existing filters AND unconverted properties + node_filters.extend(table_ctx.get_filters().iter().cloned()); + + // Convert any remaining properties to filters + let props = table_ctx.get_and_clear_properties(); + if !props.is_empty() { + match convert_properties(props, &left_conn) { + Ok(mut prop_filters) => { + log::info!( + "🔧 VLP: Converted {} properties to filters for left node '{}'", + prop_filters.len(), + left_conn + ); + node_filters.append(&mut prop_filters); + } + Err(e) => { + log::warn!( + "Failed to convert properties for left node '{}': {:?}", + left_conn, + e + ); + } + } + } + } + + // Extract filters/properties for right node + if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&right_conn) { + // Get both existing filters AND unconverted properties + node_filters.extend(table_ctx.get_filters().iter().cloned()); + + // Convert any remaining properties to filters + let props = table_ctx.get_and_clear_properties(); + if !props.is_empty() { + match convert_properties(props, &right_conn) { + Ok(mut prop_filters) => { + log::info!( + "🔧 VLP: Converted {} properties to filters for right node '{}'", + prop_filters.len(), + right_conn + ); + node_filters.append(&mut prop_filters); + } + Err(e) => { + log::warn!( + "Failed to convert properties for right node '{}': {:?}", + right_conn, + e + ); + } + } + } + } + + // Combine all filters with AND + node_filters + .into_iter() + .reduce(|acc, filter| { + LogicalExpr::OperatorApplicationExp(OperatorApplication { + operator: Operator::And, + operands: vec![acc, filter], + }) + }) + .map(|combined| { + log::info!( + "🔧 VLP: Merged {} bound node filters into where_predicate for rel '{}'", + "multiple", + rel_alias + ); + combined + }) + } else { + None // Will be populated by filter pushdown optimization for regular patterns + } + }, + labels: rel_labels.clone(), + is_optional: if is_optional { Some(true) } else { None }, + anchor_connection, + cte_references: std::collections::HashMap::new(), + }; + + // Register relationship and path variable in context + register_relationship_in_context( + plan_ctx, + &rel_alias, + rel_labels, + rel_properties, + rel.name.is_some(), + &left_node_label_for_rel, + &right_node_label_for_rel, + &graph_rel_node, + path_variable, + shortest_path_mode.as_ref(), + ); + + // Create the GraphRel for this pattern + let new_pattern = Arc::new(LogicalPlan::GraphRel(graph_rel_node)); + + // If we have an existing plan (e.g., from WITH clause), combine with CartesianProduct + if has_existing_plan { + // CRITICAL FIX: When existing plan is OPTIONAL and new pattern is REQUIRED, + // swap them so the required pattern becomes the anchor (FROM clause). + // This ensures correct SQL generation: + // OPTIONAL MATCH ... MATCH x → FROM x LEFT JOIN optional_pattern + // Instead of wrong: + // FROM optional_pattern CROSS JOIN x + let existing_is_optional = plan.is_optional_pattern(); + let (left, right, cp_is_optional) = if existing_is_optional && !is_optional { + // Swap: required pattern becomes left (anchor), optional becomes right + log::info!( + "🔄 CartesianProduct: Swapping left/right - existing plan is optional, new pattern is required" + ); + (new_pattern.clone(), plan.clone(), true) // is_optional=true means RIGHT is optional + } else { + // Normal case: existing plan is anchor + (plan.clone(), new_pattern.clone(), is_optional) + }; + + plan = Arc::new(LogicalPlan::CartesianProduct(CartesianProduct { + left, + right, + is_optional: cp_is_optional, + join_condition: None, // Will be populated by optimizer if WHERE bridges both sides + })); + crate::debug_print!( + "│ ✓ Created CartesianProduct (combining existing plan with new pattern)" + ); + crate::debug_print!( + "│ Plan is now: CartesianProduct(optional: {})", + cp_is_optional + ); + } else { + plan = new_pattern; + crate::debug_print!("│ ✓ Created GraphRel (first pattern - disconnected)"); + crate::debug_print!("│ Plan is now: GraphRel"); + } + crate::debug_print!("└─ Pattern #{} complete\n", pattern_idx); + } + } + + crate::debug_print!("╔════════════════════════════════════════"); + crate::debug_print!("║ traverse_connected_pattern_with_mode COMPLETE"); + crate::debug_print!("║ Final plan type: {:?}", std::mem::discriminant(&*plan)); + crate::debug_print!("╚════════════════════════════════════════\n"); + + Ok(plan) +} + +pub(super) fn traverse_node_pattern( + node_pattern: &ast::NodePattern, + plan: Arc, + plan_ctx: &mut PlanCtx, +) -> LogicalPlanResult> { + // For now we are not supporting empty node. standalone node with name is supported. + let node_alias = node_pattern + .name + .ok_or(LogicalPlanError::EmptyNode)? + .to_string(); + let mut node_label: Option = node_pattern.first_label().map(|val| val.to_string()); + + // === SINGLE-NODE-SCHEMA INFERENCE === + // If no label provided and schema has only one node type, use it + if node_label.is_none() { + if let Ok(Some(inferred_label)) = infer_node_label_from_schema(plan_ctx.schema(), plan_ctx) + { + log::info!( + "Node '{}' label inferred as '{}' (single node type in schema)", + node_alias, + inferred_label + ); + node_label = Some(inferred_label); + } + } + + let node_props: Vec = node_pattern + .properties + .clone() + .map(|props| { + props + .into_iter() + .map(|p| Property::try_from(p)) + .collect::, _>>() + }) + .transpose()? + .unwrap_or_default(); + + // if alias already present in ctx map then just add its conditions and do not add it in the logical plan + if let Some(table_ctx) = plan_ctx.get_mut_table_ctx_opt(&node_alias) { + if node_label.is_some() { + table_ctx.set_labels(node_label.map(|l| vec![l])); + } + if !node_props.is_empty() { + table_ctx.append_properties(node_props); + } + Ok(plan) + } else { + // Register the node in the context + register_node_in_context( + plan_ctx, + &node_alias, + &node_label, + node_props, + node_pattern.name.is_some(), + ); + + let scan = generate_scan(node_alias.clone(), node_label.clone(), plan_ctx)?; + + // Check if this is a Union (denormalized node with BOTH positions) + // In that case, wrap EACH branch in its own GraphNode, then return the Union + if let LogicalPlan::Union(union) = scan.as_ref() { + log::info!( + "✓ Wrapping Union branches in GraphNodes for alias '{}'", + node_alias + ); + let wrapped_inputs: Vec> = union + .inputs + .iter() + .map(|branch| { + let is_denorm = is_denormalized_scan(branch); + Arc::new(LogicalPlan::GraphNode(GraphNode { + input: branch.clone(), + alias: node_alias.clone(), + label: node_label.clone().map(|s| s.to_string()), + is_denormalized: is_denorm, + projected_columns: None, + })) + }) + .collect(); + + let wrapped_union = Union { + inputs: wrapped_inputs, + union_type: union.union_type.clone(), + }; + log::info!( + "✓✓✓ WRAPPING UNION: {} branches being wrapped in GraphNodes ✓✓✓", + wrapped_union.inputs.len() + ); + return Ok(Arc::new(LogicalPlan::Union(wrapped_union))); + } + + // Normal case: single ViewScan wrapped in GraphNode + let is_denorm = is_denormalized_scan(&scan); + let new_node_alias = node_alias.clone(); // Clone for logging + let graph_node = GraphNode { + input: scan, + alias: node_alias, + label: node_label.map(|s| s.to_string()), + is_denormalized: is_denorm, + projected_columns: None, + }; + let new_node_plan = Arc::new(LogicalPlan::GraphNode(graph_node)); + + // Check if we need to create a CartesianProduct + // For comma patterns like (a:User), (b:User), we need CROSS JOIN + let has_existing_plan = match plan.as_ref() { + LogicalPlan::Empty => false, + _ => true, + }; + + if has_existing_plan { + // CRITICAL FIX: When existing plan is OPTIONAL and new node is from REQUIRED MATCH, + // swap them so the required node becomes the anchor (FROM clause). + let existing_is_optional = plan.is_optional_pattern(); + let (left, right, cp_is_optional) = if existing_is_optional { + // Swap: required node becomes left (anchor), optional becomes right + log::info!( + "🔄 CartesianProduct (node): Swapping - existing plan is optional, node '{}' is required", + new_node_alias + ); + (new_node_plan.clone(), plan.clone(), true) // is_optional=true means RIGHT is optional + } else { + // Normal case: existing plan is anchor + (plan.clone(), new_node_plan.clone(), false) + }; + + log::info!( + "Creating CartesianProduct for comma pattern: existing plan + node '{}'", + new_node_alias + ); + Ok(Arc::new(LogicalPlan::CartesianProduct(CartesianProduct { + left, + right, + is_optional: cp_is_optional, + join_condition: None, + }))) + } else { + Ok(new_node_plan) + } + } +} + +pub fn evaluate_match_clause<'a>( + match_clause: &ast::MatchClause<'a>, + plan: Arc, + plan_ctx: &mut PlanCtx, +) -> LogicalPlanResult> { + evaluate_match_clause_with_optional(match_clause, plan, plan_ctx, false) +} + +/// Internal function that supports optional mode +pub fn evaluate_match_clause_with_optional<'a>( + match_clause: &ast::MatchClause<'a>, + mut plan: Arc, + plan_ctx: &mut PlanCtx, + is_optional: bool, +) -> LogicalPlanResult> { + for (idx, (path_variable, path_pattern)) in match_clause.path_patterns.iter().enumerate() { + match path_pattern { + ast::PathPattern::Node(node_pattern) => { + plan = traverse_node_pattern(node_pattern, plan, plan_ctx)?; + } + ast::PathPattern::ConnectedPattern(connected_patterns) => { + plan = traverse_connected_pattern_with_mode( + connected_patterns, + plan, + plan_ctx, + idx, + None, + *path_variable, + is_optional, + )?; + } + ast::PathPattern::ShortestPath(inner_pattern) => { + // Process inner pattern with shortest path mode enabled + plan = evaluate_single_path_pattern_with_mode( + inner_pattern.as_ref(), + plan, + plan_ctx, + idx, + Some(ShortestPathMode::Shortest), + *path_variable, + )?; + } + ast::PathPattern::AllShortestPaths(inner_pattern) => { + // Process inner pattern with all shortest paths mode enabled + plan = evaluate_single_path_pattern_with_mode( + inner_pattern.as_ref(), + plan, + plan_ctx, + idx, + Some(ShortestPathMode::AllShortest), + *path_variable, + )?; + } + } + } + + convert_properties_to_operator_application(plan_ctx)?; + + // Apply WHERE clause if present (OpenCypher grammar allows WHERE per MATCH) + if let Some(where_clause) = &match_clause.where_clause { + use crate::query_planner::logical_plan::where_clause::evaluate_where_clause; + plan = evaluate_where_clause(where_clause, plan)?; + } + + Ok(plan) +} + +// Helper function to evaluate a single path pattern with shortest path mode +fn evaluate_single_path_pattern_with_mode<'a>( + path_pattern: &ast::PathPattern<'a>, + plan: Arc, + plan_ctx: &mut PlanCtx, + idx: usize, + shortest_path_mode: Option, + path_variable: Option<&str>, +) -> LogicalPlanResult> { + match path_pattern { + ast::PathPattern::Node(node_pattern) => traverse_node_pattern(node_pattern, plan, plan_ctx), + ast::PathPattern::ConnectedPattern(connected_patterns) => { + traverse_connected_pattern_with_mode( + connected_patterns, + plan, + plan_ctx, + idx, + shortest_path_mode, + path_variable, + false, + ) + } + ast::PathPattern::ShortestPath(inner) => { + // Recursively unwrap with shortest path mode + evaluate_single_path_pattern_with_mode( + inner.as_ref(), + plan, + plan_ctx, + idx, + Some(ShortestPathMode::Shortest), + path_variable, + ) + } + ast::PathPattern::AllShortestPaths(inner) => { + // Recursively unwrap with all shortest paths mode + evaluate_single_path_pattern_with_mode( + inner.as_ref(), + plan, + plan_ctx, + idx, + Some(ShortestPathMode::AllShortest), + path_variable, + ) + } + } +} diff --git a/src/query_planner/logical_plan/match_clause/type_inference.rs b/src/query_planner/logical_plan/match_clause/type_inference.rs new file mode 100644 index 00000000..60828703 --- /dev/null +++ b/src/query_planner/logical_plan/match_clause/type_inference.rs @@ -0,0 +1,572 @@ +//! Type inference for MATCH clause processing. +//! +//! This module handles automatic inference of node labels and relationship types +//! when they are not explicitly specified in Cypher queries. +//! +//! # Inference Strategies +//! +//! 1. **Single-schema inference**: If the schema has only one type, use it +//! 2. **Node-type inference**: Infer relationship types from typed node endpoints +//! 3. **Relationship-type inference**: Infer node labels from relationship type +//! +//! # Examples +//! +//! ```cypher +//! // Single relationship schema - relationship type inferred +//! MATCH ()-[r]->() RETURN r +//! +//! // Typed nodes - relationship type inferred from User→Post combinations +//! MATCH (u:User)-[r]->(p:Post) RETURN r +//! ``` + +use crate::graph_catalog::graph_schema::GraphSchema; +use crate::open_cypher_parser::ast; +use crate::query_planner::logical_plan::errors::LogicalPlanError; +use crate::query_planner::logical_plan::plan_builder::LogicalPlanResult; +use crate::query_planner::plan_ctx::PlanCtx; + +/// Infer node label for standalone nodes when label is not specified. +/// +/// Handles single-schema inference: If schema has only one node type, use it. +/// - Query: `MATCH (n) RETURN n` +/// - Schema: Only one node type defined (e.g., User) +/// - Result: n inferred as :User +/// +/// # Returns +/// - `Ok(Some(label))` - Successfully inferred label +/// - `Ok(None)` - Cannot infer (multiple node types or no nodes in schema) +pub fn infer_node_label_from_schema( + schema: &GraphSchema, + plan_ctx: &PlanCtx, +) -> LogicalPlanResult> { + let node_schemas = schema.all_node_schemas(); + + // Case 1: Single node type in schema - use it + if node_schemas.len() == 1 { + let node_type = node_schemas + .keys() + .next() + .ok_or_else(|| { + LogicalPlanError::QueryPlanningError( + "Schema has exactly 1 node type but keys().next() returned None".to_string(), + ) + })? + .clone(); + log::info!( + "Node inference: Schema has only one node type '{}', using it", + node_type + ); + return Ok(Some(node_type)); + } + + // Case 2: No nodes in schema + if node_schemas.is_empty() { + log::debug!("Node inference: Schema has no node types defined, cannot infer"); + return Ok(None); + } + + // Case 3: Multiple node types - check if within limit for UNION generation + let node_count = node_schemas.len(); + if node_count <= plan_ctx.max_inferred_types { + // Could potentially generate UNION of all types, but for now just log info + log::info!( + "Node inference: Schema has {} node types ({:?}), would need UNION for all", + node_count, + node_schemas.keys().collect::>() + ); + // For now, don't auto-generate UNION - require explicit label + return Ok(None); + } + + // Case 4: Too many node types + let types_preview: Vec<_> = node_schemas.keys().take(5).cloned().collect(); + let types_str = if node_count > 5 { + format!("{}, ...", types_preview.join(", ")) + } else { + node_schemas.keys().cloned().collect::>().join(", ") + }; + + log::info!( + "Node inference: Schema has {} node types [{}], too many for auto-inference", + node_count, + types_str + ); + + // Don't error - just return None to indicate no inference possible + // User should specify an explicit label + Ok(None) +} + +/// Infer relationship type from typed node labels when edge is untyped. +/// +/// Handles two cases: +/// 1. **Single-schema inference**: If schema has only one relationship, use it +/// - Query: `()-[r]->()` → infer r:ONLY_REL if only one relationship in schema +/// +/// 2. **Node-type inference**: If nodes are typed, find relationships that match +/// - Query: `(a:Airport)-[r]->()` → infer r:FLIGHT if FLIGHT is the only edge with from_node=Airport +/// - Query: `()-[r]->(a:Airport)` → infer r:FLIGHT if FLIGHT is the only edge with to_node=Airport +/// - Query: `(a:User)-[r]->(b:Post)` → infer r:LIKES if LIKES is the only User→Post edge +/// +/// # Returns +/// - `Ok(Some(types))` - Successfully inferred relationship types +/// - `Ok(None)` - Cannot infer (both nodes untyped with multi-schema, or no matches) +/// - `Err(TooManyInferredTypes)` - Too many matches, user must specify explicit type +pub fn infer_relationship_type_from_nodes( + start_label: &Option, + end_label: &Option, + direction: &ast::Direction, + schema: &GraphSchema, + plan_ctx: &PlanCtx, +) -> LogicalPlanResult>> { + let rel_schemas = schema.get_relationships_schemas(); + + // Case 1: Single relationship in schema - use it regardless of node types + if rel_schemas.len() == 1 { + let rel_type = rel_schemas + .keys() + .next() + .ok_or_else(|| { + LogicalPlanError::QueryPlanningError( + "Schema has exactly 1 relationship type but keys().next() returned None" + .to_string(), + ) + })? + .clone(); + log::info!( + "Relationship inference: Schema has only one relationship type '{}', using it", + rel_type + ); + return Ok(Some(vec![rel_type])); + } + + // Case 2: At least one node is typed - filter relationships by node type compatibility + if start_label.is_none() && end_label.is_none() { + log::debug!( + "Relationship inference: Both nodes untyped and schema has {} relationships, cannot infer", + rel_schemas.len() + ); + return Ok(None); + } + + // Find relationships that match the typed node(s) + let matching_types: Vec = rel_schemas + .iter() + .filter(|(_, rel_schema)| { + // Check compatibility based on direction + match direction { + ast::Direction::Outgoing => { + // start→end: from_node=start, to_node=end + let from_ok = start_label + .as_ref() + .map(|l| { + // Check both from_node and from_label_values for polymorphic support + if l == &rel_schema.from_node { + return true; + } + if let Some(values) = &rel_schema.from_label_values { + return values.contains(l); + } + false + }) + .unwrap_or(true); + let to_ok = end_label + .as_ref() + .map(|l| { + if l == &rel_schema.to_node { + return true; + } + if let Some(values) = &rel_schema.to_label_values { + return values.contains(l); + } + false + }) + .unwrap_or(true); + from_ok && to_ok + } + ast::Direction::Incoming => { + // start←end: from_node=end, to_node=start + let from_ok = end_label + .as_ref() + .map(|l| { + if l == &rel_schema.from_node { + return true; + } + if let Some(values) = &rel_schema.from_label_values { + return values.contains(l); + } + false + }) + .unwrap_or(true); + let to_ok = start_label + .as_ref() + .map(|l| { + if l == &rel_schema.to_node { + return true; + } + if let Some(values) = &rel_schema.to_label_values { + return values.contains(l); + } + false + }) + .unwrap_or(true); + from_ok && to_ok + } + ast::Direction::Either => { + // Could match in either direction + let outgoing_ok = { + let from_ok = start_label + .as_ref() + .map(|l| { + l == &rel_schema.from_node + || rel_schema + .from_label_values + .as_ref() + .map(|v| v.contains(l)) + .unwrap_or(false) + }) + .unwrap_or(true); + let to_ok = end_label + .as_ref() + .map(|l| { + l == &rel_schema.to_node + || rel_schema + .to_label_values + .as_ref() + .map(|v| v.contains(l)) + .unwrap_or(false) + }) + .unwrap_or(true); + from_ok && to_ok + }; + let incoming_ok = { + let from_ok = end_label + .as_ref() + .map(|l| { + l == &rel_schema.from_node + || rel_schema + .from_label_values + .as_ref() + .map(|v| v.contains(l)) + .unwrap_or(false) + }) + .unwrap_or(true); + let to_ok = start_label + .as_ref() + .map(|l| { + l == &rel_schema.to_node + || rel_schema + .to_label_values + .as_ref() + .map(|v| v.contains(l)) + .unwrap_or(false) + }) + .unwrap_or(true); + from_ok && to_ok + }; + outgoing_ok || incoming_ok + } + } + }) + .map(|(type_name, _)| type_name.clone()) + .collect(); + + if matching_types.is_empty() { + log::warn!( + "Relationship inference: No relationships match {:?}->{:?}", + start_label, + end_label + ); + return Ok(None); + } + + // Check if too many types would result in excessive UNION branches + if matching_types.len() > plan_ctx.max_inferred_types { + let types_preview: Vec<_> = matching_types.iter().take(5).cloned().collect(); + let types_str = if matching_types.len() > 5 { + format!("{}, ...", types_preview.join(", ")) + } else { + matching_types.join(", ") + }; + + log::error!( + "Relationship inference: Too many matching types ({}) for {:?}->{:?}: [{}]. Max allowed is {}.", + matching_types.len(), start_label, end_label, types_str, plan_ctx.max_inferred_types + ); + + return Err(LogicalPlanError::TooManyInferredTypes { + count: matching_types.len(), + max: plan_ctx.max_inferred_types, + types: types_str, + }); + } + + if matching_types.len() == 1 { + log::info!( + "Relationship inference: Inferred relationship type '{}' from node types {:?}->{:?}", + matching_types[0], + start_label, + end_label + ); + } else { + log::info!( + "Relationship inference: Multiple matching types {:?} for {:?}->{:?}, will expand to UNION", + matching_types, start_label, end_label + ); + } + + Ok(Some(matching_types)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::graph_catalog::graph_schema::{ + GraphSchema, NodeIdSchema, NodeSchema, RelationshipSchema, + }; + use std::collections::HashMap; + use std::sync::Arc; + + fn create_test_schema_with_relationships() -> GraphSchema { + let mut nodes = HashMap::new(); + nodes.insert( + "User".to_string(), + NodeSchema { + database: "test".to_string(), + table_name: "users".to_string(), + column_names: vec!["user_id".to_string()], + primary_keys: "user_id".to_string(), + node_id: NodeIdSchema::single("user_id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + nodes.insert( + "Post".to_string(), + NodeSchema { + database: "test".to_string(), + table_name: "posts".to_string(), + column_names: vec!["post_id".to_string()], + primary_keys: "post_id".to_string(), + node_id: NodeIdSchema::single("post_id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + let mut rels = HashMap::new(); + rels.insert( + "FOLLOWS".to_string(), + RelationshipSchema { + database: "test".to_string(), + table_name: "follows".to_string(), + column_names: vec!["follower_id".to_string(), "followed_id".to_string()], + from_node: "User".to_string(), + to_node: "User".to_string(), + from_node_table: "users".to_string(), + to_node_table: "users".to_string(), + from_id: "follower_id".to_string(), + to_id: "followed_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + rels.insert( + "LIKES".to_string(), + RelationshipSchema { + database: "test".to_string(), + table_name: "likes".to_string(), + column_names: vec!["user_id".to_string(), "post_id".to_string()], + from_node: "User".to_string(), + to_node: "Post".to_string(), + from_node_table: "users".to_string(), + to_node_table: "posts".to_string(), + from_id: "user_id".to_string(), + to_id: "post_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + + GraphSchema::build(1, "test_db".to_string(), nodes, rels) + } + + fn create_single_relationship_schema() -> GraphSchema { + let mut nodes = HashMap::new(); + nodes.insert( + "Node".to_string(), + NodeSchema { + database: "test".to_string(), + table_name: "nodes".to_string(), + column_names: vec!["id".to_string()], + primary_keys: "id".to_string(), + node_id: NodeIdSchema::single("id".to_string(), "UInt64".to_string()), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + is_denormalized: false, + from_properties: None, + to_properties: None, + denormalized_source_table: None, + label_column: None, + label_value: None, + }, + ); + + let mut rels = HashMap::new(); + rels.insert( + "ONLY_REL".to_string(), + RelationshipSchema { + database: "test".to_string(), + table_name: "only_rel".to_string(), + column_names: vec!["from_id".to_string(), "to_id".to_string()], + from_node: "Node".to_string(), + to_node: "Node".to_string(), + from_node_table: "nodes".to_string(), + to_node_table: "nodes".to_string(), + from_id: "from_id".to_string(), + to_id: "to_id".to_string(), + from_node_id_dtype: "UInt64".to_string(), + to_node_id_dtype: "UInt64".to_string(), + property_mappings: HashMap::new(), + view_parameters: None, + engine: None, + use_final: None, + filter: None, + edge_id: None, + type_column: None, + from_label_column: None, + to_label_column: None, + from_label_values: None, + to_label_values: None, + from_node_properties: None, + to_node_properties: None, + is_fk_edge: false, + constraints: None, + }, + ); + + GraphSchema::build(1, "test_db".to_string(), nodes, rels) + } + + #[test] + fn test_infer_relationship_type_single_schema() { + let schema = create_single_relationship_schema(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + let result = infer_relationship_type_from_nodes( + &None, + &None, + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .unwrap(); + + assert_eq!(result, Some(vec!["ONLY_REL".to_string()])); + } + + #[test] + fn test_infer_relationship_type_from_both_nodes() { + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + // User -> Post should match LIKES + let result = infer_relationship_type_from_nodes( + &Some("User".to_string()), + &Some("Post".to_string()), + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .unwrap(); + + assert_eq!(result, Some(vec!["LIKES".to_string()])); + } + + #[test] + fn test_infer_relationship_type_no_matches() { + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + // Post -> User has no matching relationship + let result = infer_relationship_type_from_nodes( + &Some("Post".to_string()), + &Some("User".to_string()), + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .unwrap(); + + assert_eq!(result, None); + } + + #[test] + fn test_infer_relationship_type_both_untyped_multi_schema() { + let schema = create_test_schema_with_relationships(); + let plan_ctx = PlanCtx::new(Arc::new(schema.clone())); + + // Both nodes untyped with multiple relationships - cannot infer + let result = infer_relationship_type_from_nodes( + &None, + &None, + &ast::Direction::Outgoing, + &schema, + &plan_ctx, + ) + .unwrap(); + + assert_eq!(result, None); + } +} diff --git a/src/query_planner/logical_plan/match_clause/view_scan.rs b/src/query_planner/logical_plan/match_clause/view_scan.rs new file mode 100644 index 00000000..48392da4 --- /dev/null +++ b/src/query_planner/logical_plan/match_clause/view_scan.rs @@ -0,0 +1,895 @@ +//! ViewScan generation for MATCH clause processing. +//! +//! This module handles the creation of ViewScan logical plan nodes for both +//! node patterns and relationship patterns. It encapsulates the complex logic +//! for: +//! - Regular (non-denormalized) node ViewScans +//! - Denormalized node ViewScans (nodes stored as columns in edge tables) +//! - Multi-table UNION ALL for nodes appearing in multiple tables +//! - Relationship ViewScans with polymorphic edge support +//! - Schema filter propagation +//! - Parameterized view support + +use std::collections::HashMap; +use std::sync::Arc; + +use crate::graph_catalog::expression_parser::PropertyValue; +use crate::query_planner::logical_plan::errors::LogicalPlanError; +use crate::query_planner::logical_plan::plan_builder::LogicalPlanResult; +use crate::query_planner::logical_plan::{LogicalPlan, Union, UnionType, ViewScan}; +use crate::query_planner::plan_ctx::PlanCtx; + +/// Try to generate a ViewScan for a node by looking up the label in the schema from plan_ctx. +/// +/// This function handles several complex cases: +/// 1. **Denormalized nodes in multiple tables**: Creates UNION ALL of ViewScans +/// 2. **Denormalized nodes with both positions**: Creates UNION ALL of FROM and TO branches +/// 3. **Multi-table labels**: Same label in different tables → UNION ALL +/// 4. **Standard nodes**: Single ViewScan from node table +/// +/// # Returns +/// - `Ok(Some(plan))` - Successfully created ViewScan or Union plan +/// - `Ok(None)` - Label not found in schema (caller should handle) +/// - `Err(...)` - Invalid schema configuration +pub fn try_generate_view_scan( + _alias: &str, + label: &str, + plan_ctx: &PlanCtx, +) -> Result>, LogicalPlanError> { + log::debug!("try_generate_view_scan: label='{}'", label); + + // Use plan_ctx.schema() instead of GLOBAL_SCHEMAS + let schema = plan_ctx.schema(); + + // Look up the node schema for this label + let node_schema = match schema.node_schema(label) { + Ok(s) => s, + Err(e) => { + log::warn!("Could not find node schema for label '{}': {:?}", label, e); + return Ok(None); + } + }; + + // DENORMALIZED NODE-ONLY QUERIES: + // For denormalized nodes (virtual nodes that exist as columns on edge tables), + // we need to generate queries from the edge table itself. + // + // For nodes that appear in MULTIPLE edge tables (like IP in dns_log and conn_log), + // we create a UNION ALL of all possible sources. + // + // For each relationship where this node appears: + // - If node is FROM → ViewScan with from_node_properties from that edge table + // - If node is TO → ViewScan with to_node_properties from that edge table + if node_schema.is_denormalized { + log::info!( + "✓ Denormalized node-only query for label '{}' - checking all tables", + label + ); + + // Check if this node appears in multiple relationships/tables + if let Some(metadata) = schema.get_denormalized_node_metadata(label) { + let rel_types = metadata.get_relationship_types(); + + if rel_types.len() > 1 || metadata.id_sources.values().any(|v| v.len() > 1) { + // MULTI-TABLE CASE: Node appears in multiple tables/positions + log::info!( + "✓ Denormalized node '{}' appears in {} relationship type(s) - creating multi-table UNION", + label, rel_types.len() + ); + + let mut union_inputs: Vec> = Vec::new(); + + for rel_type in &rel_types { + if let Ok(rel_schema) = schema.get_rel_schema(rel_type) { + let full_table_name = rel_schema.full_table_name(); + + // Check if this node is in FROM position + if rel_schema.from_node == label { + if let Some(ref from_props) = rel_schema.from_node_properties { + log::debug!( + "✓ Adding FROM branch for '{}' from table '{}' (rel: {})", + label, + full_table_name, + rel_type + ); + log::debug!( + "Adding FROM branch. union_inputs before push: len={}", + union_inputs.len() + ); + + // Populate property_mapping from from_props so full node expansion works + let property_mapping: HashMap = from_props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect(); + + // Get the actual ID column name from node_id property + let id_prop_name = node_schema + .node_id + .columns() + .first() + .map(|s| s.to_string()) + .unwrap_or_else(|| "id".to_string()); + let id_column = from_props + .get(&id_prop_name) + .cloned() + .unwrap_or_else(|| id_prop_name.clone()); + + log::info!( + "✓ FROM branch for '{}': id_prop='{}', id_column='{}', {} properties", + label, id_prop_name, id_column, property_mapping.len() + ); + + let mut from_scan = ViewScan::new( + full_table_name.clone(), + None, + property_mapping.clone(), + id_column, + vec![], + vec![], + ); + from_scan.is_denormalized = true; + from_scan.from_node_properties = Some(property_mapping); + log::debug!( + "FROM ViewScan properties: from={:?}, to={:?}", + from_scan + .from_node_properties + .as_ref() + .map(|p| p.keys().collect::>()), + from_scan + .to_node_properties + .as_ref() + .map(|p| p.keys().collect::>()) + ); + union_inputs + .push(Arc::new(LogicalPlan::ViewScan(Arc::new(from_scan)))); + log::debug!( + "Added FROM branch. union_inputs after push: len={}", + union_inputs.len() + ); + } + } + + // Check if this node is in TO position + if rel_schema.to_node == label { + log::debug!( + "Checking TO position. to_node='{}', label='{}', has to_node_properties: {}", + rel_schema.to_node, label, rel_schema.to_node_properties.is_some() + ); + if let Some(ref to_props) = rel_schema.to_node_properties { + log::debug!("TO props: {:?}", to_props.keys().collect::>()); + log::debug!( + "✓ Adding TO branch for '{}' from table '{}' (rel: {})", + label, + full_table_name, + rel_type + ); + log::debug!( + "Adding TO branch. union_inputs before push: len={}", + union_inputs.len() + ); + + // Populate property_mapping from to_props so full node expansion works + let property_mapping: HashMap = to_props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect(); + + // Get the actual ID column name from node_id property + let id_prop_name = node_schema + .node_id + .columns() + .first() + .map(|s| s.to_string()) + .unwrap_or_else(|| "id".to_string()); + let id_column = to_props + .get(&id_prop_name) + .cloned() + .unwrap_or_else(|| id_prop_name.clone()); + + log::info!( + "✓ TO branch for '{}': id_prop='{}', id_column='{}', {} properties", + label, id_prop_name, id_column, property_mapping.len() + ); + + let mut to_scan = ViewScan::new( + full_table_name.clone(), + None, + property_mapping.clone(), + id_column, + vec![], + vec![], + ); + to_scan.is_denormalized = true; + to_scan.to_node_properties = Some(property_mapping); + log::debug!( + "TO ViewScan properties: from={:?}, to={:?}", + to_scan + .from_node_properties + .as_ref() + .map(|p| p.keys().collect::>()), + to_scan + .to_node_properties + .as_ref() + .map(|p| p.keys().collect::>()) + ); + union_inputs + .push(Arc::new(LogicalPlan::ViewScan(Arc::new(to_scan)))); + log::debug!( + "Added TO branch. union_inputs after push: len={}", + union_inputs.len() + ); + } + } + } + } + + if union_inputs.is_empty() { + log::error!("No ViewScans generated for denormalized node '{}'", label); + return Ok(None); + } + + if union_inputs.len() == 1 { + log::info!( + "✓ Single ViewScan for denormalized node '{}' (only one source)", + label + ); + // Safe: we just checked that union_inputs.len() == 1 + if let Some(plan) = union_inputs.pop() { + return Ok(Some(plan)); + } + } + + let union = Union { + inputs: union_inputs, + union_type: UnionType::All, + }; + + log::info!( + "✓ Created UNION ALL with {} branches for denormalized node '{}'", + union.inputs.len(), + label + ); + return Ok(Some(Arc::new(LogicalPlan::Union(union)))); + } + } + + // SINGLE-TABLE CASE: Fall through to existing logic + let has_from_props = node_schema.from_properties.is_some(); + let has_to_props = node_schema.to_properties.is_some(); + let source_table = node_schema + .denormalized_source_table + .as_ref() + .ok_or_else(|| { + log::error!("Denormalized node '{}' missing source table", label); + LogicalPlanError::InvalidSchema { + label: label.to_string(), + reason: "Denormalized node missing source table".to_string(), + } + })?; + + log::debug!( + "Denormalized node '{}': has_from_props={}, has_to_props={}, source_table={}", + label, + has_from_props, + has_to_props, + source_table + ); + + // source_table is already fully qualified (database.table) from config.rs + let full_table_name = source_table.clone(); + + // Case 3: BOTH from and to properties → UNION ALL of two ViewScans + if has_from_props && has_to_props { + log::info!( + "✓✓✓ SINGLE-TABLE CASE: Denormalized node '{}' has BOTH positions - creating UNION ALL ✓✓✓", + label + ); + + // Create FROM position ViewScan + let mut from_scan = ViewScan::new( + full_table_name.clone(), + None, + HashMap::new(), + String::new(), + vec![], + vec![], + ); + from_scan.is_denormalized = true; + from_scan.from_node_properties = node_schema.from_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + from_scan.schema_filter = node_schema.filter.clone(); + // Note: to_node_properties is None - this is the FROM branch + + // Create TO position ViewScan + let mut to_scan = ViewScan::new( + full_table_name, + None, + HashMap::new(), + String::new(), + vec![], + vec![], + ); + to_scan.is_denormalized = true; + to_scan.to_node_properties = node_schema.to_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + to_scan.schema_filter = node_schema.filter.clone(); + // Note: from_node_properties is None - this is the TO branch + + // Create Union of the two ViewScans + let union = Union { + inputs: vec![ + Arc::new(LogicalPlan::ViewScan(Arc::new(from_scan))), + Arc::new(LogicalPlan::ViewScan(Arc::new(to_scan))), + ], + union_type: UnionType::All, + }; + + log::info!( + ">>>SINGLE-TABLE CASE: Created UNION with 2 branches for '{}' <<<", + label + ); + return Ok(Some(Arc::new(LogicalPlan::Union(union)))); + } + + // Case 1 or 2: Only one position - single ViewScan + let mut view_scan = ViewScan::new( + full_table_name, + None, + HashMap::new(), + String::new(), + vec![], + vec![], + ); + + view_scan.is_denormalized = true; + view_scan.from_node_properties = node_schema.from_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + view_scan.to_node_properties = node_schema.to_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + view_scan.schema_filter = node_schema.filter.clone(); + + log::info!( + "✓ Created denormalized ViewScan for '{}' (single position)", + label + ); + + return Ok(Some(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan))))); + } + + // MULTI_TABLE_LABEL CHECK: Non-denormalized nodes with same label in multiple tables + // This happens when the config has multiple node definitions with the same label but different tables + let all_schemas_for_label = schema.get_all_node_schemas_for_label(label); + if all_schemas_for_label.len() > 1 { + log::info!( + "✓ MULTI_TABLE_LABEL: Found '{}' in {} different tables - creating UNION ALL", + label, + all_schemas_for_label.len() + ); + + let mut union_inputs: Vec> = Vec::new(); + + for (_composite_key, other_schema) in all_schemas_for_label { + let full_table_name = format!("{}.{}", other_schema.database, other_schema.table_name); + let id_column = other_schema + .node_id + .columns() + .first() + .map(|s| s.to_string()) + .unwrap_or_else(|| "id".to_string()); + + let mut view_scan = ViewScan::new( + full_table_name, + None, + other_schema.property_mappings.clone(), + id_column, + vec![], + vec![], + ); + + view_scan.schema_filter = other_schema.filter.clone(); + log::debug!( + "Added ViewScan for '{}' from table '{}.{}'", + label, + other_schema.database, + other_schema.table_name + ); + + union_inputs.push(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan)))); + } + + if union_inputs.len() > 1 { + let union = Union { + inputs: union_inputs, + union_type: UnionType::All, + }; + + log::info!( + "✓ Created MULTI_TABLE_LABEL UNION with {} branches for '{}'", + union.inputs.len(), + label + ); + return Ok(Some(Arc::new(LogicalPlan::Union(union)))); + } + } + + // SINGLE-TABLE CASE OR NON-DENORMALIZED: Use standard ViewScan logic + log::info!( + "✓ ViewScan: Resolved label '{}' to table '{}'", + label, + node_schema.table_name + ); + + // Use property mapping from schema directly (already PropertyValue) + // For denormalized nodes, property_mappings is often empty because properties + // are stored in from_properties/to_properties. Merge them into property_mapping + // so that full node expansion (RETURN n) works correctly for MULTI_TABLE_LABEL schemas. + let mut property_mapping = node_schema.property_mappings.clone(); + + if node_schema.is_denormalized && property_mapping.is_empty() { + // Merge from_properties and to_properties into property_mapping + // This enables full node expansion to find the actual column names + if let Some(ref from_props) = node_schema.from_properties { + for (prop_name, col_name) in from_props.iter() { + property_mapping.insert(prop_name.clone(), PropertyValue::Column(col_name.clone())); + } + } + if let Some(ref to_props) = node_schema.to_properties { + for (prop_name, col_name) in to_props.iter() { + // Only add if not already present (from_properties takes precedence) + property_mapping + .entry(prop_name.clone()) + .or_insert_with(|| PropertyValue::Column(col_name.clone())); + } + } + + if !property_mapping.is_empty() { + log::info!( + "✓ Populated property_mapping for denormalized node '{}' with {} properties: {:?}", + label, + property_mapping.len(), + property_mapping.keys().collect::>() + ); + } + } + + // Create fully qualified table name (database.table) + let full_table_name = format!("{}.{}", node_schema.database, node_schema.table_name); + log::debug!("Using fully qualified table name: {}", full_table_name); + + // Get view parameter names from schema (if this is a parameterized view) + let view_parameter_names = node_schema.view_parameters.clone(); + + // Get view parameter values from PlanCtx (if provided) + let view_parameter_values = plan_ctx.view_parameter_values().cloned(); + + // Log parameter info + if let Some(ref param_names) = view_parameter_names { + log::debug!( + "ViewScan: Table '{}' expects parameters: {:?}", + node_schema.table_name, + param_names + ); + if let Some(ref param_values) = view_parameter_values { + log::debug!("ViewScan: Will use parameter values: {:?}", param_values); + } else { + log::warn!( + "ViewScan: Table '{}' is parameterized but no values provided!", + node_schema.table_name + ); + } + } + + // Create ViewScan with the actual table name from schema + // For denormalized nodes, node_id refers to the property name (e.g., "ip"), + // but we need the actual column name (e.g., "id.orig_h") for SQL generation. + // Look it up from from_properties/to_properties for denormalized schemas. + let id_column = if node_schema.is_denormalized { + // Get the node_id property name first + let id_prop_name = node_schema + .node_id + .columns() + .first() + .map(|s| s.to_string()) + .unwrap_or_else(|| "id".to_string()); + + // Look up the actual column name from from_properties or to_properties + let actual_column = node_schema + .from_properties + .as_ref() + .and_then(|props| props.get(&id_prop_name)) + .or_else(|| { + node_schema + .to_properties + .as_ref() + .and_then(|props| props.get(&id_prop_name)) + }) + .cloned() + .unwrap_or_else(|| { + log::warn!( + "Denormalized node '{}' ID property '{}' not found in from/to_properties, using as-is", + label, + id_prop_name + ); + id_prop_name.clone() + }); + + log::info!( + "✓ Resolved denormalized node '{}' ID column: '{}' (property) → '{}' (column)", + label, + id_prop_name, + actual_column + ); + actual_column + } else { + // For non-denormalized nodes, node_id IS the actual column name + node_schema + .node_id + .columns() + .first() + .map(|s| s.to_string()) + .ok_or_else(|| { + log::error!("Node schema for '{}' has no ID columns defined", label); + // Don't hardcode "id" - this causes bugs with auto_discover_columns + // where the actual column might be user_id, object_id, etc. + // This should never happen in valid schemas. + LogicalPlanError::InvalidSchema { + label: label.to_string(), + reason: "No ID columns defined in node schema".to_string(), + } + })? + }; + + let mut view_scan = ViewScan::new( + full_table_name, // Use fully qualified table name (database.table) + None, // No filter condition yet + property_mapping, // Property mappings from schema + id_column, // ID column from schema (first for composite) + vec!["id".to_string()], // Basic output schema + vec![], // No projections yet + ); + + // Set view parameters if this is a parameterized view + view_scan.view_parameter_names = view_parameter_names.clone(); + view_scan.view_parameter_values = view_parameter_values.clone(); + log::debug!( + "ViewScan created for '{}': param_names={:?}, param_values={:?}", + label, + view_parameter_names, + view_parameter_values + ); + + // Set denormalized flag and properties from schema + view_scan.is_denormalized = node_schema.is_denormalized; + + // Populate denormalized node properties (for role-based mapping) + if node_schema.is_denormalized { + // Convert from HashMap to HashMap + view_scan.from_node_properties = node_schema.from_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + + view_scan.to_node_properties = node_schema.to_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + + log::debug!( + "ViewScan: Populated denormalized properties for label '{}' - from_props={:?}, to_props={:?}", + label, + view_scan.from_node_properties.as_ref().map(|p| p.keys().collect::>()), + view_scan.to_node_properties.as_ref().map(|p| p.keys().collect::>()) + ); + } + + log::debug!( + "ViewScan: Set is_denormalized={} for node label '{}' (table: {})", + node_schema.is_denormalized, + label, + node_schema.table_name + ); + + // Set schema-level filter if defined in schema + view_scan.schema_filter = node_schema.filter.clone(); + if view_scan.schema_filter.is_some() { + log::info!( + "ViewScan: Applied schema filter for label '{}': {:?}", + label, + node_schema.filter.as_ref().map(|f| &f.raw) + ); + } + + Ok(Some(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan))))) +} + +/// Try to generate a ViewScan for a relationship by looking up the relationship type in the schema. +/// +/// This function handles: +/// - Single relationship type lookups with node context for disambiguation +/// - Property mapping propagation from schema +/// - Polymorphic edge field population (type_column, label columns) +/// - Denormalized node property propagation +/// - Schema filter application +/// - Parameterized view support +/// +/// # Arguments +/// - `_alias` - The relationship alias (currently unused, reserved for future use) +/// - `rel_type` - The relationship type name to look up +/// - `left_node_label` - Optional left node label for disambiguation +/// - `right_node_label` - Optional right node label for disambiguation +/// - `plan_ctx` - Planning context containing schema information +/// +/// # Returns +/// - `Some(plan)` - Successfully created relationship ViewScan +/// - `None` - Relationship type not found in schema +pub fn try_generate_relationship_view_scan( + _alias: &str, + rel_type: &str, + left_node_label: Option<&str>, + right_node_label: Option<&str>, + plan_ctx: &PlanCtx, +) -> Option> { + log::debug!( + "try_generate_relationship_view_scan: rel_type='{}', left_node_label={:?}, right_node_label={:?}", + rel_type, + left_node_label, + right_node_label + ); + + // Use plan_ctx.schema() instead of GLOBAL_SCHEMAS + let schema = plan_ctx.schema(); + + // Look up the relationship schema for this type, using node labels for disambiguation + let rel_schema = + match schema.get_rel_schema_with_nodes(rel_type, left_node_label, right_node_label) { + Ok(s) => s, + Err(e) => { + log::warn!( + "Could not find relationship schema for type '{}' with nodes ({:?}, {:?}): {:?}", + rel_type, + left_node_label, + right_node_label, + e + ); + return None; + } + }; + + // Log successful resolution + log::info!( + "✓ Relationship ViewScan: Resolved type '{}' to table '{}'", + rel_type, + rel_schema.table_name + ); + + // Copy property mappings from schema so relationships can be expanded in RETURN + let property_mapping = rel_schema.property_mappings.clone(); + log::debug!( + "Relationship ViewScan: property_mapping has {} entries", + property_mapping.len() + ); + + // Create fully qualified table name (database.table) + let full_table_name = format!("{}.{}", rel_schema.database, rel_schema.table_name); + log::debug!( + "Using fully qualified relationship table name: {}", + full_table_name + ); + + // Get view parameter names from schema (if this is a parameterized view) + let view_parameter_names = rel_schema.view_parameters.clone(); + + // Get view parameter values from PlanCtx (if provided) + let view_parameter_values = plan_ctx.view_parameter_values().cloned(); + + // Log parameter info + if let Some(ref param_names) = view_parameter_names { + log::debug!( + "Relationship ViewScan: Table '{}' expects parameters: {:?}", + rel_schema.table_name, + param_names + ); + if let Some(ref param_values) = view_parameter_values { + log::debug!( + "Relationship ViewScan: Will use parameter values: {:?}", + param_values + ); + } else { + log::warn!( + "Relationship ViewScan: Table '{}' is parameterized but no values provided!", + rel_schema.table_name + ); + } + } + + // Create ViewScan for relationship with from/to columns + let mut view_scan = ViewScan::new_relationship( + full_table_name, + None, + property_mapping, + rel_schema.from_id.clone(), + vec!["id".to_string()], + vec![], + rel_schema.from_id.clone(), + rel_schema.to_id.clone(), + ); + + // Set view parameters if this is a parameterized view + view_scan.view_parameter_names = view_parameter_names; + view_scan.view_parameter_values = view_parameter_values; + + // Populate polymorphic edge fields from schema + // Copy label columns even if type_column is None (fixed-endpoint pattern) + view_scan.type_column = rel_schema.type_column.clone(); + view_scan.from_label_column = rel_schema.from_label_column.clone(); + view_scan.to_label_column = rel_schema.to_label_column.clone(); + + if rel_schema.type_column.is_some() + || rel_schema.from_label_column.is_some() + || rel_schema.to_label_column.is_some() + { + log::debug!( + "ViewScan: Populated polymorphic fields for rel '{}' - type_column={:?}, from_label={:?}, to_label={:?}", + rel_type, + view_scan.type_column, + view_scan.from_label_column, + view_scan.to_label_column + ); + } + + // Set denormalized node properties from schema + // Convert HashMap to HashMap + view_scan.from_node_properties = rel_schema.from_node_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + view_scan.to_node_properties = rel_schema.to_node_properties.as_ref().map(|props| { + props + .iter() + .map(|(k, v)| (k.clone(), PropertyValue::Column(v.clone()))) + .collect() + }); + + if view_scan.from_node_properties.is_some() || view_scan.to_node_properties.is_some() { + log::debug!( + "ViewScan: Set denormalized node properties for rel '{}' - from_props={:?}, to_props={:?}", + rel_type, + view_scan.from_node_properties.as_ref().map(|p| p.keys().collect::>()), + view_scan.to_node_properties.as_ref().map(|p| p.keys().collect::>()) + ); + } + + // Set schema-level filter if defined in schema + view_scan.schema_filter = rel_schema.filter.clone(); + if view_scan.schema_filter.is_some() { + log::info!( + "ViewScan: Applied schema filter for relationship '{}': {:?}", + rel_type, + rel_schema.filter.as_ref().map(|f| &f.raw) + ); + } + + Some(Arc::new(LogicalPlan::ViewScan(Arc::new(view_scan)))) +} + +/// Generate a relationship center (ViewScan if possible, otherwise Empty plan). +/// +/// This function is used internally during pattern processing to create the +/// logical plan node for a relationship. It handles: +/// - Single relationship types: Creates ViewScan via `try_generate_relationship_view_scan` +/// - Multiple relationship types (e.g., `[:TYPE1|TYPE2]`): Returns Empty plan +/// (actual UNION ALL CTE generation happens in render phase using GraphRel.labels) +/// - No specified type: Returns Empty plan (type inference will fill in later) +/// +/// # Arguments +/// - `rel_alias` - Alias for the relationship variable +/// - `rel_labels` - Optional list of relationship type names +/// - `_left_connection` - Left node connection (reserved) +/// - `_right_connection` - Right node connection (reserved) +/// - `left_node_label` - Optional left node label for disambiguation +/// - `right_node_label` - Optional right node label for disambiguation +/// - `plan_ctx` - Planning context +/// +/// # Returns +/// - `Ok(plan)` - ViewScan or Empty plan +/// - `Err(...)` - Relationship not found when single type specified +pub fn generate_relationship_center( + rel_alias: &str, + rel_labels: &Option>, + _left_connection: &str, + _right_connection: &str, + left_node_label: &Option, + right_node_label: &Option, + plan_ctx: &PlanCtx, +) -> LogicalPlanResult> { + log::debug!( + "Creating relationship center for alias '{}', labels: {:?}, left_node_label: {:?}, right_node_label: {:?}", + rel_alias, + rel_labels, + left_node_label, + right_node_label + ); + // Try to generate a ViewScan for the relationship if we have a single type + if let Some(labels) = rel_labels { + log::debug!("Relationship has {} labels: {:?}", labels.len(), labels); + + // Deduplicate labels - [:FOLLOWS|FOLLOWS] should be treated as single type + let unique_labels: Vec<_> = { + let mut seen = std::collections::HashSet::new(); + labels.iter().filter(|l| seen.insert(*l)).cloned().collect() + }; + log::debug!( + "After deduplication: {} unique labels: {:?}", + unique_labels.len(), + unique_labels + ); + + if unique_labels.len() == 1 { + log::debug!( + "Trying to create Relationship ViewScan for type '{}'", + unique_labels[0] + ); + if let Some(view_scan) = try_generate_relationship_view_scan( + rel_alias, + &unique_labels[0], + left_node_label.as_ref().map(|s| s.as_str()), + right_node_label.as_ref().map(|s| s.as_str()), + plan_ctx, + ) { + log::info!( + "✓ Successfully created Relationship ViewScan for type '{}'", + unique_labels[0] + ); + return Ok(view_scan); + } else { + // ViewScan creation failed - this is an error + return Err(LogicalPlanError::RelationshipNotFound( + unique_labels[0].clone(), + )); + } + } else { + log::debug!( + "Multiple relationship types ({}), using Empty plan (CTE uses GraphRel.labels)", + unique_labels.len() + ); + // For multiple relationships, use Empty plan + // The actual UNION ALL CTE generation happens in render phase using GraphRel.labels + // No need for "rel_*" placeholder - it was never actually looked up + return Ok(Arc::new(LogicalPlan::Empty)); + } + } else { + log::debug!("No relationship labels specified, using Empty plan"); + // For relationships without labels, use Empty + // Type inference pass will fill in the relationship type + return Ok(Arc::new(LogicalPlan::Empty)); + } +} + +#[cfg(test)] +mod tests { + // Tests would go here - currently these functions are tested via integration tests + // since they require a full GraphSchema setup +} diff --git a/src/query_planner/logical_plan/return_clause.rs b/src/query_planner/logical_plan/return_clause.rs index 56b339d4..171267e6 100644 --- a/src/query_planner/logical_plan/return_clause.rs +++ b/src/query_planner/logical_plan/return_clause.rs @@ -4,8 +4,8 @@ use crate::{ AggregateFnCall, ColumnAlias, LogicalExpr, PropertyAccess, TableAlias, }, query_planner::logical_plan::{ - optional_match_clause::evaluate_optional_match_clause, LogicalPlan, Projection, - ProjectionItem, Union, UnionType, + optional_match_clause::evaluate_optional_match_clause, LogicalPlan, LogicalPlanError, + Projection, ProjectionItem, Union, UnionType, }, query_planner::plan_ctx::PlanCtx, }; @@ -389,7 +389,10 @@ pub fn evaluate_return_clause<'a>( let projection_items: Vec = rewritten_return_items .iter() - .map(|item| item.clone().into()) + .map(|item| { + ProjectionItem::try_from(item.clone()) + .expect("Bug: Failed to convert RETURN expression to ProjectionItem") + }) .collect(); // If input is a Union, handle specially