diff --git a/datafusion/sqllogictest/README.md b/datafusion/sqllogictest/README.md index a389ae1ef60e..8c1708742c48 100644 --- a/datafusion/sqllogictest/README.md +++ b/datafusion/sqllogictest/README.md @@ -142,6 +142,17 @@ select substr('Andrew Lamb', 1, 6), '|' Andrew | ``` +## Cookbook: Ignoring volatile output + +Sometimes parts of a result change every run (timestamps, counters, etc.). To keep the rest of the snapshot checked in, replace those fragments with the `<slt:ignore>` marker inside the expected block. During validation the marker acts like a wildcard, so only the surrounding text must match. + +```text +query TT +EXPLAIN ANALYZE SELECT * FROM generate_series(100); +---- +Plan with Metrics LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=0, end=100, batch_size=8192], metrics=[output_rows=101, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>] +``` + # Reference ## Running tests: Validation Mode diff --git a/datafusion/sqllogictest/src/util.rs b/datafusion/sqllogictest/src/util.rs index 2c3bd12d897d..3e129848f0cd 100644 --- a/datafusion/sqllogictest/src/util.rs +++ b/datafusion/sqllogictest/src/util.rs @@ -82,6 +82,10 @@ pub fn df_value_validator( actual: &[Vec], expected: &[String], ) -> bool { + // Support ignore marker to skip volatile parts of output. + const IGNORE_MARKER: &str = ""; + let contains_ignore_marker = expected.iter().any(|line| line.contains(IGNORE_MARKER)); + let normalized_expected = expected.iter().map(normalizer).collect::>(); let normalized_actual = actual .iter() @@ -89,6 +93,25 @@ pub fn df_value_validator( .map(|str| str.trim_end().to_string()) .collect_vec(); + // If ignore marker present, perform fragment-based matching on the full snapshot. 
+ if contains_ignore_marker { + let expected_snapshot = normalized_expected.join("\n"); + let actual_snapshot = normalized_actual.join("\n"); + let fragments: Vec<&str> = expected_snapshot.split(IGNORE_MARKER).collect(); + let mut pos = 0; + for frag in fragments { + if frag.is_empty() { + continue; + } + if let Some(idx) = actual_snapshot[pos..].find(frag) { + pos += idx + frag.len(); + } else { + return false; + } + } + return true; + } + if log_enabled!(Warn) && normalized_actual != normalized_expected { warn!("df validation failed. actual vs expected:"); for i in 0..normalized_actual.len() { diff --git a/datafusion/sqllogictest/test_files/explain_analyze.slt b/datafusion/sqllogictest/test_files/explain_analyze.slt new file mode 100644 index 000000000000..b213cd9565c8 --- /dev/null +++ b/datafusion/sqllogictest/test_files/explain_analyze.slt @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +statement ok +set datafusion.explain.analyze_level = summary; + +query TT +EXPLAIN ANALYZE SELECT * FROM generate_series(100); +---- +Plan with Metrics LazyMemoryExec: partitions=1, batch_generators=[generate_series: start=0, end=100, batch_size=8192], metrics=[output_rows=101, elapsed_compute=<slt:ignore>, output_bytes=<slt:ignore>] + +statement ok +reset datafusion.explain.analyze_level;