From 13aa6868e6da344281d5fee037d17c8e594e772f Mon Sep 17 00:00:00 2001 From: Karuppayya Rajendran Date: Wed, 8 Oct 2025 14:46:55 -0700 Subject: [PATCH 1/2] Add ability to support Alpha sketches --- .../resources/error/error-conditions.json | 6 ++ python/pyspark/sql/functions/builtin.py | 21 +++++- .../org/apache/spark/sql/functions.scala | 52 ++++++++++++++ .../aggregate/thetasketchesAggregates.scala | 72 +++++++++++++------ .../sql/catalyst/util/ThetaSketchUtils.scala | 30 +++++++- .../sql/errors/QueryExecutionErrors.scala | 9 +++ .../spark/sql/DataFrameAggregateSuite.scala | 22 +++++- 7 files changed, 182 insertions(+), 30 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 8f6687587f781..a88f2ee0c7a37 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -5605,6 +5605,12 @@ ], "sqlState" : "428EK" }, + "THETA_INVALID_FAMILY" : { + "message" : [ + "Invalid call to <function>; the `family` parameter must be one of: <validFamilies>. Got: <value>." + ], + "sqlState" : "22546" + }, "THETA_INVALID_INPUT_SKETCH_BUFFER" : { "message" : [ "Invalid call to <function>; only valid Theta sketch buffers are supported as inputs (such as those produced by the `theta_sketch_agg` function)." diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index cf54fd23e8186..7b8dee019d127 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25941,10 +25941,12 @@ def hll_union( def theta_sketch_agg( col: "ColumnOrName", lgNomEntries: Optional[Union[int, Column]] = None, + family: Optional[str] = None, ) -> Column: """ Aggregate function: returns the compact binary representation of the Datasketches - ThetaSketch with the values in the input column configured with lgNomEntries nominal entries. 
+ ThetaSketch with the values in the input column configured with lgNomEntries nominal entries + and the specified sketch family. .. versionadded:: 4.1.0 @@ -25954,6 +25956,8 @@ def theta_sketch_agg( lgNomEntries : :class:`~pyspark.sql.Column` or int, optional The log-base-2 of nominal entries, where nominal entries is the size of the sketch (must be between 4 and 26, defaults to 12) + family : str, optional + The sketch family: 'QUICKSELECT' or 'ALPHA' (defaults to 'QUICKSELECT'). Returns ------- @@ -25986,12 +25990,23 @@ def theta_sketch_agg( +--------------------------------------------------+ | 3| +--------------------------------------------------+ + + >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value", 15, "ALPHA"))).show() + +-------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 15, AL..| + +-------------------------------------------------------+ + | 3| + +-------------------------------------------------------+ """ fn = "theta_sketch_agg" - if lgNomEntries is None: + if lgNomEntries is None and family is None: return _invoke_function_over_columns(fn, col) - else: + elif family is None: return _invoke_function_over_columns(fn, col, lit(lgNomEntries)) + else: + if lgNomEntries is None: + lgNomEntries = 12 # default value + return _invoke_function_over_columns(fn, col, lit(lgNomEntries), lit(family)) @_try_remote_functions diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index 4983f98ea1763..7f0497dca5b0d 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -1198,6 +1198,17 @@ object functions { def theta_sketch_agg(e: Column, lgNomEntries: Column): Column = Column.fn("theta_sketch_agg", e, lgNomEntries) + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with 
the values in the input column and configured with the `lgNomEntries` nominal + * entries and `family`. + * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(e: Column, lgNomEntries: Column, family: Column): Column = + Column.fn("theta_sketch_agg", e, lgNomEntries, family) + /** * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch * built with the values in the input column and configured with the `lgNomEntries` nominal @@ -1242,6 +1253,47 @@ object functions { def theta_sketch_agg(columnName: String): Column = theta_sketch_agg(Column(columnName)) + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with the values in the input column, configured with `lgNomEntries` and `family`. + * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(e: Column, lgNomEntries: Int, family: String): Column = + Column.fn("theta_sketch_agg", e, lit(lgNomEntries), lit(family)) + + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with the values in the input column, configured with `lgNomEntries` and `family`. + * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(columnName: String, lgNomEntries: Int, family: String): Column = + theta_sketch_agg(Column(columnName), lgNomEntries, family) + + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with the values in the input column, configured with the specified `family` and default + * lgNomEntries. + * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(e: Column, family: String): Column = + theta_sketch_agg(e, 12, family) + + /** + * Aggregate function: returns the compact binary representation of the Datasketches ThetaSketch + * built with the values in the input column, configured with specified `family`. 
+ * + * @group agg_funcs + * @since 4.1.0 + */ + def theta_sketch_agg(columnName: String, family: String): Column = + theta_sketch_agg(columnName, 12, family) + /** * Aggregate function: returns the compact binary representation of the Datasketches * ThetaSketch, generated by the union of Datasketches ThetaSketch instances in the input column diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala index 7e55c006782cf..b9149d78f4746 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala @@ -17,13 +17,13 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import org.apache.datasketches.common.Family import org.apache.datasketches.memory.Memory import org.apache.datasketches.theta.{CompactSketch, Intersection, SetOperation, Sketch, Union, UpdateSketch, UpdateSketchBuilder} import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} -import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.trees.{BinaryLike, UnaryLike} import org.apache.spark.sql.catalyst.util.{ArrayData, CollationFactory, ThetaSketchUtils} import org.apache.spark.sql.errors.QueryExecutionErrors @@ -59,10 +59,12 @@ case class FinalizedSketch(sketch: CompactSketch) extends ThetaSketchState { * * See [[https://datasketches.apache.org/docs/Theta/ThetaSketches.html]] for more information. 
* - * @param left + * @param child * child expression against which unique counting will occur - * @param right + * @param lgNomEntriesExpr * the log-base-2 of nomEntries decides the number of buckets for the sketch + * @param familyExpr + * the family of the sketch (QUICKSELECT or ALPHA) * @param mutableAggBufferOffset * offset for mutable aggregation buffer * @param inputAggBufferOffset @@ -71,46 +73,66 @@ case class FinalizedSketch(sketch: CompactSketch) extends ThetaSketchState { // scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_(expr, lgNomEntries) - Returns the ThetaSketch compact binary representation. + _FUNC_(expr, lgNomEntries, family) - Returns the ThetaSketch compact binary representation. `lgNomEntries` (optional) is the log-base-2 of nominal entries, with nominal entries deciding - the number buckets or slots for the ThetaSketch. """, + the number buckets or slots for the ThetaSketch. + `family` (optional) is the sketch family, either 'QUICKSELECT' or 'ALPHA' (defaults to 'QUICKSELECT'). 
+ Note: You can pass family as the second parameter to use default lgNomEntries with a specific family.""", examples = """ Examples: + > SELECT theta_sketch_estimate(_FUNC_(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col); + 3 > SELECT theta_sketch_estimate(_FUNC_(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col); 3 + > SELECT theta_sketch_estimate(_FUNC_(col, 'ALPHA')) FROM VALUES (1), (1), (2), (2), (3) tab(col); + 3 + > SELECT theta_sketch_estimate(_FUNC_(col, 15, 'ALPHA')) FROM VALUES (1), (1), (2), (2), (3) tab(col); + 3 """, group = "agg_funcs", since = "4.1.0") // scalastyle:on line.size.limit case class ThetaSketchAgg( - left: Expression, - right: Expression, + child: Expression, + lgNomEntriesExpr: Expression, + familyExpr: Expression, override val mutableAggBufferOffset: Int, override val inputAggBufferOffset: Int) extends TypedImperativeAggregate[ThetaSketchState] - with BinaryLike[Expression] with ExpectsInputTypes { // ThetaSketch config - mark as lazy so that they're not evaluated during tree transformation. 
- lazy val lgNomEntries: Int = { - val lgNomEntriesInput = right.eval().asInstanceOf[Int] + private lazy val lgNomEntries: Int = { + val lgNomEntriesInput = lgNomEntriesExpr.eval().asInstanceOf[Int] ThetaSketchUtils.checkLgNomLongs(lgNomEntriesInput, prettyName) lgNomEntriesInput } - // Constructors + private lazy val family: Family = + ThetaSketchUtils.parseFamily(familyExpr.eval().asInstanceOf[UTF8String].toString, prettyName) + // Constructors def this(child: Expression) = { - this(child, Literal(ThetaSketchUtils.DEFAULT_LG_NOM_LONGS), 0, 0) + this(child, + Literal(ThetaSketchUtils.DEFAULT_LG_NOM_LONGS), + Literal(UTF8String.fromString(ThetaSketchUtils.DEFAULT_FAMILY)), + 0, 0) } def this(child: Expression, lgNomEntries: Expression) = { - this(child, lgNomEntries, 0, 0) + this(child, + lgNomEntries, + Literal(UTF8String.fromString(ThetaSketchUtils.DEFAULT_FAMILY)), + 0, 0) + } + + def this(child: Expression, lgNomEntries: Expression, family: Expression) = { + this(child, lgNomEntries, family, 0, 0) } def this(child: Expression, lgNomEntries: Int) = { - this(child, Literal(lgNomEntries), 0, 0) + this(child, Literal(lgNomEntries)) } // Copy constructors required by ImperativeAggregate @@ -122,15 +144,16 @@ case class ThetaSketchAgg( copy(inputAggBufferOffset = newInputAggBufferOffset) override protected def withNewChildrenInternal( - newLeft: Expression, - newRight: Expression): ThetaSketchAgg = - copy(left = newLeft, right = newRight) + newChildren: IndexedSeq[Expression]): ThetaSketchAgg = + copy(child = newChildren(0), lgNomEntriesExpr = newChildren(1), familyExpr = newChildren(2)) + + override def children: Seq[Expression] = Seq(child, lgNomEntriesExpr, familyExpr) // Overrides for TypedImperativeAggregate override def prettyName: String = "theta_sketch_agg" - override def inputTypes: Seq[AbstractDataType] = + override def inputTypes: Seq[AbstractDataType] = { Seq( TypeCollection( ArrayType(IntegerType), @@ -141,14 +164,16 @@ case class ThetaSketchAgg( 
IntegerType, LongType, StringTypeWithCollation(supportsTrimCollation = true)), - IntegerType) + IntegerType, + StringType) + } override def dataType: DataType = BinaryType override def nullable: Boolean = false /** - * Instantiate an UpdateSketch instance using the lgNomEntries param. + * Instantiate an UpdateSketch instance using the lgNomEntries and family params. * * @return * an UpdateSketch instance wrapped with UpdatableSketchBuffer @@ -156,6 +181,7 @@ case class ThetaSketchAgg( override def createAggregationBuffer(): ThetaSketchState = { val builder = new UpdateSketchBuilder builder.setLogNominalEntries(lgNomEntries) + builder.setFamily(family) UpdatableSketchBuffer(builder.build) } @@ -176,7 +202,7 @@ case class ThetaSketchAgg( */ override def update(updateBuffer: ThetaSketchState, input: InternalRow): ThetaSketchState = { // Return early for null values. - val v = left.eval(input) + val v = child.eval(input) if (v == null) return updateBuffer // Initialized buffer should be UpdatableSketchBuffer, else error out. @@ -186,7 +212,7 @@ case class ThetaSketchAgg( } // Handle the different data types for sketch updates. 
- left.dataType match { + child.dataType match { case ArrayType(IntegerType, _) => val arr = v.asInstanceOf[ArrayData].toIntArray() sketch.update(arr) @@ -213,7 +239,7 @@ case class ThetaSketchAgg( case _ => throw new SparkUnsupportedOperationException( errorClass = "_LEGACY_ERROR_TEMP_3121", - messageParameters = Map("dataType" -> left.dataType.toString)) + messageParameters = Map("dataType" -> child.dataType.toString)) } UpdatableSketchBuffer(sketch) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala index f9a651b5662db..db26671d59783 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ThetaSketchUtils.scala @@ -17,12 +17,15 @@ package org.apache.spark.sql.catalyst.util -import org.apache.datasketches.common.SketchesArgumentException +import java.util.Locale + +import org.apache.datasketches.common.{Family, SketchesArgumentException} import org.apache.datasketches.memory.{Memory, MemoryBoundsException} import org.apache.datasketches.theta.CompactSketch import org.apache.spark.sql.errors.QueryExecutionErrors + object ThetaSketchUtils { /* * Bounds copied from DataSketches' ThetaUtil. These define the valid range for lgNomEntries, @@ -36,6 +39,11 @@ object ThetaSketchUtils { final val MAX_LG_NOM_LONGS = 26 final val DEFAULT_LG_NOM_LONGS = 12 + // Family constants for ThetaSketch + final val FAMILY_QUICKSELECT = "QUICKSELECT" + final val FAMILY_ALPHA = "ALPHA" + final val DEFAULT_FAMILY = FAMILY_QUICKSELECT + /** * Validates the lgNomLongs parameter for Theta sketch size. Throws a Spark SQL exception if the * value is out of bounds. @@ -53,6 +61,26 @@ object ThetaSketchUtils { } } + /** + * Converts a family string to DataSketches Family enum. + * Throws a Spark SQL exception if the family name is invalid. 
+ * + * @param familyName The family name string + * @param prettyName The display name of the function/expression for error messages + * @return The corresponding DataSketches Family enum value + */ + def parseFamily(familyName: String, prettyName: String): Family = { + familyName.toUpperCase(Locale.ROOT) match { + case FAMILY_QUICKSELECT => Family.QUICKSELECT + case FAMILY_ALPHA => Family.ALPHA + case _ => + throw QueryExecutionErrors.thetaInvalidFamily( + function = prettyName, + value = familyName, + validFamilies = Seq(FAMILY_QUICKSELECT, FAMILY_ALPHA)) + } + } + /** * Wraps a byte array into a DataSketches CompactSketch object. * This method safely deserializes a compact Theta sketch from its binary representation, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index a6c5bbf91eb0b..9c6984b43761a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -3136,4 +3136,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE "max" -> toSQLValue(max, IntegerType), "value" -> toSQLValue(value, IntegerType))) } + + def thetaInvalidFamily(function: String, value: String, validFamilies: Seq[String]): Throwable = { + new SparkRuntimeException( + errorClass = "THETA_INVALID_FAMILY", + messageParameters = Map( + "function" -> toSQLId(function), + "value" -> toSQLValue(value, StringType), + "validFamilies" -> validFamilies.map(f => toSQLId(f)).mkString(", "))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 37614145fe83f..66c64ad5f9fa7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -2426,7 +2426,7 @@ class DataFrameAggregateSuite extends QueryTest val res = sql(""" |select | id, - | theta_sketch_agg(value, 'text') + | theta_sketch_agg(value, 'text', 'ALPHA') |from | df1 |group by 1 @@ -2435,13 +2435,29 @@ class DataFrameAggregateSuite extends QueryTest }, condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", parameters = Map( - "sqlExpr" -> "\"theta_sketch_agg(value, text)\"", + "sqlExpr" -> "\"theta_sketch_agg(value, text, ALPHA)\"", "paramIndex" -> "second", "inputSql" -> "\"text\"", "inputType" -> "\"STRING\"", "requiredType" -> "\"INT\""), context = - ExpectedContext(fragment = "theta_sketch_agg(value, 'text')", start = 14, stop = 44)) + ExpectedContext(fragment = "theta_sketch_agg(value, 'text', 'ALPHA')", + start = 14, stop = 53)) + + // Test invalid family names + checkError( + exception = intercept[SparkRuntimeException] { + df1.groupBy("id") + .agg(theta_sketch_agg("value", 12, "INVALID_FAMILY").as("sketch")) + .collect() + }, + condition = "THETA_INVALID_FAMILY", + parameters = Map( + "function" -> "`theta_sketch_agg`", + "value" -> "'INVALID_FAMILY'", + "validFamilies" -> "`QUICKSELECT`, `ALPHA`" + ) + ) checkError( exception = intercept[AnalysisException] { From 47ef17e162d8cccb13d4a2a3a6405bb02735406e Mon Sep 17 00:00:00 2001 From: Karuppayya Rajendran Date: Thu, 9 Oct 2025 16:22:37 -0700 Subject: [PATCH 2/2] Fix test failures --- .../pyspark/sql/connect/functions/builtin.py | 9 +- python/pyspark/sql/functions/builtin.py | 40 ++-- .../aggregate/thetasketchesAggregates.scala | 6 +- .../sql-functions/sql-expression-schema.md | 10 +- .../analyzer-results/thetasketch.sql.out | 198 +++++++++--------- .../sql-tests/results/thetasketch.sql.out | 94 ++++----- 6 files changed, 180 insertions(+), 177 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 71865816b49a9..0851d13f6b090 
100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -4337,12 +4337,17 @@ def hll_union( def theta_sketch_agg( col: "ColumnOrName", lgNomEntries: Optional[Union[int, Column]] = None, + family: Optional[str] = None, ) -> Column: fn = "theta_sketch_agg" - if lgNomEntries is None: + if lgNomEntries is None and family is None: return _invoke_function_over_columns(fn, col) - else: + elif family is None: return _invoke_function_over_columns(fn, col, lit(lgNomEntries)) + else: + if lgNomEntries is None: + lgNomEntries = 12 # default value + return _invoke_function_over_columns(fn, col, lit(lgNomEntries), lit(family)) theta_sketch_agg.__doc__ = pysparkfuncs.theta_sketch_agg.__doc__ diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 7b8dee019d127..9b8b326a1ff69 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -25978,25 +25978,25 @@ def theta_sketch_agg( >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([1,2,2,3], "INT") >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value"))).show() - +--------------------------------------------------+ - |theta_sketch_estimate(theta_sketch_agg(value, 12))| - +--------------------------------------------------+ - | 3| - +--------------------------------------------------+ + +---------------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 12, QUICKSELECT))| + +---------------------------------------------------------------+ + | 3| + +---------------------------------------------------------------+ >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value", 15))).show() - +--------------------------------------------------+ - |theta_sketch_estimate(theta_sketch_agg(value, 15))| - +--------------------------------------------------+ - | 3| - 
+--------------------------------------------------+ + +---------------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 15, QUICKSELECT))| + +---------------------------------------------------------------+ + | 3| + +---------------------------------------------------------------+ >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value", 15, "ALPHA"))).show() - +-------------------------------------------------------+ - |theta_sketch_estimate(theta_sketch_agg(value, 15, AL..| - +-------------------------------------------------------+ - | 3| - +-------------------------------------------------------+ + +---------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 15, ALPHA))| + +---------------------------------------------------------+ + | 3| + +---------------------------------------------------------+ """ fn = "theta_sketch_agg" if lgNomEntries is None and family is None: @@ -26133,11 +26133,11 @@ def theta_sketch_estimate(col: "ColumnOrName") -> Column: >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([1,2,2,3], "INT") >>> df.agg(sf.theta_sketch_estimate(sf.theta_sketch_agg("value"))).show() - +--------------------------------------------------+ - |theta_sketch_estimate(theta_sketch_agg(value, 12))| - +--------------------------------------------------+ - | 3| - +--------------------------------------------------+ + +---------------------------------------------------------------+ + |theta_sketch_estimate(theta_sketch_agg(value, 12, QUICKSELECT))| + +---------------------------------------------------------------+ + | 3| + +---------------------------------------------------------------+ """ fn = "theta_sketch_estimate" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala index b9149d78f4746..ef6212ee8733a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/thetasketchesAggregates.scala @@ -76,16 +76,14 @@ case class FinalizedSketch(sketch: CompactSketch) extends ThetaSketchState { _FUNC_(expr, lgNomEntries, family) - Returns the ThetaSketch compact binary representation. `lgNomEntries` (optional) is the log-base-2 of nominal entries, with nominal entries deciding the number buckets or slots for the ThetaSketch. - `family` (optional) is the sketch family, either 'QUICKSELECT' or 'ALPHA' (defaults to 'QUICKSELECT'). - Note: You can pass family as the second parameter to use default lgNomEntries with a specific family.""", + `family` (optional) is the sketch family, either 'QUICKSELECT' or 'ALPHA' (defaults to + 'QUICKSELECT').""", examples = """ Examples: > SELECT theta_sketch_estimate(_FUNC_(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col); 3 > SELECT theta_sketch_estimate(_FUNC_(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col); 3 - > SELECT theta_sketch_estimate(_FUNC_(col, 'ALPHA')) FROM VALUES (1), (1), (2), (2), (3) tab(col); - 3 > SELECT theta_sketch_estimate(_FUNC_(col, 15, 'ALPHA')) FROM VALUES (1), (1), (2), (2), (3) tab(col); 3 """, diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 7e2783a67228e..42d677c2738ea 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -342,10 +342,10 @@ | org.apache.spark.sql.catalyst.expressions.Subtract | - | SELECT 2 - 1 | struct<(2 - 1):int> | | org.apache.spark.sql.catalyst.expressions.Tan | tan | SELECT tan(0) | 
struct | | org.apache.spark.sql.catalyst.expressions.Tanh | tanh | SELECT tanh(0) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaDifference | theta_difference | SELECT theta_sketch_estimate(theta_difference(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaIntersection | theta_intersection | SELECT theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaSketchEstimate | theta_sketch_estimate | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | -| org.apache.spark.sql.catalyst.expressions.ThetaUnion | theta_union | SELECT theta_sketch_estimate(theta_union(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaDifference | theta_difference | SELECT theta_sketch_estimate(theta_difference(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaIntersection | theta_intersection | SELECT theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (5, 4), (1, 4), (2, 5), (2, 5), (3, 1) tab(col1, col2) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaSketchEstimate | theta_sketch_estimate | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.ThetaUnion | theta_union | SELECT theta_sketch_estimate(theta_union(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | 
struct | | org.apache.spark.sql.catalyst.expressions.TimeDiff | time_diff | SELECT time_diff('HOUR', TIME'20:30:29', TIME'21:30:28') | struct | | org.apache.spark.sql.catalyst.expressions.TimeTrunc | time_trunc | SELECT time_trunc('HOUR', TIME'09:32:05.359') | struct | | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | @@ -468,7 +468,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.ThetaIntersectionAgg | theta_intersection_agg | SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) FROM (SELECT theta_sketch_agg(col) as sketch FROM VALUES (1) tab(col) UNION ALL SELECT theta_sketch_agg(col, 20) as sketch FROM VALUES (1) tab(col)) | struct | -| org.apache.spark.sql.catalyst.expressions.aggregate.ThetaSketchAgg | theta_sketch_agg | SELECT theta_sketch_estimate(theta_sketch_agg(col, 12)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.ThetaSketchAgg | theta_sketch_agg | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.ThetaUnionAgg | theta_union_agg | SELECT theta_sketch_estimate(theta_union_agg(sketch)) FROM (SELECT theta_sketch_agg(col) as sketch FROM VALUES (1) tab(col) UNION ALL SELECT theta_sketch_agg(col, 20) as sketch FROM VALUES (1) tab(col)) | struct | | 
org.apache.spark.sql.catalyst.expressions.aggregate.TryAverageExpressionBuilder | try_avg | SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.TrySumExpressionBuilder | try_sum | SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out index 323084223d4bc..72e03d826efc4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/thetasketch.sql.out @@ -167,7 +167,7 @@ CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`t_string_collati -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) AS result FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS result#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS result#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -175,7 +175,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS result#x -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -183,7 +183,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sk -- !query 
SELECT theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -191,7 +191,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -199,7 +199,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -207,7 +207,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sk -- !query SELECT 
theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col2, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -215,7 +215,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col2#x, 12, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1, 22)) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 22, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 22))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 22, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 22, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -223,7 +223,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 22, 0, 0)) AS theta_sk -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#xL, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -231,7 +231,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#xL, 12, 0, 0)) AS theta_s -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM 
t_string_a_d_through_e_h -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col1, 12, QUICKSELECT))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -242,7 +242,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -253,7 +253,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 15), theta_sketch_agg(col2))) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#xL, 15, 0, 0), theta_sketch_agg(col2#xL, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 15), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#xL, 15, QUICKSELECT, 0, 0), theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 15, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation 
spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -264,7 +264,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -275,7 +275,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 15), 15)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 6, 0, 0), theta_sketch_agg(col2#x, 15, 0, 0), 15)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 15), 15))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 6, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 15, QUICKSELECT, 0, 0), 15)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 6, QUICKSELECT), theta_sketch_agg(col2, 15, QUICKSELECT), 15))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -286,7 +286,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS 
theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -297,7 +297,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2), 20)) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 20)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 20))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 20)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 20))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -308,7 +308,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), 12))#xL] +- SubqueryAlias 
spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -319,7 +319,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 13))) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 13, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 13), 12))#xL] +Aggregate [theta_sketch_estimate(theta_union(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 13, QUICKSELECT, 0, 0), 12)) AS theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 13, QUICKSELECT), 12))#xL] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -330,7 +330,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -341,7 +341,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), theta_sketch_agg(col2, 12))) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#xL, 5, 0, 0), 
theta_sketch_agg(col2#xL, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#xL, 5, QUICKSELECT, 0, 0), theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -352,7 +352,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -363,7 +363,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 5, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 5, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS 
theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 5, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -374,7 +374,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -385,7 +385,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 22))) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 22, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 22)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 22, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 22, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -396,7 +396,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), 
theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -407,7 +407,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 10))) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 10, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 10)))#xL] +Aggregate [theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 10, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 10, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -418,7 +418,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate 
[theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -429,7 +429,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 5))) FROM t_long_1_5_through_7_11 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#xL, 12, 0, 0), theta_sketch_agg(col2#xL, 5, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 5)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#xL, 5, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 5, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -440,7 +440,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation 
spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -451,7 +451,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 12), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -462,7 +462,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -473,7 +473,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 8))) FROM t_binary_a_b_through_e_f -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 6, 0, 0), theta_sketch_agg(col2#x, 8, 0, 0))) AS 
theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 8)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 6, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 8, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 6, QUICKSELECT), theta_sketch_agg(col2, 8, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -484,7 +484,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 12, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT)))#xL] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -495,7 +495,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 4))) FROM t_array_long_1_3_through_4_6 -- !query analysis -Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, 0, 0), theta_sketch_agg(col2#x, 4, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 4)))#xL] +Aggregate [theta_sketch_estimate(theta_difference(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0), theta_sketch_agg(col2#x, 4, QUICKSELECT, 0, 0))) AS theta_sketch_estimate(theta_difference(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 4, QUICKSELECT)))#xL] +- 
SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -509,10 +509,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 15)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 15, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 15))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 20, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 20, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -526,10 +526,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 12)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 12, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 12))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -543,10 +543,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 14)) Aggregate 
[theta_sketch_estimate(theta_union_agg(sketch#x, 14, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 14))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h : +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -560,10 +560,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 10)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 10, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 10))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#xL, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 : +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet - +- Aggregate [theta_sketch_agg(col2#xL, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -577,10 +577,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 6)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 6, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 6))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate 
[theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -594,10 +594,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 12, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 12))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f : +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -611,10 +611,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 12)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 12, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 12))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 : +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 
0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -628,10 +628,10 @@ SELECT theta_sketch_estimate(theta_union_agg(sketch, 16)) Aggregate [theta_sketch_estimate(theta_union_agg(sketch#x, 16, 0, 0)) AS theta_sketch_estimate(theta_union_agg(sketch, 16))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 : +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -645,10 +645,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet @@ -662,10 +662,10 @@ 
SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#xL, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 : +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet - +- Aggregate [theta_sketch_agg(col2#xL, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#xL, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet @@ -679,10 +679,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_float_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -696,10 +696,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias 
__auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 : +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet @@ -713,10 +713,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h : +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h +- Relation spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet @@ -730,10 +730,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias 
spark_catalog.default.t_binary_a_b_through_e_f : +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_binary_a_b_through_e_f +- Relation spark_catalog.default.t_binary_a_b_through_e_f[col1#x,col2#x] parquet @@ -747,10 +747,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 : +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_array_int_1_3_through_4_6 +- Relation spark_catalog.default.t_array_int_1_3_through_4_6[col1#x,col2#x] parquet @@ -764,10 +764,10 @@ SELECT theta_sketch_estimate(theta_intersection_agg(sketch)) Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS theta_sketch_estimate(theta_intersection_agg(sketch))#xL] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 : +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet - +- Aggregate [theta_sketch_agg(col2#x, 12, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col2#x, 12, 
QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias spark_catalog.default.t_array_long_1_3_through_4_6 +- Relation spark_catalog.default.t_array_long_1_3_through_4_6[col1#x,col2#x] parquet @@ -776,7 +776,7 @@ Aggregate [theta_sketch_estimate(theta_intersection_agg(sketch#x, 0, 0)) AS thet SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (null), (2), (null), (3) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -785,7 +785,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES ('test'), (null), ('null'), (null) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -794,7 +794,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (100L), (null), (200L), (null), (300L) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#xL, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#xL, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#xL] @@ -803,7 +803,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#xL, 12, 0, 0)) AS theta_sk SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS 
DOUBLE))) FROM VALUES (1.1), (null), (2.2), (null), (3.3) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as double), 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS DOUBLE), 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as double), 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS DOUBLE), 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -812,7 +812,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as double), 12, 0, SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT))) FROM VALUES (1.5), (null), (2.5), (null), (3.5) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as float), 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT), 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as float), 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT), 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -821,7 +821,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(cast(col#x as float), 12, 0, 0 SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X'AA'), (null), (X'BB'), (null), (X'CC') tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -830,7 +830,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, 2)), (null), (ARRAY(3, 4)), (null), (ARRAY(5, 6)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS 
theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -839,7 +839,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, 20L)), (null), (ARRAY(30L, 40L)), (null), (ARRAY(50L, 60L)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -848,7 +848,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, null)), (ARRAY(1)), (ARRAY(2, null, 3)), (ARRAY(4)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -857,7 +857,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, null)), (ARRAY(10L)), (ARRAY(20L, null, 30L)), (ARRAY(40L)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation 
[col#x] @@ -866,7 +866,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY()), (ARRAY(1, 2)), (ARRAY()), (ARRAY(3, 4)) tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -875,7 +875,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (''), ('a'), (''), ('b'), ('c') tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -884,7 +884,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X''), (X'01'), (X'02'), (X'03'), (CAST(' ' AS BINARY)), (X'e280'), (X'c1'), (X'c120') tab(col) -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12))#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0)) AS theta_sketch_estimate(theta_sketch_agg(col, 12, QUICKSELECT))#xL] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -892,7 +892,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col#x, 12, 0, 0)) AS theta_ske -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) utf8_b FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 
0, 0)) AS utf8_b#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0)) AS utf8_b#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -900,7 +900,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(col1#x, 12, 0, 0)) AS utf8_b#x -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UTF8_LCASE)) utf8_lc FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE), 12, 0, 0)) AS utf8_lc#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE), 12, QUICKSELECT, 0, 0)) AS utf8_lc#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -908,7 +908,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE), 1 -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UNICODE)) unicode FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE), 12, 0, 0)) AS unicode#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE), 12, QUICKSELECT, 0, 0)) AS unicode#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -916,7 +916,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE), 12, -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UNICODE_CI)) unicode_ci FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI), 12, 0, 0)) AS unicode_ci#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI), 12, QUICKSELECT, 0, 0)) AS unicode_ci#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] 
parquet @@ -924,7 +924,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI), 1 -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UTF8_BINARY_RTRIM)) utf8_b_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_BINARY_RTRIM), 12, 0, 0)) AS utf8_b_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_BINARY_RTRIM), 12, QUICKSELECT, 0, 0)) AS utf8_b_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -932,7 +932,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_BINARY_RT -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UTF8_LCASE_RTRIM)) utf8_lc_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE_RTRIM), 12, 0, 0)) AS utf8_lc_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE_RTRIM), 12, QUICKSELECT, 0, 0)) AS utf8_lc_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -940,7 +940,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UTF8_LCASE_RTR -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 COLLATE UNICODE_RTRIM)) unicode_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_RTRIM), 12, 0, 0)) AS unicode_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_RTRIM), 12, QUICKSELECT, 0, 0)) AS unicode_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -948,7 +948,7 @@ Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_RTRIM) -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1 
COLLATE UNICODE_CI_RTRIM)) unicode_ci_rt FROM t_string_collation -- !query analysis -Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI_RTRIM), 12, 0, 0)) AS unicode_ci_rt#xL] +Aggregate [theta_sketch_estimate(theta_sketch_agg(collate(col1#x, UNICODE_CI_RTRIM), 12, QUICKSELECT, 0, 0)) AS unicode_ci_rt#xL] +- SubqueryAlias spark_catalog.default.t_string_collation +- Relation spark_catalog.default.t_string_collation[col1#x] parquet @@ -982,16 +982,16 @@ WithCTE : +- Union false, false : :- Union false, false : : :- Union false, false -: : : :- Aggregate [int_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 12, 0, 0) AS sketch#x] +: : : :- Aggregate [int_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : : : : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : : : : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet -: : : +- Aggregate [long_sketch AS sketch_type#x, theta_sketch_agg(col1#xL, 15, 0, 0) AS sketch#x] +: : : +- Aggregate [long_sketch AS sketch_type#x, theta_sketch_agg(col1#xL, 15, QUICKSELECT, 0, 0) AS sketch#x] : : : +- SubqueryAlias spark_catalog.default.t_long_1_5_through_7_11 : : : +- Relation spark_catalog.default.t_long_1_5_through_7_11[col1#xL,col2#xL] parquet -: : +- Aggregate [double_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 10, 0, 0) AS sketch#x] +: : +- Aggregate [double_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 10, QUICKSELECT, 0, 0) AS sketch#x] : : +- SubqueryAlias spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8 : : +- Relation spark_catalog.default.t_double_1_1_1_4_through_1_5_1_8[col1#x,col2#x] parquet -: +- Aggregate [string_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 14, 0, 0) AS sketch#x] +: +- Aggregate [string_sketch AS sketch_type#x, theta_sketch_agg(col1#x, 14, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias spark_catalog.default.t_string_a_d_through_e_h : +- Relation 
spark_catalog.default.t_string_a_d_through_e_h[col1#x,col2#x] parquet :- CTERelationDef xxxx, false @@ -1001,7 +1001,7 @@ WithCTE : +- CTERelationRef xxxx, true, [sketch_type#x, sketch#x], false, false, 4 :- CTERelationDef xxxx, false : +- SubqueryAlias individual_sketches -: +- Aggregate [theta_sketch_agg(col1#x, 12, 0, 0) AS sketch1#x, theta_sketch_agg(col2#x, 12, 0, 0) AS sketch2#x] +: +- Aggregate [theta_sketch_agg(col1#x, 12, QUICKSELECT, 0, 0) AS sketch1#x, theta_sketch_agg(col2#x, 12, QUICKSELECT, 0, 0) AS sketch2#x] : +- SubqueryAlias spark_catalog.default.t_int_1_5_through_7_11 : +- Relation spark_catalog.default.t_int_1_5_through_7_11[col1#x,col2#x] parquet +- Project [theta_sketch_estimate(scalar-subquery#x []) AS union_estimate#xL, theta_sketch_estimate(theta_union(sketch1#x, sketch2#x, 15)) AS binary_union_estimate#xL, theta_sketch_estimate(theta_intersection(sketch1#x, sketch2#x)) AS intersection_estimate#xL, theta_sketch_estimate(theta_difference(sketch1#x, sketch2#x)) AS difference_estimate#xL] @@ -1016,7 +1016,7 @@ WithCTE SELECT theta_sketch_agg(col, 2) FROM VALUES (50), (60), (60) tab(col) -- !query analysis -Aggregate [theta_sketch_agg(col#x, 2, 0, 0) AS theta_sketch_agg(col, 2)#x] +Aggregate [theta_sketch_agg(col#x, 2, QUICKSELECT, 0, 0) AS theta_sketch_agg(col, 2, QUICKSELECT)#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1025,7 +1025,7 @@ Aggregate [theta_sketch_agg(col#x, 2, 0, 0) AS theta_sketch_agg(col, 2)#x] SELECT theta_sketch_agg(col, 40) FROM VALUES (50), (60), (60) tab(col) -- !query analysis -Aggregate [theta_sketch_agg(col#x, 40, 0, 0) AS theta_sketch_agg(col, 40)#x] +Aggregate [theta_sketch_agg(col#x, 40, QUICKSELECT, 0, 0) AS theta_sketch_agg(col, 40, QUICKSELECT)#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1041,10 +1041,10 @@ FROM (SELECT theta_sketch_agg(col, 12) as sketch Aggregate [theta_union_agg(sketch#x, 3, 0, 0) AS theta_union_agg(sketch, 3)#x] +- SubqueryAlias __auto_generated_subquery_name +- Union false, 
false - :- Aggregate [theta_sketch_agg(col#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias tab : +- LocalRelation [col#x] - +- Aggregate [theta_sketch_agg(col#x, 20, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col#x, 20, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1060,10 +1060,10 @@ FROM (SELECT theta_sketch_agg(col, 12) as sketch Aggregate [theta_union_agg(sketch#x, 27, 0, 0) AS theta_union_agg(sketch, 27)#x] +- SubqueryAlias __auto_generated_subquery_name +- Union false, false - :- Aggregate [theta_sketch_agg(col#x, 12, 0, 0) AS sketch#x] + :- Aggregate [theta_sketch_agg(col#x, 12, QUICKSELECT, 0, 0) AS sketch#x] : +- SubqueryAlias tab : +- LocalRelation [col#x] - +- Aggregate [theta_sketch_agg(col#x, 20, 0, 0) AS sketch#x] + +- Aggregate [theta_sketch_agg(col#x, 20, QUICKSELECT, 0, 0) AS sketch#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1170,7 +1170,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "third", "requiredType" : "\"INT\"", - "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), invalid)\"" + "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), invalid)\"" }, "queryContext" : [ { "objectType" : "", @@ -1200,7 +1200,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "second", "requiredType" : "\"BINARY\"", - "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12), invalid_sketch)\"" + "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), invalid_sketch)\"" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out b/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out index 95c6e28a8c426..f6863c9d31c1d 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/thetasketch.sql.out @@ -182,7 +182,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 4 @@ -190,7 +190,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_array_long_1_3_through_4_6 -- !query schema -struct +struct -- !query output 4 @@ -198,7 +198,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 5 @@ -206,7 +206,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 5 @@ -214,7 +214,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col2)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 5 @@ -222,7 +222,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1, 22)) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 7 @@ -230,7 +230,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 7 @@ -238,7 +238,7 @@ struct -- !query SELECT theta_sketch_estimate(theta_sketch_agg(col1)) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 5 @@ -249,7 +249,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11 @@ -260,7 +260,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 15), theta_sketch_agg(col2))) FROM t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 11 @@ -271,7 +271,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), 
theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 8 @@ -282,7 +282,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 15), 15)) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 8 @@ -293,7 +293,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 8 @@ -304,7 +304,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2), 20)) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 6 @@ -315,7 +315,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 6 @@ -326,7 +326,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 13))) FROM t_array_long_1_3_through_4_6 -- !query schema -struct +struct -- !query output 6 @@ -337,7 +337,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3 @@ -348,7 +348,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), theta_sketch_agg(col2, 12))) FROM t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 3 @@ -359,7 +359,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 2 @@ -370,7 +370,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 5), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 2 @@ -381,7 +381,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 2 @@ -392,7 +392,7 @@ 
SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 22))) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 4 @@ -403,7 +403,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 2 @@ -414,7 +414,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 10))) FROM t_array_long_1_3_through_4_6 -- !query schema -struct +struct -- !query output 2 @@ -425,7 +425,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_int_1_5_through_7_11 -- !query schema -struct +struct -- !query output 4 @@ -436,7 +436,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 5))) FROM t_long_1_5_through_7_11 -- !query schema -struct +struct -- !query output 4 @@ -447,7 +447,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_double_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 3 @@ -458,7 +458,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 12), theta_sketch_agg(col2))) FROM t_float_1_1_1_4_through_1_5_1_8 -- !query schema -struct +struct -- !query output 3 @@ -469,7 +469,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_string_a_d_through_e_h -- !query schema -struct +struct -- !query output 3 @@ -480,7 +480,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1, 6), theta_sketch_agg(col2, 8))) FROM t_binary_a_b_through_e_f -- !query schema -struct +struct -- !query output 1 @@ -491,7 +491,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM t_array_int_1_3_through_4_6 -- !query schema -struct +struct -- !query output 2 @@ -502,7 +502,7 @@ SELECT theta_sketch_estimate( theta_sketch_agg(col1), theta_sketch_agg(col2, 4))) FROM t_array_long_1_3_through_4_6 -- !query schema -struct 
+struct -- !query output 2 @@ -687,7 +687,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (null), (2), (null), (3) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -696,7 +696,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES ('test'), (null), ('null'), (null) tab(col) -- !query schema -struct +struct -- !query output 2 @@ -705,7 +705,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (100L), (null), (200L), (null), (300L) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -714,7 +714,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS DOUBLE))) FROM VALUES (1.1), (null), (2.2), (null), (3.3) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -723,7 +723,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(CAST(col AS FLOAT))) FROM VALUES (1.5), (null), (2.5), (null), (3.5) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -732,7 +732,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X'AA'), (null), (X'BB'), (null), (X'CC') tab(col) -- !query schema -struct +struct -- !query output 3 @@ -741,7 +741,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, 2)), (null), (ARRAY(3, 4)), (null), (ARRAY(5, 6)) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -750,7 +750,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, 20L)), (null), (ARRAY(30L, 40L)), (null), (ARRAY(50L, 60L)) tab(col) -- !query schema -struct +struct -- !query output 3 @@ -759,7 +759,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(1, null)), (ARRAY(1)), (ARRAY(2, null, 3)), (ARRAY(4)) tab(col) -- !query schema -struct +struct -- !query output 4 @@ -768,7 +768,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY(10L, null)), (ARRAY(10L)), (ARRAY(20L, null, 30L)), 
(ARRAY(40L)) tab(col) -- !query schema -struct +struct -- !query output 4 @@ -777,7 +777,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (ARRAY()), (ARRAY(1, 2)), (ARRAY()), (ARRAY(3, 4)) tab(col) -- !query schema -struct +struct -- !query output 2 @@ -786,7 +786,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (''), ('a'), (''), ('b'), ('c') tab(col) -- !query schema -struct +struct -- !query output 3 @@ -795,7 +795,7 @@ struct SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (X''), (X'01'), (X'02'), (X'03'), (CAST(' ' AS BINARY)), (X'e280'), (X'c1'), (X'c120') tab(col) -- !query schema -struct +struct -- !query output 7 @@ -1086,7 +1086,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "third", "requiredType" : "\"INT\"", - "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), invalid)\"" + "sqlExpr" : "\"theta_union(theta_sketch_agg(col1, 12, QUICKSELECT), theta_sketch_agg(col2, 12, QUICKSELECT), invalid)\"" }, "queryContext" : [ { "objectType" : "", @@ -1118,7 +1118,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING\"", "paramIndex" : "second", "requiredType" : "\"BINARY\"", - "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12), invalid_sketch)\"" + "sqlExpr" : "\"theta_intersection(theta_sketch_agg(col1, 12, QUICKSELECT), invalid_sketch)\"" }, "queryContext" : [ { "objectType" : "",