From 1aac9f7724f481d1ae2d9c4478e25b013a9636bc Mon Sep 17 00:00:00 2001 From: Igor Lukanin Date: Fri, 1 Mar 2024 17:40:23 +0100 Subject: [PATCH] docs: Explain FILTER_GROUP --- .../data-model/context-variables.mdx | 352 +++++++++++++++++- 1 file changed, 343 insertions(+), 9 deletions(-) diff --git a/docs/pages/reference/data-model/context-variables.mdx b/docs/pages/reference/data-model/context-variables.mdx index 2a77d36be4874..07dffc5411737 100644 --- a/docs/pages/reference/data-model/context-variables.mdx +++ b/docs/pages/reference/data-model/context-variables.mdx @@ -4,7 +4,7 @@ You can use the following context variables within [cube][ref-ref-cubes] definitions: - [`CUBE`](#cube) for [referencing members][ref-syntax-references] of the same cube. -- [`FILTER_PARAMS`](#filter_params) for optimizing generated SQL queries. +- [`FILTER_PARAMS`](#filter_params) and [`FILTER_GROUP`](#filter_group) for optimizing generated SQL queries. - [`SQL_UTILS`](#sql_utils) for time zone conversion. - [`COMPILE_CONTEXT`](#compile_context) for creation of [dynamic data models][ref-dynamic-data-models]. @@ -108,8 +108,8 @@ cubes: ## `FILTER_PARAMS` -`FILTER_PARAMS` allows you to use [filter][ref-query-filter] values during -SQL generation. +`FILTER_PARAMS` context variable allows you to use [filter][ref-query-filter] +values from the Cube query during SQL generation. This is useful for hinting your database optimizer to use a specific index or filter out partitions or shards in your cloud data warehouse so you won't @@ -128,16 +128,49 @@ data source. -`FILTER_PARAMS` has the following syntax: +`FILTER_PARAMS` has to be a top-level expression in `WHERE` and it has the +following syntax: + + + +```yaml +cubes: + - name: cube_name + sql: > + SELECT * + FROM table + WHERE {FILTER_PARAMS.cube_name.member_name.filter(sql_expression)} + + dimensions: + - name: member_name + # ... 
+ + + +``` ```javascript -FILTER_PARAMS.cube_name.member_name.filter(sql_expression) +cube(`cube_name`, { + sql: ` + SELECT * + FROM table + WHERE ${FILTER_PARAMS.cube_name.member_name.filter(sql_expression)} + `, + + dimensions: { + member_name: { + // ... + } + } +}) ``` -The `filter()` function accepts a SQL expression, which could be either -a plain string or a function returning one. + + +The `filter()` function accepts `sql_expression`, which could be either +a string or a function returning a string. -### String +### Example with string See the example below for the case when a string is passed to `filter()`: @@ -216,7 +249,7 @@ WHERE } ``` -### Function +### Example with function You can also pass a function as a `filter()` argument. This way, you can add BigQuery shard filtering, which will reduce your billing cost. @@ -277,6 +310,306 @@ type conversions in this case. +## `FILTER_GROUP` + +If you use `FILTER_PARAMS` in your query more than once, you must wrap all +of its usages in `FILTER_GROUP`. + + + +Otherwise, if you combine `FILTER_PARAMS` with any logical operators other than +`AND` in SQL or if you use filters with [boolean operators][ref-filter-boolean] +in your Cube queries, incorrect SQL might be generated. + + + +`FILTER_GROUP` has to be a top-level expression in `WHERE` and it has the +following syntax: + + + +```yaml +cubes: + - name: cube_name + sql: > + SELECT * + FROM table + WHERE {FILTER_GROUP( + FILTER_PARAMS.cube_name.member_name.filter(sql_expression), + FILTER_PARAMS.cube_name.another_member_name.filter(sql_expression) + )} + + dimensions: + - name: member_name + # ... + + - name: another_member_name + # ... + + + + +``` + +```javascript +cube(`cube_name`, { + sql: ` + SELECT * + FROM table + WHERE ${FILTER_GROUP( + FILTER_PARAMS.cube_name.member_name.filter(sql_expression), + FILTER_PARAMS.cube_name.another_member_name.filter(sql_expression) + )} + `, + + dimensions: { + member_name: { + // ... 
+ } + } +}) +``` + + + +### Example + +To understand the value of `FILTER_GROUP`, consider the following data model +where two `FILTER_PARAMS` are combined in SQL using the `OR` operator: + + + +```yaml +cubes: + - name: filter_group + sql: > + SELECT * + FROM ( + SELECT 1 AS a, 3 AS b UNION ALL + SELECT 2 AS a, 2 AS b UNION ALL + SELECT 3 AS a, 1 AS b + ) AS data + WHERE + {FILTER_PARAMS.filter_group.a.filter("a")} OR + {FILTER_PARAMS.filter_group.b.filter("b")} + + dimensions: + - name: a + sql: a + type: number + + - name: b + sql: b + type: number + + + + +``` + +```javascript +cube(`filter_group`, { + sql: ` + SELECT * + FROM ( + SELECT 1 AS a, 3 AS b UNION ALL + SELECT 2 AS a, 2 AS b UNION ALL + SELECT 3 AS a, 1 AS b + ) AS data + WHERE + ${FILTER_PARAMS.filter_group.a.filter('a')} OR + ${FILTER_PARAMS.filter_group.b.filter('b')} + `, + + dimensions: { + a: { + sql: `a`, + type: `number` + }, + + b: { + sql: `b`, + type: `number` + } + } +}) +``` + + + +If the following query is run... + +```json +{ + "dimensions": [ + "filter_group.a", + "filter_group.b" + ], + "filters": [ + { + "member": "filter_group.a", + "operator": "gt", + "values": ["1"] + }, + { + "member": "filter_group.b", + "operator": "gt", + "values": ["1"] + } + ] +} +``` + +...the following (logically incorrect) SQL will be generated: + +```sql +SELECT + "filter_group".a, + "filter_group".b +FROM ( + SELECT * + FROM ( + SELECT 1 AS a, 3 AS b UNION ALL + SELECT 2 AS a, 2 AS b UNION ALL + SELECT 3 AS a, 1 AS b + ) AS data + WHERE + (a > 1) OR -- Incorrect logical operator here + (b > 1) +) AS "filter_group" +WHERE + "filter_group".a > 1 AND + "filter_group".b > 1 +GROUP BY 1, 2 +``` + +As you can see, since an array of filters has `AND` semantics, Cube has +correctly used the `AND` operator in the "outer" `WHERE`. At the same time, +the hardcoded `OR` operator has propagated to the "inner" `WHERE`, leading to +a logically incorrect query. + +Now, if the cube is defined the following way... 
+ + + +```yaml +cubes: + - name: filter_group + sql: > + SELECT * + FROM ( + SELECT 1 AS a, 3 AS b UNION ALL + SELECT 2 AS a, 2 AS b UNION ALL + SELECT 3 AS a, 1 AS b + ) AS data + WHERE + {FILTER_GROUP( + FILTER_PARAMS.filter_group.a.filter("a"), + FILTER_PARAMS.filter_group.b.filter("b") + )} + + # ... +``` + +```javascript +cube(`filter_group`, { + sql: ` + SELECT * + FROM ( + SELECT 1 AS a, 3 AS b UNION ALL + SELECT 2 AS a, 2 AS b UNION ALL + SELECT 3 AS a, 1 AS b + ) AS data + WHERE + ${FILTER_GROUP( + FILTER_PARAMS.filter_group.a.filter('a'), + FILTER_PARAMS.filter_group.b.filter('b') + )} + `, + + // ... +``` + + + +...the following correct SQL will be generated for the same query: + +```sql +SELECT + "filter_group".a, + "filter_group".b +FROM ( + SELECT * + FROM ( + SELECT 1 AS a, 3 AS b UNION ALL + SELECT 2 AS a, 2 AS b UNION ALL + SELECT 3 AS a, 1 AS b + ) AS data + WHERE + (a > 1) AND -- Correct logical operator here + (b > 1) +) AS "filter_group" +WHERE + "filter_group".a > 1 AND + "filter_group".b > 1 +GROUP BY 1, 2 +``` + +You can also use [boolean operators][ref-filter-boolean] in the Cube query +to express more complex filtering logic: + +```json +{ + "dimensions": [ + "filter_group.a", + "filter_group.b" + ], + "filters": [ + { + "or": [ + { + "member": "filter_group.a", + "operator": "gt", + "values": ["1"] + }, + { + "member": "filter_group.b", + "operator": "gt", + "values": ["1"] + } + ] + } + ] +} +``` + +With `FILTER_GROUP`, the following correct SQL will be generated: + +```sql +SELECT + "filter_group".a, + "filter_group".b +FROM ( + SELECT * + FROM ( + SELECT 1 AS a, 3 AS b UNION ALL + SELECT 2 AS a, 2 AS b UNION ALL + SELECT 3 AS a, 1 AS b + ) AS data + WHERE + (a > 1) OR + (b > 1) +) AS "filter_group" +WHERE + "filter_group".a > 1 OR + "filter_group".b > 1 +GROUP BY 1, 2 +``` + ## `SQL_UTILS` ### `convertTz` @@ -464,3 +797,4 @@ cube(`orders`, { [ref-dynamic-data-models]: /product/data-modeling/dynamic/jinja [ref-query-filter]: 
/product/apis-integrations/rest-api/query-format#query-properties [ref-dynamic-jinja]: /product/data-modeling/dynamic/jinja +[ref-filter-boolean]: /product/apis-integrations/rest-api/query-format#boolean-logical-operators \ No newline at end of file