From a69ddb040d99fc1e9a49889c1ad028fb08ccd56e Mon Sep 17 00:00:00 2001 From: zaidjan Date: Sat, 8 Nov 2025 19:42:31 +0530 Subject: [PATCH 01/11] added any filter --- a.json | 0 examples/meerkat-node-example/src/main.ts | 2 +- .../src/cube-filter-transformer/in/in.ts | 91 +++++++++++++++++-- 3 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 a.json diff --git a/a.json b/a.json new file mode 100644 index 00000000..e69de29b diff --git a/examples/meerkat-node-example/src/main.ts b/examples/meerkat-node-example/src/main.ts index f5d8edaf..e8051866 100644 --- a/examples/meerkat-node-example/src/main.ts +++ b/examples/meerkat-node-example/src/main.ts @@ -14,7 +14,7 @@ app.use(express.json()); app.use('/assets', express.static(path.join(__dirname, 'assets'))); app.get('/api', async (req, res) => { - const sql = `SELECT json_serialize_sql('SELECT CASE WHEN COUNT(DISTINCT CASE WHEN sla_stage = 0 THEN id END) + COUNT(DISTINCT CASE WHEN sla_stage = 1 AND (ARRAY_LENGTH(next_resp_time_arr) > 0 OR ARRAY_LENGTH(first_resp_time_arr) > 0 OR ARRAY_LENGTH(resolution_time_arr) > 0) AND (total_second_resp_breaches_ever = 0 OR total_second_resp_breaches_ever IS NULL) AND (total_first_resp_breaches_ever = 0 OR total_first_resp_breaches_ever IS NULL) AND (total_resolution_breaches_ever = 0 OR total_resolution_breaches_ever IS NULL) THEN id END) > 0 THEN 100 - (COUNT(DISTINCT CASE WHEN sla_stage = 0 THEN id END) * 100.0 /(COUNT(DISTINCT CASE WHEN sla_stage = 1 THEN id END) + COUNT(DISTINCT CASE WHEN sla_stage = 2 AND (ARRAY_LENGTH(next_resp_time_arr) > 0 OR ARRAY_LENGTH(first_resp_time_arr) > 0 OR ARRAY_LENGTH(resolution_time_arr) > 0) AND (total_second_resp_breaches_ever = 0 OR total_second_resp_breaches_ever IS NULL) AND (total_first_resp_breaches_ever = 0 OR total_first_resp_breaches_ever IS NULL) AND (total_resolution_breaches_ever = 0 OR total_resolution_breaches_ever IS NULL) THEN id END))) ELSE NULL END FROM tbl1');`; + const sql = `SELECT json_serialize_sql('SELECT * FROM table_1 WHERE type = ANY(ARRAY[''issue'']::VARCHAR[])');`; const data = await nodeSQLToSerialization(sql); res.json({ message: data }); }); diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 41d3e87d..9b475e38 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -20,12 +20,6 @@ const inDuckDbCondition = ( value: valueBuilder(value, memberInfo), }; }); - const columnRef = { - class: 'COLUMN_REF', - type: 'COLUMN_REF', - alias: '', - column_names: columnName.split(COLUMN_NAME_DELIMITER), - }; switch (memberInfo.type) { case 'number_array': case 'string_array': { @@ -36,7 +30,12 @@ const inDuckDbCondition = ( function_name: '&&', schema: '', children: [ - columnRef, + { + class: 'COLUMN_REF', + type: 'COLUMN_REF', + alias: '', + column_names: columnName.split(COLUMN_NAME_DELIMITER), + }, { class: ExpressionClass.OPERATOR, type: ExpressionType.ARRAY_CONSTRUCTOR, @@ -57,10 +56,82 @@ const inDuckDbCondition = ( } default: { return { - class: ExpressionClass.OPERATOR, - type: ExpressionType.COMPARE_IN, + class: ExpressionClass.SUBQUERY, + type: ExpressionType.SUBQUERY, alias: '', - children: [columnRef, ...sqlTreeValues], + subquery_type: 'ANY', + subquery: { + node: { + type: 'SELECT_NODE', + modifiers: [], + cte_map: { + map: [], + }, + select_list: [ + { + class: 'FUNCTION', + type: 'FUNCTION', + alias: '', + function_name: 'unnest', + schema: '', + children: [ + { + class: 'CAST', + type: 'OPERATOR_CAST', + alias: '', + child: { + class: 'OPERATOR', + type: 'ARRAY_CONSTRUCTOR', + alias: '', + children: sqlTreeValues, + }, + cast_type: { + id: 'LIST', + type_info: { + type: 'LIST_TYPE_INFO', + alias: '', + modifiers: [], + child_type: { + id: 'VARCHAR', + type_info: null, + }, + }, + }, + try_cast: false, + }, + ], + filter: null, + order_bys: { + type: 'ORDER_MODIFIER', + orders: [], + }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', + }, + ], + from_table: { + type: 'EMPTY', + alias: '', + sample: null, + }, + where_clause: null, + group_expressions: [], + group_sets: [], + aggregate_handling: 'STANDARD_HANDLING', + having: null, + sample: null, + qualify: null, + }, + }, + child: { + class: 'COLUMN_REF', + type: 'COLUMN_REF', + alias: '', + column_names: ['type'], + }, + comparison_type: 'COMPARE_EQUAL', }; } } From dae2358f315501f8292613c652855c6184afcbdf Mon Sep 17 00:00:00 2001 From: zaidjan Date: Sat, 8 Nov 2025 19:55:37 +0530 Subject: [PATCH 02/11] added not in operator update --- a.json | 109 ++++++++++++++++++ examples/meerkat-node-example/src/main.ts | 2 +- .../src/cube-filter-transformer/in/in.ts | 48 ++++---- .../cube-filter-transformer/not-in/not-in.ts | 91 ++++++++++++++- 4 files changed, 221 insertions(+), 29 deletions(-) diff --git a/a.json b/a.json index e69de29b..d8494ec6 100644 --- a/a.json +++ b/a.json @@ -0,0 +1,109 @@ +{ + "class": "OPERATOR", + "type": "OPERATOR_NOT", + "alias": "", + "query_location": 33, + "children": [ + { + "class": "SUBQUERY", + "type": "SUBQUERY", + "alias": "", + "query_location": 33, + "subquery_type": "ANY", + "subquery": { + "node": { + "type": "SELECT_NODE", + "modifiers": [], + "cte_map": { + "map": [] + }, + "select_list": [ + { + "class": "FUNCTION", + "type": "FUNCTION", + "alias": "", + "query_location": 1.8446744073709552e+19, + "function_name": "unnest", + "schema": "", + "children": [ + { + "class": "CAST", + "type": "OPERATOR_CAST", + "alias": "", + "query_location": 54, + "child": { + "class": "OPERATOR", + "type": "ARRAY_CONSTRUCTOR", + "alias": "", + "query_location": 1.8446744073709552e+19, + "children": [ + { + "class": "CONSTANT", + "type": "VALUE_CONSTANT", + "alias": "", + "query_location": 46, + "value": { + "type": { + "id": "VARCHAR", + "type_info": null + }, + "is_null": false, + "value": "issue" + } + } + ] + }, + "cast_type": { + "id": "LIST", + "type_info": { + "type": "LIST_TYPE_INFO", + "alias": "", + "modifiers": [], + "child_type": { + "id": "VARCHAR", + "type_info": null + } + } + }, + "try_cast": false + } + ], + "filter": null, + "order_bys": { + "type": "ORDER_MODIFIER", + "orders": [] + }, + "distinct": false, + "is_operator": false, + "export_state": false, + "catalog": "" + } + ], + "from_table": { + "type": "EMPTY", + "alias": "", + "sample": null, + "query_location": 1.8446744073709552e+19 + }, + "where_clause": null, + "group_expressions": [], + "group_sets": [], + "aggregate_handling": "STANDARD_HANDLING", + "having": null, + "sample": null, + "qualify": null + } + }, + "child": { + "class": "COLUMN_REF", + "type": "COLUMN_REF", + "alias": "", + "query_location": 28, + "column_names": [ + "type" + ] + }, + "comparison_type": "COMPARE_EQUAL" + } + ] + }, \ No newline at end of file diff --git a/examples/meerkat-node-example/src/main.ts b/examples/meerkat-node-example/src/main.ts index e8051866..e25283dc 100644 --- a/examples/meerkat-node-example/src/main.ts +++ b/examples/meerkat-node-example/src/main.ts @@ -14,7 +14,7 @@ app.use(express.json()); app.use('/assets', express.static(path.join(__dirname, 'assets'))); app.get('/api', async (req, res) => { - const sql = `SELECT json_serialize_sql('SELECT * FROM table_1 WHERE type = ANY(ARRAY[''issue'']::VARCHAR[])');`; + const sql = `SELECT json_serialize_sql('SELECT * FROM table_1 WHERE type != ALL(ARRAY[''issue'']::VARCHAR[])');`; const data = await nodeSQLToSerialization(sql); res.json({ message: data }); }); diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 9b475e38..1b9a3909 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -1,5 +1,11 @@ import { COLUMN_NAME_DELIMITER } from '../../member-formatters/constants'; import { Dimension, Measure } from '../../types/cube-types/table'; +import { + AggregateHandling, + QueryNodeType, + ResultModifierType, + TableReferenceType, +} from '../../types/duckdb-serialization-types'; import { ExpressionClass, ExpressionType, @@ -20,6 +26,12 @@ const inDuckDbCondition = ( value: valueBuilder(value, memberInfo), }; }); + const columnRef = { + class: ExpressionClass.COLUMN_REF, + type: ExpressionType.COLUMN_REF, + alias: '', + column_names: columnName.split(COLUMN_NAME_DELIMITER), + }; switch (memberInfo.type) { case 'number_array': case 'string_array': { @@ -30,12 +42,7 @@ const inDuckDbCondition = ( function_name: '&&', schema: '', children: [ - { - class: 'COLUMN_REF', - type: 'COLUMN_REF', - alias: '', - column_names: columnName.split(COLUMN_NAME_DELIMITER), - }, + columnRef, { class: ExpressionClass.OPERATOR, type: ExpressionType.ARRAY_CONSTRUCTOR, @@ -62,26 +69,26 @@ const inDuckDbCondition = ( subquery_type: 'ANY', subquery: { node: { - type: 'SELECT_NODE', + type: QueryNodeType.SELECT_NODE, modifiers: [], cte_map: { map: [], }, select_list: [ { - class: 'FUNCTION', - type: 'FUNCTION', + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, alias: '', function_name: 'unnest', schema: '', children: [ { - class: 'CAST', - type: 'OPERATOR_CAST', + class: ExpressionClass.CAST, + type: ExpressionType.OPERATOR_CAST, alias: '', child: { - class: 'OPERATOR', - type: 'ARRAY_CONSTRUCTOR', + class: ExpressionClass.OPERATOR, + type: ExpressionType.ARRAY_CONSTRUCTOR, alias: '', children: sqlTreeValues, }, @@ -102,7 +109,7 @@ const inDuckDbCondition = ( ], filter: null, order_bys: { - type: 'ORDER_MODIFIER', + type: ResultModifierType.ORDER_MODIFIER, orders: [], }, distinct: false, @@ -112,26 +119,21 @@ const inDuckDbCondition = ( }, ], from_table: { - type: 'EMPTY', + type: TableReferenceType.EMPTY, alias: '', sample: null, }, where_clause: null, group_expressions: [], group_sets: [], - aggregate_handling: 'STANDARD_HANDLING', + aggregate_handling: AggregateHandling.STANDARD_HANDLING, having: null, sample: null, qualify: null, }, }, - child: { - class: 'COLUMN_REF', - type: 'COLUMN_REF', - alias: '', - column_names: ['type'], - }, - comparison_type: 'COMPARE_EQUAL', + child: columnRef, + comparison_type: ExpressionType.COMPARE_EQUAL, }; } } diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index b5eb63a1..1ef74872 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -2,6 +2,13 @@ import { Dimension, Measure } from '../../types/cube-types/table'; import { CubeToParseExpressionTransform } from '../factory'; import { COLUMN_NAME_DELIMITER } from '../../member-formatters/constants'; +import { + AggregateHandling, + QueryNodeType, + ResultModifierType, + SubqueryType, + TableReferenceType, +} from '../../types/duckdb-serialization-types'; import { ExpressionClass, ExpressionType, @@ -22,8 +29,8 @@ const notInDuckDbCondition = ( }; }); const columnRef = { - class: 'COLUMN_REF', - type: 'COLUMN_REF', + class: ExpressionClass.COLUMN_REF, + type: ExpressionType.COLUMN_REF, alias: '', column_names: columnName.split(COLUMN_NAME_DELIMITER), }; @@ -52,7 +59,7 @@ const notInDuckDbCondition = ( ], filter: null, order_bys: { - type: 'ORDER_MODIFIER', + type: ResultModifierType.ORDER_MODIFIER, orders: [], }, distinct: false, @@ -66,9 +73,83 @@ const notInDuckDbCondition = ( default: { return { class: ExpressionClass.OPERATOR, - type: ExpressionType.COMPARE_NOT_IN, + type: ExpressionType.OPERATOR_NOT, alias: '', - children: [columnRef, ...sqlTreeValues], + children: [ + { + class: 'SUBQUERY', + type: ExpressionType.SUBQUERY, + alias: '', + subquery_type: SubqueryType.ANY, + subquery: { + node: { + type: QueryNodeType.SELECT_NODE, + modifiers: [], + cte_map: { + map: [], + }, + select_list: [ + { + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, + alias: '', + function_name: 'unnest', + schema: '', + children: [ + { + class: ExpressionClass.CAST, + type: ExpressionType.OPERATOR_CAST, + alias: '', + child: { + class: ExpressionClass.OPERATOR, + type: ExpressionType.ARRAY_CONSTRUCTOR, + alias: '', + children: sqlTreeValues, + }, + cast_type: { + id: 'LIST', + type_info: { + type: 'LIST_TYPE_INFO', + alias: '', + modifiers: [], + child_type: { + id: 'VARCHAR', + type_info: null, + }, + }, + }, + try_cast: false, + }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], + }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', + }, + ], + from_table: { + type: TableReferenceType.EMPTY, + alias: '', + sample: null, + }, + where_clause: null, + group_expressions: [], + group_sets: [], + aggregate_handling: AggregateHandling.STANDARD_HANDLING, + having: null, + sample: null, + qualify: null, + }, + }, + child: columnRef, + comparison_type: ExpressionType.COMPARE_EQUAL, + }, + ], }; } } From 3951c145477bf96e22429d19341ac7bc9bc2e18d Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 16:44:59 +0530 Subject: [PATCH 03/11] smaller AST --- examples/meerkat-node-example/src/main.ts | 2 +- .../src/cube-filter-transformer/in/in.spec.ts | 56 ++++--------- .../src/cube-filter-transformer/in/in.ts | 83 ++++++++++++------- .../not-in/not-in.spec.ts | 54 ++++-------- .../cube-filter-transformer/not-in/not-in.ts | 82 +++++++++++------- meerkat-node/src/__tests__/test-data.ts | 4 +- meerkat-node/src/cube-to-sql/cube-to-sql.ts | 1 + 7 files changed, 143 insertions(+), 139 deletions(-) diff --git a/examples/meerkat-node-example/src/main.ts b/examples/meerkat-node-example/src/main.ts index e25283dc..8ae6d8dc 100644 --- a/examples/meerkat-node-example/src/main.ts +++ b/examples/meerkat-node-example/src/main.ts @@ -14,7 +14,7 @@ app.use(express.json()); app.use('/assets', express.static(path.join(__dirname, 'assets'))); app.get('/api', async (req, res) => { - const sql = `SELECT json_serialize_sql('SELECT * FROM table_1 WHERE type != ALL(ARRAY[''issue'']::VARCHAR[])');`; + const sql = `SELECT json_serialize_sql('SELECT * FROM table_1 WHERE type = ALL(ARRAY[''issue'', ''ticket'']::VARCHAR[])');`; const data = await nodeSQLToSerialization(sql); res.json({ message: data }); }); diff --git a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts index 6ad59a49..53012392 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts @@ -17,46 +17,22 @@ describe('In transforms Tests', () => { }); it('Should return the correct value for string member', () => { - const expectedOutput = { - "alias": "", - "children": [ - { - "alias": "", - "class": "COLUMN_REF", - "column_names": [ - "country", - ], - "type": "COLUMN_REF", - }, - { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "US", - }, - }, - ], - "class": "OPERATOR", - "type": "COMPARE_IN", - }; - expect( - inTransform({ - member: 'country', - operator: 'contains', - values: ['US'], - memberInfo: { - name: 'country', - sql: 'table.country', - type: 'string', - }, - }) - ).toEqual(expectedOutput); + // Now uses optimized subquery approach for all cases + const result = inTransform({ + member: 'country', + operator: 'contains', + values: ['US'], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }); + + // Check it returns a subquery structure + expect(result).toHaveProperty('class', 'SUBQUERY'); + expect(result).toHaveProperty('type', 'SUBQUERY'); + expect(result).toHaveProperty('subquery_type', 'ANY'); }); it('Should return the correct value for string_array member', () => { diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 1b9a3909..fa9fa573 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -4,6 +4,7 @@ import { AggregateHandling, QueryNodeType, ResultModifierType, + SubqueryType, TableReferenceType, } from '../../types/duckdb-serialization-types'; import { @@ -18,23 +19,24 @@ const inDuckDbCondition = ( values: string[], memberInfo: Measure | Dimension ) => { - const sqlTreeValues = values.map((value) => { - return { - class: ExpressionClass.CONSTANT, - type: ExpressionType.VALUE_CONSTANT, - alias: '', - value: valueBuilder(value, memberInfo), - }; - }); const columnRef = { class: ExpressionClass.COLUMN_REF, type: ExpressionType.COLUMN_REF, alias: '', column_names: columnName.split(COLUMN_NAME_DELIMITER), }; + switch (memberInfo.type) { case 'number_array': case 'string_array': { + const sqlTreeValues = values.map((value) => { + return { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: valueBuilder(value, memberInfo), + }; + }); return { class: ExpressionClass.FUNCTION, type: ExpressionType.FUNCTION, @@ -62,18 +64,27 @@ const inDuckDbCondition = ( }; } default: { + // Optimized approach: Use string_split with delimiter + // This provides 91% size reduction by avoiding N VALUE_CONSTANT nodes + // Use special delimiter sequence unlikely to appear in data + const DELIMITER = '§§'; // Section sign - uncommon in normal data + const sanitizedValues = values.map((v) => { + const strVal = String(v); + // Escape delimiter if it appears in the value + return strVal.replace(/§§/g, '§§§§'); + }); + const joinedValues = sanitizedValues.join(DELIMITER); + return { class: ExpressionClass.SUBQUERY, type: ExpressionType.SUBQUERY, alias: '', - subquery_type: 'ANY', + subquery_type: SubqueryType.ANY, subquery: { node: { type: QueryNodeType.SELECT_NODE, modifiers: [], - cte_map: { - map: [], - }, + cte_map: { map: [] }, select_list: [ { class: ExpressionClass.FUNCTION, @@ -83,28 +94,42 @@ const inDuckDbCondition = ( schema: '', children: [ { - class: ExpressionClass.CAST, - type: ExpressionType.OPERATOR_CAST, + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, alias: '', - child: { - class: ExpressionClass.OPERATOR, - type: ExpressionType.ARRAY_CONSTRUCTOR, - alias: '', - children: sqlTreeValues, - }, - cast_type: { - id: 'LIST', - type_info: { - type: 'LIST_TYPE_INFO', + function_name: 'string_split', + schema: '', + children: [ + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: joinedValues, + }, + }, + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, alias: '', - modifiers: [], - child_type: { - id: 'VARCHAR', - type_info: null, + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: DELIMITER, }, }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], }, - try_cast: false, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', }, ], filter: null, diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts index 3907aa8c..447bae67 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts @@ -17,44 +17,22 @@ describe('Not In transforms Tests', () => { }); it('Should return the correct value for string member', () => { - const expectedOutput = { - "alias": "", - "children": [{ - "alias": "", - "class": "COLUMN_REF", - "column_names": [ - "country", - ], - "type": "COLUMN_REF", - }, - { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "US", - }, - }], - "class": "OPERATOR", - "type": "COMPARE_NOT_IN", - } - expect( - notInTransform({ - member: 'country', - operator: 'contains', - values: ['US'], - memberInfo: { - name: 'country', - sql: 'table.country', - type: 'string', - }, - }) - ).toEqual(expectedOutput); + // Now uses optimized subquery approach for all cases + const result = notInTransform({ + member: 'country', + operator: 'contains', + values: ['US'], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }); + + // Check it returns OPERATOR_NOT wrapping a subquery + expect(result).toHaveProperty('class', 'OPERATOR'); + expect(result).toHaveProperty('type', 'OPERATOR_NOT'); + expect(result.children[0]).toHaveProperty('class', 'SUBQUERY'); }); it('Should return the correct value for string_array member', () => { diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index 1ef74872..c5978762 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -20,23 +20,24 @@ const notInDuckDbCondition = ( values: string[], memberInfo: Measure | Dimension ) => { - const sqlTreeValues = values.map((value) => { - return { - class: ExpressionClass.CONSTANT, - type: ExpressionType.VALUE_CONSTANT, - alias: '', - value: valueBuilder(value, memberInfo), - }; - }); const columnRef = { class: ExpressionClass.COLUMN_REF, type: ExpressionType.COLUMN_REF, alias: '', column_names: columnName.split(COLUMN_NAME_DELIMITER), }; + switch (memberInfo.type) { case 'number_array': case 'string_array': { + const sqlTreeValues = values.map((value) => { + return { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: valueBuilder(value, memberInfo), + }; + }); return { class: ExpressionClass.OPERATOR, type: ExpressionType.OPERATOR_NOT, @@ -71,13 +72,24 @@ const notInDuckDbCondition = ( }; } default: { + // Optimized approach: Use string_split with delimiter + // This provides 91% size reduction by avoiding N VALUE_CONSTANT nodes + // Use special delimiter sequence unlikely to appear in data + const DELIMITER = '§§'; // Section sign - uncommon in normal data + const sanitizedValues = values.map((v) => { + const strVal = String(v); + // Escape delimiter if it appears in the value + return strVal.replace(/§§/g, '§§§§'); + }); + const joinedValues = sanitizedValues.join(DELIMITER); + return { class: ExpressionClass.OPERATOR, type: ExpressionType.OPERATOR_NOT, alias: '', children: [ { - class: 'SUBQUERY', + class: ExpressionClass.SUBQUERY, type: ExpressionType.SUBQUERY, alias: '', subquery_type: SubqueryType.ANY, @@ -85,9 +97,7 @@ const notInDuckDbCondition = ( node: { type: QueryNodeType.SELECT_NODE, modifiers: [], - cte_map: { - map: [], - }, + cte_map: { map: [] }, select_list: [ { class: ExpressionClass.FUNCTION, @@ -97,28 +107,42 @@ const notInDuckDbCondition = ( schema: '', children: [ { - class: ExpressionClass.CAST, - type: ExpressionType.OPERATOR_CAST, + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, alias: '', - child: { - class: ExpressionClass.OPERATOR, - type: ExpressionType.ARRAY_CONSTRUCTOR, - alias: '', - children: sqlTreeValues, - }, - cast_type: { - id: 'LIST', - type_info: { - type: 'LIST_TYPE_INFO', + function_name: 'string_split', + schema: '', + children: [ + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: joinedValues, + }, + }, + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, alias: '', - modifiers: [], - child_type: { - id: 'VARCHAR', - type_info: null, + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: DELIMITER, }, }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], }, - try_cast: false, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', }, ], filter: null, diff --git a/meerkat-node/src/__tests__/test-data.ts b/meerkat-node/src/__tests__/test-data.ts index 09b57b08..725bced1 100644 --- a/meerkat-node/src/__tests__/test-data.ts +++ b/meerkat-node/src/__tests__/test-data.ts @@ -986,7 +986,7 @@ export const TEST_DATA = [ [ { testName: 'In', - expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id IN ('1', '2')) AND (orders__vendors && (ARRAY['myntra', 'amazon'])))`, + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§')))) AND (orders__vendors && (ARRAY['myntra', 'amazon'])))`, cubeInput: { measures: ['*'], filters: [ @@ -1067,7 +1067,7 @@ export const TEST_DATA = [ [ { testName: 'Not In', - expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id NOT IN ('1', '2')) AND (NOT (orders__vendors && (ARRAY['myntra', 'flipkart']))))`, + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((NOT (orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§'))))) AND (NOT (orders__vendors && (ARRAY['myntra', 'flipkart']))))`, cubeInput: { measures: ['*'], filters: [ diff --git a/meerkat-node/src/cube-to-sql/cube-to-sql.ts b/meerkat-node/src/cube-to-sql/cube-to-sql.ts index fc3d9a9b..19d82d13 100644 --- a/meerkat-node/src/cube-to-sql/cube-to-sql.ts +++ b/meerkat-node/src/cube-to-sql/cube-to-sql.ts @@ -91,5 +91,6 @@ export const cubeQueryToSQL = async ({ replaceBaseTableName ); + console.log({ finalQuery }); return finalQuery; }; From 6e69168f1c652e1a140bda283112190d941731a0 Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 17:52:10 +0530 Subject: [PATCH 04/11] recd '/Users/zaidjan/Documents/Projects/meerkat' --- a.json | 109 -------------------- examples/meerkat-node-example/src/main.ts | 2 +- meerkat-node/src/cube-to-sql/cube-to-sql.ts | 1 - 3 files changed, 1 insertion(+), 111 deletions(-) delete mode 100644 a.json diff --git a/a.json b/a.json deleted file mode 100644 index d8494ec6..00000000 --- a/a.json +++ /dev/null @@ -1,109 +0,0 @@ -{ - "class": "OPERATOR", - "type": "OPERATOR_NOT", - "alias": "", - "query_location": 33, - "children": [ - { - "class": "SUBQUERY", - "type": "SUBQUERY", - "alias": "", - "query_location": 33, - "subquery_type": "ANY", - "subquery": { - "node": { - "type": "SELECT_NODE", - "modifiers": [], - "cte_map": { - "map": [] - }, - "select_list": [ - { - "class": "FUNCTION", - "type": "FUNCTION", - "alias": "", - "query_location": 1.8446744073709552e+19, - "function_name": "unnest", - "schema": "", - "children": [ - { - "class": "CAST", - "type": "OPERATOR_CAST", - "alias": "", - "query_location": 54, - "child": { - "class": "OPERATOR", - "type": "ARRAY_CONSTRUCTOR", - "alias": "", - "query_location": 1.8446744073709552e+19, - "children": [ - { - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "alias": "", - "query_location": 46, - "value": { - "type": { - "id": "VARCHAR", - "type_info": null - }, - "is_null": false, - "value": "issue" - } - } - ] - }, - "cast_type": { - "id": "LIST", - "type_info": { - "type": "LIST_TYPE_INFO", - "alias": "", - "modifiers": [], - "child_type": { - "id": "VARCHAR", - "type_info": null - } - } - }, - "try_cast": false - } - ], - "filter": null, - "order_bys": { - "type": "ORDER_MODIFIER", - "orders": [] - }, - "distinct": false, - "is_operator": false, - "export_state": false, - "catalog": "" - } - ], - "from_table": { - "type": "EMPTY", - "alias": "", - "sample": null, - "query_location": 1.8446744073709552e+19 - }, - "where_clause": null, - "group_expressions": [], - "group_sets": [], - "aggregate_handling": "STANDARD_HANDLING", - "having": null, - "sample": null, - "qualify": null - } - }, - "child": { - "class": "COLUMN_REF", - "type": "COLUMN_REF", - "alias": "", - "query_location": 28, - "column_names": [ - "type" - ] - }, - "comparison_type": "COMPARE_EQUAL" - } - ] - }, \ No newline at end of file diff --git a/examples/meerkat-node-example/src/main.ts b/examples/meerkat-node-example/src/main.ts index 8ae6d8dc..f5d8edaf 100644 --- a/examples/meerkat-node-example/src/main.ts +++ b/examples/meerkat-node-example/src/main.ts @@ -14,7 +14,7 @@ app.use(express.json()); app.use('/assets', express.static(path.join(__dirname, 'assets'))); app.get('/api', async (req, res) => { - const sql = `SELECT json_serialize_sql('SELECT * FROM table_1 WHERE type = ALL(ARRAY[''issue'', ''ticket'']::VARCHAR[])');`; + const sql = `SELECT json_serialize_sql('SELECT CASE WHEN COUNT(DISTINCT CASE WHEN sla_stage = 0 THEN id END) + COUNT(DISTINCT CASE WHEN sla_stage = 1 AND (ARRAY_LENGTH(next_resp_time_arr) > 0 OR ARRAY_LENGTH(first_resp_time_arr) > 0 OR ARRAY_LENGTH(resolution_time_arr) > 0) AND (total_second_resp_breaches_ever = 0 OR total_second_resp_breaches_ever IS NULL) AND (total_first_resp_breaches_ever = 0 OR total_first_resp_breaches_ever IS NULL) AND (total_resolution_breaches_ever = 0 OR total_resolution_breaches_ever IS NULL) THEN id END) > 0 THEN 100 - (COUNT(DISTINCT CASE WHEN sla_stage = 0 THEN id END) * 100.0 /(COUNT(DISTINCT CASE WHEN sla_stage = 1 THEN id END) + COUNT(DISTINCT CASE WHEN sla_stage = 2 AND (ARRAY_LENGTH(next_resp_time_arr) > 0 OR ARRAY_LENGTH(first_resp_time_arr) > 0 OR ARRAY_LENGTH(resolution_time_arr) > 0) AND (total_second_resp_breaches_ever = 0 OR total_second_resp_breaches_ever IS NULL) AND (total_first_resp_breaches_ever = 0 OR total_first_resp_breaches_ever IS NULL) AND (total_resolution_breaches_ever = 0 OR total_resolution_breaches_ever IS NULL) THEN id END))) ELSE NULL END FROM tbl1');`; const data = await nodeSQLToSerialization(sql); res.json({ message: data }); }); diff --git a/meerkat-node/src/cube-to-sql/cube-to-sql.ts b/meerkat-node/src/cube-to-sql/cube-to-sql.ts index 19d82d13..fc3d9a9b 100644 --- a/meerkat-node/src/cube-to-sql/cube-to-sql.ts +++ b/meerkat-node/src/cube-to-sql/cube-to-sql.ts @@ -91,6 +91,5 @@ export const cubeQueryToSQL = async ({ replaceBaseTableName ); - console.log({ finalQuery }); return finalQuery; }; From b344b1e76d2d98fbc5a8153c2bd5fd83cb5f2fe2 Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 17:52:51 +0530 Subject: [PATCH 05/11] updated comments --- meerkat-core/src/cube-filter-transformer/in/in.ts | 14 +++++--------- .../src/cube-filter-transformer/not-in/not-in.ts | 14 +++++--------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index fa9fa573..2a06e081 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -64,16 +64,12 @@ const inDuckDbCondition = ( }; } default: { - // Optimized approach: Use string_split with delimiter - // This provides 91% size reduction by avoiding N VALUE_CONSTANT nodes - // Use special delimiter sequence unlikely to appear in data + /** + * Doing the string split optimization here because as the number of nodes in the AST increase, + * the time take to parse the AST increases, thereby increasing the time to generate the SQL. + */ const DELIMITER = '§§'; // Section sign - uncommon in normal data - const sanitizedValues = values.map((v) => { - const strVal = String(v); - // Escape delimiter if it appears in the value - return strVal.replace(/§§/g, '§§§§'); - }); - const joinedValues = sanitizedValues.join(DELIMITER); + const joinedValues = values.join(DELIMITER); return { class: ExpressionClass.SUBQUERY, diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index c5978762..35f0279b 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -72,16 +72,12 @@ const notInDuckDbCondition = ( }; } default: { - // Optimized approach: Use string_split with delimiter - // This provides 91% size reduction by avoiding N VALUE_CONSTANT nodes - // Use special delimiter sequence unlikely to appear in data + /** + * Doing the string split optimization here because as the number of nodes in the AST increase, + * the time take to parse the AST increases, thereby increasing the time to generate the SQL. + */ const DELIMITER = '§§'; // Section sign - uncommon in normal data - const sanitizedValues = values.map((v) => { - const strVal = String(v); - // Escape delimiter if it appears in the value - return strVal.replace(/§§/g, '§§§§'); - }); - const joinedValues = sanitizedValues.join(DELIMITER); + const joinedValues = values.join(DELIMITER); return { class: ExpressionClass.OPERATOR, From 10e0a151508562cb13380e0541b6d97f12544ac2 Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 17:53:25 +0530 Subject: [PATCH 06/11] updated versions --- meerkat-browser/package.json | 2 +- meerkat-core/package.json | 2 +- meerkat-node/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/meerkat-browser/package.json b/meerkat-browser/package.json index 0eb9f783..24bf7dec 100644 --- a/meerkat-browser/package.json +++ b/meerkat-browser/package.json @@ -1,6 +1,6 @@ { "name": "@devrev/meerkat-browser", - "version": "0.0.104", + "version": "0.0.105", "dependencies": { "tslib": "^2.3.0", "@devrev/meerkat-core": "*", diff --git a/meerkat-core/package.json b/meerkat-core/package.json index 743afa17..74ef3cda 100644 --- a/meerkat-core/package.json +++ b/meerkat-core/package.json @@ -1,6 +1,6 @@ { "name": "@devrev/meerkat-core", - "version": "0.0.104", + "version": "0.0.105", "dependencies": { "tslib": "^2.3.0" }, diff --git a/meerkat-node/package.json b/meerkat-node/package.json index f5a7a1ec..f56f91ee 100644 --- a/meerkat-node/package.json +++ b/meerkat-node/package.json @@ -1,6 +1,6 @@ { "name": "@devrev/meerkat-node", - "version": "0.0.104", + "version": "0.0.105", "dependencies": { "@swc/helpers": "~0.5.0", "@devrev/meerkat-core": "*", From 6705786f6c452a629ca669f52c4ed1ac27497e0b Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 18:21:21 +0530 Subject: [PATCH 07/11] added number and string based filtering with string split --- .../src/cube-filter-transformer/in/in.spec.ts | 123 ++++++++- .../src/cube-filter-transformer/in/in.ts | 161 +++++++++--- .../not-in/not-in.spec.ts | 245 ++++++++++++------ .../cube-filter-transformer/not-in/not-in.ts | 161 +++++++++--- meerkat-node/src/__tests__/test-data.ts | 168 ++++++++++++ 5 files changed, 703 insertions(+), 155 deletions(-) diff --git a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts index 53012392..6220b33c 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts @@ -16,12 +16,11 @@ describe('In transforms Tests', () => { ).toThrow(); }); - it('Should return the correct value for string member', () => { - // Now uses optimized subquery approach for all cases + it('Should return optimized string_split approach for string type', () => { const result = inTransform({ member: 'country', - operator: 'contains', - values: ['US'], + operator: 'in', + values: ['US', 'Canada', 'Mexico'], memberInfo: { name: 'country', sql: 'table.country', @@ -29,16 +28,128 @@ describe('In transforms Tests', () => { }, }); + // Check it returns a subquery structure with string_split + expect(result).toHaveProperty('class', 'SUBQUERY'); + expect(result).toHaveProperty('type', 'SUBQUERY'); + expect(result).toHaveProperty('subquery_type', 'ANY'); + + // Verify it's using string_split + const selectList = (result as any).subquery.node.select_list[0]; + expect(selectList.function_name).toBe('unnest'); + expect(selectList.children[0].function_name).toBe('string_split'); + + // Verify no CAST for strings + expect(selectList.type).toBe('FUNCTION'); + }); + + it('Should return optimized string_split approach with CAST for number type', () => { + const result = inTransform({ + member: 'order_id', + operator: 'in', + values: [1, 2, 3], + memberInfo: { + name: 'order_id', + sql: 'table.order_id', + type: 'number', + }, + }); + // Check it returns a subquery structure expect(result).toHaveProperty('class', 'SUBQUERY'); expect(result).toHaveProperty('type', 'SUBQUERY'); expect(result).toHaveProperty('subquery_type', 'ANY'); + + // Verify it's using string_split with CAST + const selectList = (result as any).subquery.node.select_list[0]; + expect(selectList.type).toBe('OPERATOR_CAST'); + expect(selectList.cast_type.id).toBe('DOUBLE'); + expect(selectList.child.function_name).toBe('unnest'); + expect(selectList.child.children[0].function_name).toBe('string_split'); + }); + + it('Should return standard ARRAY_CONSTRUCTOR for string_array type', () => { + const output = inTransform({ + member: 'country', + operator: 'in', + values: ['US', 'Germany', 'Israel'], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string_array', + }, + }) as ConjunctionExpression; + + // For array types, should use && operator with ARRAY_CONSTRUCTOR + expect(output.function_name).toBe('&&'); + expect(output.children[1].type).toBe('ARRAY_CONSTRUCTOR'); + expect(output.children[1].children.length).toBe(3); + }); + + it('Should return standard COMPARE_IN for other types (default case)', () => { + const output = inTransform({ + member: 'some_field', + operator: 'in', + values: ['val1', 'val2'], + memberInfo: { + name: 'some_field', + sql: 'table.some_field', + type: 'time' as any, // Unknown type to trigger default case + }, + }); + + // Default case should use COMPARE_IN + expect(output).toHaveProperty('type', 'COMPARE_IN'); + expect(output).toHaveProperty('class', 'OPERATOR'); + expect((output as any).children.length).toBe(3); // column + 2 values + }); + + it('Should handle large value lists efficiently with string_split', () => { + const largeValueList = Array.from({ length: 1000 }, (_, i) => `value${i}`); + const result = inTransform({ + member: 'country', + operator: 'in', + values: largeValueList, + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }); + + // Should still use subquery approach + expect(result).toHaveProperty('class', 'SUBQUERY'); + + // Verify only 2 VALUE_CONSTANT nodes (joined string + delimiter) + const selectList = (result as any).subquery.node.select_list[0]; + const stringSplitChildren = selectList.children[0].children; + expect(stringSplitChildren.length).toBe(2); + expect(stringSplitChildren[0].value.value).toContain('§§'); // Contains delimiter + }); + + it('Should use delimiter to join values', () => { + const result = inTransform({ + member: 'country', + operator: 'in', + values: ['US', 'Canada'], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }); + + const selectList = (result as any).subquery.node.select_list[0]; + const joinedValue = selectList.children[0].children[0].value.value; + const delimiter = selectList.children[0].children[1].value.value; + + expect(delimiter).toBe('§§'); + expect(joinedValue).toBe('US§§Canada'); }); - it('Should return the correct value for string_array member', () => { + it('Should handle the original test case structure for reference', () => { const output = inTransform({ member: 'country', - operator: 'contains', + operator: 'in', values: ['US', 'Germany', 'Israel'], memberInfo: { name: 'country', diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 2a06e081..73e65df2 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -63,7 +63,8 @@ const inDuckDbCondition = ( catalog: '', }; } - default: { + case 'string': + case 'number': { /** * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. @@ -82,39 +83,118 @@ const inDuckDbCondition = ( modifiers: [], cte_map: { map: [] }, select_list: [ - { - class: ExpressionClass.FUNCTION, - type: ExpressionType.FUNCTION, - alias: '', - function_name: 'unnest', - schema: '', - children: [ - { + // For numeric types, we need to CAST the string result to the appropriate type + memberInfo.type === 'number' + ? { + class: ExpressionClass.CAST, + type: ExpressionType.OPERATOR_CAST, + alias: '', + child: { + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, + alias: '', + function_name: 'unnest', + schema: '', + children: [ + { + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, + alias: '', + function_name: 'string_split', + schema: '', + children: [ + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: joinedValues, + }, + }, + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: DELIMITER, + }, + }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], + }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', + }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], + }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', + }, + cast_type: { + id: 'DOUBLE', + type_info: null, + }, + try_cast: false, + } + : { class: ExpressionClass.FUNCTION, type: ExpressionType.FUNCTION, alias: '', - function_name: 'string_split', + function_name: 'unnest', schema: '', children: [ { - class: ExpressionClass.CONSTANT, - type: ExpressionType.VALUE_CONSTANT, + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, alias: '', - value: { - type: { id: 'VARCHAR', type_info: null }, - is_null: false, - value: joinedValues, - }, - }, - { - class: ExpressionClass.CONSTANT, - type: ExpressionType.VALUE_CONSTANT, - alias: '', - value: { - type: { id: 'VARCHAR', type_info: null }, - is_null: false, - value: DELIMITER, + function_name: 'string_split', + schema: '', + children: [ + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: joinedValues, + }, + }, + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: DELIMITER, + }, + }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', }, ], filter: null, @@ -127,17 +207,6 @@ const inDuckDbCondition = ( export_state: false, catalog: '', }, - ], - filter: null, - order_bys: { - type: ResultModifierType.ORDER_MODIFIER, - orders: [], - }, - distinct: false, - is_operator: false, - export_state: false, - catalog: '', - }, ], from_table: { type: TableReferenceType.EMPTY, @@ -157,6 +226,24 @@ const inDuckDbCondition = ( comparison_type: ExpressionType.COMPARE_EQUAL, }; } + default: { + // For other types, use the standard COMPARE_IN approach + const sqlTreeValues = values.map((value) => { + return { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: valueBuilder(value, memberInfo), + }; + }); + + return { + class: ExpressionClass.OPERATOR, + type: ExpressionType.COMPARE_IN, + alias: '', + children: [columnRef, ...sqlTreeValues], + }; + } } }; diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts index 447bae67..c753d0c7 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts @@ -4,7 +4,7 @@ import { notInTransform } from './not-in'; describe('Not In transforms Tests', () => { it('Should throw error if values are undefined', () => { expect(() => - notInTransform({ + notInTransform({ member: 'country', operator: 'contains', memberInfo: { @@ -16,23 +16,120 @@ describe('Not In transforms Tests', () => { ).toThrow(); }); - it('Should return the correct value for string member', () => { - // Now uses optimized subquery approach for all cases + it('Should return optimized string_split approach for string type', () => { const result = notInTransform({ member: 'country', - operator: 'contains', - values: ['US'], + operator: 'notIn', + values: ['US', 'Canada', 'Mexico'], memberInfo: { name: 'country', sql: 'table.country', type: 'string', }, }); - + // Check it returns OPERATOR_NOT wrapping a subquery expect(result).toHaveProperty('class', 'OPERATOR'); expect(result).toHaveProperty('type', 'OPERATOR_NOT'); expect(result.children[0]).toHaveProperty('class', 'SUBQUERY'); + + // Verify it's using string_split + const subquery = (result as any).children[0]; + const selectList = subquery.subquery.node.select_list[0]; + expect(selectList.function_name).toBe('unnest'); + expect(selectList.children[0].function_name).toBe('string_split'); + + // Verify no CAST for strings + expect(selectList.type).toBe('FUNCTION'); + }); + + it('Should return optimized string_split approach with CAST for number type', () => { + const result = notInTransform({ + member: 'order_id', + operator: 'notIn', + values: [1, 2, 3], + memberInfo: { + name: 'order_id', + sql: 'table.order_id', + type: 'number', + }, + }); + + // Check it returns OPERATOR_NOT wrapping a subquery + expect(result).toHaveProperty('class', 'OPERATOR'); + expect(result).toHaveProperty('type', 'OPERATOR_NOT'); + + // Verify it's using string_split with CAST + const subquery = (result as any).children[0]; + const selectList = subquery.subquery.node.select_list[0]; + expect(selectList.type).toBe('OPERATOR_CAST'); + expect(selectList.cast_type.id).toBe('DOUBLE'); + expect(selectList.child.function_name).toBe('unnest'); + expect(selectList.child.children[0].function_name).toBe('string_split'); + }); + + it('Should return standard COMPARE_NOT_IN for other types (default case)', () => { + const output = notInTransform({ + member: 'some_field', + operator: 'notIn', + values: ['val1', 'val2'], + memberInfo: { + name: 'some_field', + sql: 'table.some_field', + type: 'time' as any, // Unknown type to trigger default case + }, + }); + + // Default case should use COMPARE_NOT_IN + expect(output).toHaveProperty('type', 'COMPARE_NOT_IN'); + expect(output).toHaveProperty('class', 'OPERATOR'); + expect((output as any).children.length).toBe(3); // column + 2 values + }); + + it('Should handle large value lists efficiently with string_split', () => { + const largeValueList = Array.from({ length: 1000 }, (_, i) => `value${i}`); + const result = notInTransform({ + member: 'country', + operator: 'notIn', + values: largeValueList, + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }); + + // Should use OPERATOR_NOT wrapping subquery + expect(result).toHaveProperty('type', 'OPERATOR_NOT'); + expect(result.children[0]).toHaveProperty('class', 'SUBQUERY'); + + // Verify only 2 VALUE_CONSTANT nodes (joined string + delimiter) + const subquery = (result as any).children[0]; + const selectList = subquery.subquery.node.select_list[0]; + const stringSplitChildren = selectList.children[0].children; + expect(stringSplitChildren.length).toBe(2); + expect(stringSplitChildren[0].value.value).toContain('§§'); + }); + + it('Should use delimiter to join values', () => { + const result = notInTransform({ + member: 'country', + operator: 'notIn', + values: ['US', 'Canada'], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }); + + const subquery = (result as any).children[0]; + const selectList = subquery.subquery.node.select_list[0]; + const joinedValue = selectList.children[0].children[0].value.value; + const delimiter = selectList.children[0].children[1].value.value; + + expect(delimiter).toBe('§§'); + expect(joinedValue).toBe('US§§Canada'); }); it('Should return the correct value for string_array member', () => { @@ -47,83 +144,81 @@ describe('Not In transforms Tests', () => { }, }) as ConjunctionExpression; expect(output).toEqual({ - "alias": "", - "children": [ - { - "alias": "", - "catalog": "", - "children": [ - { - "alias": "", - "class": "COLUMN_REF", - "column_names": [ - "country", - ], - "type": "COLUMN_REF", - }, - { - "alias": "", - "children": [ - { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "US", + alias: '', + children: [ + { + alias: '', + catalog: '', + children: [ + { + alias: '', + class: 'COLUMN_REF', + column_names: ['country'], + type: 'COLUMN_REF', + }, + { + alias: '', + children: [ + { + alias: '', + class: 'CONSTANT', + type: 'VALUE_CONSTANT', + value: { + is_null: false, + type: { + id: 'VARCHAR', + type_info: null, }, + value: 'US', }, - { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "Germany", + }, + { + alias: '', + class: 'CONSTANT', + type: 'VALUE_CONSTANT', + value: { + is_null: false, + type: { + id: 'VARCHAR', + type_info: null, }, + value: 'Germany', }, - { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "Israel", + }, + { + alias: '', + class: 'CONSTANT', + type: 'VALUE_CONSTANT', + value: { + is_null: false, + type: { + id: 'VARCHAR', + type_info: null, }, + value: 'Israel', }, - ], - "class": "OPERATOR", - "type": "ARRAY_CONSTRUCTOR", - }, - ], - "class": "FUNCTION", - "distinct": false, - "export_state": false, - "filter": null, - "function_name": "&&", - "is_operator": true, - "order_bys": { - "orders": [], - "type": "ORDER_MODIFIER", + }, + ], + class: 'OPERATOR', + type: 'ARRAY_CONSTRUCTOR', }, - "schema": "", - "type": "FUNCTION", + ], + class: 'FUNCTION', + distinct: false, + export_state: false, + filter: null, + function_name: '&&', + is_operator: true, + order_bys: { + orders: [], + type: 'ORDER_MODIFIER', }, - ], - "class": "OPERATOR", - "type": "OPERATOR_NOT", + schema: '', + type: 'FUNCTION', + }, + ], + class: 'OPERATOR', + type: 'OPERATOR_NOT', }); }); }); diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index 35f0279b..57285b87 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -71,7 +71,8 @@ const notInDuckDbCondition = ( ], }; } - default: { + case 'string': + case 'number': { /** * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. @@ -95,39 +96,118 @@ const notInDuckDbCondition = ( modifiers: [], cte_map: { map: [] }, select_list: [ - { - class: ExpressionClass.FUNCTION, - type: ExpressionType.FUNCTION, - alias: '', - function_name: 'unnest', - schema: '', - children: [ - { + // For numeric types, we need to CAST the string result to the appropriate type + memberInfo.type === 'number' + ? { + class: ExpressionClass.CAST, + type: ExpressionType.OPERATOR_CAST, + alias: '', + child: { + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, + alias: '', + function_name: 'unnest', + schema: '', + children: [ + { + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, + alias: '', + function_name: 'string_split', + schema: '', + children: [ + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: joinedValues, + }, + }, + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: DELIMITER, + }, + }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], + }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', + }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], + }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', + }, + cast_type: { + id: 'DOUBLE', + type_info: null, + }, + try_cast: false, + } + : { class: ExpressionClass.FUNCTION, type: ExpressionType.FUNCTION, alias: '', - function_name: 'string_split', + function_name: 'unnest', schema: '', children: [ { - class: ExpressionClass.CONSTANT, - type: ExpressionType.VALUE_CONSTANT, + class: ExpressionClass.FUNCTION, + type: ExpressionType.FUNCTION, alias: '', - value: { - type: { id: 'VARCHAR', type_info: null }, - is_null: false, - value: joinedValues, - }, - }, - { - class: ExpressionClass.CONSTANT, - type: ExpressionType.VALUE_CONSTANT, - alias: '', - value: { - type: { id: 'VARCHAR', type_info: null }, - is_null: false, - value: DELIMITER, + function_name: 'string_split', + schema: '', + children: [ + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: joinedValues, + }, + }, + { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: { + type: { id: 'VARCHAR', type_info: null }, + is_null: false, + value: DELIMITER, + }, + }, + ], + filter: null, + order_bys: { + type: ResultModifierType.ORDER_MODIFIER, + orders: [], }, + distinct: false, + is_operator: false, + export_state: false, + catalog: '', }, ], filter: null, @@ -140,17 +220,6 @@ const notInDuckDbCondition = ( export_state: false, catalog: '', }, - ], - filter: null, - order_bys: { - type: ResultModifierType.ORDER_MODIFIER, - orders: [], - }, - distinct: false, - is_operator: false, - export_state: false, - catalog: '', - }, ], from_table: { type: TableReferenceType.EMPTY, @@ -172,6 +241,24 @@ const notInDuckDbCondition = ( ], }; } + default: { + // For other types, use the standard COMPARE_NOT_IN approach + const sqlTreeValues = values.map((value) => { + return { + class: ExpressionClass.CONSTANT, + type: ExpressionType.VALUE_CONSTANT, + alias: '', + value: valueBuilder(value, memberInfo), + }; + }); + + return { + class: ExpressionClass.OPERATOR, + type: ExpressionType.COMPARE_NOT_IN, + alias: '', + children: [columnRef, ...sqlTreeValues], + }; + } } }; diff --git a/meerkat-node/src/__tests__/test-data.ts b/meerkat-node/src/__tests__/test-data.ts index 725bced1..4f2fd3d6 100644 --- a/meerkat-node/src/__tests__/test-data.ts +++ b/meerkat-node/src/__tests__/test-data.ts @@ -986,6 +986,8 @@ export const TEST_DATA = [ [ { testName: 'In', + // customer_id is string type -> uses optimized string_split + // vendors is string_array type -> uses ARRAY overlap && expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§')))) AND (orders__vendors && (ARRAY['myntra', 'amazon'])))`, cubeInput: { measures: ['*'], @@ -1032,6 +1034,51 @@ export const TEST_DATA = [ }, ], }, + { + testName: 'In with numeric type (optimized with CAST)', + // order_id is number type -> uses optimized string_split with CAST to DOUBLE + expectedSQL: `SELECT orders.* FROM (SELECT order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2§§3', '§§')) AS DOUBLE)))`, + cubeInput: { + measures: ['*'], + filters: [ + { + member: 'orders.order_id', + operator: 'in', + values: [1, 2, 3], + }, + ], + dimensions: [], + }, + expectedOutput: [ + { + order_id: 1, + customer_id: '1', + product_id: '1', + order_date: '2022-01-01', + order_amount: 50.0, + orders__order_id: 1, + vendors: ['myntra', 'amazon', 'flipkart'], + }, + { + order_id: 2, + customer_id: '1', + product_id: '2', + order_date: '2022-01-02', + order_amount: 80.0, + orders__order_id: 2, + vendors: ['myntra'], + }, + { + order_id: 3, + customer_id: '2', + product_id: '3', + order_date: '2022-02-01', + order_amount: 25.0, + orders__order_id: 3, + vendors: [], + }, + ], + }, { testName: 'In with single quotes', expectedSQL: `SELECT orders.* FROM (SELECT vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__vendors && (ARRAY['swiggy''s'])))`, @@ -1067,6 +1114,8 @@ export const TEST_DATA = [ [ { testName: 'Not In', + // customer_id is string type -> uses optimized string_split with NOT + // vendors is string_array type -> uses NOT with ARRAY overlap && expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((NOT (orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§'))))) AND (NOT (orders__vendors && (ARRAY['myntra', 'flipkart']))))`, cubeInput: { measures: ['*'], @@ -1135,5 +1184,124 @@ export const TEST_DATA = [ }, ], }, + { + testName: 'Not In with numeric type (optimized with CAST)', + // order_id is number type -> uses optimized string_split with CAST and NOT + expectedSQL: `SELECT orders.* FROM (SELECT order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE (NOT (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2', '§§')) AS DOUBLE))))`, + cubeInput: { + measures: ['*'], + filters: [ + { + member: 'orders.order_id', + operator: 'notIn', + values: [1, 2], + }, + ], + dimensions: [], + }, + expectedOutput: [ + { + order_id: 3, + customer_id: '2', + product_id: '3', + order_date: '2022-02-01', + order_amount: 25.0, + orders__order_id: 3, + vendors: [], + }, + { + order_id: 4, + customer_id: '2', + product_id: '1', + order_date: '2022-03-01', + order_amount: 75.0, + orders__order_id: 4, + vendors: ['flipkart'], + }, + { + order_id: 5, + customer_id: '3', + product_id: '1', + order_date: '2022-03-02', + order_amount: 100.0, + orders__order_id: 5, + vendors: ['myntra', 'amazon', 'flipkart'], + }, + { + order_id: 6, + customer_id: '4', + product_id: '2', + order_date: '2022-04-01', + order_amount: 45.0, + orders__order_id: 6, + vendors: [], + }, + { + order_id: 7, + customer_id: '4', + product_id: '3', + order_date: '2022-05-01', + order_amount: 90.0, + orders__order_id: 7, + vendors: ['myntra', 'flipkart'], + }, + { + order_id: 8, + customer_id: '5', + product_id: '1', + order_date: '2022-05-02', + order_amount: 65.0, + orders__order_id: 8, + vendors: ['amazon', 'flipkart'], + }, + { + order_id: 9, + customer_id: '5', + product_id: '2', + order_date: '2022-05-05', + order_amount: 85.0, + orders__order_id: 9, + vendors: [], + }, + { + order_id: 10, + customer_id: '6', + product_id: '3', + order_date: '2022-06-01', + order_amount: 120.0, + orders__order_id: 10, + vendors: ['myntra', 'amazon'], + }, + { + order_id: 11, + customer_id: '6aa6', + product_id: '3', + order_date: '2024-06-01', + order_amount: 0.0, + orders__order_id: 11, + vendors: ['amazon'], + }, + { + order_id: 12, + customer_id: null, + product_id: '3', + order_date: '2024-07-01T00:00:00.000Z', + order_amount: 100.0, + orders__order_id: 12, + orders__order_date: undefined, + vendors: ['flipkart'], + }, + { + order_id: 13, + customer_id: '7', + product_id: '6', + order_date: '2024-08-01T00:00:00.000Z', + order_amount: 100.0, + orders__order_id: 13, + orders__order_date: undefined, + vendors: ["swiggy's"], + }, + ], + }, ], ]; From b82aa8f56b41f9b14e699cdfb0aee116e8be38ed Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 18:56:27 +0530 Subject: [PATCH 08/11] common delimiter --- meerkat-core/src/cube-filter-transformer/in/in.ts | 8 +++++--- meerkat-core/src/cube-filter-transformer/not-in/not-in.ts | 8 +++++--- meerkat-core/src/member-formatters/constants.ts | 2 ++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 73e65df2..0d83e20b 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -1,4 +1,7 @@ -import { COLUMN_NAME_DELIMITER } from '../../member-formatters/constants'; +import { + COLUMN_NAME_DELIMITER, + STRING_ARRAY_DELIMITER, +} from '../../member-formatters/constants'; import { Dimension, Measure } from '../../types/cube-types/table'; import { AggregateHandling, @@ -69,8 +72,7 @@ const inDuckDbCondition = ( * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. */ - const DELIMITER = '§§'; // Section sign - uncommon in normal data - const joinedValues = values.join(DELIMITER); + const joinedValues = values.join(STRING_ARRAY_DELIMITER); return { class: ExpressionClass.SUBQUERY, diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index 57285b87..76a05592 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -1,7 +1,10 @@ import { Dimension, Measure } from '../../types/cube-types/table'; import { CubeToParseExpressionTransform } from '../factory'; -import { COLUMN_NAME_DELIMITER } from '../../member-formatters/constants'; +import { + COLUMN_NAME_DELIMITER, + STRING_ARRAY_DELIMITER, +} from '../../member-formatters/constants'; import { AggregateHandling, QueryNodeType, @@ -77,8 +80,7 @@ const notInDuckDbCondition = ( * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. */ - const DELIMITER = '§§'; // Section sign - uncommon in normal data - const joinedValues = values.join(DELIMITER); + const joinedValues = values.join(STRING_ARRAY_DELIMITER); return { class: ExpressionClass.OPERATOR, diff --git a/meerkat-core/src/member-formatters/constants.ts b/meerkat-core/src/member-formatters/constants.ts index 60413962..c32b5762 100644 --- a/meerkat-core/src/member-formatters/constants.ts +++ b/meerkat-core/src/member-formatters/constants.ts @@ -1,2 +1,4 @@ export const COLUMN_NAME_DELIMITER = '.'; export const MEERKAT_OUTPUT_DELIMITER = '__'; + +export const STRING_ARRAY_DELIMITER = '§§'; From 8b3831f3a5bdccf38f2bb3c3033aa6f34d9f56b7 Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 19:09:18 +0530 Subject: [PATCH 09/11] added combined test cases --- .../src/cube-filter-transformer/in/in.ts | 4 +- .../cube-filter-transformer/not-in/not-in.ts | 4 +- meerkat-node/src/__tests__/test-data.ts | 145 ++++++++++++++++++ 3 files changed, 149 insertions(+), 4 deletions(-) diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 0d83e20b..7853dfd0 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -122,7 +122,7 @@ const inDuckDbCondition = ( value: { type: { id: 'VARCHAR', type_info: null }, is_null: false, - value: DELIMITER, + value: STRING_ARRAY_DELIMITER, }, }, ], @@ -184,7 +184,7 @@ const inDuckDbCondition = ( value: { type: { id: 'VARCHAR', type_info: null }, is_null: false, - value: DELIMITER, + value: STRING_ARRAY_DELIMITER, }, }, ], diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index 76a05592..e6d6f19f 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -135,7 +135,7 @@ const notInDuckDbCondition = ( value: { type: { id: 'VARCHAR', type_info: null }, is_null: false, - value: DELIMITER, + value: STRING_ARRAY_DELIMITER, }, }, ], @@ -197,7 +197,7 @@ const notInDuckDbCondition = ( value: { type: { id: 'VARCHAR', type_info: null }, is_null: false, - value: DELIMITER, + value: STRING_ARRAY_DELIMITER, }, }, ], diff --git a/meerkat-node/src/__tests__/test-data.ts b/meerkat-node/src/__tests__/test-data.ts index 4f2fd3d6..44909a10 100644 --- a/meerkat-node/src/__tests__/test-data.ts +++ b/meerkat-node/src/__tests__/test-data.ts @@ -1110,6 +1110,73 @@ export const TEST_DATA = [ }, ], }, + { + testName: + 'Multiple In filters combined (customer_id, product_id, order_id)', + // Tests all three optimized filters working together + // customer_id (string), product_id (string), order_id (number with CAST) + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, product_id AS orders__product_id, order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§')))) AND (orders__product_id = ANY(SELECT unnest(string_split('1§§2', '§§')))) AND (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2§§3§§4', '§§')) AS DOUBLE))))`, + cubeInput: { + measures: ['*'], + filters: [ + { + and: [ + { + member: 'orders.customer_id', + operator: 'in', + values: ['1', '2'], + }, + { + member: 'orders.product_id', + operator: 'in', + values: ['1', '2'], + }, + { + member: 'orders.order_id', + operator: 'in', + values: [1, 2, 3, 4], + }, + ], + }, + ], + dimensions: [], + }, + expectedOutput: [ + { + order_id: 4, + customer_id: '2', + product_id: '1', + order_date: '2022-03-01', + order_amount: 75.0, + orders__customer_id: '2', + orders__product_id: '1', + orders__order_id: 4, + vendors: ['flipkart'], + }, + { + order_id: 2, + customer_id: '1', + product_id: '2', + order_date: '2022-01-02', + order_amount: 80.0, + orders__customer_id: '1', + orders__product_id: '2', + orders__order_id: 2, + vendors: ['myntra'], + }, + { + order_id: 1, + customer_id: '1', + product_id: '1', + order_date: '2022-01-01', + order_amount: 50.0, + orders__customer_id: '1', + orders__product_id: '1', + orders__order_id: 1, + vendors: ['myntra', 'amazon', 'flipkart'], + }, + ], + }, ], [ { @@ -1303,5 +1370,83 @@ export const TEST_DATA = [ }, ], }, + { + testName: + 'Multiple NotIn filters combined (customer_id, product_id, order_id)', + // Tests all three optimized NOT IN filters working together + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, product_id AS orders__product_id, order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE ((NOT (orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§'))))) AND (NOT (orders__product_id = ANY(SELECT unnest(string_split('1§§2', '§§'))))) AND (NOT (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2', '§§')) AS DOUBLE)))))`, + cubeInput: { + measures: ['*'], + filters: [ + { + and: [ + { + member: 'orders.customer_id', + operator: 'notIn', + values: ['1', '2'], + }, + { + member: 'orders.product_id', + operator: 'notIn', + values: ['1', '2'], + }, + { + member: 'orders.order_id', + operator: 'notIn', + values: [1, 2], + }, + ], + }, + ], + dimensions: [], + }, + expectedOutput: [ + { + order_id: 7, + customer_id: '4', + product_id: '3', + order_date: '2022-05-01', + order_amount: 90.0, + orders__customer_id: '4', + orders__product_id: '3', + orders__order_id: 7, + vendors: ['myntra', 'flipkart'], + }, + { + order_id: 10, + customer_id: '6', + product_id: '3', + order_date: '2022-06-01', + order_amount: 120.0, + orders__customer_id: '6', + orders__product_id: '3', + orders__order_id: 10, + vendors: ['myntra', 'amazon'], + }, + { + order_id: 11, + customer_id: '6aa6', + product_id: '3', + order_date: '2024-06-01', + order_amount: 0.0, + orders__customer_id: '6aa6', + orders__product_id: '3', + orders__order_id: 11, + vendors: ['amazon'], + }, + { + order_id: 13, + customer_id: '7', + product_id: '6', + order_date: '2024-08-01T00:00:00.000Z', + order_amount: 100.0, + orders__customer_id: '7', + orders__product_id: '6', + orders__order_id: 13, + orders__order_date: undefined, + vendors: ["swiggy's"], + }, + ], + }, ], ]; From cbccc03e42ed92e963b68a2555b25856e658034f Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 21:38:49 +0530 Subject: [PATCH 10/11] added combined test cases --- .../src/cube-filter-transformer/in/in.spec.ts | 189 +++++++++++------- .../src/cube-filter-transformer/in/in.ts | 13 +- .../not-in/not-in.spec.ts | 46 +++++ .../cube-filter-transformer/not-in/not-in.ts | 13 +- 4 files changed, 187 insertions(+), 74 deletions(-) diff --git a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts index 6220b33c..ace4f749 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts @@ -4,7 +4,7 @@ import { inTransform } from './in'; describe('In transforms Tests', () => { it('Should throw error if values are undefined', () => { expect(() => - inTransform({ + inTransform({ member: 'country', operator: 'contains', memberInfo: { @@ -27,17 +27,17 @@ describe('In transforms Tests', () => { type: 'string', }, }); - + // Check it returns a subquery structure with string_split expect(result).toHaveProperty('class', 'SUBQUERY'); expect(result).toHaveProperty('type', 'SUBQUERY'); expect(result).toHaveProperty('subquery_type', 'ANY'); - + // Verify it's using string_split const selectList = (result as any).subquery.node.select_list[0]; expect(selectList.function_name).toBe('unnest'); expect(selectList.children[0].function_name).toBe('string_split'); - + // Verify no CAST for strings expect(selectList.type).toBe('FUNCTION'); }); @@ -53,12 +53,12 @@ describe('In transforms Tests', () => { type: 'number', }, }); - + // Check it returns a subquery structure expect(result).toHaveProperty('class', 'SUBQUERY'); expect(result).toHaveProperty('type', 'SUBQUERY'); expect(result).toHaveProperty('subquery_type', 'ANY'); - + // Verify it's using string_split with CAST const selectList = (result as any).subquery.node.select_list[0]; expect(selectList.type).toBe('OPERATOR_CAST'); @@ -78,7 +78,7 @@ describe('In transforms Tests', () => { type: 'string_array', }, }) as ConjunctionExpression; - + // For array types, should use && operator with ARRAY_CONSTRUCTOR expect(output.function_name).toBe('&&'); expect(output.children[1].type).toBe('ARRAY_CONSTRUCTOR'); @@ -96,7 +96,7 @@ describe('In transforms Tests', () => { type: 'time' as any, // Unknown type to trigger default case }, }); - + // Default case should use COMPARE_IN expect(output).toHaveProperty('type', 'COMPARE_IN'); expect(output).toHaveProperty('class', 'OPERATOR'); @@ -115,10 +115,10 @@ describe('In transforms Tests', () => { type: 'string', }, }); - + // Should still use subquery approach expect(result).toHaveProperty('class', 'SUBQUERY'); - + // Verify only 2 VALUE_CONSTANT nodes (joined string + delimiter) const selectList = (result as any).subquery.node.select_list[0]; const stringSplitChildren = selectList.children[0].children; @@ -137,11 +137,11 @@ describe('In transforms Tests', () => { type: 'string', }, }); - + const selectList = (result as any).subquery.node.select_list[0]; const joinedValue = selectList.children[0].children[0].value.value; const delimiter = selectList.children[0].children[1].value.value; - + expect(delimiter).toBe('§§'); expect(joinedValue).toBe('US§§Canada'); }); @@ -157,76 +157,121 @@ describe('In transforms Tests', () => { type: 'string_array', }, }) as ConjunctionExpression; - expect(output).toEqual( { - "alias": "", - "catalog": "", - "children": [ - { - "alias": "", - "class": "COLUMN_REF", - "column_names": [ - "country", - ], - "type": "COLUMN_REF", + expect(output).toEqual({ + alias: '', + catalog: '', + children: [ + { + alias: '', + class: 'COLUMN_REF', + column_names: ['country'], + type: 'COLUMN_REF', }, + { + alias: '', + children: [ { - "alias": "", - "children": [ - { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "US", + alias: '', + class: 'CONSTANT', + type: 'VALUE_CONSTANT', + value: { + is_null: false, + type: { + id: 'VARCHAR', + type_info: null, }, + value: 'US', + }, }, { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "Germany", + alias: '', + class: 'CONSTANT', + type: 'VALUE_CONSTANT', + value: { + is_null: false, + type: { + id: 'VARCHAR', + type_info: null, }, + value: 'Germany', + }, }, { - "alias": "", - "class": "CONSTANT", - "type": "VALUE_CONSTANT", - "value": { - "is_null": false, - "type": { - "id": "VARCHAR", - "type_info": null, - }, - "value": "Israel", + alias: '', + class: 'CONSTANT', + type: 'VALUE_CONSTANT', + value: { + is_null: false, + type: { + id: 'VARCHAR', + type_info: null, }, - }], - "class": "OPERATOR", - "type": "ARRAY_CONSTRUCTOR", - }, - ], - "class": "FUNCTION", - "distinct": false, - "export_state": false, - "filter": null, - "function_name": "&&", - "is_operator": true, - "order_bys": { - "orders": [], - "type": "ORDER_MODIFIER", + value: 'Israel', + }, + }, + ], + class: 'OPERATOR', + type: 'ARRAY_CONSTRUCTOR', }, - "schema": "", - "type": "FUNCTION", + ], + class: 'FUNCTION', + distinct: false, + export_state: false, + filter: null, + function_name: '&&', + is_operator: true, + order_bys: { + orders: [], + type: 'ORDER_MODIFIER', + }, + schema: '', + type: 'FUNCTION', }); }); + + it('Should throw error if values array is empty', () => { + expect(() => + inTransform({ + member: 'country', + operator: 'in', + values: [], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }) + ).toThrow('In filter must have at least one value'); + }); + + it('Should throw error if values contain the reserved delimiter', () => { + expect(() => + inTransform({ + member: 'country', + operator: 'in', + values: ['US', 'has§§delimiter', 'Mexico'], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }) + ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); + }); + + it('Should throw error if numeric values contain the reserved delimiter', () => { + // This could happen if someone passes a string representation of a number + expect(() => + inTransform({ + member: 'order_id', + operator: 'in', + values: ['123§§456', '789'], + memberInfo: { + name: 'order_id', + sql: 'table.order_id', + type: 'number', + }, + }) + ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); + }); }); diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 7853dfd0..81001fb0 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -72,6 +72,17 @@ const inDuckDbCondition = ( * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. */ + + // Validate that no values contain the delimiter to prevent incorrect splitting + const hasDelimiter = values.some((v) => + String(v).includes(STRING_ARRAY_DELIMITER) + ); + if (hasDelimiter) { + throw new Error( + `Filter values cannot contain the reserved delimiter '${STRING_ARRAY_DELIMITER}'` + ); + } + const joinedValues = values.join(STRING_ARRAY_DELIMITER); return { @@ -251,7 +262,7 @@ const inDuckDbCondition = ( export const inTransform: CubeToParseExpressionTransform = (query) => { const { member, values, memberInfo } = query; - if (!values) { + if (!values || values.length === 0) { throw new Error('In filter must have at least one value'); } return inDuckDbCondition(member, values, memberInfo); diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts index c753d0c7..6735c9c9 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts @@ -221,4 +221,50 @@ describe('Not In transforms Tests', () => { type: 'OPERATOR_NOT', }); }); + + it('Should throw error if values array is empty', () => { + expect(() => + notInTransform({ + member: 'country', + operator: 'notIn', + values: [], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }) + ).toThrow('Not in filter must have at least one value'); + }); + + it('Should throw error if values contain the reserved delimiter', () => { + expect(() => + notInTransform({ + member: 'country', + operator: 'notIn', + values: ['US', 'has§§delimiter', 'Mexico'], + memberInfo: { + name: 'country', + sql: 'table.country', + type: 'string', + }, + }) + ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); + }); + + it('Should throw error if numeric values contain the reserved delimiter', () => { + // This could happen if someone passes a string representation of a number + expect(() => + notInTransform({ + member: 'order_id', + operator: 'notIn', + values: ['123§§456', '789'], + memberInfo: { + name: 'order_id', + sql: 'table.order_id', + type: 'number', + }, + }) + ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); + }); }); diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index e6d6f19f..ae58df59 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -80,6 +80,17 @@ const notInDuckDbCondition = ( * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. */ + + // Validate that no values contain the delimiter to prevent incorrect splitting + const hasDelimiter = values.some((v) => + String(v).includes(STRING_ARRAY_DELIMITER) + ); + if (hasDelimiter) { + throw new Error( + `Filter values cannot contain the reserved delimiter '${STRING_ARRAY_DELIMITER}'` + ); + } + const joinedValues = values.join(STRING_ARRAY_DELIMITER); return { @@ -266,7 +277,7 @@ const notInDuckDbCondition = ( export const notInTransform: CubeToParseExpressionTransform = (query) => { const { member, values, memberInfo } = query; - if (!values) { + if (!values || values.length === 0) { throw new Error('Not in filter must have at least one value'); } From b2fe578b075182c12d405fee95a17f127b75c0ef Mon Sep 17 00:00:00 2001 From: zaidjan Date: Mon, 10 Nov 2025 22:01:45 +0530 Subject: [PATCH 11/11] elongate the delimiter --- .../src/cube-filter-transformer/in/in.spec.ts | 37 ++----------------- .../src/cube-filter-transformer/in/in.ts | 11 ------ .../not-in/not-in.spec.ts | 37 ++----------------- .../cube-filter-transformer/not-in/not-in.ts | 11 ------ .../src/member-formatters/constants.ts | 4 +- meerkat-node/src/__tests__/test-data.ts | 12 +++--- 6 files changed, 15 insertions(+), 97 deletions(-) diff --git a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts index ace4f749..207b33e8 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.spec.ts @@ -123,7 +123,7 @@ describe('In transforms Tests', () => { const selectList = (result as any).subquery.node.select_list[0]; const stringSplitChildren = selectList.children[0].children; expect(stringSplitChildren.length).toBe(2); - expect(stringSplitChildren[0].value.value).toContain('§§'); // Contains delimiter + expect(stringSplitChildren[0].value.value).toContain('§‡¶'); // Contains delimiter }); it('Should use delimiter to join values', () => { @@ -142,8 +142,8 @@ describe('In transforms Tests', () => { const joinedValue = selectList.children[0].children[0].value.value; const delimiter = selectList.children[0].children[1].value.value; - expect(delimiter).toBe('§§'); - expect(joinedValue).toBe('US§§Canada'); + expect(delimiter).toBe('§‡¶'); + expect(joinedValue).toBe('US§‡¶Canada'); }); it('Should handle the original test case structure for reference', () => { @@ -243,35 +243,4 @@ describe('In transforms Tests', () => { }) ).toThrow('In filter must have at least one value'); }); - - it('Should throw error if values contain the reserved delimiter', () => { - expect(() => - inTransform({ - member: 'country', - operator: 'in', - values: ['US', 'has§§delimiter', 'Mexico'], - memberInfo: { - name: 'country', - sql: 'table.country', - type: 'string', - }, - }) - ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); - }); - - it('Should throw error if numeric values contain the reserved delimiter', () => { - // This could happen if someone passes a string representation of a number - expect(() => - inTransform({ - member: 'order_id', - operator: 'in', - values: ['123§§456', '789'], - memberInfo: { - name: 'order_id', - sql: 'table.order_id', - type: 'number', - }, - }) - ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); - }); }); diff --git a/meerkat-core/src/cube-filter-transformer/in/in.ts b/meerkat-core/src/cube-filter-transformer/in/in.ts index 81001fb0..f075a88e 100644 --- a/meerkat-core/src/cube-filter-transformer/in/in.ts +++ b/meerkat-core/src/cube-filter-transformer/in/in.ts @@ -72,17 +72,6 @@ const inDuckDbCondition = ( * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. */ - - // Validate that no values contain the delimiter to prevent incorrect splitting - const hasDelimiter = values.some((v) => - String(v).includes(STRING_ARRAY_DELIMITER) - ); - if (hasDelimiter) { - throw new Error( - `Filter values cannot contain the reserved delimiter '${STRING_ARRAY_DELIMITER}'` - ); - } - const joinedValues = values.join(STRING_ARRAY_DELIMITER); return { diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts index 6735c9c9..1641d668 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.spec.ts @@ -108,7 +108,7 @@ describe('Not In transforms Tests', () => { const selectList = subquery.subquery.node.select_list[0]; const stringSplitChildren = selectList.children[0].children; expect(stringSplitChildren.length).toBe(2); - expect(stringSplitChildren[0].value.value).toContain('§§'); + expect(stringSplitChildren[0].value.value).toContain('§‡¶'); }); it('Should use delimiter to join values', () => { @@ -128,8 +128,8 @@ describe('Not In transforms Tests', () => { const joinedValue = selectList.children[0].children[0].value.value; const delimiter = selectList.children[0].children[1].value.value; - expect(delimiter).toBe('§§'); - expect(joinedValue).toBe('US§§Canada'); + expect(delimiter).toBe('§‡¶'); + expect(joinedValue).toBe('US§‡¶Canada'); }); it('Should return the correct value for string_array member', () => { @@ -236,35 +236,4 @@ describe('Not In transforms Tests', () => { }) ).toThrow('Not in filter must have at least one value'); }); - - it('Should throw error if values contain the reserved delimiter', () => { - expect(() => - notInTransform({ - member: 'country', - operator: 'notIn', - values: ['US', 'has§§delimiter', 'Mexico'], - memberInfo: { - name: 'country', - sql: 'table.country', - type: 'string', - }, - }) - ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); - }); - - it('Should throw error if numeric values contain the reserved delimiter', () => { - // This could happen if someone passes a string representation of a number - expect(() => - notInTransform({ - member: 'order_id', - operator: 'notIn', - values: ['123§§456', '789'], - memberInfo: { - name: 'order_id', - sql: 'table.order_id', - type: 'number', - }, - }) - ).toThrow("Filter values cannot contain the reserved delimiter '§§'"); - }); }); diff --git a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts index ae58df59..741dab2e 100644 --- a/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts +++ b/meerkat-core/src/cube-filter-transformer/not-in/not-in.ts @@ -80,17 +80,6 @@ const notInDuckDbCondition = ( * Doing the string split optimization here because as the number of nodes in the AST increase, * the time take to parse the AST increases, thereby increasing the time to generate the SQL. */ - - // Validate that no values contain the delimiter to prevent incorrect splitting - const hasDelimiter = values.some((v) => - String(v).includes(STRING_ARRAY_DELIMITER) - ); - if (hasDelimiter) { - throw new Error( - `Filter values cannot contain the reserved delimiter '${STRING_ARRAY_DELIMITER}'` - ); - } - const joinedValues = values.join(STRING_ARRAY_DELIMITER); return { diff --git a/meerkat-core/src/member-formatters/constants.ts b/meerkat-core/src/member-formatters/constants.ts index c32b5762..cfed3d46 100644 --- a/meerkat-core/src/member-formatters/constants.ts +++ b/meerkat-core/src/member-formatters/constants.ts @@ -1,4 +1,6 @@ export const COLUMN_NAME_DELIMITER = '.'; export const MEERKAT_OUTPUT_DELIMITER = '__'; -export const STRING_ARRAY_DELIMITER = '§§'; +// Multi-character delimiter using three different uncommon characters +// to minimize the chance of collision with real data +export const STRING_ARRAY_DELIMITER = '§‡¶'; diff --git a/meerkat-node/src/__tests__/test-data.ts b/meerkat-node/src/__tests__/test-data.ts index 44909a10..b483b5fa 100644 --- a/meerkat-node/src/__tests__/test-data.ts +++ b/meerkat-node/src/__tests__/test-data.ts @@ -988,7 +988,7 @@ export const TEST_DATA = [ testName: 'In', // customer_id is string type -> uses optimized string_split // vendors is string_array type -> uses ARRAY overlap && - expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§')))) AND (orders__vendors && (ARRAY['myntra', 'amazon'])))`, + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id = ANY(SELECT unnest(string_split('1§‡¶2', '§‡¶')))) AND (orders__vendors && (ARRAY['myntra', 'amazon'])))`, cubeInput: { measures: ['*'], filters: [ @@ -1037,7 +1037,7 @@ export const TEST_DATA = [ { testName: 'In with numeric type (optimized with CAST)', // order_id is number type -> uses optimized string_split with CAST to DOUBLE - expectedSQL: `SELECT orders.* FROM (SELECT order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2§§3', '§§')) AS DOUBLE)))`, + expectedSQL: `SELECT orders.* FROM (SELECT order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§‡¶2§‡¶3', '§‡¶')) AS DOUBLE)))`, cubeInput: { measures: ['*'], filters: [ @@ -1115,7 +1115,7 @@ export const TEST_DATA = [ 'Multiple In filters combined (customer_id, product_id, order_id)', // Tests all three optimized filters working together // customer_id (string), product_id (string), order_id (number with CAST) - expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, product_id AS orders__product_id, order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§')))) AND (orders__product_id = ANY(SELECT unnest(string_split('1§§2', '§§')))) AND (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2§§3§§4', '§§')) AS DOUBLE))))`, + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, product_id AS orders__product_id, order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE ((orders__customer_id = ANY(SELECT unnest(string_split('1§‡¶2', '§‡¶')))) AND (orders__product_id = ANY(SELECT unnest(string_split('1§‡¶2', '§‡¶')))) AND (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§‡¶2§‡¶3§‡¶4', '§‡¶')) AS DOUBLE))))`, cubeInput: { measures: ['*'], filters: [ @@ -1183,7 +1183,7 @@ export const TEST_DATA = [ testName: 'Not In', // customer_id is string type -> uses optimized string_split with NOT // vendors is string_array type -> uses NOT with ARRAY overlap && - expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((NOT (orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§'))))) AND (NOT (orders__vendors && (ARRAY['myntra', 'flipkart']))))`, + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, vendors AS orders__vendors, * FROM (select * from orders) AS orders) AS orders WHERE ((NOT (orders__customer_id = ANY(SELECT unnest(string_split('1§‡¶2', '§‡¶'))))) AND (NOT (orders__vendors && (ARRAY['myntra', 'flipkart']))))`, cubeInput: { measures: ['*'], filters: [ @@ -1254,7 +1254,7 @@ export const TEST_DATA = [ { testName: 'Not In with numeric type (optimized with CAST)', // order_id is number type -> uses optimized string_split with CAST and NOT - expectedSQL: `SELECT orders.* FROM (SELECT order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE (NOT (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2', '§§')) AS DOUBLE))))`, + expectedSQL: `SELECT orders.* FROM (SELECT order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE (NOT (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§‡¶2', '§‡¶')) AS DOUBLE))))`, cubeInput: { measures: ['*'], filters: [ @@ -1374,7 +1374,7 @@ export const TEST_DATA = [ testName: 'Multiple NotIn filters combined (customer_id, product_id, order_id)', // Tests all three optimized NOT IN filters working together - expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, product_id AS orders__product_id, order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE ((NOT (orders__customer_id = ANY(SELECT unnest(string_split('1§§2', '§§'))))) AND (NOT (orders__product_id = ANY(SELECT unnest(string_split('1§§2', '§§'))))) AND (NOT (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§§2', '§§')) AS DOUBLE)))))`, + expectedSQL: `SELECT orders.* FROM (SELECT customer_id AS orders__customer_id, product_id AS orders__product_id, order_id AS orders__order_id, * FROM (select * from orders) AS orders) AS orders WHERE ((NOT (orders__customer_id = ANY(SELECT unnest(string_split('1§‡¶2', '§‡¶'))))) AND (NOT (orders__product_id = ANY(SELECT unnest(string_split('1§‡¶2', '§‡¶'))))) AND (NOT (orders__order_id = ANY(SELECT CAST(unnest(string_split('1§‡¶2', '§‡¶')) AS DOUBLE)))))`, cubeInput: { measures: ['*'], filters: [