diff --git a/meerkat-browser/package.json b/meerkat-browser/package.json index 24bf7dec..d158b1d6 100644 --- a/meerkat-browser/package.json +++ b/meerkat-browser/package.json @@ -1,6 +1,6 @@ { "name": "@devrev/meerkat-browser", - "version": "0.0.105", + "version": "0.0.106", "dependencies": { "tslib": "^2.3.0", "@devrev/meerkat-core": "*", diff --git a/meerkat-browser/src/browser-cube-to-sql-with-resolution/browser-cube-to-sql-with-resolution.ts b/meerkat-browser/src/browser-cube-to-sql-with-resolution/browser-cube-to-sql-with-resolution.ts index c06d8a75..33e02d6e 100644 --- a/meerkat-browser/src/browser-cube-to-sql-with-resolution/browser-cube-to-sql-with-resolution.ts +++ b/meerkat-browser/src/browser-cube-to-sql-with-resolution/browser-cube-to-sql-with-resolution.ts @@ -1,18 +1,21 @@ import { + BASE_DATA_SOURCE_NAME, ContextParams, + getAggregatedSql as coreGetAggregatedSql, + getResolvedTableSchema as coreGetResolvedTableSchema, + getUnnestTableSchema as coreGetUnnestTableSchema, createBaseTableSchema, - generateResolutionJoinPaths, - generateResolutionSchemas, - generateResolvedDimensions, + Dimension, + generateRowNumberSql, + memberKeyToSafeKey, Query, ResolutionConfig, + ROW_ID_DIMENSION_NAME, + shouldSkipResolution, TableSchema, } from '@devrev/meerkat-core'; import { AsyncDuckDBConnection } from '@duckdb/duckdb-wasm'; -import { - cubeQueryToSQL, - CubeQueryToSQLParams, -} from '../browser-cube-to-sql/browser-cube-to-sql'; +import { cubeQueryToSQL } from '../browser-cube-to-sql/browser-cube-to-sql'; export interface CubeQueryToSQLWithResolutionParams { connection: AsyncDuckDBConnection; @@ -38,13 +41,23 @@ export const cubeQueryToSQLWithResolution = async ({ contextParams, }); - if (resolutionConfig.columnConfigs.length === 0) { - // If no resolution is needed, return the base SQL. + // Check if resolution should be skipped + if (shouldSkipResolution(resolutionConfig, query, columnProjections)) { return baseSql; } - // Create a table schema for the base query. - const baseTable: TableSchema = createBaseTableSchema( + if (!columnProjections) { + columnProjections = [...(query.dimensions || []), ...query.measures]; + } + // This is to ensure that, only the column projection columns + // are being resolved and other definitions are ignored. + resolutionConfig.columnConfigs = resolutionConfig.columnConfigs.filter( + (config) => { + return columnProjections?.includes(config.name); + } + ); + + const baseSchema: TableSchema = createBaseTableSchema( baseSql, tableSchemas, resolutionConfig, @@ -52,25 +65,48 @@ export const cubeQueryToSQLWithResolution = async ({ query.dimensions ); - const resolutionSchemas: TableSchema[] = generateResolutionSchemas( + const rowIdDimension: Dimension = { + name: ROW_ID_DIMENSION_NAME, + sql: generateRowNumberSql( + query, + baseSchema.dimensions, + BASE_DATA_SOURCE_NAME + ), + type: 'number', + alias: ROW_ID_DIMENSION_NAME, + }; + baseSchema.dimensions.push(rowIdDimension); + columnProjections.push(ROW_ID_DIMENSION_NAME); + + // Doing this because we need to use the original name of the column in the base table schema. + resolutionConfig.columnConfigs.forEach((config) => { + config.name = memberKeyToSafeKey(config.name); + }); + + // Generate SQL with row_id and unnested arrays + const unnestTableSchema = await coreGetUnnestTableSchema({ + baseTableSchema: baseSchema, resolutionConfig, - tableSchemas - ); + contextParams, + cubeQueryToSQL: async (params) => cubeQueryToSQL({ connection, ...params }), + }); - const resolveParams: CubeQueryToSQLParams = { - connection: connection, - query: { - measures: [], - dimensions: generateResolvedDimensions( - query, - resolutionConfig, - columnProjections - ), - joinPaths: generateResolutionJoinPaths(resolutionConfig, tableSchemas), - }, - tableSchemas: [baseTable, ...resolutionSchemas], - }; - const sql = await cubeQueryToSQL(resolveParams); + // Apply resolution (join with lookup tables) + const resolvedTableSchema = await coreGetResolvedTableSchema({ + baseTableSchema: unnestTableSchema, + resolutionConfig, + contextParams, + columnProjections, + cubeQueryToSQL: async (params) => cubeQueryToSQL({ connection, ...params }), + }); + + // Re-aggregate to reverse the unnest + const aggregatedSql = await coreGetAggregatedSql({ + resolvedTableSchema, + resolutionConfig, + contextParams, + cubeQueryToSQL: async (params) => cubeQueryToSQL({ connection, ...params }), + }); - return sql; + return aggregatedSql; }; diff --git a/meerkat-core/package.json b/meerkat-core/package.json index 74ef3cda..2b047d9f 100644 --- a/meerkat-core/package.json +++ b/meerkat-core/package.json @@ -1,6 +1,6 @@ { "name": "@devrev/meerkat-core", - "version": "0.0.105", + "version": "0.0.106", "dependencies": { "tslib": "^2.3.0" }, diff --git a/meerkat-core/src/constants/exports/index.ts b/meerkat-core/src/constants/exports/index.ts new file mode 100644 index 00000000..c6a7041c --- /dev/null +++ b/meerkat-core/src/constants/exports/index.ts @@ -0,0 +1 @@ +export const ROW_ID_DIMENSION_NAME = '__row_id'; diff --git a/meerkat-core/src/get-wrapped-base-query-with-projections/__tests__/sql-expression-modifier.spec.ts b/meerkat-core/src/get-wrapped-base-query-with-projections/__tests__/sql-expression-modifier.spec.ts index 945dd43e..9bc4b687 100644 --- a/meerkat-core/src/get-wrapped-base-query-with-projections/__tests__/sql-expression-modifier.spec.ts +++ b/meerkat-core/src/get-wrapped-base-query-with-projections/__tests__/sql-expression-modifier.spec.ts @@ -1,56 +1,66 @@ import { Dimension } from '../../types/cube-types/table'; import { isArrayTypeMember } from '../../utils/is-array-member-type'; -import { arrayFieldUnNestModifier, DimensionModifier, getModifiedSqlExpression, MODIFIERS, shouldUnnest } from "../sql-expression-modifiers"; +import { + arrayFieldUnNestModifier, + shouldUnnest, +} from '../modifiers/array-unnest-modifier'; +import { + getModifiedSqlExpression, + MODIFIERS, +} from '../sql-expression-modifiers'; +import { DimensionModifier } from '../types'; -jest.mock("../../utils/is-array-member-type", () => { +jest.mock('../../utils/is-array-member-type', () => { return { - isArrayTypeMember: jest.fn() - } + isArrayTypeMember: jest.fn(), + }; }); const QUERY = { - measures: ["test_measure"], - dimensions: ["test_dimension"] -} + measures: ['test_measure'], + dimensions: ['test_dimension'], +}; -describe("Dimension Modifier", () => { - describe("arrayFieldUnNestModifier", () => { - it("should return the correct unnested SQL expression", () => { +describe('Dimension Modifier', () => { + describe('arrayFieldUnNestModifier', () => { + it('should return the correct unnested SQL expression', () => { const modifier: DimensionModifier = { - sqlExpression: "some_array_field", + sqlExpression: 'some_array_field', dimension: {} as Dimension, - key: "test_key", - query: QUERY + key: 'test_key', + query: QUERY, }; - expect(arrayFieldUnNestModifier(modifier)).toBe("array[unnest(some_array_field)]"); + expect(arrayFieldUnNestModifier(modifier)).toBe( + 'array[unnest(some_array_field)]' + ); }); }); - describe("shouldUnnest", () => { - it("should return true when dimension is array type and has shouldUnnestGroupBy modifier", () => { + describe('shouldUnnest', () => { + it('should return true when dimension is array type and has shouldUnnestGroupBy modifier', () => { (isArrayTypeMember as jest.Mock).mockReturnValue(true); const modifier: DimensionModifier = { - sqlExpression: "some_expression", - dimension: { - type: "array", - modifier: { shouldUnnestGroupBy: true } + sqlExpression: 'some_expression', + dimension: { + type: 'array', + modifier: { shouldUnnestGroupBy: true }, } as Dimension, - key: "test_key", - query: QUERY + key: 'test_key', + query: QUERY, }; expect(shouldUnnest(modifier)).toBe(true); }); - it("should return false when dimension is not array type", () => { + it('should return false when dimension is not array type', () => { (isArrayTypeMember as jest.Mock).mockReturnValue(false); const modifier: DimensionModifier = { - sqlExpression: "some_expression", - dimension: { - type: "string", - modifier: { shouldUnnestGroupBy: true } + sqlExpression: 'some_expression', + dimension: { + type: 'string', + modifier: { shouldUnnestGroupBy: true }, } as Dimension, - key: "test_key", - query: QUERY + key: 'test_key', + query: QUERY, }; expect(shouldUnnest(modifier)).toBe(false); }); @@ -58,73 +68,75 @@ describe("Dimension Modifier", () => { it("should return false when dimension doesn't have shouldUnnestGroupBy modifier", () => { (isArrayTypeMember as jest.Mock).mockReturnValue(true); const modifier: DimensionModifier = { - sqlExpression: "some_expression", - dimension: { - type: "array", - modifier: {} + sqlExpression: 'some_expression', + dimension: { + type: 'array', + modifier: {}, + } as Dimension, + key: 'test_key', + query: QUERY, + }; + expect(shouldUnnest(modifier)).toBe(false); + }); + it('should return false when dimension when modifier undefined', () => { + (isArrayTypeMember as jest.Mock).mockReturnValue(true); + const modifier: DimensionModifier = { + sqlExpression: 'some_expression', + dimension: { + type: 'array', } as Dimension, - key: "test_key", - query: QUERY + key: 'test_key', + query: QUERY, }; expect(shouldUnnest(modifier)).toBe(false); }); - it("should return false when dimension when modifier undefined", () => { - (isArrayTypeMember as jest.Mock).mockReturnValue(true); - const modifier: DimensionModifier = { - sqlExpression: "some_expression", - dimension: { - type: "array", - } as Dimension, - key: "test_key", - query: QUERY - }; - expect(shouldUnnest(modifier)).toBe(false); - }); }); - describe("getModifiedSqlExpression", () => { - it("should not modify if no modifiers passed", () => { + describe('getModifiedSqlExpression', () => { + it('should not modify if no modifiers passed', () => { (isArrayTypeMember as jest.Mock).mockReturnValue(true); const input = { - sqlExpression: "array_field", + sqlExpression: 'array_field', dimension: { - type: "array", - modifier: { shouldUnnestGroupBy: true } + type: 'array', + modifier: { shouldUnnestGroupBy: true }, } as Dimension, query: QUERY, - key: "test_key", - modifiers: [] + key: 'test_key', + modifiers: [], }; - expect(getModifiedSqlExpression(input)).toBe("array_field"); + expect(getModifiedSqlExpression(input)).toBe('array_field'); }); - it("should apply the modifier when conditions are met", () => { + it('should apply the modifier when conditions are met', () => { (isArrayTypeMember as jest.Mock).mockReturnValue(true); const input = { - sqlExpression: "array_field", + sqlExpression: 'array_field', dimension: { - type: "array", - modifier: { shouldUnnestGroupBy: true } + type: 'array', + modifier: { shouldUnnestGroupBy: true }, } as Dimension, query: QUERY, - key: "test_key", - modifiers: MODIFIERS + key: 'test_key', + modifiers: MODIFIERS, }; - expect(getModifiedSqlExpression(input)).toBe("array[unnest(array_field)]"); + expect(getModifiedSqlExpression(input)).toBe( + 'array[unnest(array_field)]' + ); }); - it("should not apply the modifier when conditions are not met", () => { + it('should not apply the modifier when conditions are not met', () => { (isArrayTypeMember as jest.Mock).mockReturnValue(false); const input = { - sqlExpression: "non_array_field", + sqlExpression: 'non_array_field', dimension: { - type: "string", - modifier: {} + type: 'string', + modifier: {}, } as Dimension, query: QUERY, - key: "test_key", - modifiers: MODIFIERS + key: 'test_key', + modifiers: MODIFIERS, }; - expect(getModifiedSqlExpression(input)).toBe("non_array_field"); + expect(getModifiedSqlExpression(input)).toBe('non_array_field'); }); }); -}); \ No newline at end of file +}); diff --git a/meerkat-core/src/get-wrapped-base-query-with-projections/get-aliased-columns-from-filters.ts b/meerkat-core/src/get-wrapped-base-query-with-projections/get-aliased-columns-from-filters.ts index 2ada853e..8c21347c 100644 --- a/meerkat-core/src/get-wrapped-base-query-with-projections/get-aliased-columns-from-filters.ts +++ b/meerkat-core/src/get-wrapped-base-query-with-projections/get-aliased-columns-from-filters.ts @@ -5,7 +5,8 @@ import { findInDimensionSchema, findInMeasureSchema, } from '../utils/find-in-table-schema'; -import { getModifiedSqlExpression, Modifier } from './sql-expression-modifiers'; +import { getModifiedSqlExpression } from './sql-expression-modifiers'; +import { Modifier } from './types'; export const getDimensionProjection = ({ key, diff --git a/meerkat-core/src/get-wrapped-base-query-with-projections/index.ts b/meerkat-core/src/get-wrapped-base-query-with-projections/index.ts new file mode 100644 index 00000000..c4aae4e7 --- /dev/null +++ b/meerkat-core/src/get-wrapped-base-query-with-projections/index.ts @@ -0,0 +1,6 @@ +export { getWrappedBaseQueryWithProjections } from './get-wrapped-base-query-with-projections'; +export { + getModifiedSqlExpression, + MODIFIERS, +} from './sql-expression-modifiers'; +export type { DimensionModifier, Modifier } from './types'; diff --git a/meerkat-core/src/get-wrapped-base-query-with-projections/modifiers/array-flatten-modifier.ts b/meerkat-core/src/get-wrapped-base-query-with-projections/modifiers/array-flatten-modifier.ts new file mode 100644 index 00000000..5b3b2573 --- /dev/null +++ b/meerkat-core/src/get-wrapped-base-query-with-projections/modifiers/array-flatten-modifier.ts @@ -0,0 +1,25 @@ +import { isArrayTypeMember } from '../../utils/is-array-member-type'; +import { DimensionModifier, Modifier } from '../types'; + +export const arrayFlattenModifier = ({ + sqlExpression, +}: DimensionModifier): string => { + // Ensure NULL or empty arrays produce at least one row with NULL value + // This prevents rows from being dropped when arrays are NULL or empty + // COALESCE handles NULL, and len() = 0 check handles empty arrays [] + return `unnest(CASE WHEN ${sqlExpression} IS NULL OR len(COALESCE(${sqlExpression}, [])) = 0 THEN [NULL] ELSE ${sqlExpression} END)`; +}; + +export const shouldFlattenArray = ({ + dimension, +}: DimensionModifier): boolean => { + const isArrayType = isArrayTypeMember(dimension.type); + const shouldFlattenArray = dimension.modifier?.shouldFlattenArray; + return !!(isArrayType && shouldFlattenArray); +}; + +export const arrayFlattenModifierConfig: Modifier = { + name: 'shouldFlattenArray', + matcher: shouldFlattenArray, + modifier: arrayFlattenModifier, +}; diff --git a/meerkat-core/src/get-wrapped-base-query-with-projections/modifiers/array-unnest-modifier.ts b/meerkat-core/src/get-wrapped-base-query-with-projections/modifiers/array-unnest-modifier.ts new file mode 100644 index 00000000..697173ed --- /dev/null +++ b/meerkat-core/src/get-wrapped-base-query-with-projections/modifiers/array-unnest-modifier.ts @@ -0,0 +1,23 @@ +import { isArrayTypeMember } from '../../utils/is-array-member-type'; +import { DimensionModifier, Modifier } from '../types'; + +export const arrayFieldUnNestModifier = ({ + sqlExpression, +}: DimensionModifier): string => { + return `array[unnest(${sqlExpression})]`; +}; + +export const shouldUnnest = ({ + dimension, + query, +}: DimensionModifier): boolean => { + const isArrayType = isArrayTypeMember(dimension.type); + const hasUnNestedGroupBy = dimension.modifier?.shouldUnnestGroupBy; + return !!(isArrayType && hasUnNestedGroupBy && query.measures.length > 0); +}; + +export const arrayUnnestModifier: Modifier = { + name: 'shouldUnnestGroupBy', + matcher: shouldUnnest, + modifier: arrayFieldUnNestModifier, +}; diff --git a/meerkat-core/src/get-wrapped-base-query-with-projections/sql-expression-modifiers.ts b/meerkat-core/src/get-wrapped-base-query-with-projections/sql-expression-modifiers.ts index 2f2a7326..1dcab304 100644 --- a/meerkat-core/src/get-wrapped-base-query-with-projections/sql-expression-modifiers.ts +++ b/meerkat-core/src/get-wrapped-base-query-with-projections/sql-expression-modifiers.ts @@ -1,46 +1,34 @@ -import { Dimension, Query } from "../types/cube-types"; -import { isArrayTypeMember } from "../utils/is-array-member-type"; - -export interface DimensionModifier { - sqlExpression: string, - dimension: Dimension, - key: string, - query: Query -} - -export const arrayFieldUnNestModifier = ({ sqlExpression }: DimensionModifier): string => { - return `array[unnest(${sqlExpression})]`; -} - -export const shouldUnnest = ({ dimension, query }: DimensionModifier): boolean => { - const isArrayType = isArrayTypeMember(dimension.type); - const hasUnNestedGroupBy = dimension.modifier?.shouldUnnestGroupBy; - return !!(isArrayType && hasUnNestedGroupBy && query.measures.length > 0); -} - - -export type Modifier = { - name: string, - matcher: (modifier: DimensionModifier) => boolean, - modifier: (modifier: DimensionModifier) => string -} - -export const MODIFIERS: Modifier[] = [{ - name: 'shouldUnnestGroupBy', - matcher: shouldUnnest, - modifier: arrayFieldUnNestModifier -}] - - -export const getModifiedSqlExpression = ({ sqlExpression, dimension, key, modifiers, query }: DimensionModifier & { - modifiers: Modifier[] +import { arrayFlattenModifierConfig } from './modifiers/array-flatten-modifier'; +import { arrayUnnestModifier } from './modifiers/array-unnest-modifier'; +import { DimensionModifier, Modifier } from './types'; + +export const MODIFIERS = [arrayUnnestModifier, arrayFlattenModifierConfig]; + +export const getModifiedSqlExpression = ({ + sqlExpression, + dimension, + key, + modifiers, + query, +}: DimensionModifier & { + modifiers: Modifier[]; }) => { let finalDimension: string = sqlExpression; modifiers.forEach(({ modifier, matcher }) => { - const shouldModify = matcher({ sqlExpression: finalDimension, dimension, key, query }); + const shouldModify = matcher({ + sqlExpression: finalDimension, + dimension, + key, + query, + }); if (shouldModify) { - finalDimension = modifier({ sqlExpression: finalDimension, dimension, key, query }); + finalDimension = modifier({ + sqlExpression: finalDimension, + dimension, + key, + query, + }); } - }) + }); return finalDimension; -} \ No newline at end of file +}; diff --git a/meerkat-core/src/get-wrapped-base-query-with-projections/types.ts b/meerkat-core/src/get-wrapped-base-query-with-projections/types.ts new file mode 100644 index 00000000..ab2eb214 --- /dev/null +++ b/meerkat-core/src/get-wrapped-base-query-with-projections/types.ts @@ -0,0 +1,14 @@ +import { Dimension, Query } from '../types/cube-types'; + +export interface DimensionModifier { + sqlExpression: string; + dimension: Dimension; + key: string; + query: Query; +} + +export type Modifier = { + name: string; + matcher: (modifier: DimensionModifier) => boolean; + modifier: (modifier: DimensionModifier) => string; +}; diff --git a/meerkat-core/src/index.ts b/meerkat-core/src/index.ts index 2b6d03c7..0699e47a 100644 --- a/meerkat-core/src/index.ts +++ b/meerkat-core/src/index.ts @@ -2,6 +2,7 @@ export * from './ast-builder/ast-builder'; export * from './ast-deserializer/ast-deserializer'; export * from './ast-serializer/ast-serializer'; export * from './ast-validator'; +export * from './constants/exports'; export { detectApplyContextParamsToBaseSQL } from './context-params/context-params-ast'; export * from './cube-measure-transformer/cube-measure-transformer'; export * from './cube-to-duckdb/cube-filter-to-duckdb'; @@ -12,10 +13,14 @@ export { } from './filter-params/filter-params-ast'; export { getFilterParamsSQL } from './get-filter-params-sql/get-filter-params-sql'; export { getFinalBaseSQL } from './get-final-base-sql/get-final-base-sql'; -export { getWrappedBaseQueryWithProjections } from './get-wrapped-base-query-with-projections/get-wrapped-base-query-with-projections'; +export * from './get-wrapped-base-query-with-projections'; export * from './joins/joins'; export * from './member-formatters'; +export * from './resolution/generators'; export * from './resolution/resolution'; +export * from './resolution/steps/aggregation-step'; +export * from './resolution/steps/resolution-step'; +export * from './resolution/steps/unnest-step'; export * from './resolution/types'; export { FilterType } from './types/cube-types'; export * from './types/cube-types/index'; diff --git a/meerkat-core/src/member-formatters/constants.ts b/meerkat-core/src/member-formatters/constants.ts index cfed3d46..8bb6f9a2 100644 --- a/meerkat-core/src/member-formatters/constants.ts +++ b/meerkat-core/src/member-formatters/constants.ts @@ -1,6 +1,5 @@ export const COLUMN_NAME_DELIMITER = '.'; export const MEERKAT_OUTPUT_DELIMITER = '__'; - // Multi-character delimiter using three different uncommon characters // to minimize the chance of collision with real data export const STRING_ARRAY_DELIMITER = '§‡¶'; diff --git a/meerkat-core/src/member-formatters/index.ts b/meerkat-core/src/member-formatters/index.ts index f9e67e98..b668014e 100644 --- a/meerkat-core/src/member-formatters/index.ts +++ b/meerkat-core/src/member-formatters/index.ts @@ -1,4 +1,4 @@ -export { COLUMN_NAME_DELIMITER } from './constants'; +export { COLUMN_NAME_DELIMITER, MEERKAT_OUTPUT_DELIMITER } from './constants'; export { constructAlias, getAliasFromSchema } from './get-alias'; export { getNamespacedKey } from './get-namespaced-key'; export { memberKeyToSafeKey } from './member-key-to-safe-key'; diff --git a/meerkat-core/src/resolution/generators/generate-resolution-join-paths.ts b/meerkat-core/src/resolution/generators/generate-resolution-join-paths.ts new file mode 100644 index 00000000..d6c5ab1a --- /dev/null +++ b/meerkat-core/src/resolution/generators/generate-resolution-join-paths.ts @@ -0,0 +1,23 @@ +import { constructAlias, memberKeyToSafeKey } from '../../member-formatters'; +import { JoinPath } from '../../types/cube-types/query'; +import { TableSchema } from '../../types/cube-types/table'; +import { findInSchemas } from '../../utils/find-in-table-schema'; +import { ResolutionConfig } from '../types'; + +export const generateResolutionJoinPaths = ( + baseDataSourceName: string, + resolutionConfig: ResolutionConfig, + baseTableSchemas: TableSchema[] +): JoinPath[] => { + return resolutionConfig.columnConfigs.map((config) => [ + { + left: baseDataSourceName, + right: memberKeyToSafeKey(config.name), + on: constructAlias({ + name: config.name, + alias: findInSchemas(config.name, baseTableSchemas)?.alias, + aliasContext: { isAstIdentifier: false }, + }), + }, + ]); +}; diff --git a/meerkat-core/src/resolution/generators/generate-resolution-schemas.ts b/meerkat-core/src/resolution/generators/generate-resolution-schemas.ts new file mode 100644 index 00000000..7adc799e --- /dev/null +++ b/meerkat-core/src/resolution/generators/generate-resolution-schemas.ts @@ -0,0 +1,67 @@ +import { + constructAlias, + getNamespacedKey, + memberKeyToSafeKey, +} from '../../member-formatters'; +import { TableSchema } from '../../types/cube-types/table'; +import { + findInDimensionSchemas, + findInSchemas, +} from '../../utils/find-in-table-schema'; +import { ResolutionConfig } from '../types'; + +export const generateResolutionSchemas = ( + config: ResolutionConfig, + baseTableSchemas: TableSchema[] +) => { + const resolutionSchemas: TableSchema[] = []; + config.columnConfigs.forEach((colConfig) => { + const tableSchema = config.tableSchemas.find( + (ts) => ts.name === colConfig.source + ); + if (!tableSchema) { + throw new Error(`Table schema not found for ${colConfig.source}`); + } + + const baseName = memberKeyToSafeKey(colConfig.name); + const baseAlias = constructAlias({ + name: colConfig.name, + alias: findInSchemas(colConfig.name, baseTableSchemas)?.alias, + aliasContext: { isTableSchemaAlias: true }, + }); + + // For each column that needs to be resolved, create a copy of the relevant table schema. + // We use the name of the column in the base query as the table schema name + // to avoid conflicts. + const resolutionSchema: TableSchema = { + name: baseName, + sql: tableSchema.sql, + measures: [], + dimensions: colConfig.resolutionColumns.map((col) => { + const dimension = findInDimensionSchemas( + getNamespacedKey(colConfig.source, col), + config.tableSchemas + ); + if (!dimension) { + throw new Error(`Dimension not found: ${col}`); + } + return { + // Need to create a new name due to limitations with how + // CubeToSql handles duplicate dimension names between different sources. + name: memberKeyToSafeKey(getNamespacedKey(colConfig.name, col)), + sql: `${baseName}.${col}`, + type: dimension.type, + alias: `${baseAlias} - ${constructAlias({ + name: col, + alias: dimension.alias, + aliasContext: { isTableSchemaAlias: true }, + })}`, + }; + }), + }; + + resolutionSchemas.push(resolutionSchema); + }); + + return resolutionSchemas; +}; diff --git a/meerkat-core/src/resolution/generators/generate-resolved-dimensions.ts b/meerkat-core/src/resolution/generators/generate-resolved-dimensions.ts new file mode 100644 index 00000000..788b9703 --- /dev/null +++ b/meerkat-core/src/resolution/generators/generate-resolved-dimensions.ts @@ -0,0 +1,38 @@ +import { getNamespacedKey, memberKeyToSafeKey } from '../../member-formatters'; +import { Member, Query } from '../../types/cube-types/query'; +import { ResolutionConfig } from '../types'; + +export const generateResolvedDimensions = ( + baseDataSourceName: string, + query: Query, + config: ResolutionConfig, + columnProjections?: string[] +): Member[] => { + // If column projections are provided, use those. + // Otherwise, use all measures and dimensions from the original query. + const aggregatedDimensions = columnProjections + ? columnProjections + : [...query.measures, ...(query.dimensions || [])]; + + const resolvedDimensions: Member[] = aggregatedDimensions.flatMap( + (dimension) => { + const columnConfig = config.columnConfigs.find( + (c) => c.name === dimension + ); + + if (!columnConfig) { + return [ + getNamespacedKey(baseDataSourceName, memberKeyToSafeKey(dimension)), + ]; + } else { + return columnConfig.resolutionColumns.map((col) => + getNamespacedKey( + memberKeyToSafeKey(dimension), + memberKeyToSafeKey(getNamespacedKey(columnConfig.name, col)) + ) + ); + } + } + ); + return resolvedDimensions; +}; diff --git a/meerkat-core/src/resolution/generators/generate-row-number-sql.ts b/meerkat-core/src/resolution/generators/generate-row-number-sql.ts new file mode 100644 index 00000000..43555674 --- /dev/null +++ b/meerkat-core/src/resolution/generators/generate-row-number-sql.ts @@ -0,0 +1,44 @@ +import { memberKeyToSafeKey } from '../../member-formatters'; + +/** + * Generates row_number() OVER (ORDER BY ...) SQL based on query order. + * This is used to preserve the original query ordering through resolution operations. + * + * @param query - The query object that may contain an order clause + * @param dimensions - The dimensions array from the base table schema + * @param baseTableName - The base table name to use in column references + * @returns SQL expression for row_number() OVER (ORDER BY ...) + */ +export const generateRowNumberSql = ( + query: { order?: Record }, + dimensions: { name: string; alias?: string }[], + baseTableName: string +): string => { + let rowNumberSql = 'row_number() OVER ('; + if (query.order && Object.keys(query.order).length > 0) { + const orderClauses = Object.entries(query.order).map( + ([member, direction]) => { + // Find the actual column name/alias in the base table dimensions + const safeMember = memberKeyToSafeKey(member); + const dimension = dimensions.find( + (d) => d.name === safeMember || d.alias === safeMember + ); + const columnName = dimension + ? dimension.alias || dimension.name + : safeMember; + return generateOrderClause(baseTableName, columnName, direction); + } + ); + rowNumberSql += `ORDER BY ${orderClauses.join(', ')}`; + } + rowNumberSql += ')'; + return rowNumberSql; +}; + +const generateOrderClause = ( + baseTableName: string, + columnName: string, + direction: string +) => { + return `${baseTableName}."${columnName}" ${direction.toUpperCase()}`; +}; diff --git a/meerkat-core/src/resolution/generators/index.ts b/meerkat-core/src/resolution/generators/index.ts new file mode 100644 index 00000000..dea7f48d --- /dev/null +++ b/meerkat-core/src/resolution/generators/index.ts @@ -0,0 +1,4 @@ +export * from './generate-resolution-join-paths'; +export * from './generate-resolution-schemas'; +export * from './generate-resolved-dimensions'; +export * from './generate-row-number-sql'; diff --git a/meerkat-core/src/resolution/resolution.spec.ts b/meerkat-core/src/resolution/resolution.spec.ts index 45170199..4f432bb8 100644 --- a/meerkat-core/src/resolution/resolution.spec.ts +++ b/meerkat-core/src/resolution/resolution.spec.ts @@ -1,10 +1,17 @@ +import { isArrayTypeMember } from '../utils/is-array-member-type'; import { - createBaseTableSchema, generateResolutionJoinPaths, generateResolutionSchemas, generateResolvedDimensions, + generateRowNumberSql, +} from './generators'; +import { + createBaseTableSchema, + createWrapperTableSchema, + getArrayTypeResolutionColumnConfigs, + withArrayFlattenModifier, } from './resolution'; -import { ResolutionConfig } from './types'; +import { BASE_DATA_SOURCE_NAME, ResolutionConfig } from './types'; describe('Create base table schema', () => { it('dimensions and measures are converted to dimensions', () => { @@ -18,19 +25,19 @@ describe('Create base table schema', () => { { name: 'count', sql: 'COUNT(*)', - type: 'number', + type: 'number' as const, }, ], dimensions: [ { name: 'column1', sql: 'base_table.column1', - type: 'string', + type: 'string' as const, }, { name: 'column2', sql: 'base_table.column2', - type: 'string', + type: 'string' as const, }, ], }, @@ -89,12 +96,12 @@ describe('Create base table schema', () => { { name: 'column1', sql: 'base_table.column1', - type: 'string', + type: 'string' as const, }, { name: 'column2', sql: 'base_table.column2', - type: 'string', + type: 'string' as const, }, ], }, @@ -104,12 +111,14 @@ describe('Create base table schema', () => { { name: 'base_table.column1', source: 'resolution_table', + type: 'string' as const, joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.column2', source: 'resolution_table', + type: 'string' as const, joinColumn: 'id', resolutionColumns: ['display_name'], }, @@ -166,7 +175,7 @@ describe('Create base table schema', () => { { name: 'column1', sql: 'base_table.column1', - type: 'string', + type: 'string' as const, }, ], }, @@ -199,13 +208,13 @@ describe('Create base table schema', () => { { name: 'column1', sql: 'base_table.column1', - type: 'string', + type: 'string' as const, alias: 'Column 1', }, { name: 'column2', sql: 'base_table.column2', - type: 'string', + type: 'string' as const, alias: 'Column 2', }, ], @@ -215,12 +224,14 @@ describe('Create base table schema', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.column2', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_name'], @@ -279,12 +290,12 @@ describe('Generate resolution schemas', () => { { name: 'column1', sql: 'base_table.column1', - type: 'string', + type: 'string' as const, }, { name: 'column2', sql: 'base_table.column2', - type: 'string', + type: 'string' as const, }, ], }, @@ -294,12 +305,14 @@ describe('Generate resolution schemas', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.column2', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['id', 'display_name'], @@ -314,17 +327,17 @@ describe('Generate resolution schemas', () => { { name: 'id', sql: 'resolution_table.id', - type: 'string', + type: 'string' as const, }, { name: 'display_id', sql: 'resolution_table.display_id', - type: 'string', + type: 'string' as const, }, { name: 'display_name', sql: 'resolution_table.display_name', - type: 'string', + type: 'string' as const, }, ], }, @@ -377,12 +390,14 @@ describe('Generate resolution schemas', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.column2', + type: 'string' as const, source: 'resolution_table1', // does not exist joinColumn: 'id', resolutionColumns: ['id', 'display_name'], @@ -397,17 +412,17 @@ describe('Generate resolution schemas', () => { { name: 'id', sql: 'resolution_table.id', - type: 'string', + type: 'string' as const, }, { name: 'display_id', sql: 'resolution_table.display_id', - type: 'string', + type: 'string' as const, }, { name: 'display_name', sql: 'resolution_table.display_name', - type: 'string', + type: 'string' as const, }, ], }, @@ -424,6 +439,7 @@ describe('Generate resolution schemas', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], @@ -437,14 +453,14 @@ describe('Generate resolution schemas', () => { { name: 'display_id', sql: 'resolution_table.display_id', - type: 'string', + type: 'string' as const, }, ], dimensions: [ { name: 'id', sql: 'resolution_table.id', - type: 'string', + type: 'string' as const, }, ], }, @@ -461,6 +477,7 @@ describe('Generate resolution schemas', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], @@ -475,12 +492,12 @@ describe('Generate resolution schemas', () => { { name: 'id', sql: 'resolution_table.id', - type: 'string', + type: 'string' as const, }, { name: 'display_id', sql: 'resolution_table.display_id', - type: 'string', + type: 'string' as const, }, ], }, @@ -515,7 +532,7 @@ describe('Generate resolution schemas', () => { { name: 'column1', sql: 'base_table.column1', - type: 'string', + type: 'string' as const, alias: 'Column 1', }, ], @@ -526,6 +543,7 @@ describe('Generate resolution schemas', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], @@ -540,13 +558,13 @@ describe('Generate resolution schemas', () => { { name: 'id', sql: 'resolution_table.id', - type: 'string', + type: 'string' as const, alias: 'ID', }, { name: 'display_id', sql: 'resolution_table.display_id', - type: 'string', + type: 'string' as const, alias: 'Display ID', }, ], @@ -586,12 +604,14 @@ describe('Generate resolved dimensions', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.column2', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_name'], @@ -601,6 +621,7 @@ describe('Generate resolved dimensions', () => { }; const resolvedDimensions = generateResolvedDimensions( + BASE_DATA_SOURCE_NAME, query, resolutionConfig ); @@ -620,6 +641,7 @@ describe('Generate resolved dimensions', () => { columnConfigs: [ { name: 'base_table.column3', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], @@ -629,6 +651,7 @@ describe('Generate resolved dimensions', () => { }; const resolvedDimensions = generateResolvedDimensions( + BASE_DATA_SOURCE_NAME, query, resolutionConfig ); @@ -648,12 +671,14 @@ describe('Generate resolved dimensions', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.column2', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['id', 'display_name'], @@ -668,6 +693,7 @@ describe('Generate resolved dimensions', () => { ]; const resolvedDimensions = generateResolvedDimensions( + BASE_DATA_SOURCE_NAME, query, resolutionConfig, projections @@ -688,12 +714,14 @@ describe('Generate resolution join paths', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.column2', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_name'], @@ -702,7 +730,11 @@ describe('Generate resolution join paths', () => { tableSchemas: [], }; - const joinPaths = generateResolutionJoinPaths(resolutionConfig, []); + const joinPaths = generateResolutionJoinPaths( + BASE_DATA_SOURCE_NAME, + resolutionConfig, + [] + ); expect(joinPaths).toEqual([ [ @@ -732,13 +764,13 @@ describe('Generate resolution join paths', () => { { name: 'column1', sql: 'base_table.column1', - type: 'string', + type: 'string' as const, alias: 'Column 1', }, { name: 'column2', sql: 'base_table.column2', - type: 'string', + type: 'string' as const, alias: 'Column 2', }, ], @@ -749,6 +781,7 @@ describe('Generate resolution join paths', () => { columnConfigs: [ { name: 'base_table.column1', + type: 'string' as const, source: 'resolution_table', joinColumn: 'id', resolutionColumns: ['display_id'], @@ -758,6 +791,7 @@ describe('Generate resolution join paths', () => { }; const joinPaths = generateResolutionJoinPaths( + BASE_DATA_SOURCE_NAME, resolutionConfig, baseTableSchemas ); @@ -772,3 +806,536 @@ describe('Generate resolution join paths', () => { ]); }); }); + +describe('createWrapperTableSchema', () => { + it('should create wrapper schema with correct structure', () => { + const sql = 'SELECT * FROM base_table'; + const baseTableSchema = { + name: 'original_table', + sql: 'original sql', + dimensions: [ + { + name: 'column1', + sql: 'original_table.column1', + type: 'string' as const, + alias: 'Column 1', + }, + { + name: 'column2', + sql: 'original_table.column2', + type: 'number' as const, + alias: 'Column 2', + }, + ], + measures: [ + { + name: 'count', + sql: 'COUNT(*)', + type: 'number' as const, + alias: 'Count', + }, + ], + joins: [ + { + sql: 'some_join_condition', + }, + ], + } as any; + + const result = createWrapperTableSchema(sql, baseTableSchema); + + expect(result).toEqual({ + name: '__base_query', + sql: 'SELECT * FROM base_table', + dimensions: [ + { + name: 'column1', + sql: '__base_query."Column 1"', + type: 'string', + alias: 'Column 1', + }, + { + name: 'column2', + sql: '__base_query."Column 2"', + type: 'number', + alias: 'Column 2', + }, + ], + measures: [ + { + name: 'count', + sql: '__base_query."Count"', + type: 'number', + alias: 'Count', + }, + ], + joins: [ + { + sql: 'some_join_condition', + }, + ], + }); + }); + + it('should handle dimensions without aliases', () => { + const sql = 'SELECT column1 FROM base_table'; + const baseTableSchema = { + name: 'original_table', + sql: 'original sql', + dimensions: [ + { + name: 'column1', + sql: 'original_table.column1', + type: 'string' as const, + }, + ], + measures: [], + joins: [], + } as any; + + const result = createWrapperTableSchema(sql, baseTableSchema); + + expect(result.dimensions[0].sql).toBe('__base_query."column1"'); + }); + + it('should handle empty dimensions and measures', () => { + const sql = 'SELECT * FROM base_table'; + const baseTableSchema = { + name: 'original_table', + sql: 'original sql', + dimensions: [], + measures: [], + joins: [], + } as any; + + const result = createWrapperTableSchema(sql, baseTableSchema); + + expect(result).toEqual({ + name: '__base_query', + sql: 'SELECT * FROM base_table', + dimensions: [], + measures: [], + joins: [], + }); + }); +}); + +describe('getArrayTypeResolutionColumnConfigs', () => { + it('should filter and return only array type column configs', () => { + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'table.array_column', + type: 'string_array' as const, + source: 'lookup_table', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + { + name: 'table.scalar_column', + type: 'string' as const, + source: 'lookup_table', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + { + name: 'table.another_array', + type: 'number_array' as const, + source: 'lookup_table2', + joinColumn: 'id', + resolutionColumns: ['value'], + }, + ], + tableSchemas: [], + }; + + const result = getArrayTypeResolutionColumnConfigs(resolutionConfig); + + expect(result).toHaveLength(2); + expect(result[0].name).toBe('table.array_column'); + expect(result[1].name).toBe('table.another_array'); + expect(result.every((config) => isArrayTypeMember(config.type))).toBe(true); + }); + + it('should return empty array when no array type configs exist', () => { + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'table.scalar_column1', + type: 'string' as const, + source: 'lookup_table', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + { + name: 'table.scalar_column2', + type: 'number' as const, + source: 'lookup_table', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + ], + tableSchemas: [], + }; + + const result = getArrayTypeResolutionColumnConfigs(resolutionConfig); + + expect(result).toEqual([]); + }); + + it('should return empty array when columnConfigs is empty', () => { + const resolutionConfig: ResolutionConfig = { + columnConfigs: [], + tableSchemas: [], + }; + + const result = getArrayTypeResolutionColumnConfigs(resolutionConfig); + + expect(result).toEqual([]); + }); +}); + +describe('withArrayFlattenModifier', () => { + it('should add shouldFlattenArray modifier to array columns', () => { + const baseTableSchema = { + name: 'base_table', + sql: 'SELECT * FROM base_table', + dimensions: [ + { + name: 'array_column', + sql: 'base_table.array_column', + type: 'string_array' as const, + }, + { + name: 'scalar_column', + sql: 'base_table.scalar_column', + type: 'string' as const, + }, + ], + measures: [], + } as any; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'array_column', + type: 'string_array' as const, + source: 'lookup_table', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + ], + tableSchemas: [], + }; + + const result = withArrayFlattenModifier(baseTableSchema, resolutionConfig); + + expect(result.dimensions[0].modifier).toEqual({ + shouldFlattenArray: true, + }); + expect(result.dimensions[1].modifier).toBeUndefined(); + // Verify immutability + expect(baseTableSchema.dimensions[0].modifier).toBeUndefined(); + }); + + it('should handle multiple array columns', () => { + const baseTableSchema = { + name: 'base_table', + sql: 'SELECT * FROM base_table', + dimensions: [ + { + name: 'array_column1', + sql: 'base_table.array_column1', + type: 'string_array' as const, + }, + { + name: 'array_column2', + sql: 'base_table.array_column2', + type: 'string_array' as const, + }, + { + name: 'scalar_column', + sql: 'base_table.scalar_column', + type: 'string' as const, + }, + ], + measures: [], + } as any; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'array_column1', + type: 'string_array' as const, + source: 'lookup_table1', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + { + name: 'array_column2', + type: 'number_array' as const, + source: 'lookup_table2', + joinColumn: 'id', + resolutionColumns: ['value'], + }, + ], + tableSchemas: [], + }; + + const result = withArrayFlattenModifier(baseTableSchema, resolutionConfig); + + expect(result.dimensions[0].modifier).toEqual({ + shouldFlattenArray: true, + }); + expect(result.dimensions[1].modifier).toEqual({ + shouldFlattenArray: true, + }); + expect(result.dimensions[2].modifier).toBeUndefined(); + // Verify immutability + expect(baseTableSchema.dimensions[0].modifier).toBeUndefined(); + }); + + it('should not modify dimensions when no array columns in config', () => { + const baseTableSchema = { + name: 'base_table', + sql: 'SELECT * FROM base_table', + dimensions: [ + { + name: 'column1', + sql: 'base_table.column1', + type: 'string', + }, + { + name: 'column2', + sql: 'base_table.column2', + type: 'number', + }, + ], + measures: [], + } as any; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'column1', + type: 'string' as const, + source: 'lookup_table', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + ], + tableSchemas: [], + }; + + const result = withArrayFlattenModifier(baseTableSchema, resolutionConfig); + + expect(result.dimensions[0].modifier).toBeUndefined(); + expect(result.dimensions[1].modifier).toBeUndefined(); + }); + + it('should handle empty dimensions array', () => { + const baseTableSchema = { + name: 'base_table', + sql: 'SELECT * FROM base_table', + dimensions: [], + measures: [], + } as any; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'array_column', + type: 'string_array' as const, + source: 'lookup_table', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + ], + tableSchemas: [], + }; + + // Should not throw error + expect(() => { + withArrayFlattenModifier(baseTableSchema, resolutionConfig); + }).not.toThrow(); + + const result = withArrayFlattenModifier(baseTableSchema, resolutionConfig); + expect(result.dimensions).toEqual([]); + }); +}); + +describe('generateRowNumberSql', () => { + it('should generate row_number with ORDER BY for single column', () => { + const query = { + order: { 'table.id': 'asc' }, + }; + const dimensions = [ + { + name: 'table__id', + alias: 'ID', + }, + { + name: 'table__name', + alias: 'Name', + }, + ]; + + const result = generateRowNumberSql( + query, + dimensions, + BASE_DATA_SOURCE_NAME + ); + + expect(result).toBe('row_number() OVER (ORDER BY __base_query."ID" ASC)'); + }); + + it('should generate row_number with ORDER BY for multiple columns', () => { + const query = { + order: { 'table.id': 'asc', 'table.name': 'desc' }, + }; + const dimensions = [ + { + name: 'table__id', + alias: 'ID', + }, + { + name: 'table__name', + alias: 'Name', + }, + ]; + + const result = generateRowNumberSql( + query, + dimensions, + BASE_DATA_SOURCE_NAME + ); + + expect(result).toBe( + 'row_number() OVER (ORDER BY __base_query."ID" ASC, __base_query."Name" DESC)' + ); + }); + + it('should generate row_number without ORDER BY when query has no order', () => { + const query = {}; + const dimensions = [ + { + name: 'table__id', + alias: 'ID', + }, + ]; + + const result = generateRowNumberSql( + query, + dimensions, + BASE_DATA_SOURCE_NAME + ); + + expect(result).toBe('row_number() OVER ()'); + }); + + it('should generate row_number without ORDER BY when order is empty', () => { + const query = { + order: {}, + }; + const dimensions = [ + { + name: 'table__id', + alias: 'ID', + }, + ]; + + const result = generateRowNumberSql( + query, + dimensions, + BASE_DATA_SOURCE_NAME + ); + + expect(result).toBe('row_number() OVER ()'); + }); + + it('should use dimension name when alias is not present', () => { + const query = { + order: { 'table.id': 'asc' }, + }; + const dimensions = [ + { + name: 'table__id', + }, + ]; + + const result = generateRowNumberSql( + query, + dimensions, + BASE_DATA_SOURCE_NAME + ); + + expect(result).toBe( + 'row_number() OVER (ORDER BY __base_query."table__id" ASC)' + ); + }); + + it('should handle dimension not found by using safe member name', () => { + const query = { + order: { 'table.unknown_column': 'desc' }, + }; + const dimensions = [ + { + name: 'table__id', + alias: 'ID', + }, + ]; + + const result = generateRowNumberSql( + query, + dimensions, + BASE_DATA_SOURCE_NAME + ); + + expect(result).toBe( + 'row_number() OVER (ORDER BY __base_query."table__unknown_column" DESC)' + ); + }); + + it('should handle mixed case order directions', () => { + const query = { + order: { 'table.id': 'asc', 'table.name': 'desc' }, + }; + const dimensions = [ + { + name: 'table__id', + alias: 'ID', + }, + { + name: 'table__name', + alias: 'Name', + }, + ]; + + const result = generateRowNumberSql( + query, + dimensions, + BASE_DATA_SOURCE_NAME + ); + + expect(result).toBe( + 'row_number() OVER (ORDER BY __base_query."ID" ASC, __base_query."Name" DESC)' + ); + }); + + it('should use custom base table name', () => { + const query = { + order: { 'table.id': 'asc' }, + }; + const dimensions = [ + { + name: 'table__id', + alias: 'ID', + }, + ]; + const customBaseTableName = 'custom_table'; + + const result = generateRowNumberSql(query, dimensions, customBaseTableName); + + expect(result).toBe('row_number() OVER (ORDER BY custom_table."ID" ASC)'); + }); +}); diff --git a/meerkat-core/src/resolution/resolution.ts b/meerkat-core/src/resolution/resolution.ts index 3afe29ce..dbf61a05 100644 --- a/meerkat-core/src/resolution/resolution.ts +++ b/meerkat-core/src/resolution/resolution.ts @@ -3,13 +3,50 @@ import { getNamespacedKey, memberKeyToSafeKey, } from '../member-formatters'; -import { JoinPath, Member, Query } from '../types/cube-types/query'; +import { Member, Query } from '../types/cube-types/query'; import { Dimension, Measure, TableSchema } from '../types/cube-types/table'; +import { isArrayTypeMember } from '../utils/is-array-member-type'; import { - findInDimensionSchemas, - findInSchemas, -} from '../utils/find-in-table-schema'; -import { BASE_DATA_SOURCE_NAME, ResolutionConfig } from './types'; + BASE_DATA_SOURCE_NAME, + ResolutionColumnConfig, + ResolutionConfig, +} from './types'; + +/** + * Constructs a SQL column reference from a table name and a dimension/measure. + * + * @param tableName - The name of the table + * @param member - The dimension or measure object with name and optional alias + * @returns Formatted SQL column reference like: tableName."columnName" + */ +export const getColumnReference = ( + tableName: string, + member: { name: string; alias?: string } +): string => { + return `${tableName}."${member.alias || member.name}"`; +}; + +/** + * Checks if resolution should be skipped based on the resolution configuration and column projections. + * Resolution is skipped when there are no columns to resolve and no column projections. + * + * @param resolutionConfig - The resolution configuration + * @param columnProjections - Optional array of column projections + * @returns true if resolution should be skipped, false otherwise + */ +export const shouldSkipResolution = ( + resolutionConfig: ResolutionConfig, + query: Query, + columnProjections?: string[] +): boolean => { + // If no resolution required and no column projections to ensure order in which export is happening + // and explicit order is not provided, then skip resolution. + return ( + resolutionConfig.columnConfigs.length === 0 && + columnProjections?.length === 0 && + !query.order + ); +}; const constructBaseDimension = (name: string, schema: Measure | Dimension) => { return { @@ -69,112 +106,73 @@ export const createBaseTableSchema = ( }; }; -export const generateResolutionSchemas = ( - config: ResolutionConfig, - baseTableSchemas: TableSchema[] +export const createWrapperTableSchema = ( + sql: string, + baseTableSchema: TableSchema ) => { - const resolutionSchemas: TableSchema[] = []; - config.columnConfigs.forEach((colConfig) => { - const tableSchema = config.tableSchemas.find( - (ts) => ts.name === colConfig.source - ); - if (!tableSchema) { - throw new Error(`Table schema not found for ${colConfig.source}`); - } + return { + name: BASE_DATA_SOURCE_NAME, + sql: sql, + dimensions: baseTableSchema.dimensions.map((d) => ({ + name: d.name, + sql: getColumnReference(BASE_DATA_SOURCE_NAME, d), + type: d.type, + alias: d.alias, + })), + measures: baseTableSchema.measures.map((m) => ({ + name: m.name, + sql: getColumnReference(BASE_DATA_SOURCE_NAME, m), + type: m.type, + alias: m.alias, + })), + joins: baseTableSchema.joins, + }; +}; - const baseName = memberKeyToSafeKey(colConfig.name); - const baseAlias = constructAlias({ - name: colConfig.name, - alias: findInSchemas(colConfig.name, baseTableSchemas)?.alias, - aliasContext: { isTableSchemaAlias: true }, - }); +export const withArrayFlattenModifier = ( + baseTableSchema: TableSchema, + resolutionConfig: ResolutionConfig +): TableSchema => { + const arrayColumns = getArrayTypeResolutionColumnConfigs(resolutionConfig); - // For each column that needs to be resolved, create a copy of the relevant table schema. - // We use the name of the column in the base query as the table schema name - // to avoid conflicts. - const resolutionSchema: TableSchema = { - name: baseName, - sql: tableSchema.sql, - measures: [], - dimensions: colConfig.resolutionColumns.map((col) => { - const dimension = findInDimensionSchemas( - getNamespacedKey(colConfig.source, col), - config.tableSchemas - ); - if (!dimension) { - throw new Error(`Dimension not found: ${col}`); - } + return { + ...baseTableSchema, + dimensions: baseTableSchema.dimensions.map((dimension) => { + const shouldFlatten = arrayColumns.some( + (ac: ResolutionColumnConfig) => ac.name === dimension.name + ); + + if (shouldFlatten) { return { - // Need to create a new name due to limitations with how - // CubeToSql handles duplicate dimension names between different sources. - name: memberKeyToSafeKey(getNamespacedKey(colConfig.name, col)), - sql: `${baseName}.${col}`, - type: dimension.type, - alias: `${baseAlias} - ${constructAlias({ - name: col, - alias: dimension.alias, - aliasContext: { isTableSchemaAlias: true }, - })}`, + ...dimension, + modifier: { shouldFlattenArray: true }, }; - }), - }; - - resolutionSchemas.push(resolutionSchema); - }); + } - return resolutionSchemas; + return dimension; + }), + }; }; -export const generateResolvedDimensions = ( - query: Query, - config: ResolutionConfig, - columnProjections?: string[] -): Member[] => { - // If column projections are provided, use those. - // Otherwise, use all measures and dimensions from the original query. - const aggregatedDimensions = columnProjections - ? columnProjections - : [...query.measures, ...(query.dimensions || [])]; - - const resolvedDimensions: Member[] = aggregatedDimensions.flatMap( - (dimension) => { - const columnConfig = config.columnConfigs.find( - (c) => c.name === dimension - ); - - if (!columnConfig) { - return [ - getNamespacedKey( - BASE_DATA_SOURCE_NAME, - memberKeyToSafeKey(dimension) - ), - ]; - } else { - return columnConfig.resolutionColumns.map((col) => - getNamespacedKey( - memberKeyToSafeKey(dimension), - memberKeyToSafeKey(getNamespacedKey(columnConfig.name, col)) - ) - ); - } - } +export const getArrayTypeResolutionColumnConfigs = ( + resolutionConfig: ResolutionConfig +) => { + return resolutionConfig.columnConfigs.filter((config) => + isArrayTypeMember(config.type) ); - return resolvedDimensions; }; -export const generateResolutionJoinPaths = ( - resolutionConfig: ResolutionConfig, - baseTableSchemas: TableSchema[] -): JoinPath[] => { - return resolutionConfig.columnConfigs.map((config) => [ - { - left: BASE_DATA_SOURCE_NAME, - right: memberKeyToSafeKey(config.name), - on: constructAlias({ - name: config.name, - alias: findInSchemas(config.name, baseTableSchemas)?.alias, - aliasContext: { isAstIdentifier: false }, - }), - }, - ]); +/** + * Wraps SQL to order by row_id and then exclude it from results. + * This maintains the ordering from the base query while removing the internal row_id column. + * + * @param sql - The SQL query that includes a __row_id column + * @param rowIdColumnName - The name of the row_id column (defaults to '__row_id') + * @returns SQL query ordered by row_id with the row_id column excluded + */ +export const wrapWithRowIdOrderingAndExclusion = ( + sql: string, + rowIdColumnName: string +): string => { + return `select * exclude(${rowIdColumnName}) from (${sql}) order by ${rowIdColumnName}`; }; diff --git a/meerkat-core/src/resolution/steps/aggregation-step.ts b/meerkat-core/src/resolution/steps/aggregation-step.ts new file mode 100644 index 00000000..1161fbe6 --- /dev/null +++ b/meerkat-core/src/resolution/steps/aggregation-step.ts @@ -0,0 +1,124 @@ +import { + ContextParams, + getArrayTypeResolutionColumnConfigs, + getNamespacedKey, + Measure, + MEERKAT_OUTPUT_DELIMITER, + Query, + ResolutionConfig, + ROW_ID_DIMENSION_NAME, + TableSchema, + wrapWithRowIdOrderingAndExclusion, +} from '../../index'; + +/** + * Constructs the resolved column name prefix for array resolution. + * This is used to identify which columns in the resolved schema correspond to array fields. + * + * @param columnName - The original column name + * @returns The prefixed column name used in resolution + */ +const getResolvedArrayColumnPrefix = (columnName: string): string => { + return `${columnName}${MEERKAT_OUTPUT_DELIMITER}`; +}; + +/** + * Re-aggregate to reverse the unnest + * + * This function: + * 1. Groups by row_id + * 2. Uses MAX for non-array columns (they're duplicated) + * 3. Uses ARRAY_AGG for resolved array columns + * + * @param resolvedTableSchema - Schema from Phase 2 (contains all column info) + * @param resolutionConfig - Resolution configuration + * @param contextParams - Optional context parameters + * @returns Final SQL with arrays containing resolved values + */ +export const getAggregatedSql = async ({ + resolvedTableSchema, + resolutionConfig, + contextParams, + cubeQueryToSQL, +}: { + resolvedTableSchema: TableSchema; + resolutionConfig: ResolutionConfig; + contextParams?: ContextParams; + cubeQueryToSQL: (params: { + query: Query; + tableSchemas: TableSchema[]; + contextParams?: ContextParams; + }) => Promise; +}): Promise => { + const aggregationBaseTableSchema: TableSchema = resolvedTableSchema; + + // Identify which columns need ARRAY_AGG vs MAX + const arrayColumns = getArrayTypeResolutionColumnConfigs(resolutionConfig); + const baseTableName = aggregationBaseTableSchema.name; + + const isResolvedArrayColumn = (dimName: string) => { + return arrayColumns.some((arrayCol) => { + return dimName.includes(getResolvedArrayColumnPrefix(arrayCol.name)); + }); + }; + + // Create aggregation measures with proper aggregation functions + // Get row_id dimension for GROUP BY + const rowIdDimension = aggregationBaseTableSchema.dimensions.find( + (d) => d.name === ROW_ID_DIMENSION_NAME + ); + + if (!rowIdDimension) { + throw new Error('Row id dimension not found'); + } + // Create measures with MAX or ARRAY_AGG based on column type + const aggregationMeasures: Measure[] = []; + + aggregationBaseTableSchema.dimensions + .filter((dim) => dim.name !== rowIdDimension?.name) + .forEach((dim) => { + const isArrayColumn = isResolvedArrayColumn(dim.name); + + // The dimension's sql field already has the correct reference (e.g., __resolved_query."__row_id") + // We just need to wrap it in the aggregation function + const columnRef = dim.sql; + + // Use ARRAY_AGG for resolved array columns, MAX for others + // Filter out null values for ARRAY_AGG using FILTER clause + const aggregationFn = isArrayColumn + ? `COALESCE(ARRAY_AGG(DISTINCT ${columnRef}) FILTER (WHERE ${columnRef} IS NOT NULL), [])` + : `MAX(${columnRef})`; + + aggregationMeasures.push({ + name: dim.name, + sql: aggregationFn, + type: dim.type, + alias: dim.alias, + }); + }); + + // Update the schema with aggregation measures + const schemaWithAggregation: TableSchema = { + ...aggregationBaseTableSchema, + measures: aggregationMeasures, + dimensions: [rowIdDimension], + }; + + // Generate the final SQL + const aggregatedSql = await cubeQueryToSQL({ + query: { + measures: aggregationMeasures.map((m) => + getNamespacedKey(baseTableName, m.name) + ), + dimensions: [getNamespacedKey(baseTableName, rowIdDimension.name)], + }, + tableSchemas: [schemaWithAggregation], + contextParams, + }); + + // Order by row_id to maintain consistent ordering before excluding it + return wrapWithRowIdOrderingAndExclusion( + aggregatedSql, + ROW_ID_DIMENSION_NAME + ); +}; diff --git a/meerkat-core/src/resolution/steps/resolution-step.ts b/meerkat-core/src/resolution/steps/resolution-step.ts new file mode 100644 index 00000000..186ad134 --- /dev/null +++ b/meerkat-core/src/resolution/steps/resolution-step.ts @@ -0,0 +1,152 @@ +import { + ContextParams, + createWrapperTableSchema, + generateResolutionJoinPaths, + generateResolutionSchemas, + generateResolvedDimensions, + getColumnReference, + getNamespacedKey, + memberKeyToSafeKey, + Query, + ResolutionConfig, + TableSchema, +} from '../../index'; + +/** + * Apply resolution (join with lookup tables) + * + * This function: + * 1. Uses the base table schema from Phase 1 (source of truth) + * 2. Generates resolution schemas for array fields + * 3. Sets up join paths between unnested data and resolution tables + * @returns Table schema with resolved values from lookup tables + */ +export const getResolvedTableSchema = async ({ + baseTableSchema, + resolutionConfig, + columnProjections, + contextParams, + cubeQueryToSQL, +}: { + baseTableSchema: TableSchema; + resolutionConfig: ResolutionConfig; + columnProjections: string[]; + contextParams?: ContextParams; + cubeQueryToSQL: (params: { + query: Query; + tableSchemas: TableSchema[]; + contextParams?: ContextParams; + }) => Promise; +}): Promise => { + const updatedBaseTableSchema: TableSchema = baseTableSchema; + + // Generate resolution schemas for fields that need resolution + const resolutionSchemas = generateResolutionSchemas(resolutionConfig, [ + updatedBaseTableSchema, + ]); + + const joinPaths = generateResolutionJoinPaths( + updatedBaseTableSchema.name, + resolutionConfig, + [updatedBaseTableSchema] + ); + + const tempQuery: Query = { + measures: [], + dimensions: baseTableSchema.dimensions.map((d) => + getNamespacedKey(updatedBaseTableSchema.name, d.name) + ), + }; + + const updatedColumnProjections = columnProjections?.map((cp) => + memberKeyToSafeKey(cp) + ); + // Generate resolved dimensions using columnProjections + const resolvedDimensions = generateResolvedDimensions( + updatedBaseTableSchema.name, + tempQuery, + resolutionConfig, + updatedColumnProjections + ); + + // Create query and generate SQL + const resolutionQuery: Query = { + measures: [], + dimensions: resolvedDimensions, + joinPaths, + }; + + const resolvedSql = await cubeQueryToSQL({ + query: resolutionQuery, + tableSchemas: [updatedBaseTableSchema, ...resolutionSchemas], + contextParams, + }); + + // Use the baseTableSchema which already has all the column info + const resolvedTableSchema: TableSchema = createWrapperTableSchema( + resolvedSql, + updatedBaseTableSchema + ); + + // Create a map of resolution schema dimensions by original column name + const resolutionDimensionsByColumnName = new Map(); + + // Create a map of resolution schemas by config name for efficient lookup + const resolutionSchemaByConfigName = new Map< + string, + (typeof resolutionSchemas)[0] + >(); + resolutionSchemas.forEach((resSchema) => { + resolutionConfig.columnConfigs.forEach((config) => { + if ( + resSchema.dimensions.some((dim) => dim.name.startsWith(config.name)) + ) { + resolutionSchemaByConfigName.set(config.name, resSchema); + } + }); + }); + + // Build the dimension map using the pre-indexed schemas + resolutionConfig.columnConfigs.forEach((config) => { + const resSchema = resolutionSchemaByConfigName.get(config.name); + if (resSchema) { + resolutionDimensionsByColumnName.set( + config.name, + resSchema.dimensions.map((dim) => ({ + name: dim.name, + sql: getColumnReference(resolvedTableSchema.name, dim), + type: dim.type, + alias: dim.alias, + })) + ); + } + }); + + // Maintain the same order as columnProjections + // Replace dimensions that need resolution with their resolved counterparts + resolvedTableSchema.dimensions = (updatedColumnProjections || []).flatMap( + (projectionName) => { + // Check if this column has resolved dimensions + const resolvedDims = resolutionDimensionsByColumnName.get(projectionName); + if (resolvedDims) { + // Use resolved dimensions + return resolvedDims; + } + + // Otherwise, find the original dimension from baseTableSchema + const originalDim = baseTableSchema.dimensions.find( + (d) => d.name === projectionName + ); + if (originalDim) { + return [originalDim]; + } + + // If not found, throw an error + throw new Error( + `Column projection '${projectionName}' not found in base table schema dimensions` + ); + } + ); + + return resolvedTableSchema; +}; diff --git a/meerkat-core/src/resolution/steps/unnest-step.ts b/meerkat-core/src/resolution/steps/unnest-step.ts new file mode 100644 index 00000000..a04ee1db --- /dev/null +++ b/meerkat-core/src/resolution/steps/unnest-step.ts @@ -0,0 +1,56 @@ +import { + ContextParams, + createWrapperTableSchema, + getNamespacedKey, + Query, + ResolutionConfig, + TableSchema, + withArrayFlattenModifier, +} from '../../index'; + +/** + * Apply unnesting + * + * This function performs 1 step: + * 1. Create schema with unnest modifiers for array columns + * 2. Generate final unnested SQL + * @returns Table schema with unnest modifiers for array columns + */ +export const getUnnestTableSchema = async ({ + baseTableSchema, + resolutionConfig, + contextParams, + cubeQueryToSQL, +}: { + baseTableSchema: TableSchema; + resolutionConfig: ResolutionConfig; + contextParams?: ContextParams; + cubeQueryToSQL: (params: { + query: Query; + tableSchemas: TableSchema[]; + contextParams?: ContextParams; + }) => Promise; +}): Promise => { + const updatedBaseTableSchema = withArrayFlattenModifier( + baseTableSchema, + resolutionConfig + ); + + const unnestedSql = await cubeQueryToSQL({ + query: { + measures: [], + dimensions: updatedBaseTableSchema.dimensions.map((d) => + getNamespacedKey(updatedBaseTableSchema.name, d.name) + ), + }, + tableSchemas: [updatedBaseTableSchema], + contextParams, + }); + + const unnestedBaseTableSchema: TableSchema = createWrapperTableSchema( + unnestedSql, + baseTableSchema + ); + + return unnestedBaseTableSchema; +}; diff --git a/meerkat-core/src/resolution/types.ts b/meerkat-core/src/resolution/types.ts index 0a0c1981..3f7824b7 100644 --- a/meerkat-core/src/resolution/types.ts +++ b/meerkat-core/src/resolution/types.ts @@ -1,9 +1,16 @@ -import { TableSchema } from '../types/cube-types/table'; +import { + DimensionType, + MeasureType, + TableSchema, +} from '../types/cube-types/table'; export interface ResolutionColumnConfig { // Name of the column that needs resolution. // Should match a measure or dimension in the query. name: string; + // Type of the dimension/measure (e.g., 'string', 'number', 'string_array') + // Used to determine if special array handling (UNNEST/ARRAY_AGG) is needed. + type: DimensionType | MeasureType; // Name of the data source to use for resolution. source: string; // Name of the column in the data source to join on. diff --git a/meerkat-core/src/types/cube-types/table.ts b/meerkat-core/src/types/cube-types/table.ts index 4578a4e7..4bfd5a24 100644 --- a/meerkat-core/src/types/cube-types/table.ts +++ b/meerkat-core/src/types/cube-types/table.ts @@ -27,6 +27,7 @@ export type Dimension = { type: DimensionType; modifier?: { shouldUnnestGroupBy?: boolean; + shouldFlattenArray?: boolean; }; alias?: string; }; diff --git a/meerkat-core/src/utils/find-in-table-schema.ts b/meerkat-core/src/utils/find-in-table-schema.ts index c58e25ea..66065111 100644 --- a/meerkat-core/src/utils/find-in-table-schema.ts +++ b/meerkat-core/src/utils/find-in-table-schema.ts @@ -50,8 +50,18 @@ export const findInDimensionSchemas = ( export const findInSchemas = (name: string, tableSchemas: TableSchema[]) => { /* ** Finds the dimension or measure in the provided table schemas. - ** Assumes the provided name is namespaced as `tableName.columnName`. + ** Handles both namespaced (`tableName.columnName`) and non-namespaced names. */ + // TODO: Move to only using namespaced keys. + if (!name.includes('.')) { + if (tableSchemas.length > 1) { + throw new Error( + `Multiple table schemas found for ${name} and field doesn't have a table name` + ); + } + return findInSchema(name, tableSchemas[0]); + } + const [tableName, columnName] = splitIntoDataSourceAndFields(name); const tableSchema = tableSchemas.find((table) => table.name === tableName); if (!tableSchema) { diff --git a/meerkat-node/package.json b/meerkat-node/package.json index f56f91ee..911eb07d 100644 --- a/meerkat-node/package.json +++ b/meerkat-node/package.json @@ -1,6 +1,6 @@ { "name": "@devrev/meerkat-node", - "version": "0.0.105", + "version": "0.0.106", "dependencies": { "@swc/helpers": "~0.5.0", "@devrev/meerkat-core": "*", diff --git a/meerkat-node/src/__tests__/cube-to-sql-with-resolution.spec.ts b/meerkat-node/src/__tests__/cube-to-sql-with-resolution.spec.ts new file mode 100644 index 00000000..1455f6a0 --- /dev/null +++ b/meerkat-node/src/__tests__/cube-to-sql-with-resolution.spec.ts @@ -0,0 +1,557 @@ +import { Query, ResolutionConfig, TableSchema } from '@devrev/meerkat-core'; +import { cubeQueryToSQLWithResolution } from '../cube-to-sql-with-resolution/cube-to-sql-with-resolution'; +import { duckdbExec } from '../duckdb-exec'; +const CREATE_TEST_TABLE = `CREATE TABLE tickets ( + id INTEGER, + owners VARCHAR[], + tags VARCHAR[], + created_by VARCHAR, + subscribers_count INTEGER +)`; + +const INPUT_DATA_QUERY = `INSERT INTO tickets VALUES +(2, ['owner2', 'owner3'], ['tag2', 'tag3'], 'user2', 10), +(1, ['owner1', 'owner2'], ['tag1'], 'user1', 30), +(3, ['owner4'], ['tag1', 'tag4', 'tag3'], 'user3', 80)`; + +const CREATE_RESOLUTION_TABLE = `CREATE TABLE owners_lookup ( + id VARCHAR, + display_name VARCHAR, + email VARCHAR +)`; + +const RESOLUTION_DATA_QUERY = `INSERT INTO owners_lookup VALUES +('owner1', 'Alice Smith', 'alice@example.com'), +('owner2', 'Bob Jones', 'bob@example.com'), +('owner3', 'Charlie Brown', 'charlie@example.com'), +('owner4', 'Diana Prince', 'diana@example.com')`; + +const CREATE_TAGS_LOOKUP_TABLE = `CREATE TABLE tags_lookup ( + id VARCHAR, + tag_name VARCHAR +)`; +const CREATE_CREATED_BY_LOOKUP_TABLE = `CREATE TABLE created_by_lookup ( + id VARCHAR, + name VARCHAR +)`; +const CREATED_BY_LOOKUP_DATA_QUERY = `INSERT INTO created_by_lookup VALUES +('user1', 'User 1'), +('user2', 'User 2'), +('user3', 'User 3')`; +const TAGS_LOOKUP_DATA_QUERY = `INSERT INTO tags_lookup VALUES +('tag1', 'Tag 1'), +('tag2', 'Tag 2'), +('tag3', 'Tag 3'), +('tag4', 'Tag 4')`; + +const TICKETS_TABLE_SCHEMA: TableSchema = { + name: 'tickets', + sql: 'select * from tickets', + measures: [ + { + name: 'count', + sql: 'COUNT(*)', + type: 'number', + }, + ], + dimensions: [ + { + alias: 'ID', + name: 'id', + sql: 'id', + type: 'number', + }, + { + alias: 'Created By', + name: 'created_by', + sql: 'created_by', + type: 'string', + }, + { + alias: 'Owners', + name: 'owners', + sql: 'owners', + type: 'string_array', + }, + { + alias: 'Tags', + name: 'tags', + sql: 'tags', + type: 'string_array', + }, + ], +}; + +const OWNERS_LOOKUP_SCHEMA: TableSchema = { + name: 'owners_lookup', + sql: 'select * from owners_lookup', + measures: [], + dimensions: [ + { + alias: 'ID', + name: 'id', + sql: 'id', + type: 'string', + }, + { + alias: 'Display Name', + name: 'display_name', + sql: 'display_name', + type: 'string', + }, + { + alias: 'Email', + name: 'email', + sql: 'email', + type: 'string', + }, + ], +}; + +const TAGS_LOOKUP_SCHEMA: TableSchema = { + name: 'tags_lookup', + sql: 'select * from tags_lookup', + measures: [], + dimensions: [ + { + alias: 'ID', + name: 'id', + sql: 'id', + type: 'string', + }, + { + alias: 'Tag Name', + name: 'tag_name', + sql: 'tag_name', + type: 'string', + }, + ], +}; + +const CREATED_BY_LOOKUP_SCHEMA: TableSchema = { + name: 'created_by_lookup', + sql: 'select * from created_by_lookup', + measures: [], + dimensions: [ + { + alias: 'ID', + name: 'id', + sql: 'id', + type: 'string', + }, + { + alias: 'Name', + name: 'name', + sql: 'name', + type: 'string', + }, + ], +}; +describe('cubeQueryToSQLWithResolution - Array field resolution', () => { + jest.setTimeout(1000000); + beforeAll(async () => { + // Create test tables + await duckdbExec(CREATE_TEST_TABLE); + await duckdbExec(INPUT_DATA_QUERY); + await duckdbExec(CREATE_RESOLUTION_TABLE); + await duckdbExec(RESOLUTION_DATA_QUERY); + await duckdbExec(CREATE_TAGS_LOOKUP_TABLE); + await duckdbExec(TAGS_LOOKUP_DATA_QUERY); + await duckdbExec(CREATE_CREATED_BY_LOOKUP_TABLE); + await duckdbExec(CREATED_BY_LOOKUP_DATA_QUERY); + }); + + it('Should resolve array fields with lookup tables', async () => { + const query: Query = { + measures: ['tickets.count'], + dimensions: ['tickets.id', 'tickets.owners'], + order: { 'tickets.id': 'asc' }, + }; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'tickets.owners', + type: 'string_array' as const, + source: 'owners_lookup', + joinColumn: 'id', + resolutionColumns: ['display_name', 'email'], + }, + ], + tableSchemas: [OWNERS_LOOKUP_SCHEMA], + }; + + const sql = await cubeQueryToSQLWithResolution({ + query, + tableSchemas: [TICKETS_TABLE_SCHEMA], + resolutionConfig, + columnProjections: ['tickets.owners', 'tickets.count', 'tickets.id'], + }); + + console.log('SQL with resolution:', sql); + + // Execute the SQL to verify it works + const result = (await duckdbExec(sql)) as any[]; + console.log('Result:', result); + + // Without array unnesting, should have 3 rows (original count) + expect(result.length).toBe(3); + + // Verify ordering is maintained (ORDER BY tickets.id ASC) + expect(result[0].ID).toBe(1); + expect(result[1].ID).toBe(2); + expect(result[2].ID).toBe(3); + + // Each row should have the expected properties + expect(result[0]).toHaveProperty('tickets__count'); + expect(result[0]).toHaveProperty('ID'); + + // The owners field should be resolved with display_name and email + expect(result[0]).toHaveProperty('Owners - Display Name'); + expect(result[0]).toHaveProperty('Owners - Email'); + + const id1Record = result[0]; + // Note: Array order may not be preserved without index tracking in UNNEST/ARRAY_AGG + expect(id1Record['Owners - Display Name']).toEqual( + expect.arrayContaining(['Alice Smith', 'Bob Jones']) + ); + expect(id1Record['Owners - Email']).toEqual( + expect.arrayContaining(['alice@example.com', 'bob@example.com']) + ); + + const id2Record = result[1]; + expect(id2Record.ID).toBe(2); + expect(id2Record['Owners - Display Name']).toEqual( + expect.arrayContaining(['Bob Jones', 'Charlie Brown']) + ); + expect(id2Record['Owners - Email']).toEqual( + expect.arrayContaining(['bob@example.com', 'charlie@example.com']) + ); + }); + + it('Should handle multiple array fields that need unnesting', async () => { + const query: Query = { + measures: ['tickets.count'], + dimensions: [ + 'tickets.id', + 'tickets.owners', //array + 'tickets.tags', // array + 'tickets.created_by', // scalar + ], + order: { 'tickets.id': 'asc' }, + }; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'tickets.owners', + type: 'string_array' as const, + source: 'owners_lookup', + joinColumn: 'id', + resolutionColumns: ['display_name'], + }, + { + name: 'tickets.tags', + type: 'string_array' as const, + source: 'tags_lookup', + joinColumn: 'id', + resolutionColumns: ['tag_name'], + }, + { + name: 'tickets.created_by', + type: 'string' as const, + source: 'created_by_lookup', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + ], + tableSchemas: [ + OWNERS_LOOKUP_SCHEMA, + TAGS_LOOKUP_SCHEMA, + CREATED_BY_LOOKUP_SCHEMA, + ], + }; + + const columnProjections = [ + 'tickets.id', + 'tickets.owners', + 'tickets.tags', + 'tickets.created_by', + 'tickets.count', + ]; + const sql = await cubeQueryToSQLWithResolution({ + query, + tableSchemas: [TICKETS_TABLE_SCHEMA], + resolutionConfig, + columnProjections, + }); + + console.log('SQL (multiple arrays):', sql); + + // Execute the SQL to verify it works + const result = (await duckdbExec(sql)) as any[]; + console.log('Result:', result); + + // Should have 3 rows (original ticket count) + expect(result.length).toBe(3); + + // Verify ordering is maintained (ORDER BY tickets.id ASC) + expect(result[0].ID).toBe(1); + expect(result[1].ID).toBe(2); + expect(result[2].ID).toBe(3); + + // Each row should have the expected properties + expect(result[0]).toHaveProperty('tickets__count'); + expect(result[0]).toHaveProperty('ID'); + expect(result[0]).toHaveProperty('Owners - Display Name'); + expect(result[0]).toHaveProperty('Tags - Tag Name'); + expect(result[0]).toHaveProperty('Created By - Name'); + + // Verify ticket 1: 2 owners, 1 tag + const ticket1 = result[0]; + expect(ticket1['Owners - Display Name']).toEqual( + expect.arrayContaining(['Alice Smith', 'Bob Jones']) + ); + expect(ticket1['Owners - Display Name'].length).toBe(2); + expect(ticket1['Tags - Tag Name']).toEqual( + expect.arrayContaining(['Tag 1']) + ); + expect(ticket1['Tags - Tag Name'].length).toBe(1); + expect(ticket1['Created By - Name']).toBe('User 1'); + + // Verify ticket 2: 2 owners, 2 tags + const ticket2 = result[1]; + expect(ticket2.ID).toBe(2); + expect(ticket2['Owners - Display Name']).toEqual( + expect.arrayContaining(['Bob Jones', 'Charlie Brown']) + ); + expect(ticket2['Owners - Display Name'].length).toBe(2); + expect(ticket2['Tags - Tag Name']).toEqual( + expect.arrayContaining(['Tag 2', 'Tag 3']) + ); + expect(ticket2['Tags - Tag Name'].length).toBe(2); + expect(ticket2['Created By - Name']).toBe('User 2'); + + // Verify ticket 3: 1 owner, 3 tags + const ticket3 = result[2]; + expect(ticket3.ID).toBe(3); + expect(ticket3['Owners - Display Name']).toEqual( + expect.arrayContaining(['Diana Prince']) + ); + expect(ticket3['Owners - Display Name'].length).toBe(1); + expect(ticket3['Tags - Tag Name']).toEqual( + expect.arrayContaining(['Tag 1', 'Tag 3', 'Tag 4']) + ); + expect(ticket3['Tags - Tag Name'].length).toBe(3); + expect(ticket3['Created By - Name']).toBe('User 3'); + }); + + it('Should handle only scalar field resolution without unnesting', async () => { + const query: Query = { + measures: ['tickets.count'], + dimensions: [ + 'tickets.id', + 'tickets.owners', + 'tickets.tags', + 'tickets.created_by', + ], + order: { 'tickets.id': 'asc' }, + }; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'tickets.created_by', + type: 'string' as const, + source: 'created_by_lookup', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + ], + tableSchemas: [CREATED_BY_LOOKUP_SCHEMA], + }; + + const columnProjections = [ + 'tickets.id', + 'tickets.owners', + 'tickets.tags', + 'tickets.created_by', + 'tickets.count', + ]; + + const sql = await cubeQueryToSQLWithResolution({ + query, + tableSchemas: [TICKETS_TABLE_SCHEMA], + resolutionConfig, + columnProjections, + }); + + console.log('SQL (scalar resolution only):', sql); + + // Execute the SQL to verify it works + const result = (await duckdbExec(sql)) as any[]; + console.log('Result:', result); + + // Should have 3 rows (no array unnesting, only scalar resolution) + expect(result.length).toBe(3); + + // Verify ordering is maintained (ORDER BY tickets.id ASC) + expect(result[0].ID).toBe(1); + expect(result[1].ID).toBe(2); + expect(result[2].ID).toBe(3); + + // Each row should have the expected properties + expect(result[0]).toHaveProperty('tickets__count'); + expect(result[0]).toHaveProperty('ID'); + expect(result[0]).toHaveProperty('Owners'); // Original array, not resolved + expect(result[0]).toHaveProperty('Tags'); // Original array, not resolved + expect(result[0]).toHaveProperty('Created By - Name'); // Resolved scalar field + + // Verify scalar resolution worked correctly + const ticket1 = result[0]; + expect(ticket1.ID).toBe(1); + expect(ticket1['Created By - Name']).toBe('User 1'); + expect(Array.isArray(ticket1['Owners'])).toBe(true); + expect(ticket1['Owners']).toEqual(['owner1', 'owner2']); + expect(Array.isArray(ticket1['Tags'])).toBe(true); + expect(ticket1['Tags']).toEqual(['tag1']); + + const ticket2 = result[1]; + expect(ticket2.ID).toBe(2); + expect(ticket2['Created By - Name']).toBe('User 2'); + expect(ticket2['Owners']).toEqual(['owner2', 'owner3']); + expect(ticket2['Tags']).toEqual(['tag2', 'tag3']); + + const ticket3 = result[2]; + expect(ticket3.ID).toBe(3); + expect(ticket3['Created By - Name']).toBe('User 3'); + expect(ticket3['Owners']).toEqual(['owner4']); + expect(ticket3['Tags']).toEqual(['tag1', 'tag4', 'tag3']); + }); + + it('Should return aggregated SQL even when no resolution is configured', async () => { + const query: Query = { + measures: ['tickets.count'], + dimensions: ['tickets.id', 'tickets.created_by'], + }; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [], + tableSchemas: [], + }; + + const sql = await cubeQueryToSQLWithResolution({ + query, + tableSchemas: [TICKETS_TABLE_SCHEMA], + resolutionConfig, + }); + + console.log('SQL without resolution:', sql); + + // Should not have resolution-specific features when no resolution is configured + expect(sql).toContain('__row_id'); + expect(sql).not.toContain('unnest'); + expect(sql).not.toContain('ARRAY_AGG'); + + // Execute the SQL to verify it works + const result = (await duckdbExec(sql)) as any[]; + console.log('Result:', result); + + // Should have 3 rows (original ticket count) + expect(result.length).toBe(3); + + // Each row should have basic properties (no resolution, so original column names) + expect(result[0]).toHaveProperty('tickets__count'); + expect(result[0]).toHaveProperty('ID'); + expect(result[0]).toHaveProperty('Created By'); + + // Verify data is correct (original scalar values, not resolved) + const ticket1 = result.find((r: any) => r.ID === 1); + expect(ticket1['Created By']).toBe('user1'); + + const ticket2 = result.find((r: any) => r.ID === 2); + expect(ticket2['Created By']).toBe('user2'); + + const ticket3 = result.find((r: any) => r.ID === 3); + expect(ticket3['Created By']).toBe('user3'); + }); + + it('Should handle resolution without ORDER BY clause', async () => { + const query: Query = { + measures: ['tickets.count'], + dimensions: [ + 'tickets.id', + 'tickets.owners', + 'tickets.tags', + 'tickets.created_by', + ], + // NOTE: No order clause specified + }; + + const resolutionConfig: ResolutionConfig = { + columnConfigs: [ + { + name: 'tickets.created_by', + type: 'string' as const, + source: 'created_by_lookup', + joinColumn: 'id', + resolutionColumns: ['name'], + }, + ], + tableSchemas: [CREATED_BY_LOOKUP_SCHEMA], + }; + + const columnProjections = [ + 'tickets.id', + 'tickets.owners', + 'tickets.tags', + 'tickets.created_by', + 'tickets.count', + ]; + + const sql = await cubeQueryToSQLWithResolution({ + query, + tableSchemas: [TICKETS_TABLE_SCHEMA], + resolutionConfig, + columnProjections, + }); + + console.log('SQL (no ORDER BY):', sql); + + // Should contain row_id even without ORDER BY (for consistency) + expect(sql).toContain('__row_id'); + // Should contain row_number() OVER () without ORDER BY inside + expect(sql).toContain('row_number() OVER ()'); + // Should still order by row_id at the end + expect(sql).toContain('order by __row_id'); + + // Execute the SQL to verify it works + const result = (await duckdbExec(sql)) as any[]; + console.log('Result (no ORDER BY):', result); + + // Should have 3 rows (no array unnesting, only scalar resolution) + expect(result.length).toBe(3); + + // Each row should have the expected properties + expect(result[0]).toHaveProperty('tickets__count'); + expect(result[0]).toHaveProperty('ID'); + expect(result[0]).toHaveProperty('Owners'); // Original array, not resolved + expect(result[0]).toHaveProperty('Tags'); // Original array, not resolved + expect(result[0]).toHaveProperty('Created By - Name'); // Resolved scalar field + + // Verify scalar resolution worked correctly + // Order might vary without ORDER BY, so we find by ID + const ticket1 = result.find((r: any) => r.ID === 1); + expect(ticket1['Created By - Name']).toBe('User 1'); + expect(Array.isArray(ticket1['Owners'])).toBe(true); + expect(ticket1['Owners']).toEqual(['owner1', 'owner2']); + + const ticket2 = result.find((r: any) => r.ID === 2); + expect(ticket2['Created By - Name']).toBe('User 2'); + expect(ticket2['Owners']).toEqual(['owner2', 'owner3']); + + const ticket3 = result.find((r: any) => r.ID === 3); + expect(ticket3['Created By - Name']).toBe('User 3'); + expect(ticket3['Owners']).toEqual(['owner4']); + }); +}); diff --git a/meerkat-node/src/__tests__/resolution.spec.ts b/meerkat-node/src/__tests__/resolution.spec.ts index 90bde754..c909afc3 100644 --- a/meerkat-node/src/__tests__/resolution.spec.ts +++ b/meerkat-node/src/__tests__/resolution.spec.ts @@ -105,6 +105,12 @@ export const BASE_TABLE_SCHEMA_WITH_ALIASES = { type: 'string', alias: 'Part ID 2', }, + { + name: 'work_id', + sql: 'base_table.work_id', + type: 'string', + alias: 'Work ID', + }, ], }; @@ -128,6 +134,33 @@ export const DIM_PART_SCHEMA_WITH_ALIASES = { ], }; +export const DIM_WORK_SCHEMA_WITH_ALIASES = { + name: 'dim_work', + sql: 'select id, display_id, title from system.dim_issue', + measures: [], + dimensions: [ + { + name: 'id', + sql: 'dim_work.id', + type: 'string', + alias: 'ID', + }, + { + name: 'display_id', + sql: 'dim_work.display_id', + type: 'string', + alias: 'Display ID', + }, + + { + name: 'title', + sql: 'dim_work.title', + type: 'string', + alias: 'Title', + }, + ], +}; + describe('Resolution Tests', () => { it('No Resolution Config', async () => { const query = { @@ -150,22 +183,16 @@ describe('Resolution Tests', () => { }); console.info(`SQL: `, sql); const expectedSQL = ` - SELECT - base_table__part_id_1, - base_table__random_column, - base_table__work_id, - base_table__part_id_2 - FROM - (SELECT - base_table.part_id_1 AS base_table__part_id_1, - base_table.random_column AS base_table__random_column, - base_table.work_id AS base_table__work_id, - base_table.part_id_2 AS base_table__part_id_2, - * + select * exclude(__row_id) from + (SELECT + MAX(__base_query."base_table__part_id_1") AS "base_table__part_id_1" , + MAX(__base_query."base_table__random_column") AS "base_table__random_column" , + MAX(__base_query."base_table__work_id") AS "base_table__work_id" , + MAX(__base_query."base_table__part_id_2") AS "base_table__part_id_2" , + "__row_id" FROM (SELECT __base_query."__row_id" AS "__row_id", * FROM - (select * from base_table) - AS base_table) - AS base_table + (SELECT "base_table__part_id_1", "base_table__random_column", "base_table__work_id", "base_table__part_id_2", "__row_id" FROM (SELECT __base_query."base_table__part_id_1" AS "base_table__part_id_1", __base_query."base_table__random_column" AS "base_table__random_column", __base_query."base_table__work_id" AS "base_table__work_id", __base_query."base_table__part_id_2" AS "base_table__part_id_2", __base_query."__row_id" AS "__row_id", * FROM (SELECT "base_table__part_id_1", "base_table__random_column", "base_table__work_id", "base_table__part_id_2", "__row_id" FROM (SELECT __base_query.base_table__part_id_1 AS "base_table__part_id_1", __base_query.base_table__random_column AS "base_table__random_column", __base_query.base_table__work_id AS "base_table__work_id", __base_query.base_table__part_id_2 AS "base_table__part_id_2", row_number() OVER () AS "__row_id", * FROM (SELECT base_table__part_id_1, base_table__random_column, base_table__work_id, base_table__part_id_2 FROM (SELECT base_table.part_id_1 AS base_table__part_id_1, base_table.random_column AS base_table__random_column, base_table.work_id AS base_table__work_id, base_table.part_id_2 AS base_table__part_id_2, * FROM (select * from base_table) AS base_table) AS base_table) AS __base_query) AS __base_query) AS __base_query) AS __base_query) AS __base_query) AS __base_query GROUP BY __row_id) + order by __row_id `; expect(sql.replace(/\s+/g, ' ').trim()).toBe( expectedSQL.replace(/\s+/g, ' ').trim() @@ -191,6 +218,7 @@ describe('Resolution Tests', () => { columnConfigs: [ { name: 'base_table.part_id_1', + type: 'string' as const, source: 'dim_part', joinColumn: 'id', resolutionColumns: ['display_id'], @@ -215,48 +243,51 @@ describe('Resolution Tests', () => { const sql = await cubeQueryToSQLWithResolution({ query, - tableSchemas: [BASE_TABLE_SCHEMA], + tableSchemas: [BASE_TABLE_SCHEMA_WITH_ALIASES], resolutionConfig: { columnConfigs: [ { name: 'base_table.part_id_1', + type: 'string' as const, source: 'dim_part', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.work_id', + type: 'string' as const, source: 'dim_work', joinColumn: 'id', resolutionColumns: ['display_id', 'title'], }, { name: 'base_table.part_id_2', + type: 'string' as const, source: 'dim_part', joinColumn: 'id', resolutionColumns: ['display_id'], }, ], - tableSchemas: [DIM_PART_SCHEMA, DIM_WORK_SCHEMA], + tableSchemas: [ + DIM_PART_SCHEMA_WITH_ALIASES, + DIM_WORK_SCHEMA_WITH_ALIASES, + ], }, }); console.info(`SQL: `, sql); const expectedSQL = ` - SELECT - "base_table__part_id_1 - display_id", - "base_table__random_column", - "base_table__work_id - display_id", - "base_table__work_id - title", - "base_table__part_id_2 - display_id" - FROM - (SELECT __base_query.base_table__random_column AS "base_table__random_column", * FROM (SELECT base_table__part_id_1, base_table__random_column, base_table__work_id, base_table__part_id_2 FROM (SELECT base_table.part_id_1 AS base_table__part_id_1, base_table.random_column AS base_table__random_column, base_table.work_id AS base_table__work_id, base_table.part_id_2 AS base_table__part_id_2, * FROM (select * from base_table) AS base_table) AS base_table) AS __base_query - LEFT JOIN (SELECT base_table__part_id_1.display_id AS "base_table__part_id_1 - display_id", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 - ON __base_query.base_table__part_id_1 = base_table__part_id_1.id - LEFT JOIN (SELECT base_table__work_id.display_id AS "base_table__work_id - display_id", base_table__work_id.title AS "base_table__work_id - title", * FROM (select id, display_id, title from system.dim_issue) AS base_table__work_id) AS base_table__work_id - ON __base_query.base_table__work_id = base_table__work_id.id - LEFT JOIN (SELECT base_table__part_id_2.display_id AS "base_table__part_id_2 - display_id", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_2) AS base_table__part_id_2 - ON __base_query.base_table__part_id_2 = base_table__part_id_2.id) - AS MEERKAT_GENERATED_TABLE + select * exclude(__row_id) from + (SELECT + MAX(__base_query."Part ID 1 - Display ID") AS "Part ID 1 - Display ID" , + MAX(__base_query."Random Column") AS "Random Column" , + MAX(__base_query."Work ID - Display ID") AS "Work ID - Display ID" , + MAX(__base_query."Work ID - Title") AS "Work ID - Title" , + MAX(__base_query."Part ID 2 - Display ID") AS "Part ID 2 - Display ID" , + "__row_id" + FROM (SELECT __base_query."__row_id" AS "__row_id", * FROM (SELECT "Part ID 1 - Display ID", "Random Column", "Work ID - Display ID", "Work ID - Title", "Part ID 2 - Display ID", "__row_id" FROM (SELECT __base_query."Random Column" AS "Random Column", __base_query."__row_id" AS "__row_id", * FROM (SELECT "Part ID 1", "Random Column", "Work ID", "Part ID 2", "__row_id" FROM (SELECT __base_query."Part ID 1" AS "Part ID 1", __base_query."Random Column" AS "Random Column", __base_query."Work ID" AS "Work ID", __base_query."Part ID 2" AS "Part ID 2", row_number() OVER () AS "__row_id", * FROM (SELECT "Part ID 1", "Random Column", "Work ID", "Part ID 2" FROM (SELECT base_table.part_id_1 AS "Part ID 1", base_table.random_column AS "Random Column", base_table.work_id AS "Work ID", base_table.part_id_2 AS "Part ID 2", * FROM (select * from base_table) AS base_table) AS base_table) AS __base_query) AS __base_query) AS __base_query + LEFT JOIN (SELECT base_table__part_id_1.display_id AS "Part ID 1 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 ON __base_query."Part ID 1" = base_table__part_id_1.id LEFT JOIN (SELECT base_table__work_id.display_id AS "Work ID - Display ID", base_table__work_id.title AS "Work ID - Title", * FROM (select id, display_id, title from system.dim_issue) AS base_table__work_id) AS base_table__work_id ON __base_query."Work ID" = base_table__work_id.id + LEFT JOIN (SELECT base_table__part_id_2.display_id AS "Part ID 2 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_2) AS base_table__part_id_2 ON __base_query."Part ID 2" = base_table__part_id_2.id) AS MEERKAT_GENERATED_TABLE) AS __base_query) AS __base_query GROUP BY __row_id) + order by __row_id `; expect(sql.replace(/\s+/g, ' ').trim()).toBe( expectedSQL.replace(/\s+/g, ' ').trim() @@ -270,29 +301,23 @@ describe('Resolution Tests', () => { }; const sql = await cubeQueryToSQLWithResolution({ query, - tableSchemas: [BASE_TABLE_SCHEMA], + tableSchemas: [BASE_TABLE_SCHEMA_WITH_ALIASES], resolutionConfig: { columnConfigs: [ { name: 'base_table.part_id_1', + type: 'string' as const, source: 'dim_part', joinColumn: 'id', resolutionColumns: ['display_id'], }, ], - tableSchemas: [DIM_PART_SCHEMA], + tableSchemas: [DIM_PART_SCHEMA_WITH_ALIASES], }, }); console.info(`SQL: `, sql); const expectedSQL = ` - SELECT - "base_table__count", - "base_table__part_id_1 - display_id" - FROM - (SELECT __base_query.base_table__count AS "base_table__count", * FROM (SELECT count(*) AS base_table__count , base_table__part_id_1 FROM (SELECT base_table.part_id_1 AS base_table__part_id_1, * FROM (select * from base_table) AS base_table) AS base_table GROUP BY base_table__part_id_1) AS __base_query - LEFT JOIN (SELECT base_table__part_id_1.display_id AS "base_table__part_id_1 - display_id", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 - ON __base_query.base_table__part_id_1 = base_table__part_id_1.id) - AS MEERKAT_GENERATED_TABLE + select * exclude(__row_id) from (SELECT MAX(__base_query."Part ID 1 - Display ID") AS "Part ID 1 - Display ID" , MAX(__base_query."Count") AS "Count" , "__row_id" FROM (SELECT __base_query."__row_id" AS "__row_id", * FROM (SELECT "Part ID 1 - Display ID", "Count", "__row_id" FROM (SELECT __base_query."Count" AS "Count", __base_query."__row_id" AS "__row_id", * FROM (SELECT "Part ID 1", "Count", "__row_id" FROM (SELECT __base_query."Part ID 1" AS "Part ID 1", __base_query."Count" AS "Count", row_number() OVER () AS "__row_id", * FROM (SELECT count(*) AS "Count" , "Part ID 1" FROM (SELECT base_table.part_id_1 AS "Part ID 1", * FROM (select * from base_table) AS base_table) AS base_table GROUP BY "Part ID 1") AS __base_query) AS __base_query) AS __base_query LEFT JOIN (SELECT base_table__part_id_1.display_id AS "Part ID 1 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 ON __base_query."Part ID 1" = base_table__part_id_1.id) AS MEERKAT_GENERATED_TABLE) AS __base_query) AS __base_query GROUP BY __row_id) order by __row_id `; expect(sql.replace(/\s+/g, ' ').trim()).toBe( expectedSQL.replace(/\s+/g, ' ').trim() @@ -316,12 +341,14 @@ describe('Resolution Tests', () => { columnConfigs: [ { name: 'base_table.part_id_1', + type: 'string' as const, source: 'dim_part', joinColumn: 'id', resolutionColumns: ['display_id'], }, { name: 'base_table.part_id_2', + type: 'string' as const, source: 'dim_part', joinColumn: 'id', resolutionColumns: ['display_id'], @@ -332,17 +359,7 @@ describe('Resolution Tests', () => { }); console.info(`SQL: `, sql); const expectedSQL = ` - SELECT - "Part ID 1 - Display ID", - "Random Column", - "Part ID 2 - Display ID" - FROM - (SELECT __base_query."Random Column" AS "Random Column", * FROM (SELECT "Part ID 1", "Random Column", "Part ID 2" FROM (SELECT base_table.part_id_1 AS "Part ID 1", base_table.random_column AS "Random Column", base_table.part_id_2 AS "Part ID 2", * FROM (select * from base_table) AS base_table) AS base_table) AS __base_query - LEFT JOIN (SELECT base_table__part_id_1.display_id AS "Part ID 1 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 - ON __base_query."Part ID 1" = base_table__part_id_1.id - LEFT JOIN (SELECT base_table__part_id_2.display_id AS "Part ID 2 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_2) AS base_table__part_id_2 - ON __base_query."Part ID 2" = base_table__part_id_2.id) - AS MEERKAT_GENERATED_TABLE + select * exclude(__row_id) from (SELECT MAX(__base_query."Part ID 1 - Display ID") AS "Part ID 1 - Display ID" , MAX(__base_query."Random Column") AS "Random Column" , MAX(__base_query."Part ID 2 - Display ID") AS "Part ID 2 - Display ID" , "__row_id" FROM (SELECT __base_query."__row_id" AS "__row_id", * FROM (SELECT "Part ID 1 - Display ID", "Random Column", "Part ID 2 - Display ID", "__row_id" FROM (SELECT __base_query."Random Column" AS "Random Column", __base_query."__row_id" AS "__row_id", * FROM (SELECT "Part ID 1", "Random Column", "Part ID 2", "__row_id" FROM (SELECT __base_query."Part ID 1" AS "Part ID 1", __base_query."Random Column" AS "Random Column", __base_query."Part ID 2" AS "Part ID 2", row_number() OVER () AS "__row_id", * FROM (SELECT "Part ID 1", "Random Column", "Part ID 2" FROM (SELECT base_table.part_id_1 AS "Part ID 1", base_table.random_column AS "Random Column", base_table.part_id_2 AS "Part ID 2", * FROM (select * from base_table) AS base_table) AS base_table) AS __base_query) AS __base_query) AS __base_query LEFT JOIN (SELECT base_table__part_id_1.display_id AS "Part ID 1 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 ON __base_query."Part ID 1" = base_table__part_id_1.id LEFT JOIN (SELECT base_table__part_id_2.display_id AS "Part ID 2 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_2) AS base_table__part_id_2 ON __base_query."Part ID 2" = base_table__part_id_2.id) AS MEERKAT_GENERATED_TABLE) AS __base_query) AS __base_query GROUP BY __row_id) order by __row_id `; expect(sql.replace(/\s+/g, ' ').trim()).toBe( expectedSQL.replace(/\s+/g, ' ').trim() @@ -362,30 +379,24 @@ describe('Resolution Tests', () => { const sql = await cubeQueryToSQLWithResolution({ query, - tableSchemas: [BASE_TABLE_SCHEMA], + tableSchemas: [BASE_TABLE_SCHEMA_WITH_ALIASES], resolutionConfig: { columnConfigs: [ { name: 'base_table.part_id_1', + type: 'string' as const, source: 'dim_part', joinColumn: 'id', resolutionColumns: ['display_id'], }, ], - tableSchemas: [DIM_PART_SCHEMA, DIM_WORK_SCHEMA], + tableSchemas: [DIM_PART_SCHEMA_WITH_ALIASES], }, columnProjections: ['base_table.random_column', 'base_table.part_id_1'], }); console.info(`SQL: `, sql); const expectedSQL = ` - SELECT - "base_table__random_column", - "base_table__part_id_1 - display_id" - FROM - (SELECT __base_query.base_table__random_column AS "base_table__random_column", * FROM (SELECT base_table__part_id_1, base_table__random_column, base_table__work_id, base_table__part_id_2 FROM (SELECT base_table.part_id_1 AS base_table__part_id_1, base_table.random_column AS base_table__random_column, base_table.work_id AS base_table__work_id, base_table.part_id_2 AS base_table__part_id_2, * FROM (select * from base_table) AS base_table) AS base_table) AS __base_query - LEFT JOIN (SELECT base_table__part_id_1.display_id AS "base_table__part_id_1 - display_id", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 - ON __base_query.base_table__part_id_1 = base_table__part_id_1.id) - AS MEERKAT_GENERATED_TABLE + select * exclude(__row_id) from (SELECT MAX(__base_query."Random Column") AS "Random Column" , MAX(__base_query."Part ID 1 - Display ID") AS "Part ID 1 - Display ID" , "__row_id" FROM (SELECT __base_query."__row_id" AS "__row_id", * FROM (SELECT "Random Column", "Part ID 1 - Display ID", "__row_id" FROM (SELECT __base_query."Random Column" AS "Random Column", __base_query."__row_id" AS "__row_id", * FROM (SELECT "Random Column", "Part ID 1", "__row_id" FROM (SELECT __base_query."Random Column" AS "Random Column", __base_query."Part ID 1" AS "Part ID 1", row_number() OVER () AS "__row_id", * FROM (SELECT "Part ID 1", "Random Column", "Work ID", "Part ID 2" FROM (SELECT base_table.part_id_1 AS "Part ID 1", base_table.random_column AS "Random Column", base_table.work_id AS "Work ID", base_table.part_id_2 AS "Part ID 2", * FROM (select * from base_table) AS base_table) AS base_table) AS __base_query) AS __base_query) AS __base_query LEFT JOIN (SELECT base_table__part_id_1.display_id AS "Part ID 1 - Display ID", * FROM (select id, display_id from system.dim_feature UNION ALL select id, display_id from system.dim_product) AS base_table__part_id_1) AS base_table__part_id_1 ON __base_query."Part ID 1" = base_table__part_id_1.id) AS MEERKAT_GENERATED_TABLE) AS __base_query) AS __base_query GROUP BY __row_id) order by __row_id `; expect(sql.replace(/\s+/g, ' ').trim()).toBe( expectedSQL.replace(/\s+/g, ' ').trim() diff --git a/meerkat-node/src/cube-to-sql-with-resolution/cube-to-sql-with-resolution.ts b/meerkat-node/src/cube-to-sql-with-resolution/cube-to-sql-with-resolution.ts index 8ef3de8e..44145be6 100644 --- a/meerkat-node/src/cube-to-sql-with-resolution/cube-to-sql-with-resolution.ts +++ b/meerkat-node/src/cube-to-sql-with-resolution/cube-to-sql-with-resolution.ts @@ -1,17 +1,20 @@ import { + BASE_DATA_SOURCE_NAME, ContextParams, + getAggregatedSql as coreGetAggregatedSql, + getResolvedTableSchema as coreGetResolvedTableSchema, + getUnnestTableSchema as coreGetUnnestTableSchema, createBaseTableSchema, - generateResolutionJoinPaths, - generateResolutionSchemas, - generateResolvedDimensions, + Dimension, + generateRowNumberSql, + memberKeyToSafeKey, Query, ResolutionConfig, + ROW_ID_DIMENSION_NAME, + shouldSkipResolution, TableSchema, } from '@devrev/meerkat-core'; -import { - cubeQueryToSQL, - CubeQueryToSQLParams, -} from '../cube-to-sql/cube-to-sql'; +import { cubeQueryToSQL } from '../cube-to-sql/cube-to-sql'; export interface CubeQueryToSQLWithResolutionParams { query: Query; @@ -34,38 +37,71 @@ export const cubeQueryToSQLWithResolution = async ({ contextParams, }); - if (resolutionConfig.columnConfigs.length === 0) { - // If no resolution is needed, return the base SQL. + // Check if resolution should be skipped + if (shouldSkipResolution(resolutionConfig, query, columnProjections)) { return baseSql; } - // Create a table schema for the base query. - const baseTable: TableSchema = createBaseTableSchema( + if (!columnProjections) { + columnProjections = [...(query.dimensions || []), ...query.measures]; + } + // This is to ensure that, only the column projection columns + // are being resolved and other definitions are ignored. + resolutionConfig.columnConfigs = resolutionConfig.columnConfigs.filter( + (config) => { + return columnProjections?.includes(config.name); + } + ); + + const baseSchema: TableSchema = createBaseTableSchema( baseSql, tableSchemas, resolutionConfig, - query.measures, - query.dimensions + [], + columnProjections ); + const rowIdDimension: Dimension = { + name: ROW_ID_DIMENSION_NAME, + sql: generateRowNumberSql( + query, + baseSchema.dimensions, + BASE_DATA_SOURCE_NAME + ), + type: 'number', + alias: ROW_ID_DIMENSION_NAME, + }; + baseSchema.dimensions.push(rowIdDimension); + columnProjections.push(ROW_ID_DIMENSION_NAME); - const resolutionSchemas: TableSchema[] = generateResolutionSchemas( + // Doing this because we need to use the original name of the column in the base table schema. + resolutionConfig.columnConfigs.forEach((config) => { + config.name = memberKeyToSafeKey(config.name); + }); + + // Generate SQL with row_id and unnested arrays + const unnestTableSchema = await coreGetUnnestTableSchema({ + baseTableSchema: baseSchema, resolutionConfig, - tableSchemas - ); + contextParams, + cubeQueryToSQL: async (params) => cubeQueryToSQL(params), + }); - const resolveParams: CubeQueryToSQLParams = { - query: { - measures: [], - dimensions: generateResolvedDimensions( - query, - resolutionConfig, - columnProjections - ), - joinPaths: generateResolutionJoinPaths(resolutionConfig, tableSchemas), - }, - tableSchemas: [baseTable, ...resolutionSchemas], - }; - const sql = await cubeQueryToSQL(resolveParams); + // Apply resolution (join with lookup tables) + const resolvedTableSchema = await coreGetResolvedTableSchema({ + baseTableSchema: unnestTableSchema, + resolutionConfig, + contextParams, + columnProjections, + cubeQueryToSQL: async (params) => cubeQueryToSQL(params), + }); + + // Re-aggregate to reverse the unnest + const aggregatedSql = await coreGetAggregatedSql({ + resolvedTableSchema, + resolutionConfig, + contextParams, + cubeQueryToSQL: async (params) => cubeQueryToSQL(params), + }); - return sql; + return aggregatedSql; };