From 27b3d336711af6b36c38570c1f1c9956101feca2 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Wed, 22 Oct 2025 13:08:35 +0530 Subject: [PATCH 1/2] Support unnest for JSON arrays in flat collections --- .../documentstore/DocStoreQueryV1Test.java | 37 +++++++++++++++++++ .../PostgresFilterTypeExpressionVisitor.java | 31 +++++++++------- .../PostgresFromTypeExpressionVisitor.java | 18 ++++++--- 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index cb192c4b..bb3c992a 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -3981,6 +3981,43 @@ void testFlatVsNestedCollectionNestedFieldSelections(String dataStoreName) throw assertDocsAndSizeEqual( dataStoreName, flatBrandNoAliasIterator, "query/no_alias_response.json", 8); } + + /** + * Tests UNNEST operation on JSONB array fields in flat collections. This validates that + * jsonb_array_elements() is used for JSONB arrays (props.colors) instead of unnest(), which is + * only for native arrays (tags). 
+ */ + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatCollectionUnnestJsonbArray(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test UNNEST on JSONB array field: props.colors + // Expected: unnesting yields one result row per color entry + // Data: id=1 has ["Blue", "Green"], id=3 has ["Black"], id=5 has ["Orange", "Blue"] + // Total: 5 color entries from 3 items + Query unnestJsonbQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(JsonIdentifierExpression.of("props", "colors")) + .addFromClause( + UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false)) + .build(); + + Iterator resultIterator = flatCollection.aggregate(unnestJsonbQuery); + + long count = 0; + while (resultIterator.hasNext()) { + resultIterator.next(); + count++; + } + + // Expecting 5 results: 2 from Soap (Blue, Green), 1 from Shampoo (Black), + // 2 from Lifebuoy (Orange, Blue) + assertEquals(5, count, "Should find 5 color entries after unnesting JSONB arrays"); + } } @Nested diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java index a47fa262..3704a33e 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFilterTypeExpressionVisitor.java @@ -18,6 +18,7 @@ import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression; import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; import 
org.hypertrace.core.documentstore.expression.impl.DocumentArrayFilterExpression; +import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.KeyExpression; import org.hypertrace.core.documentstore.expression.impl.LogicalExpression; import org.hypertrace.core.documentstore.expression.impl.RelationalExpression; @@ -169,6 +170,8 @@ private String getFilterStringForAnyOperator(final ArrayRelationalFilterExpressi boolean isFlatCollection = postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; + boolean isJsonbArray = expression.getArraySource() instanceof JsonIdentifierExpression; + // Extract the field name final String identifierName = expression @@ -176,15 +179,15 @@ private String getFilterStringForAnyOperator(final ArrayRelationalFilterExpressi .accept(new PostgresIdentifierExpressionVisitor(postgresQueryParser)); final String parsedLhs; - if (isFlatCollection) { - // For flat collections, assume all arrays are native PostgreSQL arrays + if (isFlatCollection && !isJsonbArray) { + // For flat collections with native arrays, use direct column reference parsedLhs = postgresQueryParser.transformField(identifierName).getPgColumn(); } else { - // For nested collections, use JSONB path accessor + // For nested collections OR JSONB arrays in flat collections, use JSONB path accessor // Convert 'elements' to planets->'elements' where planets could be an alias for an upper // level array filter // For the first time (if 'elements' was not under any nested array, say a top-level field), - // use the field identifier visitor to make it document->'elements' + // use the field identifier visitor to make it document->'elements' or props->'colors' final PostgresIdentifierExpressionVisitor identifierVisitor = new PostgresIdentifierExpressionVisitor(postgresQueryParser); final PostgresSelectTypeExpressionVisitor arrayPathVisitor = @@ -206,18 +209,18 @@ private String 
getFilterStringForAnyOperator(final ArrayRelationalFilterExpressi .getFilter() .accept(new PostgresFilterTypeExpressionVisitor(postgresQueryParser, visitorProvider)); - if (isFlatCollection) { + if (isFlatCollection && !isJsonbArray) { // todo: For array filters, UNNEST is not the most optimal way as it won't use the index. // Perhaps, we should use ANY or @> ARRAY operator - // For flat collections, assume all arrays are native and use unnest() + // For flat collections with native arrays (e.g., tags), use unnest() // Infer array type from filter to properly cast empty array String arrayTypeCast = inferArrayTypeCastFromFilter(expression.getFilter()); return String.format( "EXISTS (SELECT 1 FROM unnest(COALESCE(%s, ARRAY[]%s)) AS \"%s\" WHERE %s)", parsedLhs, arrayTypeCast, alias, parsedFilter); } else { - // For nested collections with JSONB arrays, use jsonb_array_elements() + // For nested collections OR JSONB arrays in flat collections, use jsonb_array_elements() return String.format( "EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)", parsedLhs, alias, parsedFilter); @@ -284,6 +287,8 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression boolean isFlatCollection = postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; + boolean isJsonbArray = expression.getArraySource() instanceof JsonIdentifierExpression; + // Extract the field name final String identifierName = expression @@ -291,11 +296,11 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression .accept(new PostgresIdentifierExpressionVisitor(postgresQueryParser)); final String parsedLhs; - if (isFlatCollection) { - // For flat collections, assume all arrays are native PostgreSQL arrays - // Use direct column reference with double quotes + if (isFlatCollection && !isJsonbArray) { + // For flat collections with native arrays, use direct column reference with double quotes parsedLhs = 
postgresQueryParser.transformField(identifierName).getPgColumn(); } else { + // For nested collections OR JSONB arrays in flat collections, use JSONB path accessor final PostgresIdentifierExpressionVisitor identifierVisitor = new PostgresIdentifierExpressionVisitor(postgresQueryParser); final PostgresSelectTypeExpressionVisitor arrayPathVisitor = @@ -316,8 +321,8 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression .getFilter() .accept(new PostgresFilterTypeExpressionVisitor(postgresQueryParser, wrapper)); - if (isFlatCollection) { - // For flat collections, assume all arrays are native and use unnest() + if (isFlatCollection && !isJsonbArray) { + // For flat collections with native arrays, use unnest() // Note: DocumentArrayFilterExpression typically works with JSONB arrays containing objects // For simplicity, we default to text[] type cast, though this may need refinement String arrayTypeCast = "::text[]"; @@ -325,7 +330,7 @@ private String getFilterStringForAnyOperator(final DocumentArrayFilterExpression "EXISTS (SELECT 1 FROM unnest(COALESCE(%s, ARRAY[]%s)) AS \"%s\" WHERE %s)", parsedLhs, arrayTypeCast, alias, parsedFilter); } else { - // For nested collections with JSONB arrays, use jsonb_array_elements() + // For nested collections OR JSONB arrays in flat collections, use jsonb_array_elements() return String.format( "EXISTS (SELECT 1 FROM jsonb_array_elements(COALESCE(%s, '[]'::jsonb)) AS \"%s\" WHERE %s)", parsedLhs, alias, parsedFilter); diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java index 7f815e2e..d1045c5c 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java +++ 
b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java @@ -4,6 +4,7 @@ import java.util.stream.Collectors; import lombok.Getter; import org.hypertrace.core.documentstore.DocumentType; +import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.SubQueryJoinExpression; import org.hypertrace.core.documentstore.expression.impl.UnnestExpression; import org.hypertrace.core.documentstore.parser.FromTypeExpressionVisitor; @@ -47,11 +48,14 @@ public String visit(UnnestExpression unnestExpression) { boolean isFlatCollection = postgresQueryParser.getPgColTransformer().getDocumentType() == DocumentType.FLAT; + boolean isJsonbArray = + unnestExpression.getIdentifierExpression() instanceof JsonIdentifierExpression; + String transformedFieldName; String unnestFunction; - if (isFlatCollection) { - // For flat collections, assume all unnested fields are native PostgreSQL arrays + if (isFlatCollection && !isJsonbArray) { + // For flat collections with native arrays (e.g., tags), use unnest() // Use the transformer to get the proper column name (handles quotes and naming) transformedFieldName = postgresQueryParser.transformField(orgFieldName).getPgColumn(); // Use native unnest() for PostgreSQL array columns @@ -60,7 +64,7 @@ public String visit(UnnestExpression unnestExpression) { // e.g., unnest("tags") p1(tags_unnested) instead of p1(tags) pgColumnName = pgColumnName + "_unnested"; } else { - // For nested collections, use JSONB path accessor + // For nested collections OR JSONB arrays in flat collections, use jsonb_array_elements() transformedFieldName = unnestExpression .getIdentifierExpression() @@ -78,8 +82,12 @@ public String visit(UnnestExpression unnestExpression) { String tableAlias = "t" + preIndex; String unwindExpr = String.format(unnestFunction, transformedFieldName); + // we'll quote the col name to prevent folding 
to lower case for top-level array fields String unwindExprAlias = - String.format(UNWIND_EXP_ALIAS_FMT, nextIndex, getColName(isFlatCollection, pgColumnName)); + String.format( + UNWIND_EXP_ALIAS_FMT, + nextIndex, + shouldQuoteColName(isFlatCollection && !isJsonbArray, pgColumnName)); String fmt = unnestExpression.isPreserveNullAndEmptyArrays() @@ -144,7 +152,7 @@ private static String prepareTable0Query(PostgresQueryParser postgresQueryParser /* Returns the column name with double quotes if the collection is flat to prevent folding to lower-case by PG */ - private String getColName(boolean isFlatCollection, String pgColumnName) { + private String shouldQuoteColName(boolean isFlatCollection, String pgColumnName) { return isFlatCollection ? PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName) : pgColumnName; From ed0aa0675b9b97e69a1b65bccad85647f29b59ab Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Wed, 22 Oct 2025 15:04:44 +0530 Subject: [PATCH 2/2] Added testFlatCollectionArrayAnyOnJsonbArray --- .../documentstore/DocStoreQueryV1Test.java | 28 +++++++++++++++++++ .../PostgresFromTypeExpressionVisitor.java | 8 ++---- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index bb3c992a..5b0ba342 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -4018,6 +4018,34 @@ void testFlatCollectionUnnestJsonbArray(String dataStoreName) throws IOException // 2 from Lifebuoy (Orange, Blue) assertEquals(5, count, "Should find 5 color entries after unnesting JSONB arrays"); } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatCollectionArrayAnyOnJsonbArray(String 
dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test ArrayRelationalFilterExpression.ANY on JSONB array (props.colors) + // This uses jsonb_array_elements() internally + Query jsonbArrayQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( + RelationalExpression.of( + JsonIdentifierExpression.of("props", "colors"), + EQ, + ConstantExpression.of("Blue"))) + .build()) + .build(); + + long count = flatCollection.count(jsonbArrayQuery); + // ids 1 and 5 have "Blue" in their colors array + assertEquals(2, count, "Should find 2 items with 'Blue' color (ids 1, 5)"); + } } @Nested diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java index d1045c5c..0fe7ded4 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java @@ -87,7 +87,7 @@ public String visit(UnnestExpression unnestExpression) { String.format( UNWIND_EXP_ALIAS_FMT, nextIndex, - shouldQuoteColName(isFlatCollection && !isJsonbArray, pgColumnName)); + getColName(isFlatCollection && !isJsonbArray, pgColumnName)); String fmt = unnestExpression.isPreserveNullAndEmptyArrays() @@ -152,9 +152,7 @@ private static String prepareTable0Query(PostgresQueryParser postgresQueryParser /* Returns the column name with double quotes if the collection is flat to prevent folding to lower-case by PG */ - private String shouldQuoteColName(boolean 
isFlatCollection, String pgColumnName) { - return isFlatCollection - ? PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName) - : pgColumnName; + private String getColName(boolean shouldQuote, String pgColumnName) { + return shouldQuote ? PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName) : pgColumnName; } }