From c3629fbc56b9730a2a545bb429ff79186bc43e28 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Tue, 25 Nov 2025 17:39:32 +0530 Subject: [PATCH 1/9] Postgres Query Parser Bugfixes --- .../documentstore/DocStoreQueryV1Test.java | 239 ++++++++++++++++++ .../query/pg_flat_collection_insert.json | 8 +- .../PostgresExistsRelationalFilterParser.java | 56 ++-- ...stgresNotExistsRelationalFilterParser.java | 65 +++-- .../PostgresFromTypeExpressionVisitor.java | 9 +- .../query/v1/PostgresQueryParserTest.java | 48 ++++ 6 files changed, 367 insertions(+), 58 deletions(-) diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index 0c3991c5..e98acbd9 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -90,6 +90,7 @@ import org.hypertrace.core.documentstore.expression.impl.AliasedIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.ArrayRelationalFilterExpression; +import org.hypertrace.core.documentstore.expression.impl.ArrayType; import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; import org.hypertrace.core.documentstore.expression.impl.FunctionExpression; import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression; @@ -122,6 +123,7 @@ import org.hypertrace.core.documentstore.query.SortingSpec; import org.hypertrace.core.documentstore.utils.Utils; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.extension.ExtensionContext; @@ -4583,6 +4585,40 @@ void 
testJsonbNumericComparisonOperators(String dataStoreName) { assertEquals(2, lteCount, "LTE: Should find 2 documents with pincode <= 400004"); } } + + /** + * This test validates that cols with hyphens ("-") are properly quoted so that PG doesn't + * interpret them as '-' operator + */ + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testFlatPostgresCollectionUnnestJsonbArrayWithHyphens(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Unnest the hyphenated JSONB array field + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + true)) + .build(); + + // Execute query - should not throw syntax error + Iterator resultIterator = flatCollection.find(unnestQuery); + + Set foundLocations = new HashSet<>(); + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + } + } } @Nested @@ -4774,6 +4810,209 @@ void testNotExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessing assertTrue( returnedItems.contains("Comb"), "Should include Comb (has empty colors array in props)"); } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testExistsFilterOnJsonScalars(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING, "product-code")) + .setFilter( + RelationalExpression.of( + 
JsonIdentifierExpression.of("props", JsonFieldType.STRING, "product-code"), + EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + Document next = results.next(); + count++; + } + // We have 4 rows with "props"->'product-code' field present (regardless of the value) + assertEquals(4, count, "Should return exactly 4 documents with non-empty product-code"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotExistsFilterOnJsonScalars(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING, "product-code")) + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of("props", JsonFieldType.STRING, "product-code"), + NOT_EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + Document next = results.next(); + count++; + } + // We have 6 rows that have "props"->'product-code' field missing + assertEquals(6, count, "Should return exactly 6 documents with missing product-code"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testExistsFilterOnUnnestedNativeArray(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), true)) + // Only include tags[] that 
have at least 1 element, all rows with NULL or empty tags + // should be excluded. + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + + assertEquals(25, count, "Should return unnested tag elements from non-empty arrays"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotExistsFilterOnUnnestNativeArray(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), true)) + // Only include tags[] that are either NULL or empty (we have one row with NULL tag + // and one with empty tag. Unnest will result in two rows with NULL for + // "tags_unnested"). Note that this behavior will change with + // preserveNullAndEmptyArrays = false. This is because unnest won't preserve those + // rows for which the unnested column is NULL then. 
+ .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + NOT_EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + + assertEquals(2, count, "Should return at least 2 rows with NULL unnested tags"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testExistsFilterOnUnnestJsonbArray(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + true)) + // Should include only those props->source-loc arrays that have at least one element. 
+ // So essentially, after unnesting this array, we don't have any rows with NULL for + // the unnested col + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator resultIterator = flatCollection.find(unnestQuery); + + int count = 0; + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + count++; + } + assertEquals(6, count); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotExistsFilterOnUnnestJsonbArray(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + true)) + // Should include only those props->source-loc arrays that are either NULL or empty. 
+ .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + NOT_EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator resultIterator = flatCollection.find(unnestQuery); + + int count = 0; + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + count++; + } + assertEquals(7, count); + } } @Nested diff --git a/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json b/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json index 050105fd..ab363fb7 100644 --- a/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json +++ b/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json @@ -1,12 +1,12 @@ { "statements": [ - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n1, 'Soap', 10, 2, '2014-03-01T08:00:00Z',\n'{\"hygiene\", \"personal-care\", \"premium\"}',\n'{\"Hygiene\", \"PersonalCare\"}',\n'{\"colors\": [\"Blue\", \"Green\"], \"brand\": \"Dettol\", \"size\": \"M\", \"seller\": {\"name\": \"Metro Chemicals Pvt. Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\n'{1, 2, 3}',\n'{4.5, 9.2}',\n'{true, false}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n1, 'Soap', 10, 2, '2014-03-01T08:00:00Z',\n'{\"hygiene\", \"personal-care\", \"premium\"}',\n'{\"Hygiene\", \"PersonalCare\"}',\n'{\"colors\": [\"Blue\", \"Green\"], \"brand\": \"Dettol\", \"size\": \"M\", \"product-code\": \"SOAP-DET-001\", \"source-loc\": [\"warehouse-A\", \"store-1\"], \"seller\": {\"name\": \"Metro Chemicals Pvt. 
Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\n'{1, 2, 3}',\n'{4.5, 9.2}',\n'{true, false}'\n)", "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n2, 'Mirror', 20, 1, '2014-03-01T09:00:00Z',\n'{\"home-decor\", \"reflective\", \"glass\"}',\n'{\"HomeDecor\"}',\nNULL,\nNULL,\n'{10, 20}',\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n3, 'Shampoo', 5, 10, '2014-03-15T09:00:00Z',\n'{\"hair-care\", \"personal-care\", \"premium\", \"herbal\"}',\n'{\"HairCare\", \"PersonalCare\"}',\n'{\"colors\": [\"Black\"], \"brand\": \"Sunsilk\", \"size\": \"L\", \"seller\": {\"name\": \"Metro Chemicals Pvt. Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\nNULL,\n'{3.14, 2.71}',\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n3, 'Shampoo', 5, 10, '2014-03-15T09:00:00Z',\n'{\"hair-care\", \"personal-care\", \"premium\", \"herbal\"}',\n'{\"HairCare\", \"PersonalCare\"}',\n'{\"colors\": [\"Black\"], \"brand\": \"Sunsilk\", \"size\": \"L\", \"product-code\": \"SHAMP-SUN-003\", \"source-loc\": [\"warehouse-B\", \"store-2\", \"online\"], \"seller\": {\"name\": \"Metro Chemicals Pvt. 
Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\nNULL,\n'{3.14, 2.71}',\nNULL\n)", "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n4, 'Shampoo', 5, 20, '2014-04-04T11:21:39.736Z',\n'{\"hair-care\", \"budget\", \"bulk\"}',\n'{\"HairCare\"}',\nNULL,\nNULL,\nNULL,\nNULL,\n'{true, true}'\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n5, 'Soap', 20, 5, '2014-04-04T21:23:13.331Z',\n'{\"hygiene\", \"antibacterial\", \"family-pack\"}',\n'{\"Hygiene\"}',\n'{\"colors\": [\"Orange\", \"Blue\"], \"brand\": \"Lifebuoy\", \"size\": \"S\", \"seller\": {\"name\": \"Hans and Co.\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n5, 'Soap', 20, 5, '2014-04-04T21:23:13.331Z',\n'{\"hygiene\", \"antibacterial\", \"family-pack\"}',\n'{\"Hygiene\"}',\n'{\"colors\": [\"Orange\", \"Blue\"], \"brand\": \"Lifebuoy\", \"size\": \"S\", \"product-code\": \"SOAP-LIF-005\", \"source-loc\": [\"warehouse-C\"], \"seller\": {\"name\": \"Hans and Co.\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n6, 'Comb', 7.5, 5, '2015-06-04T05:08:13Z',\n'{\"grooming\", \"plastic\", \"essential\"}',\n'{\"Grooming\"}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", 
\"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n7, 'Comb', 7.5, 10, '2015-09-10T08:43:00Z',\n'{\"grooming\", \"bulk\", \"wholesale\"}',\n'{\"Grooming\"}',\n'{\"colors\": [], \"seller\": {\"name\": \"Go Go Plastics\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n7, 'Comb', 7.5, 10, '2015-09-10T08:43:00Z',\n'{\"grooming\", \"bulk\", \"wholesale\"}',\n'{\"Grooming\"}',\n'{\"colors\": [], \"product-code\": null, \"source-loc\": [], \"seller\": {\"name\": \"Go Go Plastics\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n8, 'Soap', 10, 5, '2016-02-06T20:20:13Z',\n'{\"hygiene\", \"budget\", \"basic\"}',\n'{\"Hygiene\"}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n9, 'Bottle', 15, 3, '2016-03-01T10:00:00Z',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n10, 'Cup', 8, 2, '2016-04-01T10:00:00Z',\n'{}',\n'{}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)" diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParser.java 
b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParser.java index 224704c4..78ede1c1 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParser.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParser.java @@ -1,5 +1,6 @@ package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter; +import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.RelationalExpression; @@ -25,16 +26,37 @@ public String parse( switch (category) { case ARRAY: - // First-class PostgreSQL array columns (text[], int[], etc.) - return parsedRhs - // We don't need to check that LHS is NOT NULL because WHERE cardinality(NULL) will not - // be included in the result set - ? String.format("(cardinality(%s) > 0)", parsedLhs) - : String.format("COALESCE(cardinality(%s), 0) = 0", parsedLhs); + { + // First-class PostgreSQL array columns (text[], int[], etc.) 
+ // Check if this field has been unnested - if so, treat it as a scalar (because the + // unnested array col is no longer an array, but a scalar col) + ArrayIdentifierExpression arrayExpr = (ArrayIdentifierExpression) expression.getLhs(); + String arrayFieldName = arrayExpr.getName(); + if (context.getPgColumnNames().containsKey(arrayFieldName)) { + // Field is unnested - each element is now a scalar, not an array + // Use simple NULL checks instead of cardinality + return getScalarExpr(parsedRhs, parsedLhs); + } + + // Field is NOT unnested - apply cardinality logic + return parsedRhs + // We don't need to check that LHS is NOT NULL because WHERE cardinality(NULL) will + // not be included in the result set + ? String.format("(cardinality(%s) > 0)", parsedLhs) + : String.format("COALESCE(cardinality(%s), 0) = 0", parsedLhs); + } case JSONB_ARRAY: { JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs(); + // Check if this field has been unnested - if so, treat it as a scalar + String fieldName = jsonExpr.getName(); + if (context.getPgColumnNames().containsKey(fieldName)) { + // Field is unnested - each element is now a scalar. Handle it like the ARRAY case + return getScalarExpr(parsedRhs, parsedLhs); + } + + // Field is NOT unnested - apply array length logic String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName()); String nestedPath = String.join(".", jsonExpr.getJsonPath()); return parsedRhs @@ -49,26 +71,18 @@ public String parse( } case JSONB_SCALAR: - { - // JSONB scalar fields - use ? operator for GIN index optimization - JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs(); - String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName()); - String nestedPath = String.join(".", jsonExpr.getJsonPath()); - - return parsedRhs - ? String.format("%s ? '%s'", baseColumn, nestedPath) - : String.format("NOT (%s ? 
'%s')", baseColumn, nestedPath); - } - case SCALAR: default: - // Regular scalar fields - use standard NULL checks - return parsedRhs - ? String.format("%s IS NOT NULL", parsedLhs) - : String.format("%s IS NULL", parsedLhs); + return getScalarExpr(parsedRhs, parsedLhs); } } + private String getScalarExpr(boolean parsedRhs, String parsedLhs) { + return parsedRhs + ? String.format("%s IS NOT NULL", parsedLhs) + : String.format("%s IS NULL", parsedLhs); + } + private String wrapWithDoubleQuotes(String identifier) { return "\"" + identifier + "\""; } diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParser.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParser.java index 2d558a02..5c6e2547 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParser.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParser.java @@ -1,6 +1,7 @@ package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter; import org.hypertrace.core.documentstore.expression.impl.ConstantExpression; +import org.hypertrace.core.documentstore.expression.impl.IdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.RelationalExpression; import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresFieldTypeDetector.FieldCategory; @@ -25,20 +26,41 @@ public String parse( switch (category) { case ARRAY: - // For first-class array fields, only return those arrays that are not null and have - // at-least 1 element in it (so exclude NULL or empty arrays). This is to match Mongo's - // behavior - return parsedRhs - ? 
String.format("(cardinality(%s) > 0)", parsedLhs) - // More efficient than: %s IS NULL OR cardinality(%s) = 0)? as we can create - // an index on the COALESCE function itself which will return in a single - // index seek rather than two index seeks in the OR query - : String.format("COALESCE(cardinality(%s), 0) = 0", parsedLhs); + { + // For first-class array fields, only return those arrays that are not null and have + // at-least 1 element in it (so exclude NULL or empty arrays). This is to match Mongo's + // behavior + // Check if this field has been unnested - if so, treat it as a scalar + IdentifierExpression arrayExpr = (IdentifierExpression) expression.getLhs(); + String arrayFieldName = arrayExpr.getName(); + if (context.getPgColumnNames().containsKey(arrayFieldName)) { + // Field is unnested - each element is now a scalar, not an array + // Use simple NULL checks instead of cardinality + return getScalarExpr(parsedRhs, parsedLhs); + } + + // Field is NOT unnested - apply cardinality logic + return parsedRhs + ? String.format("(cardinality(%s) > 0)", parsedLhs) + // More efficient than: %s IS NULL OR cardinality(%s) = 0)? 
as we can create + // an index on the COALESCE function itself which will return in a single + // index seek rather than two index seeks in the OR query + : String.format("COALESCE(cardinality(%s), 0) = 0", parsedLhs); + } case JSONB_ARRAY: { // Arrays inside JSONB columns - use optimized GIN index queries JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs(); + // Check if this field has been unnested - if so, treat it as a scalar + String fieldName = jsonExpr.getName(); + if (context.getPgColumnNames().containsKey(fieldName)) { + // Field is unnested - each element is now a scalar, not an array + // Use simple NULL checks instead of array length + return getScalarExpr(parsedRhs, parsedLhs); + } + + // Field is NOT unnested - apply array length logic String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName()); String nestedPath = String.join(".", jsonExpr.getJsonPath()); @@ -51,29 +73,18 @@ public String parse( } case JSONB_SCALAR: - { - // JSONB scalar fields - use ? operator for GIN index optimization - JsonIdentifierExpression jsonExpr = (JsonIdentifierExpression) expression.getLhs(); - String baseColumn = wrapWithDoubleQuotes(jsonExpr.getColumnName()); - String nestedPath = String.join(".", jsonExpr.getJsonPath()); - - return parsedRhs - // Uses the GIN index on the parent JSONB col - ? String.format("%s ? '%s'", baseColumn, nestedPath) - // Does not use the GIN index but is more computationally efficient than doing a IS - // NULL check - : String.format("NOT (%s ? '%s')", baseColumn, nestedPath); - } - case SCALAR: default: - // Regular scalar fields - use standard NULL checks - return parsedRhs - ? String.format("%s IS NOT NULL", parsedLhs) - : String.format("%s IS NULL", parsedLhs); + return getScalarExpr(parsedRhs, parsedLhs); } } + private static String getScalarExpr(boolean parsedRhs, String parsedLhs) { + return parsedRhs + ? 
String.format("%s IS NOT NULL", parsedLhs) + : String.format("%s IS NULL", parsedLhs); + } + private String wrapWithDoubleQuotes(String identifier) { return "\"" + identifier + "\""; } diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java index 0fe7ded4..9cdf3c8c 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/vistors/PostgresFromTypeExpressionVisitor.java @@ -84,10 +84,7 @@ public String visit(UnnestExpression unnestExpression) { // we'll quote the col name to prevent folding to lower case for top-level array fields String unwindExprAlias = - String.format( - UNWIND_EXP_ALIAS_FMT, - nextIndex, - getColName(isFlatCollection && !isJsonbArray, pgColumnName)); + String.format(UNWIND_EXP_ALIAS_FMT, nextIndex, getQuotedColName(pgColumnName)); String fmt = unnestExpression.isPreserveNullAndEmptyArrays() @@ -152,7 +149,7 @@ private static String prepareTable0Query(PostgresQueryParser postgresQueryParser /* Returns the column name with double quotes if the collection is flat to prevent folding to lower-case by PG */ - private String getColName(boolean shouldQuote, String pgColumnName) { - return shouldQuote ? 
PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName) : pgColumnName; + private String getQuotedColName(String pgColumnName) { + return PostgresUtils.wrapFieldNamesWithDoubleQuotes(pgColumnName); } } diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java index 0c41a793..aad495fc 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java @@ -1761,4 +1761,52 @@ void testNotExistsOnJsonbArrayField() { assertEquals(0, params.getObjectParams().size()); } } + + @Test + void testFlatCollectionWithHyphenatedJsonbArrayFieldInUnnest() { + // This test reproduces the syntax error with field names containing hyphens + // When a JSONB array field with hyphens (e.g., "dev-ops-owner") is unnested, + // the alias becomes "customAttribute_dot_dev-ops-owner" which needs quotes in LATERAL join + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("id")) + .addSelection( + JsonIdentifierExpression.of( + "customAttribute", JsonFieldType.STRING_ARRAY, "dev-ops-owner")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "customAttribute", JsonFieldType.STRING_ARRAY, "dev-ops-owner"), + true)) + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "customAttribute", JsonFieldType.STRING_ARRAY, "dev-ops-owner"), + EQ, + ConstantExpression.of("team-alpha"))) + .build(); + + PostgresQueryParser postgresQueryParser = + new PostgresQueryParser( + TEST_TABLE, + PostgresQueryTransformer.transform(query), + new FlatPostgresFieldTransformer()); + + String sql = postgresQueryParser.parse(); + + // The key assertion: the alias in the LATERAL join must be quoted + // CORRECT: 
p1("customAttribute_dot_dev-ops-owner") + // INCORRECT: p1(customAttribute_dot_dev-ops-owner) <- causes PostgreSQL syntax error + String expectedSql = + "With \n" + + "table0 as (SELECT * from \"testCollection\"),\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(\"customAttribute\"->'dev-ops-owner') p1(\"customAttribute_dot_dev-ops-owner\") on TRUE)\n" + + "SELECT \"id\" AS \"id\", \"customAttribute_dot_dev-ops-owner\" AS \"customAttribute_dot_dev-ops-owner\" " + + "FROM table1 WHERE \"customAttribute_dot_dev-ops-owner\" = ?"; + + assertEquals(expectedSql, sql); + + Params params = postgresQueryParser.getParamsBuilder().build(); + assertEquals("team-alpha", params.getObjectParams().get(1)); + } } From 8a2c8f625ecee1ba01bd88beaf54fc24bc96d10f Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Tue, 25 Nov 2025 17:58:02 +0530 Subject: [PATCH 2/9] Fixed failing test cases --- .../PostgresExistsRelationalFilterParserTest.java | 10 ++-------- .../PostgresNotExistsRelationalFilterParserTest.java | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParserTest.java index 2fbade12..7fbb780d 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresExistsRelationalFilterParserTest.java @@ -151,10 +151,7 @@ void testParse_jsonScalarField_rhsTrue() { String result = parser.parse(expression, context); - assertEquals( - "\"customAttribute\" ? 'brand'", - result, - "EXISTS with RHS=true on JSON scalar should use ? 
operator for GIN index"); + assertEquals("\"customAttribute\"->>'brand' IS NOT NULL", result); } @Test @@ -170,9 +167,6 @@ void testParse_jsonScalarField_rhsFalse() { String result = parser.parse(expression, context); - assertEquals( - "NOT (\"customAttribute\" ? 'brand')", - result, - "EXISTS with RHS=false on JSON scalar should use negated ? operator"); + assertEquals("\"customAttribute\"->>'brand' IS NULL", result); } } diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java index db1b6701..c28d3b68 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java @@ -151,10 +151,7 @@ void testParse_jsonScalarField_rhsFalse() { String result = parser.parse(expression, context); - assertEquals( - "\"customAttribute\" ? 'brand'", - result, - "NOT_EXISTS with RHS=false on JSON scalar should use ? operator for GIN index"); + assertEquals("\"customAttribute\"->>'brand' IS NOT NULL", result); } @Test @@ -170,9 +167,6 @@ void testParse_jsonScalarField_rhsTrue() { String result = parser.parse(expression, context); - assertEquals( - "NOT (\"customAttribute\" ? 'brand')", - result, - "NOT_EXISTS with RHS=true on JSON scalar should use negated ? 
operator"); + assertEquals("\"customAttribute\"->>'brand' IS NULL", result); } } From 9fe72d9aa94f8a8ed414a6b513d674208307b5c6 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Tue, 25 Nov 2025 19:02:23 +0530 Subject: [PATCH 3/9] Fix failing test cases --- .../query/v1/PostgresQueryParserTest.java | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java index aad495fc..9afb2ba7 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java @@ -709,8 +709,8 @@ void testUnnestWithoutPreserveNullAndEmptyArrays() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\"),\n" - + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(sales)),\n" - + "table2 as (SELECT * from table1 t1, jsonb_array_elements(sales->'medium') p2(sales_dot_medium))\n" + + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(\"sales\")),\n" + + "table2 as (SELECT * from table1 t1, jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\"))\n" + "SELECT document->'item' AS \"item\", " + "document->'price' AS \"price\", " + "sales->'city' AS \"sales_dot_city\", " @@ -741,8 +741,8 @@ void testUnnestWithPreserveNullAndEmptyArrays() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\"),\n" - + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(sales) on TRUE),\n" - + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(sales_dot_medium) on TRUE)\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN 
LATERAL jsonb_array_elements(document->'sales') p1(\"sales\") on TRUE),\n" + + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\") on TRUE)\n" + "SELECT document->'item' AS \"item\", " + "document->'price' AS \"price\", " + "sales->'city' AS \"sales_dot_city\", " @@ -785,8 +785,8 @@ void testUnnestWithoutPreserveNullAndEmptyArraysWithFilters() { "With \n" + "table0 as (SELECT * from \"testCollection\" " + "WHERE CAST (document->>'quantity' AS NUMERIC) != ?),\n" - + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(sales)),\n" - + "table2 as (SELECT * from table1 t1, jsonb_array_elements(sales->'medium') p2(sales_dot_medium))\n" + + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(\"sales\")),\n" + + "table2 as (SELECT * from table1 t1, jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\"))\n" + "SELECT document->'item' AS \"item\", " + "sales->'city' AS \"sales_dot_city\", " + "sales_dot_medium->'type' AS \"sales_dot_medium_dot_type\" " @@ -830,8 +830,8 @@ void testUnnestWithRegularFilterAtSecondLevelArray() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\" WHERE CAST (document->>'quantity' AS NUMERIC) > ?),\n" - + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(sales) on TRUE),\n" - + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(sales_dot_medium) on TRUE)\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(\"sales\") on TRUE),\n" + + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\") on TRUE)\n" + "SELECT document->'item' AS \"item\", document->'price' AS \"price\", sales->'city' AS \"sales_dot_city\", sales_dot_medium->'type' AS \"sales_dot_medium_dot_type\" FROM table2 WHERE 
sales_dot_medium->>'type' = ?", sql); @@ -870,8 +870,8 @@ void testUnnestWithRegularORFilterAtSecondLevelArray() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\"),\n" - + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(sales) on TRUE),\n" - + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(sales_dot_medium) on TRUE)\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(\"sales\") on TRUE),\n" + + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\") on TRUE)\n" + "SELECT document->'item' AS \"item\", document->'price' AS \"price\", sales->'city' AS \"sales_dot_city\", sales_dot_medium->'type' AS \"sales_dot_medium_dot_type\" FROM table2 WHERE (CAST (document->>'quantity' AS NUMERIC) > ?) OR (sales_dot_medium->>'type' = ?)", sql); @@ -919,8 +919,8 @@ void testUnnestWithRegularAndORFilterAtSecondLevelArray() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\" WHERE CAST (document->>'price' AS NUMERIC) > ?),\n" - + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(sales) on TRUE),\n" - + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(sales_dot_medium) on TRUE)\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(\"sales\") on TRUE),\n" + + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\") on TRUE)\n" + "SELECT document->'item' AS \"item\", document->'price' AS \"price\", sales->'city' AS \"sales_dot_city\", sales_dot_medium->'type' AS \"sales_dot_medium_dot_type\" FROM table2 WHERE (CAST (document->>'quantity' AS NUMERIC) > ?) 
OR (sales_dot_medium->>'type' = ?)", sql); @@ -968,8 +968,8 @@ void testUnnestWithRegularAndUnnestFilterAtSecondLevelArray() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\" WHERE CAST (document->>'quantity' AS NUMERIC) > ?),\n" - + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(sales) on TRUE),\n" - + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(sales_dot_medium) on TRUE)\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(\"sales\") on TRUE),\n" + + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\") on TRUE)\n" + "SELECT document->'item' AS \"item\", document->'quantity' AS \"quantity\", sales->'city' AS \"sales_dot_city\", sales_dot_medium->'type' AS \"sales_dot_medium_dot_type\" FROM table2 WHERE (sales_dot_medium->>'type' = ?) AND (sales_dot_medium->>'type' = ?)", sql); @@ -1017,8 +1017,8 @@ void testUnnestWithRegularAndDifferentUnnestFilterAtSecondLevelArray() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\" WHERE CAST (document->>'quantity' AS NUMERIC) > ?),\n" - + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(sales) on TRUE),\n" - + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(sales_dot_medium) on TRUE)\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(\"sales\") on TRUE),\n" + + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\") on TRUE)\n" + "SELECT document->'item' AS \"item\", document->'quantity' AS \"quantity\", sales->'city' AS \"sales_dot_city\", sales_dot_medium->'type' AS \"sales_dot_medium_dot_type\" FROM table2 WHERE (sales_dot_medium->>'type' = ?) 
AND (sales_dot_medium->>'channel' = ?)", sql); @@ -1065,8 +1065,8 @@ void testUnnestWithRegularAndDifferentUnnestFilterAtFirstLevelArray() { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\" WHERE CAST (document->>'quantity' AS NUMERIC) > ?),\n" - + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(sales) on TRUE),\n" - + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(sales_dot_medium) on TRUE)\n" + + "table1 as (SELECT * from table0 t0 LEFT JOIN LATERAL jsonb_array_elements(document->'sales') p1(\"sales\") on TRUE),\n" + + "table2 as (SELECT * from table1 t1 LEFT JOIN LATERAL jsonb_array_elements(sales->'medium') p2(\"sales_dot_medium\") on TRUE)\n" + "SELECT document->'item' AS \"item\", document->'quantity' AS \"quantity\", sales->'city' AS \"sales_dot_city\" FROM table2 WHERE (sales->>'channel' = ?) AND (sales->>'city' = ?)", sql); @@ -1381,7 +1381,7 @@ void testContainsAndUnnestFilters() throws IOException { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\"),\n" - + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(sales))\n" + + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(\"sales\"))\n" + "SELECT document->'item' AS \"item\", sales->'medium' AS \"sales_dot_medium\" FROM table1 WHERE sales->'medium' @> ?::jsonb", sql); @@ -1416,7 +1416,7 @@ void testNotContainsAndUnnestFilters() throws IOException { assertEquals( "With \n" + "table0 as (SELECT * from \"testCollection\"),\n" - + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(sales))\n" + + "table1 as (SELECT * from table0 t0, jsonb_array_elements(document->'sales') p1(\"sales\"))\n" + "SELECT document->'item' AS \"item\", sales->'medium' AS \"sales_dot_medium\" FROM table1 WHERE sales->'medium' IS NULL OR NOT sales->'medium' @> ?::jsonb", sql); @@ -1627,7 +1627,8 @@ void 
testExistsOnJsonbScalarField() { new FlatPostgresFieldTransformer()); String sql = postgresQueryParser.parse(); - assertEquals("SELECT * FROM \"testCollection\" WHERE \"customAttribute\" ? 'brand'", sql); + assertEquals( + "SELECT * FROM \"testCollection\" WHERE \"customAttribute\"->'brand' IS NOT NULL", sql); Params params = postgresQueryParser.getParamsBuilder().build(); assertEquals(0, params.getObjectParams().size()); @@ -1727,7 +1728,7 @@ void testNotExistsOnJsonbScalarField() { String sql = postgresQueryParser.parse(); assertEquals( - "SELECT * FROM \"testCollection\" WHERE NOT (\"customAttribute\" ? 'brand')", sql); + "SELECT * FROM \"testCollection\" WHERE NOT (\"customAttribute\"->'brand' IS NULL)", sql); Params params = postgresQueryParser.getParamsBuilder().build(); assertEquals(0, params.getObjectParams().size()); From eea994740dab1c7be81669763ddeeb1694082915 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Tue, 25 Nov 2025 19:35:48 +0530 Subject: [PATCH 4/9] Fix failing test cases --- .../postgres/query/v1/PostgresQueryParserTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java index 9afb2ba7..64adc4da 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java @@ -1728,7 +1728,7 @@ void testNotExistsOnJsonbScalarField() { String sql = postgresQueryParser.parse(); assertEquals( - "SELECT * FROM \"testCollection\" WHERE NOT (\"customAttribute\"->'brand' IS NULL)", sql); + "SELECT * FROM \"testCollection\" WHERE \"customAttribute\"->'brand' IS NULL", sql); Params params = postgresQueryParser.getParamsBuilder().build(); assertEquals(0, 
params.getObjectParams().size()); From 10f29be9d70c9c0466ab0a8d8bdaecdc02d41780 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Tue, 25 Nov 2025 20:25:59 +0530 Subject: [PATCH 5/9] WIP --- .../documentstore/DocStoreQueryV1Test.java | 236 +++++++++++++++--- ...gresInRelationalFilterParserJsonArray.java | 70 +++++- ...resInRelationalFilterParserArrayField.java | 46 +++- ...esNotExistsRelationalFilterParserTest.java | 2 +- 4 files changed, 310 insertions(+), 44 deletions(-) diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index e98acbd9..e9c83a89 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -320,7 +320,7 @@ public Stream provideArguments(final ExtensionContext context) { Arguments.of(POSTGRES_STORE, "WITH_TYPE"), // ArrayIdentifierExpression WITH ArrayType Arguments.of( POSTGRES_STORE, "WITHOUT_TYPE") // ArrayIdentifierExpression WITHOUT ArrayType - ); + ); } } @@ -2025,8 +2025,7 @@ public void testAtomicCreateOrReplace(final String datastoreName) .collect(toUnmodifiableList()); assertEquals(1, documents.size()); - @SuppressWarnings("unchecked") - final Map mapping = + @SuppressWarnings("unchecked") final Map mapping = new ObjectMapper().readValue(documents.get(0).toJson(), Map.class); assertTrue( (long) mapping.get(DocStoreConstants.LAST_UPDATED_TIME) @@ -2694,25 +2693,25 @@ void testUpdateWithAllOperators(final String datastoreName) throws IOException { SubDocumentUpdate.builder() .subDocument("props.added.set") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) .build(); final SubDocumentUpdate another_add = 
SubDocumentUpdate.builder() .subDocument("props.planets") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new String[] {"Neptune", "Pluto"})) + .subDocumentValue(SubDocumentValue.of(new String[]{"Neptune", "Pluto"})) .build(); final SubDocumentUpdate append = SubDocumentUpdate.builder() .subDocument("props.appended.list") .operator(APPEND_TO_LIST) - .subDocumentValue(SubDocumentValue.of(new Integer[] {1, 2})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{1, 2})) .build(); final SubDocumentUpdate remove = SubDocumentUpdate.builder() .subDocument("props.removed.list") .operator(REMOVE_ALL_FROM_LIST) - .subDocumentValue(SubDocumentValue.of(new String[] {"Hello"})) + .subDocumentValue(SubDocumentValue.of(new String[]{"Hello"})) .build(); final SubDocumentUpdate increment = SubDocumentUpdate.builder() @@ -2746,19 +2745,19 @@ void testUpdateWithAllOperators(final String datastoreName) throws IOException { SubDocumentUpdate.builder() .subDocument("props.added.set") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new Integer[] {3, 1, 1000})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{3, 1, 1000})) .build(); final SubDocumentUpdate append_new = SubDocumentUpdate.builder() .subDocument("props.appended.list") .operator(APPEND_TO_LIST) - .subDocumentValue(SubDocumentValue.of(new Integer[] {8, 2})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{8, 2})) .build(); final SubDocumentUpdate remove_new = SubDocumentUpdate.builder() .subDocument("props.planets") .operator(REMOVE_ALL_FROM_LIST) - .subDocumentValue(SubDocumentValue.of(new String[] {"Pluto", "Mars"})) + .subDocumentValue(SubDocumentValue.of(new String[]{"Pluto", "Mars"})) .build(); final SubDocumentUpdate decrement = SubDocumentUpdate.builder() @@ -2905,10 +2904,10 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(ADD_TO_LIST_IF_ABSENT) .subDocumentValue( SubDocumentValue.of( - new Document[] { - new 
JSONDocument(Map.of("key", 1)), - new JSONDocument(Map.of("key", 2)), - new JSONDocument(Map.of("key", 1)) + new Document[]{ + new JSONDocument(Map.of("key", 1)), + new JSONDocument(Map.of("key", 2)), + new JSONDocument(Map.of("key", 1)) })) .build(); final SubDocumentUpdate another_add = @@ -2917,9 +2916,9 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(ADD_TO_LIST_IF_ABSENT) .subDocumentValue( SubDocumentValue.of( - new Document[] { - new JSONDocument(Map.of("name", "Neptune")), - new JSONDocument(Map.of("name", "Pluto")) + new Document[]{ + new JSONDocument(Map.of("name", "Neptune")), + new JSONDocument(Map.of("name", "Pluto")) })) .build(); final SubDocumentUpdate append = @@ -2928,8 +2927,8 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(APPEND_TO_LIST) .subDocumentValue( SubDocumentValue.of( - new Document[] { - new JSONDocument(Map.of("key", 1)), new JSONDocument(Map.of("key", 2)) + new Document[]{ + new JSONDocument(Map.of("key", 1)), new JSONDocument(Map.of("key", 2)) })) .build(); final SubDocumentUpdate remove = @@ -2937,7 +2936,7 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .subDocument("props.removed.list") .operator(REMOVE_ALL_FROM_LIST) .subDocumentValue( - SubDocumentValue.of(new Document[] {new JSONDocument(Map.of("Hello", "world!"))})) + SubDocumentValue.of(new Document[]{new JSONDocument(Map.of("Hello", "world!"))})) .build(); final Query query = Query.builder().build(); @@ -2954,7 +2953,7 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .subDocument("props.sales") .operator(SET) .subDocumentValue( - SubDocumentValue.of(new Document[] {new JSONDocument(Map.of("count", 789))})) + SubDocumentValue.of(new Document[]{new JSONDocument(Map.of("count", 789))})) .build(); final SubDocumentUpdate unset_new = SubDocumentUpdate.builder() @@ -2967,8 +2966,8 @@ void 
testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(ADD_TO_LIST_IF_ABSENT) .subDocumentValue( SubDocumentValue.of( - new Document[] { - new JSONDocument(Map.of("key", 3)), new JSONDocument(Map.of("key", 1)) + new Document[]{ + new JSONDocument(Map.of("key", 3)), new JSONDocument(Map.of("key", 1)) })) .build(); final SubDocumentUpdate append_new = @@ -2977,8 +2976,8 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(APPEND_TO_LIST) .subDocumentValue( SubDocumentValue.of( - new Document[] { - new JSONDocument(Map.of("key", 8)), new JSONDocument(Map.of("key", 2)) + new Document[]{ + new JSONDocument(Map.of("key", 8)), new JSONDocument(Map.of("key", 2)) })) .build(); final SubDocumentUpdate remove_new = @@ -2987,9 +2986,9 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(REMOVE_ALL_FROM_LIST) .subDocumentValue( SubDocumentValue.of( - new Document[] { - new JSONDocument(Map.of("name", "Pluto")), - new JSONDocument(Map.of("name", "Mars")) + new Document[]{ + new JSONDocument(Map.of("name", "Pluto")), + new JSONDocument(Map.of("name", "Mars")) })) .build(); @@ -3013,7 +3012,7 @@ void testRemoveFromSingletonList(final String datastoreName) throws IOException SubDocumentUpdate.builder() .subDocument("props.added.habitable_planets") .operator(SET) - .subDocumentValue(SubDocumentValue.of(new String[] {"Earth"})) + .subDocumentValue(SubDocumentValue.of(new String[]{"Earth"})) .build(); final Query query = Query.builder().build(); @@ -3033,7 +3032,7 @@ void testRemoveFromSingletonList(final String datastoreName) throws IOException SubDocumentUpdate.builder() .subDocument("props.added.habitable_planets") .operator(REMOVE_ALL_FROM_LIST) - .subDocumentValue(SubDocumentValue.of(new String[] {"Earth"})) + .subDocumentValue(SubDocumentValue.of(new String[]{"Earth"})) .build(); final List newUpdates = List.of(remove); @@ -3061,7 +3060,7 @@ void 
testRemoveAllOccurrencesFromIntegerList(final String datastoreName) throws SubDocumentUpdate.builder() .subDocument("props.added.list") .operator(SET) - .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) .build(); final Query query = Query.builder().build(); @@ -3100,7 +3099,7 @@ void testAddToListIfAbsentDoesNotDeduplicateTheExistingList(final String datasto final SubDocumentUpdate add = SubDocumentUpdate.builder() .subDocument("props.added.list") - .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) .build(); final Query query = Query.builder().build(); @@ -3114,7 +3113,7 @@ void testAddToListIfAbsentDoesNotDeduplicateTheExistingList(final String datasto SubDocumentUpdate.builder() .subDocument("props.added.list") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new Integer[] {3, 1, 4})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{3, 1, 4})) .build(); final List new_updates = List.of(remove); @@ -3139,7 +3138,7 @@ void testSameHierarchyUpdateThrowsException(final String datastoreName) throws I final SubDocumentUpdate add = SubDocumentUpdate.builder() .subDocument("props.added.list") - .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) .build(); final Query query = Query.builder().build(); @@ -3176,7 +3175,7 @@ void testAddOperatorThrowExceptionForNonNumericValue(final String datastoreName) SubDocumentUpdate.builder() .subDocument("props.added.list") .operator(ADD) - .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) .build(); final Query query_addList = Query.builder().build(); final List updates_addList = List.of(addList); @@ -3189,9 +3188,9 @@ void testAddOperatorThrowExceptionForNonNumericValue(final String datastoreName) 
.operator(ADD) .subDocumentValue( SubDocumentValue.of( - new Document[] { - new JSONDocument(Map.of("name", "Pluto")), - new JSONDocument(Map.of("name", "Mars")) + new Document[]{ + new JSONDocument(Map.of("name", "Pluto")), + new JSONDocument(Map.of("name", "Mars")) })) .build(); final Query query_addObject = Query.builder().build(); @@ -5013,6 +5012,167 @@ void testNotExistsFilterOnUnnestJsonbArray(String dataStoreName) { } assertEquals(7, count); } + + @Nested + class FlatCollectionUnnestWithInFilterTests { + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testInFilterWithUnnestOnTopLevelArray(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + for (boolean preserveNullAndEmptyArrays : List.of(true, false)) { + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + preserveNullAndEmptyArrays)) + // Should return unnested tag elements that match 'hygiene' OR 'grooming' + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + IN, + ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) + .build(); + + //this query will first unnest "tags" array and keep rows that have null and empty arrays. It'll then filter those rows for which the + // unnested tag is either hygiene or grooming. 
We have a total of 5 rows that'll match this filter + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + assertEquals(5, count, "Should return at least one unnested tag matching the filter"); + } + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotInFilterWithUnnestOnTopLevelArray(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + for (boolean preserveNullAndEmptyArrays : List.of(true, false)) { + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + preserveNullAndEmptyArrays)) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + NOT_IN, + ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) + .build(); + //this query will first unnest "tags" array and keep rows that have null and empty arrays. unnest() on empty and null arrays returns NULL which is then + // included in the result set (as the predicate contains tags_unnested == NULL OR ...) + + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + assertEquals(preserveNullAndEmptyArrays ? 
22 : 20, count, + "Should return unnested tags not matching the filter"); + } + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testInFilterWithUnnestOnJsonbArray(String dataStoreName) throws Exception { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + true)) + // Should return unnested source-loc elements that match 'warehouse-A' OR 'store-1' + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + IN, + ConstantExpression.ofStrings(List.of("warehouse-A", "store-1")))) + .build(); + + Iterator resultIterator = flatCollection.find(unnestQuery); + + int count = 0; + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + // Parse JSON to extract the unnested value + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + // The unnested value is aliased as "props.source-loc" + JsonNode locationNode = json.get("props.source-loc"); + count++; + } + + assertEquals(2, count, "Should return at least 2 unnested locations matching the filter"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotInFilterOnUnnestedJsonbArray(String dataStoreName) throws Exception { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + 
JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + true)) + // Should return unnested source-loc elements that DO NOT match 'warehouse-A' + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + NOT_IN, + ConstantExpression.ofStrings(List.of("warehouse-A")))) + .build(); + + Iterator resultIterator = flatCollection.find(unnestQuery); + + int count = 0; + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + // Parse JSON to extract the unnested value + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + JsonNode locationNode = json.get("props.source-loc"); + count++; + } + // Should NOT contain 'warehouse-A' + assertEquals(12, count, "Should return unnested locations not matching the filter"); + } + } } @Nested diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresInRelationalFilterParserJsonArray.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresInRelationalFilterParserJsonArray.java index a0b8b2ad..23391791 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresInRelationalFilterParserJsonArray.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresInRelationalFilterParserJsonArray.java @@ -22,6 +22,9 @@ * *

This checks if the JSON array contains ANY of the provided values, using efficient JSONB * containment instead of defensive type checking. + * + *

Special case: If the JSONB array field has been unnested, each row contains a scalar value + * (not an array), so we use scalar IN syntax instead of the @> containment operator. */ public class PostgresInRelationalFilterParserJsonArray implements PostgresInRelationalFilterParserInterface { @@ -42,11 +45,74 @@ public String parse( new IllegalStateException( "JsonFieldType must be present - this should have been caught by the selector")); - return prepareFilterStringForInOperator( + // Check if this field has been unnested - if so, treat it as a scalar + String fieldName = jsonExpr.getName(); + if (context.getPgColumnNames().containsKey(fieldName)) { + // Field is unnested - each element is now a scalar, not an array + // Use scalar IN operator instead of JSONB containment + return prepareFilterStringForScalarInOperator( + parsedLhs, parsedRhs, context.getParamsBuilder()); + } + + // Field is NOT unnested - use JSONB containment logic + return prepareFilterStringForArrayInOperator( parsedLhs, parsedRhs, fieldType, context.getParamsBuilder()); } - private String prepareFilterStringForInOperator( + /** + * Generates SQL for scalar IN operator (used when JSONB array field has been unnested). Example: + * "props_dot_source-loc" IN (?::jsonb, ?::jsonb) + * + *

Note: After unnesting with jsonb_array_elements(), each row contains a JSONB scalar value. + * We cast the parameters to jsonb for direct JSONB-to-JSONB comparison, which works for all JSONB + * types (strings, numbers, booleans, objects). + */ + private String prepareFilterStringForScalarInOperator( + final String parsedLhs, + final Iterable parsedRhs, + final Params.Builder paramsBuilder) { + + String placeholders = + StreamSupport.stream(parsedRhs.spliterator(), false) + .map( + value -> { + // Add the value as a JSONB-formatted string + // For strings, this needs to be JSON-quoted (e.g., "warehouse-A" becomes + // "\"warehouse-A\"") + String jsonValue = convertToJsonString(value); + paramsBuilder.addObjectParam(jsonValue); + return "?::jsonb"; + }) + .collect(Collectors.joining(", ")); + + // Direct JSONB comparison - no text conversion needed + return String.format("%s IN (%s)", parsedLhs, placeholders); + } + + /** + * Converts a Java value to its JSON string representation for JSONB casting. Strings are quoted, + * numbers/booleans are not. + */ + private String convertToJsonString(Object value) { + if (value == null) { + return "null"; + } else if (value instanceof String) { + // JSON strings must be quoted + return "\"" + value.toString().replace("\"", "\\\"") + "\""; + } else if (value instanceof Number || value instanceof Boolean) { + // Numbers and booleans are not quoted in JSON + return value.toString(); + } else { + // For other types, assume they're already JSON-formatted or treat as string + return "\"" + value.toString().replace("\"", "\\\"") + "\""; + } + } + + /** + * Generates SQL for JSONB containment operator (used for non-unnested JSONB array fields). 
+ * Example: document->'tags' @> jsonb_build_array(?::text) + */ + private String prepareFilterStringForArrayInOperator( final String parsedLhs, final Iterable parsedRhs, final JsonFieldType fieldType, diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresInRelationalFilterParserArrayField.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresInRelationalFilterParserArrayField.java index b30446f8..a120ac14 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresInRelationalFilterParserArrayField.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresInRelationalFilterParserArrayField.java @@ -2,6 +2,7 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; +import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.RelationalExpression; import org.hypertrace.core.documentstore.postgres.Params; import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresInRelationalFilterParserInterface; @@ -16,6 +17,9 @@ * *

Example: tags IN ('hygiene', 'premium') translates to: tags && ARRAY['hygiene', * 'premium']::text[] + * + *

Special case: If the array field has been unnested, each row contains a scalar value (not an + * array), so we use scalar IN syntax instead of the array overlap operator. */ public class PostgresInRelationalFilterParserArrayField implements PostgresInRelationalFilterParserInterface { @@ -27,13 +31,49 @@ public String parse( final String parsedLhs = expression.getLhs().accept(context.lhsParser()); final Iterable parsedRhs = expression.getRhs().accept(context.rhsParser()); - String arrayTypeCast = expression.getLhs().accept(new PostgresArrayTypeExtractor()); + // Check if this field has been unnested - if so, treat it as a scalar + ArrayIdentifierExpression arrayExpr = (ArrayIdentifierExpression) expression.getLhs(); + String fieldName = arrayExpr.getName(); + if (context.getPgColumnNames().containsKey(fieldName)) { + // Field is unnested - each element is now a scalar, not an array + // Use scalar IN operator instead of array overlap + return prepareFilterStringForScalarInOperator( + parsedLhs, parsedRhs, context.getParamsBuilder()); + } - return prepareFilterStringForInOperator( + // Field is NOT unnested - use array overlap logic + String arrayTypeCast = expression.getLhs().accept(new PostgresArrayTypeExtractor()); + return prepareFilterStringForArrayInOperator( parsedLhs, parsedRhs, arrayTypeCast, context.getParamsBuilder()); } - private String prepareFilterStringForInOperator( + /** + * Generates SQL for scalar IN operator (used when array field has been unnested). Example: + * "tags_unnested" IN (?, ?, ?) 
+ */ + private String prepareFilterStringForScalarInOperator( + final String parsedLhs, + final Iterable parsedRhs, + final Params.Builder paramsBuilder) { + + String placeholders = + StreamSupport.stream(parsedRhs.spliterator(), false) + .map( + value -> { + paramsBuilder.addObjectParam(value); + return "?"; + }) + .collect(Collectors.joining(", ")); + + // Scalar IN operator for unnested array elements + return String.format("%s IN (%s)", parsedLhs, placeholders); + } + + /** + * Generates SQL for array overlap operator (used for non-unnested array fields). Example: "tags" + * && ARRAY[?, ?]::text[] + */ + private String prepareFilterStringForArrayInOperator( final String parsedLhs, final Iterable parsedRhs, final String arrayType, diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java index c28d3b68..ecafc8ca 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/PostgresNotExistsRelationalFilterParserTest.java @@ -68,7 +68,7 @@ void testParse_arrayField_rhsTrue() { @Test void testParse_jsonbArrayField_rhsFalse() { - // Test NOT_EXISTS on JSONB array with RHS = false + // Test NOT_EXISTS on JSONB array with RHS = false JsonIdentifierExpression lhs = JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors"); ConstantExpression rhs = ConstantExpression.of(false); From 7d965bea69dff0915c0525ad45f988e1ca34c427 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Tue, 25 Nov 2025 23:31:14 +0530 Subject: [PATCH 6/9] Refactor test cases --- .../documentstore/DocStoreQueryV1Test.java | 2449 +++++++++-------- 
.../query/pg_flat_collection_insert.json | 20 +- 2 files changed, 1365 insertions(+), 1104 deletions(-) diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index e9c83a89..e012257d 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -55,7 +55,6 @@ import static org.hypertrace.core.documentstore.utils.Utils.assertDocsAndSizeEqualWithoutOrder; import static org.hypertrace.core.documentstore.utils.Utils.convertJsonToMap; import static org.hypertrace.core.documentstore.utils.Utils.readFileFromResource; -import static org.junit.Assert.assertNotNull; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -125,6 +124,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.params.ParameterizedTest; @@ -206,6 +206,7 @@ private static void createFlatCollectionSchema( + "\"price\" INTEGER," + "\"quantity\" INTEGER," + "\"date\" TIMESTAMPTZ," + + "\"in_stock\" BOOLEAN," + "\"tags\" TEXT[]," + "\"categoryTags\" TEXT[]," + "\"props\" JSONB," @@ -320,7 +321,7 @@ public Stream provideArguments(final ExtensionContext context) { Arguments.of(POSTGRES_STORE, "WITH_TYPE"), // ArrayIdentifierExpression WITH ArrayType Arguments.of( POSTGRES_STORE, "WITHOUT_TYPE") // ArrayIdentifierExpression WITHOUT ArrayType - ); + ); } } @@ -2025,7 +2026,8 @@ public void testAtomicCreateOrReplace(final String datastoreName) 
.collect(toUnmodifiableList()); assertEquals(1, documents.size()); - @SuppressWarnings("unchecked") final Map mapping = + @SuppressWarnings("unchecked") + final Map mapping = new ObjectMapper().readValue(documents.get(0).toJson(), Map.class); assertTrue( (long) mapping.get(DocStoreConstants.LAST_UPDATED_TIME) @@ -2693,25 +2695,25 @@ void testUpdateWithAllOperators(final String datastoreName) throws IOException { SubDocumentUpdate.builder() .subDocument("props.added.set") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) .build(); final SubDocumentUpdate another_add = SubDocumentUpdate.builder() .subDocument("props.planets") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new String[]{"Neptune", "Pluto"})) + .subDocumentValue(SubDocumentValue.of(new String[] {"Neptune", "Pluto"})) .build(); final SubDocumentUpdate append = SubDocumentUpdate.builder() .subDocument("props.appended.list") .operator(APPEND_TO_LIST) - .subDocumentValue(SubDocumentValue.of(new Integer[]{1, 2})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {1, 2})) .build(); final SubDocumentUpdate remove = SubDocumentUpdate.builder() .subDocument("props.removed.list") .operator(REMOVE_ALL_FROM_LIST) - .subDocumentValue(SubDocumentValue.of(new String[]{"Hello"})) + .subDocumentValue(SubDocumentValue.of(new String[] {"Hello"})) .build(); final SubDocumentUpdate increment = SubDocumentUpdate.builder() @@ -2745,19 +2747,19 @@ void testUpdateWithAllOperators(final String datastoreName) throws IOException { SubDocumentUpdate.builder() .subDocument("props.added.set") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new Integer[]{3, 1, 1000})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {3, 1, 1000})) .build(); final SubDocumentUpdate append_new = SubDocumentUpdate.builder() .subDocument("props.appended.list") 
.operator(APPEND_TO_LIST) - .subDocumentValue(SubDocumentValue.of(new Integer[]{8, 2})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {8, 2})) .build(); final SubDocumentUpdate remove_new = SubDocumentUpdate.builder() .subDocument("props.planets") .operator(REMOVE_ALL_FROM_LIST) - .subDocumentValue(SubDocumentValue.of(new String[]{"Pluto", "Mars"})) + .subDocumentValue(SubDocumentValue.of(new String[] {"Pluto", "Mars"})) .build(); final SubDocumentUpdate decrement = SubDocumentUpdate.builder() @@ -2904,10 +2906,10 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(ADD_TO_LIST_IF_ABSENT) .subDocumentValue( SubDocumentValue.of( - new Document[]{ - new JSONDocument(Map.of("key", 1)), - new JSONDocument(Map.of("key", 2)), - new JSONDocument(Map.of("key", 1)) + new Document[] { + new JSONDocument(Map.of("key", 1)), + new JSONDocument(Map.of("key", 2)), + new JSONDocument(Map.of("key", 1)) })) .build(); final SubDocumentUpdate another_add = @@ -2916,9 +2918,9 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(ADD_TO_LIST_IF_ABSENT) .subDocumentValue( SubDocumentValue.of( - new Document[]{ - new JSONDocument(Map.of("name", "Neptune")), - new JSONDocument(Map.of("name", "Pluto")) + new Document[] { + new JSONDocument(Map.of("name", "Neptune")), + new JSONDocument(Map.of("name", "Pluto")) })) .build(); final SubDocumentUpdate append = @@ -2927,8 +2929,8 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(APPEND_TO_LIST) .subDocumentValue( SubDocumentValue.of( - new Document[]{ - new JSONDocument(Map.of("key", 1)), new JSONDocument(Map.of("key", 2)) + new Document[] { + new JSONDocument(Map.of("key", 1)), new JSONDocument(Map.of("key", 2)) })) .build(); final SubDocumentUpdate remove = @@ -2936,7 +2938,7 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .subDocument("props.removed.list") 
.operator(REMOVE_ALL_FROM_LIST) .subDocumentValue( - SubDocumentValue.of(new Document[]{new JSONDocument(Map.of("Hello", "world!"))})) + SubDocumentValue.of(new Document[] {new JSONDocument(Map.of("Hello", "world!"))})) .build(); final Query query = Query.builder().build(); @@ -2953,7 +2955,7 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .subDocument("props.sales") .operator(SET) .subDocumentValue( - SubDocumentValue.of(new Document[]{new JSONDocument(Map.of("count", 789))})) + SubDocumentValue.of(new Document[] {new JSONDocument(Map.of("count", 789))})) .build(); final SubDocumentUpdate unset_new = SubDocumentUpdate.builder() @@ -2966,8 +2968,8 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(ADD_TO_LIST_IF_ABSENT) .subDocumentValue( SubDocumentValue.of( - new Document[]{ - new JSONDocument(Map.of("key", 3)), new JSONDocument(Map.of("key", 1)) + new Document[] { + new JSONDocument(Map.of("key", 3)), new JSONDocument(Map.of("key", 1)) })) .build(); final SubDocumentUpdate append_new = @@ -2976,8 +2978,8 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(APPEND_TO_LIST) .subDocumentValue( SubDocumentValue.of( - new Document[]{ - new JSONDocument(Map.of("key", 8)), new JSONDocument(Map.of("key", 2)) + new Document[] { + new JSONDocument(Map.of("key", 8)), new JSONDocument(Map.of("key", 2)) })) .build(); final SubDocumentUpdate remove_new = @@ -2986,9 +2988,9 @@ void testUpdateWithAllOperatorsOnObject(final String datastoreName) throws IOExc .operator(REMOVE_ALL_FROM_LIST) .subDocumentValue( SubDocumentValue.of( - new Document[]{ - new JSONDocument(Map.of("name", "Pluto")), - new JSONDocument(Map.of("name", "Mars")) + new Document[] { + new JSONDocument(Map.of("name", "Pluto")), + new JSONDocument(Map.of("name", "Mars")) })) .build(); @@ -3012,7 +3014,7 @@ void testRemoveFromSingletonList(final String datastoreName) throws IOException 
SubDocumentUpdate.builder() .subDocument("props.added.habitable_planets") .operator(SET) - .subDocumentValue(SubDocumentValue.of(new String[]{"Earth"})) + .subDocumentValue(SubDocumentValue.of(new String[] {"Earth"})) .build(); final Query query = Query.builder().build(); @@ -3032,7 +3034,7 @@ void testRemoveFromSingletonList(final String datastoreName) throws IOException SubDocumentUpdate.builder() .subDocument("props.added.habitable_planets") .operator(REMOVE_ALL_FROM_LIST) - .subDocumentValue(SubDocumentValue.of(new String[]{"Earth"})) + .subDocumentValue(SubDocumentValue.of(new String[] {"Earth"})) .build(); final List newUpdates = List.of(remove); @@ -3060,7 +3062,7 @@ void testRemoveAllOccurrencesFromIntegerList(final String datastoreName) throws SubDocumentUpdate.builder() .subDocument("props.added.list") .operator(SET) - .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) .build(); final Query query = Query.builder().build(); @@ -3099,7 +3101,7 @@ void testAddToListIfAbsentDoesNotDeduplicateTheExistingList(final String datasto final SubDocumentUpdate add = SubDocumentUpdate.builder() .subDocument("props.added.list") - .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) .build(); final Query query = Query.builder().build(); @@ -3113,7 +3115,7 @@ void testAddToListIfAbsentDoesNotDeduplicateTheExistingList(final String datasto SubDocumentUpdate.builder() .subDocument("props.added.list") .operator(ADD_TO_LIST_IF_ABSENT) - .subDocumentValue(SubDocumentValue.of(new Integer[]{3, 1, 4})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {3, 1, 4})) .build(); final List new_updates = List.of(remove); @@ -3138,7 +3140,7 @@ void testSameHierarchyUpdateThrowsException(final String datastoreName) throws I final SubDocumentUpdate add = SubDocumentUpdate.builder() .subDocument("props.added.list") - 
.subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) .build(); final Query query = Query.builder().build(); @@ -3175,7 +3177,7 @@ void testAddOperatorThrowExceptionForNonNumericValue(final String datastoreName) SubDocumentUpdate.builder() .subDocument("props.added.list") .operator(ADD) - .subDocumentValue(SubDocumentValue.of(new Integer[]{5, 1, 5})) + .subDocumentValue(SubDocumentValue.of(new Integer[] {5, 1, 5})) .build(); final Query query_addList = Query.builder().build(); final List updates_addList = List.of(addList); @@ -3188,9 +3190,9 @@ void testAddOperatorThrowExceptionForNonNumericValue(final String datastoreName) .operator(ADD) .subDocumentValue( SubDocumentValue.of( - new Document[]{ - new JSONDocument(Map.of("name", "Pluto")), - new JSONDocument(Map.of("name", "Mars")) + new Document[] { + new JSONDocument(Map.of("name", "Pluto")), + new JSONDocument(Map.of("name", "Mars")) })) .build(); final Query query_addObject = Query.builder().build(); @@ -3209,11 +3211,11 @@ private void assertExceptionForNonNumericValues( } @Nested - class FlatPostgresCollectionTest { + class FlatPostgresCollectionGeneralQueries { @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionFindAll(String dataStoreName) throws IOException { + void testFindAll(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -3228,329 +3230,57 @@ void testFlatPostgresCollectionFindAll(String dataStoreName) throws IOException Document doc = iterator.next(); count++; // Verify document has content (basic validation) - assertNotNull(doc); - assertNotNull(doc.toJson()); - assertTrue(doc.toJson().length() > 0); + Assertions.assertNotNull(doc); + Assertions.assertNotNull(doc.toJson()); + assertTrue(!doc.toJson().isEmpty()); 
assertEquals(DocumentType.FLAT, doc.getDocumentType()); } iterator.close(); - - // Should have 8 documents from the INSERT statements + // Should have 10 documents from the INSERT statements assertEquals(10, count); } @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionFilterByItem(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test filtering by item - Query itemQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - IdentifierExpression.of("item"), EQ, ConstantExpression.of("Soap"))) - .build(); - - CloseableIterator soapIterator = flatCollection.find(itemQuery); - long soapCount = 0; - while (soapIterator.hasNext()) { - Document doc = soapIterator.next(); - // Verify it's a soap document by checking JSON contains "Soap" - assertTrue(doc.toJson().contains("\"Soap\"")); - soapCount++; - } - soapIterator.close(); - - // Should have 3 soap documents (IDs 1, 5, 8) - assertEquals(3, soapCount); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionCount(String dataStoreName) { + void testUnnestPreserveEmptyArraysFalse(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test count method - all documents - long totalCount = flatCollection.count(Query.builder().build()); - assertEquals(10, totalCount); - - // Test count with filter - soap documents only - Query soapQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - IdentifierExpression.of("item"), EQ, ConstantExpression.of("Soap"))) - .build(); - long soapCountQuery = flatCollection.count(soapQuery); - assertEquals(3, soapCountQuery); - } - - /** - * Tests IN operator on flat collection 
fields (top-level columns and JSONB fields) with - * type-specific optimization. - * - *

Flat collection schema: - * - *

    - *
  • Top-level columns: item (TEXT), price (INTEGER), quantity (INTEGER) - *
  • JSONB column: props (with nested fields like brand, size, seller) - *
- * - *

Expected SQL patterns for top-level columns: - * - *

    - *
  • "item" IN (?, ?) - Direct column reference - *
  • "price" IN (?, ?) - Direct column reference - *
- * - *

Expected SQL patterns for JSONB fields with JsonFieldType: - * - *

    - *
  • STRING: "props" ->> 'brand' IN (?, ?) - *
  • NUMBER: CAST("props" ->> 'field' AS NUMERIC) IN (?, ?) - *
- */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testNestedCollectionInOperatorOnJsonPrimitiveFields(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection collection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test 1: IN operator on top-level STRING column (item) - // Find documents where item is "Soap" OR "Shampoo" - // Expected SQL: "item" IN ('Soap', 'Shampoo') - Query itemInQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - IdentifierExpression.of("item"), - IN, - ConstantExpression.ofStrings(List.of("Soap", "Shampoo")))) - .build(); - - long itemInCount = collection.count(itemInQuery); - assertEquals(5, itemInCount); // 3 Soap + 2 Shampoo documents - - // Test 2: IN operator on top-level NUMBER column (quantity) - // Find documents where quantity is 5 OR 10 - // Expected SQL: "quantity" IN (5, 10) - Query quantityInQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - IdentifierExpression.of("quantity"), - IN, - ConstantExpression.ofNumbers(List.of(5, 10)))) - .build(); - - long quantityInCount = collection.count(quantityInQuery); - assertEquals(5, quantityInCount); // quantity=5: _id 5,6,8; quantity=10: _id 3,7 - - // Test 3: IN operator on top-level NUMBER column (price) - // Find documents where price is 5 OR 10 - // Expected SQL: "price" IN (5, 10) - Query priceInQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - IdentifierExpression.of("price"), - IN, - ConstantExpression.ofNumbers(List.of(5, 10)))) - .build(); - - long priceInCount = collection.count(priceInQuery); - assertEquals(4, priceInCount); // price=10: _id 1,8; price=5: _id 3,4 - - // Test 4: IN operator on JSONB STRING field (props.brand) with type optimization - // Find documents where props.brand is "Dettol" OR "Sunsilk" - // Expected SQL: "props" ->> 'brand' IN ('Dettol', 'Sunsilk') - Query nestedInQuery = - Query.builder() - .setFilter( 
- RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - IN, - ConstantExpression.ofStrings(List.of("Dettol", "Sunsilk")))) - .build(); - - long nestedInCount = collection.count(nestedInQuery); - assertEquals(2, nestedInCount); // _id 1 (Dettol) + _id 3 (Sunsilk) - - // Test 5: NOT_IN operator on top-level STRING column (item) - // Find documents where item is NOT "Soap" - // Expected SQL: "item" NOT IN ('Soap') OR "item" IS NULL - Query itemNotInQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - IdentifierExpression.of("item"), - NOT_IN, - ConstantExpression.ofStrings(List.of("Soap")))) - .build(); - - long itemNotInCount = collection.count(itemNotInQuery); - assertEquals(7, itemNotInCount); // All 10 docs minus 3 Soap docs = 7 - - // Test 6: Combined IN with other filters (AND) - // Filter: item IN ("Soap", "Shampoo") AND quantity >= 5 - Query combinedQuery = + Query unnestQuery = Query.builder() - .setFilter( - LogicalExpression.builder() - .operator(LogicalOperator.AND) - .operand( - RelationalExpression.of( - IdentifierExpression.of("item"), - IN, - ConstantExpression.ofStrings(List.of("Soap", "Shampoo")))) - .operand( - RelationalExpression.of( - IdentifierExpression.of("quantity"), GTE, ConstantExpression.of(5))) - .build()) + .addSelection(IdentifierExpression.of("tags")) + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of("*")), "count") + .addAggregation(IdentifierExpression.of("tags")) + .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), false)) .build(); - long combinedCount = collection.count(combinedQuery); - assertTrue(combinedCount > 0, "Combined IN with >= filter should find documents"); + Iterator resultIterator = flatCollection.aggregate(unnestQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_unnest_tags_response.json", 17); } - /** - * Tests querying JSONB nested fields with JsonIdentifierExpression and 
JsonFieldType for - * optimized SQL generation. - * - *

JsonFieldType is required for all JSONB IN/NOT_IN operations to generate optimal SQL. - * - *

Test coverage: - * - *

    - *
  • EQ operator on JSONB STRING fields with type info - *
  • IN operator on JSONB STRING fields with type info - *
  • NOT_IN operator on JSONB STRING fields with type info (includes NULL handling) - *
  • Deeply nested JSONB field access (props.seller.address.city) - *
  • Combined filters with JSONB and top-level columns - *
- */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionNestedFieldQuery(String dataStoreName) throws IOException { + void testUnnestPreserveEmptyArraysTrue(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test 1: EQ operator on JSONB STRING field (props.brand) with type optimization - // Expected SQL: "props" ->> 'brand' = 'Dettol' - Query brandQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - EQ, - ConstantExpression.of("Dettol"))) - .build(); - - CloseableIterator brandIterator = flatCollection.find(brandQuery); - long brandCount = 0; - while (brandIterator.hasNext()) { - Document doc = brandIterator.next(); - // Verify it contains the expected brand - assertTrue(doc.toJson().contains("\"Dettol\"")); - brandCount++; - } - brandIterator.close(); - - // Should have 1 Dettol document (_id=1) - assertEquals(1, brandCount); - - // Test 2: Deeply nested JSONB field access (props.seller.address.city) - // Expected SQL: "props" -> 'seller' -> 'address' ->> 'city' = 'Mumbai' - Query cityQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.STRING, "seller", "address", "city"), - EQ, - ConstantExpression.of("Mumbai"))) - .build(); - - CloseableIterator cityIterator = flatCollection.find(cityQuery); - long cityCount = 0; - while (cityIterator.hasNext()) { - Document doc = cityIterator.next(); - // Verify it contains Mumbai - assertTrue(doc.toJson().contains("\"Mumbai\"")); - cityCount++; - } - cityIterator.close(); - - // Should have 2 Mumbai documents (_id=1, _id=3) - assertEquals(2, cityCount); - - // Test 3: IN operator on JSONB STRING field with type optimization - // Expected SQL: "props" ->> 'brand' IN ('Dettol', 
'Sunsilk', 'Lifebuoy') - Query brandInQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - IN, - ConstantExpression.ofStrings(List.of("Dettol", "Sunsilk", "Lifebuoy")))) - .build(); - - long brandInCount = flatCollection.count(brandInQuery); - assertEquals(3, brandInCount); // _id=1 (Dettol), _id=3 (Sunsilk), _id=5 (Lifebuoy) - - // Test 4: Combined filter - JSONB field + top-level column - // Filter: brand = "Dettol" AND price = 10 - // Expected SQL: "props" ->> 'brand' = 'Dettol' AND "price" = 10 - Query combinedQuery = - Query.builder() - .setFilter( - LogicalExpression.builder() - .operator(LogicalOperator.AND) - .operand( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - EQ, - ConstantExpression.of("Dettol"))) - .operand( - RelationalExpression.of( - IdentifierExpression.of("price"), EQ, ConstantExpression.of(10))) - .build()) - .build(); - - long combinedCount = flatCollection.count(combinedQuery); - assertEquals(1, combinedCount); // Only _id=1 matches both conditions - - // Test 5: NOT_IN operator on JSONB STRING field with type optimization - // Find documents where brand is NOT "Dettol" (should find Sunsilk and Lifebuoy) - // Expected SQL: NOT ("props" ->> 'brand' IN ('Dettol')) OR "props" ->> 'brand' IS NULL - Query brandNotInQuery = + // Include all documents in result irrespective of tags field (LEFT JOIN) + // Counts rows after unnest: 25 (from 8 docs with tags) + 2 (from docs with NULL/empty) + Query unnestPreserveTrueQuery = Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - NOT_IN, - ConstantExpression.ofStrings(List.of("Dettol")))) + .addSelection(AggregateExpression.of(COUNT, IdentifierExpression.of("item")), "count") + .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), true)) .build(); - long brandNotInCount = 
flatCollection.count(brandNotInQuery); - // Docs with brand: _id=1 (Dettol), _id=3 (Sunsilk), _id=5 (Lifebuoy) - // Docs with NULL props or no brand: _id=2, 4, 6, 7, 8, 9, 10 - // NOT_IN "Dettol" should return: 2 (Sunsilk, Lifebuoy) + 7 (NULL/missing) = 9 - assertEquals(9, brandNotInCount); - - // Test 6: Verify count methods with JSONB fields - long brandCountQuery = flatCollection.count(brandQuery); - assertEquals(1, brandCountQuery); - - long cityCountQuery = flatCollection.count(cityQuery); - assertEquals(2, cityCountQuery); + Iterator resultIterator = flatCollection.aggregate(unnestPreserveTrueQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, + resultIterator, + "query/flat_unnest_preserving_empty_array_response.json", + 1); } @ParameterizedTest @@ -3694,37 +3424,6 @@ void testFlatVsNestedCollectionConsistency(String dataStoreName) throws IOExcept flatDocIterator.close(); } - /** - * Tests basic UNNEST operation on flat PostgreSQL collection with native TEXT[] arrays. - * Validates that PostgresFromTypeExpressionVisitor correctly uses unnest() instead of - * jsonb_array_elements() for native PostgreSQL arrays. Groups by tags and counts occurrences. 
- */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionUnnestTags(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("tags")) - .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of("*")), "count") - .addAggregation(IdentifierExpression.of("tags")) - .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), false)) - .build(); - - Iterator resultIterator = flatCollection.aggregate(unnestQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, resultIterator, "query/flat_unnest_tags_response.json", 17); - } - - /** - * Tests complex UNNEST operation on flat PostgreSQL collection with native TEXT[] arrays. - * Combines multiple filters (WHERE, unnest filter), aggregations (COUNT, AVG), HAVING clause, - * and ORDER BY sorting. Validates integration of PostgresFromTypeExpressionVisitor, - * PostgresUnnestFilterTypeExpressionVisitor, and PostgresFilterTypeExpressionVisitor. - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) void testFlatPostgresCollectionUnnestWithComplexQuery(String dataStoreName) throws IOException { @@ -3769,96 +3468,6 @@ void testFlatPostgresCollectionUnnestWithComplexQuery(String dataStoreName) thro dataStoreName, resultIterator, "query/flat_unnest_complex_query_response.json", 7); } - /** - * Tests UNNEST with preserveNullAndEmptyArrays=true on flat collection. Counts rows after - * unnesting. Returns 27 rows: 25 from docs with tags (one per tag) + 2 from docs with - * NULL/empty tags. This demonstrates LEFT JOIN behavior. 
- */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionUnnestWithPreserveEmptyTrue(String dataStoreName) - throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Include all documents in result irrespective of tags field (LEFT JOIN) - // Counts rows after unnest: 25 (from 8 docs with tags) + 2 (from docs with NULL/empty) - Query unnestPreserveTrueQuery = - Query.builder() - .addSelection(AggregateExpression.of(COUNT, IdentifierExpression.of("item")), "count") - .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), true)) - .build(); - - Iterator resultIterator = flatCollection.aggregate(unnestPreserveTrueQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, - resultIterator, - "query/flat_unnest_preserving_empty_array_response.json", - 1); - } - - /** - * Tests UNNEST with filters on flat collection. Combines main WHERE filter on quantity field - * with unnest filter on tags, and preserveEmpty=false to exclude documents without tags. 
- */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionUnnestWithFilters(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - Query unnestWithFiltersQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(IdentifierExpression.of("tags")) - .setFilter( - RelationalExpression.of( - IdentifierExpression.of("quantity"), GT, ConstantExpression.of(2))) - .addFromClause( - UnnestExpression.builder() - .identifierExpression(IdentifierExpression.of("tags")) - .preserveNullAndEmptyArrays(false) - .filterTypeExpression( - RelationalExpression.of( - IdentifierExpression.of("tags"), - EQ, - ConstantExpression.of("grooming"))) - .build()) - .build(); - - Iterator resultIterator = flatCollection.aggregate(unnestWithFiltersQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, resultIterator, "query/flat_unnest_with_filters_response.json", 2); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionUnnestWithPreserveEmptyFalse(String dataStoreName) - throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test UNNEST on native TEXT[] array with preserveEmptyArrays = false (INNER JOIN) - // This counts all individual tag values after unnesting - // Expected: 25 total tags (3+3+4+3+3+3+3+3 from 8 documents with non-empty tags) - // Excludes 2 documents with NULL/empty tags - Query unnestPreserveFalseQuery = - Query.builder() - .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count") - .addFromClause(UnnestExpression.of(IdentifierExpression.of("tags"), false)) - .build(); - - Iterator resultIterator = 
flatCollection.aggregate(unnestPreserveFalseQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, - resultIterator, - "query/flat_unnest_not_preserving_empty_array_response.json", - 1); - } - @ParameterizedTest @ArgumentsSource(PostgresProvider.class) void testFlatPostgresCollectionUnnestWithOnlyUnnestFilter(String dataStoreName) @@ -3889,10 +3498,6 @@ void testFlatPostgresCollectionUnnestWithOnlyUnnestFilter(String dataStoreName) dataStoreName, resultIterator, "query/flat_unnest_only_unnest_filter_response.json", 2); } - /** - * Tests UNNEST with ONLY main filter (no unnest filter). Covers line 65 in - * PostgresUnnestFilterTypeExpressionVisitor: only main filter exists. - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) void testFlatPostgresCollectionUnnestWithOnlyMainFilter(String dataStoreName) @@ -3957,13 +3562,6 @@ void testFlatPostgresCollectionArrayRelationalFilter(String dataStoreName) throw dataStoreName, resultIterator, "query/flat_array_relational_filter_response.json", 3); } - /** - * Tests UNNEST operation on flat PostgreSQL collection with mixed-case column name. This test - * reproduces the case sensitivity bug where unquoted alias gets lowercased by PostgreSQL but - * quoted references preserve case, causing a column mismatch error. Field "categoryTags" → - * alias "categoryTags_unnested" → PostgreSQL lowercases to "categorytags_unnested" (if - * unquoted) but references use "categoryTags_unnested". - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) void testFlatPostgresCollectionUnnestMixedCaseField(String dataStoreName) throws IOException { @@ -4068,51 +3666,12 @@ void testFlatPostgresCollectionBooleanArrayFilter(String dataStoreName) throws I dataStoreName, resultIterator, "query/flat_boolean_array_filter_response.json", 2); } - /** - * Tests selection of JSONB nested fields using JsonIdentifierExpression on flat collection. 
- * Validates selecting simple nested fields, deeply nested fields, and JSONB arrays without any - * filters. - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatCollectionNestedJsonSelections(String dataStoreName) throws IOException { + void testFlatVsNestedCollectionNestedFieldSelections(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test 1: Select nested STRING field from JSONB column (props.brand) - Query brandSelectionQuery = - Query.builder().addSelection(JsonIdentifierExpression.of("props", "brand")).build(); - - Iterator brandIterator = flatCollection.find(brandSelectionQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, brandIterator, "query/flat_jsonb_brand_selection_response.json", 10); - - // Test 2: Select deeply nested STRING field from JSONB column (props.seller.address.city) - Query citySelectionQuery = - Query.builder() - .addSelection(JsonIdentifierExpression.of("props", "seller", "address", "city")) - .build(); - - Iterator cityIterator = flatCollection.find(citySelectionQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, cityIterator, "query/flat_jsonb_city_selection_response.json", 10); - - // Test 3: Select STRING_ARRAY field from JSONB column (props.colors) - Query colorsSelectionQuery = - Query.builder().addSelection(JsonIdentifierExpression.of("props", "colors")).build(); - - Iterator colorsIterator = flatCollection.find(colorsSelectionQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, colorsIterator, "query/flat_jsonb_colors_selection_response.json", 10); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatVsNestedCollectionNestedFieldSelections(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - - Collection nestedCollection = 
datastore.getCollection(COLLECTION_NAME); + + Collection nestedCollection = datastore.getCollection(COLLECTION_NAME); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -4249,474 +3808,1413 @@ void testFlatVsNestedCollectionGroupByArrayField(String dataStoreName) throws IO assertDocsAndSizeEqualWithoutOrder( dataStoreName, flatResultIterator, "query/group_by_colors_comparison_response.json", 4); } + } + + @Nested + class FlatCollectionScalarColumns { - /** - * Tests UNNEST operation on JSONB array fields in flat collections. This validates that - * jsonb_array_elements() is used for JSONB arrays (props.colors) instead of unnest() which is - * only for native arrays (tags). - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatCollectionUnnestJsonbArray(String dataStoreName) throws IOException { + void testGroupBy(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test UNNEST on JSONB array field: props.colors - // Expected: Should unnest colors and count distinct items with colors - // Data: id=1 has ["Blue", "Green"], id=3 has ["Black"], id=5 has ["Orange", "Blue"] - // Total: 5 color entries from 3 items - Query unnestJsonbQuery = + // Test GROUP BY on scalar field (item) with COUNT aggregation + Query groupByQuery = Query.builder() .addSelection(IdentifierExpression.of("item")) - .addSelection(JsonIdentifierExpression.of("props", "colors")) - .addFromClause( - UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false)) + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of("*")), "count") + .addAggregation(IdentifierExpression.of("item")) .build(); - Iterator resultIterator = flatCollection.aggregate(unnestJsonbQuery); + Iterator results = flatCollection.aggregate(groupByQuery); - long count = 0; - while 
(resultIterator.hasNext()) { - resultIterator.next(); - count++; + int groupCount = 0; + Map itemCounts = new HashMap<>(); + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + groupCount++; + + String item = json.get("item").asText(); + int count = json.get("count").asInt(); + itemCounts.put(item, count); } - // Expecting 5 results: 2 from Soap (Blue, Green), 1 from Shampoo (Black), - // 2 from Lifebuoy (Orange, Blue) - assertEquals(5, count, "Should find 5 color entries after unnesting JSONB arrays"); + assertTrue(groupCount > 0); + + // Verify Soap appears 3 times (IDs 1, 5, 8) + assertEquals(3, itemCounts.getOrDefault("Soap", 0)); } @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatCollectionGroupByJsonbScalarField(String dataStoreName) throws IOException { + void testAllRelationalOps(String dataStoreName) { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test GROUP BY on JSONB scalar field: props.brand - // This tests grouping by a nested string field in a JSONB column - // Data: 3 rows have brands (Dettol, Sunsilk, Lifebuoy), 7 rows have NULL/missing brand - // GROUP BY on JSONB fields groups NULL values together (standard SQL behavior) - Query groupByBrandQuery = + // Test NEQ (Not Equal) on string field + Query neqQuery = Query.builder() - .addSelection(JsonIdentifierExpression.of("props", "brand")) - .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count") - .addAggregation(JsonIdentifierExpression.of("props", "brand")) - .addSort(JsonIdentifierExpression.of("props", "brand"), ASC) + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("item"), NEQ, ConstantExpression.of("Soap"))) .build(); + long neqCount = flatCollection.count(neqQuery); + assertEquals(7, neqCount); // 10 total - 3 Soap = 7 - Iterator 
resultIterator = flatCollection.aggregate(groupByBrandQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, resultIterator, "query/flat_jsonb_group_by_brand_test_response.json", 4); + // Test LT (Less Than) on integer field + Query ltQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("price"), LT, ConstantExpression.of(10))) + .build(); + long ltCount = flatCollection.count(ltQuery); + assertTrue(ltCount > 0); // Should have prices < 10 + + // Test LTE (Less Than or Equal) on integer field + Query lteQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("price"), LTE, ConstantExpression.of(10))) + .build(); + long lteCount = flatCollection.count(lteQuery); + assertTrue(lteCount >= ltCount); // LTE should include LT results + + // Test GTE (Greater Than or Equal) on integer field + Query gteQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("quantity"), GTE, ConstantExpression.of(5))) + .build(); + long gteCount = flatCollection.count(gteQuery); + assertTrue(gteCount > 0); + + // Test IN operator on string field + Query inQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("item"), + IN, + ConstantExpression.ofStrings(List.of("Soap", "Mirror", "Comb")))) + .build(); + long inCount = flatCollection.count(inQuery); + assertEquals(6, inCount); // 3 Soap + 1 Mirror + 2 Comb = 6 + + // Test NOT_IN operator on string field + Query notInQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("item"), + NOT_IN, + ConstantExpression.ofStrings(List.of("Soap", "Mirror")))) + .build(); + long notInCount = flatCollection.count(notInQuery); + assertEquals(6, notInCount); // 10 total - 3 Soap - 1 Mirror = 6 + + // Test LIKE operator on string field (pattern matching) + Query likeQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + 
IdentifierExpression.of("item"), LIKE, ConstantExpression.of(".*amp.*"))) + .build(); + long likeCount = flatCollection.count(likeQuery); + assertEquals(2, likeCount); // Should match "Shampoo" (IDs 3, 4) + + // Test STARTS_WITH operator on string field + Query startsWithQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("item"), STARTS_WITH, ConstantExpression.of("S"))) + .build(); + long startsWithCount = flatCollection.count(startsWithQuery); + assertEquals(5, startsWithCount); // "Soap" (3) + "Shampoo" (2) = 5 + + // Test combined operators with AND logic + Query combinedQuery = + Query.builder() + .setFilter( + LogicalExpression.builder() + .operator(LogicalOperator.AND) + .operand( + RelationalExpression.of( + IdentifierExpression.of("price"), GTE, ConstantExpression.of(5))) + .operand( + RelationalExpression.of( + IdentifierExpression.of("quantity"), LTE, ConstantExpression.of(10))) + .operand( + RelationalExpression.of( + IdentifierExpression.of("in_stock"), EQ, ConstantExpression.of(true))) + .build()) + .build(); + long combinedCount = flatCollection.count(combinedQuery); + assertTrue(combinedCount > 0); } @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatCollectionGroupByJsonbArrayField(String dataStoreName) throws IOException { + void testSorting(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test GROUP BY on JSONB array field: props.colors with UNNEST - // This tests grouping by individual elements (after unnesting) in a JSONB array - // Behavior should match nested collections: UNNEST flattens array, GROUP BY groups elements - // Data: Row 1 has ["Blue", "Green"], Row 3 has ["Black"], Row 5 has ["Orange", "Blue"] - // Expected: Blue (2), Green (1), Black (1), Orange (1) - 4 distinct color groups - Query groupByColorsQuery = + // 
Test 1: Sort by string field ASC + Query sortItemAscQuery = Query.builder() - .addSelection(JsonIdentifierExpression.of("props", "colors"), "color") - .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count") - .addFromClause( - UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false)) - .addAggregation(JsonIdentifierExpression.of("props", "colors")) - .addSort(JsonIdentifierExpression.of("props", "colors"), ASC) + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("price")) + .addSort(IdentifierExpression.of("item"), ASC) .build(); - Iterator resultIterator = flatCollection.aggregate(groupByColorsQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, resultIterator, "query/flat_jsonb_group_by_colors_test_response.json", 4); + Iterator sortItemAscResults = flatCollection.find(sortItemAscQuery); + String previousItem = null; + int count = 0; + while (sortItemAscResults.hasNext()) { + Document doc = sortItemAscResults.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + String currentItem = json.get("item").asText(); + if (previousItem != null) { + assertTrue( + currentItem.compareTo(previousItem) >= 0, + "Items should be sorted in ascending order"); + } + previousItem = currentItem; + count++; + } + assertEquals(10, count); + + // Test 2: Sort by integer field DESC + Query sortPriceDescQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("price")) + .addSort(IdentifierExpression.of("price"), DESC) + .build(); + + Iterator sortPriceDescResults = flatCollection.find(sortPriceDescQuery); + Integer previousPrice = null; + count = 0; + while (sortPriceDescResults.hasNext()) { + Document doc = sortPriceDescResults.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + int currentPrice = json.get("price").asInt(); + if (previousPrice != null) { + assertTrue(currentPrice <= previousPrice, "Prices 
should be sorted in descending order"); + } + previousPrice = currentPrice; + count++; + } + assertEquals(10, count); + + // Test 3: Multi-level sort (item ASC, then price DESC) + Query multiSortQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("price")) + .addSort(IdentifierExpression.of("item"), ASC) + .addSort(IdentifierExpression.of("price"), DESC) + .build(); + + Iterator multiSortResults = flatCollection.find(multiSortQuery); + String prevItem = null; + Integer prevPrice = null; + while (multiSortResults.hasNext()) { + Document doc = multiSortResults.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + String currentItem = json.get("item").asText(); + int currentPrice = json.get("price").asInt(); + + if (prevItem != null) { + if (currentItem.equals(prevItem)) { + // Same item, price should be descending + assertTrue(currentPrice <= prevPrice, "Within same item, price should descend"); + } else { + // Different item, should be ascending + assertTrue(currentItem.compareTo(prevItem) >= 0, "Items should be sorted ascending"); + } + } + prevItem = currentItem; + prevPrice = currentPrice; + } } @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatCollectionArrayAnyOnJsonbArray(String dataStoreName) { + void testNumericAggregations(String dataStoreName) throws IOException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test ArrayRelationalFilterExpression.ANY on JSONB array (props.colors) - // This uses jsonb_array_elements() internally - Query jsonbArrayQuery = + // Test SUM, AVG, MIN, MAX, COUNT on integer fields + Query aggQuery = Query.builder() - .addSelection(IdentifierExpression.of("item")) - .setFilter( - ArrayRelationalFilterExpression.builder() - .operator(ArrayOperator.ANY) - .filter( - RelationalExpression.of( - 
JsonIdentifierExpression.of("props", "colors"), - EQ, - ConstantExpression.of("Blue"))) - .build()) + .addSelection(AggregateExpression.of(SUM, IdentifierExpression.of("price")), "sum") + .addSelection(AggregateExpression.of(AVG, IdentifierExpression.of("price")), "avg") + .addSelection(AggregateExpression.of(MIN, IdentifierExpression.of("price")), "min") + .addSelection(AggregateExpression.of(MAX, IdentifierExpression.of("price")), "max") + .addSelection( + AggregateExpression.of(COUNT, IdentifierExpression.of("price")), "count") .build(); - long count = flatCollection.count(jsonbArrayQuery); - // ids 1 and 5 have "Blue" in their colors array - assertEquals(2, count, "Should find 2 items with 'Blue' color (ids 1, 5)"); - } + Iterator aggResults = flatCollection.aggregate(aggQuery); + assertTrue(aggResults.hasNext()); - /** - * Tests for relational operators on JSONB nested fields in flat collections. Tests: CONTAINS, - * NOT_CONTAINS, IN, NOT_IN, EQ, NEQ, LT, GT on JSONB columns. - */ - @Nested - class FlatCollectionJsonbRelationalOperatorTest { - - /** - * Tests CONTAINS and NOT_CONTAINS operators on JSONB array fields. 
- CONTAINS: finds - * documents where array contains the value - NOT_CONTAINS: finds documents where array - * doesn't contain the value (including NULL) - */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testJsonbArrayContainsOperators(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test 1: CONTAINS - props.colors CONTAINS "Green" - // Expected: 1 document (id=1, Dettol Soap has ["Green", "White"]) - Query containsQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors"), - CONTAINS, - ConstantExpression.of("Green"))) - .build(); + Document aggDoc = aggResults.next(); + JsonNode json = new ObjectMapper().readTree(aggDoc.toJson()); - long containsCount = flatCollection.count(containsQuery); - assertEquals(1, containsCount, "CONTAINS: Should find 1 document with Green color"); + // Validate aggregation results + assertTrue(json.get("sum").asDouble() > 0, "SUM should be positive"); + assertTrue(json.get("avg").asDouble() > 0, "AVG should be positive"); + assertTrue(json.get("min").asDouble() > 0, "MIN should be positive"); + assertTrue(json.get("max").asDouble() > 0, "MAX should be positive"); + assertEquals(10, json.get("count").asInt(), "COUNT should be 10"); - // Test 2: NOT_CONTAINS - props.colors NOT_CONTAINS "Green" AND _id <= 8 - // Expected: 7 documents (all except id=1 which has Green, limited to first 8) - Query notContainsQuery = - Query.builder() - .setFilter( - LogicalExpression.builder() - .operator(LogicalOperator.AND) - .operand( - RelationalExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.STRING_ARRAY, "colors"), - NOT_CONTAINS, - ConstantExpression.of("Green"))) - .operand( - RelationalExpression.of( - IdentifierExpression.of("_id"), LTE, ConstantExpression.of(8))) - .build()) 
- .build(); + // Verify MIN <= AVG <= MAX + double min = json.get("min").asDouble(); + double avg = json.get("avg").asDouble(); + double max = json.get("max").asDouble(); + assertTrue(min <= avg, "MIN should be <= AVG"); + assertTrue(avg <= max, "AVG should be <= MAX"); - long notContainsCount = flatCollection.count(notContainsQuery); - assertEquals( - 7, notContainsCount, "NOT_CONTAINS: Should find 7 documents without Green color"); + // Test GROUP BY with aggregations + Query groupAggQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + AggregateExpression.of(SUM, IdentifierExpression.of("quantity")), "total_qty") + .addSelection( + AggregateExpression.of(AVG, IdentifierExpression.of("price")), "avg_price") + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of("*")), "count") + .addAggregation(IdentifierExpression.of("item")) + .addSort(IdentifierExpression.of("item"), ASC) + .build(); + + Iterator groupAggResults = flatCollection.aggregate(groupAggQuery); + int groupCount = 0; + while (groupAggResults.hasNext()) { + Document doc = groupAggResults.next(); + JsonNode groupJson = new ObjectMapper().readTree(doc.toJson()); + groupCount++; + + // Validate each group has all expected fields + Assertions.assertNotNull(groupJson.get("item")); + assertTrue(groupJson.get("total_qty").asInt() > 0); + assertTrue(groupJson.get("avg_price").asDouble() > 0); + assertTrue(groupJson.get("count").asInt() > 0); } + assertTrue(groupCount > 0, "Should have at least one group"); + } - /** - * Tests IN and NOT_IN operators on JSONB scalar fields. 
- IN: finds documents where field - * value is in the provided list - NOT_IN: finds documents where field value is not in the - * list (including NULL) - */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testJsonbScalarInOperators(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test 1: IN - props.brand IN ["Dettol", "Lifebuoy"] - // Expected: 2 documents (id=1 Dettol, id=5 Lifebuoy) - Query inQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - IN, - ConstantExpression.ofStrings(List.of("Dettol", "Lifebuoy")))) - .build(); + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNullHandling(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - long inCount = flatCollection.count(inQuery); - assertEquals(2, inCount, "IN: Should find 2 documents with Dettol or Lifebuoy brand"); + // Note: All scalar fields (item, price, quantity, in_stock) have non-NULL values + // in existing data. This test validates correct handling when no NULLs are present. 
- // Test 2: NOT_IN - props.brand NOT_IN ["Dettol"] AND _id <= 8 - // Expected: 7 documents (all except id=1 which is Dettol, limited to first 8) - Query notInQuery = - Query.builder() - .setFilter( - LogicalExpression.builder() - .operator(LogicalOperator.AND) - .operand( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - NOT_IN, - ConstantExpression.ofStrings(List.of("Dettol")))) - .operand( - RelationalExpression.of( - IdentifierExpression.of("_id"), LTE, ConstantExpression.of(8))) - .build()) - .build(); + // Test 1: Verify all items are non-NULL + Query notNullQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("item"), NEQ, ConstantExpression.of("null"))) + .build(); - long notInCount = flatCollection.count(notInQuery); - assertEquals(7, notInCount, "NOT_IN: Should find 7 documents without Dettol brand"); - } + long notNullCount = flatCollection.count(notNullQuery); + assertEquals(10, notNullCount); - /** - * Tests EQ and NEQ operators on JSONB scalar fields. 
- EQ: finds documents where field equals - * the value - NEQ: finds documents where field doesn't equal the value (excluding NULL) - */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testJsonbScalarEqualityOperators(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test 1: EQ - props.brand EQ "Dettol" - // Expected: 1 document (id=1, Dettol Soap) - Query eqQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - EQ, - ConstantExpression.of("Dettol"))) - .build(); + // Test 2: Verify COUNT(field) equals COUNT(*) when no NULLs present + Query aggQuery = + Query.builder() + .addSelection( + AggregateExpression.of(COUNT, IdentifierExpression.of("item")), "count_item") + .addSelection( + AggregateExpression.of(COUNT, IdentifierExpression.of("price")), "count_price") + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of("*")), "count_all") + .build(); - long eqCount = flatCollection.count(eqQuery); - assertEquals(1, eqCount, "EQ: Should find 1 document with Dettol brand"); + Iterator aggResults = flatCollection.aggregate(aggQuery); + assertTrue(aggResults.hasNext()); - // Test 2: NEQ - props.brand NEQ "Dettol" (no _id filter needed) - // Expected: 2 documents (id=3 Sunsilk, id=5 Lifebuoy, excluding NULL props) - Query neqQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), - NEQ, - ConstantExpression.of("Dettol"))) - .build(); + Document aggDoc = aggResults.next(); + JsonNode json = new ObjectMapper().readTree(aggDoc.toJson()); - long neqCount = flatCollection.count(neqQuery); - assertEquals(2, neqCount, "NEQ: Should find 2 documents without Dettol brand"); - } + // When no NULLs, COUNT(field) should equal COUNT(*) + int 
countItem = json.get("count_item").asInt(); + int countPrice = json.get("count_price").asInt(); + int countAll = json.get("count_all").asInt(); - /** - * Tests LT, GT, LTE, GTE comparison operators on JSONB numeric fields. Tests deeply nested - * numeric fields like props.seller.address.pincode. Data: ids 1,3 have pincode 400004; ids - * 5,7 have pincode 700007; rest are NULL - */ - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testJsonbNumericComparisonOperators(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Test 1: GT - props.seller.address.pincode > 500000 - // Expected: 2 documents (ids 5,7 with pincode 700007 in Kolkata) - Query gtQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.NUMBER, "seller", "address", "pincode"), - GT, - ConstantExpression.of(500000))) - .build(); + assertEquals(10, countItem, "COUNT(item) should be 10"); + assertEquals(10, countPrice, "COUNT(price) should be 10"); + assertEquals(10, countAll, "COUNT(*) should be 10"); + assertEquals(countItem, countAll, "COUNT(item) should equal COUNT(*) when no NULLs"); + assertEquals(countPrice, countAll, "COUNT(price) should equal COUNT(*) when no NULLs"); - long gtCount = flatCollection.count(gtQuery); - assertEquals(2, gtCount, "GT: Should find 2 documents with pincode > 500000"); + // Test 3: Test NULL equality filter returns empty result + Query nullEqualQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + IdentifierExpression.of("item"), EQ, ConstantExpression.of("null"))) + .build(); - // Test 2: LT - props.seller.address.pincode < 500000 - // Expected: 2 documents (ids 1,3 with pincode 400004 in Mumbai) - Query ltQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of( - "props", 
JsonFieldType.NUMBER, "seller", "address", "pincode"), - LT, - ConstantExpression.of(500000))) - .build(); + long nullCount = flatCollection.count(nullEqualQuery); + assertEquals(0, nullCount); + } + } + + @Nested + class FlatCollectionTopLevelArrayColumns { + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotEmpty(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags"), EXISTS, ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + JsonNode tags = json.get("tags"); + assertTrue(tags.isArray() && !tags.isEmpty()); + } + // (Ids 1 to 8 have non-empty tags) + assertEquals(8, count); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testEmpty(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(IdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags"), + NOT_EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + count++; + } + + // (Ids 9 and 10 have NULL or EMPTY arrays) + assertEquals(2, count); + } + + @ParameterizedTest + 
@ArgumentsSource(PostgresProvider.class) + void testUnnest(String dataStoreName) { + + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + for (boolean preserveNullAndEmpty : List.of(true, false)) { + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + preserveNullAndEmpty)) + .build(); + + Iterator resultIterator = flatCollection.find(unnestQuery); + int count = 0; + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + count++; + } + // With preserveNullAndEmpty = false, unnest will only unwind arrays with size >= 1. A total + // of 6 such rows would be created from the 3 arrays in the table + // With true, it'll include NULL and EMPTY arrays too. This will result in 13 rows + assertTrue(preserveNullAndEmpty ? 
count == 13 : count == 6); + } + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testInStringArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query inQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + IN, + ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) + .build(); + + Iterator results = flatCollection.find(inQuery); + + int count = 0; + Set items = new HashSet<>(); + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + String item = json.get("item").asText(); + items.add(item); + + // Verify that returned arrays contain at least one of the IN values + JsonNode tags = json.get("tags"); + if (tags != null && tags.isArray()) { + boolean containsMatch = false; + for (JsonNode tag : tags) { + String tagValue = tag.asText(); + if ("hygiene".equals(tagValue) || "grooming".equals(tagValue)) { + containsMatch = true; + break; + } + } + assertTrue(containsMatch, "Array should contain at least one IN value for item: " + item); + } + } + + // Should return rows where tags array overlaps with ["hygiene", "grooming"] + // hygiene: IDs 1, 5, 8 (Soap), 6, 7 (Comb) + assertTrue(count >= 5, "Should return at least 5 items"); + assertTrue(items.contains("Soap")); + assertTrue(items.contains("Comb")); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotInStringArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); 
+ + // Test NOT_IN on native array WITHOUT unnest + // This should use NOT (array overlap) to check arrays don't contain any of the values + Query notInQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + NOT_IN, + ConstantExpression.ofStrings(List.of("premium", "hygiene")))) + .build(); + + Iterator results = flatCollection.find(notInQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + // Verify that returned arrays do NOT contain any of the NOT_IN values + JsonNode tags = json.get("tags"); + if (tags != null && tags.isArray() && !tags.isEmpty()) { + for (JsonNode tag : tags) { + String tagValue = tag.asText(); + assertNotEquals( + "premium", + tagValue, + "tags array should NOT contain 'premium' for item: " + json.get("item").asText()); + assertNotEquals( + "hygiene", + tagValue, + "tags array should NOT contain 'hygiene' for item: " + json.get("item").asText()); + } + } + } + + // Should return rows where tags array does NOT overlap with ["premium", "hygiene"] + // Rows 1, 3, 5, 8 have hygiene or premium, so should be excluded + // Should return: Mirror, Shampoo (id 4), Comb + assertTrue(count >= 3, "Should return at least 3 items without premium/hygiene tags"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testInIntArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test IN on integer array (numbers column) + Query inQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("numbers")) + .setFilter( + 
RelationalExpression.of( + ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER), + IN, + ConstantExpression.ofNumbers(List.of(1, 10, 20)))) + .build(); + + Iterator results = flatCollection.find(inQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + // Verify that returned arrays contain at least one of the IN values + JsonNode numbers = json.get("numbers"); + if (numbers != null && numbers.isArray()) { + boolean containsMatch = false; + for (JsonNode num : numbers) { + int value = num.asInt(); + if (value == 1 || value == 10 || value == 20) { + containsMatch = true; + break; + } + } + assertTrue( + containsMatch, + "Array should contain at least one IN value for item: " + json.get("item").asText()); + } + } + + // Should return rows where numbers array overlaps with [1, 10, 20] + // IDs: 1 {1,2,3}, 2 {10,20}, 3 {5,10,15}, 6 {20,30}, 7 {10}, 8 {1,10,20} + assertTrue(count >= 6, "Should return at least 6 items"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testInDoubleArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test IN on double precision array (scores column) + Query inQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("scores")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("scores", ArrayType.DOUBLE_PRECISION), + IN, + ConstantExpression.ofNumbers(List.of(3.14, 5.0)))) + .build(); + + Iterator results = flatCollection.find(inQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + // Verify that returned arrays contain at least one 
of the IN values + JsonNode scores = json.get("scores"); + if (scores != null && scores.isArray()) { + boolean containsMatch = false; + for (JsonNode score : scores) { + double value = score.asDouble(); + if (Math.abs(value - 3.14) < 0.01 || Math.abs(value - 5.0) < 0.01) { + containsMatch = true; + break; + } + } + assertTrue( + containsMatch, + "Array should contain at least one IN value for item: " + json.get("item").asText()); + } + } - long ltCount = flatCollection.count(ltQuery); - assertEquals(2, ltCount, "LT: Should find 2 documents with pincode < 500000"); + // Should return rows where scores array overlaps with [3.14, 5.0] + // IDs: 3 {3.14,2.71}, 4 {5.0,10.0}, 8 {2.5,5.0} + assertTrue(count >= 3, "Should return at least 3 items"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testInWithUnnest(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test 3: GTE - props.seller.address.pincode >= 700000 - // Expected: 2 documents (ids 5,7 with pincode 700007) - Query gteQuery = + for (boolean preserveNullAndEmptyArrays : List.of(true, false)) { + Query unnestQuery = Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + preserveNullAndEmptyArrays)) + // Should return unnested tag elements that match 'hygiene' OR 'grooming' .setFilter( RelationalExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.NUMBER, "seller", "address", "pincode"), - GTE, - ConstantExpression.of(700000))) + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + IN, + ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) + .build(); + + // this query will first unnest "tags" array and keep rows that have null and empty + // arrays. 
It'll then filter those rows for which the + // unnested tag is either hygiene or grooming. We have a total of 5 rows that'll match + // this filter + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + assertEquals(5, count, "Should return at least one unnested tag matching the filter"); + } + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotInWithUnnest(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + for (boolean preserveNullAndEmptyArrays : List.of(true, false)) { + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + preserveNullAndEmptyArrays)) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + NOT_IN, + ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) .build(); + // this query will first unnest "tags" array and keep rows that have null and empty + // arrays. unnest() on empty and null arrays returns NULL which is then + // included in the result set (as the predicate contains tags_unnested == NULL OR ...) + + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + assertEquals( + preserveNullAndEmptyArrays ? 
22 : 20, + count, + "Should return unnested tags not matching the filter"); + } + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testEmptyWithUnnest(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - long gteCount = flatCollection.count(gteQuery); - assertEquals(2, gteCount, "GTE: Should find 2 documents with pincode >= 700000"); + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), true)) + // Only include tags[] that are either NULL or empty (we have one row with NULL tag + // and one with empty tag. Unnest will result in two rows with NULL for + // "tags_unnested"). Note that this behavior will change with + // preserveNullAndEmptyArrays = false. This is because unnest won't preserve those + // rows for which the unnested column is NULL then. 
+ .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + NOT_EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + + assertEquals(2, count, "Should return at least 2 rows with NULL unnested tags"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotEmptyWithUnnest(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query unnestQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), true)) + // Only include tags[] that have at least 1 element, all rows with NULL or empty tags + // should be excluded. 
+ .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + EXISTS, + ConstantExpression.of("null"))) + .build(); + + Iterator results = flatCollection.find(unnestQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + Assertions.assertNotNull(doc); + count++; + } + + assertEquals(25, count, "Should return unnested tag elements from non-empty arrays"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + @Disabled + void testContainsOnNonUnnestedArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test CONTAINS on array WITHOUT unnest + // This should use the array overlap operator (&&) or @> containment + // tags column: row 1 has ["hygiene", "premium"], rows 5-6 have ["hygiene"] + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + CONTAINS, + ConstantExpression.of("hygiene"))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + Set items = new HashSet<>(); + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + String item = json.get("item").asText(); + items.add(item); + + // Verify that returned arrays contain "hygiene" + JsonNode tags = json.get("tags"); + assertTrue(tags.isArray(), "tags should be an array"); + boolean containsHygiene = false; + for (JsonNode tag : tags) { + if ("hygiene".equals(tag.asText())) { + containsHygiene = true; + break; + } + } + assertTrue(containsHygiene, "tags array should contain 'hygiene' for item: " + item); + } + + // Should return rows where tags array contains 
"hygiene" + // From test data: rows with Soap, Shampoo (ids 1, 5, 6) + assertTrue(count >= 3, "Should return at least 3 items with 'hygiene' tag"); + assertTrue(items.contains("Soap"), "Should include Soap"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + @Disabled + void testNotContainsOnNonUnnestedArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test NOT_CONTAINS on array WITHOUT unnest + // This should use NOT (array overlap operator) + // Should return rows where tags array does NOT contain "premium" + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + NOT_CONTAINS, + ConstantExpression.of("premium"))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + // Verify that returned arrays do NOT contain "premium" (or are NULL/empty) + JsonNode tags = json.get("tags"); + if (tags != null && tags.isArray() && !tags.isEmpty()) { + for (JsonNode tag : tags) { + assertNotEquals( + "premium", + tag.asText(), + "tags array should NOT contain 'premium' for item: " + json.get("item").asText()); + } + } + } + + // Should return rows where tags is NULL, empty, or doesn't contain "premium" + // Only row 1 (Soap) has "premium", so should return all other rows + assertTrue(count >= 9, "Should return at least 9 items without 'premium' tag"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + @Disabled + void testContainsOnBooleanArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = 
datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test CONTAINS on boolean array (flags column) + Query containsQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("flags")) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("flags", ArrayType.BOOLEAN), + CONTAINS, + ConstantExpression.of(true))) + .build(); + + Iterator results = flatCollection.find(containsQuery); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + // Verify that returned arrays contain true + JsonNode flags = json.get("flags"); + if (flags != null && flags.isArray()) { + boolean containsTrue = false; + for (JsonNode flag : flags) { + if (flag.asBoolean()) { + containsTrue = true; + break; + } + } + assertTrue( + containsTrue, "Array should contain 'true' for item: " + json.get("item").asText()); + } + } + + // Should return rows where flags array contains true + // IDs: 1 {true,false}, 3 {true,false,true}, 4 {true,true}, 6 {true,false}, 8 {true} + assertTrue(count >= 5, "Should return at least 5 items with 'true' flag"); + } + } + + @Nested + class FlatCollectionJsonbColumns { + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testSelections(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query brandSelectionQuery = + Query.builder() + .addSelection(JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand")) + .build(); + + Iterator brandIterator = flatCollection.find(brandSelectionQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, brandIterator, "query/flat_jsonb_brand_selection_response.json", 10); + + // 
Test 2: Select deeply nested STRING field from JSONB column (props.seller.address.city) + Query citySelectionQuery = + Query.builder() + .addSelection( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING, "seller", "address", "city")) + .build(); + + Iterator cityIterator = flatCollection.find(citySelectionQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, cityIterator, "query/flat_jsonb_city_selection_response.json", 10); + + // Test 3: Select STRING_ARRAY field from JSONB column (props.colors) + Query colorsSelectionQuery = + Query.builder() + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors")) + .build(); + + Iterator colorsIterator = flatCollection.find(colorsSelectionQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, colorsIterator, "query/flat_jsonb_colors_selection_response.json", 10); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testRelOpArrayContains(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test 1: CONTAINS - props.colors CONTAINS "Green" + // Expected: 1 document (id=1, Dettol Soap has ["Green", "White"]) + Query containsQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "colors"), + CONTAINS, + ConstantExpression.of("Green"))) + .build(); + + long containsCount = flatCollection.count(containsQuery); + // Generated query: SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE "props"->'colors' + // @> ('["Green"]')::jsonb) p(countWithParser) + assertEquals(1, containsCount); + + // Test 2: NOT_CONTAINS - props.colors NOT_CONTAINS "Green" AND _id <= 8 + // Expected: 7 documents (all except id=1 which has Green, limited to first 8) + Query notContainsQuery = + Query.builder() + .setFilter( + LogicalExpression.builder() + 
.operator(LogicalOperator.AND) + .operand( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "colors"), + NOT_CONTAINS, + ConstantExpression.of("Green"))) + .operand( + RelationalExpression.of( + IdentifierExpression.of("_id"), LTE, ConstantExpression.of(8))) + .build()) + .build(); + + long notContainsCount = flatCollection.count(notContainsQuery); + // Generated query: SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE ("props"->'colors' + // IS NULL OR NOT "props"->'colors' @> ('["Green"]')::jsonb) AND ("_id" <= ('8'::int4))) + // p(countWithParser) + assertEquals(7, notContainsCount); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testRelOpArrayIN(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test 1: IN - props.brand IN ["Dettol", "Lifebuoy"] + // Expected: 2 documents (id=1 Dettol, id=5 Lifebuoy) + Query inQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), + IN, + ConstantExpression.ofStrings(List.of("Dettol", "Lifebuoy")))) + .build(); + + long inCount = flatCollection.count(inQuery); + assertEquals(2, inCount); + + // Test 2: NOT_IN - props.brand NOT_IN ["Dettol"] AND _id <= 8 + // Expected: 7 documents (all except id=1 which is Dettol, limited to first 8) + Query notInQuery = + Query.builder() + .setFilter( + LogicalExpression.builder() + .operator(LogicalOperator.AND) + .operand( + RelationalExpression.of( + JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), + NOT_IN, + ConstantExpression.ofStrings(List.of("Dettol")))) + .operand( + RelationalExpression.of( + IdentifierExpression.of("_id"), LTE, ConstantExpression.of(8))) + .build()) + .build(); + + long notInCount = flatCollection.count(notInQuery); + assertEquals(7, 
notInCount); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testRelOpScalarEq(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query eqQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), + EQ, + ConstantExpression.of("Dettol"))) + .build(); + + long eqCount = flatCollection.count(eqQuery); + // Generate query: SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE "props"->>'brand' = + // ('Dettol')) p(countWithParser) + assertEquals(1, eqCount); + + Query neqQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of("props", JsonFieldType.STRING, "brand"), + NEQ, + ConstantExpression.of("Dettol"))) + .build(); + + long neqCount = flatCollection.count(neqQuery); + // Generate query: SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE "props"->>'brand' != + // ('Dettol')) p(countWithParser) + assertEquals(2, neqCount); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testRelOpScalarNumericComparison(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query gtQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.NUMBER, "seller", "address", "pincode"), + GT, + ConstantExpression.of(500000))) + .build(); + + long gtCount = flatCollection.count(gtQuery); + // SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE CAST + // ("props"->'seller'->'address'->>'pincode' AS NUMERIC) > ('500000'::int4)) + // p(countWithParser) + assertEquals(2, gtCount, "GT: Should find 2 documents with pincode > 500000"); + + Query ltQuery = + Query.builder() + 
.setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.NUMBER, "seller", "address", "pincode"), + LT, + ConstantExpression.of(500000))) + .build(); + + long ltCount = flatCollection.count(ltQuery); + // SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE CAST + // ("props"->'seller'->'address'->>'pincode' AS NUMERIC) < ('500000'::int4)) + // p(countWithParser) + assertEquals(2, ltCount, "LT: Should find 2 documents with pincode < 500000"); + + Query gteQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.NUMBER, "seller", "address", "pincode"), + GTE, + ConstantExpression.of(700000))) + .build(); + + long gteCount = flatCollection.count(gteQuery); + // SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE CAST + // ("props"->'seller'->'address'->>'pincode' AS NUMERIC) >= ('700000'::int4)) + // p(countWithParser) + assertEquals(2, gteCount, "GTE: Should find 2 documents with pincode >= 700000"); + + Query lteQuery = + Query.builder() + .setFilter( + RelationalExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.NUMBER, "seller", "address", "pincode"), + LTE, + ConstantExpression.of(400004))) + .build(); + + long lteCount = flatCollection.count(lteQuery); + // SELECT COUNT(*) FROM (SELECT * FROM "myTestFlat" WHERE CAST + // ("props"->'seller'->'address'->>'pincode' AS NUMERIC) <= ('400004'::int4)) + // p(countWithParser) + assertEquals(2, lteCount, "LTE: Should find 2 documents with pincode <= 400004"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testUnnest(String dataStoreName) { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test UNNEST on JSONB array field: props.colors + // Expected: Should unnest colors and count distinct items with colors + // Data: id=1 has ["Blue", "Green"], 
id=3 has ["Black"], id=5 has ["Orange", "Blue"] + // Total: 5 color entries from 3 items + Query unnestJsonbQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(JsonIdentifierExpression.of("props", "colors")) + .addFromClause( + UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false)) + .build(); + + Iterator resultIterator = flatCollection.aggregate(unnestJsonbQuery); + + long count = 0; + while (resultIterator.hasNext()) { + resultIterator.next(); + count++; + } + + // Expecting 5 results: 2 from Soap (Blue, Green), 1 from Shampoo (Black), + // 2 from Lifebuoy (Orange, Blue) + assertEquals(5, count, "Should find 5 color entries after unnesting JSONB arrays"); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testGroupByScalarField(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + // Test GROUP BY on JSONB scalar field: props.brand + // This tests grouping by a nested string field in a JSONB column + // Data: 3 rows have brands (Dettol, Sunsilk, Lifebuoy), 7 rows have NULL/missing brand + // GROUP BY on JSONB fields groups NULL values together (standard SQL behavior) + Query groupByBrandQuery = + Query.builder() + .addSelection(JsonIdentifierExpression.of("props", "brand")) + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count") + .addAggregation(JsonIdentifierExpression.of("props", "brand")) + .addSort(JsonIdentifierExpression.of("props", "brand"), ASC) + .build(); + + Iterator resultIterator = flatCollection.aggregate(groupByBrandQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_jsonb_group_by_brand_test_response.json", 4); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testGroupByArray(String dataStoreName) throws IOException { 
+ Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test 4: LTE - props.seller.address.pincode <= 400004 - // Expected: 2 documents (ids 1,3 with pincode 400004) - Query lteQuery = - Query.builder() - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.NUMBER, "seller", "address", "pincode"), - LTE, - ConstantExpression.of(400004))) - .build(); + // Test GROUP BY on JSONB array field: props.colors with UNNEST + // This tests grouping by individual elements (after unnesting) in a JSONB array + // Behavior should match nested collections: UNNEST flattens array, GROUP BY groups elements + // Data: Row 1 has ["Blue", "Green"], Row 3 has ["Black"], Row 5 has ["Orange", "Blue"] + // Expected: Blue (2), Green (1), Black (1), Orange (1) - 4 distinct color groups + Query groupByColorsQuery = + Query.builder() + .addSelection(JsonIdentifierExpression.of("props", "colors"), "color") + .addSelection(AggregateExpression.of(COUNT, ConstantExpression.of(1)), "count") + .addFromClause( + UnnestExpression.of(JsonIdentifierExpression.of("props", "colors"), false)) + .addAggregation(JsonIdentifierExpression.of("props", "colors")) + .addSort(JsonIdentifierExpression.of("props", "colors"), ASC) + .build(); - long lteCount = flatCollection.count(lteQuery); - assertEquals(2, lteCount, "LTE: Should find 2 documents with pincode <= 400004"); - } + Iterator resultIterator = flatCollection.aggregate(groupByColorsQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_jsonb_group_by_colors_test_response.json", 4); } - /** - * This test validates that cols with hyphens ("-") are properly quoted so that PG doesn't - * interpret them as '-' operator - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionUnnestJsonbArrayWithHyphens(String dataStoreName) { + void 
testAnyOnArray(String dataStoreName) { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Unnest the hyphenated JSONB array field - Query unnestQuery = + // Test ArrayRelationalFilterExpression.ANY on JSONB array (props.colors) + // This uses jsonb_array_elements() internally + Query jsonbArrayQuery = Query.builder() .addSelection(IdentifierExpression.of("item")) - .addSelection( - JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) - .addFromClause( - UnnestExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.STRING_ARRAY, "source-loc"), - true)) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( + RelationalExpression.of( + JsonIdentifierExpression.of("props", "colors"), + EQ, + ConstantExpression.of("Blue"))) + .build()) .build(); - // Execute query - should not throw syntax error - Iterator resultIterator = flatCollection.find(unnestQuery); - - Set foundLocations = new HashSet<>(); - while (resultIterator.hasNext()) { - Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); - } + long count = flatCollection.count(jsonbArrayQuery); + // ids 1 and 5 have "Blue" in their colors array + assertEquals(2, count, "Should find 2 items with 'Blue' color (ids 1, 5)"); } - } - - @Nested - class FlatCollectionArrayBehaviourTest { - /** - * Test EXISTS filter on top-level arrays. 
It should only return arrays that are non-empty (have - * at-least one element) - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testExistsFilterOnArray(String dataStoreName) throws JsonProcessingException { + void testInOnUnnestedArray(String dataStoreName) throws Exception { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Query using EXISTS on array field (simulating ArrayIdentifierExpression behavior) - // tags column has: NULL (row 9), empty '{}' (rows 10, 11, 13), non-empty (rows 1-8, 12, 14) - // Using EXISTS with 'null' parameter (matching entity-service pattern) - Query query = + Query unnestQuery = Query.builder() .addSelection(IdentifierExpression.of("item")) - .addSelection(IdentifierExpression.of("tags")) + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + true)) + // Should return unnested source-loc elements that match 'warehouse-A' OR 'store-1' .setFilter( RelationalExpression.of( - ArrayIdentifierExpression.of("tags"), EXISTS, ConstantExpression.of("null"))) + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + IN, + ConstantExpression.ofStrings(List.of("warehouse-A", "store-1")))) .build(); - Iterator results = flatCollection.find(query); + Iterator resultIterator = flatCollection.find(unnestQuery); int count = 0; - while (results.hasNext()) { - Document doc = results.next(); + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + // Parse JSON to extract the unnested value JsonNode json = new ObjectMapper().readTree(doc.toJson()); + // The unnested value is aliased as "props.source-loc" + JsonNode locationNode = json.get("props.source-loc"); count++; - // 
Verify that ALL returned documents have non-empty arrays - JsonNode tags = json.get("tags"); - assertTrue( - tags.isArray() && !tags.isEmpty(), "tags should be non-empty array, but was: " + tags); } - // Should return only documents with non-empty arrays - // From test data: rows 1-8 have non-empty arrays (8 docs) - // Plus rows 9, 10 have non-empty arrays (2 docs) - // Total: 10 documents - assertEquals(8, count, "Should return a total of 10 docs that have non-empty tags"); + assertEquals(2, count, "Should return at least 2 unnested locations matching the filter"); } - /** - * Test NOT_EXISTS filter on top-level arrays. This validates that NOT_EXISTS on array fields - * returns both NULL and empty arrays, excluding only non-empty arrays. - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testNotExistsFilterOnArrays(String dataStoreName) throws JsonProcessingException { + void testNotInOnUnnestedArray(String dataStoreName) throws Exception { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Query using NOT_EXISTS on array field (simulating ArrayIdentifierExpression behavior) - // Using NOT_EXISTS with 'null' parameter (matching entity-service pattern) - Query query = + Query unnestQuery = Query.builder() .addSelection(IdentifierExpression.of("item")) - .addSelection(IdentifierExpression.of("tags")) + .addSelection( + JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) + .addFromClause( + UnnestExpression.of( + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + true)) + // Should return unnested source-loc elements that DO NOT match 'warehouse-A' .setFilter( RelationalExpression.of( - ArrayIdentifierExpression.of("tags"), - NOT_EXISTS, - ConstantExpression.of("null"))) + JsonIdentifierExpression.of( + "props", JsonFieldType.STRING_ARRAY, "source-loc"), + NOT_IN, + 
ConstantExpression.ofStrings(List.of("warehouse-A")))) .build(); - Iterator results = flatCollection.find(query); + Iterator resultIterator = flatCollection.find(unnestQuery); int count = 0; - while (results.hasNext()) { - Document doc = results.next(); + while (resultIterator.hasNext()) { + Document doc = resultIterator.next(); + Assertions.assertNotNull(doc); + // Parse JSON to extract the unnested value JsonNode json = new ObjectMapper().readTree(doc.toJson()); + JsonNode locationNode = json.get("props.source-loc"); count++; - // Verify that ALL returned documents have NULL or empty arrays - JsonNode tags = json.get("tags"); - assertTrue( - tags == null || !tags.isArray() || tags.isEmpty(), - "tags should be NULL or empty array, but was: " + tags); } - - // Should return documents with NULL or empty arrays - // From test data: row 9 (NULL), rows 10, 11, 13 (empty arrays) - // Total: 4 documents - assertEquals(2, count, "Should return at 4 documents with NULL or empty tags"); + // Should NOT contain 'warehouse-A' + assertEquals(12, count, "Should return unnested locations not matching the filter"); } - /** - * Test EXISTS filter on JSONB arrays. Should only return non-empty arrays (with at-least one - * element). - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessingException { + void testExistsOnArrays(String dataStoreName) throws JsonProcessingException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -4756,14 +5254,9 @@ void testExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessingExc assertEquals(3, count, "Should return exactly 3 documents with non-empty colors"); } - /** - * Test NOT_EXISTS filter on JSONB arrays. 
This validates that NOT_EXISTS on array fields inside - * JSONB returns documents where the field is NULL, the parent object is NULL, or the array is - * empty. - */ @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testNotExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessingException { + void testNotExistsOnArrays(String dataStoreName) throws JsonProcessingException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -4812,7 +5305,7 @@ void testNotExistsFilterOnJsonArrays(String dataStoreName) throws JsonProcessing @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testExistsFilterOnJsonScalars(String dataStoreName) { + void testExistsOnScalars(String dataStoreName) { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -4842,7 +5335,7 @@ void testExistsFilterOnJsonScalars(String dataStoreName) { @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testNotExistsFilterOnJsonScalars(String dataStoreName) { + void testNotExistsOnScalars(String dataStoreName) { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -4872,78 +5365,7 @@ void testNotExistsFilterOnJsonScalars(String dataStoreName) { @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testExistsFilterOnUnnestedNativeArray(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(ArrayIdentifierExpression.of("tags")) - .addFromClause( - 
UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), true)) - // Only include tags[] that have at least 1 element, all rows with NULL or empty tags - // should be excluded. - .setFilter( - RelationalExpression.of( - ArrayIdentifierExpression.of("tags", ArrayType.TEXT), - EXISTS, - ConstantExpression.of("null"))) - .build(); - - Iterator results = flatCollection.find(unnestQuery); - - int count = 0; - while (results.hasNext()) { - Document doc = results.next(); - Assertions.assertNotNull(doc); - count++; - } - - assertEquals(25, count, "Should return unnested tag elements from non-empty arrays"); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testNotExistsFilterOnUnnestNativeArray(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(ArrayIdentifierExpression.of("tags")) - .addFromClause( - UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), true)) - // Only include tags[] that are either NULL or empty (we have one row with NULL tag - // and one with empty tag. Unnest will result in two rows with NULL for - // "tags_unnested"). Note that this behavior will change with - // preserveNulLAndEmptyArrays = false. This is because unnest won't preserve those - // rows for which the unnested column is NULL then. 
- .setFilter( - RelationalExpression.of( - ArrayIdentifierExpression.of("tags", ArrayType.TEXT), - NOT_EXISTS, - ConstantExpression.of("null"))) - .build(); - - Iterator results = flatCollection.find(unnestQuery); - - int count = 0; - while (results.hasNext()) { - Document doc = results.next(); - Assertions.assertNotNull(doc); - count++; - } - - assertEquals(2, count, "Should return at least 2 rows with NULL unnested tags"); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testExistsFilterOnUnnestJsonbArray(String dataStoreName) { + void testExistsOnUnnestedArray(String dataStoreName) { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -4980,7 +5402,7 @@ void testExistsFilterOnUnnestJsonbArray(String dataStoreName) { @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - void testNotExistsFilterOnUnnestJsonbArray(String dataStoreName) { + void testNotExistsOnUnnestedArray(String dataStoreName) { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); @@ -5012,167 +5434,6 @@ void testNotExistsFilterOnUnnestJsonbArray(String dataStoreName) { } assertEquals(7, count); } - - @Nested - class FlatCollectionUnnestWithInFilterTests { - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testInFilterWithUnnestOnTopLevelArray(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - for (boolean preserveNullAndEmptyArrays : List.of(true, false)) { - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(ArrayIdentifierExpression.of("tags")) - .addFromClause( - UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), - 
preserveNullAndEmptyArrays)) - // Should return unnested tag elements that match 'hygiene' OR 'grooming' - .setFilter( - RelationalExpression.of( - ArrayIdentifierExpression.of("tags", ArrayType.TEXT), - IN, - ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) - .build(); - - //this query will first unnest "tags" array and keep rows that have null and empty arrays. It'll then filter those rows for which the - // unnested tag is either hygiene or grooming. We have a total of 5 rows that'll match this filter - Iterator results = flatCollection.find(unnestQuery); - - int count = 0; - while (results.hasNext()) { - Document doc = results.next(); - Assertions.assertNotNull(doc); - count++; - } - assertEquals(5, count, "Should return at least one unnested tag matching the filter"); - } - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testNotInFilterWithUnnestOnTopLevelArray(String dataStoreName) { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - for (boolean preserveNullAndEmptyArrays : List.of(true, false)) { - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(ArrayIdentifierExpression.of("tags")) - .addFromClause( - UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), - preserveNullAndEmptyArrays)) - .setFilter( - RelationalExpression.of( - ArrayIdentifierExpression.of("tags", ArrayType.TEXT), - NOT_IN, - ConstantExpression.ofStrings(List.of("hygiene", "grooming")))) - .build(); - //this query will first unnest "tags" array and keep rows that have null and empty arrays. unnest() on empty and null arrays returns NULL which is then - // included in the result set (as the predicate contains tags_unnested == NULL OR ...) 
- - Iterator results = flatCollection.find(unnestQuery); - - int count = 0; - while (results.hasNext()) { - Document doc = results.next(); - Assertions.assertNotNull(doc); - count++; - } - assertEquals(preserveNullAndEmptyArrays ? 22 : 20, count, - "Should return unnested tags not matching the filter"); - } - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testInFilterWithUnnestOnJsonbArray(String dataStoreName) throws Exception { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection( - JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) - .addFromClause( - UnnestExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.STRING_ARRAY, "source-loc"), - true)) - // Should return unnested source-loc elements that match 'warehouse-A' OR 'store-1' - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.STRING_ARRAY, "source-loc"), - IN, - ConstantExpression.ofStrings(List.of("warehouse-A", "store-1")))) - .build(); - - Iterator resultIterator = flatCollection.find(unnestQuery); - - int count = 0; - while (resultIterator.hasNext()) { - Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); - // Parse JSON to extract the unnested value - JsonNode json = new ObjectMapper().readTree(doc.toJson()); - // The unnested value is aliased as "props.source-loc" - JsonNode locationNode = json.get("props.source-loc"); - count++; - } - - assertEquals(2, count, "Should return at least 2 unnested locations matching the filter"); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testNotInFilterOnUnnestedJsonbArray(String dataStoreName) throws Exception { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection 
flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - Query unnestQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection( - JsonIdentifierExpression.of("props", JsonFieldType.STRING_ARRAY, "source-loc")) - .addFromClause( - UnnestExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.STRING_ARRAY, "source-loc"), - true)) - // Should return unnested source-loc elements that DO NOT match 'warehouse-A' - .setFilter( - RelationalExpression.of( - JsonIdentifierExpression.of( - "props", JsonFieldType.STRING_ARRAY, "source-loc"), - NOT_IN, - ConstantExpression.ofStrings(List.of("warehouse-A")))) - .build(); - - Iterator resultIterator = flatCollection.find(unnestQuery); - - int count = 0; - while (resultIterator.hasNext()) { - Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); - // Parse JSON to extract the unnested value - JsonNode json = new ObjectMapper().readTree(doc.toJson()); - JsonNode locationNode = json.get("props.source-loc"); - count++; - } - // Should NOT contain 'warehouse-A' - assertEquals(12, count, "Should return unnested locations not matching the filter"); - } - } } @Nested diff --git a/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json b/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json index ab363fb7..73f65f9d 100644 --- a/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json +++ b/document-store/src/integrationTest/resources/query/pg_flat_collection_insert.json @@ -1,14 +1,14 @@ { "statements": [ - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n1, 'Soap', 10, 2, '2014-03-01T08:00:00Z',\n'{\"hygiene\", \"personal-care\", \"premium\"}',\n'{\"Hygiene\", \"PersonalCare\"}',\n'{\"colors\": [\"Blue\", \"Green\"], 
\"brand\": \"Dettol\", \"size\": \"M\", \"product-code\": \"SOAP-DET-001\", \"source-loc\": [\"warehouse-A\", \"store-1\"], \"seller\": {\"name\": \"Metro Chemicals Pvt. Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\n'{1, 2, 3}',\n'{4.5, 9.2}',\n'{true, false}'\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n2, 'Mirror', 20, 1, '2014-03-01T09:00:00Z',\n'{\"home-decor\", \"reflective\", \"glass\"}',\n'{\"HomeDecor\"}',\nNULL,\nNULL,\n'{10, 20}',\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n3, 'Shampoo', 5, 10, '2014-03-15T09:00:00Z',\n'{\"hair-care\", \"personal-care\", \"premium\", \"herbal\"}',\n'{\"HairCare\", \"PersonalCare\"}',\n'{\"colors\": [\"Black\"], \"brand\": \"Sunsilk\", \"size\": \"L\", \"product-code\": \"SHAMP-SUN-003\", \"source-loc\": [\"warehouse-B\", \"store-2\", \"online\"], \"seller\": {\"name\": \"Metro Chemicals Pvt. 
Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\nNULL,\n'{3.14, 2.71}',\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n4, 'Shampoo', 5, 20, '2014-04-04T11:21:39.736Z',\n'{\"hair-care\", \"budget\", \"bulk\"}',\n'{\"HairCare\"}',\nNULL,\nNULL,\nNULL,\nNULL,\n'{true, true}'\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n5, 'Soap', 20, 5, '2014-04-04T21:23:13.331Z',\n'{\"hygiene\", \"antibacterial\", \"family-pack\"}',\n'{\"Hygiene\"}',\n'{\"colors\": [\"Orange\", \"Blue\"], \"brand\": \"Lifebuoy\", \"size\": \"S\", \"product-code\": \"SOAP-LIF-005\", \"source-loc\": [\"warehouse-C\"], \"seller\": {\"name\": \"Hans and Co.\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n6, 'Comb', 7.5, 5, '2015-06-04T05:08:13Z',\n'{\"grooming\", \"plastic\", \"essential\"}',\n'{\"Grooming\"}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n7, 'Comb', 7.5, 10, '2015-09-10T08:43:00Z',\n'{\"grooming\", \"bulk\", \"wholesale\"}',\n'{\"Grooming\"}',\n'{\"colors\": [], \"product-code\": null, \"source-loc\": [], \"seller\": {\"name\": \"Go Go Plastics\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\nNULL,\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", 
\"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n8, 'Soap', 10, 5, '2016-02-06T20:20:13Z',\n'{\"hygiene\", \"budget\", \"basic\"}',\n'{\"Hygiene\"}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n9, 'Bottle', 15, 3, '2016-03-01T10:00:00Z',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", - "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n10, 'Cup', 8, 2, '2016-04-01T10:00:00Z',\n'{}',\n'{}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)" + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n1, 'Soap', 10, 2, '2014-03-01T08:00:00Z', true,\n'{\"hygiene\", \"personal-care\", \"premium\"}',\n'{\"Hygiene\", \"PersonalCare\"}',\n'{\"colors\": [\"Blue\", \"Green\"], \"brand\": \"Dettol\", \"size\": \"M\", \"product-code\": \"SOAP-DET-001\", \"source-loc\": [\"warehouse-A\", \"store-1\"], \"seller\": {\"name\": \"Metro Chemicals Pvt. 
Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\n'{1, 2, 3}',\n'{4.5, 9.2}',\n'{true, false}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n2, 'Mirror', 20, 1, '2014-03-01T09:00:00Z', true,\n'{\"home-decor\", \"reflective\", \"glass\"}',\n'{\"HomeDecor\"}',\nNULL,\nNULL,\n'{10, 20}',\n'{1.5, 2.5, 3.5}',\n'{false, false}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n3, 'Shampoo', 5, 10, '2014-03-15T09:00:00Z', true,\n'{\"hair-care\", \"personal-care\", \"premium\", \"herbal\"}',\n'{\"HairCare\", \"PersonalCare\"}',\n'{\"colors\": [\"Black\"], \"brand\": \"Sunsilk\", \"size\": \"L\", \"product-code\": \"SHAMP-SUN-003\", \"source-loc\": [\"warehouse-B\", \"store-2\", \"online\"], \"seller\": {\"name\": \"Metro Chemicals Pvt. 
Ltd.\", \"address\": {\"city\": \"Mumbai\", \"pincode\": 400004}}}',\nNULL,\n'{5, 10, 15}',\n'{3.14, 2.71}',\n'{true, false, true}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n4, 'Shampoo', 5, 20, '2014-04-04T11:21:39.736Z', false,\n'{\"hair-care\", \"budget\", \"bulk\"}',\n'{\"HairCare\"}',\nNULL,\nNULL,\n'{1, 2}',\n'{5.0, 10.0}',\n'{true, true}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n5, 'Soap', 20, 5, '2014-04-04T21:23:13.331Z', true,\n'{\"hygiene\", \"antibacterial\", \"family-pack\"}',\n'{\"Hygiene\"}',\n'{\"colors\": [\"Orange\", \"Blue\"], \"brand\": \"Lifebuoy\", \"size\": \"S\", \"product-code\": \"SOAP-LIF-005\", \"source-loc\": [\"warehouse-C\"], \"seller\": {\"name\": \"Hans and Co.\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 700007}}}',\nNULL,\n'{3, 6, 9}',\n'{7.5}',\n'{false}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n6, 'Comb', 7.5, 5, '2015-06-04T05:08:13Z', true,\n'{\"grooming\", \"plastic\", \"essential\"}',\n'{\"Grooming\"}',\nNULL,\nNULL,\n'{20, 30}',\n'{6.0, 8.0}',\n'{true, false}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n7, 'Comb', 7.5, 10, '2015-09-10T08:43:00Z', false,\n'{\"grooming\", \"bulk\", \"wholesale\"}',\n'{\"Grooming\"}',\n'{\"colors\": [], \"product-code\": null, \"source-loc\": [], \"seller\": {\"name\": \"Go Go Plastics\", \"address\": {\"city\": \"Kolkata\", \"pincode\": 
700007}}}',\nNULL,\n'{10}',\n'{3.0}',\n'{false, false, false}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n8, 'Soap', 10, 5, '2016-02-06T20:20:13Z', true,\n'{\"hygiene\", \"budget\", \"basic\"}',\n'{\"Hygiene\"}',\nNULL,\nNULL,\n'{1, 10, 20}',\n'{2.5, 5.0}',\n'{true}'\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n9, 'Bottle', 15, 3, '2016-03-01T10:00:00Z', false,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)", + "INSERT INTO \"myTestFlat\" (\n\"_id\", \"item\", \"price\", \"quantity\", \"date\", \"in_stock\", \"tags\", \"categoryTags\", \"props\", \"sales\", \"numbers\", \"scores\", \"flags\"\n) VALUES (\n10, 'Cup', 8, 2, '2016-04-01T10:00:00Z', true,\n'{}',\n'{}',\nNULL,\nNULL,\nNULL,\nNULL,\nNULL\n)" ] } From 7e4cfd5bfe959929c0d95074342d4d3cddeb388d Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Wed, 26 Nov 2025 00:02:46 +0530 Subject: [PATCH 7/9] Fixed failing test cases --- .../documentstore/DocStoreQueryV1Test.java | 155 ++++++++---------- .../flat_boolean_array_filter_response.json | 15 +- .../flat_double_array_filter_response.json | 3 +- .../flat_integer_array_filter_response.json | 12 +- 4 files changed, 94 insertions(+), 91 deletions(-) diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index e012257d..b9dcf1e2 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -3585,87 +3585,6 @@ void 
testFlatPostgresCollectionUnnestMixedCaseField(String dataStoreName) throws dataStoreName, resultIterator, "query/flat_unnest_mixed_case_response.json", 5); } - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionIntegerArrayFilter(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Filter: ANY number in numbers equals 10 (Integer constant) - // This tests L265-266: Integer/Long → ::bigint[] - Query integerArrayQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(IdentifierExpression.of("price")) - .setFilter( - ArrayRelationalFilterExpression.builder() - .operator(ArrayOperator.ANY) - .filter( - RelationalExpression.of( - IdentifierExpression.of("numbers"), EQ, ConstantExpression.of(10))) - .build()) - .build(); - - Iterator resultIterator = flatCollection.find(integerArrayQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, resultIterator, "query/flat_integer_array_filter_response.json", 1); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionDoubleArrayFilter(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Filter: ANY score in scores equals 3.14 (Double constant) - // This tests L267-268: Double/Float → ::double precision[] - Query doubleArrayQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(IdentifierExpression.of("price")) - .setFilter( - ArrayRelationalFilterExpression.builder() - .operator(ArrayOperator.ANY) - .filter( - RelationalExpression.of( - IdentifierExpression.of("scores"), EQ, ConstantExpression.of(3.14))) - .build()) - .build(); - - Iterator 
resultIterator = flatCollection.find(doubleArrayQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, resultIterator, "query/flat_double_array_filter_response.json", 1); - } - - @ParameterizedTest - @ArgumentsSource(PostgresProvider.class) - void testFlatPostgresCollectionBooleanArrayFilter(String dataStoreName) throws IOException { - Datastore datastore = datastoreMap.get(dataStoreName); - Collection flatCollection = - datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - - // Filter: ANY flag in flags equals true (Boolean constant) - // This tests L269-270: Boolean → ::boolean[] - Query booleanArrayQuery = - Query.builder() - .addSelection(IdentifierExpression.of("item")) - .addSelection(IdentifierExpression.of("price")) - .setFilter( - ArrayRelationalFilterExpression.builder() - .operator(ArrayOperator.ANY) - .filter( - RelationalExpression.of( - IdentifierExpression.of("flags"), EQ, ConstantExpression.of(true))) - .build()) - .build(); - - Iterator resultIterator = flatCollection.find(booleanArrayQuery); - assertDocsAndSizeEqualWithoutOrder( - dataStoreName, resultIterator, "query/flat_boolean_array_filter_response.json", 2); - } - @ParameterizedTest @ArgumentsSource(PostgresProvider.class) void testFlatVsNestedCollectionNestedFieldSelections(String dataStoreName) throws IOException { @@ -4769,10 +4688,80 @@ void testContainsOnBooleanArray(String dataStoreName) throws JsonProcessingExcep } } - // Should return rows where flags array contains true - // IDs: 1 {true,false}, 3 {true,false,true}, 4 {true,true}, 6 {true,false}, 8 {true} assertTrue(count >= 5, "Should return at least 5 items with 'true' flag"); } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testAnyOnIntegerArray(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query integerArrayQuery = 
+ Query.builder() + .addSelection(IdentifierExpression.of("item")) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( + RelationalExpression.of( + IdentifierExpression.of("numbers"), EQ, ConstantExpression.of(10))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.find(integerArrayQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_integer_array_filter_response.json", 4); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testAnyOnDoubleArray(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query doubleArrayQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( + RelationalExpression.of( + IdentifierExpression.of("scores"), EQ, ConstantExpression.of(3.14))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.find(doubleArrayQuery); + assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_double_array_filter_response.json", 1); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testAnyOnBooleanArray(String dataStoreName) throws IOException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query booleanArrayQuery = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .setFilter( + ArrayRelationalFilterExpression.builder() + .operator(ArrayOperator.ANY) + .filter( + RelationalExpression.of( + IdentifierExpression.of("flags"), EQ, ConstantExpression.of(true))) + .build()) + .build(); + + Iterator resultIterator = flatCollection.find(booleanArrayQuery); + 
assertDocsAndSizeEqualWithoutOrder( + dataStoreName, resultIterator, "query/flat_boolean_array_filter_response.json", 5); + } } @Nested diff --git a/document-store/src/integrationTest/resources/query/flat_boolean_array_filter_response.json b/document-store/src/integrationTest/resources/query/flat_boolean_array_filter_response.json index 25c65fcc..5383975b 100644 --- a/document-store/src/integrationTest/resources/query/flat_boolean_array_filter_response.json +++ b/document-store/src/integrationTest/resources/query/flat_boolean_array_filter_response.json @@ -1,10 +1,17 @@ [ { - "item": "Soap", - "price": 10 + "item": "Soap" }, { - "item": "Shampoo", - "price": 5 + "item": "Shampoo" + }, + { + "item": "Shampoo" + }, + { + "item": "Comb" + }, + { + "item": "Soap" } ] diff --git a/document-store/src/integrationTest/resources/query/flat_double_array_filter_response.json b/document-store/src/integrationTest/resources/query/flat_double_array_filter_response.json index b7aecab1..4824f9d3 100644 --- a/document-store/src/integrationTest/resources/query/flat_double_array_filter_response.json +++ b/document-store/src/integrationTest/resources/query/flat_double_array_filter_response.json @@ -1,6 +1,5 @@ [ { - "item": "Shampoo", - "price": 5 + "item": "Shampoo" } ] diff --git a/document-store/src/integrationTest/resources/query/flat_integer_array_filter_response.json b/document-store/src/integrationTest/resources/query/flat_integer_array_filter_response.json index 9eab7526..bf42d5c6 100644 --- a/document-store/src/integrationTest/resources/query/flat_integer_array_filter_response.json +++ b/document-store/src/integrationTest/resources/query/flat_integer_array_filter_response.json @@ -1,6 +1,14 @@ [ { - "item": "Mirror", - "price": 20 + "item": "Mirror" + }, + { + "item": "Shampoo" + }, + { + "item": "Comb" + }, + { + "item": "Soap" } ] From 9d9a719534bd9261e545fac2e92c3bfe6149b838 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Wed, 26 Nov 2025 09:43:41 +0530 Subject: [PATCH 
8/9] Handle CONTAINS properly --- .../documentstore/DocStoreQueryV1Test.java | 252 +++++++++++++----- .../postgres/PostgresCollection.java | 52 +++- ...insRelationalFilterParserNonJsonField.java | 52 +++- .../query/v1/PostgresQueryParserTest.java | 5 +- 4 files changed, 285 insertions(+), 76 deletions(-) diff --git a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java index b9dcf1e2..c4812528 100644 --- a/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java +++ b/document-store/src/integrationTest/java/org/hypertrace/core/documentstore/DocStoreQueryV1Test.java @@ -58,6 +58,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -122,9 +123,7 @@ import org.hypertrace.core.documentstore.query.SortingSpec; import org.hypertrace.core.documentstore.utils.Utils; import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.params.ParameterizedTest; @@ -3230,8 +3229,8 @@ void testFindAll(String dataStoreName) throws IOException { Document doc = iterator.next(); count++; // Verify document has content (basic validation) - Assertions.assertNotNull(doc); - Assertions.assertNotNull(doc.toJson()); + assertNotNull(doc); + assertNotNull(doc.toJson()); assertTrue(!doc.toJson().isEmpty()); assertEquals(DocumentType.FLAT, doc.getDocumentType()); } @@ -4025,7 
+4024,7 @@ void testNumericAggregations(String dataStoreName) throws IOException { groupCount++; // Validate each group has all expected fields - Assertions.assertNotNull(groupJson.get("item")); + assertNotNull(groupJson.get("item")); assertTrue(groupJson.get("total_qty").asInt() > 0); assertTrue(groupJson.get("avg_price").asDouble() > 0); assertTrue(groupJson.get("count").asInt() > 0); @@ -4182,7 +4181,7 @@ void testUnnest(String dataStoreName) { int count = 0; while (resultIterator.hasNext()) { Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); count++; } // With preserveNullAndEmpty = false, unnest will only unwind arrays with size >= 1. A total @@ -4426,7 +4425,7 @@ void testInWithUnnest(String dataStoreName) { int count = 0; while (results.hasNext()) { Document doc = results.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); count++; } assertEquals(5, count, "Should return at least one unnested tag matching the filter"); @@ -4464,7 +4463,7 @@ void testNotInWithUnnest(String dataStoreName) { int count = 0; while (results.hasNext()) { Document doc = results.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); count++; } assertEquals( @@ -4504,7 +4503,7 @@ void testEmptyWithUnnest(String dataStoreName) { int count = 0; while (results.hasNext()) { Document doc = results.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); count++; } @@ -4538,7 +4537,7 @@ void testNotEmptyWithUnnest(String dataStoreName) { int count = 0; while (results.hasNext()) { Document doc = results.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); count++; } @@ -4547,24 +4546,49 @@ void testNotEmptyWithUnnest(String dataStoreName) { @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - @Disabled - void testContainsOnNonUnnestedArray(String dataStoreName) throws JsonProcessingException { + void testContainsStrArrayWithUnnest(String dataStoreName) { Datastore datastore = 
datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test CONTAINS on array WITHOUT unnest - // This should use the array overlap operator (&&) or @> containment - // tags column: row 1 has ["hygiene", "premium"], rows 5-6 have ["hygiene"] Query query = Query.builder() .addSelection(IdentifierExpression.of("item")) - .addSelection(ArrayIdentifierExpression.of("tags")) + .addFromClause( + UnnestExpression.of(ArrayIdentifierExpression.of("tags", ArrayType.TEXT), true)) .setFilter( RelationalExpression.of( ArrayIdentifierExpression.of("tags", ArrayType.TEXT), CONTAINS, - ConstantExpression.of("hygiene"))) + ConstantExpression.ofStrings(List.of("hygiene", "premium")))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + results.next(); + count++; + } + assertEquals(5, count); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testContainsStrArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("tags", ArrayType.TEXT)) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("tags", ArrayType.TEXT), + CONTAINS, + ConstantExpression.ofStrings(List.of("hygiene", "personal-care")))) .build(); Iterator results = flatCollection.find(query); @@ -4579,36 +4603,34 @@ void testContainsOnNonUnnestedArray(String dataStoreName) throws JsonProcessingE String item = json.get("item").asText(); items.add(item); - // Verify that returned arrays contain "hygiene" + // Verify that returned arrays contain both "hygiene" AND "personal-care" JsonNode tags = json.get("tags"); assertTrue(tags.isArray(), "tags 
should be an array"); boolean containsHygiene = false; + boolean containsPersonalCare = false; for (JsonNode tag : tags) { if ("hygiene".equals(tag.asText())) { containsHygiene = true; - break; + } + if ("personal-care".equals(tag.asText())) { + containsPersonalCare = true; } } - assertTrue(containsHygiene, "tags array should contain 'hygiene' for item: " + item); + assertTrue(containsHygiene); + assertTrue(containsPersonalCare); } - // Should return rows where tags array contains "hygiene" - // From test data: rows with Soap, Shampoo (ids 1, 5, 6) - assertTrue(count >= 3, "Should return at least 3 items with 'hygiene' tag"); - assertTrue(items.contains("Soap"), "Should include Soap"); + assertEquals(1, count); + assertTrue(items.contains("Soap")); } @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - @Disabled - void testNotContainsOnNonUnnestedArray(String dataStoreName) throws JsonProcessingException { + void testNotContainsStrArray(String dataStoreName) throws JsonProcessingException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test NOT_CONTAINS on array WITHOUT unnest - // This should use NOT (array overlap operator) - // Should return rows where tags array does NOT contain "premium" Query query = Query.builder() .addSelection(IdentifierExpression.of("item")) @@ -4617,55 +4639,59 @@ void testNotContainsOnNonUnnestedArray(String dataStoreName) throws JsonProcessi RelationalExpression.of( ArrayIdentifierExpression.of("tags", ArrayType.TEXT), NOT_CONTAINS, - ConstantExpression.of("premium"))) + ConstantExpression.ofStrings(List.of("hair-care", "personal-care")))) .build(); Iterator results = flatCollection.find(query); int count = 0; + Set items = new HashSet<>(); while (results.hasNext()) { Document doc = results.next(); JsonNode json = new ObjectMapper().readTree(doc.toJson()); count++; + items.add(json.get("item").asText()); - // 
Verify that returned arrays do NOT contain "premium" (or are NULL/empty) + // Verify that returned arrays do NOT contain BOTH "hair-care" AND "personal-care" JsonNode tags = json.get("tags"); if (tags != null && tags.isArray() && !tags.isEmpty()) { + boolean hasHairCare = false; + boolean hasPersonalCare = false; for (JsonNode tag : tags) { - assertNotEquals( - "premium", - tag.asText(), - "tags array should NOT contain 'premium' for item: " + json.get("item").asText()); + if ("hair-care".equals(tag.asText())) { + hasHairCare = true; + } + if ("personal-care".equals(tag.asText())) { + hasPersonalCare = true; + } } + assertFalse(hasHairCare && hasPersonalCare); } } - // Should return rows where tags is NULL, empty, or doesn't contain "premium" - // Only row 1 (Soap) has "premium", so should return all other rows - assertTrue(count >= 9, "Should return at least 9 items without 'premium' tag"); + assertEquals(9, count); + assertNotEquals(2, items.stream().filter("Shampoo"::equals).count()); } @ParameterizedTest @ArgumentsSource(PostgresProvider.class) - @Disabled - void testContainsOnBooleanArray(String dataStoreName) throws JsonProcessingException { + void testContainsOnIntArray(String dataStoreName) throws JsonProcessingException { Datastore datastore = datastoreMap.get(dataStoreName); Collection flatCollection = datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); - // Test CONTAINS on boolean array (flags column) - Query containsQuery = + Query query = Query.builder() .addSelection(IdentifierExpression.of("item")) - .addSelection(ArrayIdentifierExpression.of("flags")) + .addSelection(ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER)) .setFilter( RelationalExpression.of( - ArrayIdentifierExpression.of("flags", ArrayType.BOOLEAN), + ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER), CONTAINS, - ConstantExpression.of(true))) + ConstantExpression.ofNumbers(List.of(1, 2)))) .build(); - Iterator results = 
flatCollection.find(containsQuery); + Iterator results = flatCollection.find(query); int count = 0; while (results.hasNext()) { @@ -4673,22 +4699,126 @@ void testContainsOnBooleanArray(String dataStoreName) throws JsonProcessingExcep JsonNode json = new ObjectMapper().readTree(doc.toJson()); count++; - // Verify that returned arrays contain true - JsonNode flags = json.get("flags"); - if (flags != null && flags.isArray()) { - boolean containsTrue = false; - for (JsonNode flag : flags) { - if (flag.asBoolean()) { - containsTrue = true; - break; + // Verify numbers field is a proper JSON array, not a PostgreSQL string like "{1,2,3}" + JsonNode numbers = json.get("numbers"); + assertNotNull(numbers); + assertTrue(numbers.isArray(), "numbers should be JSON array, got: " + numbers); + + // Verify array contains both 1 and 2 + boolean contains1 = false; + boolean contains2 = false; + for (JsonNode num : numbers) { + if (num.asInt() == 1) { + contains1 = true; + } + if (num.asInt() == 2) { + contains2 = true; + } + } + assertTrue(contains1); + assertTrue(contains2); + } + + assertEquals(2, count); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testNotContainsOnIntArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER)) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("numbers", ArrayType.INTEGER), + NOT_CONTAINS, + ConstantExpression.ofNumbers(List.of(10, 20)))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + // Verify numbers 
field is a proper JSON array + JsonNode numbers = json.get("numbers"); + if (numbers != null && !numbers.isNull()) { + assertTrue(numbers.isArray()); + + // Verify array does NOT contain BOTH 10 AND 20 + boolean has10 = false; + boolean has20 = false; + for (JsonNode num : numbers) { + if (num.asInt() == 10) { + has10 = true; + } + if (num.asInt() == 20) { + has20 = true; } } - assertTrue( - containsTrue, "Array should contain 'true' for item: " + json.get("item").asText()); + assertFalse(has10 && has20); + } + } + + // 8 rows: excludes rows 2 and 8 (have both 10 & 20) + // Includes rows 9, 10 (NULL) because NOT_CONTAINS uses "IS NULL OR NOT (...)" logic + assertEquals(8, count); + } + + @ParameterizedTest + @ArgumentsSource(PostgresProvider.class) + void testContainsOnDoubleArray(String dataStoreName) throws JsonProcessingException { + Datastore datastore = datastoreMap.get(dataStoreName); + Collection flatCollection = + datastore.getCollectionForType(FLAT_COLLECTION_NAME, DocumentType.FLAT); + + Query query = + Query.builder() + .addSelection(IdentifierExpression.of("item")) + .addSelection(ArrayIdentifierExpression.of("scores", ArrayType.DOUBLE_PRECISION)) + .setFilter( + RelationalExpression.of( + ArrayIdentifierExpression.of("scores", ArrayType.DOUBLE_PRECISION), + CONTAINS, + ConstantExpression.ofNumbers(List.of(3.14, 2.71)))) + .build(); + + Iterator results = flatCollection.find(query); + + int count = 0; + while (results.hasNext()) { + Document doc = results.next(); + JsonNode json = new ObjectMapper().readTree(doc.toJson()); + count++; + + JsonNode scores = json.get("scores"); + assertNotNull(scores); + assertTrue(scores.isArray(), "scores should be JSON array, got: " + scores); + + boolean contains314 = false; + boolean contains271 = false; + for (JsonNode score : scores) { + double val = score.asDouble(); + if (val == 3.14) { + contains314 = true; + } + if (val == 2.71) { + contains271 = true; + } } + assertTrue(contains314); + assertTrue(contains271); 
} - assertTrue(count >= 5, "Should return at least 5 items with 'true' flag"); + assertEquals(1, count); } @ParameterizedTest @@ -5149,7 +5279,7 @@ void testInOnUnnestedArray(String dataStoreName) throws Exception { int count = 0; while (resultIterator.hasNext()) { Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); // Parse JSON to extract the unnested value JsonNode json = new ObjectMapper().readTree(doc.toJson()); // The unnested value is aliased as "props.source-loc" @@ -5191,7 +5321,7 @@ void testNotInOnUnnestedArray(String dataStoreName) throws Exception { int count = 0; while (resultIterator.hasNext()) { Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); // Parse JSON to extract the unnested value JsonNode json = new ObjectMapper().readTree(doc.toJson()); JsonNode locationNode = json.get("props.source-loc"); @@ -5383,7 +5513,7 @@ void testExistsOnUnnestedArray(String dataStoreName) { int count = 0; while (resultIterator.hasNext()) { Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); count++; } assertEquals(6, count); @@ -5418,7 +5548,7 @@ void testNotExistsOnUnnestedArray(String dataStoreName) { int count = 0; while (resultIterator.hasNext()) { Document doc = resultIterator.next(); - Assertions.assertNotNull(doc); + assertNotNull(doc); count++; } assertEquals(7, count); diff --git a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/PostgresCollection.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/PostgresCollection.java index 890317c9..fa2519bf 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/PostgresCollection.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/PostgresCollection.java @@ -1395,9 +1395,9 @@ private void addColumnToJsonNode( break; case "_text": // text array - Array array = 
resultSet.getArray(columnIndex); - if (array != null) { - String[] stringArray = (String[]) array.getArray(); + Array textArray = resultSet.getArray(columnIndex); + if (textArray != null) { + String[] stringArray = (String[]) textArray.getArray(); ArrayNode arrayNode = MAPPER.createArrayNode(); for (String item : stringArray) { arrayNode.add(item); @@ -1406,6 +1406,52 @@ private void addColumnToJsonNode( } break; + case "_int4": // integer array + case "_int8": // bigint array + Array intArray = resultSet.getArray(columnIndex); + if (intArray != null) { + Object[] intObjectArray = (Object[]) intArray.getArray(); + ArrayNode intArrayNode = MAPPER.createArrayNode(); + for (Object item : intObjectArray) { + if (item instanceof Integer) { + intArrayNode.add((Integer) item); + } else if (item instanceof Long) { + intArrayNode.add((Long) item); + } + } + jsonNode.set(columnName, intArrayNode); + } + break; + + case "_float8": // double precision array + case "_float4": // real/float array + Array doubleArray = resultSet.getArray(columnIndex); + if (doubleArray != null) { + Object[] doubleObjectArray = (Object[]) doubleArray.getArray(); + ArrayNode doubleArrayNode = MAPPER.createArrayNode(); + for (Object item : doubleObjectArray) { + if (item instanceof Double) { + doubleArrayNode.add((Double) item); + } else if (item instanceof Float) { + doubleArrayNode.add((Float) item); + } + } + jsonNode.set(columnName, doubleArrayNode); + } + break; + + case "_bool": // boolean array + Array boolArray = resultSet.getArray(columnIndex); + if (boolArray != null) { + Boolean[] boolObjectArray = (Boolean[]) boolArray.getArray(); + ArrayNode boolArrayNode = MAPPER.createArrayNode(); + for (Boolean item : boolObjectArray) { + boolArrayNode.add(item); + } + jsonNode.set(columnName, boolArrayNode); + } + break; + case "jsonb": case "json": String jsonString = resultSet.getString(columnIndex); diff --git 
a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresContainsRelationalFilterParserNonJsonField.java b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresContainsRelationalFilterParserNonJsonField.java index c330c7de..1db11c49 100644 --- a/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresContainsRelationalFilterParserNonJsonField.java +++ b/document-store/src/main/java/org/hypertrace/core/documentstore/postgres/query/v1/parser/filter/nonjson/field/PostgresContainsRelationalFilterParserNonJsonField.java @@ -1,7 +1,9 @@ package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field; -import java.util.Collection; import java.util.Collections; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; +import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression; import org.hypertrace.core.documentstore.expression.impl.RelationalExpression; import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresContainsRelationalFilterParserInterface; import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.PostgresRelationalFilterParser; @@ -22,17 +24,51 @@ public String parse( final String parsedLhs = expression.getLhs().accept(context.lhsParser()); final Object parsedRhs = expression.getRhs().accept(context.rhsParser()); - Object normalizedRhs = normalizeValue(parsedRhs); - context.getParamsBuilder().addObjectParam(normalizedRhs); + // Normalize to an Iterable (single value becomes a singleton list) + Iterable values = normalizeToIterable(parsedRhs); - return String.format("%s @> ARRAY[?]::text[]", parsedLhs); + // Add each value as an individual parameter (same as IN operator) + String placeholders = + StreamSupport.stream(values.spliterator(), false) + .map( + value -> { + 
context.getParamsBuilder().addObjectParam(value); + return "?"; + }) + .collect(Collectors.joining(", ")); + + // Check if this field has been unnested - if so, it's now a scalar, not an array + // For ArrayIdentifierExpression, get the field name + if (expression.getLhs() instanceof ArrayIdentifierExpression) { + ArrayIdentifierExpression arrayExpr = (ArrayIdentifierExpression) expression.getLhs(); + String fieldName = arrayExpr.getName(); + if (context.getPgColumnNames().containsKey(fieldName)) { + // Field is unnested - each element is now a scalar + // Use scalar IN operator: the scalar must be IN the set of values we're looking for + return String.format("%s IN (%s)", parsedLhs, placeholders); + } + } + + // Field is NOT unnested - use array containment operator + String arrayTypeCast = expression.getLhs().accept(new PostgresArrayTypeExtractor()); + + // Use ARRAY[?, ?, ...] syntax with appropriate type cast + if (arrayTypeCast != null && arrayTypeCast.equals("text[]")) { + return String.format("%s @> ARRAY[%s]::text[]", parsedLhs, placeholders); + } else if (arrayTypeCast != null) { + // INTEGER/BOOLEAN/DOUBLE arrays: Use the correct type cast + return String.format("%s @> ARRAY[%s]::%s", parsedLhs, placeholders, arrayTypeCast); + } else { + // Fallback: use text[] cast + return String.format("%s @> ARRAY[%s]::text[]", parsedLhs, placeholders); + } } - private Object normalizeValue(final Object value) { + private Iterable normalizeToIterable(final Object value) { if (value == null) { - return null; - } else if (value instanceof Collection) { - return value; + return Collections.emptyList(); + } else if (value instanceof Iterable) { + return (Iterable) value; } else { return Collections.singletonList(value); } diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java index 
64adc4da..402f073a 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java @@ -1556,7 +1556,7 @@ void testContainsWithFlatCollectionNonJsonField() { Params params = postgresQueryParser.getParamsBuilder().build(); assertEquals(1, params.getObjectParams().size()); - assertEquals(List.of("java"), params.getObjectParams().get(1)); + assertEquals("java", params.getObjectParams().get(1)); } @Nested @@ -1795,9 +1795,6 @@ void testFlatCollectionWithHyphenatedJsonbArrayFieldInUnnest() { String sql = postgresQueryParser.parse(); - // The key assertion: the alias in the LATERAL join must be quoted - // CORRECT: p1("customAttribute_dot_dev-ops-owner") - // INCORRECT: p1(customAttribute_dot_dev-ops-owner) <- causes PostgreSQL syntax error String expectedSql = "With \n" + "table0 as (SELECT * from \"testCollection\"),\n" From 18bf3171cd163389af54da4d7262cf1b601f17a2 Mon Sep 17 00:00:00 2001 From: Prashant Pandey Date: Wed, 26 Nov 2025 09:46:55 +0530 Subject: [PATCH 9/9] Fix failing tests --- .../postgres/query/v1/PostgresQueryParserTest.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java index 402f073a..9c24380c 100644 --- a/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java +++ b/document-store/src/test/java/org/hypertrace/core/documentstore/postgres/query/v1/PostgresQueryParserTest.java @@ -1442,7 +1442,7 @@ void testNotContainsWithFlatCollectionNonJsonField() { Query.builder() .setFilter( RelationalExpression.of( - IdentifierExpression.of("tags"), + ArrayIdentifierExpression.of("tags", 
ArrayType.TEXT), NOT_CONTAINS, ConstantExpression.of("premium"))) .build(); @@ -1460,7 +1460,7 @@ void testNotContainsWithFlatCollectionNonJsonField() { Params params = postgresQueryParser.getParamsBuilder().build(); assertEquals(1, params.getObjectParams().size()); - assertEquals(List.of("premium"), params.getObjectParams().get(1)); + assertEquals("premium", params.getObjectParams().get(1)); } @Test @@ -1542,7 +1542,9 @@ void testContainsWithFlatCollectionNonJsonField() { Query.builder() .setFilter( RelationalExpression.of( - IdentifierExpression.of("keywords"), CONTAINS, ConstantExpression.of("java"))) + ArrayIdentifierExpression.of("keywords", ArrayType.TEXT), + CONTAINS, + ConstantExpression.of("java"))) .build(); PostgresQueryParser postgresQueryParser =