From d62de706ed4a43f586f08e533e633d50f335909e Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Wed, 20 May 2026 09:25:19 -0700 Subject: [PATCH 1/8] Add collation tests Signed-off-by: Daniel Frankcom --- .../tests/core/collation/__init__.py | 0 .../test_collation_aggregate_accumulators.py | 356 ++++++++ .../test_collation_aggregate_bucket.py | 197 +++++ .../test_collation_aggregate_expressions.py | 760 ++++++++++++++++++ .../test_collation_aggregate_geonear.py | 117 +++ .../test_collation_aggregate_graphlookup.py | 212 +++++ .../test_collation_aggregate_group.py | 411 ++++++++++ .../test_collation_aggregate_lookup.py | 255 ++++++ .../test_collation_aggregate_match.py | 393 +++++++++ .../test_collation_aggregate_merge_out.py | 394 +++++++++ .../test_collation_aggregate_propagation.py | 216 +++++ .../test_collation_aggregate_replaceroot.py | 109 +++ .../test_collation_aggregate_resolution.py | 384 +++++++++ .../test_collation_aggregate_sort_stages.py | 348 ++++++++ .../test_collation_aggregate_substages.py | 296 +++++++ .../test_collation_aggregate_unionwith.py | 154 ++++ .../test_collation_aggregate_views.py | 314 ++++++++ .../test_collation_aggregate_window.py | 613 ++++++++++++++ .../collation/test_collation_alternate.py | 347 ++++++++ .../collation/test_collation_backwards.py | 210 +++++ .../collation/test_collation_bulk_write.py | 174 ++++ .../collation/test_collation_casefirst.py | 296 +++++++ .../collation/test_collation_caselevel.py | 191 +++++ .../core/collation/test_collation_count.py | 121 +++ .../core/collation/test_collation_delete.py | 230 ++++++ .../core/collation/test_collation_distinct.py | 158 ++++ .../collation/test_collation_dotted_paths.py | 280 +++++++ .../collation/test_collation_edge_cases.py | 168 ++++ .../collation/test_collation_expr_filter.py | 214 +++++ .../core/collation/test_collation_find.py | 577 +++++++++++++ .../test_collation_find_and_modify.py | 314 ++++++++ ...test_collation_find_and_modify_pipeline.py | 153 ++++ 
.../test_collation_find_query_operators.py | 383 +++++++++ .../collation/test_collation_find_text.py | 89 ++ .../core/collation/test_collation_id_field.py | 198 +++++ .../core/collation/test_collation_index.py | 521 ++++++++++++ .../test_collation_index_multikey.py | 204 +++++ .../collation/test_collation_index_partial.py | 105 +++ .../collation/test_collation_let_variables.py | 166 ++++ .../core/collation/test_collation_locale.py | 69 ++ .../test_collation_locale_defaults.py | 254 ++++++ .../test_collation_locale_specific.py | 223 +++++ .../test_collation_long_strings_hint.py | 182 +++++ .../core/collation/test_collation_metadata.py | 130 +++ .../collation/test_collation_non_string.py | 151 ++++ .../collation/test_collation_normalization.py | 171 ++++ .../test_collation_numeric_ordering.py | 427 ++++++++++ .../collation/test_collation_projection.py | 113 +++ .../test_collation_schema_validation.py | 138 ++++ .../core/collation/test_collation_strength.py | 283 +++++++ .../test_collation_supplementary_unicode.py | 199 +++++ .../core/collation/test_collation_update.py | 273 +++++++ .../test_collation_update_array_filters.py | 169 ++++ .../test_collation_update_array_ops.py | 223 +++++ .../test_collation_update_field_ops.py | 196 +++++ .../test_collation_update_pipeline.py | 205 +++++ .../tests/core/collation/utils/__init__.py | 0 .../utils/collation_view_mismatch.py | 118 +++ .../commands/utils/command_test_case.py | 19 +- documentdb_tests/framework/assertions.py | 6 + documentdb_tests/framework/error_codes.py | 10 + .../framework/target_collection.py | 68 +- 62 files changed, 14234 insertions(+), 21 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/collation/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_accumulators.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_bucket.py create mode 100644 
documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_expressions.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_geonear.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_graphlookup.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_group.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_lookup.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_match.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_merge_out.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_propagation.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_replaceroot.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_resolution.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_sort_stages.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_substages.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_unionwith.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_views.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_window.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_alternate.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_backwards.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_bulk_write.py create mode 100644 
documentdb_tests/compatibility/tests/core/collation/test_collation_casefirst.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_caselevel.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_count.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_delete.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_distinct.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_edge_cases.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_expr_filter.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_find.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify_pipeline.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_find_query_operators.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_find_text.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_id_field.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_index.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_index_multikey.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_index_partial.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_let_variables.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_locale.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_locale_defaults.py 
create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_locale_specific.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_long_strings_hint.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_metadata.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_non_string.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_normalization.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_numeric_ordering.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_projection.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_schema_validation.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_strength.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_supplementary_unicode.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_update.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_filters.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_ops.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_update_field_ops.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_update_pipeline.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/utils/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/utils/collation_view_mismatch.py diff --git a/documentdb_tests/compatibility/tests/core/collation/__init__.py b/documentdb_tests/compatibility/tests/core/collation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_accumulators.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_accumulators.py new file mode 100644 index 000000000..5640cdd4e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_accumulators.py @@ -0,0 +1,356 @@ +"""Tests for collation effects on $top, $bottom, $topN, $bottomN, $minN, $maxN accumulators.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [$top Accumulator Collation]: $top uses command-level collation for +# its sortBy comparison, returning the document with the smallest value under +# collation ordering. 
+COLLATION_TOP_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "top_sortby_numeric_ordering", + docs=[ + {"_id": 1, "x": "file10"}, + {"_id": 2, "x": "file2"}, + {"_id": 3, "x": "file1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$top": {"sortBy": {"x": 1}, "output": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "result": "file1"}], + msg="$top sortBy should use collation numericOrdering", + ), + CommandTestCase( + "top_sortby_case_insensitive", + docs=[ + {"_id": 1, "x": "Banana"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$top": {"sortBy": {"x": 1}, "output": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": None, "result": "apple"}], + msg="$top sortBy should use collation for case-insensitive ordering", + ), +] + +# Property [$bottom Accumulator Collation]: $bottom uses command-level collation +# for its sortBy comparison, returning the document with the largest value under +# collation ordering. 
+COLLATION_BOTTOM_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "bottom_sortby_numeric_ordering", + docs=[ + {"_id": 1, "x": "file10"}, + {"_id": 2, "x": "file2"}, + {"_id": 3, "x": "file1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$bottom": {"sortBy": {"x": 1}, "output": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "result": "file10"}], + msg="$bottom sortBy should use collation numericOrdering", + ), + CommandTestCase( + "bottom_sortby_case_insensitive", + docs=[ + {"_id": 1, "x": "Banana"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$bottom": {"sortBy": {"x": 1}, "output": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": None, "result": "cherry"}], + msg="$bottom sortBy should use collation for case-insensitive ordering", + ), +] + +# Property [$topN Accumulator Collation]: $topN uses command-level collation for +# its sortBy comparison, returning the N documents with the smallest values +# under collation ordering. 
+COLLATION_TOPN_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "topn_sortby_numeric_ordering", + docs=[ + {"_id": 1, "x": "file10"}, + {"_id": 2, "x": "file2"}, + {"_id": 3, "x": "file1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": { + "$topN": {"n": 2, "sortBy": {"x": 1}, "output": "$x"}, + }, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "result": ["file1", "file2"]}], + msg="$topN sortBy should use collation numericOrdering", + ), + CommandTestCase( + "topn_sortby_case_insensitive", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "B"}, + {"_id": 3, "x": "c"}, + {"_id": 4, "x": "D"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": { + "$topN": {"n": 2, "sortBy": {"x": 1}, "output": "$x"}, + }, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": None, "result": ["a", "B"]}], + msg="$topN sortBy with strength 1 should use linguistic ordering", + ), +] + +# Property [$bottomN Accumulator Collation]: $bottomN uses command-level +# collation for its sortBy comparison, returning the N documents with the +# largest values under collation ordering. 
+COLLATION_BOTTOMN_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "bottomn_sortby_numeric_ordering", + docs=[ + {"_id": 1, "x": "file10"}, + {"_id": 2, "x": "file2"}, + {"_id": 3, "x": "file1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": { + "$bottomN": {"n": 2, "sortBy": {"x": 1}, "output": "$x"}, + }, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "result": ["file2", "file10"]}], + msg="$bottomN sortBy should use collation numericOrdering", + ), + CommandTestCase( + "bottomn_sortby_case_insensitive", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "B"}, + {"_id": 3, "x": "c"}, + {"_id": 4, "x": "D"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": { + "$bottomN": {"n": 2, "sortBy": {"x": 1}, "output": "$x"}, + }, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": None, "result": ["c", "D"]}], + msg="$bottomN sortBy with strength 1 should use linguistic ordering", + ), +] + +# Property [$minN Accumulator Collation]: $minN uses command-level collation for +# string comparisons, returning the N smallest values under collation ordering. 
+COLLATION_MINN_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "minn_numeric_ordering", + docs=[ + {"_id": 1, "x": "file10"}, + {"_id": 2, "x": "file2"}, + {"_id": 3, "x": "file1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$minN": {"n": 2, "input": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "result": ["file1", "file2"]}], + msg="$minN should use collation numericOrdering for string comparison", + ), + CommandTestCase( + "minn_case_insensitive", + docs=[ + {"_id": 1, "x": "Cherry"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "Banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$minN": {"n": 2, "input": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": None, "result": ["apple", "Banana"]}], + msg="$minN should use collation for case-insensitive ordering", + ), +] + +# Property [$maxN Accumulator Collation]: $maxN uses command-level collation for +# string comparisons, returning the N largest values under collation ordering. 
+COLLATION_MAXN_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "maxn_numeric_ordering", + docs=[ + {"_id": 1, "x": "file10"}, + {"_id": 2, "x": "file2"}, + {"_id": 3, "x": "file1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$maxN": {"n": 2, "input": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "result": ["file10", "file2"]}], + msg="$maxN should use collation numericOrdering for string comparison", + ), + CommandTestCase( + "maxn_case_insensitive", + docs=[ + {"_id": 1, "x": "Cherry"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "Banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$group": { + "_id": None, + "result": {"$maxN": {"n": 2, "input": "$x"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": None, "result": ["Cherry", "Banana"]}], + msg="$maxN should use collation for case-insensitive ordering", + ), +] + +COLLATION_AGGREGATE_ACCUMULATOR_TESTS: list[CommandTestCase] = ( + COLLATION_TOP_TESTS + + COLLATION_BOTTOM_TESTS + + COLLATION_TOPN_TESTS + + COLLATION_BOTTOMN_TESTS + + COLLATION_MINN_TESTS + + COLLATION_MAXN_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_ACCUMULATOR_TESTS)) +def test_collation_aggregate_accumulators(database_client, collection, test): + """Test collation effects on $top, $bottom, $topN, $bottomN, $minN, $maxN.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_bucket.py 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_bucket.py new file mode 100644 index 000000000..02b850d79 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_bucket.py @@ -0,0 +1,197 @@ +"""Tests for collation effects on $bucket and $bucketAuto stages.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import BUCKET_BOUNDARIES_NOT_SORTED_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Bucket Boundary Grouping]: collation affects $bucket boundary +# comparisons so that strings collation-equal to a boundary are grouped into +# the corresponding bucket. +COLLATION_BUCKET_GROUPING_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "bucket_strength1_case_variants_grouped", + docs=[ + {"_id": 1, "x": "Apple"}, + {"_id": 2, "x": "APPLE"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$bucket": { + "groupBy": "$x", + "boundaries": ["apple", "banana", "cherry", "date"], + "default": "other", + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": "apple", "count": 2}, + {"_id": "banana", "count": 1}, + {"_id": "cherry", "count": 1}, + ], + msg="$bucket with strength 1 should group case variants into the matching boundary bucket", + ), + CommandTestCase( + "bucket_no_collation_case_variants_to_default", + docs=[ + {"_id": 1, "x": "Apple"}, + {"_id": 2, "x": "APPLE"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ +
{ + "$bucket": { + "groupBy": "$x", + "boundaries": ["apple", "banana", "cherry", "date"], + "default": "other", + } + } + ], + "cursor": {}, + }, + # Binary comparison: 'A' < 'a', so 'Apple' and 'APPLE' fall below 'apple' + # boundary and go to default. + expected=[ + {"_id": "banana", "count": 1}, + {"_id": "cherry", "count": 1}, + {"_id": "other", "count": 2}, + ], + msg="$bucket without collation should use binary comparison for boundaries", + ), +] + +# Property [Bucket Collation-Equal Boundaries Error]: boundaries that are +# binary-ascending but collation-equal produce BUCKET_BOUNDARIES_NOT_SORTED_ERROR, +# which proves boundary validation uses the command-level collation. +COLLATION_BUCKET_BOUNDARY_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "bucket_collation_equal_boundaries_case", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$bucket": { + "groupBy": "$x", + "boundaries": ["A", "a", "b"], + "default": "other", + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + error_code=BUCKET_BOUNDARIES_NOT_SORTED_ERROR, + msg="$bucket should reject boundaries that are collation-equal at strength 1", + ), + CommandTestCase( + "bucket_collation_equal_boundaries_diacritics", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$bucket": { + "groupBy": "$x", + "boundaries": ["cafe", "caf\u00e9", "z"], + "default": "other", + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + error_code=BUCKET_BOUNDARIES_NOT_SORTED_ERROR, + msg=( + "$bucket should reject boundaries that are collation-equal" + " due to diacritics at strength 1" + ), + ), +] + +# Property [BucketAuto Boundary Comparisons]: collation affects $bucketAuto +# boundary comparisons so that collation-equal strings are placed in the same +# automatically created bucket.
+COLLATION_BUCKET_AUTO_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "bucket_auto_strength1_case_variants_same_bucket", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$bucketAuto": {"groupBy": "$x", "buckets": 3}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": {"min": "apple", "max": "banana"}, "count": 2}, + {"_id": {"min": "banana", "max": "cherry"}, "count": 1}, + {"_id": {"min": "cherry", "max": "cherry"}, "count": 1}, + ], + msg="$bucketAuto with strength 1 should place case variants in the same bucket", + ), + CommandTestCase( + "bucket_auto_no_collation_case_variants_separate", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$bucketAuto": {"groupBy": "$x", "buckets": 3}}], + "cursor": {}, + }, + # Binary comparison: 'A' < 'a', so 'Apple' is in a separate bucket from + # 'apple'. 
+ expected=[ + {"_id": {"min": "Apple", "max": "apple"}, "count": 1}, + {"_id": {"min": "apple", "max": "banana"}, "count": 1}, + {"_id": {"min": "banana", "max": "cherry"}, "count": 2}, + ], + msg="$bucketAuto without collation should use binary comparison for boundaries", + ), +] + +COLLATION_AGGREGATE_BUCKET_TESTS: list[CommandTestCase] = ( + COLLATION_BUCKET_GROUPING_TESTS + + COLLATION_BUCKET_BOUNDARY_ERROR_TESTS + + COLLATION_BUCKET_AUTO_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_BUCKET_TESTS)) +def test_collation_aggregate_bucket(database_client, collection, test): + """Test collation effects on $bucket and $bucketAuto boundary comparisons.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_expressions.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_expressions.py new file mode 100644 index 000000000..34cacf9ad --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_expressions.py @@ -0,0 +1,760 @@ +"""Tests for collation effects on expression operators in aggregate.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Expression Operators Affected by Collation]: comparison and set +# expression operators use command-level collation for string comparisons.
+COLLATION_EXPR_OPS_AFFECTED_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "expr_eq_affected", + docs=[{"_id": 1, "a": "apple", "b": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$eq": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": True}], + msg="$eq expression should use collation for case-insensitive comparison", + ), + CommandTestCase( + "expr_ne_affected", + docs=[{"_id": 1, "a": "apple", "b": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$ne": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": False}], + msg="$ne expression should use collation for case-insensitive comparison", + ), + CommandTestCase( + "expr_gt_affected", + docs=[{"_id": 1, "a": "apple", "b": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$gt": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": False}], + msg="$gt expression should use collation for case-insensitive comparison", + ), + CommandTestCase( + "expr_gte_affected", + docs=[{"_id": 1, "a": "apple", "b": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$gte": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": True}], + msg="$gte expression should use collation for case-insensitive comparison", + ), + CommandTestCase( + "expr_lt_affected", + docs=[{"_id": 1, "a": "apple", "b": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$lt": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": False}], + 
msg="$lt expression should use collation for case-insensitive comparison", + ), + CommandTestCase( + "expr_lte_affected", + docs=[{"_id": 1, "a": "apple", "b": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$lte": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": True}], + msg="$lte expression should use collation for case-insensitive comparison", + ), + CommandTestCase( + "expr_cmp_affected", + docs=[{"_id": 1, "a": "apple", "b": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$cmp": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": 0}], + msg="$cmp expression should use collation for case-insensitive comparison", + ), + CommandTestCase( + "expr_in_affected", + docs=[{"_id": 1, "arr": ["Apple", "Banana", "Cherry"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$in": ["apple", "$arr"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": True}], + msg="$in expression should use collation for case-insensitive membership test", + ), + CommandTestCase( + "expr_indexofarray_affected", + docs=[{"_id": 1, "arr": ["Apple", "Banana", "Cherry"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$indexOfArray": ["$arr", "apple"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": 0}], + msg="$indexOfArray should use collation for case-insensitive search", + ), + CommandTestCase( + "expr_setequals_affected", + docs=[{"_id": 1, "arr": ["Apple", "Banana"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$setEquals": ["$arr", ["apple", "banana"]]}}}], + "cursor": {}, + 
"collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": True}], + msg="$setEquals should use collation for case-insensitive set comparison", + ), + CommandTestCase( + "expr_setintersection_affected", + docs=[{"_id": 1, "a": ["Apple", "Banana"], "b": ["apple", "cherry"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$setIntersection": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": ["Apple"]}], + msg="$setIntersection should use collation for case-insensitive intersection", + ), + CommandTestCase( + "expr_setdifference_affected", + docs=[{"_id": 1, "a": ["Apple", "Banana"], "b": ["apple"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$setDifference": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": ["Banana"]}], + msg="$setDifference should use collation for case-insensitive difference", + ), + CommandTestCase( + "expr_setunion_affected", + docs=[{"_id": 1, "a": ["Apple", "Banana"], "b": ["apple", "cherry"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$setUnion": ["$a", "$b"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": ["Apple", "Banana", "cherry"]}], + msg="$setUnion should use collation to deduplicate case variants", + ), + CommandTestCase( + "expr_setissubset_affected", + docs=[{"_id": 1, "arr": ["Apple", "Banana"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$setIsSubset": [["apple", "banana"], "$arr"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": True}], + msg="$setIsSubset should use collation for case-insensitive subset check", + ), + CommandTestCase( + 
"expr_filter_affected", + docs=[{"_id": 1, "items": ["Apple", "banana", "apple"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$project": { + "result": { + "$filter": { + "input": "$items", + "as": "item", + "cond": {"$eq": ["$$item", "apple"]}, + } + } + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": ["Apple", "apple"]}], + msg="$filter should use collation in its condition expression", + ), + CommandTestCase( + "expr_reduce_affected", + docs=[{"_id": 1, "items": ["Apple", "Banana", "Cherry"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$project": { + "result": { + "$reduce": { + "input": "$items", + "initialValue": 0, + "in": { + "$cond": { + "if": {"$eq": ["$$this", "apple"]}, + "then": {"$add": ["$$value", 1]}, + "else": "$$value", + } + }, + } + } + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": 1}], + msg="$reduce should use collation in its body expression", + ), + CommandTestCase( + "expr_map_affected", + docs=[{"_id": 1, "items": ["Apple", "Banana", "Cherry"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$project": { + "result": { + "$map": { + "input": "$items", + "as": "item", + "in": {"$eq": ["$$item", "apple"]}, + } + } + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": [True, False, False]}], + msg="$map should use collation in its body expression", + ), + CommandTestCase( + "expr_cond_affected", + docs=[{"_id": 1, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$project": { + "result": { + "$cond": { + "if": {"$eq": ["$x", "apple"]}, + "then": "matched", + "else": "no match", + } + } + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "result": "matched"}], 
def _not_affected_case(case_id, doc_fields, expression, expected_result, msg, locale="en"):
    """Build one single-document ``$project`` case run under a strength-1 collation.

    The stored document is ``{"_id": 1, **doc_fields}``. ``expression`` is
    projected into the ``result`` field, and the case expects exactly one
    output document whose ``result`` equals ``expected_result``. ``locale``
    selects the collation locale sent with the command.
    """

    def build_command(ctx):
        # Key order matters: the command name ("aggregate") must come first.
        return {
            "aggregate": ctx.collection,
            "pipeline": [{"$project": {"result": expression}}],
            "cursor": {},
            "collation": {"locale": locale, "strength": 1},
        }

    return CommandTestCase(
        case_id,
        docs=[{"_id": 1, **doc_fields}],
        command=build_command,
        expected=[{"_id": 1, "result": expected_result}],
        msg=msg,
    )


# Property [Expression Operators Not Affected by Collation]: string
# manipulation and regex operators keep their binary semantics even when the
# command carries a collation.
COLLATION_EXPR_OPS_NOT_AFFECTED_TESTS: list[CommandTestCase] = [
    _not_affected_case(
        "expr_indexofbytes_not_affected",
        {"x": "Hello"},
        {"$indexOfBytes": ["$x", "hello"]},
        -1,
        "$indexOfBytes should not use collation for string search",
    ),
    _not_affected_case(
        "expr_indexofcp_not_affected",
        {"x": "Hello"},
        {"$indexOfCP": ["$x", "hello"]},
        -1,
        "$indexOfCP should not use collation for string search",
    ),
    _not_affected_case(
        "expr_regexmatch_not_affected",
        {"x": "Hello"},
        {"$regexMatch": {"input": "$x", "regex": "hello"}},
        False,
        "$regexMatch should not use collation for pattern matching",
    ),
    _not_affected_case(
        "expr_regexfind_not_affected",
        {"x": "Hello"},
        {"$regexFind": {"input": "$x", "regex": "hello"}},
        None,
        "$regexFind should not use collation for pattern matching",
    ),
    _not_affected_case(
        "expr_regexfindall_not_affected",
        {"x": "Hello"},
        {"$regexFindAll": {"input": "$x", "regex": "hello"}},
        [],
        "$regexFindAll should not use collation for pattern matching",
    ),
    _not_affected_case(
        "expr_replaceone_not_affected",
        {"x": "Hello World"},
        {"$replaceOne": {"input": "$x", "find": "hello", "replacement": "hi"}},
        "Hello World",
        "$replaceOne should not use collation for find matching",
    ),
    _not_affected_case(
        "expr_replaceall_not_affected",
        {"x": "Hello Hello"},
        {"$replaceAll": {"input": "$x", "find": "hello", "replacement": "hi"}},
        "Hello Hello",
        "$replaceAll should not use collation for find matching",
    ),
    # $strcasecmp folds case only, never diacritics. A strength-1 collation
    # would call "cafe" and "caf\u00e9" equal; $strcasecmp must still report
    # them as different.
    _not_affected_case(
        "expr_strcasecmp_not_affected",
        {"a": "cafe", "b": "caf\u00e9"},
        {"$strcasecmp": ["$a", "$b"]},
        -1,
        "$strcasecmp should not be affected by collation",
    ),
    _not_affected_case(
        "expr_split_not_affected",
        {"x": "Hello World"},
        {"$split": ["$x", "hello"]},
        ["Hello World"],
        "$split should not use collation for delimiter matching",
    ),
    # Under Turkish rules I lowercases to \u0131 (dotless i); getting plain
    # "i" back shows $toLower ignores the collation locale.
    _not_affected_case(
        "expr_tolower_not_affected",
        {"x": "I"},
        {"$toLower": "$x"},
        "i",
        "$toLower should perform simple Unicode case folding regardless of collation locale",
        locale="tr",
    ),
    # Under Turkish rules i uppercases to \u0130 (I with dot above); getting
    # plain "I" back shows $toUpper ignores the collation locale.
    _not_affected_case(
        "expr_toupper_not_affected",
        {"x": "i"},
        {"$toUpper": "$x"},
        "I",
        "$toUpper should perform simple Unicode case folding regardless of collation locale",
        locale="tr",
    ),
    _not_affected_case(
        "expr_concat_not_affected",
        {"a": "Hello", "b": "World"},
        {"$concat": ["$a", "$b"]},
        "HelloWorld",
        "$concat should concatenate without collation influence",
    ),
    _not_affected_case(
        "expr_substrbytes_not_affected",
        {"x": "caf\u00e9"},
        {"$substrBytes": ["$x", 0, 3]},
        "caf",
        "$substrBytes should use byte offsets regardless of collation",
    ),
    _not_affected_case(
        "expr_substrcp_not_affected",
        {"x": "caf\u00e9"},
        {"$substrCP": ["$x", 0, 3]},
        "caf",
        "$substrCP should use code point offsets regardless of collation",
    ),
    _not_affected_case(
        "expr_strlenbytes_not_affected",
        {"x": "caf\u00e9"},
        {"$strLenBytes": "$x"},
        5,
        "$strLenBytes should count bytes regardless of collation",
    ),
    _not_affected_case(
        "expr_strlencp_not_affected",
        {"x": "caf\u00e9"},
        {"$strLenCP": "$x"},
        4,
        "$strLenCP should count code points regardless of collation",
    ),
]
def _max_min_case(case_id, operator, expected_value, msg, collation=None):
    """Build a ``$max``/``$min`` expression case over the document ``{a: "a", b: "B"}``.

    ``operator`` is applied to ``["$a", "$b"]`` inside ``$project``. The
    ``collation`` key is added to the command only when ``collation`` is
    given, so the no-collation variants send no collation at all.
    """

    def build_command(ctx):
        command = {
            "aggregate": ctx.collection,
            "pipeline": [{"$project": {"result": {operator: ["$a", "$b"]}}}],
            "cursor": {},
        }
        if collation is not None:
            command["collation"] = dict(collation)
        return command

    return CommandTestCase(
        case_id,
        docs=[{"_id": 1, "a": "a", "b": "B"}],
        command=build_command,
        expected=[{"_id": 1, "result": expected_value}],
        msg=msg,
    )


# Property [$max/$min Expression Collation]: the $max and $min expression
# operators compare strings with the command-level collation, so they return
# the linguistically largest or smallest value.
COLLATION_EXPR_MAX_MIN_TESTS: list[CommandTestCase] = [
    # Linguistic order puts a before b, so the maximum is "B".
    _max_min_case(
        "expr_max_affected",
        "$max",
        "B",
        "$max expression should use collation for string comparison",
        collation={"locale": "en", "strength": 1},
    ),
    # Binary order: 'B' (66) sorts before 'a' (97), so the maximum is "a".
    _max_min_case(
        "expr_max_no_collation_binary",
        "$max",
        "a",
        "$max expression without collation should use binary comparison",
    ),
    # Linguistic order puts a before b, so the minimum is "a".
    _max_min_case(
        "expr_min_affected",
        "$min",
        "a",
        "$min expression should use collation for string comparison",
        collation={"locale": "en", "strength": 1},
    ),
    # Binary order: 'B' (66) sorts before 'a' (97), so the minimum is "B".
    _max_min_case(
        "expr_min_no_collation_binary",
        "$min",
        "B",
        "$min expression without collation should use binary comparison",
    ),
]
+ expected=[{"_id": 1, "result": ["D", "c"]}], + msg="$maxN array expression should use collation for string comparison", + ), + CommandTestCase( + "expr_maxn_no_collation_binary", + docs=[{"_id": 1, "arr": ["a", "B", "c", "D"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$maxN": {"n": 2, "input": "$arr"}}}}], + "cursor": {}, + }, + # Binary: 'B'(66) < 'D'(68) < 'a'(97) < 'c'(99), so maxN(2) = ["c", "a"]. + expected=[{"_id": 1, "result": ["c", "a"]}], + msg="$maxN array expression without collation should use binary comparison", + ), + CommandTestCase( + "expr_minn_affected", + docs=[{"_id": 1, "arr": ["a", "B", "c", "D"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$minN": {"n": 2, "input": "$arr"}}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Linguistic: a < b < c < d, so minN(2) = ["a", "B"]. + expected=[{"_id": 1, "result": ["a", "B"]}], + msg="$minN array expression should use collation for string comparison", + ), + CommandTestCase( + "expr_minn_no_collation_binary", + docs=[{"_id": 1, "arr": ["a", "B", "c", "D"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$project": {"result": {"$minN": {"n": 2, "input": "$arr"}}}}], + "cursor": {}, + }, + # Binary: 'B'(66) < 'D'(68) < 'a'(97) < 'c'(99), so minN(2) = ["B", "D"]. 
# Every expression-operator case in this module, in declaration order.
COLLATION_AGGREGATE_EXPRESSIONS_TESTS: list[CommandTestCase] = (
    COLLATION_EXPR_OPS_AFFECTED_TESTS
    + COLLATION_EXPR_OPS_NOT_AFFECTED_TESTS
    + COLLATION_EXPR_IN_ASYMMETRY_TESTS
    + COLLATION_EXPR_MAX_MIN_TESTS
    + COLLATION_EXPR_MAXN_MINN_TESTS
)


@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_EXPRESSIONS_TESTS))
def test_collation_aggregate_expressions(database_client, collection, test):
    """Test collation effects on expression operators in aggregate.

    Prepares the collection for the case, builds the aggregate command from
    the resulting context, executes it, and compares the outcome with the
    case's expected documents or error code.
    """
    collection = test.prepare(database_client, collection)
    ctx = CommandContext.from_collection(collection)
    result = execute_command(collection, test.build_command(ctx))
    assertResult(
        result,
        expected=test.build_expected(ctx),
        error_code=test.error_code,
        # Forward the case's order-insensitive field list so cases whose
        # array order is not guaranteed (e.g. set operators) can opt out of
        # ordering; cases that leave it unset are compared as before.
        ignore_order_in=test.ignore_order_in,
        msg=test.msg,
    )
geospatial results. +COLLATION_GEONEAR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "geonear_query_case_insensitive", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "cat": "Apple"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [1, 1]}, "cat": "apple"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [2, 2]}, "cat": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$geoNear": { + "near": {"type": "Point", "coordinates": [0, 0]}, + "distanceField": "dist", + "query": {"cat": "apple"}, + } + }, + {"$project": {"_id": 1, "cat": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "cat": "Apple"}, + {"_id": 2, "cat": "apple"}, + ], + msg="$geoNear query filter should use collation for case-insensitive matching", + ), + CommandTestCase( + "geonear_query_no_collation_binary", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "cat": "Apple"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [1, 1]}, "cat": "apple"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": [2, 2]}, "cat": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$geoNear": { + "near": {"type": "Point", "coordinates": [0, 0]}, + "distanceField": "dist", + "query": {"cat": "apple"}, + } + }, + {"$project": {"_id": 1, "cat": 1}}, + ], + "cursor": {}, + }, + expected=[{"_id": 2, "cat": "apple"}], + msg="$geoNear query filter without collation should use binary comparison", + ), + CommandTestCase( + "geonear_query_comparison_operator_collation", + indexes=[IndexModel([("loc", "2dsphere")])], + docs=[ + {"_id": 1, "loc": {"type": "Point", "coordinates": [0, 0]}, "cat": "Apple"}, + {"_id": 2, "loc": {"type": "Point", "coordinates": [1, 1]}, "cat": "banana"}, + {"_id": 3, "loc": {"type": "Point", "coordinates": 
@pytest.mark.parametrize("test", pytest_params(COLLATION_GEONEAR_TESTS))
def test_collation_aggregate_geonear(database_client, collection, test):
    """Run one $geoNear collation case and verify its query-filter results."""
    # Let the case set up its collection, then derive the command context
    # from the prepared collection.
    prepared = test.prepare(database_client, collection)
    context = CommandContext.from_collection(prepared)

    command = test.build_command(context)
    outcome = execute_command(prepared, command)

    expected_docs = test.build_expected(context)
    assertResult(
        outcome,
        expected=expected_docs,
        error_code=test.error_code,
        msg=test.msg,
    )
during recursive traversal, enabling case-insensitive and +# accent-insensitive graph walks. +COLLATION_GRAPHLOOKUP_TRAVERSAL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "graphlookup_case_insensitive_traversal", + docs=[ + {"_id": 1, "name": "start", "connects": "A"}, + {"_id": 2, "name": "A", "connects": "b"}, + {"_id": 3, "name": "B", "connects": "c"}, + {"_id": 4, "name": "c", "connects": None}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"_id": 1}}, + { + "$graphLookup": { + "from": ctx.collection, + "startWith": "$connects", + "connectFromField": "connects", + "connectToField": "name", + "as": "chain", + } + }, + {"$project": {"_id": 1, "chain._id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "chain": [{"_id": 2}, {"_id": 3}, {"_id": 4}]}], + ignore_order_in=["chain"], + msg="$graphLookup with strength 2 should traverse case-insensitively", + ), + CommandTestCase( + "graphlookup_no_collation_binary", + docs=[ + {"_id": 1, "name": "start", "connects": "A"}, + {"_id": 2, "name": "A", "connects": "b"}, + {"_id": 3, "name": "B", "connects": "c"}, + {"_id": 4, "name": "c", "connects": None}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"_id": 1}}, + { + "$graphLookup": { + "from": ctx.collection, + "startWith": "$connects", + "connectFromField": "connects", + "connectToField": "name", + "as": "chain", + } + }, + {"$project": {"_id": 1, "chain._id": 1}}, + ], + "cursor": {}, + }, + expected=[{"_id": 1, "chain": [{"_id": 2}]}], + ignore_order_in=["chain"], + msg="$graphLookup without collation should stop at case mismatch", + ), + CommandTestCase( + "graphlookup_accent_insensitive_traversal", + docs=[ + {"_id": 1, "name": "start", "connects": "cafe"}, + {"_id": 2, "name": "caf\u00e9", "connects": "latte"}, + {"_id": 3, "name": "latte", "connects": None}, + ], + command=lambda ctx: { + "aggregate": 
ctx.collection, + "pipeline": [ + {"$match": {"_id": 1}}, + { + "$graphLookup": { + "from": ctx.collection, + "startWith": "$connects", + "connectFromField": "connects", + "connectToField": "name", + "as": "chain", + } + }, + {"$project": {"_id": 1, "chain._id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "chain": [{"_id": 2}, {"_id": 3}]}], + ignore_order_in=["chain"], + msg="$graphLookup with strength 1 should traverse accent-insensitively", + ), + CommandTestCase( + "graphlookup_max_depth_with_collation", + docs=[ + {"_id": 1, "name": "start", "connects": "A"}, + {"_id": 2, "name": "a", "connects": "B"}, + {"_id": 3, "name": "b", "connects": "C"}, + {"_id": 4, "name": "c", "connects": None}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"_id": 1}}, + { + "$graphLookup": { + "from": ctx.collection, + "startWith": "$connects", + "connectFromField": "connects", + "connectToField": "name", + "as": "chain", + "maxDepth": 1, + } + }, + {"$project": {"_id": 1, "chain._id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "chain": [{"_id": 2}, {"_id": 3}]}], + ignore_order_in=["chain"], + msg="$graphLookup with collation and maxDepth should respect both", + ), + CommandTestCase( + "graphlookup_restrictsearchwithmatch_case_insensitive", + docs=[ + {"_id": 1, "name": "start", "connects": "nodeA", "status": "x"}, + {"_id": 2, "name": "nodeA", "connects": "nodeB", "status": "Active"}, + {"_id": 3, "name": "nodeB", "connects": None, "status": "ACTIVE"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"_id": 1}}, + { + "$graphLookup": { + "from": ctx.collection, + "startWith": "$connects", + "connectFromField": "connects", + "connectToField": "name", + "as": "chain", + "restrictSearchWithMatch": {"status": "active"}, + } + }, + {"$project": {"_id": 1, "chain._id": 1}}, + ], + 
@pytest.mark.parametrize("test", pytest_params(COLLATION_GRAPHLOOKUP_TRAVERSAL_TESTS))
def test_collation_aggregate_graphlookup(database_client, collection, test):
    """Run one $graphLookup collation case and verify the traversal result."""
    # Let the case set up its collection, then derive the command context
    # from the prepared collection.
    prepared = test.prepare(database_client, collection)
    context = CommandContext.from_collection(prepared)

    command = test.build_command(context)
    outcome = execute_command(prepared, command)

    expected_docs = test.build_expected(context)
    assertResult(
        outcome,
        expected=expected_docs,
        error_code=test.error_code,
        # Cases list the array fields (here "chain") whose element order
        # should not be compared.
        ignore_order_in=test.ignore_order_in,
        msg=test.msg,
    )
def _group_key_case(
    case_id, values, strength, expected_groups, msg, *, scan_direction=1, sort_groups=True
):
    """Build a ``$group``-by-``$x`` counting case under an ``en`` collation.

    ``values`` become documents ``{"_id": 1..n, "x": value}``. The pipeline
    sorts by ``_id`` in ``scan_direction``, groups on ``$x`` counting
    members, and, when ``sort_groups`` is true, sorts the groups by key.
    ``expected_groups`` is a sequence of ``(group_key, count)`` pairs in the
    expected output order.
    """

    def build_command(ctx):
        stages = [
            {"$sort": {"_id": scan_direction}},
            {"$group": {"_id": "$x", "count": {"$sum": 1}}},
        ]
        if sort_groups:
            stages.append({"$sort": {"_id": 1}})
        return {
            "aggregate": ctx.collection,
            "pipeline": stages,
            "cursor": {},
            "collation": {"locale": "en", "strength": strength},
        }

    return CommandTestCase(
        case_id,
        docs=[{"_id": position, "x": value} for position, value in enumerate(values, start=1)],
        command=build_command,
        expected=[{"_id": key, "count": count} for key, count in expected_groups],
        msg=msg,
    )


# Property [Group Key Deduplication]: $group compares keys with the command
# collation, so collation-equal strings fall into one group whose key is the
# first value encountered.
COLLATION_GROUP_KEY_TESTS: list[CommandTestCase] = [
    _group_key_case(
        "group_strength1_all_collapse",
        ["cafe", "Cafe", "caf\u00e9", "CAF\u00c9", "banana"],
        1,
        [("banana", 1), ("cafe", 4)],
        "$group with strength 1 should collapse all case and accent variants",
    ),
    _group_key_case(
        "group_strength2_accent_distinct_case_collapse",
        ["cafe", "Cafe", "caf\u00e9", "CAF\u00c9", "banana"],
        2,
        [("banana", 1), ("cafe", 2), ("caf\u00e9", 2)],
        "$group with strength 2 should collapse case but keep accents distinct",
    ),
    _group_key_case(
        "group_strength3_all_distinct",
        ["cafe", "Cafe", "caf\u00e9", "CAF\u00c9", "banana"],
        3,
        [("banana", 1), ("cafe", 1), ("Cafe", 1), ("caf\u00e9", 1), ("CAF\u00c9", 1)],
        "$group with strength 3 should treat all variants as distinct groups",
    ),
    # No trailing sort in the next two cases: there is a single group, and
    # its key reveals which variant was encountered first.
    _group_key_case(
        "group_first_encountered_key_asc",
        ["apple", "Apple", "APPLE"],
        1,
        [("apple", 3)],
        "$group should use first-encountered value as group key (ascending sort)",
        sort_groups=False,
    ),
    _group_key_case(
        "group_first_encountered_key_desc",
        ["apple", "Apple", "APPLE"],
        1,
        [("APPLE", 3)],
        "$group should use first-encountered value as group key (descending sort)",
        scan_direction=-1,
        sort_groups=False,
    ),
]
+COLLATION_GROUP_ACCUMULATOR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "group_addtoset_strength1_deduplicates", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": None, "unique_x": {"$addToSet": "$x"}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": None, "unique_x": ["apple"]}], + msg="$addToSet with strength 1 should deduplicate all case variants", + ), + CommandTestCase( + "group_addtoset_strength2_keeps_accents", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "CAF\u00c9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": None, "unique_x": {"$addToSet": "$x"}}}, + {"$project": {"unique_x": {"$sortArray": {"input": "$unique_x", "sortBy": 1}}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": None, "unique_x": ["cafe", "caf\u00e9"]}], + msg="$addToSet with strength 2 should keep accent variants but collapse case variants", + ), + CommandTestCase( + "group_min_uses_collation", + docs=[ + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + {"_id": 3, "x": "9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$group": {"_id": None, "min_x": {"$min": "$x"}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "min_x": "2"}], + msg="$min should use collation ordering (binary min would be '10')", + ), + CommandTestCase( + "group_max_uses_collation", + docs=[ + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + {"_id": 3, "x": "9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$group": {"_id": None, "max_x": {"$max": "$x"}}}, + ], + "cursor": {}, +
"collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": None, "max_x": "10"}], + msg="$max should use collation ordering (binary max would be '9')", + ), + CommandTestCase( + "group_min_case_insensitive", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "B"}, + {"_id": 3, "x": "c"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$group": {"_id": None, "min_x": {"$min": "$x"}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": None, "min_x": "a"}], + msg="$min with strength 1 should use linguistic ordering (binary min would be 'B')", + ), + CommandTestCase( + "group_max_case_insensitive", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "b"}, + {"_id": 3, "x": "C"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$group": {"_id": None, "max_x": {"$max": "$x"}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": None, "max_x": "C"}], + msg="$max with strength 1 should use linguistic ordering" + " (binary max would be 'b')", + ), + CommandTestCase( + "group_first_unaffected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": None, "first_x": {"$first": "$x"}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": None, "first_x": "apple"}], + msg="$first should return the first document value regardless of collation", + ), + CommandTestCase( + "group_last_unaffected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": None, "last_x": {"$last": "$x"}}}, + ], + "cursor": {}, + "collation": {"locale": "en",
"strength": 1}, + }, + expected=[{"_id": None, "last_x": "APPLE"}], + msg="$last should return the last document value regardless of collation", + ), + CommandTestCase( + "group_push_unaffected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": None, "all_x": {"$push": "$x"}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": None, "all_x": ["apple", "Apple", "APPLE"]}], + msg="$push should preserve all values regardless of collation", + ), +] + +# Property [Compound Group Key Deduplication]: collation affects all string +# fields in a compound $group _id, collapsing case/accent variants across +# every field in the compound key. +COLLATION_GROUP_COMPOUND_KEY_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "group_compound_strength1_collapses_both_fields", + docs=[ + {"_id": 1, "x": "apple", "y": "red"}, + {"_id": 2, "x": "Apple", "y": "Red"}, + {"_id": 3, "x": "apple", "y": "green"}, + {"_id": 4, "x": "banana", "y": "yellow"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": {"x": "$x", "y": "$y"}, "count": {"$sum": 1}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": {"x": "apple", "y": "green"}, "count": 1}, + {"_id": {"x": "apple", "y": "red"}, "count": 2}, + {"_id": {"x": "banana", "y": "yellow"}, "count": 1}, + ], + msg="$group with compound _id and strength 1 should collapse case variants in all fields", + ), + CommandTestCase( + "group_compound_no_collation_all_distinct", + docs=[ + {"_id": 1, "x": "apple", "y": "red"}, + {"_id": 2, "x": "Apple", "y": "Red"}, + {"_id": 3, "x": "apple", "y": "green"}, + {"_id": 4, "x": "banana", "y": "yellow"}, + ], + command=lambda ctx: { + "aggregate": 
ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": {"x": "$x", "y": "$y"}, "count": {"$sum": 1}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + expected=[ + {"_id": {"x": "Apple", "y": "Red"}, "count": 1}, + {"_id": {"x": "apple", "y": "green"}, "count": 1}, + {"_id": {"x": "apple", "y": "red"}, "count": 1}, + {"_id": {"x": "banana", "y": "yellow"}, "count": 1}, + ], + msg="$group with compound _id without collation should treat all case variants as distinct", + ), + CommandTestCase( + "group_compound_second_field_distinguishes", + docs=[ + {"_id": 1, "x": "apple", "y": "red"}, + {"_id": 2, "x": "Apple", "y": "blue"}, + {"_id": 3, "x": "apple", "y": "Red"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": {"x": "$x", "y": "$y"}, "count": {"$sum": 1}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": {"x": "Apple", "y": "blue"}, "count": 1}, + {"_id": {"x": "apple", "y": "red"}, "count": 2}, + ], + msg="$group with compound _id should use collation on second field to distinguish groups", + ), +] + +COLLATION_AGGREGATE_GROUP_TESTS: list[CommandTestCase] = ( + COLLATION_GROUP_KEY_TESTS + + COLLATION_GROUP_ACCUMULATOR_TESTS + + COLLATION_GROUP_COMPOUND_KEY_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_GROUP_TESTS)) +def test_collation_aggregate_group(database_client, collection, test): + """Test collation effects on $group key deduplication and accumulators.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_lookup.py 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_lookup.py new file mode 100644 index 000000000..ce11275b1 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_lookup.py @@ -0,0 +1,255 @@ +"""Tests for collation interaction with $lookup stage.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import pytest + +from documentdb_tests.compatibility.tests.core.collation.utils.collation_view_mismatch import ( + SECONDARY, + ViewMismatchTestCase, +) +from documentdb_tests.compatibility.tests.core.operator.stages.lookup.utils.lookup_common import ( + FOREIGN, + LookupTestCase, + build_lookup_command, + setup_lookup, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + + +@dataclass(frozen=True) +class LookupCollationTestCase(LookupTestCase): + """Test case for $lookup with optional command-level collation.""" + + command_collation: dict[str, Any] | None = None + + +# Property [Lookup Equality Join Collation]: collation affects join comparison +# in equality-based $lookup so that collation-equal strings match across +# localField and foreignField. 
+COLLATION_LOOKUP_EQUALITY_TESTS: list[LookupCollationTestCase] = [ + LookupCollationTestCase( + "lookup_equality_strength1_case_insensitive", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + foreign_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": FOREIGN, + "localField": "product", + "foreignField": "name", + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + command_collation={"locale": "en", "strength": 1}, + expected=[ + {"_id": 1, "product": "Apple", "matched": [{"_id": 1, "name": "apple"}]}, + {"_id": 2, "product": "banana", "matched": [{"_id": 2, "name": "Banana"}]}, + ], + msg="$lookup equality join with strength 1 should match case-insensitively", + ), + LookupCollationTestCase( + "lookup_equality_no_collation_binary", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + foreign_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": FOREIGN, + "localField": "product", + "foreignField": "name", + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + expected=[ + {"_id": 1, "product": "Apple", "matched": []}, + {"_id": 2, "product": "banana", "matched": []}, + ], + msg="$lookup equality join without collation should use binary comparison", + ), +] + +# Property [Lookup Pipeline Collation Propagation]: collation propagates into +# the pipeline form of $lookup so that sub-pipeline expressions inherit +# command-level collation. 
+COLLATION_LOOKUP_PIPELINE_TESTS: list[LookupCollationTestCase] = [ + LookupCollationTestCase( + "lookup_pipeline_strength1_case_insensitive", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + foreign_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": FOREIGN, + "let": {"prod": "$product"}, + "pipeline": [ + {"$match": {"$expr": {"$eq": ["$name", "$$prod"]}}}, + ], + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + command_collation={"locale": "en", "strength": 1}, + expected=[ + {"_id": 1, "product": "Apple", "matched": [{"_id": 1, "name": "apple"}]}, + {"_id": 2, "product": "banana", "matched": [{"_id": 2, "name": "Banana"}]}, + ], + msg="$lookup pipeline form with strength 1 should inherit collation", + ), + LookupCollationTestCase( + "lookup_pipeline_no_collation_binary", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + foreign_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": FOREIGN, + "let": {"prod": "$product"}, + "pipeline": [ + {"$match": {"$expr": {"$eq": ["$name", "$$prod"]}}}, + ], + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + expected=[ + {"_id": 1, "product": "Apple", "matched": []}, + {"_id": 2, "product": "banana", "matched": []}, + ], + msg="$lookup pipeline form without collation should use binary comparison", + ), +] + +COLLATION_LOOKUP_TESTS: list[LookupCollationTestCase] = ( + COLLATION_LOOKUP_EQUALITY_TESTS + COLLATION_LOOKUP_PIPELINE_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(COLLATION_LOOKUP_TESTS)) +def test_collation_aggregate_lookup(collection, test_case: LookupCollationTestCase): + """Test collation affects $lookup join comparison.""" + with setup_lookup(collection, test_case) as foreign_name: + command = build_lookup_command(collection, test_case, foreign_name) + if test_case.command_collation is not None: + 
command["collation"] = test_case.command_collation + result = execute_command(collection, command) + assertResult(result, expected=test_case.expected, msg=test_case.msg) + + +# Property [Lookup View Collation Mismatch]: $lookup from a collection or view +# to a view with mismatched collation produces OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, +# while matching collation or lookup to a base collection succeeds. +COLLATION_LOOKUP_VIEW_TESTS: list[ViewMismatchTestCase] = [ + ViewMismatchTestCase( + "lookup_collection_to_view_mismatched", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + secondary_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": SECONDARY, + "localField": "product", + "foreignField": "name", + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + secondary_view_collation={"locale": "fr", "strength": 2}, + command_collation={"locale": "en", "strength": 1}, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$lookup from collection to view with mismatched collation should error", + ), + ViewMismatchTestCase( + "lookup_view_to_view_different_collation", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + secondary_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": SECONDARY, + "localField": "product", + "foreignField": "name", + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + secondary_view_collation={"locale": "fr", "strength": 2}, + source_view_collation={"locale": "en", "strength": 1}, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$lookup from view to view with different collation should error", + ), + ViewMismatchTestCase( + "lookup_collection_to_view_matching", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + secondary_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": SECONDARY, + 
"localField": "product", + "foreignField": "name", + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + secondary_view_collation={"locale": "en", "strength": 1}, + command_collation={"locale": "en", "strength": 1}, + expected=[ + {"_id": 1, "product": "Apple", "matched": [{"_id": 1, "name": "apple"}]}, + {"_id": 2, "product": "banana", "matched": [{"_id": 2, "name": "Banana"}]}, + ], + msg="$lookup from collection to view with matching collation should succeed", + ), + ViewMismatchTestCase( + "lookup_view_to_base_collection", + docs=[{"_id": 1, "product": "Apple"}, {"_id": 2, "product": "banana"}], + secondary_docs=[{"_id": 1, "name": "apple"}, {"_id": 2, "name": "Banana"}], + pipeline=[ + { + "$lookup": { + "from": SECONDARY, + "localField": "product", + "foreignField": "name", + "as": "matched", + } + }, + {"$sort": {"_id": 1}}, + ], + source_view_collation={"locale": "en", "strength": 1}, + expected=[ + {"_id": 1, "product": "Apple", "matched": [{"_id": 1, "name": "apple"}]}, + {"_id": 2, "product": "banana", "matched": [{"_id": 2, "name": "Banana"}]}, + ], + msg="$lookup from view to base collection should succeed", + ), +] + + +@pytest.mark.parametrize("test_case", pytest_params(COLLATION_LOOKUP_VIEW_TESTS)) +def test_collation_aggregate_lookup_view(database_client, collection, test_case): + """Test $lookup collation mismatch behavior with views.""" + collection = test_case.prepare(database_client, collection) + result = execute_command(collection, test_case.build_command(collection)) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_match.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_match.py new file mode 100644 index 000000000..23f6895eb --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_match.py @@ -0,0 +1,393 @@ 
+"""Tests for collation effects on match stage operators.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Match Stage Operators]: collation affects equality ($eq, $ne), +# comparison ($gt, $gte, $lt, $lte), set ($in, $nin, $all), $elemMatch, $expr, +# $not, $nor, $or, and $and within $match; $regex and $exists/$type are NOT +# affected; field path matching always uses binary comparison. +COLLATION_MATCH_OPERATORS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "match_eq_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + {"_id": 4, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + msg="$match $eq should use collation for case-insensitive matching", + ), + CommandTestCase( + "match_ne_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$ne": "apple"}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="$match $ne should use collation for case-insensitive exclusion", + ), + CommandTestCase( + "match_gt_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "Banana"}, + ], + command=lambda ctx: { + "aggregate":
ctx.collection, + "pipeline": [ + {"$match": {"x": {"$gt": "apple"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}, {"_id": 4, "x": "Banana"}], + msg="$match $gt should use collation for case-insensitive comparison", + ), + CommandTestCase( + "match_gte_lte_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": {"$gte": "apple", "$lte": "apple"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="$match $gte/$lte should use collation for case-insensitive range", + ), + CommandTestCase( + "match_lt_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "Banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": {"$lt": "banana"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="$match $lt should use collation for case-insensitive comparison", + ), + CommandTestCase( + "match_in_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$in": ["apple"]}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="$match $in should use collation for case-insensitive set membership", + ), + CommandTestCase( + "match_nin_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], +
command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$nin": ["apple"]}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="$match $nin should use collation for case-insensitive set exclusion", + ), + CommandTestCase( + "match_all_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$all": ["apple"]}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="$match $all should use collation for case-insensitive matching", + ), + CommandTestCase( + "match_elemmatch_case_insensitive", + docs=[ + {"_id": 1, "x": ["apple", "banana"]}, + {"_id": 2, "x": ["Apple", "Banana"]}, + {"_id": 3, "x": ["cherry"]}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": {"$elemMatch": {"$eq": "apple"}}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": ["apple", "banana"]}, + {"_id": 2, "x": ["Apple", "Banana"]}, + ], + msg="$match $elemMatch should use collation for case-insensitive matching", + ), + CommandTestCase( + "match_expr_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"$expr": {"$eq": ["$x", "apple"]}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="$match $expr should use collation for case-insensitive comparison", + ), + CommandTestCase( + "match_not_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], +
command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": {"$not": {"$eq": "apple"}}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="$match $not should use collation for case-insensitive negation", + ), + CommandTestCase( + "match_or_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"$or": [{"x": "apple"}, {"x": "banana"}]}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + msg="$match $or should use collation for case-insensitive matching", + ), + CommandTestCase( + "match_and_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"$and": [{"x": {"$gte": "apple"}}, {"x": {"$lte": "apple"}}]}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="$match $and should use collation for case-insensitive matching", + ), + CommandTestCase( + "match_nor_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"$nor": [{"x": "apple"}]}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="$match $nor should use collation for case-insensitive exclusion", + ), + CommandTestCase( + "match_regex_not_affected", + docs=[ + {"_id": 1, "x": "apple"}, + 
{"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$regex": "^apple$"}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="$match $regex should NOT be affected by collation", + ), + CommandTestCase( + "match_exists_not_affected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": {"$exists": True}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="$match $exists should NOT be affected by collation", + ), + CommandTestCase( + "match_type_not_affected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": 42}, + {"_id": 3, "x": "Apple"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": {"$type": "string"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 3, "x": "Apple"}], + msg="$match $type should NOT be affected by collation", + ), + CommandTestCase( + "match_field_path_binary_comparison", + docs=[ + {"_id": 1, "Name": "apple"}, + {"_id": 2, "name": "Apple"}, + {"_id": 3, "NAME": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"name": {"$exists": True}}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 2, "name": "Apple"}], + msg="field path matching should always use binary comparison regardless of collation", + ), + CommandTestCase( + "match_implicit_array_element_case_insensitive", + docs=[ + {"_id": 1, "arr": ["Apple", "banana"]}, + {"_id": 2, "arr": ["cherry", "date"]}, + ], + command=lambda ctx: { + "aggregate": 
ctx.collection, + "pipeline": [{"$match": {"arr": "apple"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "arr": ["Apple", "banana"]}], + msg="$match implicit array element matching should use collation", + ), + CommandTestCase( + "match_implicit_array_element_no_collation", + docs=[ + {"_id": 1, "arr": ["Apple", "banana"]}, + {"_id": 2, "arr": ["cherry", "date"]}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"arr": "apple"}}], + "cursor": {}, + }, + expected=[], + msg="$match implicit array element matching without collation should use binary comparison", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_MATCH_OPERATORS_TESTS)) +def test_collation_aggregate_match(database_client, collection, test): + """Test collation effects on $match stage operators.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_merge_out.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_merge_out.py new file mode 100644 index 000000000..b463f07e1 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_merge_out.py @@ -0,0 +1,394 @@ +"""Tests for collation behavior with merge and out stages.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + DUPLICATE_KEY_ERROR, + 
IMMUTABLE_FIELD_ERROR, + MERGE_NO_MATCHING_UNIQUE_INDEX_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import SiblingCollection + +# Property [Merge ID Immutable Field Error]: $merge on _id with collection +# default collation produces IMMUTABLE_FIELD_ERROR when _id values differ in +# case because the collation-aware _id index matches but the update would modify +# the immutable _id field. +COLLATION_MERGE_ID_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "merge_id_case_mismatch_immutable_error", + docs=[{"_id": "Apple", "val": "updated"}], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "en", "strength": 1}, + docs=[{"_id": "apple", "val": "original"}], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$merge": { + "into": ctx.collection + "_target", + "on": "_id", + "whenMatched": "merge", + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + error_code=IMMUTABLE_FIELD_ERROR, + msg="$merge on _id with case mismatch should error on immutable field", + ), +] + +# Property [Merge Index Collation Mismatch]: $merge on a non-_id field requires +# a matching unique index collation, and a command collation that conflicts with +# the target collection's default collation on its indexes produces +# MERGE_NO_MATCHING_UNIQUE_INDEX_ERROR. 
+COLLATION_MERGE_INDEX_ERROR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "merge_non_id_mismatched_index_collation", + docs=[{"_id": 2, "key": "Apple", "val": "updated"}], + siblings=[ + SiblingCollection( + suffix="_target", + docs=[{"_id": 1, "key": "apple", "val": "original"}], + indexes=[IndexModel("key", unique=True, collation={"locale": "fr", "strength": 2})], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"_id": 0, "key": 1, "val": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": "merge", + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + error_code=MERGE_NO_MATCHING_UNIQUE_INDEX_ERROR, + msg="$merge with mismatched index collation should error", + ), + CommandTestCase( + "merge_command_collation_conflicts_with_collection_default", + docs=[{"_id": 2, "key": "Apple", "val": "updated"}], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "fr", "strength": 2}, + docs=[{"_id": 1, "key": "apple", "val": "original"}], + indexes=[IndexModel("key", unique=True)], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"_id": 0, "key": 1, "val": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": "merge", + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + error_code=MERGE_NO_MATCHING_UNIQUE_INDEX_ERROR, + msg="$merge with command collation conflicting with target default should error", + ), +] + +# Property [Out Stage Collation Acceptance]: $out with a valid collation is +# accepted and stages before $out (e.g., $match, $sort) respect collation +# normally. 
# Cases where an aggregate ending in $out is combined with a valid command
# collation (en, strength 1) and is expected to succeed.
#
# NOTE(review): both cases use expected=[], and the runner in this module only
# passes the aggregate command's own result to assertResult. As far as this
# file shows, the "<collection>_out" collection is never read back, so these
# cases demonstrate that the command is accepted but do not by themselves
# prove that $match/$sort applied the collation -- confirm whether a follow-up
# read of the output collection is intended.
COLLATION_OUT_ACCEPTANCE_TESTS: list[CommandTestCase] = [
    CommandTestCase(
        "out_match_case_insensitive",
        docs=[
            {"_id": 1, "x": "apple"},
            {"_id": 2, "x": "Apple"},
            {"_id": 3, "x": "banana"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$match": {"x": "apple"}}, {"$out": ctx.collection + "_out"}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1},
        },
        expected=[],
        msg="$out should accept valid collation and $match before it should respect collation",
    ),
    CommandTestCase(
        "out_sort_case_insensitive",
        docs=[
            {"_id": 1, "x": "cherry"},
            {"_id": 2, "x": "Banana"},
            {"_id": 3, "x": "apple"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [
                {"$sort": {"x": 1}},
                # Keep only the first document of the collation-ordered sort.
                {"$limit": 1},
                {"$out": ctx.collection + "_out"},
            ],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1},
        },
        expected=[],
        msg="$out should accept valid collation and $sort before it should respect collation",
    ),
]

# Property [Out Stage Collation Validation]: $out with an invalid collation
# still produces validation errors because collation is validated regardless of
# $out presence.
COLLATION_OUT_VALIDATION_ERROR_TESTS: list[CommandTestCase] = [
    # NOTE(review): the id says "type", but the collation passed below is a
    # well-formed document with an unrecognized locale string (matching the
    # msg, which says "invalid locale"). Consider renaming the id -- confirm
    # nothing selects tests by this id first.
    CommandTestCase(
        "out_invalid_collation_type",
        docs=[{"_id": 1, "x": "a"}],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$out": ctx.collection + "_out"}],
            "cursor": {},
            "collation": {"locale": "invalid_xyz"},
        },
        error_code=BAD_VALUE_ERROR,
        msg="$out with invalid locale should still produce validation error",
    ),
]

# Property [Merge Successful Collation-Aware Matching]: $merge with a
# collation-aware unique index correctly matches documents whose keys are
# collation-equal, enabling merge, replace, keepExisting, fail, and pipeline
# whenMatched behaviors.
+COLLATION_MERGE_SUCCESS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "merge_whenmatched_merge_case_insensitive", + docs=[{"_id": 2, "key": "Apple", "val": "updated"}], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "en", "strength": 1}, + docs=[{"_id": 1, "key": "apple", "val": "original"}], + indexes=[IndexModel("key", unique=True)], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"_id": 0, "key": 1, "val": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": "merge", + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[], + msg="$merge whenMatched merge should match collation-equal keys", + ), + CommandTestCase( + "merge_whenmatched_replace_case_insensitive", + docs=[{"_id": 2, "key": "Apple", "val": "replaced"}], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "en", "strength": 1}, + docs=[{"_id": 1, "key": "apple", "val": "original", "extra": "gone"}], + indexes=[IndexModel("key", unique=True)], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"_id": 0, "key": 1, "val": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": "replace", + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[], + msg="$merge whenMatched replace should replace the matched document entirely", + ), + CommandTestCase( + "merge_whenmatched_keepexisting_case_insensitive", + docs=[{"_id": 2, "key": "Apple", "val": "new"}], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "en", "strength": 1}, + docs=[{"_id": 1, "key": "apple", "val": "original"}], + indexes=[IndexModel("key", unique=True)], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + 
{"$project": {"_id": 0, "key": 1, "val": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": "keepExisting", + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[], + msg="$merge whenMatched keepExisting should not modify the existing document", + ), + CommandTestCase( + "merge_whenmatched_fail_case_insensitive", + docs=[{"_id": 2, "key": "Apple", "val": "new"}], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "en", "strength": 1}, + docs=[{"_id": 1, "key": "apple", "val": "original"}], + indexes=[IndexModel("key", unique=True)], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"_id": 0, "key": 1, "val": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": "fail", + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + error_code=DUPLICATE_KEY_ERROR, + msg="$merge whenMatched fail should error when collation-equal key exists", + ), + CommandTestCase( + "merge_whenmatched_pipeline_case_insensitive", + docs=[{"_id": 2, "key": "Apple", "val": "new", "count": 5}], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "en", "strength": 1}, + docs=[{"_id": 1, "key": "apple", "val": "original", "count": 1}], + indexes=[IndexModel("key", unique=True)], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"_id": 0, "key": 1, "val": 1, "count": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": [{"$set": {"count": {"$add": ["$count", "$$new.count"]}}}], + "whenNotMatched": "insert", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[], + msg="$merge whenMatched pipeline should execute against collation-matched document", + ), + 
CommandTestCase( + "merge_whennotmatched_discard", + docs=[ + {"_id": 2, "key": "Apple", "val": "updated"}, + {"_id": 3, "key": "banana", "val": "new"}, + ], + siblings=[ + SiblingCollection( + suffix="_target", + collation={"locale": "en", "strength": 1}, + docs=[{"_id": 1, "key": "apple", "val": "original"}], + indexes=[IndexModel("key", unique=True)], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"_id": 0, "key": 1, "val": 1}}, + { + "$merge": { + "into": ctx.collection + "_target", + "on": "key", + "whenMatched": "merge", + "whenNotMatched": "discard", + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[], + msg="$merge whenNotMatched discard should not insert non-matching documents", + ), +] + +COLLATION_AGGREGATE_MERGE_OUT_TESTS: list[CommandTestCase] = ( + COLLATION_MERGE_ID_ERROR_TESTS + + COLLATION_MERGE_INDEX_ERROR_TESTS + + COLLATION_MERGE_SUCCESS_TESTS + + COLLATION_OUT_ACCEPTANCE_TESTS + + COLLATION_OUT_VALIDATION_ERROR_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_MERGE_OUT_TESTS)) +def test_collation_aggregate_merge_out(database_client, collection, test): + """Test collation effects on $merge and $out stages.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_propagation.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_propagation.py new file mode 100644 index 000000000..c25c96de2 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_propagation.py @@ -0,0 +1,216 @@ +"""Tests for collation propagation through 
non-collation-sensitive stages. + +Confirms that collation set at the command level propagates through stages +that do not themselves perform string comparisons ($unwind, $limit, $skip, +$addFields, $set, $project) and remains active for subsequent collation-sensitive stages. +""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Collation Propagation Through Non-Sensitive Stages]: collation +# propagates through $unwind, $limit, $skip, $addFields, $set, and $project, +# remaining active for subsequent $match and $sort stages ($sample is not exercised). +COLLATION_PROPAGATION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "propagates_through_unwind", + docs=[ + {"_id": 1, "tags": ["Apple", "banana"]}, + {"_id": 2, "tags": ["cherry"]}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$unwind": "$tags"}, + {"$match": {"tags": "apple"}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "tags": "Apple"}], + msg="collation should propagate through $unwind to subsequent $match", + ), + CommandTestCase( + "propagates_through_limit", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$limit": 3}, + {"$match": {"x": "apple"}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + msg="collation should propagate through $limit to subsequent $match", + ), + CommandTestCase( + "propagates_through_skip", + docs=[ 
+ {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$skip": 1}, + {"$match": {"x": "apple"}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 2, "x": "Apple"}], + msg="collation should propagate through $skip to subsequent $match", + ), + CommandTestCase( + "propagates_through_unwind_to_sort", + docs=[ + {"_id": 1, "items": ["banana", "Apple", "cherry"]}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$unwind": "$items"}, + {"$sort": {"items": 1}}, + {"$project": {"_id": 0, "items": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"items": "Apple"}, + {"items": "banana"}, + {"items": "cherry"}, + ], + msg="collation should propagate through $unwind to subsequent $sort", + ), + CommandTestCase( + "propagates_through_multiple_stages", + docs=[ + {"_id": 1, "tags": ["Apple", "BANANA"], "cat": "fruit"}, + {"_id": 2, "tags": ["cherry"], "cat": "Fruit"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"cat": "fruit"}}, + {"$unwind": "$tags"}, + {"$sort": {"tags": 1}}, + {"$skip": 1}, + {"$limit": 2}, + {"$project": {"_id": 1, "tags": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "tags": "BANANA"}, + {"_id": 2, "tags": "cherry"}, + ], + msg="collation should propagate through chained non-sensitive stages", + ), + CommandTestCase( + "propagates_through_addfields", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$addFields": {"y": "computed"}}, + {"$match": {"x": "apple"}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 
1, "x": "apple", "y": "computed"}, + {"_id": 2, "x": "Apple", "y": "computed"}, + ], + msg="collation should propagate through $addFields to subsequent $match", + ), + CommandTestCase( + "propagates_through_set", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$set": {"y": "computed"}}, + {"$match": {"x": "apple"}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple", "y": "computed"}, + {"_id": 2, "x": "Apple", "y": "computed"}, + ], + msg="collation should propagate through $set to subsequent $match", + ), + CommandTestCase( + "propagates_through_project", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"x": 1}}, + {"$match": {"x": "apple"}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + msg="collation should propagate through $project to subsequent $match", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_PROPAGATION_TESTS)) +def test_collation_aggregate_propagation(database_client, collection, test): + """Test collation propagates through non-collation-sensitive stages.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_replaceroot.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_replaceroot.py new file mode 100644 
index 000000000..2c7c24c4c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_replaceroot.py @@ -0,0 +1,109 @@ +"""Tests for collation effects on expressions within $replaceRoot and $replaceWith.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [ReplaceRoot Expression Collation]: expressions within $replaceRoot +# and $replaceWith that perform string comparisons use command-level collation. +COLLATION_REPLACEROOT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "replaceroot_cond_eq_case_insensitive", + docs=[ + {"_id": 1, "x": "Apple", "a": {"v": 1}, "b": {"v": 2}}, + {"_id": 2, "x": "banana", "a": {"v": 3}, "b": {"v": 4}}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$replaceRoot": {"newRoot": {"$cond": [{"$eq": ["$x", "apple"]}, "$a", "$b"]}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"v": 1}, {"v": 4}], + msg="$replaceRoot $cond $eq should use collation", + ), + CommandTestCase( + "replaceroot_no_collation_binary", + docs=[ + {"_id": 1, "x": "Apple", "a": {"v": 1}, "b": {"v": 2}}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$replaceRoot": {"newRoot": {"$cond": [{"$eq": ["$x", "apple"]}, "$a", "$b"]}}}, + ], + "cursor": {}, + }, + expected=[{"v": 2}], + msg="$replaceRoot without collation should use binary comparison", + ), + CommandTestCase( + "replacewith_cond_eq_case_insensitive", + docs=[ + {"_id": 1, "x": "Apple", "a": {"v": 1}, "b": {"v": 2}}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + 
"pipeline": [ + {"$replaceWith": {"$cond": [{"$eq": ["$x", "apple"]}, "$a", "$b"]}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"v": 1}], + msg="$replaceWith $cond $eq should use collation", + ), + CommandTestCase( + "replaceroot_filter_expr_case_insensitive", + docs=[ + {"_id": 1, "items": ["Apple", "banana", "Cherry"], "target": "apple"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$replaceRoot": { + "newRoot": { + "matched": { + "$filter": { + "input": "$items", + "as": "item", + "cond": {"$eq": ["$$item", "$target"]}, + } + } + } + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"matched": ["Apple"]}], + msg="$replaceRoot with $filter expression should use collation", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_REPLACEROOT_TESTS)) +def test_collation_aggregate_replaceroot(database_client, collection, test): + """Test collation affects expressions within $replaceRoot/$replaceWith.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_resolution.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_resolution.py new file mode 100644 index 000000000..b3f40881a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_resolution.py @@ -0,0 +1,384 @@ +"""Tests for collation inheritance, multi-stage application, and parameter interactions.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + 
CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Collation Inheritance from Collection Default]: when collation is +# omitted, null, or empty from the aggregate command, the collection's default +# collation is used; an explicit collation overrides the collection default. +COLLATION_INHERITANCE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "inherit_omitted_uses_collection_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}, {"_id": 3, "x": "APPLE"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + msg="aggregate should use collection default collation when collation is omitted", + ), + CommandTestCase( + "inherit_null_uses_collection_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}, {"_id": 3, "x": "APPLE"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": None, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + msg="aggregate should use collection default collation when collation is null", + ), + CommandTestCase( + "inherit_empty_uses_collection_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}, {"_id": 3, "x": "APPLE"}], + command=lambda ctx: { + "aggregate": 
ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + msg="aggregate should use collection default collation when collation is empty", + ), + CommandTestCase( + "inherit_explicit_overrides_collection_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}, {"_id": 3, "x": "APPLE"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="aggregate should override collection default when explicit collation is provided", + ), + CommandTestCase( + "inherit_simple_overrides_collection_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}, {"_id": 3, "x": "APPLE"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {"locale": "simple"}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="aggregate should override collection default with locale simple for binary comparison", + ), +] + +# Property [Multi-Stage Uniform Application]: all collation-sensitive stages in +# a single pipeline use the command-level collation uniformly. 
# Cases that put more than one collation-sensitive stage in a single pipeline
# under one command-level collation (en, strength 1).
COLLATION_MULTI_STAGE_UNIFORM_TESTS: list[CommandTestCase] = [
    CommandTestCase(
        "multi_stage_two_match_stages",
        docs=[
            {"_id": 1, "x": "apple"},
            {"_id": 2, "x": "Apple"},
            {"_id": 3, "x": "banana"},
            {"_id": 4, "x": "Banana"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [
                # The two filters spell the same word with different casing;
                # documents 1 and 2 can only pass both stages if each $match
                # compares case-insensitively.
                {"$match": {"x": "apple"}},
                {"$match": {"x": "APPLE"}},
            ],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1},
        },
        expected=[
            {"_id": 1, "x": "apple"},
            {"_id": 2, "x": "Apple"},
        ],
        msg="two $match stages in one pipeline should both use command-level collation",
    ),
    CommandTestCase(
        "multi_stage_match_group_sort",
        docs=[
            {"_id": 1, "x": "apple", "v": 1},
            {"_id": 2, "x": "Apple", "v": 2},
            {"_id": 3, "x": "banana", "v": 3},
            {"_id": 4, "x": "Banana", "v": 4},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [
                {"$match": {"x": {"$in": ["apple", "banana"]}}},
                {"$group": {"_id": "$x", "total": {"$sum": "$v"}}},
                {"$sort": {"_id": 1}},
            ],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1},
        },
        # Expected totals merge the case variants: apple 1 + 2 = 3,
        # banana 3 + 4 = 7.
        # NOTE(review): the expected group keys are the lowercase spellings,
        # i.e. the first-inserted document of each group. No $sort precedes
        # $group here, so this presumes documents reach $group in insertion
        # order -- confirm, or sort by _id first as the $sortByCount cases in
        # test_collation_aggregate_sort_stages.py do.
        expected=[
            {"_id": "apple", "total": 3},
            {"_id": "banana", "total": 7},
        ],
        msg="$match + $group + $sort pipeline should apply the same collation to all stages",
    ),
]

# Property [Parameter Interaction Edge Cases]: multiple collation parameters
# combine independently, each applying its own effect without conflict.
+COLLATION_PARAMETER_INTERACTION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "interaction_caselevel_strength1_casefirst_upper", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "\u00e1"}, + {"_id": 4, "x": "b"}, + {"_id": 5, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": { + "locale": "en", + "strength": 1, + "caseLevel": True, + "caseFirst": "upper", + }, + }, + expected=[ + {"_id": 2, "x": "A"}, + {"_id": 1, "x": "a"}, + {"_id": 3, "x": "\u00e1"}, + {"_id": 5, "x": "B"}, + {"_id": 4, "x": "b"}, + ], + msg="caseLevel+strength1+caseFirst upper should sort uppercase first and ignore accents", + ), + CommandTestCase( + "interaction_shifted_numeric_ordering", + docs=[ + {"_id": 1, "x": "file 2"}, + {"_id": 2, "x": "file 10"}, + {"_id": 3, "x": "file2"}, + {"_id": 4, "x": "file10"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": { + "locale": "en", + "numericOrdering": True, + "alternate": "shifted", + }, + }, + expected=[ + {"_id": 1, "x": "file 2"}, + {"_id": 3, "x": "file2"}, + {"_id": 2, "x": "file 10"}, + {"_id": 4, "x": "file10"}, + ], + msg="shifted+numericOrdering should make space ignorable while preserving numeric sort", + ), + CommandTestCase( + "interaction_shifted_punct_strength1_match", + docs=[ + {"_id": 1, "x": "a_b"}, + {"_id": 2, "x": "a-b"}, + {"_id": 3, "x": "a b"}, + {"_id": 4, "x": "ab"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "strength": 1, + "alternate": "shifted", + "maxVariable": "punct", + }, + }, + expected=[ + {"_id": 1, "x": "a_b"}, + {"_id": 2, "x": "a-b"}, + {"_id": 3, "x": "a b"}, + {"_id": 4, "x": "ab"}, + ], + msg="shifted+punct+strength1 should treat all punctuation and whitespace as ignorable", + ), + 
CommandTestCase( + "interaction_caselevel_shifted_strength1_match", + docs=[ + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "Ab"}, + {"_id": 4, "x": "AB"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "strength": 1, + "caseLevel": True, + "alternate": "shifted", + }, + }, + expected=[ + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + ], + msg="caseLevel+shifted+strength1 should ignore punctuation but distinguish case", + ), + CommandTestCase( + "interaction_backwards_casefirst_upper", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": { + "locale": "en", + "strength": 3, + "backwards": True, + "caseFirst": "upper", + }, + }, + expected=[ + {"_id": 2, "x": "A"}, + {"_id": 1, "x": "a"}, + {"_id": 4, "x": "B"}, + {"_id": 3, "x": "b"}, + ], + msg="backwards+caseFirst upper should work independently without conflict", + ), + CommandTestCase( + "interaction_backwards_numeric_ordering", + docs=[ + {"_id": 1, "x": "a2"}, + {"_id": 2, "x": "a10"}, + {"_id": 3, "x": "a1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": { + "locale": "en", + "strength": 3, + "backwards": True, + "numericOrdering": True, + }, + }, + expected=[ + {"_id": 3, "x": "a1"}, + {"_id": 1, "x": "a2"}, + {"_id": 2, "x": "a10"}, + ], + msg="backwards+numericOrdering should work independently without conflict", + ), + CommandTestCase( + "interaction_backwards_shifted", + docs=[ + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": { + "locale": "en", + "strength": 3, 
+ "backwards": True, + "alternate": "shifted", + }, + }, + expected=[ + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a b"}, + ], + msg="backwards+shifted should work independently without conflict", + ), + CommandTestCase( + "interaction_backwards_caselevel_strength2", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": { + "locale": "en", + "strength": 2, + "backwards": True, + "caseLevel": True, + }, + }, + expected=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + msg="backwards+caseLevel+strength2 should reverse diacritic comparison direction", + ), +] + +COLLATION_AGGREGATE_RESOLUTION_TESTS: list[CommandTestCase] = ( + COLLATION_INHERITANCE_TESTS + + COLLATION_MULTI_STAGE_UNIFORM_TESTS + + COLLATION_PARAMETER_INTERACTION_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_RESOLUTION_TESTS)) +def test_collation_aggregate_resolution(database_client, collection, test): + """Test collation inheritance and resolution precedence in aggregate.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_sort_stages.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_sort_stages.py new file mode 100644 index 000000000..92936126a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_sort_stages.py @@ -0,0 +1,348 @@ +"""Tests for collation 
effects on sort, sortArray, and sortByCount stages.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Sort Stage Multiple Sorts]: multiple $sort stages in one pipeline +# all use the command-level collation. +COLLATION_SORT_STAGE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_stage_multiple_sorts_use_collation", + docs=[ + {"_id": 1, "x": "B", "y": "cherry"}, + {"_id": 2, "x": "a", "y": "Banana"}, + {"_id": 3, "x": "A", "y": "apple"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"x": 1}}, + {"$limit": 2}, + {"$sort": {"y": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + # Binary sort would give Banana < apple (B=66 < a=97), but + # strength 2 gives apple < Banana (case-insensitive alphabetical). + expected=[ + {"_id": 3, "x": "A", "y": "apple"}, + {"_id": 2, "x": "a", "y": "Banana"}, + ], + msg="multiple $sort stages in one pipeline should all use command-level collation", + ), +] + +# Property [$sortArray Collation]: $sortArray respects command-level collation +# for ordering elements within the array. +COLLATION_SORTARRAY_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sortarray_strength1_case_insensitive", + docs=[{"_id": 1, "items": ["banana", "Apple", "cherry", "apple", "BANANA"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"sorted": {"$sortArray": {"input": "$items", "sortBy": 1}}}} + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Strength 1 treats case variants as equivalent; stable sort preserves + # insertion order for ties. 
+ expected=[{"_id": 1, "sorted": ["Apple", "apple", "banana", "BANANA", "cherry"]}], + msg="$sortArray should use collation strength 1 for case-insensitive ordering", + ), + CommandTestCase( + "sortarray_strength3_case_sensitive", + docs=[{"_id": 1, "items": ["banana", "Apple", "cherry", "apple", "BANANA"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"sorted": {"$sortArray": {"input": "$items", "sortBy": 1}}}} + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "sorted": ["apple", "Apple", "banana", "BANANA", "cherry"]}], + msg="$sortArray should use collation strength 3 for case-sensitive ordering", + ), + CommandTestCase( + "sortarray_no_collation_binary", + docs=[{"_id": 1, "items": ["banana", "Apple", "cherry", "apple", "BANANA"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"sorted": {"$sortArray": {"input": "$items", "sortBy": 1}}}} + ], + "cursor": {}, + }, + # Binary comparison: uppercase letters (A=65, B=66) sort before + # lowercase (a=97, b=98). 
+ expected=[{"_id": 1, "sorted": ["Apple", "BANANA", "apple", "banana", "cherry"]}], + msg="$sortArray without collation should use binary sort order", + ), + CommandTestCase( + "sortarray_numeric_ordering", + docs=[{"_id": 1, "items": ["item2", "item10", "item1"]}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"sorted": {"$sortArray": {"input": "$items", "sortBy": 1}}}} + ], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[{"_id": 1, "sorted": ["item1", "item2", "item10"]}], + msg="$sortArray should respect numericOrdering collation option", + ), + CommandTestCase( + "sortarray_objects_case_insensitive", + docs=[ + { + "_id": 1, + "items": [ + {"name": "a", "v": 1}, + {"name": "B", "v": 2}, + {"name": "c", "v": 3}, + ], + } + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"sorted": {"$sortArray": {"input": "$items", "sortBy": {"name": 1}}}}} + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + { + "_id": 1, + "sorted": [ + {"name": "a", "v": 1}, + {"name": "B", "v": 2}, + {"name": "c", "v": 3}, + ], + } + ], + msg="$sortArray on objects should use collation for field-based sort", + ), + CommandTestCase( + "sortarray_objects_no_collation_binary", + docs=[ + { + "_id": 1, + "items": [ + {"name": "a", "v": 1}, + {"name": "B", "v": 2}, + {"name": "c", "v": 3}, + ], + } + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"sorted": {"$sortArray": {"input": "$items", "sortBy": {"name": 1}}}}} + ], + "cursor": {}, + }, + expected=[ + { + "_id": 1, + "sorted": [ + {"name": "B", "v": 2}, + {"name": "a", "v": 1}, + {"name": "c", "v": 3}, + ], + } + ], + msg="$sortArray on objects without collation should use binary field sort", + ), +] + +# Property [sortByCount Grouping]: collation affects grouping in $sortByCount +# so that collation-equal strings collapse into one group 
with the +# first-encountered value as the key. +COLLATION_SORT_BY_COUNT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_by_count_strength1_collapse", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + # Pre-sort by _id so "cafe" (_id 1) is the first variant encountered. + {"$sort": {"_id": 1}}, + {"$sortByCount": "$x"}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": "cafe", "count": 3}, + {"_id": "banana", "count": 1}, + ], + msg="$sortByCount with strength 1 should collapse all case and accent variants", + ), + CommandTestCase( + "sort_by_count_strength2_accent_distinct", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$sortByCount": "$x"}, + # Two groups tie on count 1; this trailing $sort pins their order. + {"$sort": {"count": -1, "_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": "cafe", "count": 2}, + {"_id": "banana", "count": 1}, + {"_id": "caf\u00e9", "count": 1}, + ], + msg="$sortByCount with strength 2 should collapse case but keep accents distinct", + ), + CommandTestCase( + "sort_by_count_strength3_all_distinct", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "cafe"}, + {"_id": 3, "x": "cafe"}, + {"_id": 4, "x": "Cafe"}, + {"_id": 5, "x": "Cafe"}, + {"_id": 6, "x": "caf\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$sortByCount": "$x"}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[ + {"_id": "cafe", "count": 3}, + {"_id": "Cafe", "count": 2}, + {"_id": "caf\u00e9", "count": 1}, + ], + msg="$sortByCount with strength 3 should treat all variants as distinct groups", + ), +] + +# Property [Multi-Field Sort with Collation]: collation 
applies to all string +# fields in a compound sort key, not just the primary sort field. +COLLATION_MULTI_FIELD_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_multi_field_collation_both_keys", + docs=[ + {"_id": 1, "x": "a", "y": "B"}, + {"_id": 2, "x": "A", "y": "b"}, + {"_id": 3, "x": "a", "y": "a"}, + {"_id": 4, "x": "A", "y": "A"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1, "y": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + # Strength 2: x values all equal (case-insensitive), so sort falls + # through to y. y: a/A (equal) < b/B (equal). Stable sort preserves + # insertion order within ties. + expected=[ + {"_id": 3, "x": "a", "y": "a"}, + {"_id": 4, "x": "A", "y": "A"}, + {"_id": 1, "x": "a", "y": "B"}, + {"_id": 2, "x": "A", "y": "b"}, + ], + msg="multi-field sort with collation should apply collation to both sort keys", + ), + CommandTestCase( + "sort_multi_field_no_collation_binary", + docs=[ + {"_id": 1, "x": "a", "y": "B"}, + {"_id": 2, "x": "A", "y": "b"}, + {"_id": 3, "x": "a", "y": "a"}, + {"_id": 4, "x": "A", "y": "A"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1, "y": 1}}], + "cursor": {}, + }, + # Binary: 'A'(65) < 'a'(97). Within x='A': 'A'(65) < 'b'(98). + # Within x='a': 'B'(66) < 'a'(97). 
+ expected=[ + {"_id": 4, "x": "A", "y": "A"}, + {"_id": 2, "x": "A", "y": "b"}, + {"_id": 1, "x": "a", "y": "B"}, + {"_id": 3, "x": "a", "y": "a"}, + ], + msg="multi-field sort without collation should use binary comparison on both keys", + ), + CommandTestCase( + "sort_multi_field_strength3_distinguishes", + docs=[ + {"_id": 1, "x": "a", "y": "B"}, + {"_id": 2, "x": "A", "y": "b"}, + {"_id": 3, "x": "a", "y": "a"}, + {"_id": 4, "x": "A", "y": "A"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1, "y": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + # Strength 3: x: 'a' < 'A'. Within x='a': y: 'a' < 'B'. + # Within x='A': y: 'A' < 'b'. + expected=[ + {"_id": 3, "x": "a", "y": "a"}, + {"_id": 1, "x": "a", "y": "B"}, + {"_id": 4, "x": "A", "y": "A"}, + {"_id": 2, "x": "A", "y": "b"}, + ], + msg="multi-field sort with strength 3 should distinguish case in both keys", + ), +] + +COLLATION_AGGREGATE_SORT_STAGES_TESTS: list[CommandTestCase] = ( + COLLATION_SORT_STAGE_TESTS + + COLLATION_SORTARRAY_TESTS + + COLLATION_SORT_BY_COUNT_TESTS + + COLLATION_MULTI_FIELD_SORT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_SORT_STAGES_TESTS)) +def test_collation_aggregate_sort_stages(database_client, collection, test): + """Test collation effects on $sort, $sortArray, and $sortByCount stages.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_substages.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_substages.py new file mode 100644 index 000000000..b3433db06 --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_substages.py @@ -0,0 +1,296 @@ +"""Tests for collation effects on facet, redact, and text search stages.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Facet Stage Collation]: collation applies within $facet +# sub-pipelines. +COLLATION_FACET_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "facet_match_strength1", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + {"_id": 4, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$facet": {"matched": [{"$match": {"x": "apple"}}]}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + { + "matched": [ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ] + } + ], + msg="$facet sub-pipeline $match should use command-level collation", + ), + CommandTestCase( + "facet_sort_strength1", + docs=[ + {"_id": 1, "x": "banana"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "apple"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$facet": {"sorted": [{"$sort": {"x": 1}}]}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Strength 1 treats case variants as equivalent; stable sort preserves + # insertion order for ties. 
+ expected=[ + { + "sorted": [ + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "apple"}, + {"_id": 1, "x": "banana"}, + ] + } + ], + msg="$facet sub-pipeline $sort should use command-level collation", + ), + CommandTestCase( + "facet_multiple_pipelines", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$facet": { + "matched": [{"$match": {"x": "apple"}}], + "sorted": [{"$sort": {"x": 1}}], + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + { + "matched": [ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + "sorted": [ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + } + ], + msg="$facet with multiple sub-pipelines should apply collation to all", + ), + CommandTestCase( + "facet_group_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 2}, + {"_id": 3, "x": "banana", "v": 3}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$facet": { + "grouped": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": "$x", "total": {"$sum": "$v"}}}, + {"$sort": {"_id": 1}}, + ] + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + { + "grouped": [ + {"_id": "apple", "total": 3}, + {"_id": "banana", "total": 3}, + ] + } + ], + msg="$facet sub-pipeline $group should use collation for key deduplication", + ), +] + +# Property [Redact Stage Collation]: $redact expressions that involve string +# comparisons ($cmp, $eq) are affected by command-level collation. 
+COLLATION_REDACT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "redact_eq_strength1_case_insensitive", + docs=[ + {"_id": 1, "level": "Public", "data": "visible"}, + {"_id": 2, "level": "private", "data": "hidden"}, + {"_id": 3, "level": "PUBLIC", "data": "also visible"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$redact": { + "$cond": { + "if": {"$eq": ["$level", "public"]}, + "then": "$$KEEP", + "else": "$$PRUNE", + } + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # No stored "level" is byte-equal to "public", so every kept document + # shows that $eq compared under the collation. + expected=[ + {"_id": 1, "level": "Public", "data": "visible"}, + {"_id": 3, "level": "PUBLIC", "data": "also visible"}, + ], + msg="$redact with $eq should use collation for case-insensitive comparison", + ), + CommandTestCase( + "redact_cmp_strength1_case_insensitive", + docs=[ + {"_id": 1, "level": "Public", "data": "visible"}, + {"_id": 2, "level": "private", "data": "hidden"}, + {"_id": 3, "level": "PUBLIC", "data": "also visible"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$redact": { + "$cond": { + "if": {"$eq": [{"$cmp": ["$level", "public"]}, 0]}, + "then": "$$KEEP", + "else": "$$PRUNE", + } + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # A document is kept only when $cmp reports 0 (equal) for its "level". + expected=[ + {"_id": 1, "level": "Public", "data": "visible"}, + {"_id": 3, "level": "PUBLIC", "data": "also visible"}, + ], + msg="$redact with $cmp should use collation for case-insensitive comparison", + ), +] + +# Property [Text Search Ignores Collation]: $text search with command-level +# collation does not error but collation is silently ignored, and $text +# $caseSensitive and $diacriticSensitive options are not overridden by +# collation. 
+COLLATION_TEXT_SEARCH_IGNORED_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "text_collation_silently_ignored", + indexes=[IndexModel([("x", "text")])], + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "CAFE"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"$text": {"$search": "cafe"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + # Strength 3 would exclude "Cafe" and "CAFE" if it applied; all three match. + expected=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "CAFE"}, + ], + msg="$text search should ignore collation and use text index semantics", + ), + CommandTestCase( + "text_case_sensitive_not_overridden", + indexes=[IndexModel([("x", "text")])], + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"$text": {"$search": "cafe", "$caseSensitive": True}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Strength 1 would also match "Cafe"; $caseSensitive still excludes it. + expected=[ + {"_id": 1, "x": "cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + ], + msg="$text $caseSensitive should not be overridden by collation strength 1", + ), + CommandTestCase( + "text_diacritic_sensitive_not_overridden", + indexes=[IndexModel([("x", "text")])], + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"$text": {"$search": "cafe", "$diacriticSensitive": True}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Strength 1 would also match the accented doc; $diacriticSensitive excludes it. + expected=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + ], + msg="$text $diacriticSensitive should not be overridden by collation strength 1", + ), +] + +COLLATION_AGGREGATE_SUBSTAGES_TESTS: list[CommandTestCase] = ( + COLLATION_FACET_TESTS + COLLATION_REDACT_TESTS + 
COLLATION_TEXT_SEARCH_IGNORED_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_SUBSTAGES_TESTS)) +def test_collation_aggregate_substages(database_client, collection, test): + """Test collation effects on $facet, $redact, and $text search stages.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_unionwith.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_unionwith.py new file mode 100644 index 000000000..699d019c4 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_unionwith.py @@ -0,0 +1,154 @@ +"""Tests for collation propagation into $unionWith sub-pipelines.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import SiblingCollection + +# Property [UnionWith Collation Propagation]: command-level collation propagates +# into $unionWith sub-pipelines ($match) and governs $sort over the combined result. 
+COLLATION_UNIONWITH_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "unionwith_match_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": "apple"}}, + { + "$unionWith": { + "coll": ctx.collection, + "pipeline": [{"$match": {"x": "BANANA"}}], + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "banana"}, + ], + msg="$unionWith sub-pipeline $match should use command-level collation", + ), + CommandTestCase( + "unionwith_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "banana"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$match": {"x": "apple"}}, + { + "$unionWith": { + "coll": ctx.collection, + "pipeline": [{"$match": {"x": "BANANA"}}], + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="$unionWith without collation should use binary comparison", + ), + CommandTestCase( + "unionwith_sort_case_insensitive", + docs=[ + {"_id": 1, "x": "banana"}, + {"_id": 2, "x": "Apple"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$unionWith": { + "coll": ctx.collection, + "pipeline": [{"$match": {"_id": 2}}], + } + }, + {"$sort": {"x": 1}}, + {"$project": {"_id": 1, "x": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 2, "x": "Apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 1, "x": "banana"}, + ], + msg="$unionWith results should sort under command-level collation", + ), +] + +# Property [UnionWith Overrides Foreign Collection Collation]: command-level +# collation overrides the unioned collection's default collation for sub-pipeline +# operations. 
+COLLATION_UNIONWITH_OVERRIDE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "unionwith_overrides_foreign_default", + docs=[{"_id": 1, "x": "apple"}], + siblings=[ + SiblingCollection( + suffix="_other", + collation={"locale": "fr", "strength": 3}, + docs=[ + {"_id": 10, "x": "Apple"}, + {"_id": 11, "x": "banana"}, + ], + ), + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$unionWith": { + "coll": ctx.collection + "_other", + "pipeline": [{"$match": {"x": "apple"}}], + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # The sibling's own default (fr, strength 3) would not match "Apple"; + # the command's strength 1 does. + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 10, "x": "Apple"}, + ], + msg="command collation should override unioned collection's default collation", + ), +] + +COLLATION_UNIONWITH_ALL_TESTS: list[CommandTestCase] = ( + COLLATION_UNIONWITH_TESTS + COLLATION_UNIONWITH_OVERRIDE_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_UNIONWITH_ALL_TESTS)) +def test_collation_aggregate_unionwith(database_client, collection, test): + """Test collation propagation into $unionWith and override of foreign defaults.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_views.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_views.py new file mode 100644 index 000000000..e63ec5ab0 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_views.py @@ -0,0 +1,314 @@ +"""Tests for collation constraints on views and cross-view stage behavior.""" + +from __future__ import annotations + +import pytest + +from 
documentdb_tests.compatibility.tests.core.collation.utils.collation_view_mismatch import ( + SECONDARY, + ViewMismatchTestCase, +) +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import OPTION_NOT_SUPPORTED_ON_VIEW_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import ViewCollection, ViewOnCustomCollection + +# Property [View Collation Constraints]: aggregating on a view with an explicit +# collation that differs from the view's default produces +# OPTION_NOT_SUPPORTED_ON_VIEW_ERROR; omitting collation uses the view's default; +# a view created without collation uses simple comparison and does not inherit +# the source collection's collation. +COLLATION_VIEW_CONSTRAINT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "view_no_explicit_uses_default", + target_collection=ViewCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="aggregate on view with no explicit collation should use view default", + ), + CommandTestCase( + "view_matching_collation_succeeds", + target_collection=ViewCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="aggregate on view with matching collation should succeed", + ), + 
CommandTestCase( + "view_different_locale_rejected", + target_collection=ViewCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2}, + }, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="aggregate on view with different locale should be rejected", + ), + CommandTestCase( + "view_same_locale_different_strength_rejected", + target_collection=ViewCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="aggregate on view with same locale but different strength should be rejected", + ), + CommandTestCase( + "view_no_collation_defaults_to_simple", + target_collection=ViewCollection(), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="view without collation should default to simple binary comparison", + ), + CommandTestCase( + "view_no_collation_does_not_inherit_source", + target_collection=ViewOnCustomCollection( + source_options={"collation": {"locale": "en", "strength": 1}} + ), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="view without collation should not inherit source collection's collation", + ), + CommandTestCase( + "view_no_collation_explicit_simple_succeeds", + 
target_collection=ViewCollection(), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {"locale": "simple"}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="aggregate on view without collation with explicit simple should succeed", + ), + CommandTestCase( + "view_no_collation_explicit_nonsimple_rejected", + target_collection=ViewCollection(), + docs=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "apple"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="aggregate on view without collation with non-simple collation should be rejected", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_VIEW_CONSTRAINT_TESTS)) +def test_collation_aggregate_views(database_client, collection, test): + """Test collation constraints when aggregating on views.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) + + +# Property [GraphLookup View Collation Mismatch]: $graphLookup from a +# collection or view to a view with mismatched collation produces +# OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, while matching collation succeeds. 
+COLLATION_GRAPHLOOKUP_VIEW_TESTS: list[ViewMismatchTestCase] = [ + ViewMismatchTestCase( + "graphlookup_collection_to_view_mismatched", + docs=[{"_id": 1, "start": "a"}], + secondary_docs=[ + {"_id": 1, "val": "a", "next": "b"}, + {"_id": 2, "val": "b", "next": "c"}, + ], + pipeline=[ + { + "$graphLookup": { + "from": SECONDARY, + "startWith": "$start", + "connectFromField": "next", + "connectToField": "val", + "as": "connected", + } + }, + ], + secondary_view_collation={"locale": "fr", "strength": 2}, + command_collation={"locale": "en", "strength": 1}, + ignore_order_in=["connected"], + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$graphLookup from collection to view with mismatched collation should error", + ), + ViewMismatchTestCase( + "graphlookup_view_to_view_different_collation", + docs=[{"_id": 1, "start": "a"}], + secondary_docs=[ + {"_id": 1, "val": "a", "next": "b"}, + {"_id": 2, "val": "b", "next": "c"}, + ], + pipeline=[ + { + "$graphLookup": { + "from": SECONDARY, + "startWith": "$start", + "connectFromField": "next", + "connectToField": "val", + "as": "connected", + } + }, + ], + secondary_view_collation={"locale": "fr", "strength": 2}, + source_view_collation={"locale": "en", "strength": 1}, + ignore_order_in=["connected"], + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$graphLookup from view to view with different collation should error", + ), + ViewMismatchTestCase( + "graphlookup_collection_to_view_matching", + docs=[{"_id": 1, "start": "a"}], + secondary_docs=[ + {"_id": 1, "val": "a", "next": "b"}, + {"_id": 2, "val": "b", "next": "c"}, + ], + pipeline=[ + { + "$graphLookup": { + "from": SECONDARY, + "startWith": "$start", + "connectFromField": "next", + "connectToField": "val", + "as": "connected", + } + }, + ], + secondary_view_collation={"locale": "en", "strength": 1}, + command_collation={"locale": "en", "strength": 1}, + ignore_order_in=["connected"], + expected=[ + { + "_id": 1, + "start": "a", + "connected": [ + 
{"_id": 2, "val": "b", "next": "c"}, + {"_id": 1, "val": "a", "next": "b"}, + ], + } + ], + msg="$graphLookup from collection to view with matching collation should succeed", + ), +] + +# Property [UnionWith View Collation Mismatch]: $unionWith from a collection to +# a view whose collation differs from the command's (explicit or omitted) produces +# OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, while matching collation or unionWith from +# a view to a base collection succeeds. +COLLATION_UNIONWITH_TESTS: list[ViewMismatchTestCase] = [ + ViewMismatchTestCase( + "unionwith_collection_to_view_mismatched", + docs=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": "world"}], + secondary_docs=[{"_id": 3, "x": "foo"}, {"_id": 4, "x": "bar"}], + pipeline=[{"$unionWith": SECONDARY}], + secondary_view_collation={"locale": "fr", "strength": 2}, + command_collation={"locale": "en", "strength": 1}, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$unionWith from collection to view with mismatched collation should error", + ), + ViewMismatchTestCase( + "unionwith_subpipeline_to_view_mismatched", + docs=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": "world"}], + secondary_docs=[{"_id": 3, "x": "foo"}, {"_id": 4, "x": "bar"}], + pipeline=[{"$unionWith": {"coll": SECONDARY, "pipeline": []}}], + secondary_view_collation={"locale": "fr", "strength": 2}, + command_collation={"locale": "en", "strength": 1}, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$unionWith sub-pipeline form to view with mismatched collation should error", + ), + ViewMismatchTestCase( + "unionwith_collection_to_view_matching", + docs=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": "world"}], + secondary_docs=[{"_id": 3, "x": "foo"}, {"_id": 4, "x": "bar"}], + pipeline=[{"$unionWith": SECONDARY}], + secondary_view_collation={"locale": "en", "strength": 1}, + command_collation={"locale": "en", "strength": 1}, + expected=[ + {"_id": 1, "x": "hello"}, + {"_id": 2, "x": "world"}, + {"_id": 3, "x": "foo"}, + {"_id": 4, "x": "bar"}, 
+ ], + msg="$unionWith 
from collection to view with matching collation should succeed", + ), + ViewMismatchTestCase( + "unionwith_no_collation_to_view", + docs=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": "world"}], + secondary_docs=[{"_id": 3, "x": "foo"}, {"_id": 4, "x": "bar"}], + pipeline=[{"$unionWith": SECONDARY}], + secondary_view_collation={"locale": "fr", "strength": 2}, + error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, + msg="$unionWith with no explicit collation to view should error", + ), + ViewMismatchTestCase( + "unionwith_view_to_base_collection", + docs=[{"_id": 1, "x": "hello"}, {"_id": 2, "x": "world"}], + secondary_docs=[{"_id": 3, "x": "foo"}, {"_id": 4, "x": "bar"}], + pipeline=[{"$unionWith": SECONDARY}], + source_view_collation={"locale": "en", "strength": 1}, + expected=[ + {"_id": 1, "x": "hello"}, + {"_id": 2, "x": "world"}, + {"_id": 3, "x": "foo"}, + {"_id": 4, "x": "bar"}, + ], + msg="$unionWith from view to base collection should succeed", + ), +] + +COLLATION_VIEW_MISMATCH_TESTS: list[ViewMismatchTestCase] = ( + COLLATION_GRAPHLOOKUP_VIEW_TESTS + COLLATION_UNIONWITH_TESTS +) + + +@pytest.mark.parametrize("test_case", pytest_params(COLLATION_VIEW_MISMATCH_TESTS)) +def test_collation_aggregate_views_mismatch(database_client, collection, test_case): + """Test $graphLookup and $unionWith collation mismatches against views.""" + collection = test_case.prepare(database_client, collection) + result = execute_command(collection, test_case.build_command(collection)) + assertResult( + result, + expected=test_case.expected, + error_code=test_case.error_code, + msg=test_case.msg, + ignore_order_in=test_case.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_window.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_window.py new file mode 100644 index 000000000..7f3b40dbc --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_window.py @@ -0,0 
+1,613 @@ 
+"""Tests for collation effects on setWindowFields, fill, and densify stages.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [SetWindowFields Partitioning and Sorting]: collation affects +# $setWindowFields partitionBy grouping so that collation-equal strings are +# placed in the same partition, and affects sortBy ordering within partitions. +COLLATION_SET_WINDOW_FIELDS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "swf_partition_strength1_case_variants_same_partition", + docs=[ + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 2, "category": "Apple", "val": 20}, + {"_id": 3, "category": "APPLE", "val": 30}, + {"_id": 4, "category": "banana", "val": 40}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$category", + "sortBy": {"_id": 1}, + "output": {"rank": {"$rank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 1, "category": "apple", "val": 10, "rank": 1}, + {"_id": 2, "category": "Apple", "val": 20, "rank": 2}, + {"_id": 3, "category": "APPLE", "val": 30, "rank": 3}, + {"_id": 4, "category": "banana", "val": 40, "rank": 1}, + ], + msg="$setWindowFields with strength 1 should place case variants in the same partition", + ), + CommandTestCase( + "swf_partition_no_collation_case_variants_separate", + docs=[ + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 2, "category": "Apple", "val": 20}, + {"_id": 3, "category": "APPLE", "val": 30}, + {"_id": 4, "category": "banana", "val": 40}, + ], + command=lambda ctx: { + "aggregate": 
ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$category", + "sortBy": {"_id": 1}, + "output": {"rank": {"$rank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + # Binary comparison: each case variant is its own partition. + expected=[ + {"_id": 1, "category": "apple", "val": 10, "rank": 1}, + {"_id": 2, "category": "Apple", "val": 20, "rank": 1}, + {"_id": 3, "category": "APPLE", "val": 30, "rank": 1}, + {"_id": 4, "category": "banana", "val": 40, "rank": 1}, + ], + msg=( + "$setWindowFields without collation should treat case variants" + " as separate partitions" + ), + ), + CommandTestCase( + "swf_sortby_collation_aware_ordering", + docs=[ + {"_id": 1, "group": "x", "name": "banana"}, + {"_id": 2, "group": "x", "name": "apple"}, + {"_id": 3, "group": "x", "name": "Cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$group", + "sortBy": {"name": 1}, + "output": {"rank": {"$rank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Collation-aware: apple < banana < Cherry. + expected=[ + {"_id": 1, "group": "x", "name": "banana", "rank": 2}, + {"_id": 2, "group": "x", "name": "apple", "rank": 1}, + {"_id": 3, "group": "x", "name": "Cherry", "rank": 3}, + ], + msg="$setWindowFields sortBy should use collation-aware ordering within partitions", + ), + CommandTestCase( + "swf_sortby_no_collation_binary_ordering", + docs=[ + {"_id": 1, "group": "x", "name": "banana"}, + {"_id": 2, "group": "x", "name": "apple"}, + {"_id": 3, "group": "x", "name": "Cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$group", + "sortBy": {"name": 1}, + "output": {"rank": {"$rank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + # Binary comparison: 'C' (67) < 'a' (97) < 'b' (98). 
+ expected=[ + {"_id": 1, "group": "x", "name": "banana", "rank": 3}, + {"_id": 2, "group": "x", "name": "apple", "rank": 2}, + {"_id": 3, "group": "x", "name": "Cherry", "rank": 1}, + ], + msg="$setWindowFields sortBy without collation should use binary ordering", + ), +] + +# Property [DenseRank Ordering]: $denseRank respects collation for sort +# ordering and tie detection, producing consecutive ranks without gaps when +# collation-equal values create ties. +COLLATION_DENSE_RANK_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "swf_denserank_collation_ties_no_gaps", + docs=[ + {"_id": 1, "group": "x", "name": "apple"}, + {"_id": 2, "group": "x", "name": "Apple"}, + {"_id": 3, "group": "x", "name": "banana"}, + {"_id": 4, "group": "x", "name": "Cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$group", + "sortBy": {"name": 1}, + "output": {"dr": {"$denseRank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Collation strength 1: apple == Apple (tie at rank 1), banana=2, Cherry=3. + # $denseRank produces consecutive ranks without gaps. 
+ expected=[ + {"_id": 1, "group": "x", "name": "apple", "dr": 1}, + {"_id": 2, "group": "x", "name": "Apple", "dr": 1}, + {"_id": 3, "group": "x", "name": "banana", "dr": 2}, + {"_id": 4, "group": "x", "name": "Cherry", "dr": 3}, + ], + msg="$denseRank with collation should produce consecutive ranks for collation-equal ties", + ), + CommandTestCase( + "swf_denserank_no_collation_binary_ordering", + docs=[ + {"_id": 1, "group": "x", "name": "apple"}, + {"_id": 2, "group": "x", "name": "Apple"}, + {"_id": 3, "group": "x", "name": "banana"}, + {"_id": 4, "group": "x", "name": "Cherry"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$group", + "sortBy": {"name": 1}, + "output": {"dr": {"$denseRank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + # Binary: Apple(1) < Cherry(2) < apple(3) < banana(4), no ties. + expected=[ + {"_id": 1, "group": "x", "name": "apple", "dr": 3}, + {"_id": 2, "group": "x", "name": "Apple", "dr": 1}, + {"_id": 3, "group": "x", "name": "banana", "dr": 4}, + {"_id": 4, "group": "x", "name": "Cherry", "dr": 2}, + ], + msg="$denseRank without collation should use binary ordering with no ties", + ), + CommandTestCase( + "swf_denserank_partition_collation_groups_variants", + docs=[ + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 2, "category": "Apple", "val": 20}, + {"_id": 3, "category": "APPLE", "val": 30}, + {"_id": 4, "category": "banana", "val": 40}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$category", + "sortBy": {"_id": 1}, + "output": {"dr": {"$denseRank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Strength 1 merges case variants into one partition; _id ordering + # produces no ties so denseRank is sequential. 
+ expected=[ + {"_id": 1, "category": "apple", "val": 10, "dr": 1}, + {"_id": 2, "category": "Apple", "val": 20, "dr": 2}, + {"_id": 3, "category": "APPLE", "val": 30, "dr": 3}, + {"_id": 4, "category": "banana", "val": 40, "dr": 1}, + ], + msg="$denseRank with collation should place case variants in the same partition", + ), + CommandTestCase( + "swf_denserank_partition_no_collation_separate", + docs=[ + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 2, "category": "Apple", "val": 20}, + {"_id": 3, "category": "APPLE", "val": 30}, + {"_id": 4, "category": "banana", "val": 40}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$category", + "sortBy": {"_id": 1}, + "output": {"dr": {"$denseRank": {}}}, + } + }, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + }, + # Binary: each case variant is its own single-doc partition. + expected=[ + {"_id": 1, "category": "apple", "val": 10, "dr": 1}, + {"_id": 2, "category": "Apple", "val": 20, "dr": 1}, + {"_id": 3, "category": "APPLE", "val": 30, "dr": 1}, + {"_id": 4, "category": "banana", "val": 40, "dr": 1}, + ], + msg="$denseRank without collation should treat case variants as separate partitions", + ), +] + +# Property [Fill and Densify Partitioning]: $fill and $densify +# partitionByFields respect collation so that collation-equal strings are +# merged into one partition. 
+COLLATION_FILL_DENSIFY_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "fill_partition_strength1_case_variants_merged", + docs=[ + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 2, "category": "Apple", "val": None}, + {"_id": 3, "category": "APPLE", "val": 30}, + {"_id": 4, "category": "banana", "val": None}, + {"_id": 5, "category": "banana", "val": 50}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + { + "$fill": { + "partitionByFields": ["category"], + "output": {"val": {"method": "locf"}}, + } + }, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Strength 1 merges all case variants into one partition, so LOCF + # carries _id:1 val=10 forward to _id:2. + expected=[ + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 2, "category": "Apple", "val": 10}, + {"_id": 3, "category": "APPLE", "val": 30}, + {"_id": 4, "category": "banana", "val": None}, + {"_id": 5, "category": "banana", "val": 50}, + ], + msg="$fill with strength 1 should merge case variants into one partition for LOCF", + ), + CommandTestCase( + "fill_partition_no_collation_case_variants_separate", + docs=[ + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 2, "category": "Apple", "val": None}, + {"_id": 3, "category": "banana", "val": None}, + {"_id": 4, "category": "banana", "val": 50}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + { + "$fill": { + "partitionByFields": ["category"], + "output": {"val": {"method": "locf"}}, + } + }, + ], + "cursor": {}, + }, + # Binary comparison: "Apple" and "apple" are separate partitions, so + # _id:2 has no prior value in its partition and remains None. 
+ expected=[ + {"_id": 2, "category": "Apple", "val": None}, + {"_id": 1, "category": "apple", "val": 10}, + {"_id": 3, "category": "banana", "val": None}, + {"_id": 4, "category": "banana", "val": 50}, + ], + msg="$fill without collation should treat case variants as separate partitions", + ), + CommandTestCase( + "densify_partition_strength1_case_variants_merged", + docs=[ + {"_id": 1, "category": "apple", "val": 0}, + {"_id": 2, "category": "Apple", "val": 10}, + {"_id": 3, "category": "banana", "val": 0}, + {"_id": 4, "category": "banana", "val": 10}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$densify": { + "field": "val", + "partitionByFields": ["category"], + "range": {"step": 5, "bounds": "partition"}, + } + } + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + # Strength 1 merges apple and Apple into one partition (val 0, 10), + # so densify fills at val=5 within the merged partition. + expected=[ + {"_id": 1, "category": "apple", "val": 0}, + {"category": "Apple", "val": 5}, + {"_id": 2, "category": "Apple", "val": 10}, + {"_id": 3, "category": "banana", "val": 0}, + {"category": "banana", "val": 5}, + {"_id": 4, "category": "banana", "val": 10}, + ], + msg="$densify with strength 1 should merge case variants into one partition", + ), + CommandTestCase( + "densify_partition_no_collation_case_variants_separate", + docs=[ + {"_id": 1, "category": "apple", "val": 0}, + {"_id": 2, "category": "Apple", "val": 10}, + {"_id": 3, "category": "banana", "val": 0}, + {"_id": 4, "category": "banana", "val": 10}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$densify": { + "field": "val", + "partitionByFields": ["category"], + "range": {"step": 5, "bounds": "partition"}, + } + } + ], + "cursor": {}, + }, + # Binary comparison: "Apple" and "apple" are separate single-value + # partitions so no densification occurs for them. Only banana (0, 10) + # gets a fill at val=5. 
+ expected=[ + {"_id": 2, "category": "Apple", "val": 10}, + {"_id": 1, "category": "apple", "val": 0}, + {"_id": 3, "category": "banana", "val": 0}, + {"category": "banana", "val": 5}, + {"_id": 4, "category": "banana", "val": 10}, + ], + msg="$densify without collation should treat case variants as separate partitions", + ), +] + +# Property [SetWindowFields Window Function Collation]: $min and $max compare +# string values using the collation, while $first, $last, and $documentNumber +# are affected through the collation-aware sortBy ordering. +COLLATION_WINDOW_FUNCTION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "window_min_case_insensitive", + docs=[ + {"_id": 1, "cat": "fruit", "x": "a"}, + {"_id": 2, "cat": "fruit", "x": "B"}, + {"_id": 3, "cat": "fruit", "x": "c"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$cat", + "sortBy": {"_id": 1}, + "output": { + "wmin": {"$min": "$x"}, + "wmax": {"$max": "$x"}, + }, + } + }, + {"$project": {"x": 1, "wmin": 1, "wmax": 1}}, + {"$limit": 1}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "x": "a", "wmin": "a", "wmax": "c"}], + msg="$min/$max window functions should use collation for string comparison", + ), + CommandTestCase( + "window_min_no_collation_binary", + docs=[ + {"_id": 1, "cat": "fruit", "x": "a"}, + {"_id": 2, "cat": "fruit", "x": "B"}, + {"_id": 3, "cat": "fruit", "x": "c"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$cat", + "sortBy": {"_id": 1}, + "output": { + "wmin": {"$min": "$x"}, + "wmax": {"$max": "$x"}, + }, + } + }, + {"$project": {"x": 1, "wmin": 1, "wmax": 1}}, + {"$limit": 1}, + ], + "cursor": {}, + }, + expected=[{"_id": 1, "x": "a", "wmin": "B", "wmax": "c"}], + msg="$min/$max window functions without collation should use binary comparison", + ), + CommandTestCase( + "window_first_last_case_insensitive", + docs=[
+ {"_id": 1, "cat": "fruit", "x": "a"}, + {"_id": 2, "cat": "fruit", "x": "B"}, + {"_id": 3, "cat": "fruit", "x": "c"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$cat", + "sortBy": {"x": 1}, + "output": { + "wfirst": {"$first": "$x"}, + "wlast": {"$last": "$x"}, + }, + } + }, + {"$project": {"x": 1, "wfirst": 1, "wlast": 1}}, + {"$limit": 1}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "x": "a", "wfirst": "a", "wlast": "c"}], + msg="$first/$last window functions should reflect collation-aware sortBy ordering", + ), + CommandTestCase( + "window_first_last_no_collation_binary", + docs=[ + {"_id": 1, "cat": "fruit", "x": "a"}, + {"_id": 2, "cat": "fruit", "x": "B"}, + {"_id": 3, "cat": "fruit", "x": "c"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "partitionBy": "$cat", + "sortBy": {"x": 1}, + "output": { + "wfirst": {"$first": "$x"}, + "wlast": {"$last": "$x"}, + }, + } + }, + {"$project": {"x": 1, "wfirst": 1, "wlast": 1}}, + {"$limit": 1}, + ], + "cursor": {}, + }, + expected=[{"_id": 2, "x": "B", "wfirst": "B", "wlast": "c"}], + msg="$first/$last window functions without collation should use binary sortBy ordering", + ), + CommandTestCase( + "window_documentnumber_case_insensitive", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "B"}, + {"_id": 3, "x": "c"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "sortBy": {"x": 1}, + "output": {"docNum": {"$documentNumber": {}}}, + } + }, + {"$sort": {"docNum": 1}}, + {"$project": {"x": 1, "docNum": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 1, "x": "a", "docNum": 1}, + {"_id": 2, "x": "B", "docNum": 2}, + {"_id": 3, "x": "c", "docNum": 3}, + ], + msg="$documentNumber should reflect collation-aware sortBy 
ordering", + ), + CommandTestCase( + "window_documentnumber_no_collation_binary", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "B"}, + {"_id": 3, "x": "c"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + { + "$setWindowFields": { + "sortBy": {"x": 1}, + "output": {"docNum": {"$documentNumber": {}}}, + } + }, + {"$sort": {"docNum": 1}}, + {"$project": {"x": 1, "docNum": 1}}, + ], + "cursor": {}, + }, + expected=[ + {"_id": 2, "x": "B", "docNum": 1}, + {"_id": 1, "x": "a", "docNum": 2}, + {"_id": 3, "x": "c", "docNum": 3}, + ], + msg="$documentNumber without collation should use binary sortBy ordering", + ), +] + +COLLATION_AGGREGATE_WINDOW_TESTS: list[CommandTestCase] = ( + COLLATION_SET_WINDOW_FIELDS_TESTS + + COLLATION_DENSE_RANK_TESTS + + COLLATION_FILL_DENSIFY_TESTS + + COLLATION_WINDOW_FUNCTION_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_AGGREGATE_WINDOW_TESTS)) +def test_collation_aggregate_window(database_client, collection, test): + """Test collation effects on the $setWindowFields, $fill, and $densify stages.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_alternate.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_alternate.py new file mode 100644 index 000000000..02aed2570 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_alternate.py @@ -0,0 +1,347 @@ +"""Tests for alternate and maxVariable behavior and null acceptance in collation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from
documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Alternate and MaxVariable Behavior]: alternate:"shifted" makes +# whitespace and punctuation ignorable at strength <= 3 (distinguished at +# strength 4+), maxVariable controls which characters are ignorable, and symbol +# characters are never ignorable regardless of settings. +COLLATION_ALTERNATE_MAXVARIABLE_BEHAVIOR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "alternate_non_ignorable_default", + docs=[ + {"_id": 1, "x": "a b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a-b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": {"locale": "en", "alternate": "non-ignorable"}, + }, + expected=[{"_id": 2, "x": "ab"}], + msg="alternate non-ignorable should treat whitespace and punctuation as base characters", + ), + CommandTestCase( + "alternate_shifted_strength3_punct", + docs=[ + {"_id": 1, "x": "a b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a-b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": "punct", + }, + }, + expected=[ + {"_id": 1, "x": "a b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a-b"}, + ], + msg="alternate shifted with maxVariable punct should ignore whitespace and punctuation", + ), + CommandTestCase( + "alternate_shifted_strength3_space", + docs=[ + {"_id": 1, "x": "a b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a-b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": "space", + }, + }, + expected=[{"_id": 1, "x": "a b"}, 
{"_id": 2, "x": "ab"}], + msg="alternate shifted with maxVariable space should ignore only whitespace", + ), + CommandTestCase( + "alternate_shifted_strength4_distinguished", + docs=[ + {"_id": 1, "x": "a b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a-b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 4, + "maxVariable": "punct", + }, + }, + expected=[{"_id": 2, "x": "ab"}], + msg="alternate shifted at strength 4 should distinguish punctuation and whitespace", + ), + CommandTestCase( + "maxvariable_no_effect_with_non_ignorable", + docs=[ + {"_id": 1, "x": "a b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a-b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "non-ignorable", + "maxVariable": "punct", + }, + }, + expected=[{"_id": 2, "x": "ab"}], + msg="maxVariable should have no effect when alternate is non-ignorable", + ), + CommandTestCase( + "nbsp_classified_as_punctuation", + docs=[ + {"_id": 1, "x": "a\u00a0b"}, + {"_id": 2, "x": "ab"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": "space", + }, + }, + expected=[{"_id": 2, "x": "ab"}], + msg="NBSP (U+00A0) should not be ignorable with maxVariable space (classified as punct)", + ), + CommandTestCase( + "nbsp_ignorable_with_punct", + docs=[ + {"_id": 1, "x": "a\u00a0b"}, + {"_id": 2, "x": "ab"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": "punct", + }, + }, + expected=[{"_id": 1, "x": "a\u00a0b"}, {"_id": 2, "x": 
"ab"}], + msg="NBSP (U+00A0) should be ignorable with maxVariable punct", + ), + CommandTestCase( + "zero_width_ignorable_strength4_non_ignorable", + docs=[ + {"_id": 1, "x": "a\u200bb"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a\u200db"}, + {"_id": 4, "x": "a\ufeffb"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 4, "alternate": "non-ignorable"}, + }, + expected=[ + {"_id": 1, "x": "a\u200bb"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a\u200db"}, + {"_id": 4, "x": "a\ufeffb"}, + ], + msg="zero-width chars should be ignorable at strength 4 regardless of alternate", + ), + CommandTestCase( + "zero_width_ignorable_strength4_shifted", + docs=[ + {"_id": 1, "x": "a\u200bb"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a\u200db"}, + {"_id": 4, "x": "a\ufeffb"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 4, "alternate": "shifted"}, + }, + expected=[ + {"_id": 1, "x": "a\u200bb"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a\u200db"}, + {"_id": 4, "x": "a\ufeffb"}, + ], + msg="zero-width chars should be ignorable at strength 4 even with alternate shifted", + ), + CommandTestCase( + "zero_width_distinguished_strength5", + docs=[ + {"_id": 1, "x": "a\u200bb"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a\u200db"}, + {"_id": 4, "x": "a\ufeffb"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 5, "alternate": "non-ignorable"}, + }, + expected=[{"_id": 2, "x": "ab"}], + msg="zero-width chars should be distinguished at strength 5", + ), + CommandTestCase( + "all_punct_whitespace_equals_empty", + docs=[ + {"_id": 1, "x": "..."}, + {"_id": 2, "x": " "}, + {"_id": 3, "x": ""}, + {"_id": 4, "x": "-_-"}, + ], + command=lambda ctx: { 
+ "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": ""}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": "punct", + }, + }, + expected=[ + {"_id": 1, "x": "..."}, + {"_id": 2, "x": " "}, + {"_id": 3, "x": ""}, + {"_id": 4, "x": "-_-"}, + ], + msg="all-punct/whitespace strings should equal empty string with shifted+punct", + ), + CommandTestCase( + "null_byte_ignorable_shifted_punct", + docs=[ + {"_id": 1, "x": "a\x00b"}, + {"_id": 2, "x": "ab"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": "punct", + }, + }, + expected=[{"_id": 1, "x": "a\x00b"}, {"_id": 2, "x": "ab"}], + msg="null byte (U+0000) should be ignorable with alternate shifted and maxVariable punct", + ), + CommandTestCase( + "symbols_never_ignorable", + docs=[ + {"_id": 1, "x": "a$b"}, + {"_id": 2, "x": "a+b"}, + {"_id": 3, "x": "a=b"}, + {"_id": 4, "x": "a\u20acb"}, + {"_id": 5, "x": "ab"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": "punct", + }, + }, + expected=[{"_id": 5, "x": "ab"}], + msg="symbol characters (dollar, plus, equals, currency) should never be ignorable", + ), +] + +# Property [Alternate and MaxVariable Null Acceptance]: null for alternate is +# treated as omitted (default "non-ignorable"), and null for maxVariable is +# treated as omitted (default "punct"). 
+COLLATION_ALTERNATE_MAXVARIABLE_NULL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "alternate_null_uses_default_non_ignorable", + docs=[{"_id": 1, "x": "a b"}, {"_id": 2, "x": "ab"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": {"locale": "en", "alternate": None}, + }, + expected=[{"_id": 2, "x": "ab"}], + msg="aggregate should treat null alternate as omitted (default non-ignorable)", + ), + CommandTestCase( + "maxvariable_null_uses_default_punct", + docs=[{"_id": 1, "x": "a.b"}, {"_id": 2, "x": "ab"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "ab"}}], + "cursor": {}, + "collation": { + "locale": "en", + "alternate": "shifted", + "strength": 3, + "maxVariable": None, + }, + }, + expected=[{"_id": 1, "x": "a.b"}, {"_id": 2, "x": "ab"}], + msg="aggregate should treat null maxVariable as omitted (default punct)", + ), +] + +COLLATION_ALTERNATE_TESTS: list[CommandTestCase] = ( + COLLATION_ALTERNATE_MAXVARIABLE_BEHAVIOR_TESTS + COLLATION_ALTERNATE_MAXVARIABLE_NULL_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_ALTERNATE_TESTS)) +def test_collation_alternate(database_client, collection, test): + """Test alternate and maxVariable collation options.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_backwards.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_backwards.py new file mode 100644 index 000000000..901df9795 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_backwards.py @@ -0,0 +1,210 @@ +"""Tests for backwards sort ordering behavior 
in collation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Backwards Sort Ordering]: backwards:true compares secondary +# (diacritic) differences from the end of the string toward the start; +# backwards:false (the default, except for fr_CA) compares them front to back; +# backwards only affects ordering comparisons, not equality. +COLLATION_BACKWARDS_BEHAVIOR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "backwards_false_sort_diacritics_front_to_back", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2, "backwards": False}, + }, + expected=[ + {"_id": 1, "x": "cote"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + msg="backwards:false should compare diacritics front to back", + ), + CommandTestCase( + "backwards_true_sort_diacritics_back_to_front", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2, "backwards": True}, + }, + expected=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + msg="backwards:true should compare diacritics back to front", + ), + CommandTestCase( +
"backwards_true_lt_comparison", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$lt": "c\u00f4te"}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2, "backwards": True}, + }, + expected=[{"_id": 1, "x": "cote"}], + msg="backwards:true should affect $lt ordering", + ), + CommandTestCase( + "backwards_false_lt_comparison", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$lt": "c\u00f4te"}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2, "backwards": False}, + }, + expected=[{"_id": 1, "x": "cote"}, {"_id": 3, "x": "cot\u00e9"}], + msg="backwards:false should affect $lt ordering differently than backwards:true", + ), + CommandTestCase( + "backwards_true_eq_not_affected", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "cote"}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2, "backwards": True}, + }, + expected=[{"_id": 1, "x": "cote"}], + msg="backwards:true should not affect $eq matching", + ), + CommandTestCase( + "backwards_true_in_not_affected", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$in": ["cote"]}}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2, "backwards": True}, + }, + expected=[{"_id": 1, "x": "cote"}], + msg="backwards:true should not affect $in matching", + ), + CommandTestCase( + 
"backwards_fr_ca_default_true", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "fr_CA", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + msg="fr_CA locale should default to backwards:true when field is omitted", + ), + CommandTestCase( + "backwards_default_false_non_fr_ca", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "fr", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "cote"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + msg="omitting backwards for non-fr_CA locale should default to backwards:false", + ), + CommandTestCase( + "backwards_fr_ca_explicit_false", + docs=[ + {"_id": 1, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "fr_CA", "strength": 2, "backwards": False}, + }, + expected=[ + {"_id": 1, "x": "cote"}, + {"_id": 3, "x": "cot\u00e9"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 4, "x": "c\u00f4t\u00e9"}, + ], + msg="fr_CA with explicit backwards:false should override the locale default", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_BACKWARDS_BEHAVIOR_TESTS)) +def test_collation_backwards(database_client, collection, test): + """Test backwards collation option for accent ordering.""" + 
collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_bulk_write.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_bulk_write.py new file mode 100644 index 000000000..03da9eec3 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_bulk_write.py @@ -0,0 +1,174 @@ +"""Tests for collation in bulkWrite operations.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [BulkWrite Update Collation]: individual update operations within a +# bulkWrite can specify collation, affecting filter matching independently. 
def _bulk_update_docs() -> list[dict]:
    """Return a fresh copy of the two-document fixture used by every update case."""
    return [
        {"_id": 1, "x": "Apple", "v": 1},
        {"_id": 2, "x": "banana", "v": 1},
    ]


def _bulk_update_stmt(match_value: str, new_v: int, *, collated: bool) -> dict:
    """Build one update statement that sets ``v`` where ``x`` equals *match_value*.

    When *collated* is true the statement carries its own en/strength-2
    collation; otherwise the ``collation`` key is omitted from the statement.
    """
    stmt: dict = {"q": {"x": match_value}, "u": {"$set": {"v": new_v}}}
    if collated:
        stmt["collation"] = {"locale": "en", "strength": 2}
    return stmt


COLLATION_BULK_UPDATE_TESTS: list[CommandTestCase] = [
    # Both statements are collated, so both differently-cased filters match.
    CommandTestCase(
        "bulk_update_case_insensitive",
        docs=_bulk_update_docs(),
        command=lambda ctx: {
            "update": ctx.collection,
            "updates": [
                _bulk_update_stmt("apple", 2, collated=True),
                _bulk_update_stmt("BANANA", 3, collated=True),
            ],
        },
        expected={"ok": 1.0, "n": 2, "nModified": 2},
        msg="bulkWrite updates should each use their own collation",
    ),
    # Only the first statement is collated; the second filter is compared
    # without a per-statement collation and is expected to match nothing.
    CommandTestCase(
        "bulk_update_mixed_collation",
        docs=_bulk_update_docs(),
        command=lambda ctx: {
            "update": ctx.collection,
            "updates": [
                _bulk_update_stmt("apple", 2, collated=True),
                _bulk_update_stmt("BANANA", 3, collated=False),
            ],
        },
        expected={"ok": 1.0, "n": 1, "nModified": 1},
        msg="bulkWrite with mixed collation: only collated op should match case-insensitively",
    ),
]

# Property [BulkWrite Delete Collation]: individual delete operations within a
# bulkWrite can specify collation, affecting filter matching independently.
def _bulk_delete_apple(ctx, collation: dict | None = None) -> dict:
    """Build a delete command removing every document whose ``x`` is "apple".

    *collation*, when given, is attached to the single delete statement;
    when it is ``None`` the statement has no ``collation`` key at all.
    """
    stmt: dict = {"q": {"x": "apple"}, "limit": 0}
    if collation is not None:
        stmt["collation"] = collation
    return {"delete": ctx.collection, "deletes": [stmt]}


COLLATION_BULK_DELETE_TESTS: list[CommandTestCase] = [
    # Collated statement: lowercase filter is expected to remove "Apple".
    CommandTestCase(
        "bulk_delete_case_insensitive",
        docs=[
            {"_id": 1, "x": "Apple"},
            {"_id": 2, "x": "banana"},
            {"_id": 3, "x": "cherry"},
        ],
        command=lambda ctx: _bulk_delete_apple(ctx, {"locale": "en", "strength": 2}),
        expected={"ok": 1.0, "n": 1},
        msg="bulkWrite delete with collation should match case-insensitively",
    ),
    # No collation: only the exact-case "apple" document is expected to go.
    CommandTestCase(
        "bulk_delete_no_collation_binary",
        docs=[
            {"_id": 1, "x": "Apple"},
            {"_id": 2, "x": "apple"},
            {"_id": 3, "x": "banana"},
        ],
        command=lambda ctx: _bulk_delete_apple(ctx),
        expected={"ok": 1.0, "n": 1},
        msg="bulkWrite delete without collation should use binary comparison",
    ),
]

# Property [BulkWrite Collection Default Collation]: when no per-operation
# collation is specified, bulkWrite operations inherit the collection default.
+COLLATION_BULK_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "bulk_update_inherits_collection_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "Apple", "v": 1}, + {"_id": 2, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": {"$set": {"v": 2}}, + }, + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="bulkWrite update should inherit collection default collation", + ), + CommandTestCase( + "bulk_delete_inherits_collection_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "Apple"}, + {"_id": 2, "x": "banana"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + { + "q": {"x": "apple"}, + "limit": 0, + }, + ], + }, + expected={"ok": 1.0, "n": 1}, + msg="bulkWrite delete should inherit collection default collation", + ), +] + +COLLATION_BULK_WRITE_TESTS = ( + COLLATION_BULK_UPDATE_TESTS + + COLLATION_BULK_DELETE_TESTS + + COLLATION_BULK_COLLECTION_DEFAULT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_BULK_WRITE_TESTS)) +def test_collation_bulk_write(database_client, collection, test): + """Test collation behavior in bulkWrite operations.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_casefirst.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_casefirst.py new file mode 100644 index 000000000..df2694fca --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_casefirst.py @@ -0,0 +1,296 @@ +"""Tests for caseFirst sort ordering and null acceptance in collation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [caseFirst Sort Ordering]: caseFirst "upper" sorts uppercase before +# lowercase within the same base character, "lower" sorts lowercase first, and +# "off" behaves identically to "lower"; caseFirst affects comparison operators +# but does NOT affect equality matching. +COLLATION_CASEFIRST_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "casefirst_upper_sort", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[ + {"_id": 2, "x": "A"}, + {"_id": 1, "x": "a"}, + {"_id": 4, "x": "B"}, + {"_id": 3, "x": "b"}, + ], + msg="caseFirst upper should sort uppercase before lowercase", + ), + CommandTestCase( + "casefirst_lower_sort", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "lower"}, + }, + expected=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + msg="caseFirst lower should sort lowercase before uppercase", + ), + CommandTestCase( + "casefirst_off_sort", + docs=[ + {"_id": 1, "x": "a"}, 
+ {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "off"}, + }, + expected=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + msg="caseFirst off should behave identically to lower", + ), + CommandTestCase( + "casefirst_upper_gt_comparison", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$gt": "A"}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[ + {"_id": 1, "x": "a"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + msg="caseFirst upper should affect $gt so that a > A", + ), + CommandTestCase( + "casefirst_upper_lt_comparison", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$lt": "a"}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[{"_id": 2, "x": "A"}], + msg="caseFirst upper should affect $lt so that A < a", + ), + CommandTestCase( + "casefirst_lower_gt_comparison", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$gt": "A"}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "lower"}, + }, + expected=[{"_id": 3, "x": "b"}, {"_id": 4, "x": "B"}], + msg="caseFirst lower should affect $gt so that only b/B > A", + ), + CommandTestCase( + "casefirst_upper_eq_not_affected", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, 
"x": "A"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "a"}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[{"_id": 1, "x": "a"}], + msg="caseFirst upper should not affect $eq matching", + ), + CommandTestCase( + "casefirst_upper_ne_not_affected", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$ne": "a"}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[{"_id": 2, "x": "A"}, {"_id": 3, "x": "b"}], + msg="caseFirst upper should not affect $ne matching", + ), + CommandTestCase( + "casefirst_upper_in_not_affected", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$in": ["a"]}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[{"_id": 1, "x": "a"}], + msg="caseFirst upper should not affect $in matching", + ), + CommandTestCase( + "casefirst_upper_gte_comparison", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$gte": "a"}}}, {"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[ + {"_id": 1, "x": "a"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + msg="caseFirst upper should affect $gte so that a >= a but A < a", + ), + CommandTestCase( + "casefirst_upper_lte_comparison", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": {"$lte": "A"}}}, 
{"$sort": {"_id": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[{"_id": 2, "x": "A"}], + msg="caseFirst upper should affect $lte so that only A <= A", + ), + CommandTestCase( + "casefirst_upper_cmp_comparison", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$project": {"x": 1, "cmp": {"$cmp": ["$x", "A"]}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": "upper"}, + }, + expected=[ + {"_id": 1, "x": "a", "cmp": 1}, + {"_id": 2, "x": "A", "cmp": 0}, + {"_id": 3, "x": "b", "cmp": 1}, + {"_id": 4, "x": "B", "cmp": 1}, + ], + msg="caseFirst upper should affect $cmp so that a > A", + ), +] + +# Property [caseFirst Null Acceptance]: null for caseFirst is treated as +# omitted, using the default value of "off". +COLLATION_CASEFIRST_NULL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "casefirst_null_uses_default_off", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "caseFirst": None}, + }, + expected=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + {"_id": 3, "x": "b"}, + {"_id": 4, "x": "B"}, + ], + msg="aggregate should treat null caseFirst as omitted (default off)", + ), +] + +COLLATION_CASEFIRST_TESTS: list[CommandTestCase] = ( + COLLATION_CASEFIRST_SORT_TESTS + COLLATION_CASEFIRST_NULL_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_CASEFIRST_TESTS)) +def test_collation_casefirst(database_client, collection, test): + """Test caseFirst collation option for upper/lower ordering.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = 
execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_caselevel.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_caselevel.py new file mode 100644 index 000000000..71e5f7e58 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_caselevel.py @@ -0,0 +1,191 @@ +"""Tests for caseLevel interaction with strength and null acceptance.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [caseLevel Interaction with Strength]: caseLevel:true adds a case +# distinction at strength 1 and 2 (where case is normally ignored), while +# caseLevel:false preserves the default behavior; at strength 3+ caseLevel:true +# has no additional effect. 
# Cases exercising how caseLevel combines with strength 1, 2 and 3.
#
# Every case that expects more than one document pins the result order with an
# explicit _id sort. The expected lists are compared as ordered sequences, and
# the server does not guarantee the relative order of documents that are
# unsorted or that tie on the sort key, so an explicit sort keeps these cases
# deterministic. The sibling backwards/casefirst suites follow the same
# convention.
COLLATION_CASELEVEL_STRENGTH_TESTS: list[CommandTestCase] = [
    CommandTestCase(
        "caselevel_true_strength1_match_lowercase",
        docs=[
            {"_id": 1, "x": "a"},
            {"_id": 2, "x": "A"},
            {"_id": 3, "x": "\u00e1"},
            {"_id": 4, "x": "\u00c1"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$match": {"x": "a"}}, {"$sort": {"_id": 1}}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1, "caseLevel": True},
        },
        expected=[{"_id": 1, "x": "a"}, {"_id": 3, "x": "\u00e1"}],
        msg="caseLevel:true strength 1 should match same-case ignoring diacritics",
    ),
    CommandTestCase(
        "caselevel_true_strength1_match_uppercase",
        docs=[
            {"_id": 1, "x": "a"},
            {"_id": 2, "x": "A"},
            {"_id": 3, "x": "\u00e1"},
            {"_id": 4, "x": "\u00c1"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$match": {"x": "A"}}, {"$sort": {"_id": 1}}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1, "caseLevel": True},
        },
        expected=[{"_id": 2, "x": "A"}, {"_id": 4, "x": "\u00c1"}],
        msg="caseLevel:true strength 1 should match uppercase ignoring diacritics",
    ),
    CommandTestCase(
        "caselevel_true_strength2_distinguishes_case_and_diacritics",
        docs=[
            {"_id": 1, "x": "a"},
            {"_id": 2, "x": "A"},
            {"_id": 3, "x": "\u00e1"},
            {"_id": 4, "x": "\u00c1"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$match": {"x": "a"}}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 2, "caseLevel": True},
        },
        expected=[{"_id": 1, "x": "a"}],
        msg="caseLevel:true strength 2 should distinguish both case and diacritics",
    ),
    CommandTestCase(
        "caselevel_false_strength1_no_case_distinction",
        docs=[
            {"_id": 1, "x": "a"},
            {"_id": 2, "x": "A"},
            {"_id": 3, "x": "\u00e1"},
            {"_id": 4, "x": "\u00c1"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$match": {"x": "a"}}, {"$sort": {"_id": 1}}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1, "caseLevel": False},
        },
        expected=[
            {"_id": 1, "x": "a"},
            {"_id": 2, "x": "A"},
            {"_id": 3, "x": "\u00e1"},
            {"_id": 4, "x": "\u00c1"},
        ],
        msg="caseLevel:false strength 1 should not distinguish case",
    ),
    CommandTestCase(
        "caselevel_false_strength2_no_case_distinction",
        docs=[
            {"_id": 1, "x": "a"},
            {"_id": 2, "x": "A"},
            {"_id": 3, "x": "\u00e1"},
            {"_id": 4, "x": "\u00c1"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$match": {"x": "a"}}, {"$sort": {"_id": 1}}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 2, "caseLevel": False},
        },
        expected=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "A"}],
        msg="caseLevel:false strength 2 should ignore case but distinguish diacritics",
    ),
    CommandTestCase(
        "caselevel_true_strength3_no_additional_effect",
        docs=[
            {"_id": 1, "x": "a"},
            {"_id": 2, "x": "A"},
            {"_id": 3, "x": "\u00e1"},
            {"_id": 4, "x": "\u00c1"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            "pipeline": [{"$match": {"x": "a"}}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 3, "caseLevel": True},
        },
        expected=[{"_id": 1, "x": "a"}],
        msg="caseLevel:true at strength 3 should have no additional effect",
    ),
    CommandTestCase(
        "caselevel_true_strength1_sort_lowercase_before_uppercase",
        docs=[
            {"_id": 1, "x": "B"},
            {"_id": 2, "x": "a"},
            {"_id": 3, "x": "A"},
            {"_id": 4, "x": "b"},
            {"_id": 5, "x": "\u00e1"},
            {"_id": 6, "x": "\u00c1"},
        ],
        command=lambda ctx: {
            "aggregate": ctx.collection,
            # At strength 1 with caseLevel:true, "a"/"\u00e1" and "A"/"\u00c1"
            # compare equal, so _id is the tie-breaker that fixes the order
            # inside each pair. The lowercase-before-uppercase ordering under
            # test is still decided by x alone.
            "pipeline": [{"$sort": {"x": 1, "_id": 1}}],
            "cursor": {},
            "collation": {"locale": "en", "strength": 1, "caseLevel": True},
        },
        expected=[
            {"_id": 2, "x": "a"},
            {"_id": 5, "x": "\u00e1"},
            {"_id": 3, "x": "A"},
            {"_id": 6, "x": "\u00c1"},
            {"_id": 4, "x": "b"},
            {"_id": 1, "x": "B"},
        ],
        msg="caseLevel:true strength 1 sort should place lowercase before uppercase",
    ),
]

# Property [caseLevel Null Acceptance]: null for caseLevel is treated as
# omitted, using the default value of false.
+COLLATION_CASELEVEL_NULL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "caselevel_null_uses_default_false", + docs=[ + {"_id": 1, "x": "a"}, + {"_id": 2, "x": "A"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "a"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1, "caseLevel": None}, + }, + expected=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "A"}], + msg="aggregate should treat null caseLevel as omitted (default false)", + ), +] + +COLLATION_CASELEVEL_TESTS: list[CommandTestCase] = ( + COLLATION_CASELEVEL_STRENGTH_TESTS + COLLATION_CASELEVEL_NULL_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_CASELEVEL_TESTS)) +def test_collation_caselevel(database_client, collection, test): + """Test caseLevel collation option behavior.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_count.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_count.py new file mode 100644 index 000000000..7cb672790 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_count.py @@ -0,0 +1,121 @@ +"""Tests for collation semantics in the count command. + +Tests collation-specific behaviors (strength levels, accent handling) that go +beyond basic wiring validation covered in the count command tests. 
+""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Count Strength Semantics]: different strength levels produce +# different matching behavior - strength 1 ignores accents and case, +# strength 3 is case-sensitive. +COLLATION_COUNT_STRENGTH_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "strength1_accent_insensitive", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "caf\u00e9"}, + {"_id": 3, "x": "other"}, + ], + command=lambda ctx: { + "count": ctx.collection, + "query": {"x": "cafe"}, + "collation": {"locale": "en", "strength": 1}, + }, + expected={"n": 2, "ok": 1.0}, + msg="count with strength 1 should match accent-insensitively", + ), + CommandTestCase( + "strength3_case_sensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "count": ctx.collection, + "query": {"x": "apple"}, + "collation": {"locale": "en", "strength": 3}, + }, + expected={"n": 1, "ok": 1.0}, + msg="count with strength 3 should match case-sensitively", + ), + CommandTestCase( + "no_query_counts_all", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "count": ctx.collection, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"n": 3, "ok": 1.0}, + msg="count without query should return total count regardless of collation", + ), +] + +# Property [Count Collection Default Collation]: when no explicit collation is +# specified, the count command uses the collection's default collation. 
+COLLATION_COUNT_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collection_default_case_insensitive", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "count": ctx.collection, + "query": {"x": "apple"}, + }, + expected={"n": 2, "ok": 1.0}, + msg="count should use collection default collation when none specified", + ), + CommandTestCase( + "explicit_overrides_collection_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "count": ctx.collection, + "query": {"x": "apple"}, + "collation": {"locale": "en", "strength": 3}, + }, + expected={"n": 1, "ok": 1.0}, + msg="count with explicit collation should override collection default", + ), +] + +COLLATION_COUNT_TESTS = COLLATION_COUNT_STRENGTH_TESTS + COLLATION_COUNT_DEFAULT_TESTS + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_COUNT_TESTS)) +def test_collation_count(database_client, collection, test): + """Test collation strength semantics in the count command.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_delete.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_delete.py new file mode 100644 index 000000000..9df553e17 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_delete.py @@ -0,0 +1,230 @@ +"""Tests for collation behavior in the delete command.""" + +from 
__future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + MISSING_FIELD_ERROR, + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Delete Filter Matching]: collation affects which documents the +# delete filter selects, enabling case-insensitive and accent-insensitive +# matching for the query portion of the delete. +COLLATION_DELETE_FILTER_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "deleteone_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + {"q": {"x": "apple"}, "limit": 1, "collation": {"locale": "en", "strength": 2}} + ], + }, + expected={"ok": 1.0, "n": 1}, + msg="deleteOne with strength 2 should match first case-insensitive document", + ), + CommandTestCase( + "deletemany_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + {"_id": 4, "x": "banana"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + {"q": {"x": "apple"}, "limit": 0, "collation": {"locale": "en", "strength": 2}} + ], + }, + expected={"ok": 1.0, "n": 3}, + msg="deleteMany with strength 2 should match all case variants", + ), + CommandTestCase( + "delete_accent_insensitive", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "caf\u00e9"}, + {"_id": 3, "x": "other"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + {"q": {"x": "cafe"}, "limit": 0, "collation": {"locale": "en", 
"strength": 1}} + ], + }, + expected={"ok": 1.0, "n": 2}, + msg="delete with strength 1 should match accent variants", + ), + CommandTestCase( + "delete_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [{"q": {"x": "apple"}, "limit": 0}], + }, + expected={"ok": 1.0, "n": 1}, + msg="delete without collation should use binary comparison", + ), + CommandTestCase( + "delete_gt_filter", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "Banana"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + { + "q": {"x": {"$gt": "apple"}}, + "limit": 0, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 2}, + msg="delete $gt with strength 2 should compare case-insensitively", + ), + CommandTestCase( + "delete_in_operator", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + { + "q": {"x": {"$in": ["apple", "cherry"]}}, + "limit": 0, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 3}, + msg="delete $in with strength 2 should match case variants", + ), +] + +# Property [Delete Collation Validation]: the delete command validates the +# collation document in each delete statement. 
def _delete_with_collation(collation):
    """Return a command builder for a delete whose one statement uses *collation*.

    *collation* is passed through untouched, so deliberately malformed values
    (a bare string, a document without ``locale``) reach the server as given.
    A factory is used instead of an inline lambda so that each generated case
    binds its own value.
    """

    def build(ctx):
        return {
            "delete": ctx.collection,
            "deletes": [{"q": {"x": "a"}, "limit": 0, "collation": collation}],
        }

    return build


# Each row: (case id, collation value sent, expected error code, failure message).
_DELETE_VALIDATION_ROWS = (
    (
        "validation_non_object_collation",
        "en",
        TYPE_MISMATCH_ERROR,
        "delete with non-object collation should produce an error",
    ),
    (
        "validation_missing_locale",
        {"strength": 2},
        MISSING_FIELD_ERROR,
        "delete with collation missing locale should produce an error",
    ),
    (
        "validation_invalid_locale",
        {"locale": "invalid_locale_xyz"},
        BAD_VALUE_ERROR,
        "delete with invalid locale string should produce an error",
    ),
)

COLLATION_DELETE_VALIDATION_TESTS: list[CommandTestCase] = [
    CommandTestCase(
        case_id,
        docs=[{"_id": 1, "x": "a"}],
        command=_delete_with_collation(bad_collation),
        error_code=expected_code,
        msg=failure_msg,
    )
    for case_id, bad_collation, expected_code, failure_msg in _DELETE_VALIDATION_ROWS
]

# Property [Delete Collection Default Collation]: when no collation is specified
# on the delete statement, the collection's default collation is used for
# filter matching.
+COLLATION_DELETE_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collection_default_inherited", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [{"q": {"x": "apple"}, "limit": 0}], + }, + expected={"ok": 1.0, "n": 2}, + msg="delete should inherit collection default collation", + ), + CommandTestCase( + "collection_default_overridden", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + { + "q": {"x": "apple"}, + "limit": 0, + "collation": {"locale": "en", "strength": 3}, + } + ], + }, + expected={"ok": 1.0, "n": 1}, + msg="delete with explicit collation should override collection default", + ), +] + +COLLATION_DELETE_TESTS = ( + COLLATION_DELETE_FILTER_TESTS + + COLLATION_DELETE_VALIDATION_TESTS + + COLLATION_DELETE_COLLECTION_DEFAULT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_DELETE_TESTS)) +def test_collation_delete(database_client, collection, test): + """Test collation behavior in the delete command.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_distinct.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_distinct.py new file mode 100644 index 000000000..f5a3e29f6 --- /dev/null +++ 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_distinct.py @@ -0,0 +1,158 @@ +"""Tests for collation semantics in the distinct command. + +Tests collation-specific behaviors (strength levels, accent handling, numeric +ordering) that go beyond basic wiring validation covered in the distinct +command tests. +""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Distinct Strength Semantics]: different strength levels produce +# different deduplication behavior - strength 1 collapses accents and case, +# strength 3 preserves all variants. +COLLATION_DISTINCT_STRENGTH_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "strength1_accent_insensitive", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "caf\u00e9"}, + {"_id": 3, "x": "other"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 1}, + }, + expected={"values": ["cafe", "other"], "ok": 1.0}, + msg="distinct with strength 1 should deduplicate accent variants", + ), + CommandTestCase( + "strength3_preserves_all", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 3}, + }, + expected={"values": ["apple", "Apple", "APPLE"], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct with strength 3 should preserve case-distinct values", + ), +] + +# Property [Distinct Query Filter with Non-String Predicate]: collation affects +# deduplication 
even when the query filter uses non-string comparisons. +COLLATION_DISTINCT_QUERY_SEMANTIC_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "query_non_string_filter_with_collation_dedup", + docs=[ + {"_id": 1, "x": "apple", "n": 1}, + {"_id": 2, "x": "Apple", "n": 2}, + {"_id": 3, "x": "banana", "n": 3}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "query": {"n": {"$gt": 1}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"values": ["Apple", "banana"], "ok": 1.0}, + msg="distinct should apply collation to deduplication even with non-string query", + ), +] + +# Property [Distinct Numeric Ordering]: numericOrdering affects the ordering +# of distinct results that contain embedded numeric substrings. +COLLATION_DISTINCT_NUMERIC_ORDERING_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "numeric_ordering_dedup", + docs=[ + {"_id": 1, "x": "file2"}, + {"_id": 2, "x": "file10"}, + {"_id": 3, "x": "file1"}, + {"_id": 4, "x": "file2"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected={"values": ["file1", "file2", "file10"], "ok": 1.0}, + msg="distinct with numericOrdering should order embedded numbers numerically", + ), +] + +# Property [Distinct Collection Default Collation]: when no explicit collation is +# specified, the distinct command uses the collection's default collation. 
+COLLATION_DISTINCT_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collection_default_deduplicates", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + }, + expected={"values": ["apple", "banana"], "ok": 1.0}, + msg="distinct should use collection default collation for deduplication", + ), + CommandTestCase( + "explicit_overrides_collection_default", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 1}}), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 3}, + }, + expected={"values": ["apple", "Apple", "banana"], "ok": 1.0}, + ignore_order_in=["values"], + msg="distinct with explicit collation should override collection default", + ), +] + +COLLATION_DISTINCT_TESTS = ( + COLLATION_DISTINCT_STRENGTH_TESTS + + COLLATION_DISTINCT_QUERY_SEMANTIC_TESTS + + COLLATION_DISTINCT_NUMERIC_ORDERING_TESTS + + COLLATION_DISTINCT_DEFAULT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_DISTINCT_TESTS)) +def test_collation_distinct(database_client, collection, test): + """Test collation strength and ordering semantics in the distinct command.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ignore_order_in=test.ignore_order_in, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py new file mode 100644 index 000000000..2b52b0069 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py @@ -0,0 +1,280 @@ +"""Tests for collation effects on dotted (nested) field paths.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Dotted Path Filter Matching]: collation affects equality and +# comparison operators on dotted field paths in find and aggregate $match, +# enabling case-insensitive matching on nested document fields. +COLLATION_DOTTED_FILTER_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "find_dotted_eq_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 3, "a": {"b": "banana"}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"a.b": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + ], + msg="find on dotted path with strength 2 should match case-insensitively", + ), + CommandTestCase( + "find_dotted_gt_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Banana"}}, + {"_id": 3, "a": {"b": "cherry"}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"a.b": {"$gt": "apple"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 2, "a": {"b": "Banana"}}, + {"_id": 3, "a": {"b": "cherry"}}, + ], + msg="find $gt on dotted path should use collation", + ), + CommandTestCase( 
+ "find_dotted_in_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 3, "a": {"b": "banana"}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"a.b": {"$in": ["APPLE"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + ], + msg="find $in on dotted path should use collation", + ), + CommandTestCase( + "find_deeply_nested_eq", + docs=[ + {"_id": 1, "a": {"b": {"c": "apple"}}}, + {"_id": 2, "a": {"b": {"c": "Apple"}}}, + {"_id": 3, "a": {"b": {"c": "banana"}}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"a.b.c": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "a": {"b": {"c": "apple"}}}, + {"_id": 2, "a": {"b": {"c": "Apple"}}}, + ], + msg="find on deeply nested dotted path should use collation", + ), + CommandTestCase( + "match_dotted_eq_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 3, "a": {"b": "banana"}}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"a.b": "apple"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + ], + msg="$match on dotted path should use collation", + ), + CommandTestCase( + "find_dotted_no_collation_binary", + docs=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"a.b": "apple"}, + }, + expected=[{"_id": 1, "a": {"b": "apple"}}], + msg="find on dotted path without collation should use binary comparison", + ), +] + +# Property [Dotted Path Sort Ordering]: collation affects sort ordering when +# sorting on dotted field paths. 
+COLLATION_DOTTED_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_dotted_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "banana"}}, + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 3, "a": {"b": "cherry"}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"a.b": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 1, "a": {"b": "banana"}}, + {"_id": 3, "a": {"b": "cherry"}}, + ], + msg="find sort on dotted path should use collation for case-insensitive ordering", + ), + CommandTestCase( + "sort_dotted_numeric_ordering", + docs=[ + {"_id": 1, "a": {"b": "file10"}}, + {"_id": 2, "a": {"b": "file2"}}, + {"_id": 3, "a": {"b": "file1"}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"a.b": 1}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 3, "a": {"b": "file1"}}, + {"_id": 2, "a": {"b": "file2"}}, + {"_id": 1, "a": {"b": "file10"}}, + ], + msg="find sort on dotted path should use collation numericOrdering", + ), + CommandTestCase( + "sort_deeply_nested", + docs=[ + {"_id": 1, "a": {"b": {"c": "banana"}}}, + {"_id": 2, "a": {"b": {"c": "Apple"}}}, + {"_id": 3, "a": {"b": {"c": "cherry"}}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"a.b.c": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 2, "a": {"b": {"c": "Apple"}}}, + {"_id": 1, "a": {"b": {"c": "banana"}}}, + {"_id": 3, "a": {"b": {"c": "cherry"}}}, + ], + msg="find sort on deeply nested dotted path should use collation", + ), + CommandTestCase( + "aggregate_sort_dotted", + docs=[ + {"_id": 1, "a": {"b": "banana"}}, + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 3, "a": {"b": "cherry"}}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"a.b": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + 
expected=[ + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 1, "a": {"b": "banana"}}, + {"_id": 3, "a": {"b": "cherry"}}, + ], + msg="aggregate $sort on dotted path should use collation", + ), +] + +# Property [Dotted Path in Update Filter]: collation affects the filter on +# dotted paths in update commands. +COLLATION_DOTTED_UPDATE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "update_dotted_filter_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "apple"}, "v": 1}, + {"_id": 2, "a": {"b": "Apple"}, "v": 1}, + {"_id": 3, "a": {"b": "banana"}, "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"a.b": "apple"}, + "u": {"$set": {"v": 2}}, + "multi": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 2, "nModified": 2}, + msg="update on dotted path filter should use collation", + ), +] + +# Property [Dotted Path in Distinct]: collation affects deduplication on dotted +# field paths. +COLLATION_DOTTED_DISTINCT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "distinct_dotted_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 3, "a": {"b": "banana"}}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "a.b", + "collation": {"locale": "en", "strength": 2}, + }, + expected={"values": ["apple", "banana"], "ok": 1.0}, + msg="distinct on dotted path should use collation for deduplication", + ), +] + +COLLATION_DOTTED_PATH_TESTS: list[CommandTestCase] = ( + COLLATION_DOTTED_FILTER_TESTS + + COLLATION_DOTTED_SORT_TESTS + + COLLATION_DOTTED_UPDATE_TESTS + + COLLATION_DOTTED_DISTINCT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_DOTTED_PATH_TESTS)) +def test_collation_dotted_paths(database_client, collection, test): + """Test collation effects on dotted field paths.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = 
execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(test.build_expected(ctx), list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_edge_cases.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_edge_cases.py new file mode 100644 index 000000000..c7de1262d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_edge_cases.py @@ -0,0 +1,168 @@ +"""Tests for collation edge cases with capped collections and text indexes.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Eq +from documentdb_tests.framework.target_collection import CappedCollection, CustomCollection + +# Property [Capped Collection Collation]: a capped collection can be created +# with a default collation, and collation affects filter matching and sort +# ordering on capped collections the same as regular collections. 
+COLLATION_CAPPED_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "capped_with_default_collation_filter", + target_collection=CustomCollection( + options={"capped": True, "size": 4096, "collation": {"locale": "en", "strength": 2}} + ), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + msg="capped collection with default collation should use it for filter matching", + ), + CommandTestCase( + "capped_explicit_collation_filter", + target_collection=CappedCollection(size=4096), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + msg="capped collection should support explicit collation on find", + ), + CommandTestCase( + "capped_collation_sort", + target_collection=CappedCollection(size=4096), + docs=[ + {"_id": 1, "x": "banana"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "cherry"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 2, "x": "Apple"}, + {"_id": 1, "x": "banana"}, + {"_id": 3, "x": "cherry"}, + ], + msg="capped collection should support collation sort ordering", + ), + CommandTestCase( + "capped_count_with_collation", + target_collection=CappedCollection(size=4096), + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "count": ctx.collection, + "query": {"x": "apple"}, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"n": 2, "ok": 1.0}, + msg="count on capped collection should support 
collation", + ), +] + +# Property [Text Index Collation Incompatibility]: a text index cannot be +# created with a collation other than simple; creating one on a collection +# with a non-simple default collation requires specifying +# collation {locale: "simple"} on the index. +COLLATION_TEXT_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "text_index_on_simple_collection", + docs=[{"_id": 1, "x": "hello world"}], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [{"key": {"x": "text"}, "name": "x_text"}], + }, + expected={"ok": Eq(1.0)}, + msg="text index should be creatable on collection without collation", + ), + CommandTestCase( + "text_index_with_simple_collation_on_collated_collection", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "hello world"}], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + {"key": {"x": "text"}, "name": "x_text", "collation": {"locale": "simple"}} + ], + }, + expected={"ok": Eq(1.0)}, + msg="text index with simple collation should be creatable on collated collection", + ), + CommandTestCase( + "text_search_ignores_collection_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "Cafe Mocha"}, + {"_id": 3, "x": "tea"}, + ], + indexes=[ + IndexModel([("x", "text")], collation={"locale": "simple"}, name="x_text"), + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$text": {"$search": "cafe"}}, + "sort": {"_id": 1}, + }, + expected=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "Cafe Mocha"}, + ], + msg="text search should use text index semantics not collection collation", + ), +] + +COLLATION_EDGE_CASE_TESTS = COLLATION_CAPPED_TESTS + COLLATION_TEXT_INDEX_TESTS + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_EDGE_CASE_TESTS)) +def 
test_collation_edge_cases(database_client, collection, test): + """Test collation edge cases with capped collections and text indexes.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + expected = test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_expr_filter.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_expr_filter.py new file mode 100644 index 000000000..46271e212 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_expr_filter.py @@ -0,0 +1,214 @@ +"""Tests for collation with $expr in find, update, and delete command filters.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Find $expr with Collation]: $expr in the find command filter uses +# command-level collation for expression operators like $eq, $cmp, $gt. 
+COLLATION_FIND_EXPR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "find_expr_eq_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$expr": {"$eq": ["$x", "apple"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="find $expr $eq with strength 2 should match case-insensitively", + ), + CommandTestCase( + "find_expr_gt_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$expr": {"$gt": ["$x", "apple"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="find $expr $gt with strength 2 should compare case-insensitively", + ), + CommandTestCase( + "find_expr_cmp_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$expr": {"$eq": [{"$cmp": ["$x", "apple"]}, 0]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="find $expr $cmp with strength 2 should compare case-insensitively", + ), + CommandTestCase( + "find_expr_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$expr": {"$eq": ["$x", "apple"]}}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="find $expr without collation should use binary comparison", + ), + CommandTestCase( + "find_expr_in_field_ref_no_collation", + docs=[ + {"_id": 1, "x": "apple", "arr": ["Apple", "Banana"]}, + {"_id": 2, "x": "grape", "arr": ["Apple", "Banana"]}, + ], + command=lambda 
ctx: { + "find": ctx.collection, + "filter": {"$expr": {"$in": ["$x", ["Apple", "Banana"]]}}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[], + msg="find $expr $in with field ref first arg should not use collation", + ), + CommandTestCase( + "find_expr_in_literal_uses_collation", + docs=[ + {"_id": 1, "x": "apple", "arr": ["Apple", "Banana"]}, + {"_id": 2, "x": "grape", "arr": ["Apple", "Banana"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$expr": {"$in": ["apple", "$arr"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 1, "x": "apple", "arr": ["Apple", "Banana"]}, + {"_id": 2, "x": "grape", "arr": ["Apple", "Banana"]}, + ], + msg="find $expr $in with literal first arg should use collation", + ), +] + +# Property [Update and Delete $expr Filter with Collation]: $expr in update and +# delete filters uses per-statement collation for expression evaluation. +COLLATION_WRITE_EXPR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "update_expr_eq_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"$expr": {"$eq": ["$x", "apple"]}}, + "u": {"$set": {"v": 2}}, + "multi": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 2, "nModified": 2}, + msg="update $expr filter with strength 2 should match case-insensitively", + ), + CommandTestCase( + "update_expr_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"$expr": {"$eq": ["$x", "apple"]}}, + "u": {"$set": {"v": 2}}, + "multi": True, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="update $expr filter without collation should use binary comparison", + ), + 
CommandTestCase( + "delete_expr_eq_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + { + "q": {"$expr": {"$eq": ["$x", "apple"]}}, + "limit": 0, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 2}, + msg="delete $expr filter with strength 2 should match case-insensitively", + ), + CommandTestCase( + "delete_expr_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + { + "q": {"$expr": {"$eq": ["$x", "apple"]}}, + "limit": 0, + } + ], + }, + expected={"ok": 1.0, "n": 1}, + msg="delete $expr filter without collation should use binary comparison", + ), +] + +COLLATION_EXPR_FILTER_TESTS: list[CommandTestCase] = ( + COLLATION_FIND_EXPR_TESTS + COLLATION_WRITE_EXPR_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_EXPR_FILTER_TESTS)) +def test_collation_expr_filter(database_client, collection, test): + """Test collation behavior with $expr in find, update, and delete filters.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + expected = test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_find.py new file mode 100644 index 000000000..d8cab9885 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_find.py @@ -0,0 +1,577 @@ +"""Tests for collation behavior in the find command.""" + +from __future__ import annotations + +import pytest + +from 
documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + MISSING_FIELD_ERROR, + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Find Filter Matching]: collation affects equality and comparison +# operators in the find filter, enabling case-insensitive and accent-insensitive +# matching depending on strength. +COLLATION_FIND_FILTER_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "filter_eq_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + {"_id": 4, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + msg="find with strength 2 should match case-insensitively", + ), + CommandTestCase( + "filter_eq_accent_insensitive", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "caf\u00e9"}, + {"_id": 3, "x": "other"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "cafe"}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "caf\u00e9"}, + ], + msg="find with strength 1 should match accent-insensitively", + ), + CommandTestCase( + "filter_ne_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$ne": "apple"}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 
3, "x": "banana"}], + msg="find $ne with strength 2 should exclude case variants", + ), + CommandTestCase( + "filter_gt_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "Banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$gt": "apple"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}, {"_id": 4, "x": "Banana"}], + msg="find $gt with strength 2 should compare case-insensitively", + ), + CommandTestCase( + "filter_in_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$in": ["apple", "cherry"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 4, "x": "cherry"}, + ], + msg="find $in with strength 2 should match case variants", + ), + CommandTestCase( + "filter_lte_gte_range", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + {"_id": 4, "x": "cherry"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$gte": "apple", "$lte": "banana"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + msg="find $gte/$lte with strength 2 should define range case-insensitively", + ), + CommandTestCase( + "filter_strength3_case_sensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="find with strength 3 should match 
case-sensitively", + ), +] + +# Property [Find Sort Ordering]: collation affects the sort order of string +# values in find results, respecting locale-specific ordering rules. +COLLATION_FIND_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_strength1_groups_variants", + docs=[ + {"_id": 1, "x": "b"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 2, "x": "\u00e1"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + {"_id": 1, "x": "b"}, + ], + msg="find sort with strength 1 should group case and accent variants together", + ), + CommandTestCase( + "sort_strength2_separates_accents", + docs=[ + {"_id": 1, "x": "b"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 1, "x": "b"}, + ], + msg="find sort with strength 2 should separate accented from unaccented", + ), + CommandTestCase( + "sort_numeric_ordering", + docs=[ + {"_id": 1, "x": "file2"}, + {"_id": 2, "x": "file10"}, + {"_id": 3, "x": "file1"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": 1}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 3, "x": "file1"}, + {"_id": 1, "x": "file2"}, + {"_id": 2, "x": "file10"}, + ], + msg="find sort with numericOrdering should sort embedded numbers numerically", + ), + CommandTestCase( + "sort_casefirst_upper", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "APPLE"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": 1}, + "collation": {"locale": "en", 
"caseFirst": "upper"}, + }, + expected=[ + {"_id": 3, "x": "APPLE"}, + {"_id": 2, "x": "Apple"}, + {"_id": 1, "x": "apple"}, + ], + msg="find sort with caseFirst upper should sort uppercase before lowercase", + ), + CommandTestCase( + "sort_backwards_accents", + docs=[ + {"_id": 1, "x": "cot\u00e9"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 3, "x": "c\u00f4t\u00e9"}, + {"_id": 4, "x": "cote"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": 1}, + "collation": {"locale": "en", "backwards": True}, + }, + expected=[ + {"_id": 4, "x": "cote"}, + {"_id": 2, "x": "c\u00f4te"}, + {"_id": 1, "x": "cot\u00e9"}, + {"_id": 3, "x": "c\u00f4t\u00e9"}, + ], + msg="find sort with backwards should reverse secondary (accent) differences", + ), + CommandTestCase( + "sort_descending_with_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "banana"}, + {"_id": 3, "x": "cherry"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": -1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 3, "x": "cherry"}, + {"_id": 2, "x": "banana"}, + {"_id": 1, "x": "apple"}, + ], + msg="find sort descending should respect collation ordering in reverse", + ), +] + +# Property [Find Collation Validation]: the find command validates the collation +# document the same way as aggregate - non-object types, missing locale, and +# invalid locale strings produce errors. 
+COLLATION_FIND_VALIDATION_TESTS: list[CommandTestCase] = [
+    # Collation is given as a bare string instead of a document.
+    CommandTestCase(
+        "validation_non_object_collation",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {},
+            "collation": "en",
+        },
+        error_code=TYPE_MISMATCH_ERROR,
+        msg="find with non-object collation should produce an error",
+    ),
+    # Collation document sets strength but omits the locale field.
+    CommandTestCase(
+        "validation_missing_locale",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {},
+            "collation": {"strength": 2},
+        },
+        error_code=MISSING_FIELD_ERROR,
+        msg="find with collation missing locale should produce an error",
+    ),
+    # Collation document has a locale, but the string is not a usable locale name.
+    CommandTestCase(
+        "validation_invalid_locale",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {},
+            "collation": {"locale": "invalid_locale_xyz"},
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="find with invalid locale string should produce an error",
+    ),
+]
+
+# Property [Find Collection Default Collation]: when no collation is specified
+# on the find command, the collection's default collation is used; an explicit
+# collation overrides the collection default.
+COLLATION_FIND_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [
+    # The collection is created with a strength-2 default and the command sends
+    # no collation, so both case variants of "apple" are expected to match.
+    CommandTestCase(
+        "collection_default_inherited",
+        target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}),
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "apple"},
+            # Two documents are expected; pin their order instead of relying
+            # on natural order.
+            "sort": {"_id": 1},
+        },
+        expected=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+        ],
+        msg="find should inherit collection default collation when none specified",
+    ),
+    # Same strength-2 collection, but the command sends strength 3, so only the
+    # exact-case document is expected.
+    CommandTestCase(
+        "collection_default_overridden",
+        target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}),
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "apple"},
+            "collation": {"locale": "en", "strength": 3},
+        },
+        expected=[{"_id": 1, "x": "apple"}],
+        msg="find with explicit collation should override collection default",
+    ),
+    # Baseline: neither the collection nor the command carries a collation.
+    CommandTestCase(
+        "no_collation_binary_comparison",
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "apple"},
+        },
+        expected=[{"_id": 1, "x": "apple"}],
+        msg="find without collation should use binary comparison",
+    ),
+]
+
+# Property [Find Sort with Limit and Skip]: limit and skip are applied after
+# the collation-aware sort (no min/max index bounds are exercised here).
+COLLATION_FIND_MIN_MAX_TESTS: list[CommandTestCase] = [
+    # At strength 2 "apple" and "Apple" compare equal on x, so _id is added as
+    # a secondary sort key to make the order of the tied pair deterministic.
+    CommandTestCase(
+        "sort_with_limit",
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+            {"_id": 4, "x": "cherry"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {},
+            "sort": {"x": 1, "_id": 1},
+            "limit": 2,
+            "collation": {"locale": "en", "strength": 2},
+        },
+        expected=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+        ],
+        msg="find with collation sort and limit should return first N in collation order",
+    ),
+    # The tied pair is skipped as a whole here; the _id key is kept so both
+    # cases issue the same sort.
+    CommandTestCase(
+        "sort_with_skip",
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+            {"_id": 4, "x": "cherry"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {},
+            "sort": {"x": 1, "_id": 1},
+            "skip": 2,
+            "collation": {"locale": "en", "strength": 2},
+        },
+        expected=[
+            {"_id": 3, "x": "banana"},
+            {"_id": 4, "x": "cherry"},
+        ],
+        msg="find with collation sort and skip should skip first N in collation order",
+    ),
+]
+
+# Property [Find Non-String Values Unaffected]: collation does not change the
+# comparison behavior of non-string types.
+COLLATION_FIND_NON_STRING_TESTS: list[CommandTestCase] = [
+    # Integer values with $gt under a strength-1 collation: the expected rows
+    # are exactly those numerically greater than 1.
+    CommandTestCase(
+        "non_string_int_filter",
+        docs=[
+            {"_id": 1, "x": 1},
+            {"_id": 2, "x": 2},
+            {"_id": 3, "x": 3},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": {"$gt": 1}},
+            "sort": {"_id": 1},
+            "collation": {"locale": "en", "strength": 1},
+        },
+        expected=[{"_id": 2, "x": 2}, {"_id": 3, "x": 3}],
+        msg="collation should not affect integer comparison in find",
+    ),
+    # A null value stored next to string values: filtering on null is expected
+    # to return only the null document.
+    CommandTestCase(
+        "non_string_null_filter",
+        docs=[
+            {"_id": 1, "x": None},
+            {"_id": 2, "x": "apple"},
+            {"_id": 3, "x": "Apple"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": None},
+            "collation": {"locale": "en", "strength": 2},
+        },
+        expected=[{"_id": 1, "x": None}],
+        msg="collation should not affect null matching in find",
+    ),
+]
+
+# Property [Find Array Field Matching]: collation affects element-wise
+# comparison when filtering on array fields, matching any element that
+# satisfies the collation comparison.
+COLLATION_FIND_ARRAY_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "array_eq_case_insensitive", + docs=[ + {"_id": 1, "x": ["Apple", "banana"]}, + {"_id": 2, "x": ["cherry", "date"]}, + {"_id": 3, "x": ["APPLE", "fig"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": ["Apple", "banana"]}, + {"_id": 3, "x": ["APPLE", "fig"]}, + ], + msg="find on array field with strength 2 should match any case-variant element", + ), + CommandTestCase( + "array_in_case_insensitive", + docs=[ + {"_id": 1, "x": ["apple", "banana"]}, + {"_id": 2, "x": ["Cherry", "date"]}, + {"_id": 3, "x": ["fig"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$in": ["APPLE", "cherry"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": ["apple", "banana"]}, + {"_id": 2, "x": ["Cherry", "date"]}, + ], + msg="find $in on array field with collation should match case-variant elements", + ), + CommandTestCase( + "array_gt_case_insensitive", + docs=[ + {"_id": 1, "x": ["apple"]}, + {"_id": 2, "x": ["banana"]}, + {"_id": 3, "x": ["cherry"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$gt": "Apple"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 2, "x": ["banana"]}, + {"_id": 3, "x": ["cherry"]}, + ], + msg="find $gt on array field with collation should compare case-insensitively", + ), + CommandTestCase( + "array_elemmatch_case_insensitive", + docs=[ + {"_id": 1, "x": [{"v": "Apple"}, {"v": "banana"}]}, + {"_id": 2, "x": [{"v": "cherry"}]}, + {"_id": 3, "x": [{"v": "APPLE"}]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$elemMatch": {"v": "apple"}}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": [{"v": 
"Apple"}, {"v": "banana"}]}, + {"_id": 3, "x": [{"v": "APPLE"}]}, + ], + msg="find $elemMatch with collation should match case-variant elements", + ), + CommandTestCase( + "array_no_collation_binary", + docs=[ + {"_id": 1, "x": ["Apple", "banana"]}, + {"_id": 2, "x": ["apple", "cherry"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + }, + expected=[{"_id": 2, "x": ["apple", "cherry"]}], + msg="find on array field without collation should use binary comparison", + ), +] + +COLLATION_FIND_TESTS = ( + COLLATION_FIND_FILTER_TESTS + + COLLATION_FIND_SORT_TESTS + + COLLATION_FIND_VALIDATION_TESTS + + COLLATION_FIND_COLLECTION_DEFAULT_TESTS + + COLLATION_FIND_MIN_MAX_TESTS + + COLLATION_FIND_NON_STRING_TESTS + + COLLATION_FIND_ARRAY_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_FIND_TESTS)) +def test_collation_find(database_client, collection, test): + """Test collation behavior in the find command.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify.py new file mode 100644 index 000000000..557fca814 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify.py @@ -0,0 +1,314 @@ +"""Tests for collation behavior in the findAndModify command.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( 
+ BAD_VALUE_ERROR, + MISSING_FIELD_ERROR, + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [FindAndModify Filter Matching]: collation affects which document +# the findAndModify filter selects, enabling case-insensitive and +# accent-insensitive matching. +COLLATION_FAM_FILTER_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "update_case_insensitive_match", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "apple"}, + "update": {"$set": {"v": 2}}, + "new": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 1, "x": "apple", "v": 2}, + msg="findAndModify with strength 2 should match case-insensitively", + ), + CommandTestCase( + "update_accent_insensitive_match", + docs=[ + {"_id": 1, "x": "caf\u00e9", "v": 1}, + {"_id": 2, "x": "other", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "cafe"}, + "update": {"$set": {"v": 2}}, + "new": True, + "collation": {"locale": "en", "strength": 1}, + }, + expected={"_id": 1, "x": "caf\u00e9", "v": 2}, + msg="findAndModify with strength 1 should match accent-insensitively", + ), + CommandTestCase( + "update_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "Apple"}, + "update": {"$set": {"v": 2}}, + "new": True, + }, + expected={"_id": 2, "x": "Apple", "v": 2}, + msg="findAndModify without collation should use binary comparison", + ), + CommandTestCase( + "remove_case_insensitive", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + 
command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "apple"}, + "remove": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 1, "x": "apple"}, + msg="findAndModify remove with collation should match case-insensitively", + ), +] + +# Property [FindAndModify Sort with Collation]: collation affects the sort +# order used to select which document to modify when multiple documents +# match the filter. +COLLATION_FAM_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_ascending_with_collation", + docs=[ + {"_id": 1, "x": "banana", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "cherry", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {}, + "sort": {"x": 1}, + "update": {"$set": {"v": 2}}, + "new": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 2, "x": "Apple", "v": 2}, + msg="findAndModify sort should use collation ordering to pick first document", + ), + CommandTestCase( + "sort_numeric_ordering", + docs=[ + {"_id": 1, "x": "file10", "v": 1}, + {"_id": 2, "x": "file2", "v": 1}, + {"_id": 3, "x": "file1", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {}, + "sort": {"x": 1}, + "update": {"$set": {"v": 2}}, + "new": True, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected={"_id": 3, "x": "file1", "v": 2}, + msg="findAndModify sort with numericOrdering should pick numerically first", + ), + CommandTestCase( + "sort_descending_with_collation", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "banana", "v": 1}, + {"_id": 3, "x": "cherry", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {}, + "sort": {"x": -1}, + "update": {"$set": {"v": 2}}, + "new": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 3, "x": "cherry", "v": 2}, + msg="findAndModify sort descending with collation should pick last in 
collation order", + ), +] + +# Property [FindAndModify Upsert with Collation]: collation affects the filter +# matching for upsert operations in findAndModify. +COLLATION_FAM_UPSERT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "upsert_match_found_case_insensitive", + docs=[ + {"_id": 1, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "apple"}, + "update": {"$set": {"v": 2}}, + "upsert": True, + "new": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 1, "x": "Apple", "v": 2}, + msg="findAndModify upsert with collation should find existing case-variant", + ), + CommandTestCase( + "upsert_no_match_inserts", + docs=[ + {"_id": 1, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"_id": 99, "x": "apple"}, + "update": {"$set": {"v": 2}}, + "upsert": True, + "new": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 99, "x": "apple", "v": 2}, + msg="findAndModify upsert with collation should insert when no match found", + ), +] + +# Property [FindAndModify Collation Validation]: the findAndModify command +# validates the collation document. 
+COLLATION_FAM_VALIDATION_TESTS: list[CommandTestCase] = [
+    # Collation is given as a bare string instead of a document.
+    CommandTestCase(
+        "validation_non_object_collation",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "findAndModify": ctx.collection,
+            "query": {"x": "a"},
+            "update": {"$set": {"v": 1}},
+            "collation": "en",
+        },
+        error_code=TYPE_MISMATCH_ERROR,
+        msg="findAndModify with non-object collation should produce an error",
+    ),
+    # Collation document sets strength but omits the locale field.
+    CommandTestCase(
+        "validation_missing_locale",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "findAndModify": ctx.collection,
+            "query": {"x": "a"},
+            "update": {"$set": {"v": 1}},
+            "collation": {"strength": 2},
+        },
+        error_code=MISSING_FIELD_ERROR,
+        msg="findAndModify with collation missing locale should produce an error",
+    ),
+    # Collation document has a locale, but the string is not a usable locale name.
+    CommandTestCase(
+        "validation_invalid_locale",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "findAndModify": ctx.collection,
+            "query": {"x": "a"},
+            "update": {"$set": {"v": 1}},
+            "collation": {"locale": "invalid_locale_xyz"},
+        },
+        error_code=BAD_VALUE_ERROR,
+        msg="findAndModify with invalid locale string should produce an error",
+    ),
+]
+
+# Property [FindAndModify Collection Default Collation]: when no collation is
+# specified on the findAndModify command, the collection's default collation
+# is used for filter matching and sort.
+COLLATION_FAM_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [
+    # The query value "APPLE" is not stored verbatim, so a document can only be
+    # selected if the collection's strength-2 default is applied. Both case
+    # variants then match; the _id sort pins which one is modified.
+    CommandTestCase(
+        "collection_default_inherited",
+        target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}),
+        docs=[
+            {"_id": 1, "x": "apple", "v": 1},
+            {"_id": 2, "x": "Apple", "v": 1},
+            {"_id": 3, "x": "banana", "v": 1},
+        ],
+        command=lambda ctx: {
+            "findAndModify": ctx.collection,
+            "query": {"x": "APPLE"},
+            "sort": {"_id": 1},
+            "update": {"$set": {"v": 2}},
+            "new": True,
+        },
+        expected={"_id": 1, "x": "apple", "v": 2},
+        msg="findAndModify should inherit collection default collation",
+    ),
+    # Under the strength-2 default, "Apple" would also match _id 1. With the
+    # explicit strength-3 override only the exact-case document (_id 2) can be
+    # selected, so the result shows which collation was used.
+    CommandTestCase(
+        "collection_default_overridden",
+        target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}),
+        docs=[
+            {"_id": 1, "x": "apple", "v": 1},
+            {"_id": 2, "x": "Apple", "v": 1},
+            {"_id": 3, "x": "banana", "v": 1},
+        ],
+        command=lambda ctx: {
+            "findAndModify": ctx.collection,
+            "query": {"x": "Apple"},
+            "update": {"$set": {"v": 2}},
+            "new": True,
+            "collation": {"locale": "en", "strength": 3},
+        },
+        expected={"_id": 2, "x": "Apple", "v": 2},
+        msg="findAndModify with explicit collation should override collection default",
+    ),
+]
+
+# Property [FindAndModify Return Old Document]: when new is false or omitted,
+# findAndModify returns the document as it was before the update, but collation
+# still affects which document is selected.
+COLLATION_FAM_RETURN_OLD_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "return_old_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "APPLE"}, + "update": {"$set": {"v": 2}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 1, "x": "apple", "v": 1}, + msg="findAndModify returning old doc should still use collation for selection", + ), +] + +COLLATION_FAM_TESTS = ( + COLLATION_FAM_FILTER_TESTS + + COLLATION_FAM_SORT_TESTS + + COLLATION_FAM_UPSERT_TESTS + + COLLATION_FAM_VALIDATION_TESTS + + COLLATION_FAM_COLLECTION_DEFAULT_TESTS + + COLLATION_FAM_RETURN_OLD_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_FAM_TESTS)) +def test_collation_find_and_modify(database_client, collection, test): + """Test collation behavior in the findAndModify command.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + if not isinstance(result, Exception): + result = {"cursor": {"firstBatch": [result.get("value")]}} + assertResult( + result, + expected=[test.build_expected(ctx)], + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify_pipeline.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify_pipeline.py new file mode 100644 index 000000000..6bef7775a --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify_pipeline.py @@ -0,0 +1,153 @@ +"""Tests for collation with pipeline-style updates in findAndModify.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from 
documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [FindAndModify Pipeline Update Filter Matching]: collation affects +# which document findAndModify selects when the update expression is an +# aggregation pipeline array. +COLLATION_FAM_PIPELINE_FILTER_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "fam_pipeline_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "APPLE"}, + "update": [{"$set": {"v": 2}}], + "new": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 1, "x": "apple", "v": 2}, + msg="findAndModify pipeline update with strength 2 should match case-insensitively", + ), + CommandTestCase( + "fam_pipeline_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "apple"}, + "update": [{"$set": {"v": 2}}], + "new": True, + }, + expected={"_id": 1, "x": "apple", "v": 2}, + msg="findAndModify pipeline update without collation should use binary comparison", + ), + CommandTestCase( + "fam_pipeline_accent_insensitive", + docs=[ + {"_id": 1, "x": "cafe", "v": 1}, + {"_id": 2, "x": "caf\u00e9", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "caf\u00e9"}, + "update": [{"$set": {"v": 2}}], + "new": True, + "collation": {"locale": "en", "strength": 1}, + }, + expected={"_id": 1, "x": "cafe", "v": 2}, + msg="findAndModify pipeline update with strength 1 should match accent-insensitively", + ), +] + +# Property [FindAndModify Pipeline Update Sort with Collation]: collation +# affects 
the sort order used to select which document to modify when using +# pipeline-style updates. +COLLATION_FAM_PIPELINE_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "fam_pipeline_sort_case_insensitive", + docs=[ + {"_id": 1, "x": "banana", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "cherry", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {}, + "sort": {"x": 1}, + "update": [{"$set": {"v": 2}}], + "new": True, + "collation": {"locale": "en", "strength": 2}, + }, + expected={"_id": 2, "x": "Apple", "v": 2}, + msg="findAndModify pipeline update sort should use collation ordering", + ), +] + +# Property [FindAndModify Pipeline Update Collection Default]: when no collation +# is specified, the collection's default collation is used for filter matching +# with pipeline-style updates. +COLLATION_FAM_PIPELINE_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "fam_pipeline_collection_default_inherited", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "APPLE"}, + "update": [{"$set": {"v": 2}}], + "new": True, + }, + expected={"_id": 1, "x": "apple", "v": 2}, + msg="findAndModify pipeline update should inherit collection default collation", + ), + CommandTestCase( + "fam_pipeline_collection_default_overridden", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "findAndModify": ctx.collection, + "query": {"x": "APPLE"}, + "update": [{"$set": {"v": 2}}], + "new": True, + "collation": {"locale": "en", "strength": 3}, + }, + expected=None, + msg="findAndModify pipeline update with strength 3 should not match case variants", + ), +] + 
+COLLATION_FAM_PIPELINE_TESTS = (
+    COLLATION_FAM_PIPELINE_FILTER_TESTS
+    + COLLATION_FAM_PIPELINE_SORT_TESTS
+    + COLLATION_FAM_PIPELINE_COLLECTION_DEFAULT_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(COLLATION_FAM_PIPELINE_TESTS))
+def test_collation_find_and_modify_pipeline(database_client, collection, test):
+    """Run one pipeline-update findAndModify collation case and assert its outcome."""
+    target = test.prepare(database_client, collection)
+    context = CommandContext.from_collection(target)
+    outcome = execute_command(target, test.build_command(context))
+    if isinstance(outcome, Exception):
+        # Errors are handed to assertResult untouched.
+        normalized = outcome
+    else:
+        # Wrap the reply's "value" document in a one-element cursor batch so it
+        # lines up with the one-element expected list below.
+        normalized = {"cursor": {"firstBatch": [outcome.get("value")]}}
+    assertResult(
+        normalized,
+        expected=[test.build_expected(context)],
+        error_code=test.error_code,
+        msg=test.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_query_operators.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_query_operators.py
new file mode 100644
index 000000000..56cea6c09
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_query_operators.py
@@ -0,0 +1,383 @@
+"""Representative query operator wiring tests for collation in the find command.
+
+One test per operator category confirms collation is correctly wired to the
+query engine for find. Exhaustive collation behavior is tested in the
+collation-specific parameter files (strength, locale, etc.).
+""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Find Query Operator Wiring]: the find command's collation parameter +# affects comparison, logical, array, and expression operators in the filter; +# $regex, $exists, $type, bitwise, and geospatial operators are NOT affected. +COLLATION_FIND_QUERY_OPERATOR_TESTS: list[CommandTestCase] = [ + # Comparison operators - affected. + CommandTestCase( + "find_eq_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$eq": "apple"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="find $eq should use collation for case-insensitive matching", + ), + CommandTestCase( + "find_ne_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$ne": "apple"}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="find $ne should use collation", + ), + CommandTestCase( + "find_gt_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$gt": "apple"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="find $gt should use collation", + ), + CommandTestCase( + 
"find_lte_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$lte": "apple"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="find $lte should use collation", + ), + CommandTestCase( + "find_in_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$in": ["APPLE"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 2, "x": "Apple"}], + msg="find $in should use collation", + ), + CommandTestCase( + "find_nin_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$nin": ["APPLE"]}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="find $nin should use collation", + ), + # Logical operators - affected (they wrap affected operators). 
+ CommandTestCase( + "find_and_collation", + docs=[ + {"_id": 1, "x": "apple", "y": "red"}, + {"_id": 2, "x": "Apple", "y": "Red"}, + {"_id": 3, "x": "banana", "y": "yellow"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$and": [{"x": "apple"}, {"y": "red"}]}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple", "y": "red"}, + {"_id": 2, "x": "Apple", "y": "Red"}, + ], + msg="find $and should use collation in sub-conditions", + ), + CommandTestCase( + "find_or_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "banana"}, + {"_id": 3, "x": "Cherry"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$or": [{"x": "APPLE"}, {"x": "cherry"}]}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}, {"_id": 3, "x": "Cherry"}], + msg="find $or should use collation in sub-conditions", + ), + CommandTestCase( + "find_nor_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "banana"}, + {"_id": 3, "x": "Cherry"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$nor": [{"x": "APPLE"}, {"x": "cherry"}]}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 2, "x": "banana"}], + msg="find $nor should use collation in sub-conditions", + ), + CommandTestCase( + "find_not_collation", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$not": {"$eq": "apple"}}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 3, "x": "banana"}], + msg="find $not should use collation in wrapped condition", + ), + # Array operators - affected. 
+ CommandTestCase( + "find_all_collation", + docs=[ + {"_id": 1, "arr": ["Apple", "Banana"]}, + {"_id": 2, "arr": ["cherry", "date"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"arr": {"$all": ["apple", "banana"]}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "arr": ["Apple", "Banana"]}], + msg="find $all should use collation", + ), + CommandTestCase( + "find_elemMatch_collation", + docs=[ + {"_id": 1, "arr": [{"name": "Apple", "v": 1}]}, + {"_id": 2, "arr": [{"name": "banana", "v": 2}]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"arr": {"$elemMatch": {"name": "apple"}}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "arr": [{"name": "Apple", "v": 1}]}], + msg="find $elemMatch should use collation", + ), + # $size - NOT affected (numeric comparison). + CommandTestCase( + "find_size_not_affected", + docs=[ + {"_id": 1, "arr": ["a", "b"]}, + {"_id": 2, "arr": ["a"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"arr": {"$size": 2}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "arr": ["a", "b"]}], + msg="find $size is not affected by collation (numeric)", + ), + # $exists - NOT affected. + CommandTestCase( + "find_exists_not_affected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$exists": True}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="find $exists is not affected by collation", + ), + # $type - NOT affected. 
+ CommandTestCase( + "find_type_not_affected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": 42}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$type": "string"}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="find $type is not affected by collation", + ), + # $regex - NOT affected by collation (uses its own flags). + CommandTestCase( + "find_regex_not_affected", + docs=[ + {"_id": 1, "x": "Apple"}, + {"_id": 2, "x": "apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$regex": "^apple$"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 2, "x": "apple"}], + msg="find $regex is not affected by collation (only matches exact case)", + ), + # $mod - NOT affected (numeric). + CommandTestCase( + "find_mod_not_affected", + docs=[ + {"_id": 1, "x": 6}, + {"_id": 2, "x": 7}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$mod": [3, 0]}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": 6}], + msg="find $mod is not affected by collation (numeric)", + ), + # Bitwise - NOT affected. + CommandTestCase( + "find_bitsAllSet_not_affected", + docs=[ + {"_id": 1, "flags": 7}, + {"_id": 2, "flags": 3}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"flags": {"$bitsAllSet": 5}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "flags": 7}], + msg="find $bitsAllSet is not affected by collation", + ), + # $expr - affected. 
+ CommandTestCase( + "find_expr_collation", + docs=[ + {"_id": 1, "a": "apple", "b": "Apple"}, + {"_id": 2, "a": "apple", "b": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$expr": {"$eq": ["$a", "$b"]}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "a": "apple", "b": "Apple"}], + msg="find $expr should use collation for expression comparisons", + ), + # $where - NOT affected (JavaScript evaluation ignores collation). + CommandTestCase( + "find_where_not_affected", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$where": "this.x == 'apple'"}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="find $where is not affected by collation (JavaScript uses binary comparison)", + ), + # Geospatial - NOT affected. + CommandTestCase( + "find_geoWithin_not_affected", + docs=[ + {"_id": 1, "loc": [0, 0], "x": "Apple"}, + {"_id": 2, "loc": [50, 50], "x": "apple"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"loc": {"$geoWithin": {"$center": [[0, 0], 10]}}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "loc": [0, 0], "x": "Apple"}], + msg="find $geoWithin is not affected by collation", + ), + # Text search - NOT affected by explicit collation. 
+ CommandTestCase( + "find_text_not_affected", + indexes=[IndexModel([("x", "text")])], + docs=[ + {"_id": 1, "x": "hello world"}, + {"_id": 2, "x": "goodbye"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$text": {"$search": "hello"}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "x": "hello world"}], + msg="find $text uses its own locale, not command-level collation", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_FIND_QUERY_OPERATOR_TESTS)) +def test_collation_find_query_operators(database_client, collection, test): + """Test collation wiring for each query operator category in find.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_text.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_text.py new file mode 100644 index 000000000..edeb03750 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_find_text.py @@ -0,0 +1,89 @@ +"""Tests for $text in find with explicit command-level collation.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Find $text Ignores Explicit Collation]: when a find command +# specifies both a $text filter and an explicit collation, the collation is +# silently ignored for text matching; $text uses 
the text index's own +# language-based rules instead. +COLLATION_FIND_TEXT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "text_search_ignores_explicit_collation", + docs=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "Cafe Mocha"}, + {"_id": 3, "x": "tea"}, + ], + indexes=[IndexModel([("x", "text")], name="x_text")], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$text": {"$search": "cafe"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "Cafe Mocha"}, + ], + msg="$text in find should ignore explicit collation and use text index rules", + ), + CommandTestCase( + "text_search_case_sensitive_ignores_collation", + docs=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "Cafe Mocha"}, + {"_id": 3, "x": "CAFE espresso"}, + ], + indexes=[IndexModel([("x", "text")], name="x_text")], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$text": {"$search": "cafe", "$caseSensitive": True}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "x": "cafe latte"}], + msg="$text $caseSensitive should not be overridden by explicit collation", + ), + CommandTestCase( + "text_search_diacritic_sensitive_ignores_collation", + docs=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "caf\u00e9 mocha"}, + {"_id": 3, "x": "tea"}, + ], + indexes=[IndexModel([("x", "text")], name="x_text")], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$text": {"$search": "cafe", "$diacriticSensitive": True}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "x": "cafe latte"}], + msg="$text $diacriticSensitive should not be overridden by explicit collation", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_FIND_TEXT_TESTS)) +def test_collation_find_text(database_client, collection, test): + """Test that $text in find ignores explicit 
command-level collation.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_id_field.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_id_field.py new file mode 100644 index 000000000..187002780 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_id_field.py @@ -0,0 +1,198 @@ +"""Tests for collation effects on _id field filtering.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Find on _id with Collation]: collation affects equality and +# comparison operators on the _id field, enabling case-insensitive matching +# despite the unique _id index. 
+COLLATION_ID_FIND_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "find_id_eq_case_insensitive", + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Banana", "v": 2}, + {"_id": "cherry", "v": 3}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": "APPLE"}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": "apple", "v": 1}], + msg="find on _id with strength 2 should match case-insensitively", + ), + CommandTestCase( + "find_id_in_case_insensitive", + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Banana", "v": 2}, + {"_id": "cherry", "v": 3}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": {"$in": ["APPLE", "BANANA"]}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": "apple", "v": 1}, + {"_id": "Banana", "v": 2}, + ], + msg="find $in on _id with collation should match case variants", + ), + CommandTestCase( + "find_id_gt_case_insensitive", + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Banana", "v": 2}, + {"_id": "cherry", "v": 3}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": {"$gt": "banana"}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": "cherry", "v": 3}], + msg="find $gt on _id with collation should compare case-insensitively", + ), + CommandTestCase( + "find_id_no_collation_binary", + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Apple", "v": 2}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": "apple"}, + }, + expected=[{"_id": "apple", "v": 1}], + msg="find on _id without collation should use binary comparison", + ), +] + +# Property [Collection Default Collation on _id]: a collection with a default +# collation uses it for _id field matching when no explicit collation is +# specified. 
+COLLATION_ID_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collection_default_id_match", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Banana", "v": 2}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": "APPLE"}, + }, + expected=[{"_id": "apple", "v": 1}], + msg="collection default collation should apply to _id matching", + ), +] + +# Property [Update on _id with Collation]: collation affects the filter on _id +# in update commands. +COLLATION_ID_UPDATE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "update_id_filter_case_insensitive", + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": "APPLE"}, + "u": {"$set": {"v": 2}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="update filter on _id should use collation", + ), +] + +# Property [Delete on _id with Collation]: collation affects the filter on _id +# in delete commands. +COLLATION_ID_DELETE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "delete_id_filter_case_insensitive", + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Banana", "v": 2}, + ], + command=lambda ctx: { + "delete": ctx.collection, + "deletes": [ + { + "q": {"_id": "APPLE"}, + "limit": 1, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1}, + msg="delete filter on _id should use collation", + ), +] + +# Property [Sort on _id with Collation]: collation affects sort ordering on the +# _id field. 
+COLLATION_ID_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_id_case_insensitive", + docs=[ + {"_id": "cherry"}, + {"_id": "Apple"}, + {"_id": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": "Apple"}, + {"_id": "banana"}, + {"_id": "cherry"}, + ], + msg="sort on _id should use collation for case-insensitive ordering", + ), +] + +COLLATION_ID_FIELD_TESTS: list[CommandTestCase] = ( + COLLATION_ID_FIND_TESTS + + COLLATION_ID_COLLECTION_DEFAULT_TESTS + + COLLATION_ID_UPDATE_TESTS + + COLLATION_ID_DELETE_TESTS + + COLLATION_ID_SORT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_ID_FIELD_TESTS)) +def test_collation_id_field(database_client, collection, test): + """Test collation effects on _id field operations.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(test.build_expected(ctx), list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_index.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_index.py new file mode 100644 index 000000000..f9e257e12 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_index.py @@ -0,0 +1,521 @@ +"""Tests for collation behavior with indexes.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + 
DUPLICATE_KEY_ERROR, + MISSING_FIELD_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Eq +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Index Creation with Collation]: indexes can be created with a +# collation specification that determines how string values are compared +# within the index. +COLLATION_INDEX_CREATION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "create_index_with_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": 1}, + "name": "x_collated", + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="should create an index with collation specification", + ), + CommandTestCase( + "create_index_with_numeric_ordering", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": 1}, + "name": "x_numeric", + "collation": {"locale": "en", "numericOrdering": True}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="should create an index with numericOrdering collation", + ), + CommandTestCase( + "create_index_invalid_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": 1}, + "name": "x_bad", + "collation": {"locale": "invalid_locale_xyz"}, + } + ], + }, + error_code=BAD_VALUE_ERROR, + msg="creating index with invalid locale should produce an error", + ), + CommandTestCase( + "create_index_collation_missing_locale", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": 1}, + "name": "x_no_locale", + "collation": {"strength": 2}, + } + ], + }, + error_code=MISSING_FIELD_ERROR, + msg="creating index with collation missing locale should produce an error", + ), +] + +# Property [Unique Index Enforcement Under 
Collation]: a unique index with +# collation enforces uniqueness based on collation comparison, so values that +# are equal under the collation cannot coexist. +COLLATION_UNIQUE_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "unique_index_rejects_case_variant", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + name="x_unique_ci", + ) + ], + docs=[{"_id": 1, "x": "apple"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "Apple"}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="unique index with strength 2 should reject case-variant duplicates", + ), + CommandTestCase( + "unique_index_allows_different_values", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + name="x_unique_ci", + ) + ], + docs=[{"_id": 1, "x": "apple"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "banana"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="unique index with collation should allow distinct values", + ), + CommandTestCase( + "unique_index_strength3_allows_case_variants", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + collation={"locale": "en", "strength": 3}, + name="x_unique_cs", + ) + ], + docs=[{"_id": 1, "x": "apple"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "Apple"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="unique index with strength 3 should allow case-different values", + ), + CommandTestCase( + "unique_index_accent_insensitive_rejects", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + collation={"locale": "en", "strength": 1}, + name="x_unique_ai", + ) + ], + docs=[{"_id": 1, "x": "cafe"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "caf\u00e9"}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="unique index with strength 1 should reject accent-variant duplicates", + ), +] + +# Property 
[Compound Index with Collation]: collation applies to all string +# fields in a compound index. +COLLATION_COMPOUND_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "compound_unique_rejects_combined_variant", + indexes=[ + IndexModel( + [("x", 1), ("y", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + name="xy_unique_ci", + ) + ], + docs=[{"_id": 1, "x": "apple", "y": "red"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "Apple", "y": "RED"}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="compound unique index with collation should reject case-variant combination", + ), + CommandTestCase( + "compound_unique_allows_different_second_field", + indexes=[ + IndexModel( + [("x", 1), ("y", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + name="xy_unique_ci", + ) + ], + docs=[{"_id": 1, "x": "apple", "y": "red"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "Apple", "y": "blue"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="compound unique index should allow when second field differs under collation", + ), +] + +# Property [Collection Default Collation Inherited by Indexes]: indexes created +# on a collection with a default collation inherit that collation unless they +# specify their own. 
+COLLATION_INDEX_INHERITANCE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "index_inherits_collection_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "apple"}], + indexes=[ + IndexModel([("x", 1)], unique=True, name="x_inherited"), + ], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "Apple"}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="index on collated collection should inherit collection collation for uniqueness", + ), + CommandTestCase( + "index_overrides_collection_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "apple"}], + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + collation={"locale": "simple"}, + name="x_simple", + ), + ], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "x": "Apple"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="index with explicit simple collation should override collection default", + ), +] + +COLLATION_INDEX_TESTS = ( + COLLATION_INDEX_CREATION_TESTS + + COLLATION_UNIQUE_INDEX_TESTS + + COLLATION_COMPOUND_INDEX_TESTS + + COLLATION_INDEX_INHERITANCE_TESTS +) + +# Property [Index Selection Based on Collation Match]: a query only uses a +# collated index if the query's collation matches the index's collation; +# otherwise the index is not selected and a collection scan is used. 
+COLLATION_INDEX_SELECTION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "matching_collation_uses_index", + indexes=[ + IndexModel( + [("x", 1)], + collation={"locale": "en", "strength": 2}, + name="x_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + msg="find with matching collation should return correct results using index", + ), + CommandTestCase( + "mismatched_collation_still_correct", + indexes=[ + IndexModel( + [("x", 1)], + collation={"locale": "en", "strength": 2}, + name="x_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="find with different collation should still return correct results via scan", + ), + CommandTestCase( + "no_collation_binary_comparison", + indexes=[ + IndexModel( + [("x", 1)], + collation={"locale": "en", "strength": 2}, + name="x_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + }, + expected=[{"_id": 1, "x": "apple"}], + msg="find without collation should use binary comparison not the index collation", + ), + CommandTestCase( + "sort_with_matching_collation", + indexes=[ + IndexModel( + [("x", 1)], + collation={"locale": "en", "strength": 2}, + name="x_ci", + ) + ], + docs=[ + {"_id": 1, "x": "banana"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "cherry"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {}, + "sort": {"x": 1}, + "collation": {"locale": "en", 
"strength": 2}, + }, + expected=[ + {"_id": 2, "x": "Apple"}, + {"_id": 1, "x": "banana"}, + {"_id": 3, "x": "cherry"}, + ], + msg="find sort with matching index collation should return correct order", + ), +] + +# Property [Sparse Index with Collation]: a sparse unique index with collation +# allows multiple documents with missing indexed field while enforcing +# collation-aware uniqueness on present values. +COLLATION_SPARSE_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sparse_unique_allows_multiple_missing", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + sparse=True, + collation={"locale": "en", "strength": 2}, + name="x_sparse_unique_ci", + ) + ], + docs=[{"_id": 1, "y": "hello"}, {"_id": 2, "y": "world"}, {"_id": 3, "x": "apple"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 4, "y": "another"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="sparse unique index with collation should allow multiple docs with missing field", + ), + CommandTestCase( + "sparse_unique_rejects_collation_equal", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + sparse=True, + collation={"locale": "en", "strength": 2}, + name="x_sparse_unique_ci", + ) + ], + docs=[{"_id": 1, "y": "no_x"}, {"_id": 2, "x": "apple"}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 3, "x": "Apple"}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="sparse unique index with collation should reject case-variant duplicates", + ), +] + +# Property [Wildcard Index with Collation]: a wildcard index can be created +# with a collation specification, and queries with matching collation produce +# correct collation-aware results. 
+COLLATION_WILDCARD_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "wildcard_index_creation_with_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"$**": 1}, + "name": "wildcard_collated", + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="should create a wildcard index with collation specification", + ), + CommandTestCase( + "wildcard_index_query_with_matching_collation", + indexes=[ + IndexModel( + [("$**", 1)], + collation={"locale": "en", "strength": 2}, + name="wildcard_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + msg="wildcard index with matching collation should return case-insensitive results", + ), +] + +# Property [Hashed Index with Collation]: a hashed index can be created with a +# collation specification, and queries with matching collation produce correct +# collation-aware results. 
+COLLATION_HASHED_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "hashed_index_creation_with_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": "hashed"}, + "name": "x_hashed_collated", + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="should create a hashed index with collation specification", + ), + CommandTestCase( + "hashed_index_query_with_matching_collation", + indexes=[ + IndexModel( + [("x", "hashed")], + collation={"locale": "en", "strength": 2}, + name="x_hashed_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + {"_id": 3, "x": "banana"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + msg="hashed index with matching collation should return case-insensitive results", + ), +] + +COLLATION_INDEX_ALL_TESTS = ( + COLLATION_INDEX_TESTS + + COLLATION_INDEX_SELECTION_TESTS + + COLLATION_SPARSE_INDEX_TESTS + + COLLATION_WILDCARD_INDEX_TESTS + + COLLATION_HASHED_INDEX_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_INDEX_ALL_TESTS)) +def test_collation_index(database_client, collection, test): + """Test collation behavior with indexes.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + expected = test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_index_multikey.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_index_multikey.py new file mode 100644 index 
000000000..eed5f7c08 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_index_multikey.py @@ -0,0 +1,204 @@ +"""Tests for collation with multikey indexes and index build on existing duplicates.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import DUPLICATE_KEY_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Eq + +# Property [Multikey Index with Collation]: a collated index on an array field +# correctly handles multiple keys per document, using collation for both +# uniqueness enforcement and query matching across array elements. 
+COLLATION_MULTIKEY_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "multikey_unique_rejects_case_variant_across_docs", + indexes=[ + IndexModel( + [("tags", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + name="tags_unique_ci", + ) + ], + docs=[{"_id": 1, "tags": ["apple", "banana"]}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "tags": ["Apple"]}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="multikey unique index with collation should reject case-variant element", + ), + CommandTestCase( + "multikey_unique_allows_different_elements", + indexes=[ + IndexModel( + [("tags", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + name="tags_unique_ci", + ) + ], + docs=[{"_id": 1, "tags": ["apple", "banana"]}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 2, "tags": ["cherry"]}], + }, + expected={"ok": 1.0, "n": 1}, + msg="multikey unique index with collation should allow distinct elements", + ), + CommandTestCase( + "multikey_query_uses_collation", + indexes=[ + IndexModel( + [("tags", 1)], + collation={"locale": "en", "strength": 2}, + name="tags_ci", + ) + ], + docs=[ + {"_id": 1, "tags": ["Apple", "banana"]}, + {"_id": 2, "tags": ["cherry"]}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"tags": "apple"}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "tags": ["Apple", "banana"]}], + msg="find on multikey index with matching collation should match case-insensitively", + ), + CommandTestCase( + "multikey_unique_rejects_within_same_doc", + docs=[{"_id": 1, "tags": ["other"]}], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"tags": 1}, + "name": "tags_unique_ci", + "unique": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + # Only index creation is verified here; no document is inserted afterwards, so
+ # same-document collation-equal array elements are not exercised by this case. + expected={"ok": Eq(1.0)}, + msg="creating multikey unique index should succeed on clean data", + ), +] + +# Property [Unique Index Build on Pre-Existing Duplicates]: creating a unique +# index with collation on a collection that already contains +# collation-equivalent values produces DUPLICATE_KEY_ERROR during index build. +COLLATION_INDEX_BUILD_DUPLICATE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "build_unique_index_rejects_existing_case_duplicates", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "Apple"}, + ], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": 1}, + "name": "x_unique_ci", + "unique": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="building unique index should fail when existing data has collation-equivalent values", + ), + CommandTestCase( + "build_unique_index_rejects_existing_accent_duplicates", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "caf\u00e9"}, + ], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": 1}, + "name": "x_unique_ai", + "unique": True, + "collation": {"locale": "en", "strength": 1}, + } + ], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="building unique index should fail when existing data has accent-equivalent values", + ), + CommandTestCase( + "build_unique_index_succeeds_on_distinct_data", + docs=[ + {"_id": 1, "x": "apple"}, + {"_id": 2, "x": "banana"}, + ], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"x": 1}, + "name": "x_unique_ci", + "unique": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="building unique index should succeed when no collation-equivalent duplicates exist", + ), + CommandTestCase( + "build_unique_multikey_rejects_cross_doc_duplicates", + docs=[ + {"_id": 1, "tags": ["apple", "banana"]}, + {"_id": 2, "tags": ["Apple", "cherry"]}, + ],
+ command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"tags": 1}, + "name": "tags_unique_ci", + "unique": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="building unique multikey index should fail on cross-doc collation-equivalent elements", + ), +] + +COLLATION_INDEX_MULTIKEY_TESTS = ( + COLLATION_MULTIKEY_INDEX_TESTS + COLLATION_INDEX_BUILD_DUPLICATE_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_INDEX_MULTIKEY_TESTS)) +def test_collation_index_multikey(database_client, collection, test): + """Test collation with multikey indexes and index build on duplicates.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + expected = test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_index_partial.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_index_partial.py new file mode 100644 index 000000000..fdcdfb085 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_index_partial.py @@ -0,0 +1,105 @@ +"""Tests for collation with partial filter expression indexes.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import DUPLICATE_KEY_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Partial Index Filter Uses 
Index Collation]: the +# partialFilterExpression of an index uses the index's collation for its +# comparisons, so documents matching under collation are indexed while others +# are not. +COLLATION_PARTIAL_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "partial_filter_case_insensitive_indexes_variant", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + partialFilterExpression={"status": "active"}, + name="x_partial_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple", "status": "Active"}, + {"_id": 2, "x": "banana", "status": "inactive"}, + ], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 3, "x": "apple", "status": "ACTIVE"}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="partial index with collation should match filter case-insensitively", + ), + CommandTestCase( + "partial_filter_excludes_non_matching", + indexes=[ + IndexModel( + [("x", 1)], + unique=True, + collation={"locale": "en", "strength": 2}, + partialFilterExpression={"status": "active"}, + name="x_partial_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple", "status": "active"}, + {"_id": 2, "x": "apple", "status": "inactive"}, + ], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 3, "x": "apple", "status": "archived"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="partial index should allow duplicate x when status does not match filter", + ), + CommandTestCase( + "partial_filter_query_with_matching_collation", + indexes=[ + IndexModel( + [("x", 1)], + collation={"locale": "en", "strength": 2}, + partialFilterExpression={"status": "active"}, + name="x_partial_ci", + ) + ], + docs=[ + {"_id": 1, "x": "apple", "status": "Active"}, + {"_id": 2, "x": "banana", "status": "Active"}, + {"_id": 3, "x": "apple", "status": "inactive"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "APPLE", "status": "active"}, + "collation": {"locale": "en", "strength": 2}, + }, + 
expected=[{"_id": 1, "x": "apple", "status": "Active"}], + msg="find with matching collation should use partial index for case-insensitive match", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_PARTIAL_INDEX_TESTS)) +def test_collation_index_partial(database_client, collection, test): + """Test collation with partial filter expression indexes.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + expected = test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_let_variables.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_let_variables.py new file mode 100644 index 000000000..d773f7b44 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_let_variables.py @@ -0,0 +1,166 @@ +"""Tests for collation interaction with command-level let variables.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Aggregate Let Variables with Collation]: command-level let variables +# used in $expr comparisons respect the command collation. 
+COLLATION_AGGREGATE_LET_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "aggregate_let_eq_case_insensitive",
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [
+                {"$match": {"$expr": {"$eq": ["$x", "$$target"]}}},
+                {"$sort": {"_id": 1}},  # fixed order for the two expected matches
+            ],
+            "cursor": {},
+            "let": {"target": "apple"},  # referenced as $$target inside $expr
+            "collation": {"locale": "en", "strength": 2},  # both case variants expected
+        },
+        expected=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+        ],
+        msg="aggregate let variable comparison should use command collation",
+    ),
+    CommandTestCase(
+        "aggregate_let_no_collation_binary",
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [
+                {"$match": {"$expr": {"$eq": ["$x", "$$target"]}}},
+            ],
+            "cursor": {},
+            "let": {"target": "apple"},  # no "collation" key: control case
+        },
+        expected=[{"_id": 1, "x": "apple"}],  # only the exact-case document
+        msg="aggregate let variable comparison without collation should use binary matching",
+    ),
+]
+
+# Property [Find Let Variables with Collation]: command-level let variables
+# used in $expr comparisons in find respect the command collation.
+COLLATION_FIND_LET_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "find_let_eq_case_insensitive",
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"$expr": {"$eq": ["$x", "$$target"]}},
+            "sort": {"_id": 1},  # fixed order for the two expected matches
+            "let": {"target": "apple"},  # referenced as $$target inside $expr
+            "collation": {"locale": "en", "strength": 2},  # both case variants expected
+        },
+        expected=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+        ],
+        msg="find let variable comparison should use command collation",
+    ),
+    CommandTestCase(
+        "find_let_no_collation_binary",
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"$expr": {"$eq": ["$x", "$$target"]}},
+            "let": {"target": "apple"},  # no "collation" key: control case
+        },
+        expected=[{"_id": 1, "x": "apple"}],  # only the exact-case document
+        msg="find let variable comparison without collation should use binary matching",
+    ),
+]
+
+# Property [Update Let Variables with Collation]: command-level let variables
+# used in $expr filter comparisons in update respect the per-statement collation.
+COLLATION_UPDATE_LET_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "update_let_eq_case_insensitive",
+        docs=[
+            {"_id": 1, "x": "apple", "v": 0},
+            {"_id": 2, "x": "Apple", "v": 0},
+            {"_id": 3, "x": "banana", "v": 0},
+        ],
+        command=lambda ctx: {
+            "update": ctx.collection,
+            "updates": [
+                {
+                    "q": {"$expr": {"$eq": ["$x", "$$target"]}},
+                    "u": {"$set": {"v": 1}},
+                    "multi": True,
+                    # Collation is set on the update statement, not on the command.
+                    "collation": {"locale": "en", "strength": 2},
+                }
+            ],
+            "let": {"target": "apple"},
+        },
+        expected={"ok": 1.0, "n": 2, "nModified": 2},
+        msg="update let variable comparison should use per-statement collation",
+    ),
+    CommandTestCase(
+        "update_let_no_collation_binary",
+        docs=[
+            {"_id": 1, "x": "apple", "v": 0},
+            {"_id": 2, "x": "Apple", "v": 0},
+            {"_id": 3, "x": "banana", "v": 0},
+        ],
+        command=lambda ctx: {
+            "update": ctx.collection,
+            "updates": [
+                {
+                    "q": {"$expr": {"$eq": ["$x", "$$target"]}},
+                    "u": {"$set": {"v": 1}},
+                    "multi": True,
+                }
+            ],
+            "let": {"target": "apple"},
+        },
+        expected={"ok": 1.0, "n": 1, "nModified": 1},
+        msg="update let variable comparison without collation should use binary matching",
+    ),
+]
+
+COLLATION_LET_TESTS: list[CommandTestCase] = (
+    COLLATION_AGGREGATE_LET_TESTS + COLLATION_FIND_LET_TESTS + COLLATION_UPDATE_LET_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(COLLATION_LET_TESTS))
+def test_collation_let_variables(database_client, collection, test):
+    """Test collation interaction with command-level let variables.
+
+    Each case is prepared on the collection, its command is executed, and the
+    result is checked against the case's expected value or error code.
+    """
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    # Build the expectation once so the asserted value and the raw_res flag are
+    # derived from the same object.
+    expected = test.build_expected(ctx)
+    assertResult(
+        result,
+        expected=expected,
+        error_code=test.error_code,
+        msg=test.msg,
+        # raw_res is enabled for every non-list expectation, e.g. the update
+        # summary dicts used by the update cases.
+        raw_res=not isinstance(expected, list),
+    )
diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_locale.py
b/documentdb_tests/compatibility/tests/core/collation/test_collation_locale.py
new file mode 100644
index 000000000..a0a691712
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_locale.py
@@ -0,0 +1,69 @@
+"""Tests for locale-specific collation semantics."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Locale Semantic Behavior]: the simple locale produces binary sort
+# order, and locale variants using @collation= syntax are accepted (the variant
+# cases below check acceptance only, not variant-specific ordering).
+COLLATION_LOCALE_SEMANTIC_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "locale_simple_binary_comparison",
+        docs=[{"_id": 1, "x": "b"}, {"_id": 2, "x": "A"}, {"_id": 3, "x": "a"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "simple"},  # expected order: "A", "a", "b"
+        },
+        expected=[{"_id": 2, "x": "A"}, {"_id": 3, "x": "a"}, {"_id": 1, "x": "b"}],
+        msg="aggregate with simple locale should produce binary sort order",
+    ),
+    CommandTestCase(
+        "locale_variant_de_phonebook",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [],  # empty pipeline: the single document is returned as-is
+            "cursor": {},
+            "collation": {"locale": "de@collation=phonebook"},
+        },
+        expected=[{"_id": 1, "x": "a"}],
+        msg="aggregate should accept de@collation=phonebook locale variant",
+    ),
+    CommandTestCase(
+        "locale_variant_zh_unihan",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [],  # empty pipeline: the single document is returned as-is
+            "cursor": {},
+            "collation": {"locale": "zh@collation=unihan"},
+        },
+        expected=[{"_id": 1, "x": "a"}],
+        msg="aggregate should accept zh@collation=unihan locale variant",
+    ),
+]
+
+
+@pytest.mark.parametrize("test", pytest_params(COLLATION_LOCALE_SEMANTIC_TESTS))
+def test_collation_locale(database_client, collection, test):
+    """Test locale-specific collation semantics."""
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_defaults.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_defaults.py
new file mode 100644
index 000000000..03aa13e03
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_defaults.py
@@ -0,0 +1,254 @@
+"""Tests for locale-specific default behavior and simple locale overrides."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.error_codes import BAD_VALUE_ERROR
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Simple Locale Overrides All Options]: locale "simple" causes all
+# other collation options to be accepted but have no observable effect because
+# binary comparison is always used.
+COLLATION_SIMPLE_LOCALE_OVERRIDES_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "simple_casefirst_upper_strength1_accepted",
+        docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "A"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {
+                "locale": "simple",
+                "strength": 1,
+                "caseFirst": "upper",
+            },
+        },
+        # Binary sort: A (65) < a (97).
+        expected=[{"_id": 2, "x": "A"}, {"_id": 1, "x": "a"}],
+        msg="simple locale should accept caseFirst upper with strength 1 without error",
+    ),
+    CommandTestCase(
+        "simple_backwards_true_strength1_accepted",
+        docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "A"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {
+                "locale": "simple",
+                "strength": 1,
+                "backwards": True,
+            },
+        },
+        # Binary sort: A (65) < a (97).
+        expected=[{"_id": 2, "x": "A"}, {"_id": 1, "x": "a"}],
+        msg="simple locale should accept backwards true with strength 1 without error",
+    ),
+    CommandTestCase(
+        "simple_numeric_ordering_no_effect",
+        docs=[{"_id": 1, "x": "10"}, {"_id": 2, "x": "2"}, {"_id": 3, "x": "1"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "simple", "numericOrdering": True},
+        },
+        # Binary/lexicographic sort: "1" < "10" < "2".
+        expected=[
+            {"_id": 3, "x": "1"},
+            {"_id": 1, "x": "10"},
+            {"_id": 2, "x": "2"},
+        ],
+        msg="simple locale should ignore numericOrdering and use binary comparison",
+    ),
+    CommandTestCase(
+        "simple_strength1_binary_comparison",
+        docs=[{"_id": 1, "x": "a"}, {"_id": 2, "x": "A"}, {"_id": 3, "x": "B"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "simple", "strength": 1},
+        },
+        # Binary sort: A (65) < B (66) < a (97).
+        expected=[
+            {"_id": 2, "x": "A"},
+            {"_id": 3, "x": "B"},
+            {"_id": 1, "x": "a"},
+        ],
+        msg="simple locale with strength 1 should still use binary comparison",
+    ),
+]
+
+# Property [Locale-Specific Default Differences]: the th, zh, en_US_POSIX, de
+# and tr locales exercised below deviate from the general collation rules.
+COLLATION_LOCALE_SPECIFIC_DEFAULTS_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "locale_th_shifted_default_match",
+        docs=[
+            {"_id": 1, "x": "a b"},
+            {"_id": 2, "x": "ab"},
+            {"_id": 3, "x": "a-b"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "ab"}}],  # NOTE(review): no $sort; order assumed
+            "cursor": {},
+            "collation": {"locale": "th", "strength": 1},  # no explicit "alternate" option
+        },
+        expected=[
+            {"_id": 1, "x": "a b"},
+            {"_id": 2, "x": "ab"},
+            {"_id": 3, "x": "a-b"},
+        ],
+        msg="th locale should default to alternate:shifted making punctuation/whitespace ignorable",
+    ),
+    CommandTestCase(
+        "locale_zh_secondary_weight_ordering",
+        docs=[
+            {"_id": 1, "x": "a"},
+            {"_id": 2, "x": "\u00e0"},
+            {"_id": 3, "x": "\u00e1"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "zh", "strength": 2},  # accented forms expected first
+        },
+        expected=[
+            {"_id": 3, "x": "\u00e1"},
+            {"_id": 2, "x": "\u00e0"},
+            {"_id": 1, "x": "a"},
+        ],
+        msg="zh locale should sort accented Latin characters before unaccented at secondary level",
+    ),
+    CommandTestCase(
+        "locale_en_us_posix_strength1_not_case_insensitive",
+        docs=[
+            {"_id": 1, "x": "abc"},
+            {"_id": 2, "x": "ABC"},
+            {"_id": 3, "x": "Abc"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "abc"}}],
+            "cursor": {},
+            "collation": {"locale": "en_US_POSIX", "strength": 1},
+        },
+        expected=[{"_id": 1, "x": "abc"}],  # only the exact-case document
+        msg="en_US_POSIX strength 1 should NOT enable case-insensitive matching",
+    ),
+    CommandTestCase(
+        "locale_en_us_posix_enforces_casefirst_constraint",
+        docs=[{"_id": 1, "x": "a"}],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [],
+            "cursor": {},
+            "collation": {"locale": "en_US_POSIX", "caseFirst": "upper", "strength": 1},
+        },
+        error_code=BAD_VALUE_ERROR,  # same options are accepted with locale "simple" above
+        msg="en_US_POSIX should enforce caseFirst constraint unlike simple locale",
+    ),
+    CommandTestCase(
+        "locale_de_eszett_equals_ss_strength1",
+        docs=[
+            {"_id": 1, "x": "stra\u00dfe"},
+            {"_id": 2, "x": "strasse"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "strasse"}}],  # NOTE(review): no $sort; order assumed
+            "cursor": {},
+            "collation": {"locale": "de", "strength": 1},
+        },
+        expected=[
+            {"_id": 1, "x": "stra\u00dfe"},
+            {"_id": 2, "x": "strasse"},
+        ],
+        msg="de locale strength 1 should treat \u00df as equivalent to ss",
+    ),
+    CommandTestCase(
+        "locale_de_eszett_distinct_strength2",
+        docs=[
+            {"_id": 1, "x": "stra\u00dfe"},
+            {"_id": 2, "x": "strasse"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "strasse"}}],
+            "cursor": {},
+            "collation": {"locale": "de", "strength": 2},
+        },
+        expected=[{"_id": 2, "x": "strasse"}],
+        msg="de locale strength 2 should distinguish \u00df from ss",
+    ),
+    CommandTestCase(
+        "locale_tr_i_maps_to_dotted_I",
+        docs=[
+            {"_id": 1, "x": "i"},
+            {"_id": 2, "x": "I"},
+            {"_id": 3, "x": "\u0130"},
+            {"_id": 4, "x": "\u0131"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "i"}}],  # NOTE(review): no $sort; order assumed
+            "cursor": {},
+            # Turkish: i <-> \u0130 (dotted I).
+            "collation": {"locale": "tr", "strength": 1},
+        },
+        expected=[
+            {"_id": 1, "x": "i"},
+            {"_id": 3, "x": "\u0130"},
+        ],
+        msg="tr locale should fold i to \u0130 (not I) at strength 1",
+    ),
+    CommandTestCase(
+        "locale_tr_I_maps_to_dotless_i",
+        docs=[
+            {"_id": 1, "x": "i"},
+            {"_id": 2, "x": "I"},
+            {"_id": 3, "x": "\u0130"},
+            {"_id": 4, "x": "\u0131"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "I"}}],  # NOTE(review): no $sort; order assumed
+            "cursor": {},
+            # Turkish: I <-> \u0131 (dotless i).
+            "collation": {"locale": "tr", "strength": 1},
+        },
+        expected=[
+            {"_id": 2, "x": "I"},
+            {"_id": 4, "x": "\u0131"},
+        ],
+        msg="tr locale should fold I to \u0131 (not i) at strength 1",
+    ),
+]
+
+COLLATION_LOCALE_DEFAULTS_TESTS: list[CommandTestCase] = (
+    COLLATION_SIMPLE_LOCALE_OVERRIDES_TESTS + COLLATION_LOCALE_SPECIFIC_DEFAULTS_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(COLLATION_LOCALE_DEFAULTS_TESTS))
+def test_collation_locale_defaults(database_client, collection, test):
+    """Test locale-specific default behavior and simple locale overrides."""
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_specific.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_specific.py
new file mode 100644
index 000000000..0e977533a
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_specific.py
@@ -0,0 +1,223 @@
+"""Tests for locale-specific collation behaviors not covered by other locale tests."""
+
+from __future__ import annotations
+
+import pytest
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Swedish Sort Order]: the Swedish locale sorts å, ä, ö after z,
+# unlike English which sorts them near their base characters.
+COLLATION_SWEDISH_SORT_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "sv_a_ring_after_z",
+        docs=[
+            {"_id": 1, "x": "z"},
+            {"_id": 2, "x": "\u00e5"},
+            {"_id": 3, "x": "\u00e4"},
+            {"_id": 4, "x": "a"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "sv", "strength": 3},
+        },
+        expected=[
+            {"_id": 4, "x": "a"},
+            {"_id": 1, "x": "z"},
+            {"_id": 2, "x": "\u00e5"},
+            {"_id": 3, "x": "\u00e4"},
+        ],
+        msg="sv locale should sort \u00e5 and \u00e4 after z",
+    ),
+    CommandTestCase(
+        "en_a_ring_before_z",  # same documents as the sv case, English locale as contrast
+        docs=[
+            {"_id": 1, "x": "z"},
+            {"_id": 2, "x": "\u00e5"},
+            {"_id": 3, "x": "\u00e4"},
+            {"_id": 4, "x": "a"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "en", "strength": 3},
+        },
+        expected=[
+            {"_id": 4, "x": "a"},
+            {"_id": 2, "x": "\u00e5"},
+            {"_id": 3, "x": "\u00e4"},
+            {"_id": 1, "x": "z"},
+        ],
+        msg="en locale should sort \u00e5 and \u00e4 before z",
+    ),
+    CommandTestCase(
+        "sv_match_a_ring_distinct_from_a",
+        docs=[
+            {"_id": 1, "x": "a"},
+            {"_id": 2, "x": "\u00e5"},
+            {"_id": 3, "x": "\u00e4"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "a"}}],
+            "cursor": {},
+            "collation": {"locale": "sv", "strength": 1},
+        },
+        expected=[{"_id": 1, "x": "a"}],
+        msg="sv locale strength 1 should distinguish a from \u00e5 and \u00e4",
+    ),
+]
+
+# Property [Japanese Hiragana/Katakana Equivalence]: Hiragana and Katakana
+# forms of the same character are treated as equivalent at strengths 1-3;
+# they are distinguished at strength 4 (quaternary).
+COLLATION_JAPANESE_KANA_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "ja_strength1_hiragana_equals_katakana_match",
+        docs=[
+            # U+3042 hiragana a, U+30A2 katakana a.
+            {"_id": 1, "x": "\u3042"},
+            {"_id": 2, "x": "\u30a2"},
+            {"_id": 3, "x": "\u3044"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "\u3042"}}, {"$sort": {"_id": 1}}],
+            "cursor": {},
+            "collation": {"locale": "ja", "strength": 1},
+        },
+        expected=[
+            {"_id": 1, "x": "\u3042"},
+            {"_id": 2, "x": "\u30a2"},
+        ],
+        msg="ja locale strength 1 should treat hiragana and katakana as equivalent",
+    ),
+    CommandTestCase(
+        "ja_strength3_hiragana_equals_katakana",
+        docs=[
+            {"_id": 1, "x": "\u3042"},
+            {"_id": 2, "x": "\u30a2"},
+            {"_id": 3, "x": "\u3044"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "\u3042"}}, {"$sort": {"_id": 1}}],
+            "cursor": {},
+            "collation": {"locale": "ja", "strength": 3},
+        },
+        expected=[
+            {"_id": 1, "x": "\u3042"},
+            {"_id": 2, "x": "\u30a2"},
+        ],
+        msg="ja locale strength 3 should still treat hiragana and katakana as equivalent",
+    ),
+    CommandTestCase(
+        "ja_strength4_hiragana_distinct_from_katakana",
+        docs=[
+            {"_id": 1, "x": "\u3042"},
+            {"_id": 2, "x": "\u30a2"},
+            {"_id": 3, "x": "\u3044"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "\u3042"}}],
+            "cursor": {},
+            "collation": {"locale": "ja", "strength": 4},
+        },
+        expected=[{"_id": 1, "x": "\u3042"}],
+        msg="ja locale strength 4 should distinguish hiragana from katakana",
+    ),
+    CommandTestCase(
+        "ja_strength1_multichar_kana_match",
+        docs=[
+            # Hiragana "ka" + "na" vs Katakana "ka" + "na".
+            {"_id": 1, "x": "\u304b\u306a"},
+            {"_id": 2, "x": "\u30ab\u30ca"},
+            {"_id": 3, "x": "\u305f"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "\u304b\u306a"}}, {"$sort": {"_id": 1}}],
+            "cursor": {},
+            "collation": {"locale": "ja", "strength": 1},
+        },
+        expected=[
+            {"_id": 1, "x": "\u304b\u306a"},
+            {"_id": 2, "x": "\u30ab\u30ca"},
+        ],
+        msg="ja locale strength 1 should treat multi-character hiragana/katakana as equivalent",
+    ),
+]
+
+# Property [Korean Jamo vs Precomposed Syllables]: with the ko locale at
+# strength 1, a precomposed syllable matches its Jamo decomposition, and
+# precomposed syllables sort in Korean alphabetical order.
+COLLATION_KOREAN_JAMO_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "ko_precomposed_equals_jamo_sort",  # NOTE(review): uses $match; "_sort" is misleading
+        docs=[
+            # U+AC00 = precomposed "ga", U+1100 U+1161 = Jamo "g" + "a".
+            {"_id": 1, "x": "\uac00"},
+            {"_id": 2, "x": "\u1100\u1161"},
+            {"_id": 3, "x": "\uac01"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$match": {"x": "\uac00"}}, {"$sort": {"_id": 1}}],
+            "cursor": {},
+            "collation": {"locale": "ko", "strength": 1},
+        },
+        expected=[
+            {"_id": 1, "x": "\uac00"},
+            {"_id": 2, "x": "\u1100\u1161"},
+        ],
+        msg="ko locale should treat precomposed syllable and Jamo decomposition as equivalent",
+    ),
+    CommandTestCase(
+        "ko_precomposed_sort_order",
+        docs=[
+            {"_id": 1, "x": "\ub098"},
+            {"_id": 2, "x": "\uac00"},
+            {"_id": 3, "x": "\ub2e4"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "ko", "strength": 1},
+        },
+        expected=[
+            {"_id": 2, "x": "\uac00"},
+            {"_id": 1, "x": "\ub098"},
+            {"_id": 3, "x": "\ub2e4"},
+        ],
+        msg="ko locale should sort precomposed syllables in Korean alphabetical order",
+    ),
+]
+
+COLLATION_LOCALE_SPECIFIC_TESTS: list[CommandTestCase] = (
+    COLLATION_SWEDISH_SORT_TESTS + COLLATION_JAPANESE_KANA_TESTS + COLLATION_KOREAN_JAMO_TESTS
+)
+
+
+@pytest.mark.parametrize("test", pytest_params(COLLATION_LOCALE_SPECIFIC_TESTS))
+def test_collation_locale_specific(database_client, collection, test):
+    """Test locale-specific collation behaviors."""
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_long_strings_hint.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_long_strings_hint.py
new file mode 100644
index 000000000..3466cb372
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_long_strings_hint.py
@@ -0,0 +1,182 @@
+"""Tests for collation with long strings and hint parameter interaction."""
+
+from __future__ import annotations
+
+import pytest
+from pymongo import IndexModel
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+
+# Property [Long String Collation]: collation correctly compares strings of
+# 10,000+ characters, including cases where the distinguishing difference
+# appears near the end of the string.
+COLLATION_LONG_STRING_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "long_string_match_exact",
+        docs=[
+            {"_id": 1, "x": "a" * 10_000},
+            {"_id": 2, "x": "a" * 9_999 + "b"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "a" * 10_000},
+            "collation": {"locale": "en", "strength": 3},
+        },
+        expected=[{"_id": 1, "x": "a" * 10_000}],
+        msg="collation should correctly match long strings without truncation",
+    ),
+    CommandTestCase(
+        "long_string_case_difference_at_end",
+        docs=[
+            {"_id": 1, "x": "a" * 9_999 + "b"},
+            {"_id": 2, "x": "a" * 9_999 + "B"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "a" * 9_999 + "b"},
+            "collation": {"locale": "en", "strength": 2},
+        },
+        # Both documents are expected: at strength 2 the trailing b/B difference
+        # must not prevent a match.
+        # NOTE(review): no sort is requested; result order is assumed.
+        expected=[
+            {"_id": 1, "x": "a" * 9_999 + "b"},
+            {"_id": 2, "x": "a" * 9_999 + "B"},
+        ],
+        msg="collation should ignore case difference at end of long string",
+    ),
+    CommandTestCase(
+        "long_string_sort_difference_at_end",
+        docs=[
+            {"_id": 1, "x": "a" * 9_999 + "c"},
+            {"_id": 2, "x": "a" * 9_999 + "b"},
+            {"_id": 3, "x": "a" * 9_999 + "a"},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [{"$sort": {"x": 1}}],
+            "cursor": {},
+            "collation": {"locale": "en", "strength": 3},
+        },
+        expected=[
+            {"_id": 3, "x": "a" * 9_999 + "a"},
+            {"_id": 2, "x": "a" * 9_999 + "b"},
+            {"_id": 1, "x": "a" * 9_999 + "c"},
+        ],
+        msg="collation should correctly sort strings differing only at position 10000",
+    ),
+    CommandTestCase(
+        "long_string_group_dedup",
+        docs=[
+            {"_id": 1, "x": "a" * 10_000 + "b", "v": 1},
+            {"_id": 2, "x": "a" * 10_000 + "B", "v": 2},
+        ],
+        command=lambda ctx: {
+            "aggregate": ctx.collection,
+            "pipeline": [
+                # Sorting by _id first feeds the lowercase variant into $group
+                # first; the expected group key is that lowercase variant.
+                {"$sort": {"_id": 1}},
+                {"$group": {"_id": "$x", "total": {"$sum": "$v"}}},
+            ],
+            "cursor": {},
+            "collation": {"locale": "en", "strength": 2},
+        },
+        expected=[{"_id": "a" * 10_000 + "b", "total": 3}],
+        msg="collation should correctly deduplicate long strings differing only in case at end",
+    ),
+]
+
+# Property [Hint with Mismatched Collation]: when hint forces usage of a
+# collated index but the query specifies a different collation, the server
+# still returns correct results according to the query's collation.
+COLLATION_HINT_TESTS: list[CommandTestCase] = [
+    CommandTestCase(
+        "hint_collated_index_different_query_collation",
+        indexes=[
+            IndexModel(
+                [("x", 1)],
+                collation={"locale": "en", "strength": 2},
+                name="x_ci",
+            )
+        ],
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "apple"},
+            "hint": "x_ci",
+            # Query strength 3 differs from the index's strength 2.
+            "collation": {"locale": "en", "strength": 3},
+        },
+        expected=[{"_id": 1, "x": "apple"}],
+        msg="hint with mismatched collation should return correct results per query collation",
+    ),
+    CommandTestCase(
+        "hint_collated_index_matching_collation",
+        indexes=[
+            IndexModel(
+                [("x", 1)],
+                collation={"locale": "en", "strength": 2},
+                name="x_ci",
+            )
+        ],
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "apple"},
+            "hint": "x_ci",
+            "collation": {"locale": "en", "strength": 2},
+        },
+        # NOTE(review): no sort is requested; result order is assumed.
+        expected=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+        ],
+        msg="hint with matching collation should return case-insensitive results",
+    ),
+    CommandTestCase(
+        "hint_collated_index_no_query_collation",
+        indexes=[
+            IndexModel(
+                [("x", 1)],
+                collation={"locale": "en", "strength": 2},
+                name="x_ci",
+            )
+        ],
+        docs=[
+            {"_id": 1, "x": "apple"},
+            {"_id": 2, "x": "Apple"},
+            {"_id": 3, "x": "banana"},
+        ],
+        command=lambda ctx: {
+            "find": ctx.collection,
+            "filter": {"x": "apple"},
+            "hint": "x_ci",
+        },
+        expected=[{"_id": 1, "x": "apple"}],
+        msg="hint without query collation should use binary comparison not index collation",
+    ),
+]
+
+COLLATION_LONG_STRING_AND_HINT_TESTS = COLLATION_LONG_STRING_TESTS + COLLATION_HINT_TESTS
+
+
+@pytest.mark.parametrize("test", pytest_params(COLLATION_LONG_STRING_AND_HINT_TESTS))
+def test_collation_long_strings_hint(database_client, collection, test):
+    """Test collation with long strings and hint parameter.
+
+    Every case in this module expects a list of documents, so the result is
+    asserted without the raw_res flag.
+    """
+    collection = test.prepare(database_client, collection)
+    ctx = CommandContext.from_collection(collection)
+    result = execute_command(collection, test.build_command(ctx))
+    assertResult(
+        result,
+        expected=test.build_expected(ctx),
+        error_code=test.error_code,
+        msg=test.msg,
+    )
diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_metadata.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_metadata.py
new file mode 100644
index 000000000..053c78eeb
--- /dev/null
+++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_metadata.py
@@ -0,0 +1,130 @@
+"""Tests for collation metadata reported by listCollections and listIndexes."""
+
+from __future__ import annotations
+
+import pytest
+from pymongo import IndexModel
+
+from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import (
+    CommandContext,
+    CommandTestCase,
+)
+from documentdb_tests.framework.assertions import assertResult
+from documentdb_tests.framework.executor import execute_command
+from documentdb_tests.framework.parametrize import pytest_params
+from documentdb_tests.framework.property_checks import Eq
+from documentdb_tests.framework.target_collection import CustomCollection
+
+# Property [ListCollections Reports Full Collation]: listCollections reports the
+# complete collation document including all specified options, not just locale.
+COLLATION_LIST_COLLECTIONS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "list_collections_full_collation", + target_collection=CustomCollection( + options={"collation": {"locale": "en", "strength": 2, "caseLevel": True}} + ), + docs=[], + command=lambda ctx: {"listCollections": 1, "filter": {"name": ctx.collection}}, + expected={ + "cursor.firstBatch.0": { + "options.collation.locale": Eq("en"), + "options.collation.strength": Eq(2), + "options.collation.caseLevel": Eq(True), + }, + }, + msg="listCollections should report full collation document with all options", + ), + CommandTestCase( + "list_collections_numeric_ordering", + target_collection=CustomCollection( + options={"collation": {"locale": "en", "numericOrdering": True}} + ), + docs=[], + command=lambda ctx: {"listCollections": 1, "filter": {"name": ctx.collection}}, + expected={ + "cursor.firstBatch.0": { + "options.collation.locale": Eq("en"), + "options.collation.numericOrdering": Eq(True), + }, + }, + msg="listCollections should report numericOrdering in collation", + ), + CommandTestCase( + "list_collections_casefirst", + target_collection=CustomCollection( + options={"collation": {"locale": "en", "caseFirst": "upper"}} + ), + docs=[], + command=lambda ctx: {"listCollections": 1, "filter": {"name": ctx.collection}}, + expected={ + "cursor.firstBatch.0": { + "options.collation.locale": Eq("en"), + "options.collation.caseFirst": Eq("upper"), + }, + }, + msg="listCollections should report caseFirst in collation", + ), +] + +# Property [ListIndexes Reports Index Collation]: listIndexes reports the +# collation document for indexes created with a specific collation, including +# all specified collation options. 
+COLLATION_LIST_INDEXES_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "list_indexes_reports_collation_locale_and_strength", + docs=[{"_id": 1, "x": "a"}], + indexes=[ + IndexModel( + [("x", 1)], + name="x_collated", + collation={"locale": "fr", "strength": 1}, + ) + ], + command=lambda ctx: {"listIndexes": ctx.collection}, + expected={ + "cursor.firstBatch.1": { + "name": Eq("x_collated"), + "collation.locale": Eq("fr"), + "collation.strength": Eq(1), + }, + }, + msg="listIndexes should report collation locale and strength on index", + ), + CommandTestCase( + "list_indexes_reports_numeric_ordering", + docs=[{"_id": 1, "x": "a"}], + indexes=[ + IndexModel( + [("x", 1)], + name="x_numeric", + collation={"locale": "en", "numericOrdering": True}, + ) + ], + command=lambda ctx: {"listIndexes": ctx.collection}, + expected={ + "cursor.firstBatch.1": { + "name": Eq("x_numeric"), + "collation.locale": Eq("en"), + "collation.numericOrdering": Eq(True), + }, + }, + msg="listIndexes should report numericOrdering in index collation", + ), +] + +COLLATION_METADATA_TESTS = COLLATION_LIST_COLLECTIONS_TESTS + COLLATION_LIST_INDEXES_TESTS + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_METADATA_TESTS)) +def test_collation_metadata(database_client, collection, test): + """Test that collation metadata is correctly reported.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_non_string.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_non_string.py new file mode 100644 index 000000000..b8dfb1f31 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_non_string.py @@ -0,0 +1,151 @@ 
+"""Tests for non-string values being unaffected by collation.""" + +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest +from bson import Decimal128, Int64, ObjectId + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Non-String Values Unaffected]: non-string types use BSON type +# ordering regardless of collation settings, and collation only affects +# string-to-string comparisons. +COLLATION_NON_STRING_UNAFFECTED_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "nonstring_sort_bson_type_order", + docs=[ + {"_id": 1, "x": 42}, + {"_id": 2, "x": True}, + {"_id": 3, "x": None}, + {"_id": 4, "x": "hello"}, + {"_id": 5, "x": ObjectId("000000000000000000000001")}, + {"_id": 6, "x": datetime(2021, 1, 1, tzinfo=timezone.utc)}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 3, "x": None}, + {"_id": 1, "x": 42}, + {"_id": 4, "x": "hello"}, + {"_id": 5, "x": ObjectId("000000000000000000000001")}, + {"_id": 2, "x": True}, + {"_id": 6, "x": datetime(2021, 1, 1, tzinfo=timezone.utc)}, + ], + msg="non-string types should sort in BSON type order regardless of collation", + ), + CommandTestCase( + "nonstring_sort_numeric_values", + docs=[ + {"_id": 1, "x": 10}, + {"_id": 2, "x": 2}, + {"_id": 3, "x": Int64(100)}, + {"_id": 4, "x": 1.5}, + {"_id": 5, "x": Decimal128("3")}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 4, "x": 1.5}, + {"_id": 2, "x": 2}, + 
{"_id": 5, "x": Decimal128("3")}, + {"_id": 1, "x": 10}, + {"_id": 3, "x": Int64(100)}, + ], + msg="numeric values should sort by value regardless of collation", + ), + CommandTestCase( + "nonstring_null_missing_before_empty_string", + docs=[ + {"_id": 1, "x": ""}, + {"_id": 2, "x": None}, + {"_id": 3}, + {"_id": 4, "x": "a"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 2, "x": None}, + {"_id": 3}, + {"_id": 1, "x": ""}, + {"_id": 4, "x": "a"}, + ], + msg="null and missing should sort before empty string in collation-aware sort", + ), + CommandTestCase( + "nonstring_match_null_matches_null_and_missing", + docs=[ + {"_id": 1, "x": "hello"}, + {"_id": 2, "x": None}, + {"_id": 3}, + {"_id": 4, "x": 42}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": None}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 2, "x": None}, + {"_id": 3}, + ], + msg="$match with null should match both null and missing regardless of collation", + ), + CommandTestCase( + "nonstring_group_null_and_missing_together", + docs=[ + {"_id": 1, "x": None, "v": 10}, + {"_id": 2, "v": 20}, + {"_id": 3, "x": "a", "v": 30}, + {"_id": 4, "x": "A", "v": 40}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$group": {"_id": "$x", "total": {"$sum": "$v"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": None, "total": 30}, + {"_id": "a", "total": 70}, + ], + msg="$group should group null and missing values together regardless of collation", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_NON_STRING_UNAFFECTED_TESTS)) +def test_collation_non_string(database_client, collection, test): + """Test that non-string values are unaffected by collation.""" + 
collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_normalization.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_normalization.py new file mode 100644 index 000000000..1943fa6e3 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_normalization.py @@ -0,0 +1,171 @@ +"""Tests for normalization behavior and null acceptance in collation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Normalization Behavior]: normalization:true checks and performs +# normalization for comparison; ICU handles canonical equivalence (NFC/NFD) +# regardless of the flag value; compatibility decomposition (ligatures, +# superscripts) is handled by strength levels not normalization; simple locale +# disables normalization entirely; normalization does NOT modify stored or +# output values. 
+COLLATION_NORMALIZATION_BEHAVIOR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "normalization_true_accepted", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [], + "cursor": {}, + "collation": {"locale": "en", "normalization": True}, + }, + expected=[{"_id": 1, "x": "hello"}], + msg="aggregate should accept normalization:true", + ), + CommandTestCase( + "normalization_false_accepted", + docs=[{"_id": 1, "x": "hello"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [], + "cursor": {}, + "collation": {"locale": "en", "normalization": False}, + }, + expected=[{"_id": 1, "x": "hello"}], + msg="aggregate should accept normalization:false", + ), + CommandTestCase( + "normalization_canonical_equivalence_regardless_of_flag", + docs=[ + {"_id": 1, "x": "\u00e9"}, # Precomposed e-acute. + {"_id": 2, "x": "e\u0301"}, # Decomposed e + combining acute. + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "\u00e9"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3, "normalization": False}, + }, + expected=[ + {"_id": 1, "x": "\u00e9"}, + {"_id": 2, "x": "e\u0301"}, + ], + msg="ICU should handle canonical equivalence even with normalization:false", + ), + CommandTestCase( + "normalization_compatibility_decomposition_by_strength_not_flag", + docs=[ + {"_id": 1, "x": "\ufb01"}, # fi ligature (compatibility decomposition). + {"_id": 2, "x": "fi"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "fi"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3, "normalization": True}, + }, + expected=[{"_id": 2, "x": "fi"}], + msg="compatibility decomposition not affected by normalization at strength 3", + ), + CommandTestCase( + "normalization_compatibility_decomposition_at_strength_1", + docs=[ + {"_id": 1, "x": "\ufb01"}, # fi ligature. 
+ {"_id": 2, "x": "fi"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "fi"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1, "normalization": False}, + }, + expected=[ + {"_id": 1, "x": "\ufb01"}, + {"_id": 2, "x": "fi"}, + ], + msg="compatibility decomposition handled by strength 1 regardless of normalization", + ), + CommandTestCase( + "normalization_simple_locale_disables", + docs=[ + {"_id": 1, "x": "\u00e9"}, # Precomposed e-acute. + {"_id": 2, "x": "e\u0301"}, # Decomposed e + combining acute. + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "\u00e9"}}], + "cursor": {}, + "collation": {"locale": "simple", "normalization": True}, + }, + expected=[{"_id": 1, "x": "\u00e9"}], + msg="simple locale should disable normalization entirely (binary comparison)", + ), + CommandTestCase( + "normalization_group_key_preserves_first_encountered", + docs=[ + {"_id": 1, "x": "e\u0301", "v": 1}, # Decomposed e-acute first. + {"_id": 2, "x": "\u00e9", "v": 2}, # Precomposed e-acute second. + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$sort": {"_id": 1}}, + {"$group": {"_id": "$x", "vals": {"$push": "$v"}}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 3, "normalization": True}, + }, + expected=[{"_id": "e\u0301", "vals": [1, 2]}], + msg="normalization should not modify stored values; group key preserves first-encountered", + ), +] + +# Property [Normalization Null Acceptance]: null for normalization is treated as +# omitted (uses default false). 
+COLLATION_NORMALIZATION_NULL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "normalization_null_uses_default_false", + docs=[ + {"_id": 1, "x": "\u00e9"}, + {"_id": 2, "x": "e\u0301"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "\u00e9"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3, "normalization": None}, + }, + expected=[ + {"_id": 1, "x": "\u00e9"}, + {"_id": 2, "x": "e\u0301"}, + ], + msg="aggregate should treat null normalization as omitted (default false)", + ), +] + +COLLATION_NORMALIZATION_TESTS: list[CommandTestCase] = ( + COLLATION_NORMALIZATION_BEHAVIOR_TESTS + COLLATION_NORMALIZATION_NULL_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_NORMALIZATION_TESTS)) +def test_collation_normalization(database_client, collection, test): + """Test Unicode normalization behavior under collation.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_numeric_ordering.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_numeric_ordering.py new file mode 100644 index 000000000..bcc03b215 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_numeric_ordering.py @@ -0,0 +1,427 @@ +"""Tests for numericOrdering behavior and null acceptance in collation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from 
documentdb_tests.framework.parametrize import pytest_params + +# Property [numericOrdering Behavior]: numericOrdering:true compares contiguous +# digit substrings as numbers rather than lexicographically. +COLLATION_NUMERIC_ORDERING_BEHAVIOR_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "numeric_ordering_true_sort", + docs=[ + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + {"_id": 3, "x": "1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 3, "x": "1"}, + {"_id": 2, "x": "2"}, + {"_id": 1, "x": "10"}, + ], + msg="numericOrdering:true should sort digit substrings numerically", + ), + CommandTestCase( + "numeric_ordering_false_sort", + docs=[ + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + {"_id": 3, "x": "1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": False}, + }, + expected=[ + {"_id": 3, "x": "1"}, + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + ], + msg="numericOrdering:false should sort digit substrings lexicographically", + ), + CommandTestCase( + "numeric_ordering_separator_dot", + docs=[ + {"_id": 1, "x": "1.10"}, + {"_id": 2, "x": "1.2"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "1.2"}, + {"_id": 1, "x": "1.10"}, + ], + msg="dot should split digit groups so 1.2 < 1.10 numerically", + ), + CommandTestCase( + "numeric_ordering_separator_hyphen", + docs=[ + {"_id": 1, "x": "1-10"}, + {"_id": 2, "x": "1-2"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": 
"1-2"}, + {"_id": 1, "x": "1-10"}, + ], + msg="hyphen should split digit groups so 1-2 < 1-10 numerically", + ), + CommandTestCase( + "numeric_ordering_separator_comma", + docs=[ + {"_id": 1, "x": "1,10"}, + {"_id": 2, "x": "1,2"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "1,2"}, + {"_id": 1, "x": "1,10"}, + ], + msg="comma should split digit groups so 1,2 < 1,10 numerically", + ), + CommandTestCase( + "numeric_ordering_separator_space", + docs=[ + {"_id": 1, "x": "1 10"}, + {"_id": 2, "x": "1 2"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "1 2"}, + {"_id": 1, "x": "1 10"}, + ], + msg="space should split digit groups so 1 2 < 1 10 numerically", + ), + CommandTestCase( + "numeric_ordering_separator_e", + docs=[ + {"_id": 1, "x": "1e10"}, + {"_id": 2, "x": "1e2"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "1e2"}, + {"_id": 1, "x": "1e10"}, + ], + msg="letter e should split digit groups and not be treated as exponent", + ), + CommandTestCase( + "numeric_ordering_leading_zeros_equal_strength4", + docs=[ + {"_id": 1, "x": "007"}, + {"_id": 2, "x": "7"}, + {"_id": 3, "x": "07"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "7"}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True, "strength": 4}, + }, + expected=[ + {"_id": 1, "x": "007"}, + {"_id": 2, "x": "7"}, + {"_id": 3, "x": "07"}, + ], + msg="leading zeros should be numerically equal at strength 4", + ), + CommandTestCase( + 
"numeric_ordering_leading_zeros_distinct_strength5", + docs=[ + {"_id": 1, "x": "007"}, + {"_id": 2, "x": "7"}, + {"_id": 3, "x": "07"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "7"}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True, "strength": 5}, + }, + expected=[{"_id": 2, "x": "7"}], + msg="leading zeros should be distinguished at strength 5", + ), + CommandTestCase( + "numeric_ordering_leading_zeros_sort_strength5", + docs=[ + {"_id": 1, "x": "7"}, + {"_id": 2, "x": "007"}, + {"_id": 3, "x": "07"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True, "strength": 5}, + }, + expected=[ + {"_id": 2, "x": "007"}, + {"_id": 3, "x": "07"}, + {"_id": 1, "x": "7"}, + ], + msg="at strength 5 more leading zeros should sort earlier", + ), + CommandTestCase( + "numeric_ordering_unicode_arabic_indic", + docs=[ + {"_id": 1, "x": "\u0661\u0660"}, + {"_id": 2, "x": "\u0662"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "\u0662"}, + {"_id": 1, "x": "\u0661\u0660"}, + ], + msg="Arabic-Indic Nd digits should be compared numerically", + ), + CommandTestCase( + "numeric_ordering_unicode_fullwidth", + docs=[ + {"_id": 1, "x": "\uff11\uff10"}, + {"_id": 2, "x": "\uff12"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "\uff12"}, + {"_id": 1, "x": "\uff11\uff10"}, + ], + msg="Fullwidth Nd digits should be compared numerically", + ), + CommandTestCase( + "numeric_ordering_unicode_devanagari", + docs=[ + {"_id": 1, "x": "\u0967\u0966"}, + {"_id": 2, "x": "\u0968"}, + ], + 
command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "\u0968"}, + {"_id": 1, "x": "\u0967\u0966"}, + ], + msg="Devanagari Nd digits should be compared numerically", + ), + CommandTestCase( + "numeric_ordering_unicode_thai", + docs=[ + {"_id": 1, "x": "\u0e51\u0e50"}, + {"_id": 2, "x": "\u0e52"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "\u0e52"}, + {"_id": 1, "x": "\u0e51\u0e50"}, + ], + msg="Thai Nd digits should be compared numerically", + ), + CommandTestCase( + "numeric_ordering_mixed_script_merged", + docs=[ + {"_id": 1, "x": "1\u0662"}, + {"_id": 2, "x": "5"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "5"}, + {"_id": 1, "x": "1\u0662"}, + ], + msg="mixed-script adjacent Nd digits are merged into one numeric group", + ), + CommandTestCase( + "numeric_ordering_254_boundary_split", + docs=[ + {"_id": 1, "x": "9" * 254}, + {"_id": 2, "x": "1" * 255}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "1" * 255}, + {"_id": 1, "x": "9" * 254}, + ], + msg="255-digit number should sort before 254-digit number due to 254-char split", + ), + CommandTestCase( + "numeric_ordering_254_no_split", + docs=[ + {"_id": 1, "x": "9" * 253}, + {"_id": 2, "x": "1" * 254}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + 
{"_id": 1, "x": "9" * 253}, + {"_id": 2, "x": "1" * 254}, + ], + msg="254-digit number should sort correctly without split (more digits = larger)", + ), + CommandTestCase( + "numeric_ordering_non_nd_superscript", + docs=[ + {"_id": 1, "x": "a\u00b3"}, + {"_id": 2, "x": "a\u00b9\u00b2"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "a\u00b9\u00b2"}, + {"_id": 1, "x": "a\u00b3"}, + ], + msg="superscript digits should not be treated as numeric digits", + ), + CommandTestCase( + "numeric_ordering_non_nd_circled", + docs=[ + {"_id": 1, "x": "a\u2462"}, + {"_id": 2, "x": "a\u2460\u2461"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "a\u2460\u2461"}, + {"_id": 1, "x": "a\u2462"}, + ], + msg="circled digits should not be treated as numeric digits", + ), + CommandTestCase( + "numeric_ordering_non_nd_roman", + docs=[ + {"_id": 1, "x": "a\u2161"}, + {"_id": 2, "x": "a3"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": True}, + }, + expected=[ + {"_id": 2, "x": "a3"}, + {"_id": 1, "x": "a\u2161"}, + ], + msg="Roman numeral characters should not be treated as numeric digits", + ), +] + +# Property [numericOrdering Null Acceptance]: null for numericOrdering is +# treated as omitted, using the default value of false. 
+COLLATION_NUMERIC_ORDERING_NULL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "numeric_ordering_null_uses_default_false", + docs=[ + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + {"_id": 3, "x": "1"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "numericOrdering": None}, + }, + expected=[ + {"_id": 3, "x": "1"}, + {"_id": 1, "x": "10"}, + {"_id": 2, "x": "2"}, + ], + msg="aggregate should treat null numericOrdering as omitted (default false, lexicographic)", + ), +] + +COLLATION_NUMERIC_ORDERING_TESTS: list[CommandTestCase] = ( + COLLATION_NUMERIC_ORDERING_BEHAVIOR_TESTS + COLLATION_NUMERIC_ORDERING_NULL_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_NUMERIC_ORDERING_TESTS)) +def test_collation_numeric_ordering(database_client, collection, test): + """Test numericOrdering collation option behavior.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_projection.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_projection.py new file mode 100644 index 000000000..3f79322dd --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_projection.py @@ -0,0 +1,113 @@ +"""Tests for collation effects on positional $ and $elemMatch projection.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from 
documentdb_tests.framework.parametrize import pytest_params + +# Property [Positional $ Projection with Collation]: the positional $ operator +# projects the first array element that matched the query filter under the +# active collation, so case-insensitive matching projects the correct element. +COLLATION_POSITIONAL_PROJECTION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "positional_projects_collation_matched_element", + docs=[{"_id": 1, "items": ["Banana", "Apple", "cherry"]}], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"items": "apple"}, + "projection": {"items.$": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "items": ["Apple"]}], + msg="positional $ should project element matched under collation", + ), + CommandTestCase( + "positional_no_collation_binary", + docs=[{"_id": 1, "items": ["Apple", "apple", "banana"]}], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"items": "apple"}, + "projection": {"items.$": 1}, + }, + expected=[{"_id": 1, "items": ["apple"]}], + msg="positional $ without collation should project binary-matched element", + ), + CommandTestCase( + "positional_accent_insensitive", + docs=[{"_id": 1, "items": ["tea", "caf\u00e9", "juice"]}], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"items": "cafe"}, + "projection": {"items.$": 1}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[{"_id": 1, "items": ["caf\u00e9"]}], + msg="positional $ with strength 1 should project accent-variant match", + ), +] + +# Property [$elemMatch Projection with Collation]: the $elemMatch projection +# operator uses the active collation to determine which array element to +# project, enabling case-insensitive element selection. 
+COLLATION_ELEMMATCH_PROJECTION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "elemmatch_proj_case_insensitive", + docs=[{"_id": 1, "items": [{"v": "Banana"}, {"v": "Apple"}, {"v": "cherry"}]}], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": 1}, + "projection": {"items": {"$elemMatch": {"v": "apple"}}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "items": [{"v": "Apple"}]}], + msg="$elemMatch projection with strength 2 should match case-insensitively", + ), + CommandTestCase( + "elemmatch_proj_no_collation_binary", + docs=[{"_id": 1, "items": [{"v": "Apple"}, {"v": "apple"}, {"v": "banana"}]}], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": 1}, + "projection": {"items": {"$elemMatch": {"v": "apple"}}}, + }, + expected=[{"_id": 1, "items": [{"v": "apple"}]}], + msg="$elemMatch projection without collation should use binary comparison", + ), + CommandTestCase( + "elemmatch_proj_comparison_operator", + docs=[{"_id": 1, "items": [{"v": "apple"}, {"v": "Banana"}, {"v": "cherry"}]}], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": 1}, + "projection": {"items": {"$elemMatch": {"v": {"$gte": "banana"}}}}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[{"_id": 1, "items": [{"v": "Banana"}]}], + msg="$elemMatch projection with $gte should use collation", + ), +] + +COLLATION_PROJECTION_TESTS = ( + COLLATION_POSITIONAL_PROJECTION_TESTS + COLLATION_ELEMMATCH_PROJECTION_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_PROJECTION_TESTS)) +def test_collation_projection(database_client, collection, test): + """Test collation effects on array projection operators.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + 
msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_schema_validation.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_schema_validation.py new file mode 100644 index 000000000..1d165098c --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_schema_validation.py @@ -0,0 +1,138 @@ +"""Tests for collation interaction with document schema validation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import DOCUMENT_VALIDATION_FAILURE_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Validator Enum Ignores Collection Collation]: $jsonSchema enum +# validation always uses binary comparison regardless of the collection's default +# collation, so case-variant values are rejected. 
+COLLATION_VALIDATOR_ENUM_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "enum_case_insensitive_rejects_case_variant", + target_collection=CustomCollection( + options={ + "collation": {"locale": "en", "strength": 2}, + "validator": { + "$jsonSchema": { + "properties": {"status": {"enum": ["active", "inactive"]}}, + } + }, + } + ), + docs=[], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 1, "status": "Active"}], + }, + error_code=DOCUMENT_VALIDATION_FAILURE_ERROR, + msg="$jsonSchema enum should reject case-variant even with case-insensitive collation", + ), + CommandTestCase( + "enum_exact_match_accepts", + target_collection=CustomCollection( + options={ + "collation": {"locale": "en", "strength": 2}, + "validator": { + "$jsonSchema": { + "properties": {"status": {"enum": ["active", "inactive"]}}, + } + }, + } + ), + docs=[], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 1, "status": "active"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="$jsonSchema enum should accept exact match value", + ), + CommandTestCase( + "enum_rejects_non_match", + target_collection=CustomCollection( + options={ + "collation": {"locale": "en", "strength": 2}, + "validator": { + "$jsonSchema": { + "properties": {"status": {"enum": ["active", "inactive"]}}, + } + }, + } + ), + docs=[], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 1, "status": "pending"}], + }, + error_code=DOCUMENT_VALIDATION_FAILURE_ERROR, + msg="$jsonSchema enum should reject non-matching value", + ), +] + +# Property [Validator Comparison with Collection Collation]: validator +# expressions using comparison operators respect the collection's default +# collation. 
+COLLATION_VALIDATOR_COMPARISON_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "validator_expr_eq_case_insensitive", + target_collection=CustomCollection( + options={ + "collation": {"locale": "en", "strength": 2}, + "validator": {"status": {"$in": ["active", "inactive"]}}, + } + ), + docs=[], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 1, "status": "ACTIVE"}], + }, + expected={"ok": 1.0, "n": 1}, + msg="validator $in should use collection collation for case-insensitive matching", + ), + CommandTestCase( + "validator_expr_eq_rejects", + target_collection=CustomCollection( + options={ + "collation": {"locale": "en", "strength": 2}, + "validator": {"status": {"$in": ["active", "inactive"]}}, + } + ), + docs=[], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": 1, "status": "pending"}], + }, + error_code=DOCUMENT_VALIDATION_FAILURE_ERROR, + msg="validator $in should reject non-matching values even with collation", + ), +] + +COLLATION_SCHEMA_VALIDATION_TESTS: list[CommandTestCase] = ( + COLLATION_VALIDATOR_ENUM_TESTS + COLLATION_VALIDATOR_COMPARISON_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_SCHEMA_VALIDATION_TESTS)) +def test_collation_schema_validation(database_client, collection, test): + """Test collation interaction with document schema validation.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_strength.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_strength.py new file mode 100644 index 000000000..e3fca589e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_strength.py @@ -0,0 
+1,283 @@ +"""Tests for strength field behavior in collation.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Strength in $sort]: strength 1 treats all case and accent variants +# as equivalent (stable sort preserves insertion order), strength 2 distinguishes +# accents but not case, strength 3 distinguishes all, strength 4 additionally +# distinguishes punctuation when alternate is shifted, and strength 5 +# additionally distinguishes strings that differ only by zero-width characters. +COLLATION_STRENGTH_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "sort_strength1_equivalent_variants", + docs=[ + {"_id": 1, "x": "b"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 2, "x": "\u00e1"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + {"_id": 1, "x": "b"}, + ], + msg="strength 1 sort should treat case and accent variants as equivalent", + ), + CommandTestCase( + "sort_strength2_accent_distinct_case_equivalent", + docs=[ + {"_id": 1, "x": "b"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 1, "x": "b"}, + ], + msg="strength 2 sort should distinguish accents but treat case variants as equivalent", + ), + CommandTestCase(
+ "sort_strength3_all_distinct", + docs=[ + {"_id": 1, "x": "b"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 3, "x": "A"}, + {"_id": 4, "x": "a"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[ + {"_id": 4, "x": "a"}, + {"_id": 3, "x": "A"}, + {"_id": 2, "x": "\u00e1"}, + {"_id": 1, "x": "b"}, + ], + msg="strength 3 sort should produce distinct positions for case and accent variants", + ), + CommandTestCase( + "sort_strength4_punctuation_with_shifted", + docs=[ + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a_b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 4, "alternate": "shifted"}, + }, + expected=[ + {"_id": 3, "x": "a_b"}, + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + ], + msg="strength 4 with shifted should distinguish punctuation in sort order", + ), + CommandTestCase( + "sort_strength3_shifted_punctuation_equivalent", + docs=[ + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a_b"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3, "alternate": "shifted"}, + }, + expected=[ + {"_id": 1, "x": "a-b"}, + {"_id": 2, "x": "ab"}, + {"_id": 3, "x": "a_b"}, + ], + msg="strength 3 with shifted should treat punctuation as equivalent", + ), + CommandTestCase( + "sort_strength5_zero_width_characters", + docs=[ + {"_id": 1, "x": "a\u200bbc"}, + {"_id": 2, "x": "abc"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 5}, + }, + expected=[ + {"_id": 2, "x": "abc"}, + {"_id": 1, "x": "a\u200bbc"}, + ], + msg="strength 5 should distinguish strings differing only by 
zero-width characters", + ), + CommandTestCase( + "sort_strength4_zero_width_equivalent", + docs=[ + {"_id": 1, "x": "a\u200bbc"}, + {"_id": 2, "x": "abc"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 4}, + }, + expected=[ + {"_id": 1, "x": "a\u200bbc"}, + {"_id": 2, "x": "abc"}, + ], + msg="strength 4 should treat strings differing only by zero-width characters as equivalent", + ), +] + +# Property [Strength in $match]: strength 1 ignores diacritics and case, +# strength 2 ignores case but distinguishes diacritics, strength 3 (default) +# distinguishes both. +COLLATION_STRENGTH_MATCH_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "strength1_ignores_case_and_diacritics", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "CAF\u00c9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "caf\u00e9"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 1}, + }, + expected=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "CAF\u00c9"}, + ], + msg="strength 1 should match all case and diacritic variants", + ), + CommandTestCase( + "strength2_ignores_case_not_diacritics", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "CAF\u00c9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "cafe"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + ], + msg="strength 2 should match case variants but not diacritic variants", + ), + CommandTestCase( + "strength2_diacritics_distinct", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": 
"CAF\u00c9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "caf\u00e9"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "CAF\u00c9"}, + ], + msg="strength 2 should match diacritic variants ignoring case", + ), + CommandTestCase( + "strength3_distinguishes_case_and_diacritics", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "CAF\u00c9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "cafe"}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "x": "cafe"}], + msg="strength 3 should match only exact case and diacritic", + ), + CommandTestCase( + "strength_default_is_3", + docs=[ + {"_id": 1, "x": "cafe"}, + {"_id": 2, "x": "Cafe"}, + {"_id": 3, "x": "caf\u00e9"}, + {"_id": 4, "x": "CAF\u00c9"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"x": "cafe"}}], + "cursor": {}, + "collation": {"locale": "en"}, + }, + expected=[{"_id": 1, "x": "cafe"}], + msg="default strength (omitted) should behave as strength 3", + ), +] + +COLLATION_STRENGTH_TESTS: list[CommandTestCase] = ( + COLLATION_STRENGTH_SORT_TESTS + COLLATION_STRENGTH_MATCH_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_STRENGTH_TESTS)) +def test_collation_strength(database_client, collection, test): + """Test collation strength levels in sort and match.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_supplementary_unicode.py 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_supplementary_unicode.py new file mode 100644 index 000000000..25dc5d8be --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_supplementary_unicode.py @@ -0,0 +1,199 @@ +"""Tests for collation behavior with supplementary Unicode characters (outside BMP).""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params + +# Property [Supplementary Character Matching]: collation correctly handles +# characters outside the Basic Multilingual Plane (U+10000+) for equality +# and comparison operations. +COLLATION_SUPPLEMENTARY_MATCH_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "emoji_equality", + docs=[ + # U+1F34E red apple, U+1F34F green apple, U+1F34A tangerine. 
+ {"_id": 1, "x": "\U0001f34e"}, + {"_id": 2, "x": "\U0001f34f"}, + {"_id": 3, "x": "\U0001f34a"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "\U0001f34e"}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "x": "\U0001f34e"}], + msg="find should match supplementary characters exactly under collation", + ), + CommandTestCase( + "emoji_ne", + docs=[ + {"_id": 1, "x": "\U0001f34e"}, + {"_id": 2, "x": "\U0001f34f"}, + {"_id": 3, "x": "\U0001f34a"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": {"$ne": "\U0001f34e"}}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[ + {"_id": 2, "x": "\U0001f34f"}, + {"_id": 3, "x": "\U0001f34a"}, + ], + msg="find $ne should correctly exclude supplementary characters under collation", + ), + CommandTestCase( + "musical_symbols_equality", + docs=[ + # U+1D11E musical symbol G clef, U+1D122 musical symbol F clef. + {"_id": 1, "x": "\U0001d11e"}, + {"_id": 2, "x": "\U0001d122"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "\U0001d11e"}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "x": "\U0001d11e"}], + msg="find should match supplementary musical symbols under collation", + ), +] + +# Property [Supplementary Character Sort Ordering]: collation produces a +# consistent sort order for supplementary characters. 
+COLLATION_SUPPLEMENTARY_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "emoji_sort_consistent", + docs=[ + {"_id": 1, "x": "\U0001f34e"}, + {"_id": 2, "x": "\U0001f34a"}, + {"_id": 3, "x": "apple"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[ + {"_id": 2, "x": "\U0001f34a"}, + {"_id": 1, "x": "\U0001f34e"}, + {"_id": 3, "x": "apple"}, + ], + msg="supplementary emoji should sort before Latin letters under collation", + ), + CommandTestCase( + "supplementary_mixed_with_bmp_sort", + docs=[ + {"_id": 1, "x": "z"}, + {"_id": 2, "x": "\U00010000"}, + {"_id": 3, "x": "a"}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$sort": {"x": 1}}], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[ + {"_id": 3, "x": "a"}, + {"_id": 1, "x": "z"}, + {"_id": 2, "x": "\U00010000"}, + ], + msg="supplementary U+10000 should sort after Latin letters under collation", + ), +] + +# Property [Supplementary Character Deduplication]: collation correctly +# deduplicates supplementary characters in $group and distinct.
+COLLATION_SUPPLEMENTARY_DEDUP_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "distinct_supplementary", + docs=[ + {"_id": 1, "x": "\U0001f34e"}, + {"_id": 2, "x": "\U0001f34e"}, + {"_id": 3, "x": "\U0001f34a"}, + ], + command=lambda ctx: { + "distinct": ctx.collection, + "key": "x", + "collation": {"locale": "en", "strength": 3}, + }, + expected={"values": ["\U0001f34a", "\U0001f34e"], "ok": 1.0}, + msg="distinct should correctly deduplicate supplementary characters", + ), + CommandTestCase( + "group_supplementary", + docs=[ + {"_id": 1, "x": "\U0001f34e", "v": 1}, + {"_id": 2, "x": "\U0001f34e", "v": 2}, + {"_id": 3, "x": "\U0001f34a", "v": 3}, + ], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [ + {"$group": {"_id": "$x", "total": {"$sum": "$v"}}}, + {"$sort": {"_id": 1}}, + ], + "cursor": {}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[ + {"_id": "\U0001f34a", "total": 3}, + {"_id": "\U0001f34e", "total": 3}, + ], + msg="$group should correctly group supplementary characters under collation", + ), +] + +# Property [Supplementary Characters with String Prefix]: strings that share a +# BMP prefix but differ in a supplementary character suffix are correctly +# distinguished. 
+COLLATION_SUPPLEMENTARY_PREFIX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "prefix_with_different_supplementary_suffix", + docs=[ + {"_id": 1, "x": "fruit\U0001f34e"}, + {"_id": 2, "x": "fruit\U0001f34a"}, + {"_id": 3, "x": "fruit"}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"x": "fruit\U0001f34e"}, + "collation": {"locale": "en", "strength": 3}, + }, + expected=[{"_id": 1, "x": "fruit\U0001f34e"}], + msg="find should distinguish strings differing only in supplementary suffix", + ), +] + +COLLATION_SUPPLEMENTARY_TESTS: list[CommandTestCase] = ( + COLLATION_SUPPLEMENTARY_MATCH_TESTS + + COLLATION_SUPPLEMENTARY_SORT_TESTS + + COLLATION_SUPPLEMENTARY_DEDUP_TESTS + + COLLATION_SUPPLEMENTARY_PREFIX_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_SUPPLEMENTARY_TESTS)) +def test_collation_supplementary_unicode(database_client, collection, test): + """Test collation behavior with supplementary Unicode characters.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(test.build_expected(ctx), list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_update.py new file mode 100644 index 000000000..3a3268b4d --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_update.py @@ -0,0 +1,273 @@ +"""Tests for collation behavior in the update command.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from 
documentdb_tests.framework.error_codes import ( + BAD_VALUE_ERROR, + MISSING_FIELD_ERROR, + TYPE_MISMATCH_ERROR, +) +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Update Filter Matching]: collation affects which documents the +# update filter selects, enabling case-insensitive and accent-insensitive +# matching for the query portion of the update. +COLLATION_UPDATE_FILTER_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "updateone_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": {"$set": {"v": 2}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="updateOne with strength 2 should match first case-insensitive document", + ), + CommandTestCase( + "updatemany_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "APPLE", "v": 1}, + {"_id": 4, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": {"$set": {"v": 2}}, + "multi": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 3, "nModified": 3}, + msg="updateMany with strength 2 should match all case variants", + ), + CommandTestCase( + "update_accent_insensitive", + docs=[ + {"_id": 1, "x": "cafe", "v": 1}, + {"_id": 2, "x": "caf\u00e9", "v": 1}, + {"_id": 3, "x": "other", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "cafe"}, + "u": {"$set": {"v": 2}}, + "multi": True, + "collation": {"locale": "en", "strength": 1}, + } + ], + }, + expected={"ok": 1.0, 
"n": 2, "nModified": 2}, + msg="update with strength 1 should match accent variants", + ), + CommandTestCase( + "update_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [{"q": {"x": "apple"}, "u": {"$set": {"v": 2}}, "multi": True}], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="update without collation should use binary comparison", + ), + CommandTestCase( + "update_gt_filter", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + {"_id": 4, "x": "Banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": {"$gt": "apple"}}, + "u": {"$set": {"v": 2}}, + "multi": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 2, "nModified": 2}, + msg="update $gt with strength 2 should compare case-insensitively", + ), +] + +# Property [Update Upsert with Collation]: collation affects the filter +# matching for upsert operations - if no document matches under the collation, +# a new document is inserted. 
+COLLATION_UPDATE_UPSERT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "upsert_match_found_case_insensitive", + docs=[ + {"_id": 1, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": {"$set": {"v": 2}}, + "upsert": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="upsert with collation should find existing case-variant and update it", + ), + CommandTestCase( + "upsert_no_match_inserts", + docs=[ + {"_id": 1, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 99, "x": "apple"}, + "u": {"$set": {"v": 2}}, + "upsert": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 0, "upserted": [{"index": 0, "_id": 99}]}, + msg="upsert with collation should insert when no match found", + ), +] + +# Property [Update Collation Validation]: the update command validates the +# collation document in each update statement. 
+COLLATION_UPDATE_VALIDATION_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "validation_non_object_collation", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [{"q": {"x": "a"}, "u": {"$set": {"v": 1}}, "collation": "en"}], + }, + error_code=TYPE_MISMATCH_ERROR, + msg="update with non-object collation should produce an error", + ), + CommandTestCase( + "validation_missing_locale", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [{"q": {"x": "a"}, "u": {"$set": {"v": 1}}, "collation": {"strength": 2}}], + }, + error_code=MISSING_FIELD_ERROR, + msg="update with collation missing locale should produce an error", + ), + CommandTestCase( + "validation_invalid_locale", + docs=[{"_id": 1, "x": "a"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "a"}, + "u": {"$set": {"v": 1}}, + "collation": {"locale": "invalid_locale_xyz"}, + } + ], + }, + error_code=BAD_VALUE_ERROR, + msg="update with invalid locale string should produce an error", + ), +] + +# Property [Update Collection Default Collation]: when no collation is specified +# on the update statement, the collection's default collation is used for +# filter matching. 
+COLLATION_UPDATE_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "collection_default_inherited", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [{"q": {"x": "apple"}, "u": {"$set": {"v": 2}}, "multi": True}], + }, + expected={"ok": 1.0, "n": 2, "nModified": 2}, + msg="update should inherit collection default collation", + ), + CommandTestCase( + "collection_default_overridden", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": {"$set": {"v": 2}}, + "multi": True, + "collation": {"locale": "en", "strength": 3}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="update with explicit collation should override collection default", + ), +] + +COLLATION_UPDATE_TESTS = ( + COLLATION_UPDATE_FILTER_TESTS + + COLLATION_UPDATE_UPSERT_TESTS + + COLLATION_UPDATE_VALIDATION_TESTS + + COLLATION_UPDATE_COLLECTION_DEFAULT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_UPDATE_TESTS)) +def test_collation_update(database_client, collection, test): + """Test collation behavior in the update command.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_filters.py 
b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_filters.py new file mode 100644 index 000000000..56df7c4f1 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_filters.py @@ -0,0 +1,169 @@ +"""Tests for collation effects on arrayFilters and positional update operators.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [ArrayFilters with Collation]: arrayFilters conditions use collation +# for string comparisons, enabling case-insensitive and accent-insensitive +# matching when selecting which array elements to update. 
+COLLATION_ARRAY_FILTERS_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "arrayfilter_eq_case_insensitive", + docs=[{"_id": 1, "items": [{"name": "Apple", "v": 1}, {"name": "banana", "v": 2}]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$set": {"items.$[elem].v": 99}}, + "arrayFilters": [{"elem.name": "apple"}], + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="arrayFilters with strength 2 should match case-insensitively", + ), + CommandTestCase( + "arrayfilter_no_collation_binary", + docs=[{"_id": 1, "items": [{"name": "Apple", "v": 1}, {"name": "apple", "v": 2}]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$set": {"items.$[elem].v": 99}}, + "arrayFilters": [{"elem.name": "apple"}], + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="arrayFilters without collation should use binary comparison", + ), + CommandTestCase( + "arrayfilter_comparison_case_insensitive", + docs=[{"_id": 1, "items": [{"name": "Apple", "v": 1}, {"name": "cherry", "v": 2}]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$set": {"items.$[elem].v": 99}}, + "arrayFilters": [{"elem.name": {"$gt": "banana"}}], + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="arrayFilters $gt with collation should compare case-insensitively", + ), + CommandTestCase( + "arrayfilter_in_case_insensitive", + docs=[ + { + "_id": 1, + "items": [ + {"name": "Apple", "v": 1}, + {"name": "BANANA", "v": 2}, + {"name": "cherry", "v": 3}, + ], + } + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$set": {"items.$[elem].v": 99}}, + "arrayFilters": [{"elem.name": {"$in": ["apple", "banana"]}}], + "collation": {"locale": "en", "strength": 2}, + } + ], + }, 
+ expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="arrayFilters $in with collation should match case-insensitively", + ), + CommandTestCase( + "arrayfilter_collection_default_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "items": [{"name": "Apple", "v": 1}, {"name": "banana", "v": 2}]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$set": {"items.$[elem].v": 99}}, + "arrayFilters": [{"elem.name": "apple"}], + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="arrayFilters should inherit collection default collation", + ), +] + +# Property [Positional $ Update with Collation]: the positional $ operator in +# an update targets the first array element matched by the query filter under +# the active collation. +COLLATION_POSITIONAL_UPDATE_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "positional_update_case_insensitive", + docs=[{"_id": 1, "tags": ["Apple", "banana", "cherry"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1, "tags": "apple"}, + "u": {"$set": {"tags.$": "REPLACED"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="positional $ should target element matched under collation", + ), + CommandTestCase( + "positional_update_no_collation_binary", + docs=[{"_id": 1, "tags": ["Apple", "apple", "banana"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1, "tags": "apple"}, + "u": {"$set": {"tags.$": "REPLACED"}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="positional $ without collation should use binary match", + ), +] + +COLLATION_UPDATE_ARRAY_FILTERS_TESTS = ( + COLLATION_ARRAY_FILTERS_TESTS + COLLATION_POSITIONAL_UPDATE_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_UPDATE_ARRAY_FILTERS_TESTS)) +def 
test_collation_update_array_filters(database_client, collection, test): + """Test collation behavior in arrayFilters and positional update operators.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_ops.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_ops.py new file mode 100644 index 000000000..0d820513e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_ops.py @@ -0,0 +1,223 @@ +"""Tests for collation effects on array update operators ($pull, $pullAll, $addToSet).""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Pull with Collation]: $pull uses collation to determine which array +# elements match the removal condition, enabling case-insensitive and +# accent-insensitive element removal. 
+COLLATION_PULL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "pull_case_insensitive", + docs=[{"_id": 1, "tags": ["Apple", "BANANA", "cherry"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$pull": {"tags": "apple"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$pull with strength 2 should remove case-variant element", + ), + CommandTestCase( + "pull_accent_insensitive", + docs=[{"_id": 1, "tags": ["cafe", "caf\u00e9", "tea"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$pull": {"tags": "cafe"}}, + "collation": {"locale": "en", "strength": 1}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$pull with strength 1 should remove accent-variant elements", + ), + CommandTestCase( + "pull_no_collation_binary", + docs=[{"_id": 1, "tags": ["Apple", "apple", "banana"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$pull": {"tags": "apple"}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$pull without collation should use binary comparison", + ), + CommandTestCase( + "pull_condition_case_insensitive", + docs=[{"_id": 1, "tags": ["Apple", "banana", "Cherry"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$pull": {"tags": {"$gte": "banana"}}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$pull with comparison condition should use collation", + ), + CommandTestCase( + "pull_collection_default_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "tags": ["Apple", "banana"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$pull": {"tags": 
"apple"}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$pull should inherit collection default collation", + ), +] + +# Property [PullAll with Collation]: $pullAll uses collation to compare each +# value in the removal list against array elements. +COLLATION_PULLALL_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "pullall_case_insensitive", + docs=[{"_id": 1, "tags": ["Apple", "BANANA", "cherry"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$pullAll": {"tags": ["apple", "banana"]}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$pullAll with strength 2 should remove case-variant elements", + ), + CommandTestCase( + "pullall_no_collation_binary", + docs=[{"_id": 1, "tags": ["Apple", "apple", "banana"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$pullAll": {"tags": ["apple"]}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$pullAll without collation should use binary comparison", + ), +] + +# Property [AddToSet with Collation]: $addToSet uses collation to determine +# whether a value already exists in the array, preventing collation-equal +# duplicates. 
+COLLATION_ADDTOSET_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "addtoset_duplicate_case_insensitive", + docs=[{"_id": 1, "tags": ["Apple"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$addToSet": {"tags": "apple"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 0}, + msg="$addToSet with strength 2 should not add case-variant duplicate", + ), + CommandTestCase( + "addtoset_new_value_case_insensitive", + docs=[{"_id": 1, "tags": ["Apple"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$addToSet": {"tags": "banana"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$addToSet with collation should add genuinely new value", + ), + CommandTestCase( + "addtoset_no_collation_allows_case_variant", + docs=[{"_id": 1, "tags": ["Apple"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$addToSet": {"tags": "apple"}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$addToSet without collation should treat case variants as distinct", + ), + CommandTestCase( + "addtoset_collection_default_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "tags": ["Apple"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$addToSet": {"tags": "apple"}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 0}, + msg="$addToSet should inherit collection default collation for dedup", + ), +] + +COLLATION_UPDATE_ARRAY_OPS_TESTS = ( + COLLATION_PULL_TESTS + COLLATION_PULLALL_TESTS + COLLATION_ADDTOSET_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_UPDATE_ARRAY_OPS_TESTS)) +def test_collation_update_array_ops(database_client, 
collection, test): + """Test collation behavior in array update operators.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_field_ops.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_field_ops.py new file mode 100644 index 000000000..445aecad8 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_field_ops.py @@ -0,0 +1,196 @@ +"""Tests for collation effects on field update operators ($min, $max, $push+$sort).""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Update $min with Collation]: the $min update operator uses collation +# for string comparison, only replacing the field value if the new value is less +# than the current value under the active collation. 
+COLLATION_UPDATE_MIN_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "min_replaces_when_less_case_insensitive", + docs=[{"_id": 1, "x": "Banana"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$min": {"x": "apple"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$min with collation should replace when new value sorts earlier", + ), + CommandTestCase( + "min_no_replace_when_greater_case_insensitive", + docs=[{"_id": 1, "x": "Apple"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$min": {"x": "banana"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 0}, + msg="$min with collation should not replace when new value sorts later", + ), + CommandTestCase( + "min_no_collation_binary", + docs=[{"_id": 1, "x": "banana"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$min": {"x": "Apple"}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$min without collation should use binary comparison (uppercase < lowercase)", + ), +] + +# Property [Update $max with Collation]: the $max update operator uses collation +# for string comparison, only replacing the field value if the new value is +# greater than the current value under the active collation. 
+COLLATION_UPDATE_MAX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "max_replaces_when_greater_case_insensitive", + docs=[{"_id": 1, "x": "Apple"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$max": {"x": "banana"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$max with collation should replace when new value sorts later", + ), + CommandTestCase( + "max_no_replace_when_less_case_insensitive", + docs=[{"_id": 1, "x": "Banana"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$max": {"x": "apple"}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 0}, + msg="$max with collation should not replace when new value sorts earlier", + ), + CommandTestCase( + "max_collection_default_collation", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "Apple"}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$max": {"x": "banana"}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$max should inherit collection default collation", + ), +] + +# Property [Push with Sort and Collation]: $push with the $sort modifier uses +# collation for string ordering when sorting array elements. 
+COLLATION_PUSH_SORT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "push_sort_case_insensitive", + docs=[{"_id": 1, "items": ["banana", "Apple"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$push": {"items": {"$each": ["cherry"], "$sort": 1}}}, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$push with $sort should use collation for string ordering", + ), + CommandTestCase( + "push_sort_no_collation_binary", + docs=[{"_id": 1, "items": ["banana", "Apple"]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": {"$push": {"items": {"$each": ["cherry"], "$sort": 1}}}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$push with $sort without collation should use binary ordering", + ), + CommandTestCase( + "push_sort_nested_field_case_insensitive", + docs=[{"_id": 1, "items": [{"name": "banana", "v": 1}, {"name": "Apple", "v": 2}]}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 1}, + "u": { + "$push": { + "items": {"$each": [{"name": "cherry", "v": 3}], "$sort": {"name": 1}} + } + }, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="$push with $sort on nested field should use collation", + ), +] + +COLLATION_UPDATE_FIELD_OPS_TESTS = ( + COLLATION_UPDATE_MIN_TESTS + COLLATION_UPDATE_MAX_TESTS + COLLATION_PUSH_SORT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_UPDATE_FIELD_OPS_TESTS)) +def test_collation_update_field_ops(database_client, collection, test): + """Test collation behavior in $min, $max, and $push+$sort update operators.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + 
expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_pipeline.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_pipeline.py new file mode 100644 index 000000000..d98af471f --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/test_collation_update_pipeline.py @@ -0,0 +1,205 @@ +"""Tests for collation with pipeline-style updates.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Pipeline Update Filter Matching]: collation affects which documents +# the filter selects when the update expression is an aggregation pipeline array, +# the same as for traditional update operators. 
+COLLATION_PIPELINE_UPDATE_FILTER_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "pipeline_updateone_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": [{"$set": {"v": 2}}], + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="pipeline updateOne with strength 2 should match case-insensitively", + ), + CommandTestCase( + "pipeline_updatemany_case_insensitive", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "APPLE", "v": 1}, + {"_id": 4, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": [{"$set": {"v": 2}}], + "multi": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 3, "nModified": 3}, + msg="pipeline updateMany with strength 2 should match all case variants", + ), + CommandTestCase( + "pipeline_update_no_collation_binary", + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": [{"$set": {"v": 2}}], + "multi": True, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="pipeline update without collation should use binary comparison", + ), + CommandTestCase( + "pipeline_update_accent_insensitive", + docs=[ + {"_id": 1, "x": "cafe", "v": 1}, + {"_id": 2, "x": "caf\u00e9", "v": 1}, + {"_id": 3, "x": "other", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "cafe"}, + "u": [{"$set": {"v": 2}}], + "multi": True, + "collation": {"locale": "en", "strength": 1}, + } + ], + }, + expected={"ok": 1.0, "n": 2, "nModified": 2}, + msg="pipeline update 
with strength 1 should match accent variants", + ), +] + +# Property [Pipeline Update Collection Default Collation]: when no collation is +# specified on a pipeline-style update, the collection's default collation is +# used for filter matching. +COLLATION_PIPELINE_UPDATE_COLLECTION_DEFAULT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "pipeline_collection_default_inherited", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [{"q": {"x": "apple"}, "u": [{"$set": {"v": 2}}], "multi": True}], + }, + expected={"ok": 1.0, "n": 2, "nModified": 2}, + msg="pipeline update should inherit collection default collation", + ), + CommandTestCase( + "pipeline_collection_default_overridden", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "apple", "v": 1}, + {"_id": 2, "x": "Apple", "v": 1}, + {"_id": 3, "x": "banana", "v": 1}, + ], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": [{"$set": {"v": 2}}], + "multi": True, + "collation": {"locale": "en", "strength": 3}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="pipeline update with explicit collation should override collection default", + ), +] + +# Property [Pipeline Update Upsert with Collation]: collation affects the +# filter matching for upsert operations using pipeline-style updates. 
+COLLATION_PIPELINE_UPDATE_UPSERT_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "pipeline_upsert_match_found", + docs=[{"_id": 1, "x": "Apple", "v": 1}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"x": "apple"}, + "u": [{"$set": {"v": 2}}], + "upsert": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 1}, + msg="pipeline upsert with collation should find existing case-variant and update it", + ), + CommandTestCase( + "pipeline_upsert_no_match_inserts", + docs=[{"_id": 1, "x": "banana", "v": 1}], + command=lambda ctx: { + "update": ctx.collection, + "updates": [ + { + "q": {"_id": 99, "x": "apple"}, + "u": [{"$set": {"v": 2}}], + "upsert": True, + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": 1.0, "n": 1, "nModified": 0, "upserted": [{"index": 0, "_id": 99}]}, + msg="pipeline upsert with collation should insert when no match found", + ), +] + +COLLATION_PIPELINE_UPDATE_TESTS = ( + COLLATION_PIPELINE_UPDATE_FILTER_TESTS + + COLLATION_PIPELINE_UPDATE_COLLECTION_DEFAULT_TESTS + + COLLATION_PIPELINE_UPDATE_UPSERT_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_PIPELINE_UPDATE_TESTS)) +def test_collation_update_pipeline(database_client, collection, test): + """Test collation with pipeline-style updates.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + assertResult( + result, + expected=test.build_expected(ctx), + error_code=test.error_code, + msg=test.msg, + raw_res=True, + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/utils/__init__.py b/documentdb_tests/compatibility/tests/core/collation/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/collation/utils/collation_view_mismatch.py 
b/documentdb_tests/compatibility/tests/core/collation/utils/collation_view_mismatch.py new file mode 100644 index 000000000..6c875f8e4 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/utils/collation_view_mismatch.py @@ -0,0 +1,118 @@ +"""Shared infrastructure for collation view-mismatch tests. + +Many stages ($lookup, $graphLookup, $unionWith) share the same pattern: +a pipeline references a secondary collection that may be wrapped in a +collated view, the source may also be wrapped in a collated view, and +the aggregate command may carry its own collation. This module provides +a generic test case with prepare/build_command methods matching the +CommandTestCase interface. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +from pymongo.collection import Collection +from pymongo.database import Database + +from documentdb_tests.framework.test_case import BaseTestCase + +# Sentinel used in pipelines to mark where the secondary collection name +# should be substituted at runtime. +SECONDARY = object() + + +def _substitute_sentinel(obj: Any, sentinel: object, replacement: str) -> Any: + """Recursively replace *sentinel* with *replacement* in nested structures.""" + if obj is sentinel: + return replacement + if isinstance(obj, dict): + return {k: _substitute_sentinel(v, sentinel, replacement) for k, v in obj.items()} + if isinstance(obj, list): + return [_substitute_sentinel(item, sentinel, replacement) for item in obj] + return obj + + +@dataclass(frozen=True) +class ViewMismatchTestCase(BaseTestCase): + """Test case for collation view-mismatch behavior across stages. + + Attributes: + docs: Documents to insert into the source collection. + secondary_docs: Documents to insert into the secondary collection. + pipeline: Aggregation pipeline. Use the SECONDARY sentinel wherever + the secondary collection name should appear. 
+ secondary_view_collation: If set, creates a view on the secondary + collection with this collation and targets the view instead. + source_view_collation: If set, creates a view on the source + collection with this collation and aggregates from the view. + command_collation: If set, added as the ``collation`` field on the + aggregate command. + ignore_order_in: Passed through to assertResult. + """ + + docs: list[dict[str, Any]] = field(default_factory=list) + secondary_docs: list[dict[str, Any]] = field(default_factory=list) + pipeline: list[dict[str, Any]] = field(default_factory=list) + secondary_view_collation: dict[str, Any] | None = None + source_view_collation: dict[str, Any] | None = None + command_collation: dict[str, Any] | None = None + ignore_order_in: list[str] | None = None + + def prepare(self, db: Database, collection: Collection) -> Collection: + """Set up source, secondary, and optional views. Returns the agg source.""" + secondary_col_name = f"{collection.name}_secondary" + + # Populate source. + if self.docs: + collection.insert_many(self.docs) + + # Populate secondary. + db.create_collection(secondary_col_name) + if self.secondary_docs: + db[secondary_col_name].insert_many(self.secondary_docs) + + # Optionally wrap secondary in a view. + if self.secondary_view_collation is not None: + db.command( + "create", + f"{secondary_col_name}_view", + viewOn=secondary_col_name, + pipeline=[], + collation=self.secondary_view_collation, + ) + + # Optionally wrap source in a view. 
+ if self.source_view_collation is not None: + source_view_name = f"{collection.name}_view" + db.command( + "create", + source_view_name, + viewOn=collection.name, + pipeline=[], + collation=self.source_view_collation, + ) + return db[source_view_name] + + return collection + + def build_command(self, collection: Collection) -> dict[str, Any]: + """Build the aggregate command with sentinel substitution.""" + # Derive the fixture name: if we wrapped the source in a view, strip the suffix. + fixture_name = ( + collection.name.removesuffix("_view") + if self.source_view_collation is not None + else collection.name + ) + base = f"{fixture_name}_secondary" + secondary_name = f"{base}_view" if self.secondary_view_collation is not None else base + pipeline = _substitute_sentinel(self.pipeline, SECONDARY, secondary_name) + command: dict[str, Any] = { + "aggregate": collection.name, + "pipeline": pipeline, + "cursor": {}, + } + if self.command_collation is not None: + command["collation"] = self.command_collation + return command diff --git a/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py b/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py index 7d6072b29..26ff3717a 100644 --- a/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py +++ b/documentdb_tests/compatibility/tests/core/collections/commands/utils/command_test_case.py @@ -72,26 +72,33 @@ class CommandTestCase(BaseTestCase): docs: list[dict[str, Any]] | None = None command: dict[str, Any] | Callable[..., dict[str, Any]] | None = None expected: dict[str, Any] | list[dict[str, Any]] | Callable[..., dict[str, Any]] | None = None + ignore_order_in: list[str] | None = None def prepare(self, db: Database, collection: Collection) -> Collection: """Resolve the target collection and apply indexes/docs. 
+ Documents and indexes are inserted into the collection returned + by ``target_collection.writable(source, resolved)``. For views + this is the source; for regular collections it is the resolved + collection itself. + - If ``docs=None``, the collection is not created and will not exist. - If ``docs=[]``, the collection is explicitly created but left empty. - If ``docs=[...]``, the collection is created and documents are inserted. """ - collection = self.target_collection.resolve(db, collection) + resolved = self.target_collection.resolve(db, collection) + target = self.target_collection.writable(collection, resolved) if self.indexes: - collection.create_indexes(self.indexes) + target.create_indexes(self.indexes) if self.docs is not None: - if collection.name not in collection.database.list_collection_names(): - collection.database.create_collection(collection.name) + if target.name not in target.database.list_collection_names(): + target.database.create_collection(target.name) if self.docs: - collection.insert_many(self.docs) + target.insert_many(self.docs) if self.siblings: for sibling in self.siblings: sibling.create(db, collection) - return collection + return resolved def build_command(self, ctx: CommandContext) -> dict[str, Any]: """Resolve the command dict from a callable or plain dict.""" diff --git a/documentdb_tests/framework/assertions.py b/documentdb_tests/framework/assertions.py index eb9116298..399a73fae 100644 --- a/documentdb_tests/framework/assertions.py +++ b/documentdb_tests/framework/assertions.py @@ -73,6 +73,12 @@ def _sort_if_list(value): def _sort_fields(docs, fields): """Sort list values for the named fields in each document.""" + if isinstance(docs, dict): + doc = dict(docs) + for field in fields: + if field in doc: + doc[field] = _sort_if_list(doc[field]) + return doc sorted_docs = [] for doc in docs: doc = dict(doc) diff --git a/documentdb_tests/framework/error_codes.py b/documentdb_tests/framework/error_codes.py index 
adbc5c20d..ff4241add 100644 --- a/documentdb_tests/framework/error_codes.py +++ b/documentdb_tests/framework/error_codes.py @@ -13,7 +13,10 @@ NAMESPACE_NOT_FOUND_ERROR = 26 INDEX_NOT_FOUND_ERROR = 27 NAMESPACE_EXISTS_ERROR = 48 +INVALID_BSON_ID_ERROR = 53 +EMPTY_FIELD_NAME_ERROR = 56 COMMAND_NOT_FOUND_ERROR = 59 +IMMUTABLE_FIELD_ERROR = 66 CANNOT_CREATE_INDEX_ERROR = 67 INDEX_ALREADY_EXISTS_ERROR = 68 INVALID_OPTIONS_ERROR = 72 @@ -43,6 +46,7 @@ DUPLICATE_KEY_ERROR = 11000 SORT_COMPOUND_KEY_LIMIT_ERROR = 13103 BSON_FIELD_NOT_BOOL_ERROR = 13111 +MERGE_WHEN_NOT_MATCHED_FAIL_ERROR = 13113 INVALID_DB_NAME_ERROR = 13280 DATABASE_DIFFER_CASE_ERROR = 13297 GROUP_NON_OBJECT_ERROR = 15947 @@ -326,6 +330,12 @@ REGEX_NULL_BYTE_ERROR = 51109 REGEX_OPTIONS_NULL_BYTE_ERROR = 51110 REGEX_BAD_PATTERN_ERROR = 51111 +MERGE_SPARSE_NULL_ON_FIELD_ERROR = 51132 +LET_SYSTEM_VARIABLE_IN_VALUE_ERROR = 51144 +MERGE_UNSUPPORTED_MODE_COMBINATION_ERROR = 51181 +MERGE_NO_MATCHING_UNIQUE_INDEX_ERROR = 51183 +MERGE_ARRAY_ON_FIELD_ERROR = 51185 +MERGE_LET_WITH_STRING_MODE_ERROR = 51199 PROJECT_EMPTY_SUB_PROJECTION_ERROR = 51270 PROJECT_EMPTY_SPEC_ERROR = 51272 REPLACE_MISSING_REPLACEMENT_ERROR = 51747 diff --git a/documentdb_tests/framework/target_collection.py b/documentdb_tests/framework/target_collection.py index f4d31f75b..1b3cd29db 100644 --- a/documentdb_tests/framework/target_collection.py +++ b/documentdb_tests/framework/target_collection.py @@ -11,6 +11,7 @@ from dataclasses import dataclass, field from typing import Any +from pymongo import IndexModel from pymongo.collection import Collection from pymongo.database import Database @@ -22,16 +23,30 @@ class TargetCollection: def resolve(self, db: Database, collection: Collection) -> Collection: return collection + def writable(self, source: Collection, resolved: Collection) -> Collection: + """Return the collection where docs and indexes should be inserted.""" + return resolved + @dataclass(frozen=True) class 
ViewCollection(TargetCollection): - """A view on the fixture collection.""" + """A view on the fixture collection. + + Pass any extra keyword arguments accepted by the ``create`` command + (e.g. ``pipeline``, ``collation``) via the ``options`` dict. + """ + + options: dict[str, Any] = field(default_factory=dict) + suffix: str = "_view" def resolve(self, db: Database, collection: Collection) -> Collection: - view_name = f"{collection.name}_view" - db.command("create", view_name, viewOn=collection.name, pipeline=[]) + view_name = f"{collection.name}{self.suffix}" + db.command("create", view_name, viewOn=collection.name, **self.options) return db[view_name] + def writable(self, source: Collection, resolved: Collection) -> Collection: + return source + @dataclass(frozen=True) class SystemViewsCollection(ViewCollection): @@ -132,6 +147,9 @@ def resolve(self, db: Database, collection: Collection) -> Collection: source = name return db[source] + def writable(self, source: Collection, resolved: Collection) -> Collection: + return source + @dataclass(frozen=True) class ExistingCollection(TargetCollection): @@ -182,19 +200,9 @@ def resolve(self, db: Database, collection: Collection) -> Collection: return db[f"system.buckets.{name}"] -@dataclass(frozen=True) -class ViewWithPipelineCollection(TargetCollection): +def ViewWithPipelineCollection() -> ViewCollection: """A view on the fixture collection with a non-empty pipeline.""" - - def resolve(self, db: Database, collection: Collection) -> Collection: - view_name = f"{collection.name}_vpipe" - db.command( - "create", - view_name, - viewOn=collection.name, - pipeline=[{"$match": {"x": 1}}], - ) - return db[view_name] + return ViewCollection(options={"pipeline": [{"$match": {"x": 1}}]}, suffix="_vpipe") @dataclass(frozen=True) @@ -318,6 +326,30 @@ def resolve(self, db: Database, collection: Collection) -> Collection: return collection +@dataclass(frozen=True) +class ViewOnCustomCollection(TargetCollection): + """A view on a 
source collection created with custom options. + + Creates the source collection with ``source_options`` then creates + a view on it. + """ + + source_options: dict[str, Any] = field(default_factory=dict) + + def resolve(self, db: Database, collection: Collection) -> Collection: + source_name = f"{collection.name}_source" + db.command("create", source_name, **self.source_options) + view_name = f"{collection.name}_view" + db.command("create", view_name, viewOn=source_name, pipeline=[]) + return db[view_name] + + def writable(self, source: Collection, resolved: Collection) -> Collection: + """Insert docs into the underlying source collection.""" + db = source.database + source_name = f"{source.name}_source" + return db[source_name] + + @dataclass(frozen=True) class SiblingCollection: """Describes an additional collection to create alongside the source. @@ -329,6 +361,8 @@ class SiblingCollection: suffix: str = "_target" view_on_source: bool = False timeseries_field: str | None = None + collation: dict[str, Any] | None = None + indexes: list[IndexModel] | None = None docs: list[dict[str, Any]] | None = None def create(self, db: Database, collection: Collection) -> None: @@ -338,7 +372,11 @@ def create(self, db: Database, collection: Collection) -> None: db.create_collection(name, viewOn=collection.name, pipeline=[]) elif self.timeseries_field: db.create_collection(name, timeseries={"timeField": self.timeseries_field}) + elif self.collation: + db.create_collection(name, collation=self.collation) else: db.create_collection(name) + if self.indexes: + db[name].create_indexes(self.indexes) if self.docs: db[name].insert_many(self.docs) From a78e06765008c50ffacd8168810fc85190269fcc Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Fri, 22 May 2026 16:15:24 -0700 Subject: [PATCH 2/8] Restructure tests into subdirs Signed-off-by: Daniel Frankcom --- .../tests/core/collation/collection_level/__init__.py | 0 .../test_collection_level_propagation.py} | 0 
.../test_collection_level_views.py} | 0 .../compatibility/tests/core/collation/command_level/__init__.py | 0 .../tests/core/collation/command_level/operations/__init__.py | 0 .../operations/test_operations_bulk_write.py} | 0 .../operations/test_operations_count.py} | 0 .../operations/test_operations_delete.py} | 0 .../operations/test_operations_distinct.py} | 0 .../operations/test_operations_find.py} | 0 .../operations/test_operations_find_and_modify.py} | 0 .../operations/test_operations_find_and_modify_pipeline.py} | 0 .../operations/test_operations_find_query_operators.py} | 0 .../operations/test_operations_find_text.py} | 0 .../operations/test_operations_update.py} | 0 .../operations/test_operations_update_array_filters.py} | 0 .../operations/test_operations_update_array_ops.py} | 0 .../operations/test_operations_update_field_ops.py} | 0 .../operations/test_operations_update_pipeline.py} | 0 .../tests/core/collation/command_level/stages/__init__.py | 0 .../stages/test_stages_accumulators.py} | 0 .../stages/test_stages_bucket.py} | 0 .../stages/test_stages_expressions.py} | 0 .../stages/test_stages_geonear.py} | 0 .../stages/test_stages_graphlookup.py} | 0 .../stages/test_stages_group.py} | 0 .../stages/test_stages_lookup.py} | 0 .../stages/test_stages_match.py} | 0 .../stages/test_stages_merge_out.py} | 0 .../stages/test_stages_replaceroot.py} | 0 .../stages/test_stages_sort.py} | 0 .../stages/test_stages_substages.py} | 0 .../stages/test_stages_unionwith.py} | 0 .../stages/test_stages_window.py} | 0 .../compatibility/tests/core/collation/index_level/__init__.py | 0 .../test_index_level_basic.py} | 0 .../test_index_level_multikey.py} | 0 .../test_index_level_partial.py} | 0 .../compatibility/tests/core/collation/options/__init__.py | 0 .../test_options_alternate.py} | 0 .../test_options_backwards.py} | 0 .../test_options_casefirst.py} | 0 .../test_options_caselevel.py} | 0 .../test_options_edge_cases.py} | 0 .../{test_collation_locale.py => 
options/test_options_locale.py} | 0 .../test_options_locale_defaults.py} | 0 .../test_options_locale_specific.py} | 0 .../test_options_normalization.py} | 0 .../test_options_numeric_ordering.py} | 0 .../test_options_strength.py} | 0 .../compatibility/tests/core/collation/resolution/__init__.py | 0 .../test_resolution_precedence.py} | 0 52 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 documentdb_tests/compatibility/tests/core/collation/collection_level/__init__.py rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_propagation.py => collection_level/test_collection_level_propagation.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_views.py => collection_level/test_collection_level_views.py} (100%) create mode 100644 documentdb_tests/compatibility/tests/core/collation/command_level/__init__.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/command_level/operations/__init__.py rename documentdb_tests/compatibility/tests/core/collation/{test_collation_bulk_write.py => command_level/operations/test_operations_bulk_write.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_count.py => command_level/operations/test_operations_count.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_delete.py => command_level/operations/test_operations_delete.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_distinct.py => command_level/operations/test_operations_distinct.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_find.py => command_level/operations/test_operations_find.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_find_and_modify.py => command_level/operations/test_operations_find_and_modify.py} (100%) rename 
documentdb_tests/compatibility/tests/core/collation/{test_collation_find_and_modify_pipeline.py => command_level/operations/test_operations_find_and_modify_pipeline.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_find_query_operators.py => command_level/operations/test_operations_find_query_operators.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_find_text.py => command_level/operations/test_operations_find_text.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_update.py => command_level/operations/test_operations_update.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_update_array_filters.py => command_level/operations/test_operations_update_array_filters.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_update_array_ops.py => command_level/operations/test_operations_update_array_ops.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_update_field_ops.py => command_level/operations/test_operations_update_field_ops.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_update_pipeline.py => command_level/operations/test_operations_update_pipeline.py} (100%) create mode 100644 documentdb_tests/compatibility/tests/core/collation/command_level/stages/__init__.py rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_accumulators.py => command_level/stages/test_stages_accumulators.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_bucket.py => command_level/stages/test_stages_bucket.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_expressions.py => command_level/stages/test_stages_expressions.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_geonear.py => 
command_level/stages/test_stages_geonear.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_graphlookup.py => command_level/stages/test_stages_graphlookup.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_group.py => command_level/stages/test_stages_group.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_lookup.py => command_level/stages/test_stages_lookup.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_match.py => command_level/stages/test_stages_match.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_merge_out.py => command_level/stages/test_stages_merge_out.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_replaceroot.py => command_level/stages/test_stages_replaceroot.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_sort_stages.py => command_level/stages/test_stages_sort.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_substages.py => command_level/stages/test_stages_substages.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_unionwith.py => command_level/stages/test_stages_unionwith.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_window.py => command_level/stages/test_stages_window.py} (100%) create mode 100644 documentdb_tests/compatibility/tests/core/collation/index_level/__init__.py rename documentdb_tests/compatibility/tests/core/collation/{test_collation_index.py => index_level/test_index_level_basic.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_index_multikey.py => index_level/test_index_level_multikey.py} (100%) rename 
documentdb_tests/compatibility/tests/core/collation/{test_collation_index_partial.py => index_level/test_index_level_partial.py} (100%) create mode 100644 documentdb_tests/compatibility/tests/core/collation/options/__init__.py rename documentdb_tests/compatibility/tests/core/collation/{test_collation_alternate.py => options/test_options_alternate.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_backwards.py => options/test_options_backwards.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_casefirst.py => options/test_options_casefirst.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_caselevel.py => options/test_options_caselevel.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_edge_cases.py => options/test_options_edge_cases.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_locale.py => options/test_options_locale.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_locale_defaults.py => options/test_options_locale_defaults.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_locale_specific.py => options/test_options_locale_specific.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_normalization.py => options/test_options_normalization.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_numeric_ordering.py => options/test_options_numeric_ordering.py} (100%) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_strength.py => options/test_options_strength.py} (100%) create mode 100644 documentdb_tests/compatibility/tests/core/collation/resolution/__init__.py rename documentdb_tests/compatibility/tests/core/collation/{test_collation_aggregate_resolution.py => resolution/test_resolution_precedence.py} (100%) diff --git 
a/documentdb_tests/compatibility/tests/core/collation/collection_level/__init__.py b/documentdb_tests/compatibility/tests/core/collation/collection_level/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_propagation.py b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_propagation.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_propagation.py rename to documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_propagation.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_views.py b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_views.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_views.py rename to documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_views.py diff --git a/documentdb_tests/compatibility/tests/core/collation/command_level/__init__.py b/documentdb_tests/compatibility/tests/core/collation/command_level/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/collation/command_level/operations/__init__.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_bulk_write.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_bulk_write.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_bulk_write.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_bulk_write.py diff 
--git a/documentdb_tests/compatibility/tests/core/collation/test_collation_count.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_count.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_count.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_count.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_delete.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_delete.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_delete.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_delete.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_distinct.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_distinct.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_distinct.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_distinct.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_find.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_and_modify.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify.py 
rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_and_modify.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify_pipeline.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_and_modify_pipeline.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_find_and_modify_pipeline.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_and_modify_pipeline.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_query_operators.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_query_operators.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_find_query_operators.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_query_operators.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_find_text.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_text.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_find_text.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find_text.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_update.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update.py diff --git 
a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_filters.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_array_filters.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_filters.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_array_filters.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_ops.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_array_ops.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_update_array_ops.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_array_ops.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_field_ops.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_field_ops.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_update_field_ops.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_field_ops.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_update_pipeline.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_pipeline.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_update_pipeline.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_update_pipeline.py diff --git a/documentdb_tests/compatibility/tests/core/collation/command_level/stages/__init__.py 
b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_accumulators.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_accumulators.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_accumulators.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_accumulators.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_bucket.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_bucket.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_bucket.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_bucket.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_expressions.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_expressions.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_expressions.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_expressions.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_geonear.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_geonear.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_geonear.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_geonear.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_graphlookup.py 
b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_graphlookup.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_graphlookup.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_graphlookup.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_group.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_group.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_group.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_group.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_lookup.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_lookup.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_lookup.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_lookup.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_match.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_match.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_match.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_match.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_merge_out.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_merge_out.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_merge_out.py rename to 
documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_merge_out.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_replaceroot.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_replaceroot.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_replaceroot.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_replaceroot.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_sort_stages.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_sort.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_sort_stages.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_sort.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_substages.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_substages.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_substages.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_substages.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_unionwith.py b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_unionwith.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_unionwith.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_unionwith.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_window.py 
b/documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_window.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_window.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/stages/test_stages_window.py diff --git a/documentdb_tests/compatibility/tests/core/collation/index_level/__init__.py b/documentdb_tests/compatibility/tests/core/collation/index_level/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_index.py b/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_basic.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_index.py rename to documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_basic.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_index_multikey.py b/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_multikey.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_index_multikey.py rename to documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_multikey.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_index_partial.py b/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_partial.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_index_partial.py rename to documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_partial.py diff --git a/documentdb_tests/compatibility/tests/core/collation/options/__init__.py b/documentdb_tests/compatibility/tests/core/collation/options/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/documentdb_tests/compatibility/tests/core/collation/test_collation_alternate.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_alternate.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_alternate.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_alternate.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_backwards.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_backwards.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_backwards.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_backwards.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_casefirst.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_casefirst.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_casefirst.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_casefirst.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_caselevel.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_caselevel.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_caselevel.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_caselevel.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_edge_cases.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_edge_cases.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_edge_cases.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_edge_cases.py diff --git 
a/documentdb_tests/compatibility/tests/core/collation/test_collation_locale.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_locale.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_locale.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_locale.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_defaults.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_locale_defaults.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_locale_defaults.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_locale_defaults.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_locale_specific.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_locale_specific.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_locale_specific.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_locale_specific.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_normalization.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_normalization.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_normalization.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_normalization.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_numeric_ordering.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_numeric_ordering.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_numeric_ordering.py rename to 
documentdb_tests/compatibility/tests/core/collation/options/test_options_numeric_ordering.py diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_strength.py b/documentdb_tests/compatibility/tests/core/collation/options/test_options_strength.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_strength.py rename to documentdb_tests/compatibility/tests/core/collation/options/test_options_strength.py diff --git a/documentdb_tests/compatibility/tests/core/collation/resolution/__init__.py b/documentdb_tests/compatibility/tests/core/collation/resolution/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_resolution.py b/documentdb_tests/compatibility/tests/core/collation/resolution/test_resolution_precedence.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_aggregate_resolution.py rename to documentdb_tests/compatibility/tests/core/collation/resolution/test_resolution_precedence.py From 7a7f089a98463fd9c0048c91950ae32a64a9f941 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Fri, 22 May 2026 16:35:04 -0700 Subject: [PATCH 3/8] Add test verifying view collation does not affect source collection Signed-off-by: Daniel Frankcom --- .../test_collection_level_views.py | 20 ++++++++++++++++++- .../framework/target_collection.py | 19 ++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_views.py b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_views.py index e63ec5ab0..5626d6dea 100644 --- a/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_views.py +++ 
b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_views.py @@ -16,7 +16,11 @@ from documentdb_tests.framework.error_codes import OPTION_NOT_SUPPORTED_ON_VIEW_ERROR from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -from documentdb_tests.framework.target_collection import ViewCollection, ViewOnCustomCollection +from documentdb_tests.framework.target_collection import ( + CollectionWithView, + ViewCollection, + ViewOnCustomCollection, +) # Property [View Collation Constraints]: aggregating on a view with an explicit # collation that differs from the view's default produces @@ -126,6 +130,20 @@ error_code=OPTION_NOT_SUPPORTED_ON_VIEW_ERROR, msg="aggregate on view without collation with non-simple collation should be rejected", ), + CommandTestCase( + "view_collation_does_not_affect_source", + target_collection=CollectionWithView( + view_options={"collation": {"locale": "en", "strength": 1}} + ), + docs=[{"_id": 1, "name": "cafe"}, {"_id": 2, "name": "Cafe"}, {"_id": 3, "name": "CAFE"}], + command=lambda ctx: { + "aggregate": ctx.collection, + "pipeline": [{"$match": {"name": "cafe"}}], + "cursor": {}, + }, + expected=[{"_id": 1, "name": "cafe"}], + msg="querying source collection should use binary comparison, unaffected by view collation", + ), ] diff --git a/documentdb_tests/framework/target_collection.py b/documentdb_tests/framework/target_collection.py index 1b3cd29db..46c586284 100644 --- a/documentdb_tests/framework/target_collection.py +++ b/documentdb_tests/framework/target_collection.py @@ -350,6 +350,25 @@ def writable(self, source: Collection, resolved: Collection) -> Collection: return db[source_name] +@dataclass(frozen=True) +class CollectionWithView(TargetCollection): + """A collection with a view created on top of it. + + Creates the source collection, then creates a view on it with + ``view_options``. 
Resolves to the source collection so tests can + verify the view does not affect the underlying collection. + """ + + view_options: dict[str, Any] = field(default_factory=dict) + + def resolve(self, db: Database, collection: Collection) -> Collection: + source_name = f"{collection.name}_source" + db.command("create", source_name) + view_name = f"{collection.name}_view" + db.command("create", view_name, viewOn=source_name, pipeline=[], **self.view_options) + return db[source_name] + + @dataclass(frozen=True) class SiblingCollection: """Describes an additional collection to create alongside the source. From 26e54f65200276d5fb623b53993609b23a922ce3 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Mon, 25 May 2026 13:47:36 -0700 Subject: [PATCH 4/8] Create second index on same key with different collation Signed-off-by: Daniel Frankcom --- .../index_level/test_index_level_basic.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_basic.py b/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_basic.py index f9e257e12..3437f1f6e 100644 --- a/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_basic.py +++ b/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_basic.py @@ -88,6 +88,27 @@ error_code=MISSING_FIELD_ERROR, msg="creating index with collation missing locale should produce an error", ), + CommandTestCase( + "create_two_indexes_same_key_different_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"name": 1}, + "name": "name_s1", + "collation": {"locale": "en", "strength": 1}, + }, + { + "key": {"name": 1}, + "name": "name_s2", + "collation": {"locale": "en", "strength": 2}, + }, + ], + }, + expected={"ok": Eq(1.0)}, + msg="should allow two indexes on same key with different collations", + ), ] # Property [Unique 
Index Enforcement Under Collation]: a unique index with From 2d2996fdfabf097b82028577bb7fdd214f096543 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 28 May 2026 10:29:15 -0700 Subject: [PATCH 5/8] Redistribute edge case tests, add geospatial index coverage Move capped collection tests from options/ to collection_level/. Move text index tests from options/ to index_level/ and add 2d/2dsphere collation tests covering the restriction boundary (2d rejects non-simple, 2dsphere accepts any collation). Signed-off-by: Daniel Frankcom --- .../test_collection_level_capped.py} | 64 +----- .../index_level/test_index_level_special.py | 209 ++++++++++++++++++ 2 files changed, 213 insertions(+), 60 deletions(-) rename documentdb_tests/compatibility/tests/core/collation/{options/test_options_edge_cases.py => collection_level/test_collection_level_capped.py} (58%) create mode 100644 documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_special.py diff --git a/documentdb_tests/compatibility/tests/core/collation/options/test_options_edge_cases.py b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_capped.py similarity index 58% rename from documentdb_tests/compatibility/tests/core/collation/options/test_options_edge_cases.py rename to documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_capped.py index c7de1262d..9061d5dc9 100644 --- a/documentdb_tests/compatibility/tests/core/collation/options/test_options_edge_cases.py +++ b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_capped.py @@ -1,9 +1,8 @@ -"""Tests for collation edge cases with capped collections and text indexes.""" +"""Tests for collation behavior with capped collections.""" from __future__ import annotations import pytest -from pymongo import IndexModel from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( 
CommandContext, @@ -12,7 +11,6 @@ from documentdb_tests.framework.assertions import assertResult from documentdb_tests.framework.executor import execute_command from documentdb_tests.framework.parametrize import pytest_params -from documentdb_tests.framework.property_checks import Eq from documentdb_tests.framework.target_collection import CappedCollection, CustomCollection # Property [Capped Collection Collation]: a capped collection can be created @@ -97,64 +95,10 @@ ), ] -# Property [Text Index Collation Incompatibility]: a text index cannot be -# created with a collation other than simple; creating one on a collection -# with a non-simple default collation requires specifying -# collation {locale: "simple"} on the index. -COLLATION_TEXT_INDEX_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "text_index_on_simple_collection", - docs=[{"_id": 1, "x": "hello world"}], - command=lambda ctx: { - "createIndexes": ctx.collection, - "indexes": [{"key": {"x": "text"}, "name": "x_text"}], - }, - expected={"ok": Eq(1.0)}, - msg="text index should be creatable on collection without collation", - ), - CommandTestCase( - "text_index_with_simple_collation_on_collated_collection", - target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), - docs=[{"_id": 1, "x": "hello world"}], - command=lambda ctx: { - "createIndexes": ctx.collection, - "indexes": [ - {"key": {"x": "text"}, "name": "x_text", "collation": {"locale": "simple"}} - ], - }, - expected={"ok": Eq(1.0)}, - msg="text index with simple collation should be creatable on collated collection", - ), - CommandTestCase( - "text_search_ignores_collection_collation", - target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), - docs=[ - {"_id": 1, "x": "cafe latte"}, - {"_id": 2, "x": "Cafe Mocha"}, - {"_id": 3, "x": "tea"}, - ], - indexes=[ - IndexModel([("x", "text")], collation={"locale": "simple"}, name="x_text"), - ], - command=lambda ctx: { - 
"find": ctx.collection, - "filter": {"$text": {"$search": "cafe"}}, - "sort": {"_id": 1}, - }, - expected=[ - {"_id": 1, "x": "cafe latte"}, - {"_id": 2, "x": "Cafe Mocha"}, - ], - msg="text search should use text index semantics not collection collation", - ), -] - -COLLATION_EDGE_CASE_TESTS = COLLATION_CAPPED_TESTS + COLLATION_TEXT_INDEX_TESTS - -@pytest.mark.parametrize("test", pytest_params(COLLATION_EDGE_CASE_TESTS)) -def test_collation_edge_cases(database_client, collection, test): - """Test collation edge cases with capped collections and text indexes.""" +@pytest.mark.parametrize("test", pytest_params(COLLATION_CAPPED_TESTS)) +def test_collation_capped(database_client, collection, test): + """Test collation behavior with capped collections.""" collection = test.prepare(database_client, collection) ctx = CommandContext.from_collection(collection) result = execute_command(collection, test.build_command(ctx)) diff --git a/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_special.py b/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_special.py new file mode 100644 index 000000000..93bf01a40 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/index_level/test_index_level_special.py @@ -0,0 +1,209 @@ +"""Tests for collation with text and geospatial indexes.""" + +from __future__ import annotations + +import pytest +from pymongo import IndexModel + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import CANNOT_CREATE_INDEX_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.property_checks import Eq +from documentdb_tests.framework.target_collection import 
CustomCollection + +# Property [Text Index Collation Incompatibility]: a text index cannot be +# created with a collation other than simple; creating one on a collection +# with a non-simple default collation requires specifying +# collation {locale: "simple"} on the index. +COLLATION_TEXT_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "text_index_with_non_simple_collation_rejected", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [{"key": {"x": "text"}, "name": "x_text", "collation": {"locale": "en"}}], + }, + error_code=CANNOT_CREATE_INDEX_ERROR, + msg="text index with non-simple collation should be rejected", + ), + CommandTestCase( + "text_index_inherits_non_simple_collation_rejected", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [{"key": {"x": "text"}, "name": "x_text"}], + }, + error_code=CANNOT_CREATE_INDEX_ERROR, + msg="text index inheriting non-simple collation from collection should be rejected", + ), + CommandTestCase( + "text_index_on_simple_collection", + docs=[{"_id": 1, "x": "hello world"}], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [{"key": {"x": "text"}, "name": "x_text"}], + }, + expected={"ok": Eq(1.0)}, + msg="text index should be creatable on collection without collation", + ), + CommandTestCase( + "text_index_with_simple_collation_on_collated_collection", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[{"_id": 1, "x": "hello world"}], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + {"key": {"x": "text"}, "name": "x_text", "collation": {"locale": "simple"}} + ], + }, + expected={"ok": Eq(1.0)}, + msg="text index with simple collation should be creatable on collated collection", + ), + CommandTestCase( + "text_search_ignores_collation", + 
target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "Cafe Mocha"}, + {"_id": 3, "x": "tea"}, + ], + indexes=[ + IndexModel([("x", "text")], collation={"locale": "simple"}, name="x_text"), + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"$text": {"$search": "cafe"}}, + "sort": {"_id": 1}, + }, + expected=[ + {"_id": 1, "x": "cafe latte"}, + {"_id": 2, "x": "Cafe Mocha"}, + ], + msg="text search should use its own case-folding, not collection or index collation", + ), +] + +# Property [2d Index Collation Restriction]: a 2d index cannot be created with +# a non-simple collation, including when inherited from a collated collection; +# specifying {locale: "simple"} explicitly is accepted. +COLLATION_2D_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "2d_index_rejects_non_simple_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"loc": "2d"}, + "name": "loc_2d", + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + error_code=CANNOT_CREATE_INDEX_ERROR, + msg="2d index should reject non-simple collation", + ), + CommandTestCase( + "2d_index_inherits_non_simple_collation_rejected", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [{"key": {"loc": "2d"}, "name": "loc_2d"}], + }, + error_code=CANNOT_CREATE_INDEX_ERROR, + msg="2d index inheriting non-simple collation from collection should be rejected", + ), + CommandTestCase( + "2d_index_accepts_simple_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"loc": "2d"}, + "name": "loc_2d", + "collation": {"locale": "simple"}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="2d index should accept simple collation", + ), + CommandTestCase( + 
"2d_on_collated_collection_explicit_simple_accepted", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"loc": "2d"}, + "name": "loc_2d", + "collation": {"locale": "simple"}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="2d index with explicit simple collation on collated collection should succeed", + ), +] + +# Property [2dsphere Index Collation Support]: a 2dsphere index can be created +# with any collation, including non-simple and inherited from a collated +# collection. +COLLATION_2DSPHERE_INDEX_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "2dsphere_index_accepts_non_simple_collation", + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [ + { + "key": {"loc": "2dsphere"}, + "name": "loc_2dsphere_collated", + "collation": {"locale": "en", "strength": 2}, + } + ], + }, + expected={"ok": Eq(1.0)}, + msg="2dsphere index should accept non-simple collation", + ), + CommandTestCase( + "2dsphere_on_collated_collection_inherits", + target_collection=CustomCollection(options={"collation": {"locale": "en", "strength": 2}}), + docs=[], + command=lambda ctx: { + "createIndexes": ctx.collection, + "indexes": [{"key": {"loc": "2dsphere"}, "name": "loc_2ds"}], + }, + expected={"ok": Eq(1.0)}, + msg="2dsphere index on collated collection should inherit collation", + ), +] + +COLLATION_SPECIAL_INDEX_TESTS = ( + COLLATION_TEXT_INDEX_TESTS + COLLATION_2D_INDEX_TESTS + COLLATION_2DSPHERE_INDEX_TESTS +) + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_SPECIAL_INDEX_TESTS)) +def test_collation_special_indexes(database_client, collection, test): + """Test collation with text and geospatial indexes.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + expected = 
test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) From 91ae5351f2dd5e985fdd2c6aa917180329a26112 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 28 May 2026 10:30:24 -0700 Subject: [PATCH 6/8] Add timeseries and clustered collection collation tests Signed-off-by: Daniel Frankcom --- .../test_collection_level_clustered.py | 94 ++++++++++++ .../test_collection_level_timeseries.py | 135 ++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_clustered.py create mode 100644 documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_timeseries.py diff --git a/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_clustered.py b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_clustered.py new file mode 100644 index 000000000..429de041e --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_clustered.py @@ -0,0 +1,94 @@ +"""Tests for collation behavior with clustered collections.""" + +from __future__ import annotations + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.error_codes import DUPLICATE_KEY_ERROR +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Clustered Collection Collation]: a clustered collection can be +# created with a default collation, and collation affects filter matching and +# _id 
uniqueness enforcement. +COLLATION_CLUSTERED_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "clustered_default_collation_filter", + target_collection=CustomCollection( + options={ + "clusteredIndex": {"key": {"_id": 1}, "unique": True}, + "collation": {"locale": "en", "strength": 2}, + } + ), + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "banana", "v": 2}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": "Apple"}, + }, + expected=[{"_id": "apple", "v": 1}], + msg="clustered collection with default collation should use it for _id filter matching", + ), + CommandTestCase( + "clustered_collation_rejects_case_variant_id", + target_collection=CustomCollection( + options={ + "clusteredIndex": {"key": {"_id": 1}, "unique": True}, + "collation": {"locale": "en", "strength": 2}, + } + ), + docs=[{"_id": "apple", "v": 1}], + command=lambda ctx: { + "insert": ctx.collection, + "documents": [{"_id": "Apple", "v": 2}], + }, + error_code=DUPLICATE_KEY_ERROR, + msg="clustered collection with collation should reject case-variant _id as duplicate", + ), + CommandTestCase( + "clustered_explicit_collation_filter", + target_collection=CustomCollection( + options={"clusteredIndex": {"key": {"_id": 1}, "unique": True}} + ), + docs=[ + {"_id": "apple", "v": 1}, + {"_id": "Apple", "v": 2}, + {"_id": "banana", "v": 3}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"_id": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": "Apple", "v": 2}, + {"_id": "apple", "v": 1}, + ], + msg="clustered collection should support explicit collation on find", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_CLUSTERED_TESTS)) +def test_collation_clustered(database_client, collection, test): + """Test collation behavior with clustered collections.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = 
execute_command(collection, test.build_command(ctx)) + expected = test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) diff --git a/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_timeseries.py b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_timeseries.py new file mode 100644 index 000000000..b0a957131 --- /dev/null +++ b/documentdb_tests/compatibility/tests/core/collation/collection_level/test_collection_level_timeseries.py @@ -0,0 +1,135 @@ +"""Tests for collation behavior with timeseries collections.""" + +from __future__ import annotations + +import datetime +from datetime import timezone + +import pytest + +from documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( + CommandContext, + CommandTestCase, +) +from documentdb_tests.framework.assertions import assertResult +from documentdb_tests.framework.executor import execute_command +from documentdb_tests.framework.parametrize import pytest_params +from documentdb_tests.framework.target_collection import CustomCollection + +# Property [Timeseries Collection Collation]: a timeseries collection can be +# created with a default collation, and collation affects filter matching on +# the metaField. 
+COLLATION_TIMESERIES_TESTS: list[CommandTestCase] = [ + CommandTestCase( + "timeseries_default_collation_filter", + target_collection=CustomCollection( + options={ + "timeseries": {"timeField": "ts", "metaField": "meta"}, + "collation": {"locale": "en", "strength": 2}, + } + ), + docs=[ + { + "_id": 1, + "ts": datetime.datetime(2024, 1, 1, tzinfo=timezone.utc), + "meta": "apple", + "v": 1, + }, + { + "_id": 2, + "ts": datetime.datetime(2024, 1, 2, tzinfo=timezone.utc), + "meta": "Apple", + "v": 2, + }, + { + "_id": 3, + "ts": datetime.datetime(2024, 1, 3, tzinfo=timezone.utc), + "meta": "banana", + "v": 3, + }, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"meta": "apple"}, + "sort": {"_id": 1}, + }, + expected=[ + { + "_id": 1, + "ts": datetime.datetime(2024, 1, 1, tzinfo=timezone.utc), + "meta": "apple", + "v": 1, + }, + { + "_id": 2, + "ts": datetime.datetime(2024, 1, 2, tzinfo=timezone.utc), + "meta": "Apple", + "v": 2, + }, + ], + msg="timeseries collection with default collation should use it for filter matching", + ), + CommandTestCase( + "timeseries_explicit_collation_filter", + target_collection=CustomCollection( + options={"timeseries": {"timeField": "ts", "metaField": "meta"}} + ), + docs=[ + { + "_id": 1, + "ts": datetime.datetime(2024, 1, 1, tzinfo=timezone.utc), + "meta": "apple", + "v": 1, + }, + { + "_id": 2, + "ts": datetime.datetime(2024, 1, 2, tzinfo=timezone.utc), + "meta": "Apple", + "v": 2, + }, + { + "_id": 3, + "ts": datetime.datetime(2024, 1, 3, tzinfo=timezone.utc), + "meta": "banana", + "v": 3, + }, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"meta": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + { + "_id": 1, + "ts": datetime.datetime(2024, 1, 1, tzinfo=timezone.utc), + "meta": "apple", + "v": 1, + }, + { + "_id": 2, + "ts": datetime.datetime(2024, 1, 2, tzinfo=timezone.utc), + "meta": "Apple", + "v": 2, + }, + ], + msg="timeseries 
collection should support explicit collation on find", + ), +] + + +@pytest.mark.parametrize("test", pytest_params(COLLATION_TIMESERIES_TESTS)) +def test_collation_timeseries(database_client, collection, test): + """Test collation behavior with timeseries collections.""" + collection = test.prepare(database_client, collection) + ctx = CommandContext.from_collection(collection) + result = execute_command(collection, test.build_command(ctx)) + expected = test.build_expected(ctx) + assertResult( + result, + expected=expected, + error_code=test.error_code, + msg=test.msg, + raw_res=not isinstance(expected, list), + ) From 205c1c8704879a70ae576bf9b76260814d0eb084 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 28 May 2026 10:31:07 -0700 Subject: [PATCH 7/8] Move projection tests to command_level/operations Signed-off-by: Daniel Frankcom --- .../operations/test_operations_projection.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename documentdb_tests/compatibility/tests/core/collation/{test_collation_projection.py => command_level/operations/test_operations_projection.py} (100%) diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_projection.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_projection.py similarity index 100% rename from documentdb_tests/compatibility/tests/core/collation/test_collation_projection.py rename to documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_projection.py From cbc047079148a7076a508513667d5e097724c0a4 Mon Sep 17 00:00:00 2001 From: Daniel Frankcom Date: Thu, 28 May 2026 10:44:52 -0700 Subject: [PATCH 8/8] Fold dotted path test into find operations Remove dedicated dotted path file (12 tests across multiple commands for the same concern). Add a single dotted path test to test_operations_find.py filter tests instead. 
Signed-off-by: Daniel Frankcom --- .../operations/test_operations_find.py | 19 ++ .../collation/test_collation_dotted_paths.py | 280 ------------------ 2 files changed, 19 insertions(+), 280 deletions(-) delete mode 100644 documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py diff --git a/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find.py b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find.py index d8cab9885..0c183ae8a 100644 --- a/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find.py +++ b/documentdb_tests/compatibility/tests/core/collation/command_level/operations/test_operations_find.py @@ -149,6 +149,25 @@ expected=[{"_id": 1, "x": "apple"}], msg="find with strength 3 should match case-sensitively", ), + CommandTestCase( + "filter_dotted_path_case_insensitive", + docs=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + {"_id": 3, "a": {"b": "banana"}}, + ], + command=lambda ctx: { + "find": ctx.collection, + "filter": {"a.b": "apple"}, + "sort": {"_id": 1}, + "collation": {"locale": "en", "strength": 2}, + }, + expected=[ + {"_id": 1, "a": {"b": "apple"}}, + {"_id": 2, "a": {"b": "Apple"}}, + ], + msg="find on dotted path should use collation for case-insensitive matching", + ), ] # Property [Find Sort Ordering]: collation affects the sort order of string diff --git a/documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py b/documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py deleted file mode 100644 index 2b52b0069..000000000 --- a/documentdb_tests/compatibility/tests/core/collation/test_collation_dotted_paths.py +++ /dev/null @@ -1,280 +0,0 @@ -"""Tests for collation effects on dotted (nested) field paths.""" - -from __future__ import annotations - -import pytest - -from 
documentdb_tests.compatibility.tests.core.collections.commands.utils.command_test_case import ( - CommandContext, - CommandTestCase, -) -from documentdb_tests.framework.assertions import assertResult -from documentdb_tests.framework.executor import execute_command -from documentdb_tests.framework.parametrize import pytest_params - -# Property [Dotted Path Filter Matching]: collation affects equality and -# comparison operators on dotted field paths in find and aggregate $match, -# enabling case-insensitive matching on nested document fields. -COLLATION_DOTTED_FILTER_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "find_dotted_eq_case_insensitive", - docs=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 3, "a": {"b": "banana"}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {"a.b": "apple"}, - "sort": {"_id": 1}, - "collation": {"locale": "en", "strength": 2}, - }, - expected=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Apple"}}, - ], - msg="find on dotted path with strength 2 should match case-insensitively", - ), - CommandTestCase( - "find_dotted_gt_case_insensitive", - docs=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Banana"}}, - {"_id": 3, "a": {"b": "cherry"}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {"a.b": {"$gt": "apple"}}, - "sort": {"_id": 1}, - "collation": {"locale": "en", "strength": 2}, - }, - expected=[ - {"_id": 2, "a": {"b": "Banana"}}, - {"_id": 3, "a": {"b": "cherry"}}, - ], - msg="find $gt on dotted path should use collation", - ), - CommandTestCase( - "find_dotted_in_case_insensitive", - docs=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 3, "a": {"b": "banana"}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {"a.b": {"$in": ["APPLE"]}}, - "sort": {"_id": 1}, - "collation": {"locale": "en", "strength": 2}, - }, - expected=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, 
"a": {"b": "Apple"}}, - ], - msg="find $in on dotted path should use collation", - ), - CommandTestCase( - "find_deeply_nested_eq", - docs=[ - {"_id": 1, "a": {"b": {"c": "apple"}}}, - {"_id": 2, "a": {"b": {"c": "Apple"}}}, - {"_id": 3, "a": {"b": {"c": "banana"}}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {"a.b.c": "apple"}, - "sort": {"_id": 1}, - "collation": {"locale": "en", "strength": 2}, - }, - expected=[ - {"_id": 1, "a": {"b": {"c": "apple"}}}, - {"_id": 2, "a": {"b": {"c": "Apple"}}}, - ], - msg="find on deeply nested dotted path should use collation", - ), - CommandTestCase( - "match_dotted_eq_case_insensitive", - docs=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 3, "a": {"b": "banana"}}, - ], - command=lambda ctx: { - "aggregate": ctx.collection, - "pipeline": [{"$match": {"a.b": "apple"}}], - "cursor": {}, - "collation": {"locale": "en", "strength": 2}, - }, - expected=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Apple"}}, - ], - msg="$match on dotted path should use collation", - ), - CommandTestCase( - "find_dotted_no_collation_binary", - docs=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Apple"}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {"a.b": "apple"}, - }, - expected=[{"_id": 1, "a": {"b": "apple"}}], - msg="find on dotted path without collation should use binary comparison", - ), -] - -# Property [Dotted Path Sort Ordering]: collation affects sort ordering when -# sorting on dotted field paths. 
-COLLATION_DOTTED_SORT_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "sort_dotted_case_insensitive", - docs=[ - {"_id": 1, "a": {"b": "banana"}}, - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 3, "a": {"b": "cherry"}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {}, - "sort": {"a.b": 1}, - "collation": {"locale": "en", "strength": 2}, - }, - expected=[ - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 1, "a": {"b": "banana"}}, - {"_id": 3, "a": {"b": "cherry"}}, - ], - msg="find sort on dotted path should use collation for case-insensitive ordering", - ), - CommandTestCase( - "sort_dotted_numeric_ordering", - docs=[ - {"_id": 1, "a": {"b": "file10"}}, - {"_id": 2, "a": {"b": "file2"}}, - {"_id": 3, "a": {"b": "file1"}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {}, - "sort": {"a.b": 1}, - "collation": {"locale": "en", "numericOrdering": True}, - }, - expected=[ - {"_id": 3, "a": {"b": "file1"}}, - {"_id": 2, "a": {"b": "file2"}}, - {"_id": 1, "a": {"b": "file10"}}, - ], - msg="find sort on dotted path should use collation numericOrdering", - ), - CommandTestCase( - "sort_deeply_nested", - docs=[ - {"_id": 1, "a": {"b": {"c": "banana"}}}, - {"_id": 2, "a": {"b": {"c": "Apple"}}}, - {"_id": 3, "a": {"b": {"c": "cherry"}}}, - ], - command=lambda ctx: { - "find": ctx.collection, - "filter": {}, - "sort": {"a.b.c": 1}, - "collation": {"locale": "en", "strength": 2}, - }, - expected=[ - {"_id": 2, "a": {"b": {"c": "Apple"}}}, - {"_id": 1, "a": {"b": {"c": "banana"}}}, - {"_id": 3, "a": {"b": {"c": "cherry"}}}, - ], - msg="find sort on deeply nested dotted path should use collation", - ), - CommandTestCase( - "aggregate_sort_dotted", - docs=[ - {"_id": 1, "a": {"b": "banana"}}, - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 3, "a": {"b": "cherry"}}, - ], - command=lambda ctx: { - "aggregate": ctx.collection, - "pipeline": [{"$sort": {"a.b": 1}}], - "cursor": {}, - "collation": {"locale": "en", "strength": 2}, - }, - 
expected=[ - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 1, "a": {"b": "banana"}}, - {"_id": 3, "a": {"b": "cherry"}}, - ], - msg="aggregate $sort on dotted path should use collation", - ), -] - -# Property [Dotted Path in Update Filter]: collation affects the filter on -# dotted paths in update commands. -COLLATION_DOTTED_UPDATE_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "update_dotted_filter_case_insensitive", - docs=[ - {"_id": 1, "a": {"b": "apple"}, "v": 1}, - {"_id": 2, "a": {"b": "Apple"}, "v": 1}, - {"_id": 3, "a": {"b": "banana"}, "v": 1}, - ], - command=lambda ctx: { - "update": ctx.collection, - "updates": [ - { - "q": {"a.b": "apple"}, - "u": {"$set": {"v": 2}}, - "multi": True, - "collation": {"locale": "en", "strength": 2}, - } - ], - }, - expected={"ok": 1.0, "n": 2, "nModified": 2}, - msg="update on dotted path filter should use collation", - ), -] - -# Property [Dotted Path in Distinct]: collation affects deduplication on dotted -# field paths. -COLLATION_DOTTED_DISTINCT_TESTS: list[CommandTestCase] = [ - CommandTestCase( - "distinct_dotted_case_insensitive", - docs=[ - {"_id": 1, "a": {"b": "apple"}}, - {"_id": 2, "a": {"b": "Apple"}}, - {"_id": 3, "a": {"b": "banana"}}, - ], - command=lambda ctx: { - "distinct": ctx.collection, - "key": "a.b", - "collation": {"locale": "en", "strength": 2}, - }, - expected={"values": ["apple", "banana"], "ok": 1.0}, - msg="distinct on dotted path should use collation for deduplication", - ), -] - -COLLATION_DOTTED_PATH_TESTS: list[CommandTestCase] = ( - COLLATION_DOTTED_FILTER_TESTS - + COLLATION_DOTTED_SORT_TESTS - + COLLATION_DOTTED_UPDATE_TESTS - + COLLATION_DOTTED_DISTINCT_TESTS -) - - -@pytest.mark.parametrize("test", pytest_params(COLLATION_DOTTED_PATH_TESTS)) -def test_collation_dotted_paths(database_client, collection, test): - """Test collation effects on dotted field paths.""" - collection = test.prepare(database_client, collection) - ctx = CommandContext.from_collection(collection) - result = 
execute_command(collection, test.build_command(ctx)) - assertResult( - result, - expected=test.build_expected(ctx), - error_code=test.error_code, - msg=test.msg, - raw_res=not isinstance(test.build_expected(ctx), list), - )