From 9c08781db3c98bb2746bd52d6ae889d757e6abf4 Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Tue, 7 Apr 2026 21:52:40 +0800 Subject: [PATCH 1/2] D --- .../ann_index_p0/ann_index_basic.groovy | 22 ++++++++-------- .../suites/ann_index_p0/ivf_index_test.groovy | 26 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/regression-test/suites/ann_index_p0/ann_index_basic.groovy b/regression-test/suites/ann_index_p0/ann_index_basic.groovy index b64e795c993aea..41ae9e5c309864 100644 --- a/regression-test/suites/ann_index_p0/ann_index_basic.groovy +++ b/regression-test/suites/ann_index_p0/ann_index_basic.groovy @@ -22,9 +22,9 @@ suite ("ann_index_basic") { sql "set enable_common_expr_pushdown=true;" // 1) Basic L2 ANN table: dim=3 - sql "drop table if exists tbl_ann_l2" + sql "drop table if exists basic_tbl_ann_l2" sql """ - CREATE TABLE tbl_ann_l2 ( + CREATE TABLE basic_tbl_ann_l2 ( id INT NOT NULL, embedding ARRAY NOT NULL, INDEX idx_emb (`embedding`) USING ANN PROPERTIES( @@ -39,19 +39,19 @@ suite ("ann_index_basic") { """ qt_sql_l2_insert """ - INSERT INTO tbl_ann_l2 VALUES + INSERT INTO basic_tbl_ann_l2 VALUES (1, [1.0, 2.0, 3.0]), (2, [0.5, 2.1, 2.9]), (3, [10.0, 10.0, 10.0]); """ // Query: l2 distance ascending (closest first) - qt_sql_l2_query "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ann_l2 order by dist limit 3;" + qt_sql_l2_query "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from basic_tbl_ann_l2 order by dist limit 3;" // 2) Basic inner_product ANN table: dim=4 - sql "drop table if exists tbl_ann_ip" + sql "drop table if exists basic_tbl_ann_ip" sql """ - CREATE TABLE tbl_ann_ip ( + CREATE TABLE basic_tbl_ann_ip ( id INT NOT NULL, embedding ARRAY NOT NULL, INDEX idx_emb (`embedding`) USING ANN PROPERTIES( @@ -66,23 +66,23 @@ suite ("ann_index_basic") { """ qt_sql_ip_insert """ - INSERT INTO tbl_ann_ip VALUES + INSERT INTO basic_tbl_ann_ip VALUES (1, [0.1, 0.2, 0.3, 0.4]), (2, [0.5, 0.6, 0.7, 0.8]), (3, [1.0, 1.0, 1.0, 1.0]); """ // Query: inner product descending (higher score first) - qt_sql_ip_query "select id from tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) desc limit 3;" + qt_sql_ip_query "select id from basic_tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) desc limit 3;" // 3) Simple threshold filter using l2_distance_approximate - qt_sql_l2_threshold "select id from tbl_ann_l2 where l2_distance_approximate(embedding, [1.0,2.0,3.0]) < 5.0 order by id;" + qt_sql_l2_threshold "select id from basic_tbl_ann_l2 where l2_distance_approximate(embedding, [1.0,2.0,3.0]) < 5.0 order by id;" // 4) Descending l2 order (should exercise path where Desc topn for l2/cosine cannot be evaluated by ann index) - qt_sql_l2_desc "select id from tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;" + qt_sql_l2_desc "select id from basic_tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;" // 5) Ascending inner_product order (should exercise path where Asc topn for inner product cannot be evaluated by ann index) - qt_sql_ip_asc "select id from tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) asc limit 2;" + qt_sql_ip_asc "select id from basic_tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) asc limit 2;" // 6) Large table to exercise predicate-input-ratio check (create many rows and run topn with small-range predicate) sql "drop table if exists tbl_ann_l2_large" diff --git a/regression-test/suites/ann_index_p0/ivf_index_test.groovy b/regression-test/suites/ann_index_p0/ivf_index_test.groovy index c806eed306885c..39f9e1bf9d0d13 100644 --- a/regression-test/suites/ann_index_p0/ivf_index_test.groovy +++ b/regression-test/suites/ann_index_p0/ivf_index_test.groovy @@ -19,9 +19,9 @@ suite ("ivf_index_test") { sql "set enable_common_expr_pushdown=true;" // IVF index - sql "drop table if exists tbl_ann_l2" + sql "drop table if exists ivf_tbl_ann_l2" sql """ - CREATE TABLE tbl_ann_l2 ( + CREATE TABLE ivf_tbl_ann_l2 ( id INT NOT NULL, embedding ARRAY NOT NULL, INDEX idx_emb (`embedding`) USING ANN PROPERTIES( @@ -37,7 +37,7 @@ suite ("ivf_index_test") { """ sql """ - INSERT INTO tbl_ann_l2 VALUES + INSERT INTO ivf_tbl_ann_l2 VALUES (1, [1.0, 2.0, 3.0]), (2, [0.5, 2.1, 2.9]), (3, [10.0, 10.0, 10.0]), @@ -45,15 +45,15 @@ suite ("ivf_index_test") { (5, [50.0, 20.0, 20.0]), (6, [60.0, 20.0, 20.0]); """ - qt_sql "select * from tbl_ann_l2;" + qt_sql "select * from ivf_tbl_ann_l2;" // just approximate search - sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ann_l2 order by dist limit 2;" + sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from ivf_tbl_ann_l2 order by dist limit 2;" - sql """drop table if exists tbl_ann_l2""" + sql """drop table if exists ivf_tbl_ann_l2""" test { // missing nlist sql """ - CREATE TABLE tbl_ann_l2 ( + CREATE TABLE ivf_tbl_ann_l2 ( id INT NOT NULL, embedding ARRAY NOT NULL, INDEX idx_emb (`embedding`) USING ANN PROPERTIES( @@ -70,7 +70,7 @@ suite ("ivf_index_test") { } sql """ - CREATE TABLE tbl_ann_l2 ( + CREATE TABLE ivf_tbl_ann_l2 ( id INT NOT NULL, embedding ARRAY NOT NULL, INDEX idx_emb (`embedding`) USING ANN PROPERTIES( @@ -91,9 +91,9 @@ suite ("ivf_index_test") { (2, [0.5, 2.1, 2.9]); """ - sql "drop table if exists tbl_ann_ip" + sql "drop table if exists ivf_tbl_ann_ip" sql """ - CREATE TABLE tbl_ann_ip ( + CREATE TABLE ivf_tbl_ann_ip ( id INT NOT NULL, embedding ARRAY NOT NULL, INDEX idx_emb (`embedding`) USING ANN PROPERTIES( @@ -109,7 +109,7 @@ suite ("ivf_index_test") { """ sql """ - INSERT INTO tbl_ann_ip VALUES + INSERT INTO ivf_tbl_ann_ip VALUES (1, [1.0, 2.0, 3.0]), (2, [0.5, 2.1, 2.9]), (3, [10.0, 10.0, 10.0]), @@ -117,7 +117,7 @@ suite ("ivf_index_test") { (5, [50.0, 20.0, 20.0]), (6, [60.0, 20.0, 20.0]); """ - qt_sql "select * from tbl_ann_ip;" + qt_sql "select * from ivf_tbl_ann_ip;" // just approximate search - sql "select id, inner_product_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ann_ip order by dist desc limit 2;" + sql "select id, inner_product_approximate(embedding, [1.0,2.0,3.0]) as dist from ivf_tbl_ann_ip order by dist desc limit 2;" } From eb3e9411f2f40270d35c450c0f2183172831e8eb Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Wed, 8 Apr 2026 00:00:49 +0800 Subject: [PATCH 2/2] [test](regression) Strengthen ANN IVF regression assertions ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: IVF and IVF_ON_DISK regression cases only executed several ANN queries without asserting their results, and some snapshot queries relied on implicit row order. This could miss regressions in ANN behavior and make the cases flaky. ### Release note None ### Check List (For Author) - Test: Regression test - `./run-regression-test.sh --run -f regression-test/suites/ann_index_p0/ivf_index_test.groovy -forceGenOut` - `./run-regression-test.sh --run -f regression-test/suites/ann_index_p0/ivf_on_disk_index_test.groovy -forceGenOut` - Behavior changed: No - Does this need documentation: No --- .../data/ann_index_p0/ivf_index_test.out | 12 ++++++++ .../ann_index_p0/ivf_on_disk_index_test.out | 30 +++++++++++++++++++ .../suites/ann_index_p0/ivf_index_test.groovy | 13 ++++---- .../ivf_on_disk_index_test.groovy | 18 +++++------ 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/regression-test/data/ann_index_p0/ivf_index_test.out b/regression-test/data/ann_index_p0/ivf_index_test.out index 91c7483c806fd6..3e484c0b696ca4 100644 --- a/regression-test/data/ann_index_p0/ivf_index_test.out +++ b/regression-test/data/ann_index_p0/ivf_index_test.out @@ -7,6 +7,14 @@ 5 [50, 20, 20] 6 [60, 20, 20] +-- !sql_l2_topn -- +1 +2 + +-- !sql_l2_insufficient_train_rows -- +1 +2 + -- !sql -- 1 [1, 2, 3] 2 [0.5, 2.1, 2.9] @@ -15,3 +23,7 @@ 5 [50, 20, 20] 6 [60, 20, 20] +-- !sql_ip_topn -- +6 +5 + diff --git a/regression-test/data/ann_index_p0/ivf_on_disk_index_test.out b/regression-test/data/ann_index_p0/ivf_on_disk_index_test.out index ffeeb85e1335b3..bcd94f4ac52341 100644 --- a/regression-test/data/ann_index_p0/ivf_on_disk_index_test.out +++ b/regression-test/data/ann_index_p0/ivf_on_disk_index_test.out @@ -7,6 +7,10 @@ 5 [50, 20, 20] 6 [60, 20, 20] +-- !sql_l2_topn -- +1 +2 + -- !sql -- 1 [1, 2, 3] 2 [0.5, 2.1, 2.9] @@ -15,6 +19,22 @@ 5 [50, 20, 20] 6 [60, 20, 20] +-- !sql_ip_topn -- +6 +5 + +-- !sql_stream_load_rows -- +1 [1, 2, 3] +2 [0.5, 2.1, 2.9] +3 [10, 10, 10] +4 [20, 20, 20] +5 [50, 20, 20] +6 [60, 20, 20] + +-- !sql_stream_load_topn -- +1 +2 + -- !sql -- 1 [1, 2, 3] 2 [0.5, 2.1, 2.9] @@ -27,3 +47,13 @@ 9 [0, 0, 0] 10 [30, 30, 30] +-- !sql_large_topn -- +1 +2 +9 + +-- !sql_range_search -- +1 +2 +3 + diff --git a/regression-test/suites/ann_index_p0/ivf_index_test.groovy b/regression-test/suites/ann_index_p0/ivf_index_test.groovy index 39f9e1bf9d0d13..58eda2f2fce829 100644 --- a/regression-test/suites/ann_index_p0/ivf_index_test.groovy +++ b/regression-test/suites/ann_index_p0/ivf_index_test.groovy @@ -45,9 +45,8 @@ suite ("ivf_index_test") { (5, [50.0, 20.0, 20.0]), (6, [60.0, 20.0, 20.0]); """ - qt_sql "select * from ivf_tbl_ann_l2;" - // just approximate search - sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from ivf_tbl_ann_l2 order by dist limit 2;" + qt_sql "select * from ivf_tbl_ann_l2 order by id;" + qt_sql_l2_topn "select id from ivf_tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;" sql """drop table if exists ivf_tbl_ann_l2""" test { @@ -86,10 +85,11 @@ suite ("ivf_index_test") { """ // Not enough training points: should not throw exception anymore, just skip index building. sql """ - INSERT INTO tbl_ann_l2 VALUES + INSERT INTO ivf_tbl_ann_l2 VALUES (1, [1.0, 2.0, 3.0]), (2, [0.5, 2.1, 2.9]); """ + qt_sql_l2_insufficient_train_rows "select id from ivf_tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;" sql "drop table if exists ivf_tbl_ann_ip" sql """ @@ -117,7 +117,6 @@ suite ("ivf_index_test") { (5, [50.0, 20.0, 20.0]), (6, [60.0, 20.0, 20.0]); """ - qt_sql "select * from ivf_tbl_ann_ip;" - // just approximate search - sql "select id, inner_product_approximate(embedding, [1.0,2.0,3.0]) as dist from ivf_tbl_ann_ip order by dist desc limit 2;" + qt_sql "select * from ivf_tbl_ann_ip order by id;" + qt_sql_ip_topn "select id from ivf_tbl_ann_ip order by inner_product_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;" } diff --git a/regression-test/suites/ann_index_p0/ivf_on_disk_index_test.groovy b/regression-test/suites/ann_index_p0/ivf_on_disk_index_test.groovy index ac77fa1c60b5f7..a9eed51d7a4125 100644 --- a/regression-test/suites/ann_index_p0/ivf_on_disk_index_test.groovy +++ b/regression-test/suites/ann_index_p0/ivf_on_disk_index_test.groovy @@ -45,9 +45,8 @@ suite ("ivf_on_disk_index_test") { (5, [50.0, 20.0, 20.0]), (6, [60.0, 20.0, 20.0]); """ - qt_sql "select * from tbl_ivf_on_disk_l2;" - // approximate search with l2_distance - sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ivf_on_disk_l2 order by dist limit 2;" + qt_sql "select * from tbl_ivf_on_disk_l2 order by id;" + qt_sql_l2_topn "select id from tbl_ivf_on_disk_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;" // ========== Error: missing nlist for ivf_on_disk ========== sql "drop table if exists tbl_ivf_on_disk_l2" @@ -121,9 +120,8 @@ suite ("ivf_on_disk_index_test") { (5, [50.0, 20.0, 20.0]), (6, [60.0, 20.0, 20.0]); """ - qt_sql "select * from tbl_ivf_on_disk_ip;" - // approximate search with inner_product - sql "select id, inner_product_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ivf_on_disk_ip order by dist desc limit 2;" + qt_sql "select * from tbl_ivf_on_disk_ip order by id;" + qt_sql_ip_topn "select id from tbl_ivf_on_disk_ip order by inner_product_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;" // ========== IVF_ON_DISK with stream load ========== sql "drop table if exists tbl_ivf_on_disk_stream_load" @@ -163,6 +161,8 @@ suite ("ivf_on_disk_index_test") { assertEquals(0, json.NumberFilteredRows) } } + qt_sql_stream_load_rows "select * from tbl_ivf_on_disk_stream_load order by id;" + qt_sql_stream_load_topn "select id from tbl_ivf_on_disk_stream_load order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;" // ========== IVF_ON_DISK with larger dataset (more rows than nlist) ========== sql "drop table if exists tbl_ivf_on_disk_large" @@ -196,8 +196,7 @@ suite ("ivf_on_disk_index_test") { (10, [30.0, 30.0, 30.0]); """ qt_sql "select * from tbl_ivf_on_disk_large order by id;" - // approximate search on larger dataset - sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ivf_on_disk_large order by dist limit 3;" + qt_sql_large_topn "select id from tbl_ivf_on_disk_large order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 3;" // ========== IVF_ON_DISK range search with l2_distance ========== sql "drop table if exists tbl_ivf_on_disk_range" @@ -226,6 +225,5 @@ suite ("ivf_on_disk_index_test") { (5, [50.0, 20.0, 20.0]), (6, [60.0, 20.0, 20.0]); """ - // range search: find vectors within distance threshold - sql "select id from tbl_ivf_on_disk_range where l2_distance_approximate(embedding, [1.0, 2.0, 3.0]) < 20.0 order by id;" + qt_sql_range_search "select id from tbl_ivf_on_disk_range where l2_distance_approximate(embedding, [1.0, 2.0, 3.0]) < 20.0 order by id;" }