Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions regression-test/data/ann_index_p0/ivf_index_test.out
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
5 [50, 20, 20]
6 [60, 20, 20]

-- !sql_l2_topn --
1
2

-- !sql_l2_insufficient_train_rows --
1
2

-- !sql --
1 [1, 2, 3]
2 [0.5, 2.1, 2.9]
Expand All @@ -15,3 +23,7 @@
5 [50, 20, 20]
6 [60, 20, 20]

-- !sql_ip_topn --
6
5

30 changes: 30 additions & 0 deletions regression-test/data/ann_index_p0/ivf_on_disk_index_test.out
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
5 [50, 20, 20]
6 [60, 20, 20]

-- !sql_l2_topn --
1
2

-- !sql --
1 [1, 2, 3]
2 [0.5, 2.1, 2.9]
Expand All @@ -15,6 +19,22 @@
5 [50, 20, 20]
6 [60, 20, 20]

-- !sql_ip_topn --
6
5

-- !sql_stream_load_rows --
1 [1, 2, 3]
2 [0.5, 2.1, 2.9]
3 [10, 10, 10]
4 [20, 20, 20]
5 [50, 20, 20]
6 [60, 20, 20]

-- !sql_stream_load_topn --
1
2

-- !sql --
1 [1, 2, 3]
2 [0.5, 2.1, 2.9]
Expand All @@ -27,3 +47,13 @@
9 [0, 0, 0]
10 [30, 30, 30]

-- !sql_large_topn --
1
2
9

-- !sql_range_search --
1
2
3

22 changes: 11 additions & 11 deletions regression-test/suites/ann_index_p0/ann_index_basic.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ suite ("ann_index_basic") {
sql "set enable_common_expr_pushdown=true;"

// 1) Basic L2 ANN table: dim=3
sql "drop table if exists tbl_ann_l2"
sql "drop table if exists basic_tbl_ann_l2"
sql """
CREATE TABLE tbl_ann_l2 (
CREATE TABLE basic_tbl_ann_l2 (
id INT NOT NULL,
embedding ARRAY<FLOAT> NOT NULL,
INDEX idx_emb (`embedding`) USING ANN PROPERTIES(
Expand All @@ -39,19 +39,19 @@ suite ("ann_index_basic") {
"""

qt_sql_l2_insert """
INSERT INTO tbl_ann_l2 VALUES
INSERT INTO basic_tbl_ann_l2 VALUES
(1, [1.0, 2.0, 3.0]),
(2, [0.5, 2.1, 2.9]),
(3, [10.0, 10.0, 10.0]);
"""

// Query: l2 distance ascending (closest first)
qt_sql_l2_query "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ann_l2 order by dist limit 3;"
qt_sql_l2_query "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from basic_tbl_ann_l2 order by dist limit 3;"

// 2) Basic inner_product ANN table: dim=4
sql "drop table if exists tbl_ann_ip"
sql "drop table if exists basic_tbl_ann_ip"
sql """
CREATE TABLE tbl_ann_ip (
CREATE TABLE basic_tbl_ann_ip (
id INT NOT NULL,
embedding ARRAY<FLOAT> NOT NULL,
INDEX idx_emb (`embedding`) USING ANN PROPERTIES(
Expand All @@ -66,23 +66,23 @@ suite ("ann_index_basic") {
"""

qt_sql_ip_insert """
INSERT INTO tbl_ann_ip VALUES
INSERT INTO basic_tbl_ann_ip VALUES
(1, [0.1, 0.2, 0.3, 0.4]),
(2, [0.5, 0.6, 0.7, 0.8]),
(3, [1.0, 1.0, 1.0, 1.0]);
"""

// Query: inner product descending (higher score first)
qt_sql_ip_query "select id from tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) desc limit 3;"
qt_sql_ip_query "select id from basic_tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) desc limit 3;"

// 3) Simple threshold filter using l2_distance_approximate
qt_sql_l2_threshold "select id from tbl_ann_l2 where l2_distance_approximate(embedding, [1.0,2.0,3.0]) < 5.0 order by id;"
qt_sql_l2_threshold "select id from basic_tbl_ann_l2 where l2_distance_approximate(embedding, [1.0,2.0,3.0]) < 5.0 order by id;"

// 4) Descending l2 order (should exercise path where Desc topn for l2/cosine cannot be evaluated by ann index)
qt_sql_l2_desc "select id from tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;"
qt_sql_l2_desc "select id from basic_tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;"

// 5) Ascending inner_product order (should exercise path where Asc topn for inner product cannot be evaluated by ann index)
qt_sql_ip_asc "select id from tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) asc limit 2;"
qt_sql_ip_asc "select id from basic_tbl_ann_ip order by inner_product_approximate(embedding, [0.1,0.2,0.3,0.4]) asc limit 2;"

// 6) Large table to exercise predicate-input-ratio check (create many rows and run topn with small-range predicate)
sql "drop table if exists tbl_ann_l2_large"
Expand Down
31 changes: 15 additions & 16 deletions regression-test/suites/ann_index_p0/ivf_index_test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ suite ("ivf_index_test") {
sql "set enable_common_expr_pushdown=true;"

// IVF index
sql "drop table if exists tbl_ann_l2"
sql "drop table if exists ivf_tbl_ann_l2"
sql """
CREATE TABLE tbl_ann_l2 (
CREATE TABLE ivf_tbl_ann_l2 (
id INT NOT NULL,
embedding ARRAY<FLOAT> NOT NULL,
INDEX idx_emb (`embedding`) USING ANN PROPERTIES(
Expand All @@ -37,23 +37,22 @@ suite ("ivf_index_test") {
"""

sql """
INSERT INTO tbl_ann_l2 VALUES
INSERT INTO ivf_tbl_ann_l2 VALUES
(1, [1.0, 2.0, 3.0]),
(2, [0.5, 2.1, 2.9]),
(3, [10.0, 10.0, 10.0]),
(4, [20.0, 20.0, 20.0]),
(5, [50.0, 20.0, 20.0]),
(6, [60.0, 20.0, 20.0]);
"""
qt_sql "select * from tbl_ann_l2;"
// just approximate search
sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ann_l2 order by dist limit 2;"
qt_sql "select * from ivf_tbl_ann_l2 order by id;"
qt_sql_l2_topn "select id from ivf_tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;"

sql """drop table if exists tbl_ann_l2"""
sql """drop table if exists ivf_tbl_ann_l2"""
test {
// missing nlist
sql """
CREATE TABLE tbl_ann_l2 (
CREATE TABLE ivf_tbl_ann_l2 (
id INT NOT NULL,
embedding ARRAY<FLOAT> NOT NULL,
INDEX idx_emb (`embedding`) USING ANN PROPERTIES(
Expand All @@ -70,7 +69,7 @@ suite ("ivf_index_test") {
}

sql """
CREATE TABLE tbl_ann_l2 (
CREATE TABLE ivf_tbl_ann_l2 (
id INT NOT NULL,
embedding ARRAY<FLOAT> NOT NULL,
INDEX idx_emb (`embedding`) USING ANN PROPERTIES(
Expand All @@ -86,14 +85,15 @@ suite ("ivf_index_test") {
"""
// Not enough training points: should not throw exception anymore, just skip index building.
sql """
INSERT INTO tbl_ann_l2 VALUES
INSERT INTO ivf_tbl_ann_l2 VALUES
(1, [1.0, 2.0, 3.0]),
(2, [0.5, 2.1, 2.9]);
"""
qt_sql_l2_insufficient_train_rows "select id from ivf_tbl_ann_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;"

sql "drop table if exists tbl_ann_ip"
sql "drop table if exists ivf_tbl_ann_ip"
sql """
CREATE TABLE tbl_ann_ip (
CREATE TABLE ivf_tbl_ann_ip (
id INT NOT NULL,
embedding ARRAY<FLOAT> NOT NULL,
INDEX idx_emb (`embedding`) USING ANN PROPERTIES(
Expand All @@ -109,15 +109,14 @@ suite ("ivf_index_test") {
"""

sql """
INSERT INTO tbl_ann_ip VALUES
INSERT INTO ivf_tbl_ann_ip VALUES
(1, [1.0, 2.0, 3.0]),
(2, [0.5, 2.1, 2.9]),
(3, [10.0, 10.0, 10.0]),
(4, [20.0, 20.0, 20.0]),
(5, [50.0, 20.0, 20.0]),
(6, [60.0, 20.0, 20.0]);
"""
qt_sql "select * from tbl_ann_ip;"
// just approximate search
sql "select id, inner_product_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ann_ip order by dist desc limit 2;"
qt_sql "select * from ivf_tbl_ann_ip order by id;"
qt_sql_ip_topn "select id from ivf_tbl_ann_ip order by inner_product_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;"
}
18 changes: 8 additions & 10 deletions regression-test/suites/ann_index_p0/ivf_on_disk_index_test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@ suite ("ivf_on_disk_index_test") {
(5, [50.0, 20.0, 20.0]),
(6, [60.0, 20.0, 20.0]);
"""
qt_sql "select * from tbl_ivf_on_disk_l2;"
// approximate search with l2_distance
sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ivf_on_disk_l2 order by dist limit 2;"
qt_sql "select * from tbl_ivf_on_disk_l2 order by id;"
qt_sql_l2_topn "select id from tbl_ivf_on_disk_l2 order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;"

// ========== Error: missing nlist for ivf_on_disk ==========
sql "drop table if exists tbl_ivf_on_disk_l2"
Expand Down Expand Up @@ -121,9 +120,8 @@ suite ("ivf_on_disk_index_test") {
(5, [50.0, 20.0, 20.0]),
(6, [60.0, 20.0, 20.0]);
"""
qt_sql "select * from tbl_ivf_on_disk_ip;"
// approximate search with inner_product
sql "select id, inner_product_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ivf_on_disk_ip order by dist desc limit 2;"
qt_sql "select * from tbl_ivf_on_disk_ip order by id;"
qt_sql_ip_topn "select id from tbl_ivf_on_disk_ip order by inner_product_approximate(embedding, [1.0,2.0,3.0]) desc limit 2;"

// ========== IVF_ON_DISK with stream load ==========
sql "drop table if exists tbl_ivf_on_disk_stream_load"
Expand Down Expand Up @@ -163,6 +161,8 @@ suite ("ivf_on_disk_index_test") {
assertEquals(0, json.NumberFilteredRows)
}
}
qt_sql_stream_load_rows "select * from tbl_ivf_on_disk_stream_load order by id;"
qt_sql_stream_load_topn "select id from tbl_ivf_on_disk_stream_load order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 2;"

// ========== IVF_ON_DISK with larger dataset (more rows than nlist) ==========
sql "drop table if exists tbl_ivf_on_disk_large"
Expand Down Expand Up @@ -196,8 +196,7 @@ suite ("ivf_on_disk_index_test") {
(10, [30.0, 30.0, 30.0]);
"""
qt_sql "select * from tbl_ivf_on_disk_large order by id;"
// approximate search on larger dataset
sql "select id, l2_distance_approximate(embedding, [1.0,2.0,3.0]) as dist from tbl_ivf_on_disk_large order by dist limit 3;"
qt_sql_large_topn "select id from tbl_ivf_on_disk_large order by l2_distance_approximate(embedding, [1.0,2.0,3.0]) limit 3;"

// ========== IVF_ON_DISK range search with l2_distance ==========
sql "drop table if exists tbl_ivf_on_disk_range"
Expand Down Expand Up @@ -226,6 +225,5 @@ suite ("ivf_on_disk_index_test") {
(5, [50.0, 20.0, 20.0]),
(6, [60.0, 20.0, 20.0]);
"""
// range search: find vectors within distance threshold
sql "select id from tbl_ivf_on_disk_range where l2_distance_approximate(embedding, [1.0, 2.0, 3.0]) < 20.0 order by id;"
qt_sql_range_search "select id from tbl_ivf_on_disk_range where l2_distance_approximate(embedding, [1.0, 2.0, 3.0]) < 20.0 order by id;"
}
Loading