From ef69f3c12fe1a4705d2c3eb1767c57ed700760b2 Mon Sep 17 00:00:00 2001
From: SubhamSinghal
Date: Sat, 13 Sep 2025 22:10:29 +0530
Subject: [PATCH 1/2] Adds micro-benchmark queries for existence joins

Appends Q13-Q21 to NLJ_QUERIES, all over range(10000) x range(10000):

  * Q13-Q15: WHERE EXISTS (...) queries, labelled SEMI JOIN.
  * Q16-Q18: WHERE NOT EXISTS (...) queries, labelled ANTI JOIN.
  * Q19-Q21: EXISTS (...) projected as the `has_match` column,
    labelled mark joins.

The predicates address the range() output as `value`, matching the
existing queries in this file (e.g. `ON (t1.value > t2.value)`).

---
Reviewer notes (ignored by `git am`):

  * The original revision of this patch referenced `t1.id` / `t2.id`.
    NOTE(review): range() does not appear to expose an `id` column -
    confirm against the DataFusion table-function docs.
  * The blob ids on the `index` line below were not regenerated after
    the `id` -> `value` rename; re-run `git format-patch` before relying
    on them (e.g. for a 3-way apply).

 benchmarks/src/nlj.rs | 81 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/benchmarks/src/nlj.rs b/benchmarks/src/nlj.rs
index e412c0ade8a8..6909d082e850 100644
--- a/benchmarks/src/nlj.rs
+++ b/benchmarks/src/nlj.rs
@@ -146,6 +146,87 @@ const NLJ_QUERIES: &[&str] = &[
         FULL JOIN range(30000) AS t2
         ON (t1.value > t2.value);
     "#,
+    // Q13: SEMI JOIN 10K x 10K | LOW 0.1%
+    r#"
+        SELECT *
+        FROM range(10000) AS t1
+        WHERE EXISTS (
+            SELECT 1
+            FROM range(10000) AS t2
+            WHERE t1.value = t2.value AND t2.value % 10000 = 0
+        )
+    "#,
+    // Q14: SEMI JOIN 10K x 10K | Medium 20%
+    r#"
+        SELECT *
+        FROM range(10000) AS t1
+        WHERE EXISTS (
+            SELECT 1
+            FROM range(10000) AS t2
+            WHERE t1.value = t2.value AND t2.value % 5 = 0
+        )
+    "#,
+    // Q15: SEMI JOIN 10K x 10K | High 90%
+    r#"
+        SELECT *
+        FROM range(10000) AS t1
+        WHERE EXISTS (
+            SELECT 1
+            FROM range(10000) AS t2
+            WHERE t1.value = t2.value AND t2.value % 10 <> 0
+        )
+    "#,
+    // Q16: ANTI JOIN 10K * 10K | LOW 0.1%
+    r#"
+        SELECT *
+        FROM range(10000) AS t1
+        WHERE NOT EXISTS (
+            SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value
+        )
+    "#,
+    // Q17: ANTI JOIN 10K * 10K | MEDIUM 20%
+    r#"
+        SELECT *
+        FROM range(10000) AS t1
+        WHERE NOT EXISTS (
+            SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 5 = 0
+        )
+    "#,
+    // Q18: ANTI JOIN 10K * 10K | HIGH 90%
+    r#"
+        SELECT *
+        FROM range(10000) AS t1
+        WHERE NOT EXISTS (
+            SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 10 = 0
+        )
+    "#,
+    // Q19: Mark joins 10K * 10K | LOW 0.1%
+    r#"
+        SELECT
+            t1.value,
+            EXISTS (
+                SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value
+            ) AS has_match
+        FROM range(10000) AS t1
+    "#,
+    // Q20: Mark joins 10K * 10K | MEDIUM 20%
+    r#"
+        SELECT
+            t1.value,
+            EXISTS (
+                SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 5 = 0
+            ) AS has_match
+        FROM range(10000) AS t1
+    "#,
+    // Q21: Mark joins 10K * 10K | HIGH 90%
+    r#"
+        SELECT
+            t1.value,
+            EXISTS (
+                SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 10 != 0
+            ) AS has_match
+        FROM range(10000) AS t1
+    "#
 ];
 
 impl RunOpt {

From e656917b55667535b9a695ddbf57ba7f68f5dae3 Mon Sep 17 00:00:00 2001
From: Subham Singhal
Date: Mon, 15 Sep 2025 11:52:25 +0530
Subject: [PATCH 2/2] Update nlj.rs

Rewrites the correlated predicates of Q13-Q21 so that t1.value and
t2.value are compared with `>`, `<` or `<>` instead of `=`, and splits
the one-line subqueries of Q16-Q21 so that WHERE sits on its own line.

Presumably the equality predicates let the planner choose an equi-join
operator, which would defeat the purpose of a nested-loop-join
benchmark - TODO confirm with the author.

---
Reviewer notes (ignored by `git am`):

  * NOTE(review): the LOW / MEDIUM / HIGH percentages in the Q13-Q21
    comments are left untouched by this patch although every predicate
    changes - confirm they still describe the new predicates.
  * The blob ids on the `index` line below were not regenerated after
    the `id` -> `value` rename; re-run `git format-patch` before relying
    on them (e.g. for a 3-way apply).

 benchmarks/src/nlj.rs | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/benchmarks/src/nlj.rs b/benchmarks/src/nlj.rs
index 6909d082e850..e72a56555938 100644
--- a/benchmarks/src/nlj.rs
+++ b/benchmarks/src/nlj.rs
@@ -153,7 +153,7 @@ const NLJ_QUERIES: &[&str] = &[
         WHERE EXISTS (
             SELECT 1
             FROM range(10000) AS t2
-            WHERE t1.value = t2.value AND t2.value % 10000 = 0
+            WHERE t1.value > t2.value AND t2.value = 0
         )
     "#,
     // Q14: SEMI JOIN 10K x 10K | Medium 20%
@@ -163,7 +163,7 @@ const NLJ_QUERIES: &[&str] = &[
         WHERE EXISTS (
             SELECT 1
             FROM range(10000) AS t2
-            WHERE t1.value = t2.value AND t2.value % 5 = 0
+            WHERE t1.value > t2.value AND t2.value % 5 = 0
         )
     "#,
     // Q15: SEMI JOIN 10K x 10K | High 90%
@@ -173,7 +173,7 @@ const NLJ_QUERIES: &[&str] = &[
         WHERE EXISTS (
             SELECT 1
             FROM range(10000) AS t2
-            WHERE t1.value = t2.value AND t2.value % 10 <> 0
+            WHERE t1.value > t2.value AND t2.value % 10 <> 0
         )
     "#,
     // Q16: ANTI JOIN 10K * 10K | LOW 0.1%
@@ -181,7 +181,8 @@ const NLJ_QUERIES: &[&str] = &[
         SELECT *
         FROM range(10000) AS t1
         WHERE NOT EXISTS (
-            SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value
+            SELECT 1 FROM range(10000) AS t2
+            WHERE t1.value < t2.value AND t2.value % 10000 = 0
         )
     "#,
     // Q17: ANTI JOIN 10K * 10K | MEDIUM 20%
@@ -189,7 +190,8 @@ const NLJ_QUERIES: &[&str] = &[
         SELECT *
         FROM range(10000) AS t1
         WHERE NOT EXISTS (
-            SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 5 = 0
+            SELECT 1 FROM range(10000) AS t2
+            WHERE t1.value > t2.value AND t2.value % 5 = 0
         )
     "#,
     // Q18: ANTI JOIN 10K * 10K | HIGH 90%
@@ -197,7 +199,8 @@ const NLJ_QUERIES: &[&str] = &[
         SELECT *
         FROM range(10000) AS t1
         WHERE NOT EXISTS (
-            SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 10 = 0
+            SELECT 1 FROM range(10000) AS t2
+            WHERE t1.value <> t2.value AND t2.value % 10 <> 0
         )
     "#,
     // Q19: Mark joins 10K * 10K | LOW 0.1%
@@ -205,7 +208,8 @@ const NLJ_QUERIES: &[&str] = &[
         SELECT
             t1.value,
             EXISTS (
-                SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value
+                SELECT 1 FROM range(10000) AS t2
+                WHERE t1.value < t2.value AND t2.value % 1000 = 0
             ) AS has_match
         FROM range(10000) AS t1
     "#,
@@ -214,7 +218,8 @@ const NLJ_QUERIES: &[&str] = &[
         SELECT
             t1.value,
             EXISTS (
-                SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 5 = 0
+                SELECT 1 FROM range(10000) AS t2
+                WHERE t1.value % 5 <> t2.value % 5 AND t2.value % 5 = 0
             ) AS has_match
         FROM range(10000) AS t1
     "#,
@@ -223,7 +228,8 @@ const NLJ_QUERIES: &[&str] = &[
         SELECT
             t1.value,
             EXISTS (
-                SELECT 1 FROM range(10000) AS t2 WHERE t1.value = t2.value AND t2.value % 10 != 0
+                SELECT 1 FROM range(10000) AS t2
+                WHERE t1.value % 10 <> t2.value % 10 AND t2.value % 10 != 0
             ) AS has_match
         FROM range(10000) AS t1
     "#