Skip to content

Commit

Permalink
feat(cubesql): Flatten IN lists expressions to improve performance (#…
Browse files — browse the repository at this point in the history
  • Loading branch information
Nikita-str committed May 14, 2024
1 parent 2c56be6 commit 66aa01d
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 135 deletions.
77 changes: 76 additions & 1 deletion rust/cubesql/cubesql/benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
use cubesql::compile::test::rewrite_engine::{
cube_context, query_to_logical_plan, rewrite_rules, rewrite_runner,
};
use itertools::Itertools;
use std::sync::Arc;

macro_rules! bench_func {
Expand Down Expand Up @@ -348,9 +349,83 @@ pub fn power_bi_sum_wrap(c: &mut Criterion) {
bench_func!("power_bi_sum_wrap", get_power_bi_sum_wrap(), c);
}

/// Builds the benchmark query `SELECT * FROM NumberCube WHERE someNumber IN (1, 2, ..., 50)`.
fn get_simple_long_in_expr() -> String {
    const N: usize = 50;

    let mut query = String::from("SELECT * FROM NumberCube WHERE someNumber IN (");
    // Comma-separated literals 1..=N, rendered through `Itertools::join`.
    query.push_str(&(1..=N).join(", "));
    query.push(')');
    query
}

/// Criterion benchmark for the query produced by `get_simple_long_in_expr`.
///
/// NOTE(review): `std::env::set_var` changes the environment of the whole
/// process, so the variable stays set for anything that runs afterwards —
/// confirm that is acceptable for the other benchmarks in this group.
pub fn long_simple_in_expr(c: &mut Criterion) {
// Set before the benchmark runs; presumably this switches on SQL push down
// in the rewriter — confirm against where CUBESQL_SQL_PUSH_DOWN is read.
std::env::set_var("CUBESQL_SQL_PUSH_DOWN", "true");
// `bench_func!` is declared earlier in this file; it receives the benchmark
// name, the SQL text and the Criterion handle.
bench_func!("long_simple_in_expr", get_simple_long_in_expr(), c);
}

/// Returns the SQL text used by the `long_in_expr` benchmark.
///
/// The query selects fifteen `WideCube` dimensions plus a `SUM`, filters on
/// several equality predicates and on a 50-element `IN (...)` list combined
/// with an `IS NULL` check, and groups by the selected dimensions.
fn get_long_in_expr() -> String {
    // Raw string literal so the double-quoted identifiers need no escaping.
    const QUERY: &str = r#"
SELECT
"WideCube"."dim1" as "column1",
"WideCube"."dim2" as "column2",
"WideCube"."dim3" as "column3",
"WideCube"."dim4" as "column4",
"WideCube"."dim5" as "column5",
"WideCube"."dim6" as "column6",
"WideCube"."dim7" as "column7",
"WideCube"."dim8" as "column8",
"WideCube"."dim9" as "column9",
"WideCube"."dim10" as "column10",
"WideCube"."dim11" as "column11",
"WideCube"."dim12" as "column12",
"WideCube"."dim13" as "column13",
"WideCube"."dim14" as "column14",
"WideCube"."dim15" as "column15",
SUM("WideCube"."dim16") as "some_sum"
FROM
"WideCube"
WHERE
"WideCube"."dim1" = 1
AND "WideCube"."dim2" = 2
AND "WideCube"."dim3" = 3
AND "WideCube"."dim4" = 4
AND "WideCube"."dim5" = 5
AND "WideCube"."dim6" = 6
AND "WideCube"."dim7" = 7
AND "WideCube"."dim8" = 8
AND "WideCube"."dim9" = 9
AND "WideCube"."dim10" = 10
AND ("WideCube"."dim11" = 42 OR "WideCube"."dim11" IS NULL)
AND (
"WideCube"."dim12" IN (
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50
) OR "WideCube"."dim12" IS NULL
) AND "WideCube"."dim20" = 55
GROUP BY
"WideCube"."dim1",
"WideCube"."dim2",
"WideCube"."dim3",
"WideCube"."dim4",
"WideCube"."dim5",
"WideCube"."dim6",
"WideCube"."dim7",
"WideCube"."dim8",
"WideCube"."dim9",
"WideCube"."dim10",
"WideCube"."dim11",
"WideCube"."dim12",
"WideCube"."dim13",
"WideCube"."dim14",
"WideCube"."dim15"
"#;

    QUERY.to_owned()
}

/// Criterion benchmark for the query produced by `get_long_in_expr`.
///
/// NOTE(review): `std::env::set_var` changes the environment of the whole
/// process, so the variable stays set for anything that runs afterwards —
/// confirm that is acceptable for the other benchmarks in this group.
pub fn long_in_expr(c: &mut Criterion) {
// Set before the benchmark runs; presumably this switches on SQL push down
// in the rewriter — confirm against where CUBESQL_SQL_PUSH_DOWN is read.
std::env::set_var("CUBESQL_SQL_PUSH_DOWN", "true");
// `bench_func!` is declared earlier in this file; it receives the benchmark
// name, the SQL text and the Criterion handle.
bench_func!("long_in_expr", get_long_in_expr(), c);
}

// Benchmark group for this file: each benchmark is measured for 15 seconds
// with 10 samples. Every benchmark function appears once, in a single
// `targets` entry; the shorter list without the two IN-list benchmarks was
// superseded by the one below and must not be kept alongside it.
criterion_group! {
    name = benches;
    config = Criterion::default().measurement_time(std::time::Duration::from_secs(15)).sample_size(10);
    targets = split_query, split_query_count_distinct, wrapped_query, power_bi_wrap, power_bi_sum_wrap, long_in_expr, long_simple_in_expr
}
criterion_main!(benches);
131 changes: 49 additions & 82 deletions rust/cubesql/cubesql/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19422,6 +19422,9 @@ ORDER BY "source"."str0" ASC

#[tokio::test]
async fn test_thoughtspot_where_binary_in_true_false() {
if !Rewriter::sql_push_down_enabled() {
return;
}
init_logger();

let logical_plan = convert_select_to_query_plan(
Expand Down Expand Up @@ -19453,88 +19456,17 @@ ORDER BY "source"."str0" ASC
.await
.as_logical_plan();

assert_eq!(
logical_plan.find_cube_scan().request,
V1LoadRequestQuery {
measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string()]),
dimensions: Some(vec!["KibanaSampleDataEcommerce.customer_gender".to_string()]),
segments: Some(vec![]),
time_dimensions: None,
order: None,
limit: None,
offset: None,
filters: Some(vec![
V1LoadRequestQueryFilterItem {
member: None,
operator: None,
values: None,
or: Some(vec![
json!(V1LoadRequestQueryFilterItem {
member: None,
operator: None,
values: None,
or: None,
and: Some(vec![
json!(V1LoadRequestQueryFilterItem {
member: Some(
"KibanaSampleDataEcommerce.customer_gender".to_string()
),
operator: Some("startsWith".to_string()),
values: Some(vec!["female".to_string()]),
or: None,
and: None,
}),
json!(V1LoadRequestQueryFilterItem {
member: Some(
"KibanaSampleDataEcommerce.customer_gender".to_string()
),
operator: Some("endsWith".to_string()),
values: Some(vec!["female".to_string()]),
or: None,
and: None,
}),
]),
}),
json!(V1LoadRequestQueryFilterItem {
member: None,
operator: None,
values: None,
or: None,
and: Some(vec![
json!(V1LoadRequestQueryFilterItem {
member: Some(
"KibanaSampleDataEcommerce.customer_gender".to_string()
),
operator: Some("startsWith".to_string()),
values: Some(vec!["male".to_string()]),
or: None,
and: None,
}),
json!(V1LoadRequestQueryFilterItem {
member: Some(
"KibanaSampleDataEcommerce.customer_gender".to_string()
),
operator: Some("endsWith".to_string()),
values: Some(vec!["male".to_string()]),
or: None,
and: None,
}),
]),
}),
]),
and: None,
},
V1LoadRequestQueryFilterItem {
member: Some("KibanaSampleDataEcommerce.customer_gender".to_string()),
operator: Some("set".to_string()),
values: None,
or: None,
and: None,
},
]),
ungrouped: None,
}
)
// check if contains `(LOWER(..) = .. OR ..LOWER(..) = ..) IN (TRUE, FALSE)`
let re = Regex::new(r"\(LOWER ?\(.+\) = .+ OR .+LOWER ?\(.+\) = .+\) IN \(TRUE, FALSE\)")
.unwrap();

let sql = logical_plan
.find_cube_scan_wrapper()
.wrapped_sql
.unwrap()
.sql;

assert!(re.is_match(&sql));
}

#[tokio::test]
Expand Down Expand Up @@ -22901,4 +22833,39 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),
displayable(physical_plan.as_ref()).indent()
);
}

/// Plans `SELECT * FROM NumberCube WHERE someNumber IN (1, ..., 50)` and checks
/// that the cube scan request carries one `equals` filter on
/// `NumberCube.someNumber` listing all fifty values.
///
/// The test returns early when SQL push down is disabled.
#[tokio::test]
async fn test_long_in_expr() {
    if !Rewriter::sql_push_down_enabled() {
        return;
    }

    const N: usize = 50;

    let in_values = (1..=N).join(", ");
    let sql = format!("SELECT * FROM NumberCube WHERE someNumber IN ({in_values})");

    let logical_plan = convert_select_to_query_plan(sql, DatabaseProtocol::PostgreSQL)
        .await
        .as_logical_plan();

    // All IN-list members are expected as the values of a single filter item.
    let expected_filter = V1LoadRequestQueryFilterItem {
        member: Some("NumberCube.someNumber".to_string()),
        operator: Some("equals".to_string()),
        values: Some((1..=N).map(|value| value.to_string()).collect()),
        or: None,
        and: None,
    };

    let expected_request = V1LoadRequestQuery {
        measures: Some(vec!["NumberCube.someNumber".to_string()]),
        dimensions: Some(vec![]),
        segments: Some(vec![]),
        time_dimensions: None,
        order: None,
        limit: None,
        offset: None,
        filters: Some(vec![expected_filter]),
        ungrouped: Some(true),
    };

    assert_eq!(logical_plan.find_cube_scan().request, expected_request);
}
}
12 changes: 11 additions & 1 deletion rust/cubesql/cubesql/src/compile/rewrite/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ macro_rules! add_expr_list_node {
}};
}

// Runs `Self::add_expr_replace_params` on every element of `$value_expr`, in
// order, and adds one `LogicalPlanLanguage::$field_variant` node to `$graph`
// that holds all of the results. The first conversion error is propagated to
// the enclosing function with `?`, so no list node is added in that case.
macro_rules! add_expr_flat_list_node {
    ($graph:expr, $value_expr:expr, $query_params:expr, $field_variant:ident) => {{
        let mut children = Vec::new();
        for item in $value_expr.iter() {
            children.push(Self::add_expr_replace_params($graph, item, $query_params)?);
        }
        $graph.add(LogicalPlanLanguage::$field_variant(children))
    }};
}

macro_rules! add_binary_expr_list_node {
($graph:expr, $value_expr:expr, $query_params:expr, $field_variant:ident) => {{
fn to_binary_tree(
Expand Down Expand Up @@ -421,7 +431,7 @@ impl LogicalPlanToLanguageConverter {
negated,
} => {
let expr = Self::add_expr_replace_params(graph, expr, query_params)?;
let list = add_expr_list_node!(graph, list, query_params, InListExprList);
let list = add_expr_flat_list_node!(graph, list, query_params, InListExprList);
let negated = add_expr_data_node!(graph, negated, InListExprNegated);
graph.add(LogicalPlanLanguage::InListExpr([expr, list, negated]))
}
Expand Down
7 changes: 7 additions & 0 deletions rust/cubesql/cubesql/src/compile/rewrite/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,13 @@ fn list_expr(list_type: impl Display, list: Vec<impl Display>) -> String {
current
}

#[allow(unused)]
fn flat_list_expr(list_type: impl Display, list: Vec<impl Display>) -> String {
use itertools::Itertools;
let list = list.iter().join(" ");
format!("({list_type} {list})")
}

fn udf_expr(fun_name: impl Display, args: Vec<impl Display>) -> String {
udf_expr_var_arg(fun_name, list_expr("ScalarUDFExprArgs", args))
}
Expand Down
52 changes: 1 addition & 51 deletions rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
column_expr, cube_scan, cube_scan_filters, cube_scan_filters_empty_tail, cube_scan_members,
dimension_expr, expr_column_name, filter, filter_member, filter_op, filter_op_filters,
filter_op_filters_empty_tail, filter_replacer, filter_simplify_replacer, fun_expr,
fun_expr_var_arg, inlist_expr, is_not_null_expr, is_null_expr, like_expr, limit, list_expr,
fun_expr_var_arg, inlist_expr, is_not_null_expr, is_null_expr, like_expr, limit,
literal_bool, literal_expr, literal_int, literal_string, measure_expr,
member_name_by_alias, negative_expr, not_expr, projection, rewrite,
rewriter::RewriteRules,
Expand Down Expand Up @@ -1674,56 +1674,6 @@ impl RewriteRules for FilterRules {
"?filter_aliases",
),
),
rewrite(
"filter-thoughtspot-lower-in-true-false",
filter_replacer(
inlist_expr(
binary_expr(
binary_expr(
fun_expr("Lower", vec![column_expr("?column")]),
"=",
literal_expr("?left_literal"),
),
"OR",
binary_expr(
fun_expr("Lower", vec![column_expr("?column")]),
"=",
literal_expr("?right_literal"),
),
),
list_expr(
"InListExprList",
vec![literal_bool(true), literal_bool(false)],
),
"InListExprNegated:false",
),
"?alias_to_cube",
"?members",
"?filter_aliases",
),
filter_replacer(
binary_expr(
binary_expr(
binary_expr(
fun_expr("Lower", vec![column_expr("?column")]),
"=",
literal_expr("?left_literal"),
),
"OR",
binary_expr(
fun_expr("Lower", vec![column_expr("?column")]),
"=",
literal_expr("?right_literal"),
),
),
"AND",
is_not_null_expr(column_expr("?column")),
),
"?alias_to_cube",
"?members",
"?filter_aliases",
),
),
transforming_rewrite(
"extract-year-equals",
filter_replacer(
Expand Down

0 comments on commit 66aa01d

Please sign in to comment.