Skip to content

Commit

Permalink
Merge pull request #6921 from RinChanNOWWW/MIGREATE-TRIM
Browse files Browse the repository at this point in the history
feat(functions): migrate trim functions to new expression framework.
  • Loading branch information
mergify[bot] committed Aug 2, 2022
2 parents 5d57068 + 6867ae6 commit 223ad77
Show file tree
Hide file tree
Showing 4 changed files with 1,298 additions and 1 deletion.
165 changes: 165 additions & 0 deletions common/functions-v2/src/scalars/string.rs
Expand Up @@ -193,6 +193,125 @@ pub fn register(registry: &mut FunctionRegistry) {
},
|val| val.first().cloned().unwrap_or_default(),
);

// Trim functions
// `ltrim(s)`: removes every leading space (b' ') and tab (b'\t') byte from `s`.
registry.register_passthrough_nullable_1_arg::<StringType, StringType, _, _>(
    "ltrim",
    FunctionProperty::default(),
    |_| None,
    vectorize_string_to_string(
        // The output can never be longer than the input, so the input's data length is
        // used as the byte estimate.
        |col| col.data.len(),
        |val, writer| {
            // `None` means every byte is a space or tab: nothing is written and the
            // committed row stays empty.
            if let Some(first_kept) = val
                .iter()
                .position(|byte| !matches!(*byte, b' ' | b'\t'))
            {
                writer.put_slice(&val.as_bytes()[first_kept..]);
            }
            writer.commit_row();
            Ok(())
        },
    ),
);

// `rtrim(s)`: removes every trailing space (b' ') and tab (b'\t') byte from `s`.
registry.register_passthrough_nullable_1_arg::<StringType, StringType, _, _>(
    "rtrim",
    FunctionProperty::default(),
    |_| None,
    vectorize_string_to_string(
        // The output can never be longer than the input, so the input's data length is
        // used as the byte estimate.
        |col| col.data.len(),
        |val, writer| {
            // Scanning from the back, the position of the first kept byte equals the
            // number of trailing bytes to drop. `None` means the whole value is
            // spaces/tabs and the committed row stays empty.
            if let Some(dropped) = val
                .iter()
                .rev()
                .position(|byte| !matches!(*byte, b' ' | b'\t'))
            {
                let kept_len = val.len() - dropped;
                writer.put_slice(&val.as_bytes()[..kept_len]);
            }
            writer.commit_row();
            Ok(())
        },
    ),
);

// `trim(s)`: removes leading and trailing space (b' ') and tab (b'\t') bytes from `s`.
registry.register_passthrough_nullable_1_arg::<StringType, StringType, _, _>(
    "trim",
    FunctionProperty::default(),
    |_| None,
    vectorize_string_to_string(
        // The output can never be longer than the input, so the input's data length is
        // used as the byte estimate.
        |col| col.data.len(),
        |val, writer| {
            // Offset of the first kept byte, counted from the front.
            let leading = val
                .iter()
                .position(|byte| !matches!(*byte, b' ' | b'\t'));
            // Number of bytes to drop, counted from the back.
            let trailing = val
                .iter()
                .rev()
                .position(|byte| !matches!(*byte, b' ' | b'\t'));

            // Both searches succeed or fail together: they fail only when the value holds
            // nothing but spaces/tabs, in which case the committed row stays empty.
            if let (Some(from), Some(dropped)) = (leading, trailing) {
                let to = val.len() - dropped;
                writer.put_slice(&val.as_bytes()[from..to]);
            }
            writer.commit_row();
            Ok(())
        },
    ),
);

// `trim_leading(s, pat)`: removes every repetition of `pat` from the front of `s`
// (the function behind `TRIM(LEADING pat FROM s)`).
registry.register_passthrough_nullable_2_arg::<StringType, StringType, StringType, _, _>(
    "trim_leading",
    FunctionProperty::default(),
    |_, _| None,
    vectorize_string_to_string_2_arg(
        // The output can never be longer than the input value, so the value column's data
        // length is used as the byte estimate.
        |col, _| col.data.len(),
        |val, trim_str, writer| {
            let mut rest = val;
            // An empty pattern removes nothing, so the value is emitted unchanged. The guard
            // is required: an empty prefix always matches (the loop would never end), and
            // the previous `chunks(0)` implementation panicked on this input.
            if !trim_str.is_empty() {
                while let Some(tail) = rest.strip_prefix(trim_str) {
                    rest = tail;
                }
            }
            // `rest` is empty when the whole value consisted of repetitions of `pat`.
            writer.put_slice(rest);
            writer.commit_row();
            Ok(())
        },
    ),
);

// `trim_trailing(s, pat)`: removes every repetition of `pat` from the end of `s`
// (the function behind `TRIM(TRAILING pat FROM s)`).
registry.register_passthrough_nullable_2_arg::<StringType, StringType, StringType, _, _>(
    "trim_trailing",
    FunctionProperty::default(),
    |_, _| None,
    vectorize_string_to_string_2_arg(
        // The output can never be longer than the input value, so the value column's data
        // length is used as the byte estimate.
        |col, _| col.data.len(),
        |val, trim_str, writer| {
            let mut rest = val;
            // An empty pattern removes nothing, so the value is emitted unchanged. The guard
            // is required: an empty suffix always matches (the loop would never end), and
            // the previous `rchunks(0)` implementation panicked on this input.
            if !trim_str.is_empty() {
                while let Some(head) = rest.strip_suffix(trim_str) {
                    rest = head;
                }
            }
            // `rest` is empty when the whole value consisted of repetitions of `pat`.
            writer.put_slice(rest);
            writer.commit_row();
            Ok(())
        },
    ),
);

// `trim_both(s, pat)`: removes every repetition of `pat` from the front of `s`, then from
// the end of what is left (the function behind `TRIM(BOTH pat FROM s)`).
registry.register_passthrough_nullable_2_arg::<StringType, StringType, StringType, _, _>(
    "trim_both",
    FunctionProperty::default(),
    |_, _| None,
    vectorize_string_to_string_2_arg(
        // The output can never be longer than the input value, so the value column's data
        // length is used as the byte estimate.
        |col, _| col.data.len(),
        |val, trim_str, writer| {
            let mut rest = val;
            // An empty pattern removes nothing, so the value is emitted unchanged. The guard
            // is required: an empty prefix/suffix always matches (the loops would never
            // end), and the previous `chunks(0)` implementation panicked on this input.
            if !trim_str.is_empty() {
                while let Some(tail) = rest.strip_prefix(trim_str) {
                    rest = tail;
                }
                // The back is trimmed on the remainder, not on the original value. Computing
                // both ends independently made the two trimmed regions overlap for inputs
                // such as value "aaa" with pattern "aa", producing a start index past the
                // end index and a slice panic.
                while let Some(head) = rest.strip_suffix(trim_str) {
                    rest = head;
                }
            }
            // `rest` is empty when the whole value consisted of repetitions of `pat`.
            writer.put_slice(rest);
            writer.commit_row();
            Ok(())
        },
    ),
);
}

// Vectorize string to string function with custom estimate_bytes.
Expand All @@ -217,3 +336,49 @@ fn vectorize_string_to_string(
}
}
}

// Vectorize (string, string) -> string function with custom estimate_bytes.
//
// `estimate_bytes` receives both arguments as columns and returns the number of data bytes
// to reserve in the output builder; a scalar argument is first wrapped into a one-row
// column. `func` is invoked once per row with the two byte strings and the output builder.
//
// The returned closure yields a scalar when both arguments are scalars and a column
// otherwise. The first error returned by `func` aborts the evaluation and is propagated.
fn vectorize_string_to_string_2_arg(
    estimate_bytes: impl Fn(&StringColumn, &StringColumn) -> usize + Copy,
    func: impl Fn(&[u8], &[u8], &mut StringColumnBuilder) -> Result<(), String> + Copy,
) -> impl Fn(
    ValueRef<StringType>,
    ValueRef<StringType>,
    &GenericMap,
) -> Result<Value<StringType>, String>
+ Copy {
    move |lhs, rhs, _| match (lhs, rhs) {
        // scalar x scalar: a single row, returned as a scalar.
        (ValueRef::Scalar(left), ValueRef::Scalar(right)) => {
            let mut out = StringColumnBuilder::with_capacity(1, 0);
            func(left, right, &mut out)?;
            Ok(Value::Scalar(out.build_scalar()))
        }
        // scalar x column: the scalar is paired with every row of the column.
        (ValueRef::Scalar(left), ValueRef::Column(right_col)) => {
            let left_col = StringColumnBuilder::repeat(left, 1).build();
            let reserved = estimate_bytes(&left_col, &right_col);
            let mut out = StringColumnBuilder::with_capacity(right_col.len(), reserved);
            right_col
                .iter()
                .try_for_each(|right| func(left, right, &mut out))?;
            Ok(Value::Column(out.build()))
        }
        // column x scalar: every row of the column is paired with the scalar.
        (ValueRef::Column(left_col), ValueRef::Scalar(right)) => {
            let right_col = StringColumnBuilder::repeat(right, 1).build();
            let reserved = estimate_bytes(&left_col, &right_col);
            let mut out = StringColumnBuilder::with_capacity(left_col.len(), reserved);
            left_col
                .iter()
                .try_for_each(|left| func(left, right, &mut out))?;
            Ok(Value::Column(out.build()))
        }
        // column x column: rows are paired positionally.
        (ValueRef::Column(left_col), ValueRef::Column(right_col)) => {
            let reserved = estimate_bytes(&left_col, &right_col);
            let mut out = StringColumnBuilder::with_capacity(left_col.len(), reserved);
            left_col
                .iter()
                .zip(right_col.iter())
                .try_for_each(|(left, right)| func(left, right, &mut out))?;
            Ok(Value::Column(out.build()))
        }
    }
}
44 changes: 44 additions & 0 deletions common/functions-v2/tests/it/scalars/parser.rs
Expand Up @@ -112,6 +112,50 @@ pub fn transform_expr(ast: common_ast::ast::Expr, columns: &[(&str, DataType)])
transform_expr(*right, columns),
],
},
common_ast::ast::Expr::Trim {
    span,
    expr,
    trim_where,
} => {
    // Pick the scalar function: plain `trim` takes only the operand, while the
    // `trim_both` / `trim_leading` / `trim_trailing` variants also take the pattern
    // expression as a second argument.
    let (name, pattern) = match trim_where {
        Some(inner) => {
            let name = match inner.0 {
                common_ast::ast::TrimWhere::Both => "trim_both",
                common_ast::ast::TrimWhere::Leading => "trim_leading",
                common_ast::ast::TrimWhere::Trailing => "trim_trailing",
            };
            (name, Some(inner.1))
        }
        None => ("trim", None),
    };

    let span = transform_span(span);
    let operand = transform_expr(*expr, columns);
    let args = match pattern {
        Some(pattern) => vec![operand, transform_expr(*pattern, columns)],
        None => vec![operand],
    };

    RawExpr::FunctionCall {
        span,
        name: name.to_string(),
        params: vec![],
        args,
    }
}
_ => unimplemented!(),
}
}
Expand Down

1 comment on commit 223ad77

@vercel
Copy link

@vercel vercel bot commented on 223ad77 Aug 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend-git-main-databend.vercel.app
databend.rs
databend-databend.vercel.app
databend.vercel.app

Please sign in to comment.