Skip to content

Commit

Permalink
copr: Add vectorized FromBase64 (tikv#7767)
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi <iskyzh@gmail.com>
  • Loading branch information
skyzh committed May 11, 2020
1 parent 817445d commit 5b3b1e0
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 10 deletions.
11 changes: 2 additions & 9 deletions components/tidb_query_normal_expr/src/builtin_string.rs
Expand Up @@ -11,8 +11,8 @@ use hex::{self, FromHex};
use tidb_query_datatype::prelude::*;
use tidb_query_shared_expr::conv::i64_to_usize;
use tidb_query_shared_expr::string::{
encoded_size, line_wrap, validate_target_len_for_pad, BASE64_ENCODED_CHUNK_LENGTH,
BASE64_INPUT_CHUNK_LENGTH,
encoded_size, line_wrap, strip_whitespace, validate_target_len_for_pad,
BASE64_ENCODED_CHUNK_LENGTH, BASE64_INPUT_CHUNK_LENGTH,
};
use tikv_util::try_opt_or;

Expand Down Expand Up @@ -1020,13 +1020,6 @@ impl ScalarFunc {
}
}

/// Returns a copy of `input` with every space, `\n`, `\t`, `\r`, vertical tab
/// (`0x0b`) and form feed (`0x0c`) byte removed. All other bytes are kept in
/// their original order.
#[inline]
fn strip_whitespace(input: &[u8]) -> Vec<u8> {
    // The result can never be longer than the input, so reserve that much up front.
    let mut stripped = Vec::with_capacity(input.len());
    for &byte in input {
        match byte {
            b' ' | b'\n' | b'\t' | b'\r' | b'\x0b' | b'\x0c' => {}
            _ => stripped.push(byte),
        }
    }
    stripped
}

#[inline]
fn substring_index_positive(s: &str, delim: &str, count: usize) -> String {
let mut bg = 0;
Expand Down
7 changes: 7 additions & 0 deletions components/tidb_query_shared_expr/src/string.rs
Expand Up @@ -79,6 +79,13 @@ pub fn line_wrap(buf: &mut [u8], input_len: usize) {
}
}

/// Copies `input` into a new vector, dropping the bytes that are treated as
/// whitespace here: space, `\n`, `\t`, `\r`, vertical tab (`0x0b`) and form
/// feed (`0x0c`). The relative order of the remaining bytes is preserved.
#[inline]
pub fn strip_whitespace(input: &[u8]) -> Vec<u8> {
    const WHITESPACE: &[u8] = b" \n\t\r\x0b\x0c";

    // Upper bound on the output size: nothing is ever added, only removed.
    let mut kept = Vec::with_capacity(input.len());
    kept.extend(
        input
            .iter()
            .copied()
            .filter(|byte| !WHITESPACE.contains(byte)),
    );
    kept
}

#[cfg(test)]
mod tests {
#[test]
Expand Down
70 changes: 69 additions & 1 deletion components/tidb_query_vec_expr/src/impl_string.rs
Expand Up @@ -6,7 +6,10 @@ use tidb_query_codegen::rpn_fn;
use tidb_query_common::Result;
use tidb_query_datatype::codec::data_type::*;
use tidb_query_datatype::*;
use tidb_query_shared_expr::string::{encoded_size, line_wrap, validate_target_len_for_pad};
use tidb_query_shared_expr::string::{
encoded_size, line_wrap, strip_whitespace, validate_target_len_for_pad,
BASE64_ENCODED_CHUNK_LENGTH, BASE64_INPUT_CHUNK_LENGTH,
};

const SPACE: u8 = 0o40u8;

Expand Down Expand Up @@ -492,6 +495,28 @@ pub fn to_base64(bs: &Option<Bytes>) -> Result<Option<Bytes>> {
}
}

/// Decodes a base64-encoded byte string.
///
/// Whitespace bytes are removed from the input (via `strip_whitespace`) before
/// any validation or decoding takes place.
///
/// Returns:
/// - `Ok(None)` when the argument is `None`;
/// - `Ok(Some(empty))` when the stripped length is not a multiple of
///   `BASE64_ENCODED_CHUNK_LENGTH`, or when multiplying it by
///   `BASE64_INPUT_CHUNK_LENGTH` would overflow `usize`;
/// - otherwise the decoded bytes, or `Ok(None)` if the standard-alphabet
///   decoder rejects the input.
#[rpn_fn]
#[inline]
pub fn from_base64(bs: &Option<Bytes>) -> Result<Option<Bytes>> {
    let encoded = match bs {
        Some(raw) => strip_whitespace(raw),
        None => return Ok(None),
    };

    let encoded_len = encoded.len();
    let size_overflows = encoded_len
        .checked_mul(BASE64_INPUT_CHUNK_LENGTH)
        .is_none();
    // An incorrectly padded input yields "" rather than an error, mirroring mysql.
    let badly_padded = encoded_len % BASE64_ENCODED_CHUNK_LENGTH != 0;
    if size_overflows || badly_padded {
        return Ok(Some(Vec::new()));
    }

    // A decode failure is reported as `None`, not as an `Err`.
    Ok(base64::decode_config(&encoded, base64::STANDARD).ok())
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1919,4 +1944,47 @@ mod tests {
assert_eq!(output, expected_output);
}
}

/// Exercises `FromBase64` through the RPN evaluator: well-formed inputs,
/// inputs with embedded whitespace (which is stripped before decoding),
/// an incorrectly padded input, and a NULL argument.
#[test]
fn test_from_base64() {
    // (base64 input, expected decoded output)
    let tests = vec![
        ("", ""),
        ("YWJj", "abc"),
        ("YWIgYw==", "ab c"),
        ("YWIKYw==", "ab\nc"),
        ("YWIJYw==", "ab\tc"),
        ("cXdlcnR5MTIzNDU2", "qwerty123456"),
        // Line-wrapped input: the embedded newlines must be ignored.
        (
            "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVphYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ejAxMjM0\nNTY3ODkrL0FCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4\neXowMTIzNDU2Nzg5Ky9BQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWmFiY2RlZmdoaWprbG1ub3Bx\ncnN0dXZ3eHl6MDEyMzQ1Njc4OSsv",
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        ),
        (
            "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVphYmNkZWZnaGlqa2xtbm9wcXJzdHV2d3h5ejAxMjM0NTY3ODkrLw==",
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        ),
        // Tab, newline, carriage return and space scattered through the input.
        (
            "QUJDREVGR0hJSkt\tMTU5PUFFSU1RVVld\nYWVphYmNkZ\rWZnaGlqa2xt bm9wcXJzdHV2d3h5ejAxMjM0NTY3ODkrLw==",
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        ),
        // Vertical tab and form feed are stripped as well.
        ("YW\x0bJj\x0c", "abc"),
    ];
    for (arg, expected) in tests {
        let param = Some(arg.to_string().into_bytes());
        let expected_output = Some(expected.to_string().into_bytes());
        let output = RpnFnScalarEvaluator::new()
            .push_param(param)
            .evaluate::<Bytes>(ScalarFuncSig::FromBase64)
            .unwrap();
        assert_eq!(output, expected_output);
    }

    // An input whose stripped length is not a whole number of encoded chunks
    // decodes to the empty string rather than NULL or an error.
    let invalid_base64_output = RpnFnScalarEvaluator::new()
        .push_param(Some(b"src".to_vec()))
        .evaluate(ScalarFuncSig::FromBase64)
        .unwrap();
    assert_eq!(invalid_base64_output, Some(b"".to_vec()));

    // A NULL argument yields NULL.
    let null_output = RpnFnScalarEvaluator::new()
        .push_param(None::<Bytes>)
        .evaluate::<Bytes>(ScalarFuncSig::FromBase64)
        .unwrap();
    assert_eq!(null_output, None);
}
}
1 change: 1 addition & 0 deletions components/tidb_query_vec_expr/src/lib.rs
Expand Up @@ -516,6 +516,7 @@ fn map_expr_node_to_rpn_func(expr: &Expr) -> Result<RpnFnMeta> {
ScalarFuncSig::RTrim => rtrim_fn_meta(),
ScalarFuncSig::Lpad => lpad_fn_meta(),
ScalarFuncSig::Trim1Arg => trim_1_arg_fn_meta(),
ScalarFuncSig::FromBase64 => from_base64_fn_meta(),
ScalarFuncSig::Replace => replace_fn_meta(),
ScalarFuncSig::Left => left_fn_meta(),
ScalarFuncSig::LeftUtf8 => left_utf8_fn_meta(),
Expand Down

0 comments on commit 5b3b1e0

Please sign in to comment.