Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC-464: [C++] avoid computing zigzag values for DELTA and SHORT_REPEAT. #361

Merged
merged 1 commit into from Feb 25, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 11 additions & 5 deletions c++/src/RleEncoderV2.cc
@@ -302,14 +302,16 @@ void RleEncoderV2::preparePatchedBlob(EncodingOption& option) {
}

void RleEncoderV2::determineEncoding(EncodingOption& option) {
// we need to compute zigzag values for DIRECT encoding if we decide to
// break early for delta overflows or for shorter runs
computeZigZagLiterals(option);

option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
// We need to compute zigzag values for DIRECT and PATCHED_BASE encodings,
// but not for SHORT_REPEAT or DELTA. So we only perform the zigzag
// computation when it's determined to be necessary.

// not a big win for shorter runs to determine encoding
if (numLiterals <= MIN_REPEAT) {
// we need to compute zigzag values for DIRECT encoding if we decide to
// break early for delta overflows or for shorter runs
computeZigZagLiterals(option);
option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
option.encoding = DIRECT;
return;
}
@@ -349,6 +351,8 @@ void RleEncoderV2::determineEncoding(EncodingOption& option) {
// PATCHED_BASE condition as encoding using DIRECT is faster and has less
// overhead than PATCHED_BASE
if (!isSafeSubtract(max, option.min)) {
computeZigZagLiterals(option);
option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
option.encoding = DIRECT;
return;
}
@@ -404,6 +408,8 @@ void RleEncoderV2::determineEncoding(EncodingOption& option) {
// beyond a threshold then we need to patch the values. if the variation
// is not significant then we can use direct encoding

computeZigZagLiterals(option);
option.zzBits100p = percentileBits(zigzagLiterals, 0, numLiterals, 1.0);
option.zzBits90p = percentileBits(zigzagLiterals, 0, numLiterals, 0.9, true);
uint32_t diffBitsLH = option.zzBits100p - option.zzBits90p;

Expand Down