Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion be/src/core/column/column_complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ class ColumnComplexType final : public COWHelper<IColumn, ColumnComplexType<T>>
}

if constexpr (T == TYPE_BITMAP) {
pvalue->deserialize(pos);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This newly bounded decode can fail, but insert_binary_data has already inserted a default value and then ignores the false return. A malformed bitmap binary cell will therefore stay in the column as an empty/default bitmap rather than making the load/read fail, which violates the hardening goal and can silently corrupt query results. Please check the return value and throw/propagate an error before leaving the inserted value in place.

if (!pvalue->deserialize(pos, length)) {
throw Exception(Status::DataQualityError("Failed to deserialize bitmap data"));
}
} else if constexpr (T == TYPE_HLL) {
pvalue->deserialize(Slice(pos, length));
} else if constexpr (T == TYPE_QUANTILE_STATE) {
Expand Down
13 changes: 10 additions & 3 deletions be/src/core/data_type/data_type_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include <utility>

#include "agent/be_exec_version_manager.h"
#include "common/exception.h"
#include "common/status.h"
#include "core/assert_cast.h"
#include "core/column/column.h"
#include "core/column/column_complex.h"
Expand Down Expand Up @@ -90,8 +92,11 @@ const char* DataTypeBitMap::deserialize(const char* buf, MutableColumnPtr* colum
const auto* meta_ptr = reinterpret_cast<const size_t*>(buf);
const char* data_ptr = buf + sizeof(size_t) * real_have_saved_num;
for (size_t i = 0; i < real_have_saved_num; ++i) {
data[i].deserialize(data_ptr);
data_ptr += unaligned_load<size_t>(&meta_ptr[i]);
auto one_size = unaligned_load<size_t>(&meta_ptr[i]);
if (!data[i].deserialize(data_ptr, one_size)) {
throw Exception(Status::DataQualityError("Failed to deserialize bitmap data"));
}
data_ptr += one_size;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BitmapValue::deserialize now returns false for truncated or malformed input, but this block deserialization path still ignores that result. If a serialized block carries a bad bitmap payload, data[i] remains default/partially initialized and the reader advances by the declared size, so execution can continue with wrong bitmap values instead of failing. Please check the return value here and raise/propagate an error; the same applies to deserialize_as_stream below, which currently ignores value.deserialize(ref.data, ref.size) as well.

}
return data_ptr;
}
Expand All @@ -115,6 +120,8 @@ void DataTypeBitMap::serialize_as_stream(const BitmapValue& cvalue, BufferWritab
void DataTypeBitMap::deserialize_as_stream(BitmapValue& value, BufferReadable& buf) {
StringRef ref;
buf.read_binary(ref);
value.deserialize(ref.data);
if (!value.deserialize(ref.data, ref.size)) {
throw Exception(Status::DataQualityError("Failed to deserialize bitmap data"));
}
}
} // namespace doris
8 changes: 4 additions & 4 deletions be/src/core/data_type_serde/data_type_bitmap_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Status DataTypeBitMapSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
auto& data = data_column.get_data();

BitmapValue value;
if (!value.deserialize(slice.data)) {
if (!value.deserialize(slice.data, slice.size)) {
return Status::InternalError("deserialize BITMAP from string fail!");
}
data.push_back(std::move(value));
Expand Down Expand Up @@ -98,7 +98,7 @@ Status DataTypeBitMapSerDe::write_column_to_pb(const IColumn& column, PValues& r
Status DataTypeBitMapSerDe::read_column_from_pb(IColumn& column, const PValues& arg) const {
auto& col = reinterpret_cast<ColumnBitmap&>(column);
for (int i = 0; i < arg.bytes_value_size(); ++i) {
BitmapValue value(arg.bytes_value(i).data());
BitmapValue value(arg.bytes_value(i).data(), arg.bytes_value(i).size());
col.insert_value(value);
}
return Status::OK();
Expand Down Expand Up @@ -144,7 +144,7 @@ Status DataTypeBitMapSerDe::write_column_to_arrow(const IColumn& column, const N
void DataTypeBitMapSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const {
auto& col = reinterpret_cast<ColumnBitmap&>(column);
auto* blob = arg->unpack<JsonbBinaryVal>();
BitmapValue bitmap_value(blob->getBlob());
BitmapValue bitmap_value(blob->getBlob(), blob->getBlobLen());
col.insert_value(bitmap_value);
}

Expand Down Expand Up @@ -224,7 +224,7 @@ Status DataTypeBitMapSerDe::from_string(StringRef& str, IColumn& column,
Status DataTypeBitMapSerDe::from_olap_string(const std::string& str, Field& field,
const FormatOptions& options) const {
BitmapValue value;
if (!value.deserialize(str.data())) {
if (!value.deserialize(str.data(), str.size())) {
return Status::InternalError("deserialize BITMAP from string fail!");
}
field = Field::create_field<TYPE_BITMAP>(std::move(value));
Expand Down
148 changes: 118 additions & 30 deletions be/src/core/value/bitmap_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "common/config.h"
#include "common/exception.h"
#include "common/logging.h"
#include "common/status.h"
#include "core/pod_array.h"
#include "core/pod_array_fwd.h"
#include "util/coding.h"
Expand Down Expand Up @@ -563,41 +564,95 @@ class Roaring64Map {
}

/**
* read a bitmap from a serialized version.
* Read a Doris-encoded BITMAP* payload from `buf`. Reads no more than
* `maxbytes` bytes. Throws doris::Exception if the buffer is too small
* or the encoding is otherwise malformed.
*
* This function is unsafe in the sense that if you provide bad data,
* many bytes could be read, possibly causing a buffer overflow. See also readSafe.
* Wraps the upstream CRoaring *_deserialize_safe primitives so a
* tampered map_size varint or per-container count cannot trigger a
* heap out-of-bounds read.
*/
static Roaring64Map read(const char* buf) {
static Roaring64Map readSafe(const char* buf, size_t maxbytes) {
Roaring64Map result;

if (maxbytes < 1) {
throw Exception(ErrorCode::INVALID_ARGUMENT,
"ran out of bytes while reading bitmap type code");
}
bool is_v1 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP64 == *buf;
bool is_bitmap32 = BitmapTypeCode::BITMAP32 == *buf || BitmapTypeCode::BITMAP32_V2 == *buf;
bool is_bitmap64 = BitmapTypeCode::BITMAP64 == *buf || BitmapTypeCode::BITMAP64_V2 == *buf;
if (!is_bitmap32 && !is_bitmap64) {
throw Exception(ErrorCode::INVALID_ARGUMENT, "invalid bitmap type code for read: {}",
(int)(*buf));
}
buf++;
maxbytes--;

// Doris convention: Roaring::read/write second arg is `portable`, and
// is_v1=true means portable serialization (per existing read() above).
auto read_one_roaring = [&](roaring::Roaring& out) {
roaring::api::roaring_bitmap_t* rb = nullptr;
if (is_v1) {
size_t need = roaring::api::roaring_bitmap_portable_deserialize_size(buf, maxbytes);
if (need == 0 || need > maxbytes) {
throw Exception(ErrorCode::INVALID_ARGUMENT,
"ran out of bytes or invalid portable roaring bitmap");
}
rb = roaring::api::roaring_bitmap_portable_deserialize_safe(buf, maxbytes);
} else {
rb = roaring::api::roaring_bitmap_deserialize_safe(buf, maxbytes);
}
if (rb == nullptr) {
throw Exception(ErrorCode::INVALID_ARGUMENT,
"ran out of bytes or invalid roaring bitmap container");
}
out = roaring::Roaring(rb);
size_t consumed = out.getSizeInBytes(is_v1);
if (consumed > maxbytes) {
throw Exception(ErrorCode::INVALID_ARGUMENT,
"inconsistent roaring bitmap size after read");
}
buf += consumed;
maxbytes -= consumed;
};

if (is_bitmap32) {
roaring::Roaring read = roaring::Roaring::read(buf + 1, is_v1);
result.emplaceOrInsert(0, std::move(read));
roaring::Roaring r;
read_one_roaring(r);
result.emplaceOrInsert(0, std::move(r));
return result;
}

DCHECK(is_bitmap64);
buf++;

// get map size (varint64 took 1~10 bytes)
uint64_t map_size;
buf = reinterpret_cast<const char*>(
// is_bitmap64: read map_size varint within remaining buffer.
uint64_t map_size = 0;
size_t varint_max = maxbytes < 10 ? maxbytes : 10;
const uint8_t* p =
decode_varint64_ptr(reinterpret_cast<const uint8_t*>(buf),
reinterpret_cast<const uint8_t*>(buf + 10), &map_size));
DCHECK(buf != nullptr);
reinterpret_cast<const uint8_t*>(buf + varint_max), &map_size);
if (p == nullptr) {
throw Exception(ErrorCode::INVALID_ARGUMENT,
"varint decode failure for bitmap map_size");
}
size_t consumed = reinterpret_cast<const char*>(p) - buf;
buf = reinterpret_cast<const char*>(p);
maxbytes -= consumed;

// Cheap upper-bound: each entry takes at least 4 bytes (the map key).
if (map_size > maxbytes / sizeof(uint32_t)) {
throw Exception(ErrorCode::INVALID_ARGUMENT, "declared bitmap map_size exceeds buffer");
}

for (uint64_t lcv = 0; lcv < map_size; lcv++) {
// get map key
if (maxbytes < sizeof(uint32_t)) {
throw Exception(ErrorCode::INVALID_ARGUMENT, "ran out of bytes for bitmap map key");
}
uint32_t key = decode_fixed32_le(reinterpret_cast<const uint8_t*>(buf));
buf += sizeof(uint32_t);
// read map value Roaring
roaring::Roaring read_var = roaring::Roaring::read(buf, is_v1);
// forward buffer past the last Roaring Bitmap
buf += read_var.getSizeInBytes(is_v1);
result.emplaceOrInsert(key, std::move(read_var));
maxbytes -= sizeof(uint32_t);

roaring::Roaring r;
read_one_roaring(r);
result.emplaceOrInsert(key, std::move(r));
}
return result;
}
Expand Down Expand Up @@ -882,10 +937,13 @@ class BitmapValue {
explicit BitmapValue(uint64_t value)
: _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}

// Construct a bitmap from serialized data.
explicit BitmapValue(const char* src) : _is_shared(false) {
bool res = deserialize(src);
DCHECK(res);
// Construct a bitmap from serialized data with a bounded buffer length.
// Throws if the input is truncated or malformed.
BitmapValue(const char* src, size_t maxbytes) : _is_shared(false) {
if (!deserialize(src, maxbytes)) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"BitmapValue: failed to deserialize from buffer");
}
}

// !FIXME: We should rethink the logic here
Expand Down Expand Up @@ -1937,12 +1995,21 @@ class BitmapValue {

// Deserialize a bitmap value from `src`.
// Return false if `src` begins with unknown type code, true otherwise.
bool deserialize(const char* src) {
//
// Bounded deserialize: reads no more than `maxbytes` bytes from `src`.
// Returns false on unknown type code or any bounds/format violation.
bool deserialize(const char* src, size_t maxbytes) {
if (maxbytes < 1) {
return false;
}
switch (*src) {
case BitmapTypeCode::EMPTY:
_type = EMPTY;
break;
case BitmapTypeCode::SINGLE32:
if (maxbytes < 1 + sizeof(uint32_t)) {
return false;
}
_type = SINGLE;
_sv = decode_fixed32_le(reinterpret_cast<const uint8_t*>(src + 1));
if (config::enable_set_in_bitmap_value) {
Expand All @@ -1951,6 +2018,9 @@ class BitmapValue {
}
break;
case BitmapTypeCode::SINGLE64:
if (maxbytes < 1 + sizeof(uint64_t)) {
return false;
}
_type = SINGLE;
_sv = decode_fixed64_le(reinterpret_cast<const uint8_t*>(src + 1));
if (config::enable_set_in_bitmap_value) {
Expand All @@ -1965,21 +2035,32 @@ class BitmapValue {
_type = BITMAP;
_is_shared = false;
try {
_bitmap = std::make_shared<detail::Roaring64Map>(detail::Roaring64Map::read(src));
_bitmap = std::make_shared<detail::Roaring64Map>(
detail::Roaring64Map::readSafe(src, maxbytes));
} catch (const doris::Exception& e) {
LOG(ERROR) << "Decode roaring bitmap failed: " << e.what();
return false;
} catch (const std::runtime_error& e) {
LOG(ERROR) << "Decode roaring bitmap failed, " << e.what();
LOG(ERROR) << "Decode roaring bitmap failed: " << e.what();
return false;
}
break;
case BitmapTypeCode::SET: {
if (maxbytes < 2) {
return false;
}
_type = SET;
++src;
uint8_t count = *src;
++src;
size_t remaining = maxbytes - 2;
if (count > SET_TYPE_THRESHOLD) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"bitmap value with incorrect set count, count: {}", count);
}
if (remaining < static_cast<size_t>(count) * sizeof(uint64_t)) {
return false;
}
_set.reserve(count);
for (uint8_t i = 0; i != count; ++i, src += sizeof(uint64_t)) {
_set.insert(decode_fixed64_le(reinterpret_cast<const uint8_t*>(src)));
Expand All @@ -2001,15 +2082,22 @@ class BitmapValue {
break;
}
case BitmapTypeCode::SET_V2: {
if (maxbytes < 1 + sizeof(uint32_t)) {
return false;
}
uint32_t size = 0;
memcpy(&size, src + 1, sizeof(uint32_t));
src += sizeof(uint32_t) + 1;
size_t remaining = maxbytes - 1 - sizeof(uint32_t);
if (static_cast<size_t>(size) > remaining / sizeof(uint64_t)) {
return false;
}

if (!config::enable_set_in_bitmap_value || size > SET_TYPE_THRESHOLD) {
_type = BITMAP;
_prepare_bitmap_for_write();

for (int i = 0; i < size; ++i) {
for (uint32_t i = 0; i < size; ++i) {
uint64_t key {};
memcpy(&key, src, sizeof(uint64_t));
_bitmap->add(key);
Expand All @@ -2019,7 +2107,7 @@ class BitmapValue {
_type = SET;
_set.reserve(size);

for (int i = 0; i < size; ++i) {
for (uint32_t i = 0; i < size; ++i) {
uint64_t key {};
memcpy(&key, src, sizeof(uint64_t));
_set.insert(key);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include <string_view>
#include <type_traits>

#include "common/exception.h"
#include "common/status.h"
#include "core/column/column_complex.h"
#include "core/column/column_vector.h"
#include "core/data_type/data_type_bitmap.h"
Expand Down Expand Up @@ -167,7 +169,10 @@ struct AggIntersectCount : public AggOrthBitmapBaseData<T> {
buf.read_binary(AggOrthBitmapBaseData<T>::first_init);
std::string data;
buf.read_binary(data);
AggOrthBitmapBaseData<T>::bitmap.deserialize(data.data());
if (!AggOrthBitmapBaseData<T>::bitmap.deserialize(data.data(), data.size())) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"failed to deserialize BitmapIntersect state");
}
}

void get(IColumn& to) const {
Expand Down
5 changes: 1 addition & 4 deletions be/src/exprs/function/function_bitmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,6 @@ struct BitmapFromString {
}
};

struct NameBitmapFromBase64 {
static constexpr auto name = "bitmap_from_base64";
};
struct BitmapFromBase64 {
using ArgumentType = DataTypeString;

Expand Down Expand Up @@ -302,7 +299,7 @@ struct BitmapFromBase64 {
null_map[i] = 1;
} else {
BitmapValue bitmap_val;
if (!bitmap_val.deserialize(decode_buff.data())) {
if (!bitmap_val.deserialize(decode_buff.data(), outlen)) {
return Status::RuntimeError("bitmap_from_base64 decode failed: base64: {}",
std::string(src_str, src_size));
}
Expand Down
2 changes: 0 additions & 2 deletions be/src/util/bitmap_expr_calculation.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ class BitmapExprCalculation : public BitmapIntersect<std::string> {
public:
BitmapExprCalculation() = default;

explicit BitmapExprCalculation(const char* src) { deserialize(src); }

void bitmap_calculation_init(std::string& input_str) {
_polish = reverse_polish(input_str);
std::string bitmap_key;
Expand Down
Loading
Loading