Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature](function) support bitmap type in min/max_by agg function #25430

Merged
merged 1 commit into from Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
60 changes: 60 additions & 0 deletions be/src/vec/aggregate_functions/aggregate_function_min_max_by.h
Expand Up @@ -18,15 +18,70 @@
#pragma once

#include <utility>

#include "common/logging.h"
#include "util/bitmap_value.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_min_max.h"
#include "vec/aggregate_functions/helpers.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_vector.h"
#include "vec/common/assert_cast.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/io/io_helper.h"

namespace doris::vectorized {

/// For bitmap value
struct BitmapValueData {
private:
using Self = BitmapValueData;
bool has_value = false;
BitmapValue value;

public:
BitmapValueData() = default;
/// Builds a slot from an explicit presence flag and a bitmap.
/// `value_` is received by value and then moved into the member, so the
/// bitmap is not copied a second time when BitmapValue is movable.
BitmapValueData(bool has_value_, BitmapValue value_)
        : has_value(has_value_), value(std::move(value_)) {}
/// Reports whether this slot currently holds a bitmap.
[[nodiscard]] bool has() const {
    return has_value;
}

/// Appends the result to `to`, which is cast to ColumnBitmap: the held bitmap
/// when one is present, otherwise the column's default entry.
void insert_result_into(IColumn& to) const {
    auto& bitmap_column = assert_cast<ColumnBitmap&>(to);
    if (!has_value) {
        bitmap_column.insert_default();
        return;
    }
    bitmap_column.get_data().push_back(value);
}

/// Marks the slot as empty. Only the presence flag is cleared; the stored
/// bitmap object itself is left untouched.
void reset() {
    has_value = false;
}

/// Serializes the slot into `buf`: the presence flag first, followed by the
/// bitmap payload only when a value is held.
void write(BufferWritable& buf) const {
    write_binary(has(), buf);
    if (!has_value) {
        return;
    }
    DataTypeBitMap::serialize_as_stream(value, buf);
}

/// Restores the slot from `buf`, mirroring write(): the presence flag is read
/// first and the bitmap payload is read only when that flag is set.
/// `arena` is not used by this implementation.
void read(BufferReadable& buf, Arena* arena) {
    read_binary(has_value, buf);
    if (!has_value) {
        return;
    }
    DataTypeBitMap::deserialize_as_stream(value, buf);
}

/// Replaces the held value with the bitmap at `row_num` of `column` (cast to
/// ColumnBitmap) and marks the slot as populated. The Arena argument is unused.
void change(const IColumn& column, size_t row_num, Arena*) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: all parameters should be named in a function [readability-named-parameter]

Suggested change
void change(const IColumn& column, size_t row_num, Arena*) {
void change(const IColumn& column, size_t row_num, Arena* /*unused*/) {

has_value = true;
value = assert_cast<const ColumnBitmap&>(column).get_data()[row_num];
}

/// Copies the bitmap held by `to` into this slot and marks the slot as
/// populated; `to.has_value` is not consulted. The Arena argument is unused.
void change(const Self& to, Arena*) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: all parameters should be named in a function [readability-named-parameter]

Suggested change
void change(const Self& to, Arena*) {
void change(const Self& to, Arena* /*unused*/) {

has_value = true;
value = to.value;
}
};

template <typename VT, typename KT>
struct AggregateFunctionMinMaxByBaseData {
protected:
Expand Down Expand Up @@ -229,6 +284,11 @@ AggregateFunctionPtr create_aggregate_function_min_max_by(const String& name,
SingleValueDataFixed<UInt64>>(
argument_types, result_is_nullable);
}
if (which.idx == TypeIndex::BitMap) {
return create_aggregate_function_min_max_by_impl<AggregateFunctionTemplate, Data,
BitmapValueData>(argument_types,
result_is_nullable);
}
return nullptr;
}

Expand Down
Expand Up @@ -104,3 +104,17 @@
-- !select_count2 --
15

-- !select_minmax1 --
20200622 1 \N
20200622 2 \N
20200622 3 \N

-- !select_minmax2 --
20200622

-- !select_minmax3 --
287667876573

-- !select_minmax4 --
243

Expand Up @@ -81,5 +81,36 @@ suite("test_aggregate_all_functions2") {
qt_select_topn_array6 """ select topn_array(k11,3,100) from baseall; """
qt_select_count1 """ select count(distinct k1,k2,k5) from baseall; """
qt_select_count2 """ select count(distinct k1,k2,cast(k5 as decimalv3(38,18))) from baseall; """



// Fixture for min_by/max_by over a BITMAP column.
// Drop the TABLE (not a database of the same name) so that the CREATE TABLE
// below succeeds when the suite is re-run against an existing cluster.
sql "DROP TABLE IF EXISTS metric_table"
sql """
CREATE TABLE `metric_table` (
`datekey` int(11) NULL,
`hour` int(11) NULL,
`device_id` bitmap BITMAP_UNION NOT NULL
) ENGINE=OLAP
AGGREGATE KEY(`datekey`, `hour`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`datekey`, `hour`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
"""
// Three rows sharing one datekey with distinct hours, each carrying a different bitmap.
sql """
insert into metric_table values
(20200622, 1, to_bitmap(243)),
(20200622, 2, bitmap_from_array([1,2,3,4,5,434543])),
(20200622, 3, to_bitmap(287667876573));
"""

// max_by/min_by pick the value paired with the largest/smallest `hour`.
qt_select_minmax1 """ select * from metric_table order by hour; """
qt_select_minmax2 """ select max_by(datekey,hour) from metric_table; """
qt_select_minmax3 """ select bitmap_to_string(max_by(device_id,hour)) from metric_table; """
qt_select_minmax4 """ select bitmap_to_string(min_by(device_id,hour)) from metric_table; """
}