Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added bitmapSubsetLimit #6957

Merged
merged 2 commits into from Sep 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
72 changes: 56 additions & 16 deletions dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h
Expand Up @@ -63,7 +63,12 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
roaring_bitmap_add(rb, value);
}

UInt64 size() const { return isSmall() ? small.size() : roaring_bitmap_get_cardinality(rb); }
/// Number of stored values: taken from the small set while isSmall() holds,
/// otherwise the cardinality reported by the roaring bitmap `rb`.
UInt64 size() const
{
    if (isSmall())
        return small.size();
    return roaring_bitmap_get_cardinality(rb);
}

void merge(const RoaringBitmapWithSmallSet & r1)
{
Expand Down Expand Up @@ -91,7 +96,7 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
std::string s;
readStringBinary(s,in);
rb = roaring_bitmap_portable_deserialize(s.c_str());
for (const auto & x : small) //merge from small
for (const auto & x : small) // merge from small
roaring_bitmap_add(rb, x.getValue());
}
else
Expand Down Expand Up @@ -245,13 +250,13 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
{
for (const auto & x : small)
if (r1.small.find(x.getValue()) != r1.small.end())
retSize++;
++retSize;
}
else if (isSmall() && r1.isLarge())
{
for (const auto & x : small)
if (roaring_bitmap_contains(r1.rb, x.getValue()))
retSize++;
++retSize;
}
else
{
Expand Down Expand Up @@ -391,8 +396,7 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
*/
UInt8 rb_contains(const UInt32 x) const
{
return isSmall() ? small.find(x) != small.end() :
roaring_bitmap_contains(rb, x);
return isSmall() ? small.find(x) != small.end() : roaring_bitmap_contains(rb, x);
}

/**
Expand Down Expand Up @@ -460,21 +464,20 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
/**
* Return new set with specified range (not including range_end)
*/
UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet& r1) const
UInt64 rb_range(UInt32 range_start, UInt32 range_end, RoaringBitmapWithSmallSet & r1) const
{
UInt64 count = 0;
if (range_start >= range_end)
return count;
if (isSmall())
{
std::vector<T> ans;
for (const auto & x : small)
{
T val = x.getValue();
if ((UInt32)val >= range_start && (UInt32)val < range_end)
if (UInt32(val) >= range_start && UInt32(val) < range_end)
{
r1.add(val);
count++;
++count;
}
}
}
Expand All @@ -483,13 +486,50 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (iterator.has_value)
while (iterator.has_value && UInt32(iterator.current_value) < range_end)
{
if ((UInt32)iterator.current_value >= range_end)
break;
r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator);
count++;
++count;
}
}
return count;
}

/**
* Add to `r1` the smallest `limit` values of this set that are no less than `range_start`; return the number of values added.
*/
UInt64 rb_limit(UInt32 range_start, UInt32 limit, RoaringBitmapWithSmallSet & r1) const
{
UInt64 count = 0;
if (isSmall())
{
std::vector<T> ans;
for (const auto & x : small)
{
T val = x.getValue();
if (UInt32(val) >= range_start)
{
ans.push_back(val);
}
}
sort(ans.begin(), ans.end());
if (limit > ans.size())
limit = ans.size();
for (size_t i = 0; i < limit; ++i)
r1.add(ans[i]);
count = UInt64(limit);
}
else
{
roaring_uint32_iterator_t iterator;
roaring_init_iterator(rb, &iterator);
roaring_move_uint32_iterator_equalorlarger(&iterator, range_start);
while (UInt32(count) < limit && iterator.has_value)
{
r1.add(iterator.current_value);
roaring_advance_uint32_iterator(&iterator);
++count;
}
}
return count;
Expand Down Expand Up @@ -552,8 +592,8 @@ class RoaringBitmapWithSmallSet : private boost::noncopyable
readBinary(val, dbBuf);
container = containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
prev = val;
i++;
for (; i < n_args; i++)
++i;
for (; i < n_args; ++i)
{
readBinary(val, dbBuf);
if (((prev ^ val) >> 16) == 0)
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Functions/FunctionsBitmap.cpp
Expand Up @@ -10,6 +10,7 @@ void registerFunctionsBitmap(FunctionFactory & factory)
factory.registerFunction<FunctionBitmapBuild>();
factory.registerFunction<FunctionBitmapToArray>();
factory.registerFunction<FunctionBitmapSubsetInRange>();
factory.registerFunction<FunctionBitmapSubsetLimit>();

factory.registerFunction<FunctionBitmapSelfCardinality>();
factory.registerFunction<FunctionBitmapMin>();
Expand Down
37 changes: 33 additions & 4 deletions dbms/src/Functions/FunctionsBitmap.h
Expand Up @@ -34,6 +34,9 @@ namespace ErrorCodes
* Return subset in specified range (not including range_end):
* bitmapSubsetInRange: bitmap,integer,integer -> bitmap
*
* Return subset of the smallest `limit` values in set that are no smaller than `range_start`:
* bitmapSubsetLimit: bitmap,integer,integer -> bitmap
*
* Two bitmap and calculation:
* bitmapAnd: bitmap,bitmap -> bitmap
*
Expand Down Expand Up @@ -250,12 +253,13 @@ class FunctionBitmapToArrayImpl : public IFunction
}
};

class FunctionBitmapSubsetInRange : public IFunction
template <typename Impl>
class FunctionBitmapSubset : public IFunction
{
public:
static constexpr auto name = "bitmapSubsetInRange";
static constexpr auto name = Impl::name;

static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapSubsetInRange>(); }
static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapSubset<Impl>>(); }

String getName() const override { return name; }

Expand Down Expand Up @@ -357,12 +361,37 @@ class FunctionBitmapSubsetInRange : public IFunction
col_to->insertDefault();
AggregateFunctionGroupBitmapData<T> & bd2
= *reinterpret_cast<AggregateFunctionGroupBitmapData<T> *>(col_to->getData()[i]);
bd0.rbs.rb_range(range_start, range_end, bd2.rbs);
Impl::apply(bd0, range_start, range_end, bd2);
}
block.getByPosition(result).column = std::move(col_to);
}
};

/// Policy plugged into FunctionBitmapSubset to provide the SQL function `bitmapSubsetInRange`.
struct BitmapSubsetInRangeImpl
{
    static constexpr auto name = "bitmapSubsetInRange";

    /// Forwards to rb_range on the source bitmap of `bd0`, passing `range_start`, `range_end`
    /// and the bitmap of `bd2` as the destination.
    template <typename T>
    static void apply(
        const AggregateFunctionGroupBitmapData<T> & bd0,
        UInt32 range_start,
        UInt32 range_end,
        AggregateFunctionGroupBitmapData<T> & bd2)
    {
        auto & destination = bd2.rbs;
        bd0.rbs.rb_range(range_start, range_end, destination);
    }
};

struct BitmapSubsetLimitImpl
{
public:
static constexpr auto name = "bitmapSubsetLimit";
template <typename T>
static void apply(const AggregateFunctionGroupBitmapData<T> & bd0, UInt32 range_start, UInt32 range_end, AggregateFunctionGroupBitmapData<T> & bd2)
{
bd0.rbs.rb_limit(range_start, range_end, bd2.rbs);
}
};

using FunctionBitmapSubsetInRange = FunctionBitmapSubset<BitmapSubsetInRangeImpl>;
using FunctionBitmapSubsetLimit = FunctionBitmapSubset<BitmapSubsetLimitImpl>;

template <typename Impl>
class FunctionBitmapSelfCardinalityImpl : public IFunction
{
Expand Down
Expand Up @@ -67,6 +67,14 @@
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33]
[30,31,32,33,100]
[100]
[]
[]
[1,5,7,9]
[]
[5,7,9]
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]
[30,31,32,33,100,200,500]
[100,200,500]
4294967295
4294967295
4294967295
Expand Down
19 changes: 19 additions & 0 deletions dbms/tests/queries/0_stateless/00829_bitmap_function.sql
Expand Up @@ -212,6 +212,25 @@ select bitmapToArray(bitmapSubsetInRange(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));

-- bitmapSubsetLimit:
---- Empty
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt32()), toUInt32(0), toUInt32(10)));
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild(emptyArrayUInt16()), toUInt32(0), toUInt32(10)));
---- Small
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(0), toUInt32(4)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(10), toUInt32(10)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([1,5,7,9]), toUInt32(3), toUInt32(7)));
---- Large
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(0), toUInt32(100)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(30), toUInt32(200)));
select bitmapToArray(bitmapSubsetLimit(bitmapBuild([
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,
100,200,500]), toUInt32(100), toUInt32(200)));

-- bitmapMin:
---- Empty
SELECT bitmapMin(bitmapBuild(emptyArrayUInt8()));
Expand Down
26 changes: 26 additions & 0 deletions docs/en/query_language/functions/bitmap_functions.md
Expand Up @@ -82,6 +82,32 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
└───────────────────┘
```

## bitmapSubsetLimit {#bitmap_functions-bitmapsubsetlimit}

Returns a subset containing the smallest `limit` values of the set that are no less than `range_start`.

```
bitmapSubsetLimit(bitmap, range_start, limit)
```

**Parameters**

- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
- `range_start` – range start point. Type: [UInt32](../../data_types/int_uint.md).
- `limit` – subset cardinality upper limit. Type: [UInt32](../../data_types/int_uint.md).

**Example**

``` sql
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```

```
┌─res───────────────────────┐
│ [30,31,32,33,100,200,500] │
└───────────────────────────┘
```

## bitmapContains {#bitmap_functions-bitmapcontains}

Checks whether the bitmap contains an element.
Expand Down
26 changes: 26 additions & 0 deletions docs/zh/query_language/functions/bitmap_functions.md
Expand Up @@ -77,6 +77,32 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
└───────────────────┘
```

## bitmapSubsetLimit

将位图指定范围(起始点和数目上限)转换为另一个位图。

```
bitmapSubsetLimit(bitmap, range_start, limit)
```

**参数**

- `bitmap` – 位图对象.
- `range_start` – 范围起始点(含).
- `limit` – 子位图基数上限.

**示例**

``` sql
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res
```

```
┌─res───────────────────────┐
│ [30,31,32,33,100,200,500] │
└───────────────────────────┘
```

## bitmapContains

检查位图是否包含指定元素。
Expand Down