Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the quadratic Arena::allocContinue less bad. #6951

Merged
merged 1 commit on Sep 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
121 changes: 67 additions & 54 deletions dbms/src/Common/Arena.h
Expand Up @@ -97,13 +97,23 @@ class Arena : private boost::noncopyable
size_t size_after_grow = 0;

if (head->size() < linear_growth_threshold)
size_after_grow = head->size() * growth_factor;
{
size_after_grow = std::max(min_next_size, head->size() * growth_factor);
}
else
size_after_grow = linear_growth_threshold;

if (size_after_grow < min_next_size)
size_after_grow = min_next_size;
{
// allocContinue() combined with linear growth results in quadratic
// behavior: we append the data by small amounts, and when it
// doesn't fit, we create a new chunk and copy all the previous data
// into it. The number of times we do this is directly proportional
// to the total size of data that is going to be serialized. To make
// the copying happen less often, round the next size up to the
// linear_growth_threshold.
size_after_grow = ((min_next_size + linear_growth_threshold - 1)
/ linear_growth_threshold) * linear_growth_threshold;
}

assert(size_after_grow >= min_next_size);
return roundUpToPageSize(size_after_grow);
}

Expand Down Expand Up @@ -180,65 +190,68 @@ class Arena : private boost::noncopyable
return head->pos;
}

/** Begin or expand a contiguous range of memory.
  * 'range_start' is the start of range. If nullptr, a new range is
  * allocated.
  * If there is no space in the current chunk to expand the range,
  * the entire range is copied to a new, bigger memory chunk, and the value
  * of 'range_start' is updated.
  * If the optional 'start_alignment' is specified, the start of range is
  * kept aligned to this value.
  *
  * NOTE This method is usable only for the last allocation made on this
  * Arena. For earlier allocations, see 'realloc' method.
  */
char * allocContinue(size_t additional_bytes, char const *& range_start,
    size_t start_alignment = 0)
{
    if (!range_start)
    {
        // Start a new memory range.
        char * result = start_alignment
            ? alignedAlloc(additional_bytes, start_alignment)
            : alloc(additional_bytes);

        range_start = result;
        return result;
    }

    // Extend an existing memory range with 'additional_bytes'.

    // This method only works for extending the last allocation. For lack of
    // original size, check a weaker condition: that 'range_start' is at
    // least in the current Chunk.
    assert(range_start >= head->begin && range_start < head->end);

    if (head->pos + additional_bytes <= head->end)
    {
        // The new size fits into the last chunk, so just alloc the
        // additional size. We can alloc without alignment here, because it
        // only applies to the start of the range, and we don't change it.
        return alloc(additional_bytes);
    }

    // New range doesn't fit into this chunk, will copy to a new one.
    //
    // Note: among other things, this method is used to provide a hack-ish
    // implementation of realloc over Arenas in ArenaAllocators. It wastes a
    // lot of memory -- quadratically so when we reach the linear allocation
    // threshold. This deficiency is intentionally left as is, and should be
    // solved not by complicating this method, but by rethinking the
    // approach to memory management for aggregate function states, so that
    // we can provide a proper realloc().
    const size_t existing_bytes = head->pos - range_start;
    const size_t new_bytes = existing_bytes + additional_bytes;
    const char * old_range = range_start;

    // The allocation itself lands in a fresh (or the same, if it already
    // has room) chunk; only then is the old data copied over. The old
    // region is wasted, as with 'realloc'.
    char * new_range = start_alignment
        ? alignedAlloc(new_bytes, start_alignment)
        : alloc(new_bytes);

    memcpy(new_range, old_range, existing_bytes);

    range_start = new_range;
    return new_range + existing_bytes;
}

/// NOTE Old memory region is wasted.
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Common/ArenaAllocator.h
Expand Up @@ -54,7 +54,7 @@ class AlignedArenaAllocator

if (data + old_size == arena->head->pos)
{
arena->alignedAllocContinue(new_size - old_size, data, alignment);
arena->allocContinue(new_size - old_size, data, alignment);
return reinterpret_cast<void *>(const_cast<char *>(data));
}
else
Expand Down
@@ -0,0 +1,3 @@
-- serialization of big arrays shouldn't use too much memory
-- Regression test: Arena::allocContinue used to re-copy the accumulated data
-- on every small append, giving quadratic time/memory behavior when a large
-- groupArray state was serialized. With the fix, serializing ~32M numbers
-- must stay under the limit below.
set max_memory_usage = 3000000000;
select ignore(x) from (select groupArray(number) x from numbers(33554433)) group by x format Null;