Bug #32738705: "OUT OF SORT MEMORY ERROR" HAS AN INCONSISTENT RELATIONSHIP WITH THE BUFFER SIZE

Bug #33501541: Unmanageable Sort Buffer Behavior in 8.0.20+

Implement direct disk-to-disk copies of large packed addons during the
filesort merge phase; if a single row is so large that its addons do not
fit into its slice of the sort buffer during merging (even after
emptying that slice of all other rows), but the sort key _does_ fit,
simply sort the truncated row as usual, and then copy the rest of the
addon incrementally from the input to the output, 4 kB at a time, when
the row is to be written to the merge output. This is possible because
the addon itself doesn't need to be in RAM for the row to be compared
against other rows; only the sort key must.

This greatly relaxes the sort buffer requirements for successful merging,
especially when it comes to JSON rows or small blobs (which are
typically used as packed addons, not sort keys). The rules used to be:

 1. During initial chunk generation: The sort buffer must be at least
    as large as the largest row to be sorted.
 2. During merging: Merging is guaranteed to pass if the sort buffer is
    at least 15 times as large as the largest row (sort key + addons),
    but one may be lucky and pass with only the demands from #1.

Now, for sorts implemented using packed addons (which is the common case
for small blobs and JSON), the new rules are:

 1. Unchanged from #1 above.
 2. During merging: Merging is guaranteed to pass if the sort buffer is
    at least 15 times as large as the largest _sort key_ (plus 4-byte
    length marker), but one may be lucky and pass with only the demands
    from #1.

In practice, this means that filesort merging will almost never fail
due to insufficient buffer space anymore; the query will either fail
because a single row is too large in the sort step, or it will pass
nearly all of the time.
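
To make the new guarantee concrete, here is a rough illustration (the table
name, column types and sizes below are assumptions for the example, not taken
from this change):

  -- Default sort buffer; the arithmetic is approximate.
  SET SESSION sort_buffer_size = 256 * 1024;

  -- Old rule #2: merging was only guaranteed when
  --   256 kB >= 15 * (sort key + addons), i.e. the largest complete row
  --   had to stay under roughly 17 kB.
  -- New rule #2: merging is guaranteed when
  --   256 kB >= 15 * (sort key + 4-byte length marker), so only the sort key
  --   must stay under roughly 17 kB; a row with a small integer key and a
  --   100 kB JSON addon now merges fine (rule #1 must still hold).
  SELECT id, doc FROM t ORDER BY id;  -- hypothetical table t(id INT, doc JSON)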

However, do note that while such merges will work, they will not always
be very performant, as having lots of 1-row merge chunks will mean many
merge passes and little work being done during the initial in-memory
sort. Thus, the main use of this functionality is to be able to do sorts
where there are a few rows with large JSON values or similar, but where
most fit comfortably into the buffer. Also note that since requirement #1
is unchanged, one still cannot sort e.g. 500 kB JSON values using the
default 256 kB sort buffer.
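
For example (a sketch only; the table, column and sizes are hypothetical):

  CREATE TABLE big_docs ( id INT, doc JSON );
  -- Assume each doc is roughly 500 kB.

  SET SESSION sort_buffer_size = 256 * 1024;   -- the default
  SELECT id, doc FROM big_docs ORDER BY id;    -- may still fail with
                                               -- ER_OUT_OF_SORTMEMORY (rule #1)

  SET SESSION sort_buffer_size = 1024 * 1024;  -- larger than any single row
  SELECT id, doc FROM big_docs ORDER BY id;    -- rule #1 is now satisfied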

Older recommendations to keep sort buffers small at nearly any cost are
no longer valid, and have not been for a while. Sort buffers should be
sized to as much RAM as one can afford without interfering with other
tasks (such as the buffer pool, join buffers, or other concurrent
sorts), and small sorts are not affected by the maximum sort buffer size
being set to a larger value, as the sort buffer is incrementally
allocated.
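
As an illustration of that advice (the value is an assumption, not a
recommendation from this change):

  SET GLOBAL sort_buffer_size = 8 * 1024 * 1024;
  -- Small sorts are unaffected by the larger limit: the buffer is
  -- allocated incrementally, so they never claim the full 8 MB.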

Change-Id: I85745cd513402a42ed5fc4f5b7ddcf13c5793100
Steinar H. Gunderson committed Nov 11, 2021
1 parent 1de8c52 commit 463d78d
Showing 4 changed files with 155 additions and 25 deletions.
21 changes: 21 additions & 0 deletions mysql-test/r/filesort.result
@@ -195,3 +195,24 @@ LIMIT 1
);
ERROR 22023: Invalid GIS data provided to function st_astext.
DROP TABLE t;
#
# Bug #32738705: "OUT OF SORT MEMORY ERROR" HAS AN INCONSISTENT RELATIONSHIP WITH THE BUFFER SIZE
#
CREATE TABLE t1 ( a INTEGER, b TEXT );
INSERT INTO t1 VALUES (1, REPEAT('x', 40001));
INSERT INTO t1 VALUES (2, REPEAT('x', 40002));
INSERT INTO t1 VALUES (3, REPEAT('x', 40003));
INSERT INTO t1 VALUES (4, REPEAT('x', 40005));
INSERT INTO t1 VALUES (5, REPEAT('x', 40008));
INSERT INTO t1 VALUES (6, REPEAT('x', 40013));
SET sort_buffer_size=65536;
SELECT a, LENGTH(b) FROM t1 ORDER BY a DESC;
a LENGTH(b)
6 40013
5 40008
4 40005
3 40003
2 40002
1 40001
SET sort_buffer_size=DEFAULT;
DROP TABLE t1;
21 changes: 21 additions & 0 deletions mysql-test/t/filesort.test
@@ -189,3 +189,24 @@ SELECT
LIMIT 1
);
DROP TABLE t;

--echo #
--echo # Bug #32738705: "OUT OF SORT MEMORY ERROR" HAS AN INCONSISTENT RELATIONSHIP WITH THE BUFFER SIZE
--echo #

CREATE TABLE t1 ( a INTEGER, b TEXT );
INSERT INTO t1 VALUES (1, REPEAT('x', 40001));
INSERT INTO t1 VALUES (2, REPEAT('x', 40002));
INSERT INTO t1 VALUES (3, REPEAT('x', 40003));
INSERT INTO t1 VALUES (4, REPEAT('x', 40005));
INSERT INTO t1 VALUES (5, REPEAT('x', 40008));
INSERT INTO t1 VALUES (6, REPEAT('x', 40013));

# Set up a sort with large addons (since b is TEXT and not HUGETEXT,
# it's treated as a packed addon). We can keep one row and then some
# in the sort buffer, but not two, so we need special handling during merging.
SET sort_buffer_size=65536;
SELECT a, LENGTH(b) FROM t1 ORDER BY a DESC;
SET sort_buffer_size=DEFAULT;

DROP TABLE t1;
93 changes: 85 additions & 8 deletions sql/filesort.cc
@@ -1737,6 +1737,9 @@ static uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
bytes_to_read = rec_length * static_cast<size_t>(count);
if (count == 0) {
// Not even room for the first row.
// TODO(sgunders): Consider adopting the single-row
// fallback from packed addons below, if it becomes
// an issue.
my_error(ER_OUT_OF_SORTMEMORY, ME_FATALERROR);
LogErr(ERROR_LEVEL, ER_SERVER_OUT_OF_SORTMEMORY);
return (uint)-1;
@@ -1750,6 +1753,8 @@ static uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
if (mysql_file_pread(fromfile->file, merge_chunk->buffer_start(),
bytes_to_read, merge_chunk->file_position(), MYF_RW))
return (uint)-1; /* purecov: inspected */
merge_chunk->set_valid_buffer_end(merge_chunk->buffer_start() +
bytes_to_read);

size_t num_bytes_read;
if (packed_addon_fields || using_varlen_keys) {
@@ -1760,6 +1765,7 @@ static uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
*/
uchar *record = merge_chunk->buffer_start();
uint ix = 0;
uint extra_bytes_to_advance = 0;
for (; ix < count; ++ix) {
if (using_varlen_keys &&
(record + Sort_param::size_of_varlength_field) >=
@@ -1780,8 +1786,24 @@ static uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
? Addon_fields::read_addon_length(start_of_payload)
: param->fixed_res_length;

if (start_of_payload + res_length >= merge_chunk->buffer_end())
break; // Incomplete record.
// NOTE: There are some dances with the arithmetic here to avoid
// forming invalid pointers (start_of_payload + res_length may be
// outside the legal areas).
if (start_of_payload > merge_chunk->buffer_end() - res_length) {
// Incomplete record, but importantly, we're only missing the
// addon fields, so in a pinch, we can still merge the row
// and only stream the addon fields from disk to disk if needed.
// So we pretend we've read these bytes, and we'll stream
// the remaining ones from disk when we actually copy the row.
//
// We do this as a last-resort if we otherwise couldn't fit
// any rows at all, so that the merge doesn't fail.
if (ix == 0) {
ix = 1;
extra_bytes_to_advance = res_length + (start_of_payload - record);
}
break;
}

assert(res_length > 0);
record = start_of_payload + res_length;
@@ -1794,6 +1816,7 @@ static uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
}
count = ix;
num_bytes_read = record - merge_chunk->buffer_start();
num_bytes_read += extra_bytes_to_advance;
DBUG_PRINT("info", ("read %llu bytes of complete records",
static_cast<ulonglong>(bytes_to_read)));
} else
@@ -1809,6 +1832,61 @@ static uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
return 0;
} /* read_to_buffer */

/**
Copy “count” bytes from one file, starting at offset “offset”, to the current
write position (usually the end) of the other.
*/
static int copy_bytes(IO_CACHE *to_file, IO_CACHE *from_file, size_t count,
my_off_t offset) {
// TODO(sgunders): Consider reusing the merge buffer if it's larger
// than 4 kB. However, note that we can only use the payload part of it,
// since we may need the sort key for deduplication purposes.
uchar buf[4096];
while (count > 0) {
size_t bytes_to_copy = min(count, sizeof(buf));
if (mysql_file_pread(from_file->file, buf, bytes_to_copy, offset, MYF_RW)) {
return 1; /* purecov: inspected */
}
if (my_b_write(to_file, buf, bytes_to_copy)) {
return 1; /* purecov: inspected */
}
count -= bytes_to_copy;
offset += bytes_to_copy;
}
return 0;
}

/**
Copy a row from “from_file” to “to_file” (this is used during merging).
Most commonly, we'll already have all (or most) of it in memory,
as indicated by “merge_chunk”, which must be positioned on the row.
But in very tight circumstances (see read_to_buffer()), some of it
may still be on disk, and will need to be copied directly from file to file.
*/
static int copy_row(IO_CACHE *to_file, IO_CACHE *from_file,
Merge_chunk *merge_chunk, uint offset,
uint bytes_to_write) {
// NOTE: We need to use valid_buffer_end() and not buffer_end(), as the buffer
// may have grown since we read data into it.
uchar *row_start = merge_chunk->current_key() + offset;
size_t bytes_in_buffer =
min<size_t>(merge_chunk->valid_buffer_end() - row_start, bytes_to_write);
size_t remaining_bytes = bytes_to_write - bytes_in_buffer;

if (bytes_in_buffer > 0) {
if (my_b_write(to_file, row_start, bytes_in_buffer)) {
return 1; /* purecov: inspected */
}
}
if (remaining_bytes > 0) {
if (copy_bytes(to_file, from_file, remaining_bytes,
merge_chunk->file_position() - remaining_bytes)) {
return 1;
}
}
return 0;
}

/**
Merge buffers to one buffer.
@@ -1911,9 +1989,9 @@ static int merge_buffers(THD *thd, Sort_param *param, IO_CACHE *from_file,
}

if (!is_duplicate) {
if (my_b_write(to_file, merge_chunk->current_key() + offset,
bytes_to_write)) {
return 1; /* purecov: inspected */
if (copy_row(to_file, from_file, merge_chunk, offset,
bytes_to_write)) {
return 1;
}
if (!--max_rows) {
error = 0; /* purecov: inspected */
@@ -1964,9 +2042,8 @@ static int merge_buffers(THD *thd, Sort_param *param, IO_CACHE *from_file,
!mcl.key_is_greater_than(merge_chunk->current_key(),
param->m_last_key_seen));
if (!is_duplicate) {
if (my_b_write(to_file, merge_chunk->current_key() + offset,
bytes_to_write)) {
return 1; /* purecov: inspected */
if (copy_row(to_file, from_file, merge_chunk, offset, bytes_to_write)) {
return 1;
}
if (!--max_rows) {
error = 0; /* purecov: inspected */
45 changes: 28 additions & 17 deletions sql/sql_sort.h
@@ -55,21 +55,13 @@ constexpr size_t VARLEN_PREFIX = 4;
*/
struct Merge_chunk {
public:
Merge_chunk()
: m_current_key(nullptr),
m_file_position(0),
m_buffer_start(nullptr),
m_buffer_end(nullptr),
m_rowcount(0),
m_mem_count(0),
m_max_keys(0) {}

my_off_t file_position() const { return m_file_position; }
void set_file_position(my_off_t val) { m_file_position = val; }
void advance_file_position(my_off_t val) { m_file_position += val; }

uchar *buffer_start() { return m_buffer_start; }
const uchar *buffer_end() const { return m_buffer_end; }
const uchar *valid_buffer_end() const { return m_valid_buffer_end; }

void set_buffer(uchar *start, uchar *end) {
m_buffer_start = start;
@@ -80,6 +72,10 @@ struct Merge_chunk {
assert(m_buffer_end == nullptr || end <= m_buffer_end);
m_buffer_end = end;
}
void set_valid_buffer_end(uchar *end) {
assert(end <= m_buffer_end);
m_valid_buffer_end = end;
}

void init_current_key() { m_current_key = m_buffer_start; }
uchar *current_key() const { return m_current_key; }
@@ -118,14 +114,29 @@
}

private:
uchar *m_current_key; ///< The current key for this chunk.
my_off_t m_file_position; ///< Current position in the file to be sorted.
uchar *m_buffer_start; ///< Start of main-memory buffer for this chunk.
uchar *m_buffer_end; ///< End of main-memory buffer for this chunk.
ha_rows m_rowcount; ///< Number of unread rows in this chunk.
ha_rows m_mem_count; ///< Number of rows in the main-memory buffer.
ha_rows m_max_keys; ///< If we have fixed-size rows:
/// max number of rows in buffer.
/// The current key for this chunk.
uchar *m_current_key = nullptr;

/// Current position in the file to be sorted.
my_off_t m_file_position = 0;

/// Start of main-memory buffer for this chunk.
uchar *m_buffer_start = nullptr;

/// End of main-memory buffer for this chunk.
uchar *m_buffer_end = nullptr;

/// End of actual, valid data for this chunk.
uchar *m_valid_buffer_end;

/// Number of unread rows in this chunk.
ha_rows m_rowcount = 0;

/// Number of rows in the main-memory buffer.
ha_rows m_mem_count = 0;

/// If we have fixed-size rows: max number of rows in buffer.
ha_rows m_max_keys = 0;
};

typedef Bounds_checked_array<Merge_chunk> Merge_chunk_array;
