Permalink
Browse files

Merge aio page read requests

Summary:
Tries to submit multiple aio page read requests together to improve read
performance.

This code adds an array to buffer aio requests in os_aio_array_t. So far
only os_aio_read_array uses it. A new parameter (should_buffer) is added
to indicate whether an aio request should be buffered or submitted. If
should_submit is true, it will submit all buffered aio requests on the
os_aio_array.

Only buf_read_ahead_linear is modified to utilize this functionality so
far. All other call sites are setting should_submit to true. Other
os_aio_array_t arrays will also ignore this.

If one thread calling buf_read_ahead_linear is buffering io requests but
another thread issues a normal os_aio_request, that other request will
submit all the buffered requests from buf_read_ahead_linear. This is
still better than nothing I suppose.

Test Plan:
Perf tests were run manually and approved by Yoshinori.

Reviewers: steaphan, jtolmer, yoshinori, mcallaghan

Reviewed By: steaphan, nizamordulu
  • Loading branch information...
rongrong authored and steaphangreene committed May 20, 2013
1 parent 2783df6 commit f9d1a5332eb2c82c028638d3b93b5a3592a69ffa
@@ -23,10 +23,16 @@ IO_WRITE_BYTES > 40000000 IO_WRITE_REQUESTS > 800 IO_WRITE_WAIT_USECS > IO_WRITE
select INNODB_PAGES_WRITTEN > 800, INNODB_PAGES_WRITTEN_INDEX > 800, INNODB_PAGES_WRITTEN_BLOB from information_schema.table_statistics where table_name = 't1';
INNODB_PAGES_WRITTEN > 800 INNODB_PAGES_WRITTEN_INDEX > 800 INNODB_PAGES_WRITTEN_BLOB
1 1 0
+show global status like "innodb_buffered_aio_submitted";
+Variable_name Value
+Innodb_buffered_aio_submitted 0
select * from t1;
select count(*) from t1;
count(*)
65536
+show global status like "innodb_buffered_aio_submitted";
+Variable_name Value
+Innodb_buffered_aio_submitted 1151
select IO_READ_BYTES, IO_READ_REQUESTS, IO_READ_SVC_USECS < IO_READ_WAIT_USECS, IO_READ_SVC_USECS_MAX < IO_READ_WAIT_USECS_MAX, IO_READ_SLOW_IOS from information_schema.table_statistics where table_name = 't1';
IO_READ_BYTES IO_READ_REQUESTS IO_READ_SVC_USECS < IO_READ_WAIT_USECS IO_READ_SVC_USECS_MAX < IO_READ_WAIT_USECS_MAX IO_READ_SLOW_IOS
20512768 1252 1 1 0
@@ -0,0 +1,30 @@
+DROP TABLE if exists t1;
+CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256)) ENGINE=INNODB;
+INSERT INTO t1 VALUES (0, REPEAT('a',256));
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+show global status like "innodb_buffered_aio_submitted";
+Variable_name Value
+Innodb_buffered_aio_submitted 0
+select * from t1;
+select count(*) from t1;
+count(*)
+65536
+show global status like "innodb_buffered_aio_submitted";
+Variable_name Value
+Innodb_buffered_aio_submitted 1151
+DROP TABLE t1;
@@ -33,12 +33,15 @@ select INNODB_PAGES_WRITTEN > 800, INNODB_PAGES_WRITTEN_INDEX > 800, INNODB_PAGE
--source include/restart_mysqld.inc
+show global status like "innodb_buffered_aio_submitted";
+
--disable_result_log
select * from t1;
--enable_result_log
select count(*) from t1;
+show global status like "innodb_buffered_aio_submitted";
select IO_READ_BYTES, IO_READ_REQUESTS, IO_READ_SVC_USECS < IO_READ_WAIT_USECS, IO_READ_SVC_USECS_MAX < IO_READ_WAIT_USECS_MAX, IO_READ_SLOW_IOS from information_schema.table_statistics where table_name = 't1';
select INNODB_PAGES_READ, INNODB_PAGES_READ_INDEX, INNODB_PAGES_READ_BLOB from information_schema.table_statistics where table_name = 't1';
@@ -0,0 +1,42 @@
+--source include/have_innodb.inc
+--source include/have_native_aio.inc
+
+--disable_warnings
+DROP TABLE if exists t1;
+--enable_warnings
+
+# Create table.
+CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256)) ENGINE=INNODB;
+
+# Populate table.
+INSERT INTO t1 VALUES (0, REPEAT('a',256));
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+INSERT INTO t1 SELECT 0, b FROM t1;
+
+--source include/restart_mysqld.inc
+
+show global status like "innodb_buffered_aio_submitted";
+
+--disable_result_log
+select * from t1;
+--enable_result_log
+
+select count(*) from t1;
+
+show global status like "innodb_buffered_aio_submitted";
+
+DROP TABLE t1;
@@ -120,7 +120,12 @@ buf_read_page_low(
use to stop dangling page reads from a tablespace
which we have DISCARDed + IMPORTed back */
ulint offset, /*!< in: page number */
- trx_t* trx)
+ trx_t* trx,
+ ibool should_buffer) /*!< in: whether to buffer an aio request.
+ AIO read ahead uses this. If you plan to
+ use this parameter, make sure you remember
+ to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/

This comment has been minimized.

Show comment
Hide comment
@zeha

zeha Oct 11, 2013

likely typo in comment: read <-> ready

@zeha

zeha Oct 11, 2013

likely typo in comment: read <-> ready

This comment has been minimized.

Show comment
Hide comment
@steaphan-fb-com

steaphan-fb-com Oct 11, 2013

Yep, that's a typo. Thanks. We'll fix it.

@steaphan-fb-com

steaphan-fb-com Oct 11, 2013

Yep, that's a typo. Thanks. We'll fix it.

This comment has been minimized.

Show comment
Hide comment
{
buf_page_t* bpage;
ulint wake_later;
@@ -186,15 +191,17 @@ buf_read_page_low(
| ignore_nonexistent_pages,
sync, space, zip_size, offset, 0, zip_size,
bpage->zip.data, bpage,
- trx ? &trx->table_io_perf : NULL);
+ trx ? &trx->table_io_perf : NULL,
+ should_buffer);
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
*err = _fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
((buf_block_t*) bpage)->frame, bpage,
- trx ? &trx->table_io_perf : NULL);
+ trx ? &trx->table_io_perf : NULL,
+ should_buffer);
}
if (sync) {
@@ -349,7 +356,7 @@ buf_read_ahead_random(
&err, false,
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, FALSE,
- tablespace_version, i, trx);
+ tablespace_version, i, trx, FALSE);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -413,7 +420,7 @@ buf_read_page(
count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
- tablespace_version, offset, trx);
+ tablespace_version, offset, trx, FALSE);
srv_stats.buf_pool_reads.add(count);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
@@ -461,7 +468,7 @@ buf_read_page_async(
| OS_AIO_SIMULATED_WAKE_LATER
| BUF_READ_IGNORE_NONEXISTENT_PAGES,
space, zip_size, FALSE,
- tablespace_version, offset, NULL);
+ tablespace_version, offset, NULL, FALSE);
srv_stats.buf_pool_reads.add(count);
/* We do not increment number of I/O operations used for LRU policy
@@ -716,13 +723,12 @@ buf_read_ahead_linear(
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */
-
if (!ibuf_bitmap_page(zip_size, i)) {
count += buf_read_page_low(
&err, false,
ibuf_mode,
space, zip_size, FALSE, tablespace_version, i,
- trx);
+ trx, TRUE);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -735,6 +741,10 @@ buf_read_ahead_linear(
}
}
}
+#if defined(LINUX_NATIVE_AIO)
+ /* Tell aio to submit all buffered requests. */
+ ut_a(os_aio_linux_dispatch_read_array_submit());
+#endif
/* In simulated aio we wake the aio handler threads only after
queuing all aio requests, in native aio the following call does
@@ -812,7 +822,7 @@ buf_read_ibuf_merge_pages(
buf_read_page_low(&err, sync && (i + 1 == n_stored),
BUF_READ_ANY_PAGE, space_ids[i],
zip_size, TRUE, space_versions[i],
- page_nos[i], NULL);
+ page_nos[i], NULL, FALSE);
if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
tablespace_deleted:
@@ -907,13 +917,13 @@ buf_read_recv_pages(
if ((i + 1 == n_stored) && sync) {
buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
zip_size, TRUE, tablespace_version,
- page_nos[i], NULL);
+ page_nos[i], NULL, FALSE);
} else {
buf_read_page_low(&err, false, BUF_READ_ANY_PAGE
| OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, TRUE,
tablespace_version, page_nos[i],
- NULL);
+ NULL, FALSE);
}
}
@@ -5355,7 +5355,7 @@ fil_extend_space_to_desired_size(
index IO stats for system table space */
(TRX_SYS_SPACE == space->id)
? NULL : &space->primary_index_id,
- &space->io_perf2, NULL);
+ &space->io_perf2, NULL, TRUE);
#endif /* UNIV_HOTBACKUP */
if (success) {
os_has_said_disk_full = FALSE;
@@ -5742,9 +5742,14 @@ _fil_io(
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
- os_io_table_perf_t* table_io_perf)/* in/out: tracks table IO stats
+ os_io_table_perf_t* table_io_perf,/* in/out: tracks table IO stats
to be counted in IS.user_statistics only
for sync reads and writes */
+ ibool should_buffer) /*!< in: whether to buffer an aio request.
+ AIO read ahead uses this. If you plan to
+ use this parameter, make sure you remember
+ to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/
{
ulint mode;
fil_space_t* space;
@@ -5955,7 +5960,7 @@ _fil_io(
? NULL : &space->primary_index_id,
/*(io_flags & OS_AIO_DOUBLE_WRITE)
? &io_perf_doublewrite : */&space->io_perf2,
- table_io_perf);
+ table_io_perf, should_buffer);
#endif /* UNIV_HOTBACKUP */
ut_a(ret);
@@ -899,6 +899,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_trx_n_rollback_partial, SHOW_LONG},
{"transaction_rollback_total",
(char*) &export_vars.innodb_trx_n_rollback_total, SHOW_LONG},
+ {"buffered_aio_submitted",
+ (char*) &export_vars.innodb_buffered_aio_submitted, SHOW_LONG},
{"zip_1024_compressed",
(char*) &export_vars.zip1024_compressed, SHOW_LONG},
{"zip_1024_compressed_ok",
@@ -993,7 +993,7 @@ fil_space_get_n_reserved_extents(
#define fil_io(type, sync, space_id, zip_size, block_offset, \
byte_offset, len, buf, message) \
_fil_io(type, sync, space_id, zip_size, block_offset, \
- byte_offset, len, buf, message, NULL)
+ byte_offset, len, buf, message, NULL, TRUE)
/****************************************************************//**
Update stats with per-table data from InnoDB tables. */
@@ -1041,9 +1041,11 @@ _fil_io(
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
- os_io_table_perf_t* table_io_perf) /*!< in/out: tracks table IO stats
+ os_io_table_perf_t* table_io_perf, /*!< in/out: tracks table IO stats
to be used in IS.user_statistics only for
sync reads and writes */
+ ibool should_buffer) /*!< in: whether to buffer an aio request.
+ Only used by aio read ahead*/
__attribute__((nonnull(8)));
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
@@ -325,10 +325,11 @@ The wrapper functions have the prefix of "innodb_". */
pfs_os_file_close_func(file, __FILE__, __LINE__)
# define os_aio(type, mode, name, file, buf, offset, \
- n, message1, message2, primary_index_id, io_perf2, tab) \
+ n, message1, message2, primary_index_id, io_perf2, tab, \
+ should_buffer) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
n, message1, message2, __FILE__, __LINE__, \
- primary_index_id, io_perf2, tab)
+ primary_index_id, io_perf2, tab, should_buffer)
# define os_file_read(file, buf, offset, n) \
pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__)
@@ -370,9 +371,11 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file)
# define os_aio(type, mode, name, file, buf, offset, n, \
- message1, message2, primary_index_id, io_perf2, tab) \
+ message1, message2, primary_index_id, io_perf2, tab, \
+ should_buffer) \
os_aio_func(type, mode, name, file, buf, offset, n, \
- message1, message2, primary_index_id, io_perf2, tab)
+ message1, message2, primary_index_id, io_perf2, tab,\
+ should_buffer)
# define os_file_read(file, buf, offset, n) \
os_file_read_func(file, buf, offset, n)
@@ -826,10 +829,16 @@ pfs_os_aio_func(
ib_uint64_t* primary_index_id,/*!< in: index_id of primary index */
os_io_perf2_t* io_perf2,/*!< in: per fil_space_t performance
counters */
- os_io_table_perf_t* table_io_perf);
+ os_io_table_perf_t* table_io_perf,
/*!< in/out: table IO stats counted for
IS.user_statistics only for sync read
and writes */
+ ibool should_buffer);
+ /*!< in: Whether to buffer an aio request.
+ AIO read ahead uses this. If you plan to
+ use this parameter, make sure you remember
+ to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/
/*******************************************************************//**
NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
@@ -1195,10 +1204,15 @@ os_aio_func(
ib_uint64_t* primary_index_id,/*!< in: index_id of primary index */
os_io_perf2_t* io_perf2,/*!< in: per fil_space_t performance
counters */
- os_io_table_perf_t* table_io_perf);
+ os_io_table_perf_t* table_io_perf,
/*!< in/out: table IO stats counted for
IS.user_statistics only for sync read
and writes */
+ ibool should_buffer); /*!< in: Whether to buffer an aio request.
+ AIO read ahead uses this. If you plan to
+ use this parameter, make sure you remember
+ to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/
/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
@@ -1362,6 +1376,12 @@ os_aio_linux_handle(
parameters are valid and can be used to
restart the operation. */
ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
+/*******************************************************************//**
+Submit buffered AIO requests on the given segment to the kernel.
+@return TRUE on success. */
+UNIV_INTERN
+ibool
+os_aio_linux_dispatch_read_array_submit();
#endif /* LINUX_NATIVE_AIO */
#ifndef UNIV_NONINL
@@ -214,8 +214,11 @@ pfs_os_aio_func(
ulint src_line,/*!< in: line where the func invoked */
ib_uint64_t* primary_index_id,/*!< in: index_id of primary index */
os_io_perf2_t* io_perf2,/*!< in: per fil_space_t performance counters */
- os_io_table_perf_t* table_io_perf)/*!< in/out: used for per-table
+ os_io_table_perf_t* table_io_perf,/*!< in/out: used for per-table
file stats */
+ ibool should_buffer)
+ /*!< in: whether to buffer an aio request.
+ Only used by aio read ahead*/
{
ibool result;
struct PSI_file_locker* locker = NULL;
@@ -230,7 +233,7 @@ pfs_os_aio_func(
result = os_aio_func(type, mode, name, file, buf, offset,
n, message1, message2, primary_index_id,
- io_perf2, table_io_perf);
+ io_perf2, table_io_perf, should_buffer);
register_pfs_file_io_end(locker, n);
Oops, something went wrong.

0 comments on commit f9d1a53

Please sign in to comment.