Skip to content

Commit d3054d2

Browse files
committed
Dump core for mysqld without large memory buffer
Summary: Core files can be very helpful for debugging production crashes, but core dumps have been disabled in production for multiple reasons. One of the reasons is that the core file can be huge because of the large InnoDB buffer pool within the process. This diff introduces a new command line argument '--innodb-dump-core-without-large-mem-buf'. When it is set, all large memory allocations made by the function os_mem_alloc_large() in InnoDB are excluded from the core file by calling the Linux API madvise() with the argument MADV_DONTDUMP. It can be disabled by '--skip-innodb-dump-core-without-large-mem-buf'. os_mem_alloc_large() is used to allocate large memory chunks for the InnoDB buffer pool, file data merge, row log buffer, etc. The buffer pool is the main one. With this feature, the core size will be less than 3GB when the buffer pool size is 60GB, so it won't impact production. -- This sys var only takes effect when core dump is enabled by setting 'core-file' in my.cnf. -- By default '--innodb-dump-core-without-large-mem-buf' is set in prod to dump smaller cores, but it is not set by default in MTR so that the full core file can be dumped. -- This diff also makes the sys var core_file read-only. -- There are other bugs that prevent mysqld from dumping cores, e.g. when mysqld switches user from root to a non-root user, etc. Those will be addressed in separate diffs.
Test Plan: -- New test cases: (1) innodb.mysqld_core_dump_without_large_mem_buf.test (2) innodb.mysqld_core_dump_without_large_mem_buf_with_resizing.test (3) sys_vars.innodb_dump_core_without_large_mem_buf_basic.test -- Manual test on a prod host and confirmed that small core is dumped (1) Build RPM with this diff and install it on a reserved prod host (2) Add 'core-file' into the '[mysqld]' section to enable core dump (3) Set 'innodb_buffer_pool_size' to 53GB (4) Start mysqld and find out the PID of mysqld (5) Use 'kill -s SIGSEGV <PID>' to kill mysqld and generate core file (6) Use 'cat /proc/sys/kernel/core_pattern' to find out the core file pattern and location (7) Find the core file at the location with the expected name mysqld.<PID>, the size was about 3GB Reviewers: santoshb, jtolmer Reviewed By: jtolmer Subscribers: webscalesql-eng Differential Revision: https://reviews.facebook.net/D54555
1 parent c69af69 commit d3054d2

15 files changed

+315
-41
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
--echo # Get the full path name of the PID file
2+
--let $pid_file= query_get_value(SELECT @@pid_file, @@pid_file, 1)
3+
--let PIDFILE= $pid_file
4+
5+
--echo # Expecting a "crash", but don't restart the server until it is told to
6+
--echo # Expected max core size is $expected_max_core_size MB
7+
--let MAXCORESIZE= $expected_max_core_size
8+
9+
--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
10+
11+
perl;
12+
13+
# Crash the running mysqld with SIGSEGV and check that the core file it dumps
# stays below the expected size.
#
# Inputs (environment variables):
#   PIDFILE     - path of the mysqld PID file
#   MAXCORESIZE - exclusive upper bound for the core file size, in MB
#
# Output (stdout): one line after the signal has been sent, followed by exactly
# one "OK!" / "Failed!" verdict line.
#
# Dies (stderr) when the environment is not usable: missing variables,
# unreadable or empty PID file, unreadable core_pattern, core files piped to a
# helper program, undeliverable signal, or unreadable core directory.
use strict;
use warnings;

my $pid_file = $ENV{'PIDFILE'} or die "PIDFILE not set";
my $expected_max_core_size = $ENV{'MAXCORESIZE'} or die "MAXCORESIZE not set";

# The argument is in MB
$expected_max_core_size = $expected_max_core_size * 1024 * 1024;

# Get PID of mysqld
open(my $fh, '<', $pid_file) or die "Cannot open pid file $pid_file\n";
my $pid = <$fh>;
close($fh);

# An empty PID file yields undef; treat it like a blank line
$pid = '' unless defined $pid;
$pid =~ s/\s//g;

# Only a plain number is acceptable: the value is passed to kill() below
if ($pid !~ /\A\d+\z/) {
  die "Couldn't retrieve PID from PID file.\n";
}

# Get the core file pattern, e.g. /var/tmp/cores/%e.%p
my $pattern_file = '/proc/sys/kernel/core_pattern';
open(my $pattern_fh, '<', $pattern_file)
  or die "Cannot open $pattern_file: $!\n";
my $core_pattern = <$pattern_fh>;
close($pattern_fh);
$core_pattern = '' unless defined $core_pattern;
chomp $core_pattern;

# A leading '|' means the kernel pipes the core to a program instead of
# writing a file, so there is no directory to inspect
if ($core_pattern =~ /\A\|/) {
  die "Core files are piped to a program ($core_pattern), " .
      "cannot locate the core file.\n";
}

# The core file directory, e.g. /var/tmp/cores
my $last_slash = rindex($core_pattern, '/');
my $core_dir = $last_slash > 0  ? substr($core_pattern, 0, $last_slash)
             : $last_slash == 0 ? '/'
             :                    '.';

# A relative pattern is resolved against the working directory of the crashing
# process, which has to be looked up while mysqld is still alive
if ($core_dir !~ m{\A/}) {
  my $mysqld_cwd = readlink("/proc/$pid/cwd")
    or die "Cannot resolve working directory of PID $pid: $!\n";
  $core_dir = $mysqld_cwd . '/' . $core_dir;
}

# The current time in seconds since epoch, taken before the crash so that only
# files written afterwards are considered
my $cur_time = time;

# Kill mysqld to dump a core
kill('SEGV', $pid)
  or die "Failed to send SIGSEGV to mysqld (PID $pid): $!\n";
print "# Perl: Sent a SIGSEGV to mysqld to dump a core.\n";

my $found_core = 0;
my $core_size = 0;
my $core_path;

# Check the files in the core file directory, once per second
my $wait_sec = 60;
POLL:
while ($wait_sec > 0) {
  opendir(my $dir, $core_dir) or die "Failed to open dir $core_dir: $!\n";
  # Keep only names containing the PID as a whole number, so that PID 123 does
  # not match e.g. core.41234
  my @candidates = grep { /(?<!\d)\Q$pid\E(?!\d)/ } readdir($dir);
  closedir($dir);

  for my $file (@candidates) {
    my $full_path = $core_dir . '/' . $file;

    # The file may vanish between readdir() and stat(); skip it in that case
    my @stat = stat($full_path) or next;
    next unless -f _;

    # The last write time in seconds since epoch
    my $write_secs = $stat[9];

    # If the file was written within a minute after the crash
    if ($cur_time <= $write_secs && $write_secs - $cur_time < 60) {
      $found_core = 1;
      $core_path  = $full_path;
      $core_size  = $stat[7];
      last POLL;
    }
  }

  # Sleep 1 second and try again
  --$wait_sec;
  sleep 1;
}

if ($found_core) {
  # The file shows up as soon as the kernel starts dumping, so its size at that
  # moment may be only part of the final size.  Keep sampling until the size
  # is unchanged for two consecutive seconds, the limit is already exceeded
  # (the verdict cannot change any more), or the settle budget runs out.
  my $settle_sec   = 120;
  my $stable_polls = 0;
  while ($core_size < $expected_max_core_size
         && $stable_polls < 2
         && $settle_sec > 0) {
    sleep 1;
    --$settle_sec;

    my $size_now = (stat($core_path))[7];
    last unless defined $size_now;

    if ($size_now == $core_size) {
      ++$stable_polls;
    } else {
      $stable_polls = 0;
      $core_size    = $size_now;
    }
  }

  # Remove the core file so that core files do not accumulate over time
  unlink $core_path;
}

if (!$found_core) {
  print "# Perl: Failed! Didn't find the core file!\n";
} elsif ($core_size < $expected_max_core_size) {
  print "# Perl: OK! Found the core file and it's small!\n";
} else {
  print "# Perl: Failed! Found the core file but it's too big ($core_size)!\n";
}
93+
EOF
94+
95+
--echo # Make server restart
96+
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
97+
98+
--enable_reconnect
99+
100+
--echo # Wait for server to be back online
101+
--source include/wait_until_connected_again.inc
102+
103+
--disable_reconnect

mysql-test/mysql-test-run.pl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5283,6 +5283,7 @@ ($$$)
52835283
# override defaults above.
52845284

52855285
my $found_skip_core= 0;
5286+
my $innodb_dump_core_without_large_mem_buf= 0;
52865287
my $found_no_console= 0;
52875288
my $found_log_error= 0;
52885289
foreach my $arg ( @$extra_opts )
@@ -5301,6 +5302,10 @@ ($$$)
53015302
{
53025303
$found_skip_core= 1;
53035304
}
5305+
elsif ($arg eq "--innodb-dump-core-without-large-mem-buf")
5306+
{
5307+
$innodb_dump_core_without_large_mem_buf= 1;
5308+
}
53045309
elsif ($arg eq "--no-console")
53055310
{
53065311
$found_no_console= 1;
@@ -5335,6 +5340,11 @@ ($$$)
53355340
mtr_add_arg($args, "%s", "--core-file");
53365341
}
53375342

5343+
# Set the default value to false so that the full core will be dumped
5344+
if ( !$innodb_dump_core_without_large_mem_buf)
5345+
{
5346+
mtr_add_arg($args, "--skip-innodb-dump-core-without-large-mem-buf");
5347+
}
53385348
return $args;
53395349
}
53405350

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Shutdown server
2+
# Restart server with --log-error
3+
# Get the full path name of the PID file
4+
# Expecting a "crash", but don't restart the server until it is told to
5+
# Expected max core size is 3584 MB
6+
# Perl: Sent a SIGSEGV to mysqld to dump a core.
7+
# Perl: OK! Found the core file and it's small!
8+
# Make server restart
9+
# Wait for server to be back online
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Shutdown server
2+
# Restart server with --log-error
3+
set global innodb_file_format=`Barracuda`;
4+
set global innodb_file_per_table=ON;
5+
set global innodb_adaptive_hash_index=ON;
6+
set global innodb_buffer_pool_size = 21474836480;
7+
set global innodb_adaptive_hash_index=OFF;
8+
set global innodb_buffer_pool_size = 64424509440;
9+
# Get the full path name of the PID file
10+
# Expecting a "crash", but don't restart the server until it is told to
11+
# Expected max core size is 5632 MB
12+
# Perl: Sent a SIGSEGV to mysqld to dump a core.
13+
# Perl: OK! Found the core file and it's small!
14+
# Make server restart
15+
# Wait for server to be back online
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
--innodb-dump-core-without-large-mem-buf
2+
--innodb-buffer-pool-size=60G
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
################################################################################
2+
# This test is to test if mysqld can dump a core without large memory buffers.
3+
# See opt file for the config:
4+
# (1) --innodb-dump-core-without-large-mem-buf is set
5+
# (2) the buffer pool is set to be large so that without dropping the large
6+
# memory buffers the core size will be much greater than 3.5GB (the actual
7+
# core size is less than 3GB now but set the limit to 3.5GB here in case
8+
# the memory footprint increases in the future)
9+
10+
--source include/not_valgrind.inc
11+
--source include/have_innodb.inc
12+
13+
# Embedded mode doesn't support restart
14+
--source include/not_embedded.inc
15+
16+
--echo # Shutdown server
17+
--source include/shutdown_mysqld.inc
18+
19+
--echo # Restart server with --log-error
20+
--exec echo "restart:--log-error=$MYSQLTEST_VARDIR/log/core_dump.err" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
21+
--enable_reconnect
22+
--source include/wait_until_connected_again.inc
23+
24+
--let $expected_max_core_size = 3584
25+
--source include/mysqld_core_dump.inc
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
--innodb-dump-core-without-large-mem-buf
2+
--innodb-buffer-pool-size=60G
3+
--innodb-buffer-pool-resizing-timeout=60
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
################################################################################
2+
# This test is to test if mysqld can dump a core without large memory buffers.
3+
# See opt file for the config:
4+
# (1) --innodb-dump-core-without-large-mem-buf is set
5+
# (2) the buffer pool is set to be large initially, shrink it, then expand
6+
# it back to the original large size, without dropping the large memory
7+
# buffers the core size will be much greater than 5.5GB (the actual
8+
# core size is less than 5GB now but set the limit to 5.5GB here in case
9+
# the memory footprint increases in the future)
10+
11+
--source include/not_valgrind.inc
12+
--source include/have_innodb.inc
13+
14+
# Embedded mode doesn't support restart
15+
--source include/not_embedded.inc
16+
17+
--echo # Shutdown server
18+
--source include/shutdown_mysqld.inc
19+
20+
--echo # Restart server with --log-error
21+
--exec echo "restart:--log-error=$MYSQLTEST_VARDIR/log/core_dump_with_resizing.err" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
22+
--enable_reconnect
23+
--source include/wait_until_connected_again.inc
24+
25+
--source suite/innodb/t/innodb-buffer-pool-resize-setup.inc
26+
27+
# Shrink buffer pool to 20GB
28+
set global innodb_buffer_pool_size = 21474836480;
29+
--source include/wait_condition.inc
30+
31+
set global innodb_adaptive_hash_index=OFF;
32+
33+
# Expand buffer pool back to 60GB
34+
set global innodb_buffer_pool_size = 64424509440;
35+
--source include/wait_condition.inc
36+
37+
--disable_query_log
38+
set global innodb_buffer_pool_size = @old_innodb_buffer_pool_size;
39+
set global innodb_file_format = @old_innodb_file_format;
40+
set global innodb_file_per_table = @old_innodb_file_per_table;
41+
set global innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index;
42+
--enable_query_log
43+
--source include/wait_condition.inc
44+
45+
--let $expected_max_core_size = 5632
46+
--source include/mysqld_core_dump.inc
Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,23 @@
1-
SELECT * FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME LIKE '%core%';
2-
VARIABLE_NAME VARIABLE_VALUE
3-
CORE_FILE ON
4-
SHOW GLOBAL VARIABLES LIKE '%core%';
5-
Variable_name Value
6-
core_file ON
7-
SET @old_val = @@global.core_file;
8-
SELECT @old_val;
9-
@old_val
10-
1
11-
SET GLOBAL core_file = FALSE;
12-
SELECT @@core_file;
13-
@@core_file
14-
0
15-
SHOW GLOBAL VARIABLES LIKE '%core%';
16-
Variable_name Value
17-
core_file OFF
18-
SET GLOBAL core_file = TRUE;
19-
SELECT @@core_file;
20-
@@core_file
1+
select @@global.core_file;
2+
@@global.core_file
213
1
22-
SHOW GLOBAL VARIABLES LIKE '%core%';
4+
select @@session.core_file;
5+
ERROR HY000: Variable 'core_file' is a GLOBAL variable
6+
show global variables like 'core_file';
237
Variable_name Value
248
core_file ON
25-
SET @@global.core_file = @old_val;
26-
SELECT @@global.core_file;
27-
@@global.core_file
28-
1
29-
SHOW GLOBAL VARIABLES LIKE '%core%';
9+
show session variables like 'core_file';
3010
Variable_name Value
3111
core_file ON
12+
select * from information_schema.global_variables where variable_name='core_file';
13+
VARIABLE_NAME VARIABLE_VALUE
14+
CORE_FILE ON
15+
select * from information_schema.session_variables where variable_name='core_file';
16+
VARIABLE_NAME VARIABLE_VALUE
17+
CORE_FILE ON
18+
set global core_file = default;
19+
ERROR HY000: Variable 'core_file' is a read only variable
20+
Expected error 'Read only variable'
21+
set global core_file = true;
22+
ERROR HY000: Variable 'core_file' is a read only variable
23+
Expected error 'Read only variable'
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
select @@global.innodb_dump_core_without_large_mem_buf;
2+
@@global.innodb_dump_core_without_large_mem_buf
3+
0
4+
select @@session.innodb_dump_core_without_large_mem_buf;
5+
ERROR HY000: Variable 'innodb_dump_core_without_large_mem_buf' is a GLOBAL variable
6+
show global variables like 'innodb_dump_core_without_large_mem_buf';
7+
Variable_name Value
8+
innodb_dump_core_without_large_mem_buf OFF
9+
show session variables like 'innodb_dump_core_without_large_mem_buf';
10+
Variable_name Value
11+
innodb_dump_core_without_large_mem_buf OFF
12+
select * from information_schema.global_variables where variable_name='innodb_dump_core_without_large_mem_buf';
13+
VARIABLE_NAME VARIABLE_VALUE
14+
INNODB_DUMP_CORE_WITHOUT_LARGE_MEM_BUF OFF
15+
select * from information_schema.session_variables where variable_name='innodb_dump_core_without_large_mem_buf';
16+
VARIABLE_NAME VARIABLE_VALUE
17+
INNODB_DUMP_CORE_WITHOUT_LARGE_MEM_BUF OFF
18+
set global innodb_dump_core_without_large_mem_buf = default;
19+
ERROR HY000: Variable 'innodb_dump_core_without_large_mem_buf' is a read only variable
20+
Expected error 'Read only variable'
21+
set global innodb_dump_core_without_large_mem_buf = true;
22+
ERROR HY000: Variable 'innodb_dump_core_without_large_mem_buf' is a read only variable
23+
Expected error 'Read only variable'

0 commit comments

Comments
 (0)