diff --git a/ceph.spec.in b/ceph.spec.in index 614132994db51..d4a7c7369f42c 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1007,6 +1007,8 @@ mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mon mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/osd mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mds mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/mgr +mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash +mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/crash/posted mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/radosgw mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-osd mkdir -p %{buildroot}%{_localstatedir}/lib/ceph/bootstrap-mds @@ -1028,6 +1030,7 @@ rm -rf %{buildroot} %files %files base +%{_bindir}/ceph-crash %{_bindir}/crushtool %{_bindir}/monmaptool %{_bindir}/osdmaptool @@ -1046,6 +1049,7 @@ rm -rf %{buildroot} %{_libdir}/ceph/erasure-code/libec_*.so* %dir %{_libdir}/ceph/compressor %{_libdir}/ceph/compressor/libceph_*.so* +%{_unitdir}/ceph-crash.service %ifarch x86_64 %dir %{_libdir}/ceph/crypto %{_libdir}/ceph/crypto/libceph_*.so* @@ -1091,6 +1095,8 @@ rm -rf %{buildroot} %{_mandir}/man8/monmaptool.8* %{_mandir}/man8/ceph-kvstore-tool.8* #set up placeholder directories +%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash +%attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/crash/posted %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/tmp %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-osd %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/bootstrap-mds @@ -1103,22 +1109,22 @@ rm -rf %{buildroot} %if 0%{?suse_version} %fillup_only if [ $1 -eq 1 ] ; then -/usr/bin/systemctl preset ceph-disk@\*.service ceph.target >/dev/null 2>&1 || : +/usr/bin/systemctl preset ceph-disk@\*.service ceph.target ceph-crash.service >/dev/null 2>&1 || : fi %endif %if 0%{?fedora} || 0%{?rhel} -%systemd_post ceph-disk@\*.service ceph.target +%systemd_post ceph-disk@\*.service ceph.target ceph-crash.service %endif if [ $1 -eq 1 ] ; then 
-/usr/bin/systemctl start ceph.target >/dev/null 2>&1 || : +/usr/bin/systemctl start ceph.target ceph-crash.service >/dev/null 2>&1 || : fi %preun base %if 0%{?suse_version} -%service_del_preun ceph-disk@\*.service ceph.target +%service_del_preun ceph-disk@\*.service ceph.target ceph-crash.service %endif %if 0%{?fedora} || 0%{?rhel} -%systemd_preun ceph-disk@\*.service ceph.target +%systemd_preun ceph-disk@\*.service ceph.target ceph-crash.service %endif %postun base diff --git a/debian/ceph-base.dirs b/debian/ceph-base.dirs index 2ae6860022e03..6f580230a65f4 100644 --- a/debian/ceph-base.dirs +++ b/debian/ceph-base.dirs @@ -4,3 +4,5 @@ var/lib/ceph/bootstrap-osd var/lib/ceph/bootstrap-rgw var/lib/ceph/bootstrap-rbd var/lib/ceph/tmp +var/lib/ceph/crash +var/lib/ceph/crash/posted diff --git a/debian/ceph-base.install b/debian/ceph-base.install index 65d1511d75bb3..1f5619cf8241f 100644 --- a/debian/ceph-base.install +++ b/debian/ceph-base.install @@ -1,4 +1,6 @@ etc/init.d/ceph +lib/systemd/system/ceph-crash.service +usr/bin/ceph-crash usr/bin/ceph-debugpack usr/bin/ceph-detect-init usr/bin/ceph-run diff --git a/doc/mgr/crash.rst b/doc/mgr/crash.rst new file mode 100644 index 0000000000000..5639f32ebb2b9 --- /dev/null +++ b/doc/mgr/crash.rst @@ -0,0 +1,60 @@ +Crash plugin +============ +The crash plugin collects information about daemon crashdumps and stores +it in the Ceph cluster for later analysis. + +Daemon crashdumps are dumped in /var/lib/ceph/crash by default; this can +be configured with the option 'crash dir'. Crash directories are named by +time and date and a randomly-generated UUID, and contain a metadata file +'meta' and a recent log file, with a "crash_id" that is the same. +This plugin allows the metadata about those dumps to be persisted in +the monitors' storage. + +Enabling +-------- + +The *crash* module is enabled with:: + + ceph mgr module enable crash + +Commands +-------- +:: + + ceph crash post -i <metafile> + +Save a crash dump. 
The metadata file is a JSON blob stored in the crash +dir as ``meta``. As usual, the ceph command can be invoked with ``-i -``, +and will read from stdin. + +:: + + ceph crash rm <crashid> + +Remove a specific crash dump. + +:: + + ceph crash ls + +List the timestamp/uuid crashids for all saved crash info. + +:: + + ceph crash stat + +Show a summary of saved crash info grouped by age. + +:: + + ceph crash info <crashid> + +Show all details of a saved crash. + +:: + + ceph crash prune <keep> + +Remove saved crashes older than 'keep' days. <keep> must be an integer. + + diff --git a/doc/mgr/index.rst b/doc/mgr/index.rst index d8440cb8e6a3d..86b7889fd4e02 100644 --- a/doc/mgr/index.rst +++ b/doc/mgr/index.rst @@ -38,3 +38,4 @@ sensible. Telemetry plugin Telegraf plugin Iostat plugin + Crash plugin diff --git a/qa/suites/rados/mgr/tasks/crash.yaml b/qa/suites/rados/mgr/tasks/crash.yaml new file mode 100644 index 0000000000000..77183c74f320e --- /dev/null +++ b/qa/suites/rados/mgr/tasks/crash.yaml @@ -0,0 +1,16 @@ + +tasks: + - install: + - ceph: + # tests may leave mgrs broken, so don't try and call into them + # to invoke e.g. pg dump during teardown. 
+ wait-for-scrub: false + log-whitelist: + - overall HEALTH_ + - \(MGR_DOWN\) + - \(PG_ + - replacing it with standby + - No standby daemons available + - cephfs_test_runner: + modules: + - tasks.mgr.test_crash diff --git a/qa/suites/rados/singleton/all/test-crash.yaml b/qa/suites/rados/singleton/all/test-crash.yaml new file mode 100644 index 0000000000000..6dbffb48c31b3 --- /dev/null +++ b/qa/suites/rados/singleton/all/test-crash.yaml @@ -0,0 +1,14 @@ +roles: + - [client.0, mon.a, mgr.x, osd.0, osd.1, osd.2] + +tasks: + - install: + - ceph: + log-whitelist: + - Reduced data availability + - OSD_.*DOWN + - workunit: + clients: + client.0: + - rados/test_crash.sh + - ceph.restart: [osd.*] diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 758c7e1484173..e507782f13619 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -1135,7 +1135,7 @@ def raw_cluster_cmd(self, *args): ) return proc.stdout.getvalue() - def raw_cluster_cmd_result(self, *args): + def raw_cluster_cmd_result(self, *args, **kwargs): """ Start ceph on a cluster. Return success or failure information. 
""" @@ -1152,10 +1152,9 @@ def raw_cluster_cmd_result(self, *args): self.cluster, ] ceph_args.extend(args) - proc = self.controller.run( - args=ceph_args, - check_status=False, - ) + kwargs['args'] = ceph_args + kwargs['check_status'] = False + proc = self.controller.run(**kwargs) return proc.exitstatus def run_ceph_w(self): diff --git a/qa/tasks/mgr/test_crash.py b/qa/tasks/mgr/test_crash.py new file mode 100644 index 0000000000000..0c751d1c98d31 --- /dev/null +++ b/qa/tasks/mgr/test_crash.py @@ -0,0 +1,108 @@ + + +from mgr_test_case import MgrTestCase + +import json +import logging +import datetime + +log = logging.getLogger(__name__) +UUID = 'd5775432-0742-44a3-a435-45095e32e6b1' +DATEFMT = '%Y-%m-%d %H:%M:%S.%f' + + +class TestCrash(MgrTestCase): + + def setUp(self): + self.setup_mgrs() + self._load_module('crash') + + # Whip up some crash data + self.crashes = dict() + now = datetime.datetime.utcnow() + + for i in (0, 1, 3, 4, 8): + timestamp = now - datetime.timedelta(days=i) + timestamp = timestamp.strftime(DATEFMT) + 'Z' + crash_id = '_'.join((timestamp, UUID)).replace(' ', '_') + self.crashes[crash_id] = { + 'crash_id': crash_id, 'timestamp': timestamp, + } + + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'post', '-i', '-', + stdin=json.dumps(self.crashes[crash_id]), + ) + ) + + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + log.warning("setUp: crash ls returns %s" % retstr) + + self.oldest_crashid = crash_id + + def tearDown(self): + for crash in self.crashes.itervalues(): + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crash['crash_id'] + ) + + def test_info(self): + for crash in self.crashes.itervalues(): + log.warning('test_info: crash %s' % crash) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls' + ) + log.warning('ls output: %s' % retstr) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'info', 
crash['crash_id'], + ) + log.warning('crash info output: %s' % retstr) + crashinfo = json.loads(retstr) + self.assertIn('crash_id', crashinfo) + self.assertIn('timestamp', crashinfo) + + def test_ls(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + for crash in self.crashes.itervalues(): + self.assertIn(crash['crash_id'], retstr) + + def test_rm(self): + crashid = self.crashes.keys()[0] + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'rm', crashid, + ) + ) + + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + self.assertNotIn(crashid, retstr) + + def test_stat(self): + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'stat', + ) + self.assertIn('5 crashes recorded', retstr) + self.assertIn('4 older than 1 days old:', retstr) + self.assertIn('3 older than 3 days old:', retstr) + self.assertIn('1 older than 7 days old:', retstr) + + def test_prune(self): + self.assertEqual( + 0, + self.mgr_cluster.mon_manager.raw_cluster_cmd_result( + 'crash', 'prune', '5' + ) + ) + retstr = self.mgr_cluster.mon_manager.raw_cluster_cmd( + 'crash', 'ls', + ) + self.assertNotIn(self.oldest_crashid, retstr) diff --git a/qa/tasks/mgr/test_module_selftest.py b/qa/tasks/mgr/test_module_selftest.py index f3920eb09e83c..d1bc04b6ec333 100644 --- a/qa/tasks/mgr/test_module_selftest.py +++ b/qa/tasks/mgr/test_module_selftest.py @@ -58,6 +58,9 @@ def test_selftest_run(self): def test_telemetry(self): self._selftest_plugin("telemetry") + def test_crash(self): + self._selftest_plugin("crash") + def test_selftest_config_update(self): """ That configuration updates are seen by running mgr modules diff --git a/qa/tasks/vstart_runner.py b/qa/tasks/vstart_runner.py index e958c93ecc479..4e6d15a98e201 100644 --- a/qa/tasks/vstart_runner.py +++ b/qa/tasks/vstart_runner.py @@ -558,19 +558,20 @@ def run_ceph_w(self): proc = self.controller.run([os.path.join(BIN_PREFIX, "ceph"), "-w"], 
wait=False, stdout=StringIO()) return proc - def raw_cluster_cmd(self, *args): + def raw_cluster_cmd(self, *args, **kwargs): """ args like ["osd", "dump"} return stdout string """ - proc = self.controller.run([os.path.join(BIN_PREFIX, "ceph")] + list(args)) + proc = self.controller.run([os.path.join(BIN_PREFIX, "ceph")] + list(args), **kwargs) return proc.stdout.getvalue() - def raw_cluster_cmd_result(self, *args): + def raw_cluster_cmd_result(self, *args, **kwargs): """ like raw_cluster_cmd but don't check status, just return rc """ - proc = self.controller.run([os.path.join(BIN_PREFIX, "ceph")] + list(args), check_status=False) + kwargs['check_status'] = False + proc = self.controller.run([os.path.join(BIN_PREFIX, "ceph")] + list(args), **kwargs) return proc.exitstatus def admin_socket(self, daemon_type, daemon_id, command, check_status=True): diff --git a/qa/workunits/rados/test_crash.sh b/qa/workunits/rados/test_crash.sh new file mode 100755 index 0000000000000..6e7aaaaba6348 --- /dev/null +++ b/qa/workunits/rados/test_crash.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +set -x + +# run on a single-node three-OSD cluster + +sudo killall -ABRT ceph-osd +sleep 5 + +# kill caused coredumps; find them and delete them, carefully, so as +# not to disturb other coredumps, or else teuthology will see them +# and assume test failure. 
sudos are because the core files are +# root/600 +for f in $(find $TESTDIR/archive/coredump -type f); do + gdb_output=$(echo "quit" | sudo gdb /usr/bin/ceph-osd $f) + if expr match "$gdb_output" ".*generated.*ceph-osd.*" && \ + ( \ + + expr match "$gdb_output" ".*terminated.*signal 6.*" || \ + expr match "$gdb_output" ".*terminated.*signal SIGABRT.*" \ + ) + then + sudo rm $f + fi +done + +# let daemon find crashdumps on startup +sudo systemctl restart ceph-crash +sleep 30 + +# must be 3 crashdumps registered and moved to crash/posted +[ $(ceph crash ls | wc -l) = 3 ] || exit 1 +[ $(sudo find /var/lib/ceph/crash/posted/ -name meta | wc -l) = 3 ] || exit 1 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 772da92f29ee1..b52c55a680b3b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -950,6 +950,9 @@ configure_file(${CMAKE_SOURCE_DIR}/src/init-ceph.in configure_file(ceph-post-file.in ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file @ONLY) +configure_file(ceph-crash.in + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash @ONLY) + if(WITH_TESTS) install(PROGRAMS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-debugpack @@ -960,6 +963,7 @@ endif() install(PROGRAMS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-post-file + ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph-crash ${CMAKE_SOURCE_DIR}/src/ceph-run ${CMAKE_SOURCE_DIR}/src/ceph-clsinfo DESTINATION bin) diff --git a/src/ceph-crash.in b/src/ceph-crash.in new file mode 100755 index 0000000000000..b43cd782c3fe3 --- /dev/null +++ b/src/ceph-crash.in @@ -0,0 +1,83 @@ +#!@PYTHON_EXECUTABLE@ +# -*- mode:python -*- +# vim: ts=4 sw=4 smarttab expandtab + +import argparse +import logging +import os +import subprocess +import sys +import time + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + '-p', '--path', default='/var/lib/ceph/crash', + help='base path to monitor for crash dumps') + 
parser.add_argument( + '-d', '--delay', default=10.0, type=float, + help='minutes to delay between scans (0 to exit after one)', + ) + return parser.parse_args() + + +def post_crash(path): + pr = subprocess.Popen( + args=['timeout', '30', 'ceph', 'crash', 'post', '-i', '-'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + f = open(os.path.join(path, 'meta'), 'r') + stdout, stderr = pr.communicate(input=f.read()) + rc = pr.wait() + f.close() + if rc != 0: + log.warning('post %s failed: %s' % (path, stderr)) + return rc + + +def scrape_path(path): + for p in os.listdir(path): + crashpath = os.path.join(path, p) + metapath = os.path.join(crashpath, 'meta') + donepath = os.path.join(crashpath, 'done') + if os.path.isfile(metapath): + if not os.path.isfile(donepath): + # hang out just for a bit; either we interrupted the dump + # or the daemon crashed before finishing it + time.sleep(1) + if not os.path.isfile(donepath): + return + # ok, we can process this one + rc = post_crash(crashpath) + if rc == 0: + os.rename(crashpath, os.path.join(path, 'posted/', p)) + log.debug( + "posted %s and renamed %s -> %s " % + (metapath, p, os.path.join('posted/', p)) + ) + + +def main(): + args = parse_args() + postdir = os.path.join(args.path, 'posted') + + while not os.path.isdir(postdir): + log.error("%s does not exist; please create" % postdir) + time.sleep(30) + + log.info("monitoring path %s, delay %ds" % (args.path, args.delay * 60.0)) + while True: + scrape_path(args.path) + if args.delay == 0: + sys.exit(0) + time.sleep(args.delay * 60) + + +if __name__ == "__main__": + main() diff --git a/src/common/BackTrace.cc b/src/common/BackTrace.cc index c179d1dac1252..90b83df356c1c 100644 --- a/src/common/BackTrace.cc +++ b/src/common/BackTrace.cc @@ -1,9 +1,13 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + #include #include #include #include "BackTrace.h" #include "common/version.h" +#include 
"common/Formatter.h" #define _STR(x) #x #define STRINGIFY(x) _STR(x) @@ -70,4 +74,66 @@ void BackTrace::print(std::ostream& out) const } } +void BackTrace::dump(Formatter *f) const +{ + f->open_array_section("backtrace"); + for (size_t i = skip; i < size; i++) { + // out << " " << (i-skip+1) << ": " << strings[i] << std::endl; + + size_t sz = 1024; // just a guess, template names will go much wider + char *function = (char *)malloc(sz); + if (!function) + return; + char *begin = 0, *end = 0; + + // find the parentheses and address offset surrounding the mangled name +#ifdef __FreeBSD__ + static constexpr char OPEN = '<'; +#else + static constexpr char OPEN = '('; +#endif + for (char *j = strings[i]; *j; ++j) { + if (*j == OPEN) + begin = j+1; + else if (*j == '+') + end = j; + } + if (begin && end) { + int len = end - begin; + char *foo = (char *)malloc(len+1); + if (!foo) { + free(function); + return; + } + memcpy(foo, begin, len); + foo[len] = 0; + + int status; + char *ret = nullptr; + // only demangle a C++ mangled name + if (foo[0] == '_' && foo[1] == 'Z') + ret = abi::__cxa_demangle(foo, function, &sz, &status); + if (ret) { + // return value may be a realloc() of the input + function = ret; + } + else { + // demangling failed, just pretend it's a C function with no args + strncpy(function, foo, sz); + strncat(function, "()", sz); + function[sz-1] = 0; + } + f->dump_stream("frame") << OPEN << function << end; + //fprintf(out, " %s:%s\n", stack.strings[i], function); + free(foo); + } else { + // didn't find the mangled name, just print the whole line + //out << " " << (i-skip+1) << ": " << strings[i] << std::endl; + f->dump_string("frame", strings[i]); + } + free(function); + } + f->close_section(); +} + } diff --git a/src/common/BackTrace.h b/src/common/BackTrace.h index 372788e6b64e6..5cb73d47bd6c4 100644 --- a/src/common/BackTrace.h +++ b/src/common/BackTrace.h @@ -1,3 +1,6 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: 
ts=8 sw=2 smarttab + #ifndef CEPH_BACKTRACE_H #define CEPH_BACKTRACE_H @@ -10,6 +13,8 @@ namespace ceph { +class Formatter; + struct BackTrace { const static int max = 100; @@ -36,6 +41,7 @@ struct BackTrace { const BackTrace& operator=(const BackTrace& other); void print(std::ostream& out) const; + void dump(Formatter *f) const; }; inline std::ostream& operator<<(std::ostream& out, const BackTrace& bt) { diff --git a/src/common/assert.cc b/src/common/assert.cc index 0bc04a385a8b2..45d44d12ea1aa 100644 --- a/src/common/assert.cc +++ b/src/common/assert.cc @@ -35,6 +35,14 @@ namespace ceph { void __ceph_assert_fail(const char *assertion, const char *file, int line, const char *func) { + g_assert_condition = assertion; + g_assert_file = file; + g_assert_line = line; + g_assert_func = func; + g_assert_thread = (unsigned long long)pthread_self(); + pthread_getname_np(pthread_self(), g_assert_thread_name, + sizeof(g_assert_thread_name)); + ostringstream tss; tss << ceph_clock_now(); @@ -51,16 +59,14 @@ namespace ceph { oss << BackTrace(1); dout_emergency(oss.str()); - dout_emergency(" NOTE: a copy of the executable, or `objdump -rdS ` " - "is needed to interpret this.\n"); - if (g_assert_context) { lderr(g_assert_context) << buf << std::endl; - *_dout << oss.str(); - *_dout << " NOTE: a copy of the executable, or `objdump -rdS ` " - << "is needed to interpret this.\n" << dendl; + *_dout << oss.str() << dendl; - g_assert_context->_log->dump_recent(); + // dump recent only if the abort signal handler won't do it for us + if (!g_assert_context->_conf->fatal_signal_handlers) { + g_assert_context->_log->dump_recent(); + } } abort(); @@ -77,6 +83,14 @@ namespace ceph { ostringstream tss; tss << ceph_clock_now(); + g_assert_condition = assertion; + g_assert_file = file; + g_assert_line = line; + g_assert_func = func; + g_assert_thread = (unsigned long long)pthread_self(); + pthread_getname_np(pthread_self(), g_assert_thread_name, + sizeof(g_assert_thread_name)); + class 
BufAppender { public: BufAppender(char* buf, int size) : bufptr(buf), remaining(size) { @@ -126,16 +140,14 @@ namespace ceph { oss << *bt; dout_emergency(oss.str()); - dout_emergency(" NOTE: a copy of the executable, or `objdump -rdS ` " - "is needed to interpret this.\n"); - if (g_assert_context) { lderr(g_assert_context) << buf << std::endl; - *_dout << oss.str(); - *_dout << " NOTE: a copy of the executable, or `objdump -rdS ` " - << "is needed to interpret this.\n" << dendl; + *_dout << oss.str() << dendl; - g_assert_context->_log->dump_recent(); + // dump recent only if the abort signal handler won't do it for us + if (!g_assert_context->_conf->fatal_signal_handlers) { + g_assert_context->_log->dump_recent(); + } } abort(); diff --git a/src/common/ceph_context.cc b/src/common/ceph_context.cc index 5fee784edae9f..014741b276fe6 100644 --- a/src/common/ceph_context.cc +++ b/src/common/ceph_context.cc @@ -391,6 +391,12 @@ void CephContext::do_command(std::string_view command, const cmdmap_t& cmdmap, } lgeneric_dout(this, 1) << "do_command '" << command << "' '" << ss.str() << dendl; + if (command == "assert" && _conf->debug_asok_assert_abort) { + assert(0 == "assert"); + } + if (command == "abort" && _conf->debug_asok_assert_abort) { + abort(); + } if (command == "perfcounters_dump" || command == "1" || command == "perf dump") { std::string logger; @@ -582,6 +588,8 @@ CephContext::CephContext(uint32_t module_type_, _plugin_registry = new PluginRegistry(this); _admin_hook = new CephContextHook(this); + _admin_socket->register_command("assert", "assert", _admin_hook, ""); + _admin_socket->register_command("abort", "abort", _admin_hook, ""); _admin_socket->register_command("perfcounters_dump", "perfcounters_dump", _admin_hook, ""); _admin_socket->register_command("1", "1", _admin_hook, ""); _admin_socket->register_command("perf dump", "perf dump name=logger,type=CephString,req=false name=counter,type=CephString,req=false", _admin_hook, "dump perfcounters value"); 
diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index a664e9b18f468..99659458828f1 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -34,6 +34,7 @@ OPTION(chdir, OPT_STR) OPTION(restapi_log_level, OPT_STR) // default set by Python code OPTION(restapi_base_url, OPT_STR) // " OPTION(fatal_signal_handlers, OPT_BOOL) +OPTION(crash_dir, OPT_STR) SAFE_OPTION(erasure_code_dir, OPT_STR) // default location for erasure-code plugins OPTION(log_file, OPT_STR) // default changed by common_preinit() @@ -1519,6 +1520,7 @@ OPTION(rgw_torrent_sha_unit, OPT_INT) // torrent field piece length 512K OPTION(event_tracing, OPT_BOOL) // true if LTTng-UST tracepoints should be enabled OPTION(debug_deliberately_leak_memory, OPT_BOOL) +OPTION(debug_asok_assert_abort, OPT_BOOL) OPTION(rgw_swift_custom_header, OPT_STR) // option to enable swift custom headers diff --git a/src/common/options.cc b/src/common/options.cc index 8bf2f24830cbf..718523931c0f0 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -537,6 +537,10 @@ std::vector