-
Notifications
You must be signed in to change notification settings - Fork 6.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Better handling of fatal errors #46846
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,6 +237,7 @@ void ThreadStatus::setFatalErrorCallback(std::function<void()> callback) | |
|
||
void ThreadStatus::onFatalError() | ||
{ | ||
std::lock_guard lock(thread_group->mutex); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @alexey-milovidov, |
||
if (fatal_error_callback) | ||
fatal_error_callback(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,6 +134,8 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *) | |
} | ||
|
||
|
||
static std::atomic<bool> fatal_error_printed{false}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not quite correct to use std::atomic in a signal handler, it would be better to use |
||
|
||
/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log. | ||
*/ | ||
static void signalHandler(int sig, siginfo_t * info, void * context) | ||
|
@@ -159,7 +161,16 @@ static void signalHandler(int sig, siginfo_t * info, void * context) | |
if (sig != SIGTSTP) /// This signal is used for debugging. | ||
{ | ||
/// The time that is usually enough for separate thread to print info into log. | ||
sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implement TODO. |
||
/// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case. | ||
for (size_t i = 0; i < 300; ++i) | ||
{ | ||
/// We will synchronize with the thread printing the messages with an atomic variable to finish earlier. | ||
if (fatal_error_printed) | ||
break; | ||
|
||
/// This coarse method of synchronization is perfectly ok for fatal signals. | ||
sleepForSeconds(1); | ||
} | ||
call_default_signal_handler(sig); | ||
} | ||
|
||
|
@@ -309,7 +320,9 @@ class SignalListener : public Poco::Runnable | |
} | ||
|
||
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) | ||
{ | ||
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace); | ||
} | ||
} | ||
|
||
std::string signal_description = "Unknown signal"; | ||
|
@@ -407,6 +420,8 @@ class SignalListener : public Poco::Runnable | |
/// When everything is done, we will try to send these error messages to client. | ||
if (thread_ptr) | ||
thread_ptr->onFatalError(); | ||
|
||
fatal_error_printed = true; | ||
} | ||
}; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -611,6 +611,8 @@ void TCPHandler::runImpl() | |
/// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query. | ||
query_context.reset(); | ||
|
||
CurrentThread::setFatalErrorCallback({}); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We reset the callback before the destruction of TCPHandler. |
||
|
||
if (is_interserver_mode) | ||
{ | ||
/// We don't really have session in interserver mode, new one is created for each query. It's better to reset it now. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This allows it to be reset.