ClickHouse · alexey-milovidov · Feb 25, 2023 · Feb 24, 2023 · alexey-milovidov · Feb 24, 2023
diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp
@@ -237,6 +237,14 @@ void ThreadStatus::setFatalErrorCallback(std::function<void()> callback)
 
 void ThreadStatus::onFatalError()
 {
+    /// A thread that is not attached to any thread group has no group mutex to take;
+    /// dereferencing thread_group here would crash while a fatal error is being reported.
+    /// NOTE(review): assumes a callback can only be installed while a group is attached - confirm.
+    if (!thread_group)
+        return;
+
+    /// Held across the call; presumably serializes with setFatalErrorCallback (not shown here).
+    std::lock_guard lock(thread_group->mutex);
     if (fatal_error_callback)
         fatal_error_callback();
 }

diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
@@ -134,6 +134,8 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
 }
 
 
+static std::atomic<bool> fatal_error_printed{false};  /// Set by SignalListener after its report; polled by signalHandler.
+
 /** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
   */
 static void signalHandler(int sig, siginfo_t * info, void * context)
@@ -159,7 +161,16 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
     if (sig != SIGTSTP) /// This signal is used for debugging.
     {
         /// The time that is usually enough for separate thread to print info into log.
-        sleepForSeconds(20);  /// FIXME: use some feedback from threads that process stacktrace
+        /// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case.
+        for (size_t i = 0; i < 300; ++i)
+        {
+            /// We will synchronize with the thread printing the messages with an atomic variable to finish earlier.
+            if (fatal_error_printed)
+                break;
+
+            /// This coarse method of synchronization is perfectly ok for fatal signals.
+            sleepForSeconds(1);
+        }
         call_default_signal_handler(sig);
     }
 
@@ -309,7 +320,9 @@ class SignalListener : public Poco::Runnable
             }
 
             if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
+            {
                 DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
+            }
         }
 
         std::string signal_description = "Unknown signal";
@@ -407,6 +420,8 @@ class SignalListener : public Poco::Runnable
         /// When everything is done, we will try to send these error messages to client.
         if (thread_ptr)
             thread_ptr->onFatalError();
+
+        fatal_error_printed = true;
     }
 };
 

diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
@@ -611,6 +611,8 @@ void TCPHandler::runImpl()
         /// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query.
         query_context.reset();
 
+        CurrentThread::setFatalErrorCallback({});
+
         if (is_interserver_mode)
         {
             /// We don't really have session in interserver mode, new one is created for each query. It's better to reset it now.