Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect stack traces from system.stack_trace table #9834

Merged
merged 2 commits into from
Mar 24, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
21 changes: 16 additions & 5 deletions dbms/tests/clickhouse-test
Original file line number Diff line number Diff line change
Expand Up @@ -98,20 +98,29 @@ def get_processlist(client_cmd):
return "" # server seems dead


def get_stacktraces(server_pid):
# collect server stacktraces using gdb
def get_stacktraces_from_gdb(server_pid):
    """Dump the backtraces of all threads of a process by attaching gdb to it.

    :param server_pid: pid of the process to attach to; it is interpolated
        into the ``gdb -p`` command line.
    :return: whatever ``gdb`` wrote to stdout, or a human-readable error
        string if the command could not be run or exited with a non-zero
        status.
    """
    gdb_command = "gdb -batch -ex 'thread apply all backtrace' -p {}".format(server_pid)
    try:
        return subprocess.check_output(gdb_command, shell=True)
    except Exception as ex:
        # Best effort: the result is only printed for diagnostics, so report
        # the failure as text instead of propagating the exception.
        return "Error occured while receiving stack traces from gdb: {}".format(str(ex))


# collect server stacktraces from system.stack_trace table
def get_stacktraces_from_clickhouse(client):
    """Fetch symbolized stack traces from the ``system.stack_trace`` table.

    :param client: command line used to start the ClickHouse client; it is
        placed at the beginning of a shell command, so it may already carry
        its own arguments.
    :return: the client's stdout (one ``Vertical``-formatted row per row of
        ``system.stack_trace``), or a human-readable error string if the
        command could not be run or exited with a non-zero status.
    """
    # Each frame is rendered as "<source line>: <demangled symbol>" and the
    # frames of one trace are joined with a newline.
    query = (
        "SELECT arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), "
        "arrayMap(x -> addressToLine(x), trace), "
        "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace "
        "FROM system.stack_trace format Vertical"
    )
    cmd = "{} --allow_introspection_functions=1 --query \"{}\"".format(client, query)
    try:
        # check_output (not check_call) so the traces are returned to the
        # caller; check_call would return the exit status 0 and the caller
        # would print a stray "0" instead of the traces.
        return subprocess.check_output(cmd, shell=True)
    except Exception as ex:
        # Best effort: the result is only printed for diagnostics, so report
        # the failure as text instead of propagating the exception.
        return "Error occured while receiving stack traces from client: {}".format(str(ex))


def get_server_pid(server_tcp_port):
cmd = "lsof -i tcp:{port} -s tcp:LISTEN -Fp | awk '/^p[0-9]+$/{{print substr($0, 2)}}'".format(port=server_tcp_port)
try:
output = subprocess.check_output(cmd, shell=True)
if output:
return int(output[1:])
return int(output)
else:
return None # server dead
except Exception as ex:
Expand Down Expand Up @@ -459,8 +468,10 @@ def main(args):
server_pid = get_server_pid(clickhouse_tcp_port)
if server_pid:
print("\nLocated ClickHouse server process {} listening at TCP port {}".format(server_pid, clickhouse_tcp_port))
print("\nCollecting stacktraces from all running threads:")
print(get_stacktraces(server_pid))
print("\nCollecting stacktraces from system.stacktraces table:")
print(get_stacktraces_from_clickhouse(args.client))
print("\nCollecting stacktraces from all running threads with gdb:")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only thing I'm not sure about is whether we need a line feed here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it looks OK:

6 tests passed. 0 tests skipped.                                                                                                                                                                                                                                                                                                                                                                                                                                                                Found hung queries in processlist:                                                                                                                                                                                                              Row 1:                                                                                                                                                                                                                                          
──────                                                                                                                                                                                                                                          is_initial_query:     1                                                                                                                                                                                                                         user:                 default                                                                                                                                                                                                                   query_id:             ec757b28-92ce-4e50-aa0f-9bb546e1766d                                                                                                                                                                                      
address:              ::1                                                                                                                                                                                                                       
port:                 46834                                                                                                                                                                                                                     
initial_user:         default                                                                                                                                                                                                                   initial_query_id:     ec757b28-92ce-4e50-aa0f-9bb546e1766d                                                                                                                                                                                      initial_address:      ::1                                                                                                                                                                                                                       initial_port:         46834                                                                                                                                                                                                                     interface:            1                                                                                                                                                                                                                         
os_user:              alesap                                                                                                                                                                                                                    
client_hostname:      click.sas.yp-c.yandex.net                                                                                                                                                                                                 client_name:          ClickHouse client                                                                                                                                                                                                         
client_revision:      54433                                                                                                                                                                                                                     
client_version_major: 20                                                                                                                                                                                                                        
client_version_minor: 3                                                                                                                                                                                                                         
client_version_patch: 1                                                                                                                                                                                                                         http_method:          0                                                                                                                                                                                                                         
http_user_agent:                                                                                                                                                                                                                                
quota_key:                                                                                                                                                                                                                                      elapsed:              527.716612732                                                                                                                                                                                                             is_cancelled:         0                                                                                                                                                                                                                         read_rows:            65536                                                                                                                                                                                                                     read_bytes:           524288                                                                                                                                                                                                                    
total_rows_approx:    0                                                                                                                                                                                                                         
written_rows:         0                                                                                                                                                                                                                         
written_bytes:        0                                                                                                                                                                                                                         
memory_usage:         0                                                                                                                                                                                                                         
peak_memory_usage:    0                                                                                                                                                                                                                         
query:                SELECT sleepEachRow(1) FROM system.numbers                                                                                                                                                                                
thread_ids:           [20696,20542]                                                                                                                                                                                                             
ProfileEvents.Names:  ['Query','SelectQuery','ReadCompressedBytes','CompressedReadBufferBlocks','CompressedReadBufferBytes','IOBufferAllocs','IOBufferAllocBytes','FunctionExecute','NetworkSendElapsedMicroseconds','ContextLock','RWLockAcquir
edReadLocks']                                                                                                                                                                                                                                   ProfileEvents.Values: [1,1,36,1,10,2,1048680,2,422518,19,1]                                                                                                                                                                                     
Settings.Names:       ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage','allow_introspection_functions']                                                                                                              
Settings.Values:      ['0','random','1','10000000000','1']                                                                                                                                                                                      
                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                
Located ClickHouse server process 20516 listening at TCP port 9000                                                                                                                                                                              
                                                                                                                                                                                                                                                
Collecting stacktraces from system.stacktraces table:                                                                                                                                                                                           
Row 1:                                                                                                                                                                                                                                          
──────                                                                                                                                                                                                                                          
trace: /lib/x86_64-linux-gnu/libpthread-2.27.so: pthread_cond_wait                                                                                                                                                                              
/home/alesap/code/cpp/ClickHouse/contrib/libcxx/src/condition_variable.cpp:0: std::__1::condition_variable::wait(std::__1::unique_lock<std::__1::mutex>&)                                                                                       
/home/alesap/code/cpp/ClickHouse/contrib/libcxx/include/atomic:970: BaseDaemon::waitForTerminationRequest()                                                                                                                                     
/home/alesap/code/cpp/ClickHouse/dbms/programs/server/Server.cpp:0: DB::Server::main(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, st
d::__1::char_traits<char>, std::__1::allocator<char> > > > const&)                                                                                                                                                                              
/home/alesap/code/cpp/ClickHouse/contrib/poco/Util/src/Application.cpp:0: Poco::Util::Application::run()                                                                                                                                        
/home/alesap/code/cpp/ClickHouse/dbms/programs/server/Server.cpp:178: DB::Server::run()                                                                                                                                                         
/home/alesap/code/cpp/ClickHouse/dbms/programs/server/Server.cpp:1042: mainEntryClickHouseServer(int, char**)                                                                                                                                   
/home/alesap/code/cpp/ClickHouse/dbms/programs/main.cpp:0: main                                                                                                                                                                                 
/build/glibc-OTsEL5/glibc-2.27/csu/../csu/libc-start.c:344: __libc_start_main                                                                                                                                                                   
/home/alesap/code/cpp/BuildCH/dbms/programs/clickhouse: _start
...
Row 183:                                                                                                                                                                                                                                        
────────                                                                                                                                                                                                                                        
trace: /build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/epoll_wait.c:30: epoll_wait                                                                                                                                               
/home/alesap/code/cpp/ClickHouse/contrib/poco/Net/src/SocketImpl.cpp:0: Poco::Net::SocketImpl::pollImpl(Poco::Timespan&, int)                                                                                                                   
/home/alesap/code/cpp/ClickHouse/contrib/poco/Net/src/SocketImpl.cpp:586: Poco::Net::SocketImpl::poll(Poco::Timespan const&, int)                                                                                                               
/home/alesap/code/cpp/ClickHouse/contrib/poco/Net/src/TCPServer.cpp:133: Poco::Net::TCPServer::run()                                                                                                                                            
/home/alesap/code/cpp/ClickHouse/contrib/poco/Foundation/src/Thread_POSIX.cpp:0: Poco::ThreadImpl::runnableEntry(void*)                                                                                                                         
/lib/x86_64-linux-gnu/libpthread-2.27.so: start_thread                                                                                                                                                                                          
/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97: __GI___clone                                                                                                                                                                                                                                                                                                                                                                                            
                                                                                                                                                                                                                                                
Collecting stacktraces from all running threads with gdb:                                                                                                                                                                                       
88      ../sysdeps/unix/sysv/linux/futex-internal.h: No such file or directory.                                                                                                                                                                 
[New LWP 20517]                                                                                                                                                                                                                                 
[New LWP 20518]                                                                                                                                                                                                                                 
[New LWP 20519]                                                                                                                                                                                                                                 
[New LWP 20520]

print(get_stacktraces_from_gdb(server_pid))
else:
print(
colored(
Expand Down